💾 Archived View for gmn.clttr.info › sources › geminispace.info.git › tree › gus › excludes.py.txt captured on 2023-12-28 at 15:48:39.

View Raw

More Information

⬅️ Previous capture (2023-09-08)

🚧 View Differences

-=-=-=-=-=-=-

# URL prefixes to exclude.
#
# These are checked against normalized_url, so every entry must:
#   * start with the gemini:// protocol,
#   * be all lowercase, and
#   * NOT specify the port if it is 1965 (a normalized URL never carries
#     that port, so such an entry could never match).
# Non-default ports (e.g. :1966) have to be spelled out.
EXCLUDED_URL_PREFIXES = [
    "gemini://localhost",
    "gemini://example.org",
    "gemini://example.com",
    "gemini://www.youtube.com/",
    # LEO generating useless URIs
    "gemini://tilde.team/~khuxkm/leo/",
    # all combinations of a tictactoe board
    "gemini://tictactoe.lanterne.chilliet.eu",

    "gemini://kennedy.gemi.dev/",
    "gemini://gemi.dev/cgi-bin/",
    "gemini://auragem.space/texts/jewish",
    "gemini://auragem.space/twitch/",
    # serving big files and slow capsule -> takes too long to crawl
    "gemini://kamalatta.ddnss.de/",
    "gemini://tweek.zyxxyz.eu/valentina/",

    # ASCII art with emulated modem speed
    "gemini://ansi.hrtk.in/",
    "gemini://matrix.kiwifarms.net",

    # ZachDeCook's songs
    "gemini://songs.zachdecook.com/song.gmi.php/",
    "gemini://songs.zachdecook.com/chord.svg/",
    "gemini://gemini.zachdecook.com/cgi-bin/ccel.sh",

    # kwiecien gemcast
    "gemini://kwiecien.us/gemcast/",

    # breaks crawl due to recursion overflow
    "gemini://cadence.moe/chapo/",

    "gemini://nixo.xyz/reply/",
    "gemini://nixo.xyz/notify",
    "gemini://gemini.thebackupbox.net/queryresponse",
    "gemini://gemini.thebackupbox.net/cgi-bin/",
    "gemini://gem.garichankar.com/share_audio",

    # Mastodon mirror
    "gemini://vps01.rdelaage.ovh/",
    "gemini://mastogem.picasoft.net/",
    "gemini://mastogem.remorse.us/",

    # various failing resources on runjimmyrunrunyoufuckerrun.com
    "gemini://runjimmyrunrunyoufuckerrun.com/fonts/",
    "gemini://runjimmyrunrunyoufuckerrun.com/tmp/",

    # Search providers
    "gemini://houston.coder.town/search?",
    "gemini://houston.coder.town/search/",
    "gemini://marginalia.nu/search",
    "gemini://geminispace.info",
    "gemini://tlgs.one/",
    "gemini://gus.guru/",

    # Geddit
    "gemini://geddit.pitr.ca/post?",
    "gemini://geddit.pitr.ca/c/",
    "gemini://geddit.glv.one/post?",
    "gemini://geddit.glv.one/c/",

    # Marmaladefoo calculator
    "gemini://gemini.marmaladefoo.com/cgi-bin/calc.cgi?",
    "gemini://gemini.circumlunar.space/users/fgaz/calculator/",

    # Individual weather pages
    "gemini://acidic.website/cgi-bin/weather.tcl?",
    "gemini://caolan.uk/weather/",

    # Alex Schroeder's problematic stuff
    "gemini://alexschroeder.ch/image_external",
    "gemini://alexschroeder.ch/html/",
    "gemini://alexschroeder.ch/diff/",
    "gemini://alexschroeder.ch/history/",
    "gemini://alexschroeder.ch/http",
    "gemini://alexschroeder.ch/https",
    "gemini://alexschroeder.ch/tag/",
    "gemini://alexschroeder.ch/raw/",
    "gemini://alexschroeder.ch/map/",
    "gemini://alexschroeder.ch/do/comment",
    "gemini://alexschroeder.ch/do/rc",
    "gemini://alexschroeder.ch/do/rss",
    "gemini://alexschroeder.ch/do/new",
    "gemini://alexschroeder.ch/do/more",
    "gemini://alexschroeder.ch/do/tags",
    "gemini://alexschroeder.ch/do/match",
    "gemini://alexschroeder.ch/do/search",
    "gemini://alexschroeder.ch/do/gallery/",

    # mozz mailing list linkscraper
    "gemini://mozz.us/files/gemini-links.gmi",
    "gemini://gem.benscraft.info/mailing-list",
    # gemini.techrights.org
    "gemini://gemini.techrights.org/",

    # endless stream
    "gemini://202x.moe/resonance",

    # big file
    "gemini://mirrors.apple2.org.za/active/ftp.apple.asimov.net/",

    # hackernews mirror
    "gemini://gem.graypegg.com/hn/",
    # antenna filters
    "gemini://warmedal.se/~antenna/filter",

    # youtube mirror
    "gemini://auragem.space/cgi-bin/youtube.cgi?",
    "gemini://auragem.space/youtube/",

    # news mirrors - not our business
    "gemini://teapot.styx.org",
    "gemini://taz.de/",
    "gemini://gemini.knusbaum.com/feeds",
    "gemini://guardian.shit.cx/",
    "gemini://simplynews.metalune.xyz",
    "gemini://illegaldrugs.net/cgi-bin/news.php",
    "gemini://illegaldrugs.net/cgi-bin/reader",
    "gemini://rawtext.club/~sloum/geminews",
    "gemini://gemini.cabestan.tk/hn",
    "gemini://hn.filiuspatris.net/",
    "gemini://schmittstefan.de/de/nachrichten/",
    "gemini://gmi.noulin.net/mobile",
    "gemini://jpfox.fr/rss/",
    "gemini://dw.schettler.net/",
    "gemini://dioskouroi.xyz/top",
    "gemini://drewdevault.com/cgi-bin/hn.py",
    "gemini://tobykurien.com/maverick/",
    "gemini://news.manuceau.net/",
    "gemini://gemini-news.com/",
    "gemini://news.tuxmachines.org/",
    "gemini://musicdir.zachdecook.com/",
    "gemini://federal.cx/news",
    "gemini://kypan.me/cgi",

    # wikipedia proxy
    "gemini://wp.pitr.ca/",
    "gemini://wp.glv.one/",
    "gemini://wikipedia.geminet.org/",
    "gemini://wikipedia.geminet.org:1966",
    "gemini://vault.transjovian.org/",

    # client torture test
    "gemini://egsam.pitr.ca/",
    "gemini://egsam.glv.one/",
    "gemini://gemini.conman.org/test",

    # mozz's chat
    "gemini://chat.mozz.us/stream",
    "gemini://chat.mozz.us/submit",

    # gempod
    "gemini://rocketcaster.xyz/share/",

    # gopher proxy
    "gemini://80h.dev/agena/",

    # astrobotany
    "gemini://astrobotany.mozz.us/",
    "gemini://carboncopy.xyz/cgi-bin/apache.gex/",

    # infinite maze
    "gemini://alexey.shpakovsky.ru/maze",

    # susa.net
    "gemini://gemini.susa.net/cgi-bin/search?",
    "gemini://gemini.susa.net/cgi-bin/twitter?",
    "gemini://gemini.susa.net/cgi-bin/vim-search?",
    "gemini://gemini.susa.net/cgi-bin/links_stu.lua?",

    "gemini://gemini.spam.works/textfiles/",
    "gemini://gemini.spam.works/mirrors/textfiles/",
    "gemini://gemini.spam.works/users/dvn/archive/",

    # streams that never end...
    "gemini://gemini.thebackupbox.net/radio",
    "gemini://higeki.jp/radio",

    # full web proxy
    "gemini://webgate.geminet.org/",
    "gemini://drewdevault.com/cgi-bin/web.sh?",
    "gemini://gemiprox.pollux.casa/",
    "gemini://gemiprox.pollux.casa:1966",
    "gemini://ecs.d2evs.net/proxy/",

    # killing crawl, I think maybe because it's too big
    # cryptocurrency bullshit
    "gemini://gem.denarii.cloud/",

    # docs - not our business
    "gemini://cfdocs.wetterberg.nu/",
    "gemini://godocs.io",

    # git repos
    "gemini://git.skyjake.fi",
    "gemini://gemini.unlimited.pizza/git",
    # games
    "gemini://jsreed5.org/live/",
    "gemini://gemini.thegonz.net/ski",
    "gemini://gemini.thegonz.net/gemski",
    "gemini://thegonz.net/",
    "gemini://gemlog.stargrave.org/",
]

# Path names to exclude. How they are matched against a URL is decided by
# the code consuming this list (not visible in this module).
EXCLUDED_URL_PATHS = [
    # favicons
    "favicon.ico", "favicon.txt",
    # crawler directives
    "robots.txt",
    # feeds
    "rss.txt", "rss.xml",
]