# URL prefixes that are excluded from crawling.
#
# These are checked against normalized_url, so every entry should:
#   - be prepended with the gemini:// protocol,
#   - be all lowercased, and
#   - not have the port specified if it is 1965.
EXCLUDED_URL_PREFIXES = [
    "gemini://localhost",
    "gemini://example.org",
    "gemini://example.com",
    "gemini://www.youtube.com/",

    # LEO generating useless URIs
    "gemini://tilde.team/~khuxkm/leo/",

    # all combinations of a tictactoe board
    "gemini://tictactoe.lanterne.chilliet.eu",
    "gemini://kennedy.gemi.dev/",
    "gemini://gemi.dev/cgi-bin/",
    "gemini://auragem.space/texts/jewish",
    "gemini://auragem.space/twitch/",

    # serving big files and slow capsule -> takes too long to crawl
    "gemini://kamalatta.ddnss.de/",
    "gemini://tweek.zyxxyz.eu/valentina/",

    # ASCII art with emulated modem speed
    "gemini://ansi.hrtk.in/",
    "gemini://matrix.kiwifarms.net",

    # ZachDeCook's songs
    "gemini://songs.zachdecook.com/song.gmi.php/",
    "gemini://songs.zachdecook.com/chord.svg/",
    "gemini://gemini.zachdecook.com/cgi-bin/ccel.sh",

    # kwiecien gemcast
    "gemini://kwiecien.us/gemcast/",

    # breaks crawl due to recursion overflow
    "gemini://cadence.moe/chapo/",
    "gemini://nixo.xyz/reply/",
    "gemini://nixo.xyz/notify",
    "gemini://gemini.thebackupbox.net/queryresponse",
    "gemini://gemini.thebackupbox.net/cgi-bin/",
    "gemini://gem.garichankar.com/share_audio",

    # Mastodon mirror
    "gemini://vps01.rdelaage.ovh/",
    "gemini://mastogem.picasoft.net/",
    "gemini://mastogem.remorse.us/",

    # various failing resources on runjimmyrunrunyoufuckerrun.com
    "gemini://runjimmyrunrunyoufuckerrun.com/fonts/",
    "gemini://runjimmyrunrunyoufuckerrun.com/tmp/",

    # Search providers
    "gemini://houston.coder.town/search?",
    "gemini://houston.coder.town/search/",
    "gemini://marginalia.nu/search",
    "gemini://geminispace.info",
    "gemini://tlgs.one/",
    "gemini://gus.guru/",

    # Geddit
    "gemini://geddit.pitr.ca/post?",
    "gemini://geddit.pitr.ca/c/",
    "gemini://geddit.glv.one/post?",
    "gemini://geddit.glv.one/c/",

    # Marmaladefoo calculator
    "gemini://gemini.marmaladefoo.com/cgi-bin/calc.cgi?",
    "gemini://gemini.circumlunar.space/users/fgaz/calculator/",

    # Individual weather pages
    "gemini://acidic.website/cgi-bin/weather.tcl?",
    "gemini://caolan.uk/weather/",

    # Alex Schroeder's problematic stuff
    "gemini://alexschroeder.ch/image_external",
    "gemini://alexschroeder.ch/html/",
    "gemini://alexschroeder.ch/diff/",
    "gemini://alexschroeder.ch/history/",
    "gemini://alexschroeder.ch/http",
    "gemini://alexschroeder.ch/https",
    "gemini://alexschroeder.ch/tag/",
    "gemini://alexschroeder.ch/raw/",
    "gemini://alexschroeder.ch/map/",
    "gemini://alexschroeder.ch/do/comment",
    "gemini://alexschroeder.ch/do/rc",
    "gemini://alexschroeder.ch/do/rss",
    "gemini://alexschroeder.ch/do/new",
    "gemini://alexschroeder.ch/do/more",
    "gemini://alexschroeder.ch/do/tags",
    "gemini://alexschroeder.ch/do/match",
    "gemini://alexschroeder.ch/do/search",
    "gemini://alexschroeder.ch/do/gallery/",

    # mozz mailing list linkscraper
    "gemini://mozz.us/files/gemini-links.gmi",
    "gemini://gem.benscraft.info/mailing-list",

    # gemini.techrights.org
    "gemini://gemini.techrights.org/",

    # endless stream
    "gemini://202x.moe/resonance",

    # big file
    "gemini://mirrors.apple2.org.za/active/ftp.apple.asimov.net/",

    # hackernews mirror
    "gemini://gem.graypegg.com/hn/",

    # antenna filters
    "gemini://warmedal.se/~antenna/filter",

    # youtube mirror
    "gemini://auragem.space/cgi-bin/youtube.cgi?",
    "gemini://auragem.space/youtube/",

    # news mirrors - not our business
    "gemini://teapot.styx.org",
    "gemini://taz.de/",
    "gemini://gemini.knusbaum.com/feeds",
    "gemini://guardian.shit.cx/",
    "gemini://simplynews.metalune.xyz",
    "gemini://illegaldrugs.net/cgi-bin/news.php",
    "gemini://illegaldrugs.net/cgi-bin/reader",
    # NOTE(review): the next entry spells out the default port 1965, which
    # the rules at the top of this list say a normalized URL never contains.
    # It therefore presumably never matches and duplicates the entry above;
    # confirm against the URL normalization code before removing it.
    "gemini://illegaldrugs.net:1965/cgi-bin/reader",
    "gemini://rawtext.club/~sloum/geminews",
    "gemini://gemini.cabestan.tk/hn",
    "gemini://hn.filiuspatris.net/",
    "gemini://schmittstefan.de/de/nachrichten/",
    "gemini://gmi.noulin.net/mobile",
    "gemini://jpfox.fr/rss/",
    "gemini://dw.schettler.net/",
    "gemini://dioskouroi.xyz/top",
    "gemini://drewdevault.com/cgi-bin/hn.py",
    "gemini://tobykurien.com/maverick/",
    "gemini://news.manuceau.net/",
    "gemini://gemini-news.com/",
    "gemini://news.tuxmachines.org/",
    "gemini://musicdir.zachdecook.com/",
    "gemini://federal.cx/news",
    "gemini://kypan.me/cgi",

    # wikipedia proxy
    "gemini://wp.pitr.ca/",
    "gemini://wp.glv.one/",
    "gemini://wikipedia.geminet.org/",
    "gemini://wikipedia.geminet.org:1966",
    "gemini://vault.transjovian.org/",

    # client torture test
    "gemini://egsam.pitr.ca/",
    "gemini://egsam.glv.one/",
    "gemini://gemini.conman.org/test",

    # mozz's chat
    "gemini://chat.mozz.us/stream",
    "gemini://chat.mozz.us/submit",

    # gempod
    "gemini://rocketcaster.xyz/share/",

    # gopher proxy
    "gemini://80h.dev/agena/",

    # astrobotany
    "gemini://astrobotany.mozz.us/",
    "gemini://carboncopy.xyz/cgi-bin/apache.gex/",

    # infinite maze
    "gemini://alexey.shpakovsky.ru/maze",

    # susa.net
    "gemini://gemini.susa.net/cgi-bin/search?",
    "gemini://gemini.susa.net/cgi-bin/twitter?",
    "gemini://gemini.susa.net/cgi-bin/vim-search?",
    "gemini://gemini.susa.net/cgi-bin/links_stu.lua?",
    "gemini://gemini.spam.works/textfiles/",
    "gemini://gemini.spam.works/mirrors/textfiles/",
    "gemini://gemini.spam.works/users/dvn/archive/",

    # streams that never end...
    "gemini://gemini.thebackupbox.net/radio",
    "gemini://higeki.jp/radio",

    # full web proxy
    "gemini://webgate.geminet.org/",
    "gemini://drewdevault.com/cgi-bin/web.sh?",
    "gemini://gemiprox.pollux.casa/",
    "gemini://gemiprox.pollux.casa:1966",
    "gemini://ecs.d2evs.net/proxy/",

    # killing crawl, I think maybe because it's too big
    # cryptocurrency bullshit
    "gemini://gem.denarii.cloud/",

    # docs - not our business
    "gemini://cfdocs.wetterberg.nu/",
    "gemini://godocs.io",

    # git repos
    "gemini://git.skyjake.fi",
    "gemini://gemini.unlimited.pizza/git",

    # games
    "gemini://jsreed5.org/live/",
    "gemini://gemini.thegonz.net/ski",
    "gemini://gemini.thegonz.net/gemski",
    "gemini://thegonz.net/",
    "gemini://gemlog.stargrave.org/",
]

# File names that are excluded from crawling.
# NOTE(review): presumably compared against the path component of a URL
# rather than the full normalized URL - confirm against the consumer.
EXCLUDED_URL_PATHS = [
    "favicon.ico",
    "favicon.txt",
    "robots.txt",
    "rss.txt",
    "rss.xml",
]