💾 Archived View for gmn.clttr.info › sources › geminispace.info.git › tree › tests › gus › lib › tes… captured on 2023-06-14 at 14:32:05.
⬅️ Previous capture (2023-03-20)
-=-=-=-=-=-=-
import pytest

from gus.lib.gemini import GeminiResource, GeminiRobotFileParser


class TestGeminiResource:
    """Tests for GeminiResource: link extraction, URL parsing, root detection."""

    def test_extract_contained_resources(self):
        """Check which gemtext lines are reported as contained resources.

        The multi-line fixtures are written with explicit ``\\n`` so that each
        gemtext line is unambiguous regardless of how the file is reflowed.
        """
        url = "gemini://host"

        # no content
        resources = GeminiResource(url).extract_contained_resources("")
        assert resources == []

        # not a link: the arrow is preceded by a space
        resources = GeminiResource(url).extract_contained_resources(" => link")
        assert resources == []

        # not a link: the arrow sits between preformatting fences
        resources = GeminiResource(url).extract_contained_resources(
            "```\n=> preformatted\n```"
        )
        assert resources == []

        # some links
        resources = GeminiResource(url).extract_contained_resources(
            "=> link\ntext\n=> other"
        )
        assert len(resources) == 2
        assert resources[0].raw_url == "link"
        assert resources[1].raw_url == "other"

        # links mixed with a heading and plain text lines
        resources = GeminiResource(url).extract_contained_resources(
            "\n"
            "# title\n"
            "text\n"
            "=> link\n"
            "text\n"
            "=> other\n"
        )
        assert len(resources) == 2
        assert resources[0].raw_url == "link"
        assert resources[1].raw_url == "other"

    # Each case is ([url, second ctor arg, third ctor arg], [is_valid, <unused>,
    # normalized_host, fetchable_url]).
    # NOTE(review): the second and third constructor arguments look like a
    # parent/base URL and a host - confirm against GeminiResource.__init__.
    @pytest.mark.parametrize(
        "test_input,expected_result",
        [
            (
                ["gemini://gus.guru", None, None],
                [True, "gemini://gus.guru/", "gus.guru", "gemini://gus.guru/"],
            ),
            (
                ["/bar", "gemini://gus.guru/foo", None],
                [False, None, None, None],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", None],
                [False, None, None, None],
            ),
            (
                ["/bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru", "gemini://gus.guru/bar"],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru", "gemini://gus.guru/bar"],
            ),
            (
                ["bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru", "gemini://gus.guru/bar"],
            ),
            (
                ["bar/", "gemini://gus.guru/foo/", "gus.guru"],
                [
                    True,
                    "gemini://gus.guru/foo/bar/",
                    "gus.guru",
                    "gemini://gus.guru/foo/bar/",
                ],
            ),
            (
                ["//foo.com", None, None],
                [True, "gemini://foo.com/", "foo.com", "gemini://foo.com/"],
            ),
            (
                ["gemini://gem.splatt9990.com/index.gmi", None, None],
                [
                    True,
                    "gemini://gem.splatt9990.com/index.gmi",
                    "gem.splatt9990.com",
                    "gemini://gem.splatt9990.com/index.gmi",
                ],
            ),
            (
                # port 1965 is expected to be dropped from the resulting URL
                ["gemini://gem.splatt9990.com:1965/index.gmi", None, None],
                [
                    True,
                    "gemini://gem.splatt9990.com/index.gmi",
                    "gem.splatt9990.com",
                    "gemini://gem.splatt9990.com/index.gmi",
                ],
            ),
            (
                # any other port is expected to be kept
                ["gemini://gem.splatt9990.com:1966/index.gmi", None, None],
                [
                    True,
                    "gemini://gem.splatt9990.com:1966/index.gmi",
                    "gem.splatt9990.com",
                    "gemini://gem.splatt9990.com:1966/index.gmi",
                ],
            ),
            (
                ["gemini://michaelnordmeyer.com", None, None],
                [
                    True,
                    "gemini://michaelnordmeyer.com/",
                    "michaelnordmeyer.com",
                    "gemini://michaelnordmeyer.com/",
                ],
            ),
            (
                ["log.gmi", "gemini://michaelnordmeyer.com/", None],
                [
                    True,
                    "gemini://michaelnordmeyer.com/log.gmi",
                    "michaelnordmeyer.com",
                    "gemini://michaelnordmeyer.com/log.gmi",
                ],
            ),
            (
                # the path keeps its original letter case
                ["Log.gmi", "gemini://michaelnordmeyer.com/", None],
                [
                    True,
                    "gemini://michaelnordmeyer.com/Log.gmi",
                    "michaelnordmeyer.com",
                    "gemini://michaelnordmeyer.com/Log.gmi",
                ],
            ),
        ],
    )
    def test_url_parsing(self, test_input, expected_result):
        """Build a GeminiResource from three inputs and check the parsed fields."""
        gr = GeminiResource(test_input[0], test_input[1], test_input[2])
        assert gr.is_valid == expected_result[0]
        # NOTE(review): expected_result[1] is never asserted. It presumably holds
        # an expected normalized URL - confirm whether that check was dropped on
        # purpose before re-adding it.
        assert gr.normalized_host == expected_result[2]
        assert gr.fetchable_url == expected_result[3]

    @pytest.mark.parametrize(
        "test_url,expected_result",
        [
            ("gemini://gus.guru", True),
            ("gemini://gus.guru/", True),
            ("gemini://gus.guru/franz", False),
            ("gemini://gus.guru/~franz", True),
            ("gemini://gus.guru/~franz/foo", False),
        ],
    )
    def test_is_root_like(self, test_url, expected_result):
        """Check is_root_like for host roots, ``~user`` roots and deeper paths."""
        gr = GeminiResource(test_url)
        assert gr.is_root_like == expected_result


class TestGeminiRobotFileParser:
    """Tests for GeminiRobotFileParser with several user agents.

    The robots.txt fixtures are written with explicit ``\\n`` so that every
    directive sits on its own line.
    """

    def _get_parser(self, content):
        """Return a parser for a dummy robots.txt URL, fed with ``content``."""
        dummy_url = "gemini://dummy/robots.txt"
        rp = GeminiRobotFileParser(dummy_url)
        rp.read_from_string(content)
        return rp

    def _assert_fetchable(self, rp, url="/", fetchable=True):
        """Assert the verdict of ``can_fetch_prioritized`` for ``url``.

        The tests in this class expect rules for agents listed earlier in
        ``useragents`` to win over rules for agents listed later.
        """
        useragents = ["testbot", "genericbot", "*"]
        assert rp.can_fetch_prioritized(useragents, url) == fetchable

    def test_empty_robots(self):
        """An empty robots.txt allows fetching."""
        rp = self._get_parser("")
        self._assert_fetchable(rp)

    def test_disallow_star(self):
        """A blanket disallow for ``*`` blocks the root."""
        rp = self._get_parser(
            "User-agent: *\n"
            "Disallow: /"
        )
        self._assert_fetchable(rp, "/", False)

    def test_allow_genericbot(self):
        """An allow for genericbot wins over a disallow for ``*``."""
        rp = self._get_parser(
            "User-agent: *\n"
            "Disallow: /\n"
            "User-agent: genericbot\n"
            "Allow: /"
        )
        self._assert_fetchable(rp)

    def test_allow_genericbot_but_disallow_testbot(self):
        """A disallow for testbot wins over an allow for genericbot."""
        rp = self._get_parser(
            "User-agent: genericbot\n"
            "Allow: /\n"
            "User-agent: testbot\n"
            "Disallow: /"
        )
        self._assert_fetchable(rp, "/", False)

    def test_allow_star_but_disallow_genericbot(self):
        """A disallow for genericbot wins over an allow for ``*``."""
        rp = self._get_parser(
            "User-agent: *\n"
            "Allow: /\n"
            "User-agent: genericbot\n"
            "Disallow: /"
        )
        self._assert_fetchable(rp, "/", False)

    def test_allow_only_testbot(self):
        """An allow for testbot wins over disallows for genericbot and ``*``."""
        rp = self._get_parser(
            "User-agent: *\n"
            "Disallow: /\n"
            "User-agent: genericbot\n"
            "Disallow: /\n"
            "User-agent: testbot\n"
            "Allow: /"
        )
        self._assert_fetchable(rp)

    def test_disallow_gemidev_waffle(self):
        """A path below a disallowed prefix is blocked (lowercase field name)."""
        rp = self._get_parser(
            "user-agent: *\n"
            "Disallow: /cgi-bin/wp.cgi/view\n"
            "Disallow: /cgi-bin/wp.cgi/media\n"
            "Disallow: /cgi-bin/wp.cgi/search\n"
            "Disallow: /cgi-bin/waffle.cgi/article\n"
            "Disallow: /cgi-bin/waffle.cgi/feed\n"
            "Disallow: /cgi-bin/waffle.cgi/links\n"
            "Disallow: /cgi-bin/waffle.cgi/view\n"
            "Disallow: /cgi-bin/witw.cgi/play\n"
        )
        self._assert_fetchable(rp, "/cgi-bin/waffle.cgi/feed/link", False)

    def test_disallow_infinite_maze(self):
        """Disallow rules still apply when comment lines sit between them."""
        rp = self._get_parser(
            "User-agent: *\n"
            "# We don't accept automated donations\n"
            "Disallow: /donate\n"
            "# Robots are not allowed to vote\n"
            "Disallow: /vote\n"
            "Disallow: /vote/\n"
            "Disallow: /voteru\n"
            "Disallow: /voteru/\n"
            "# Robots are forbidden to enter the infinite maze\n"
            "Disallow: /maze\n"
            "Disallow: /maze/\n"
        )
        self._assert_fetchable(rp, "/maze/l/", False)