💾 Archived View for gmn.clttr.info › sources › geminispace.info.git › tree › tests › gus › lib › tes… captured on 2023-01-29 at 05:09:19.

View Raw

More Information

➡️ Next capture (2023-03-20)

-=-=-=-=-=-=-

import pytest

from gus.lib.gemini import GeminiResource, GeminiRobotFileParser

class TestGeminiResource:
    """Tests for link extraction, URL normalisation and root detection of GeminiResource."""

    def test_extract_contained_resources(self):
        """Only ``=>`` lines outside preformatted fences are reported, in document order."""

        def extract(gemtext):
            # Build a fresh resource for every document under test.
            return GeminiResource("gemini://host").extract_contained_resources(gemtext)

        def raw_urls(gemtext):
            return [found.raw_url for found in extract(gemtext)]

        # An empty document contains no links.
        assert extract("") == []

        # A line with leading whitespace before "=>" is not a link line.
        assert extract(" => link") == []

        # Link syntax inside a preformatted fence is ignored.
        assert extract("```\n=> preformatted\n```") == []

        # Two link lines separated by plain text.
        assert raw_urls("=> link\ntext\n=> other") == ["link", "other"]

        # A fuller document: heading, text, blank lines and a bare word "link"
        # that must not be mistaken for a link line.
        document = (
            "\n"
            "# title\n"
            "text\n"
            "=> link\n"
            "text\n"
            "\n"
            "link\n"
            "\n"
            "=> other\n"
            "            "
        )
        assert raw_urls(document) == ["link", "other"]

    @pytest.mark.parametrize(
        "test_input,expected_result",
        [
            # Absolute URL without any further context.
            (
                ["gemini://gus.guru", None, None],
                [True, "gemini://gus.guru", "gus.guru"],
            ),
            # Relative references without the third argument are rejected.
            (
                ["/bar", "gemini://gus.guru/foo", None],
                [False, None, None],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", None],
                [False, None, None],
            ),
            # Host-absolute paths replace the whole path of the second argument.
            (
                ["/bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru"],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru"],
            ),
            # Path-relative references: a trailing slash on the second argument matters.
            (
                ["bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru"],
            ),
            (
                ["bar", "gemini://gus.guru/foo/", "gus.guru"],
                [True, "gemini://gus.guru/foo/bar", "gus.guru"],
            ),
            # Scheme-relative URL is given the gemini scheme.
            (
                ["//foo.com", None, None],
                [True, "gemini://foo.com", "foo.com"],
            ),
            # Absolute URL with a path; port 1965 is dropped from the normalised form.
            (
                ["gemini://gem.splatt9990.com/index.gmi", None, None],
                [True, "gemini://gem.splatt9990.com/index.gmi", "gem.splatt9990.com"],
            ),
            (
                ["gemini://gem.splatt9990.com:1965/index.gmi", None, None],
                [True, "gemini://gem.splatt9990.com/index.gmi", "gem.splatt9990.com"],
            ),
        ],
    )
    def test_url_parsing(self, test_input, expected_result):
        """Check validity, normalised URL and normalised host for one input triple.

        ``test_input`` holds the three positional constructor arguments: the URL
        under test followed by two context values (presumably the parent URL and
        the parent host -- confirm against ``GeminiResource``).
        ``expected_result`` holds ``[is_valid, normalized_url, normalized_host]``.
        """
        want_valid, want_url, want_host = expected_result
        resource = GeminiResource(*test_input)
        assert resource.is_valid == want_valid
        assert resource.normalized_url == want_url
        assert resource.normalized_host == want_host

    @pytest.mark.parametrize(
        "test_url,expected_result",
        [
            # Bare host, with and without trailing slash.
            ("gemini://gus.guru", True),
            ("gemini://gus.guru/", True),
            # An ordinary first-level path is not root-like ...
            ("gemini://gus.guru/franz", False),
            # ... but a first-level "~user" path is.
            ("gemini://gus.guru/~franz", True),
            # Anything below the "~user" path is not.
            ("gemini://gus.guru/~franz/foo", False),
        ],
    )
    def test_is_root_like(self, test_url, expected_result):
        """``is_root_like`` must match the expectation for ``test_url``."""
        assert GeminiResource(test_url).is_root_like == expected_result


class TestGeminiRobotFileParser:
    """Tests for GeminiRobotFileParser queried with a prioritised list of user agents."""

    def _get_parser(self, content):
        """Return a parser that has read ``content`` as its robots.txt body."""
        parser = GeminiRobotFileParser("gemini://dummy/robots.txt")
        parser.read_from_string(content)
        return parser

    def _assert_fetchable(self, rp, url="/", fetchable=True):
        """Assert that ``rp`` reports ``fetchable`` for ``url``.

        The query always passes the agents "testbot", "genericbot" and "*",
        in that order.
        """
        verdict = rp.can_fetch_prioritized(["testbot", "genericbot", "*"], url)
        assert verdict == fetchable

    def test_empty_robots(self):
        """An empty robots.txt leaves the root fetchable."""
        self._assert_fetchable(self._get_parser(""))

    def test_disallow_star(self):
        """A wildcard disallow blocks the root."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /"
        )
        self._assert_fetchable(self._get_parser(robots_txt), "/", False)

    def test_allow_genericbot(self):
        """An allow for genericbot wins over the wildcard disallow."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /\n"
            "\n"
            "User-agent: genericbot\n"
            "Allow: /"
        )
        self._assert_fetchable(self._get_parser(robots_txt))

    def test_allow_genericbot_but_disallow_testbot(self):
        """A disallow for testbot wins over the allow for genericbot."""
        robots_txt = (
            "User-agent: genericbot\n"
            "Allow: /\n"
            "\n"
            "User-agent: testbot\n"
            "Disallow: /"
        )
        self._assert_fetchable(self._get_parser(robots_txt), "/", False)

    def test_allow_star_but_disallow_genericbot(self):
        """A disallow for genericbot wins over the wildcard allow."""
        robots_txt = (
            "User-agent: *\n"
            "Allow: /\n"
            "\n"
            "User-agent: genericbot\n"
            "Disallow: /"
        )
        self._assert_fetchable(self._get_parser(robots_txt), "/", False)

    def test_allow_only_testbot(self):
        """An allow for testbot wins over disallows for genericbot and the wildcard."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /\n"
            "\n"
            "User-agent: genericbot\n"
            "Disallow: /\n"
            "\n"
            "User-agent: testbot\n"
            "Allow: /"
        )
        self._assert_fetchable(self._get_parser(robots_txt))

    def test_disallow_gemidev_waffle(self):
        """A path below a disallowed prefix is blocked (lower-case ``user-agent`` key)."""
        robots_txt = (
            "user-agent: *\n"
            "Disallow: /cgi-bin/wp.cgi/view\n"
            "Disallow: /cgi-bin/wp.cgi/media\n"
            "Disallow: /cgi-bin/wp.cgi/search\n"
            "Disallow: /cgi-bin/waffle.cgi/article\n"
            "Disallow: /cgi-bin/waffle.cgi/feed\n"
            "Disallow: /cgi-bin/waffle.cgi/links\n"
            "Disallow: /cgi-bin/waffle.cgi/view\n"
            "Disallow: /cgi-bin/witw.cgi/play\n"
        )
        self._assert_fetchable(
            self._get_parser(robots_txt), "/cgi-bin/waffle.cgi/feed/link", False
        )