💾 Archived View for gmn.clttr.info › sources › geminispace.info.git › tree › tests › gus › lib › tes… captured on 2023-06-14 at 14:32:05.

View Raw

More Information

⬅️ Previous capture (2023-03-20)

➡️ Next capture (2023-09-08)

🚧 View Differences

-=-=-=-=-=-=-

import pytest

from gus.lib.gemini import GeminiResource, GeminiRobotFileParser

class TestGeminiResource:
    """Tests for GeminiResource: link extraction, URL parsing, root detection."""

    def test_extract_contained_resources(self):
        """Check which lines of a document are returned as contained resources."""
        base_url = "gemini://host"

        def extract(document):
            # Every case uses a fresh resource built on the same base URL.
            return GeminiResource(base_url).extract_contained_resources(document)

        # no content
        assert extract("") == []

        # not a link: the arrow is preceded by a space
        assert extract(" => link") == []
        # not a link: the arrow line sits between ``` fences
        assert extract("```\n=> preformatted\n```") == []

        # some links, separated by a plain text line
        found = extract("=> link\ntext\n=> other")
        assert len(found) == 2
        for position, raw_url in enumerate(("link", "other")):
            assert found[position].raw_url == raw_url

        # links mixed with a heading, text lines, blank lines and a bare
        # "link" word that carries no arrow
        found = extract(
            """
# title
text
=> link
text

link

=> other
            """
        )
        assert len(found) == 2
        for position, raw_url in enumerate(("link", "other")):
            assert found[position].raw_url == raw_url

    @pytest.mark.parametrize(
        "test_input,expected_result",
        [
            (
                ["gemini://gus.guru", None, None],
                [True, "gemini://gus.guru/", "gus.guru", "gemini://gus.guru/"],
            ),
            (
                ["/bar", "gemini://gus.guru/foo", None],
                [False, None, None, None],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", None],
                [False, None, None, None],
            ),
            (
                ["/bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru", "gemini://gus.guru/bar"],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru", "gemini://gus.guru/bar"],
            ),
            (
                ["bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru", "gemini://gus.guru/bar"],
            ),
            (
                ["bar/", "gemini://gus.guru/foo/", "gus.guru"],
                [
                    True,
                    "gemini://gus.guru/foo/bar/",
                    "gus.guru",
                    "gemini://gus.guru/foo/bar/",
                ],
            ),
            (
                ["//foo.com", None, None],
                [True, "gemini://foo.com/", "foo.com", "gemini://foo.com/"],
            ),
            (
                ["gemini://gem.splatt9990.com/index.gmi", None, None],
                [
                    True,
                    "gemini://gem.splatt9990.com/index.gmi",
                    "gem.splatt9990.com",
                    "gemini://gem.splatt9990.com/index.gmi",
                ],
            ),
            (
                ["gemini://gem.splatt9990.com:1965/index.gmi", None, None],
                [
                    True,
                    "gemini://gem.splatt9990.com/index.gmi",
                    "gem.splatt9990.com",
                    "gemini://gem.splatt9990.com/index.gmi",
                ],
            ),
            (
                ["gemini://gem.splatt9990.com:1966/index.gmi", None, None],
                [
                    True,
                    "gemini://gem.splatt9990.com:1966/index.gmi",
                    "gem.splatt9990.com",
                    "gemini://gem.splatt9990.com:1966/index.gmi",
                ],
            ),
            (
                ["gemini://michaelnordmeyer.com", None, None],
                [
                    True,
                    "gemini://michaelnordmeyer.com/",
                    "michaelnordmeyer.com",
                    "gemini://michaelnordmeyer.com/",
                ],
            ),
            (
                ["log.gmi", "gemini://michaelnordmeyer.com/", None],
                [
                    True,
                    "gemini://michaelnordmeyer.com/log.gmi",
                    "michaelnordmeyer.com",
                    "gemini://michaelnordmeyer.com/log.gmi",
                ],
            ),
            (
                ["Log.gmi", "gemini://michaelnordmeyer.com/", None],
                [
                    True,
                    "gemini://michaelnordmeyer.com/Log.gmi",
                    "michaelnordmeyer.com",
                    "gemini://michaelnordmeyer.com/Log.gmi",
                ],
            ),
        ],
    )
    def test_url_parsing(self, test_input, expected_result):
        """Build a resource from each input triple and compare its parsed fields.

        ``test_input`` holds the three positional constructor arguments and
        ``expected_result`` holds four values, of which the first, third and
        fourth are compared with ``is_valid``, ``normalized_host`` and
        ``fetchable_url``.
        """
        # Local names are guesses from the sample data (a URL, what looks like
        # a parent URL, and what looks like a parent host name) - TODO confirm
        # against the GeminiResource constructor.
        raw_url, parent_url, parent_host = test_input
        # NOTE(review): the second expected value is never asserted; confirm
        # whether a matching attribute should be checked or the slot dropped.
        want_valid, _unchecked, want_host, want_fetchable = expected_result

        resource = GeminiResource(raw_url, parent_url, parent_host)

        assert resource.is_valid == want_valid
        assert resource.normalized_host == want_host
        assert resource.fetchable_url == want_fetchable

    @pytest.mark.parametrize(
        "test_url,expected_result",
        [
            ("gemini://gus.guru", True),
            ("gemini://gus.guru/", True),
            ("gemini://gus.guru/franz", False),
            ("gemini://gus.guru/~franz", True),
            ("gemini://gus.guru/~franz/foo", False),
        ],
    )
    def test_is_root_like(self, test_url, expected_result):
        """Compare ``is_root_like`` of a resource built from each URL with the expectation."""
        assert GeminiResource(test_url).is_root_like == expected_result


class TestGeminiRobotFileParser:
    """Tests for GeminiRobotFileParser decisions on sample robots.txt bodies."""

    def _get_parser(self, content):
        """Return a GeminiRobotFileParser that has read *content* as its rules."""
        parser = GeminiRobotFileParser("gemini://dummy/robots.txt")
        parser.read_from_string(content)
        return parser

    def _assert_fetchable(self, rp, url="/", fetchable=True):
        """Assert that *rp* reports *fetchable* for *url* with the test user agents."""
        # The agent list passed to can_fetch_prioritized: testbot, genericbot,
        # then the wildcard. Presumably earlier entries take precedence - the
        # cases in this class are consistent with that; verify in the parser.
        agents = ["testbot", "genericbot", "*"]
        verdict = rp.can_fetch_prioritized(agents, url)
        assert verdict == fetchable

    def test_empty_robots(self):
        """An empty robots.txt leaves the root fetchable."""
        parser = self._get_parser("")
        self._assert_fetchable(parser)

    def test_disallow_star(self):
        """A wildcard-only Disallow of the root blocks it."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, url="/", fetchable=False)

    def test_allow_genericbot(self):
        """An Allow for genericbot wins over the wildcard Disallow."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /\n"
            "\n"
            "User-agent: genericbot\n"
            "Allow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser)

    def test_allow_genericbot_but_disallow_testbot(self):
        """A Disallow for testbot wins over the Allow for genericbot."""
        robots_txt = (
            "User-agent: genericbot\n"
            "Allow: /\n"
            "\n"
            "User-agent: testbot\n"
            "Disallow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, url="/", fetchable=False)

    def test_allow_star_but_disallow_genericbot(self):
        """A Disallow for genericbot wins over the wildcard Allow."""
        robots_txt = (
            "User-agent: *\n"
            "Allow: /\n"
            "\n"
            "User-agent: genericbot\n"
            "Disallow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, url="/", fetchable=False)

    def test_allow_only_testbot(self):
        """An Allow for testbot wins over Disallows for genericbot and the wildcard."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /\n"
            "\n"
            "User-agent: genericbot\n"
            "Disallow: /\n"
            "\n"
            "User-agent: testbot\n"
            "Allow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser)

    def test_disallow_gemidev_waffle(self):
        """A path below a disallowed CGI prefix is blocked (lower-case ``user-agent`` key)."""
        robots_txt = (
            "user-agent: *\n"
            "Disallow: /cgi-bin/wp.cgi/view\n"
            "Disallow: /cgi-bin/wp.cgi/media\n"
            "Disallow: /cgi-bin/wp.cgi/search\n"
            "Disallow: /cgi-bin/waffle.cgi/article\n"
            "Disallow: /cgi-bin/waffle.cgi/feed\n"
            "Disallow: /cgi-bin/waffle.cgi/links\n"
            "Disallow: /cgi-bin/waffle.cgi/view\n"
            "Disallow: /cgi-bin/witw.cgi/play\n"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(
            parser, url="/cgi-bin/waffle.cgi/feed/link", fetchable=False
        )

    def test_disallow_infinite_maze(self):
        """A path below ``/maze/`` is blocked in a rule set interleaved with comments."""
        robots_txt = (
            "User-agent: *\n"
            "# We don't accept automated donations\n"
            "Disallow: /donate\n"
            "# Robots are not allowed to vote\n"
            "Disallow: /vote\n"
            "Disallow: /vote/\n"
            "Disallow: /voteru\n"
            "Disallow: /voteru/\n"
            "# Robots are forbidden to enter the infinite maze\n"
            "Disallow: /maze\n"
            "Disallow: /maze/\n"
            "\n"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, url="/maze/l/", fetchable=False)