💾 Archived View for gmn.clttr.info › sources › geminispace.git › tree › gus › lib › db_model.py.txt captured on 2021-12-05 at 23:47:19.

View Raw

More Information

⬅️ Previous capture (2021-12-03)

➡️ Next capture (2022-03-01)

-=-=-=-=-=-=-

from peewee import (
    BooleanField,
    DateTimeField,
    DoesNotExist,
    FloatField,
    ForeignKeyField,
    IntegerField,
    Model,
    SqliteDatabase,
    TextField,
)

from gus.lib.gemini import GeminiResource


def init_db(filename=":memory:"):
    """
    Open an SQLite database and attach the Peewee ORM models to it.

    The database is opened at ``filename`` (an in-memory database by
    default), the ``Link`` and ``Page`` models are bound to it, and their
    tables are created. The database object is returned to the caller.
    """
    database = SqliteDatabase(filename)
    bound_models = [Link, Page]
    database.bind(bound_models)
    database.create_tables(bound_models)
    return database


class Page(Model):
    """
    All the pages crawled in Geminispace

    ``url`` is the only column declared without ``null=True`` and it carries
    a unique index; every other column is nullable.
    """

    # Unique + indexed: at most one row per URL.
    url = TextField(unique=True, index=True)
    fetchable_url = TextField(null=True)
    # NOTE(review): domain/port presumably mirror the host and port of
    # `url` -- confirm against the code that populates this table.
    domain = TextField(null=True)
    port = IntegerField(null=True)
    content_type = TextField(null=True)
    charset = TextField(null=True)
    # TODO: normalize lang out to handle multiple values better
    lang = TextField(null=True)
    content = TextField(null=True)
    prompt = TextField(null=True)
    size = IntegerField(null=True)  # in bytes
    change_frequency = IntegerField(null=True)  # in hours
    # Timestamp / status bookkeeping columns. NOTE(review): the exact
    # semantics (e.g. what counts as a "success") are defined by the
    # crawler and indexer, not visible here -- confirm before relying on them.
    indexed_at = DateTimeField(null=True)
    last_crawl_at = DateTimeField(null=True)
    last_crawl_success_at = DateTimeField(null=True)
    last_status = IntegerField(null=True)
    last_status_message = TextField(null=True)
    last_success_status = IntegerField(null=True)
    first_seen_at = DateTimeField(null=True)

class Link(Model):
    """
    Hyperlinks between pages in Geminispace

    Each row points from one ``Page`` to another. The source page exposes
    its rows as ``outbound_links`` and the target page as ``backlinks``
    (the back-references declared on the foreign keys below).
    """

    # on_delete="CASCADE": the foreign-key constraints ask the database to
    # drop a link when the page at either end is deleted.
    from_page = ForeignKeyField(Page, backref="outbound_links", on_delete="CASCADE")
    to_page = ForeignKeyField(Page, backref="backlinks", on_delete="CASCADE")
    is_cross_host_like = BooleanField()

    @staticmethod
    def get_is_cross_host_like(from_resource, to_resource):
        """
        Tell whether two resources have different ``normalized_host_like``
        values.

        Declared as a static method because it takes no ``self``: it can be
        called as ``Link.get_is_cross_host_like(a, b)`` or on an instance
        without the instance being mistaken for ``from_resource``.

        Both arguments only need a ``normalized_host_like`` attribute
        (presumably ``GeminiResource`` objects -- confirm against callers).
        Returns the result of comparing the two attributes with ``!=``.
        """
        return from_resource.normalized_host_like != to_resource.normalized_host_like