💾 Archived View for gmn.clttr.info › sources › geminispace.info.git › tree › gus › lib › db_model.py… captured on 2023-01-29 at 05:07:17.

View Raw

More Information

➡️ Next capture (2023-06-14)

-=-=-=-=-=-=-

from peewee import (
    BooleanField,
    DateTimeField,
    DoesNotExist,
    FloatField,
    ForeignKeyField,
    IntegerField,
    Model,
    SqliteDatabase,
    TextField,
)

from gus.lib.gemini import GeminiResource


def init_db(filename=":memory:"):
    """
    Open the SQLite database `filename` and attach the ORM models to it.

    The Link and Page models are bound to the new database object and
    `create_tables` is invoked for them before the database is returned.
    """
    # Connection-level PRAGMAs handed to SQLite by peewee.
    sqlite_pragmas = dict(
        journal_mode="wal",
        # negative cache_size is a size in KiB rather than a page count
        cache_size=-128 * 1000,
        # needed for the ON DELETE CASCADE declared on Link's foreign keys
        foreign_keys=1,
        ignore_check_constraints=0,
    )
    database = SqliteDatabase(filename, pragmas=sqlite_pragmas)

    orm_models = [Link, Page]
    database.bind(orm_models)
    database.create_tables(orm_models)
    return database


class Page(Model):
    """
    One row per page crawled in Geminispace, together with the
    bookkeeping recorded about its crawls.
    """

    # --- where the page lives ---
    url = TextField(unique=True, index=True)
    fetchable_url = TextField(null=True)
    domain = TextField(null=True, index=True)
    port = IntegerField(null=True)

    # --- what was received ---
    content_type = TextField(null=True)
    charset = TextField(null=True)
    # TODO: normalize lang out to handle multiple values better
    lang = TextField(null=True)
    content = TextField(null=True)
    prompt = TextField(null=True)
    size = IntegerField(null=True)  # in bytes

    # --- crawl bookkeeping ---
    change_frequency = IntegerField(null=True)  # in hours
    indexed_at = DateTimeField(null=True)
    last_crawl_at = DateTimeField(null=True)
    last_crawl_success_at = DateTimeField(null=True)
    last_status = IntegerField(null=True)
    last_status_message = TextField(null=True)
    last_success_status = IntegerField(null=True)
    first_seen_at = DateTimeField(null=True)

    class Meta:
        # Multi-column indexes; each entry is (columns, unique), and both
        # of these are declared non-unique.
        indexes = (
            (
                (
                    "last_success_status",
                    "first_seen_at",
                    "indexed_at",
                    "domain",
                    "url",
                    "content_type",
                    "fetchable_url",
                ),
                False,
            ),
            (
                ("last_crawl_at", "last_crawl_success_at"),
                False,
            ),
        )


class Link(Model):
    """
    Hyperlinks between pages in Geminispace

    Each row is a directed link from `from_page` to `to_page`.
    """

    # Both ends reference Page with on_delete="CASCADE"; the backrefs expose
    # the links on Page as `outbound_links` and `backlinks` respectively.
    from_page = ForeignKeyField(Page, backref="outbound_links", on_delete="CASCADE")
    to_page = ForeignKeyField(Page, backref="backlinks", on_delete="CASCADE")
    is_cross_host_like = BooleanField()

    @staticmethod
    def get_is_cross_host_like(from_resource, to_resource):
        """
        Return True when the two resources have different
        `normalized_host_like` values, False otherwise.

        Declared as a static method because it takes no instance: it works
        both as `Link.get_is_cross_host_like(a, b)` and when looked up on a
        Link instance (without the decorator, the instance would be bound
        as `from_resource`).
        """
        return from_resource.normalized_host_like != to_resource.normalized_host_like