💾 Archived View for gmn.clttr.info › sources › geminispace.info.git › tree › gus › lib › db_model.py… captured on 2023-06-14 at 14:31:44.
⬅️ Previous capture (2023-01-29)
-=-=-=-=-=-=-
from peewee import (
    BooleanField,
    DateTimeField,
    DoesNotExist,
    FloatField,
    ForeignKeyField,
    IntegerField,
    Model,
    SqliteDatabase,
    TextField,
)

from gus.lib.gemini import GeminiResource


def init_db(filename: str = ":memory:") -> SqliteDatabase:
    """
    Bind an SQLite database to the Peewee ORM models.

    Opens (or creates) the SQLite database at ``filename``, binds the
    ``Link``, ``Page`` and ``PageContent`` models to it, creates any of
    their tables that are missing and returns the database handle.

    :param filename: path of the SQLite file; the default ``":memory:"``
        yields a throw-away in-memory database.
    :return: the bound ``SqliteDatabase`` instance.
    """
    models = [Link, Page, PageContent]
    db = SqliteDatabase(
        filename,
        pragmas={
            # Write-ahead logging.
            'journal_mode': 'wal',
            # A negative cache_size is interpreted by SQLite as a size in
            # KiB rather than a page count.
            'cache_size': -256 * 1000,
            'synchronous': 'normal',
            # Enforce foreign keys so the ON DELETE CASCADE rules declared
            # on the models take effect.
            'foreign_keys': 1,
            # 0 = CHECK constraints stay enforced.
            'ignore_check_constraints': 0,
        },
    )
    db.bind(models)
    db.create_tables(models)
    return db


class Page(Model):
    """
    Metadata of all the pages
    """

    url = TextField(unique=True, index=True)
    domain = TextField(null=True, index=True)
    port = IntegerField(null=True)
    content_type = TextField(null=True)
    charset = TextField(null=True)
    # TODO: normalize lang out to handle multiple values better
    lang = TextField(null=True)
    size = IntegerField(null=True)  # in bytes
    change_frequency = IntegerField(null=True)  # in hours
    indexed_at = DateTimeField(null=True)
    last_crawl_at = DateTimeField(null=True)
    last_crawl_success_at = DateTimeField(null=True)
    last_status = IntegerField(null=True)
    last_status_message = TextField(null=True)
    last_success_status = IntegerField(null=True)
    first_seen_at = DateTimeField(null=True)

    class Meta:
        # Multi-column indexes; the trailing False marks each one as
        # non-unique.
        indexes = (
            (
                (
                    'last_success_status',
                    'first_seen_at',
                    'indexed_at',
                    'domain',
                    'url',
                    'content_type',
                ),
                False,
            ),
            (('last_crawl_at', 'last_crawl_success_at'), False),
        )


class PageContent(Model):
    """
    Content of all pages
    """

    # Rows are removed together with the Page they belong to.
    page = ForeignKeyField(Page, backref="page_content", on_delete="CASCADE")
    content = TextField(null=True)
    prompt = TextField(null=True)


class Link(Model):
    """
    Hyperlinks between pages in Geminispace
    """

    # Both ends cascade: deleting either page deletes the link row.
    from_page = ForeignKeyField(Page, backref="outbound_links", on_delete="CASCADE")
    to_page = ForeignKeyField(Page, backref="backlinks", on_delete="CASCADE")
    is_cross_host_like = BooleanField()

    @staticmethod
    def get_is_cross_host_like(from_resource, to_resource):
        """
        Tell whether two resources have different ``normalized_host_like``.

        Declared as a static method because it takes no ``self``; this
        keeps ``Link.get_is_cross_host_like(a, b)`` working and also makes
        the call valid on an instance.

        :param from_resource: object exposing ``normalized_host_like``
            (presumably a ``GeminiResource`` - verify against callers).
        :param to_resource: object exposing ``normalized_host_like``.
        :return: result of comparing the two attributes with ``!=``.
        """
        return from_resource.normalized_host_like != to_resource.normalized_host_like


# NOTE(review): the archived copy of this module lost its indentation, so it
# is not certain whether the helper lived on Link or at module level. Expose
# it under both names so either call style keeps working.
get_is_cross_host_like = Link.get_is_cross_host_like