gemini - kennedy.gemi.dev

💾 Archived View for skyjake.fi › misc › makeindex.py captured on 2023-11-04 at 16:12:54.
-=-=-=-=-=-=-
#!/usr/bin/python3
# Generates the gemlog index, archive, and tag/topic pages, plus the Atom feed.

import datetime
import os
from urllib.parse import quote
from xml.sax.saxutils import escape

# Site identity, used in page headings and in the Atom feed metadata.
GEMLOG_NAME = 'skyjake'
GEMLOG_SHORTNAME = 'skyjake'  # heading prefix on the per-topic pages
GEMLOG_ROOT = 'skyjake.fi/gemlog'  # host and path, without the gemini:// scheme
GEMLOG_AUTHOR = 'Jaakko Keränen'
GEMLOG_DESC = 'Recent gemlog posts'

# One (page, title, date, tags, timestamp) tuple per post found on disk.
pages = list()
# tag -> [(date, title, page)]
tag_index = dict()

def split_date(d):
    """Break the date string *d* into its dash-separated fields.

    The fields are returned as a list of strings in their original order
    (e.g. ``'2023-11-04'`` gives ``['2023', '11', '04']``); nothing is
    validated or converted.
    """
    fields = d.split('-')
    return fields

def get_month(d):
    """Return the second dash-separated field of the date string *d*.

    Raises IndexError when *d* contains no dash.
    """
    fields = split_date(d)
    return fields[1]

def get_year(d):
    """Return the first dash-separated field of the date string *d*.

    For a string without any dash (including ``''``) this is the whole string.
    """
    fields = split_date(d)
    return fields[0]

def parse_meta(page):
    """Read the title, date, and tag string from the gemtext file *page*.

    Lines are classified by how they start:

    * a line starting with U+1F4C5 (calendar emoji) supplies the date,
    * a line starting with U+1F3F7 (label emoji) supplies the tags,
    * the first line starting with ``'# '`` supplies the title.

    When several date or tag lines are present, the last one wins.

    Returns:
        A ``(title, date, tags)`` tuple.  ``title`` is ``None`` when the file
        has no ``'# '`` heading; ``date`` and ``tags`` default to ``''``.
        ``tags`` is the raw text after the marker and is not split here.
    """
    date_mark = '\U0001F4C5'
    tags_mark = '\U0001F3F7'
    variation_selector = '\uFE0F'

    def value_after(marker_line):
        # Drop the one-character marker and any emoji variation selector that
        # follows it; str.strip() does not treat U+FE0F as whitespace, so it
        # would otherwise end up glued to the front of the value.
        return marker_line[1:].lstrip(variation_selector).strip()

    date = ''
    tags = ''
    title = None
    # The markers are non-ASCII, so decode explicitly as UTF-8 rather than
    # relying on the locale default; `with` closes the file deterministically.
    with open(page, 'rt', encoding='utf-8') as post:
        for line in post:
            if line.startswith(date_mark):
                date = value_after(line)
            elif line.startswith(tags_mark):
                tags = value_after(line)
            elif title is None and line.startswith('# '):
                title = line[1:].strip()
    return title, date, tags

def page_timestamp(page):
    """Return the ``st_ctime`` value that ``os.stat`` reports for *page*."""
    info = os.stat(page)
    return info.st_ctime


# Field positions within each tuple stored in `pages`.
PI_PAGE, PI_TITLE, PI_DATE, PI_TAGS, PI_TIMESTAMP = range(5)

# Collect every post in the current directory: the .gmi files whose names
# start with '20'. Generated pages (index, archive, tags, topic-*) do not
# match that prefix and are therefore skipped.
for page in os.listdir('.'):
    if not (page.endswith('.gmi') and page.startswith('20')):
        continue
    title, date, tags = parse_meta(page)
    if title is None:
        # parse_meta() found no '# ' heading. Fall back to the file name so
        # later string operations and tuple sorting never see None.
        title = page
    pages.append((page, title, date, tags, page_timestamp(page)))
    for tag in tags.split(','):
        tag = tag.strip()
        if tag:
            tag_index.setdefault(tag, []).append((date, title, page))

# Newest first: ordered by date string, ties broken by file timestamp.
bytime = sorted(pages, key=lambda p: (p[PI_DATE], p[PI_TIMESTAMP]), reverse=True)

# The index page and the archive page.
INDEX_POST_COUNT = 10    # newest posts listed on the index; the rest are archived
SHOW_TAG_PREFIX = False  # set to True to label links as '[first tag] title'


def _link_title(page):
    """Return the link label for *page*: its title, optionally tag-prefixed."""
    # A post without a '# ' heading has no title; label it by file name
    # instead of failing on None.
    title = page[PI_TITLE] or page[PI_PAGE]
    if not SHOW_TAG_PREFIX or title.startswith('Re:'):
        return title
    first_tag = page[PI_TAGS].split(',')[0].strip()
    # '' is contained in every string, so this also covers posts without tags.
    if first_tag in title:
        return title
    return f'[{first_tag}] {title}'


# Gemtext is served as UTF-8, so pin the encoding instead of using the locale
# default; `with` guarantees both files are flushed and closed.
with open('index.gmi', 'wt', encoding='utf-8') as index_page, \
        open('archive.gmi', 'wt', encoding='utf-8') as archive_page:
    print(f'# {GEMLOG_NAME}\n## {GEMLOG_DESC}\n', file=index_page)
    print(f'# {GEMLOG_NAME}\n## Archived posts', file=archive_page)

    # The newest posts go on the index page...
    for page in bytime[:INDEX_POST_COUNT]:
        print(f'=> {page[PI_PAGE]} {page[PI_DATE]} {_link_title(page)}', file=index_page)

    # ...and everything older goes to the archive, with a heading whenever
    # the year changes (including before the first archived post).
    shown_year = None
    for page in bytime[INDEX_POST_COUNT:]:
        year = get_year(page[PI_DATE])
        if year != shown_year:
            print(f'\n### {year}', file=archive_page)
            shown_year = year
        print(f'=> {page[PI_PAGE]} {page[PI_DATE]} {_link_title(page)}', file=archive_page)

    print('\n=> archive.gmi  More...', file=index_page)
    print('=> tags.gmi  Posts grouped by topic', file=index_page)
    print('=> .. Home', file=index_page)

    print('\n=> index.gmi  Latest posts', file=archive_page)
    print('=> tags.gmi  Posts grouped by topic', file=archive_page)
    print('=> .. Home', file=archive_page)

# Tag index page and the per-topic pages.
# Topic files are named after the lower-cased tag, so tags that differ only in
# case share one file. Merge them up front; otherwise the later spelling would
# overwrite the page written for the earlier one.
topics = {}  # lower-cased tag -> (display label, set of (date, title, page))
for tag in sorted(tag_index):
    label, posts = topics.setdefault(tag.lower(), (tag, set()))
    # A post without a heading may carry a None title; substitute the file
    # name so the tuples stay sortable and printable.
    posts.update((date, title or page, page) for date, title, page in tag_index[tag])

with open('tags.gmi', 'wt', encoding='utf-8') as tags_page:
    print(f'# {GEMLOG_NAME}\n## Posts grouped by topic', file=tags_page)
    for slug, (label, posts) in topics.items():
        topic_file = f'topic-{slug}.gmi'
        # A gemtext link URL ends at the first whitespace, so percent-encode
        # the file name in the link; the file on disk keeps its plain name.
        print(f"=> {quote(topic_file)} {label}", file=tags_page)
        with open(topic_file, 'wt', encoding='utf-8') as topic_page:
            print(f'# {GEMLOG_SHORTNAME}: {label} \n## Posts', file=topic_page)
            for date, title, page in sorted(posts, reverse=True):
                print(f'=> {page} {date} {title}', file=topic_page)
            print('\n=> tags.gmi  Posts grouped by topic', file=topic_page)
            print('=> .. Home', file=topic_page)
    print('\n=> index.gmi  Latest posts', file=tags_page)
    print('=> archive.gmi  Archived posts', file=tags_page)
    print('=> .. Home', file=tags_page)

# The Atom feed.
FEED_ENTRY_COUNT = 15  # only the newest posts are included in the feed

# The feed-level <updated> is taken from the newest post. With no posts at all
# fall back to today's date rather than failing on bytime[0].
feed_updated = bytime[0][PI_DATE] if bytime else datetime.date.today().isoformat()
feed_author = escape(GEMLOG_AUTHOR)

# The XML declaration promises UTF-8, so the file must be written as UTF-8
# regardless of the locale; `with` guarantees it is flushed and closed.
with open('atom.xml', 'wt', encoding='utf-8') as atom_page:
    print(f"""<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>{escape(GEMLOG_NAME)}</title>
<link href="gemini://{GEMLOG_ROOT}/" />
<link rel="self" type="application/atom+xml" href="gemini://{GEMLOG_ROOT}/atom.xml" />
<id>gemini://{GEMLOG_ROOT}/</id>
<updated>{feed_updated}T12:00:00Z</updated>
<generator uri="gemini://{GEMLOG_ROOT}/" version="1.0">gmi-hatch</generator>""", file=atom_page)
    for page in bytime[:FEED_ENTRY_COUNT]:
        # Titles are free text and may contain '&' or '<'; escape them so the
        # feed stays well-formed. Untitled posts are labelled by file name.
        entry_title = escape(page[PI_TITLE] or page[PI_PAGE])
        # The URL is used both as element text and inside a double-quoted
        # attribute, so the double quote is escaped as well.
        entry_url = escape(f'gemini://{GEMLOG_ROOT}/{page[PI_PAGE]}', {'"': '&quot;'})
        print(f"""<entry>
    <title>{entry_title}</title>
    <author><name>{feed_author}</name></author>
    <link href="{entry_url}" />
    <id>{entry_url}</id>
    <updated>{page[PI_DATE]}T12:00:00Z</updated>
</entry>""", file=atom_page)
    print("</feed>", file=atom_page)