💾 Archived View for yujiri.xyz › gem2rss.py captured on 2023-11-14 at 08:01:06.

View Raw

More Information

⬅️ Previous capture (2023-11-04)

➡️ Next capture (2024-08-18)

-=-=-=-=-=-=-

import sys, re, urllib.parse
from datetime import datetime
from xml.sax.saxutils import escape

date_regex = '[0-9]{4}-[0-9]{2}-[0-9]{2}'
# A feed entry is a gemtext link line whose label starts with an ISO date:
#   => <url> <YYYY-MM-DD> <title>
link_regex = re.compile(f'=> ([^ ]+) +({date_regex}) +(.+)')


def parse_gemtext(lines):
    """Extract the feed title and the dated link entries from gemtext lines.

    Args:
        lines: iterable of text lines (with or without line endings).

    Returns:
        A ``(feed_title, entries)`` tuple. ``feed_title`` is the text of the
        first non-empty ``# `` heading, or ``None`` if there is none.
        ``entries`` is a list of ``(link, date, title)`` string tuples, one
        per line matching ``link_regex``, in input order.
    """
    feed_title = None
    entries = []
    in_preformatted = False

    for raw_line in lines:
        # Drop the line ending explicitly instead of assuming the last
        # character is "\n" (the final line of the input may lack one).
        line = raw_line.rstrip("\r\n")

        # A line starting with ``` toggles preformatted mode; everything in
        # between is literal text and must not be scanned for links.
        if line.startswith("```"):
            in_preformatted = not in_preformatted
            continue
        if in_preformatted:
            continue

        if not feed_title and line.startswith("# "):
            feed_title = line[2:].strip()
            continue

        match = link_regex.match(line)
        if match is None:
            continue
        link, date, title = match.groups()

        # The regex only checks the digit layout; reject impossible dates
        # here so rendering cannot fail halfway through the feed.
        try:
            datetime.strptime(date, "%Y-%m-%d")
        except ValueError:
            print(f"skipping entry with invalid date: {line}", file=sys.stderr)
            continue
        entries.append((link, date, title))

    return feed_title, entries


def render_feed(main_url, feed_title, entries):
    """Render an RSS 2.0 document as a string.

    Args:
        main_url: channel link; also the prefix prepended to links that
            have no URL scheme.
        feed_title: channel title, or a falsy value to omit ``<title>``.
        entries: iterable of ``(link, date, title)`` tuples where ``date``
            is a valid ``YYYY-MM-DD`` string.

    Returns:
        The feed text, newline-terminated. All interpolated text is
        XML-escaped.
    """
    parts = [
        '<?xml version="1.0"?>',
        '<rss version="2.0">',
        '<channel>',
        f'<link>{escape(main_url)}</link>',
        '<description/>',
    ]
    if feed_title:
        parts.append(f"<title>{escape(feed_title)}</title>")

    for link, date, title in entries:
        if title.startswith("- "):
            title = title[2:]
        # Only the day is known, so the timestamp is pinned to noon.
        published = datetime.strptime(date, "%Y-%m-%d").replace(hour=12)

        # Links that already carry a scheme are emitted as written; anything
        # else is treated as relative and prefixed with main_url. Plain
        # concatenation is kept on purpose: urllib.parse.urljoin only
        # resolves relative references for schemes it knows about, so it
        # would hand back `link` unchanged for e.g. gemini:// bases.
        if urllib.parse.urlparse(link).scheme:
            url = link
        else:
            url = main_url + link
        url = escape(url)

        parts.append(f"<item><title>{escape(title)}</title>")
        parts.append(
            f"<pubDate>{published.strftime('%a, %d %b %Y %H:%M:%S')} GMT</pubDate>"
        )
        parts.append(f"<link>{url}</link><guid>{url}</guid>")
        parts.append("<description/></item>")

    parts.append("</channel></rss>")
    return "\n".join(parts) + "\n"


def main(argv=None, stdin=None, stdout=None):
    """Read gemtext from stdin and write the RSS feed to stdout.

    Args:
        argv: argument vector; ``argv[1]`` is the main URL. Defaults to
            ``sys.argv``.
        stdin: text stream to read gemtext from. Defaults to ``sys.stdin``.
        stdout: text stream to write the feed to. Defaults to ``sys.stdout``.

    Returns:
        Process exit status: 0 on success, 1 if the main URL is missing.
    """
    argv = sys.argv if argv is None else argv
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    if len(argv) < 2:
        # Usage errors go to stderr so they never end up inside a feed file
        # when stdout is redirected.
        print("pass main URL", file=sys.stderr)
        return 1

    feed_title, entries = parse_gemtext(stdin)
    stdout.write(render_feed(argv[1], feed_title, entries))
    return 0


if __name__ == "__main__":
    sys.exit(main())