💾 Archived View for going-flying.com › files › thoughts-to-gemini.py captured on 2024-08-25 at 01:21:43.

View Raw

More Information

⬅️ Previous capture (2024-08-18)

-=-=-=-=-=-=-

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
'''thoughts-to-gemini.py (c) 2020-2024 Matthew J Ernisse <matt@going-flying.com>
All Rights Reserved.

Redistribution and use in source and binary forms,
with or without modification, are permitted provided
that the following conditions are met:

    * Redistributions of source code must retain the
      above copyright notice, this list of conditions
      and the following disclaimer.
    * Redistributions in binary form must reproduce
      the above copyright notice, this list of conditions
      and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
import datetime
import jinja2
import json
import os
import requests
import sys
import time
import warnings
import zoneinfo

from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from feedgen.feed import FeedGenerator

# Jinja2 template for a single thought.  It is compiled in
# Thoughts.__init__ and rendered by Thoughts._processThought with
# ``entry`` bound to one thought dict.  It emits a date header, an
# optional In-Reply-To line (only when ``entry.in_reply_to_date`` is
# set), the message text, and one gemini link line (``=>``) per
# attachment with the attachment name URL-encoded.
entry_template = '''╒═════╣▒ {{ entry.date }} ▒╟──────────┘
{% if entry.in_reply_to_date %}
│ In-Reply-To: {{ entry.in_reply_to_date }}
{% endif %}
{{ entry.message }}

{% if entry.attachment %}
    Attachments:
{% for attachment in entry.attachment %}
=> {{ attachment.name|urlencode }}	{{ attachment.type }}
{% endfor %}

{% endif %}
'''

# Jinja2 template for the index.gmi page.  It is rendered in
# Thoughts.__init__ with ``thoughts`` bound to the Thoughts instance and
# ``build_time`` bound to a formatted timestamp.  The nested loops walk
# thoughts.years -> thoughts.byYear(year) -> thoughts.forMonth(year, month)
# and print each pre-rendered entry.
# NOTE(review): the ``` fence opened on the first line is never closed
# in this copy of the template, and the headings / feed link carry no
# gemtext prefixes -- possibly markup was lost when this file was
# captured.  Confirm against the upstream source before relying on it.
index_template = '''```
 _______  __                          __     __          
|_     _||  |--..-----..--.--..-----.|  |--.|  |_ .-----.
  |   |  |     ||  _  ||  |  ||  _  ||     ||   _||__ --|
  |___|  |__|__||_____||_____||___  ||__|__||____||_____|
                              |_____|                    

💭 Random Thoughts

Atom feed of my thoughts

{% for year in thoughts.years %}

{{ year }}

{% for month in thoughts.byYear(year) %}

{{ month }}

{% for entry in thoughts.forMonth(year, month) %}

{{ entry }}

{% endfor %}

{% endfor %}

{% endfor %}

Ω

🕰️ Generated at {{ build_time }}

🚀 © MMXX-MMXXIV matt@going-flying.com

'''

# Base URL of the gemini capsule; used as the Atom feed id and for the
# feed's alternate/self links.
URLBASE = 'gemini://going-flying.com/thoughts/'
# Base URL of the HTML rendition; each feed entry links to
# <WEBBASE><id>.html.
WEBBASE = 'https://www.going-flying.com/thoughts/'

# Silence BeautifulSoup's MarkupResemblesLocatorWarning for the whole
# run.
warnings.filterwarnings('ignore', category=MarkupResemblesLocatorWarning)

class DeHTMLizer(object):
    '''Converter for the lightweight Thoughts HTML into gemini's
    markup language.

    After construction ``self.gemini`` holds the converted body text
    and ``self.links`` holds the href of every anchor that was seen, in
    document order.  ``str(obj)`` returns the body followed by one
    numbered gemini link line per collected href.
    '''

    def __init__(self, s):
        ''' Given a HTML string, convert it into text/gemini.

        An empty or None string, or markup for which the parser yields
        no <body>, leaves ``self.gemini`` empty.
        '''
        self.gemini = ''
        self.links = []

        # Check for empty input before handing it to the parser so
        # that None is treated as "no message" instead of crashing.
        if not s:
            return

        soup = BeautifulSoup(s, 'lxml')
        body = soup.find('body')
        if body is None:
            return

        self.gemini = self._parseChildren(body)

    def __str__(self):
        ''' Return the gemini text, plus a trailer of link lines. '''
        if not self.gemini:
            return '~ NO MESSAGE ~'

        if not self.links:
            return self.gemini

        # Link numbers are 1-based to match the superscripts that
        # parseTag() places after each anchor in the body.
        trailer = ''.join(
            f'=> {link} [{n}] {link}\n'
            for n, link in enumerate(self.links, 1)
        )
        return f'{self.gemini}\n\n{trailer}'

    def _parseChildren(self, tag):
        ''' Convert every child of tag and return the joined text. '''
        return ''.join(self.parseElement(el) for el in tag.contents)

    def parseElement(self, el):
        ''' Parse an Element from BeautifulSoup, this will recursively
        call parseTag on nested tags as needed.  It also handles
        the difference between a Tag and a NavigableString.
        '''
        if el.name is not None:
            return self.parseTag(el)

        if el.string is not None:
            return el.string

        return ''

    def parseTag(self, tag):
        ''' Convert HTML tags into various plain-text formatted
        elements.  Handle nested blockquote, div and p tags and create
        a list of links in self.links that can be used in any way
        the caller desires.

        Strips style and script elements completely.  Converts
        blockquote to >, pre to ```, em and strong to *, del to a
        trailing ^W and br to a newline.  Any other tag is reduced to
        its string, or to nothing if it has no single string.
        '''
        nestable = ('blockquote', 'div', 'p')
        noprint = ('style', 'script')
        name = tag.name

        if name == 'a':
            href = tag.get('href')
            if not href:
                # A bare anchor has nothing to link to, keep its text
                # but do not record a link for it.
                return self._parseChildren(tag)

            self.links.append(href)
            sNum = SuperNum(len(self.links))
            # Anchors without a single text child are labelled with
            # their target instead.
            label = tag.string if tag.string else href
            return f'«{label}»{sNum!s}'

        if name == 'br':
            return '\n'

        if name == 'del':
            # tag.string is None for mixed content, so walk the
            # children rather than printing the word "None".
            return f'{self._parseChildren(tag)}^W'

        if name == 'pre':
            # get_text() keeps the preformatted text verbatim even when
            # the element contains nested markup.
            return f'```\n{tag.get_text()}\n```'

        if name in nestable:
            buf = self._parseChildren(tag)
            if name == 'blockquote':
                return f'> {buf}'

            return buf

        # These are also nestable, sort of.
        if name in ('em', 'strong'):
            return f'*{self._parseChildren(tag)}*'

        if name in noprint:
            return ''

        if tag.string is None:
            return ''

        return tag.string

class Thoughts(object):
    ''' Render Thoughts from the API and save state to disk.

    Constructing an instance does all of the work: it loads the cached
    thoughts.json from the given directory, fetches anything newer from
    the API, rewrites thoughts.json, downloads attachments and writes
    index.gmi and atom.xml into the same directory.
    '''

    attachurl = 'https://thoughtsassets.blob.core.windows.net/assets'

    # Seconds to wait on the attachment server before giving up, so a
    # stalled connection cannot hang the whole run.
    _timeout = 30

    def __init__(self, thoughtdir):
        ''' Build the gemini site in thoughtdir.

        Raises ValueError if thoughtdir does not exist.  HTTP failures
        surface as the exceptions raised by requests.
        '''
        self.api = ThoughtApi()
        self.thoughtdir = thoughtdir
        self.thoughts = []
        self._years = {}

        if not os.path.exists(thoughtdir):
            raise ValueError('Dir does not exist')

        t_json = os.path.join(thoughtdir, 'thoughts.json')
        self._loadThoughts(t_json)
        self._resolveReplies()

        # Persist before _processThought() runs, it replaces each
        # message with a DeHTMLizer object that json cannot serialize.
        with open(t_json, 'w', encoding='utf-8') as fd:
            json.dump(self.thoughts, fd, ensure_ascii=False)

        self.tmpl = jinja2.Template(
            entry_template,
            trim_blocks=True,
            lstrip_blocks=True
        )

        for thought in self.thoughts:
            self._processThought(thought)
            self._downloadAttachments(thoughtdir, thought)

        self._writeIndex()
        self._writeFeed()

    def _loadThoughts(self, t_json):
        ''' Fill self.thoughts from the cache file and the API, sorted
        newest first.
        '''
        if os.path.exists(t_json):
            with open(t_json, 'r', encoding='utf-8') as fd:
                self.thoughts = json.load(fd)

        if self.thoughts:
            # Do not rely on the cache file being sorted.
            local_newest = max(t['id'] for t in self.thoughts)
            if self.api.newest > local_newest:
                _t = ThoughtApi(local_newest)
                self.thoughts.extend(_t.thoughts)
        else:
            _t = ThoughtApi()
            self.thoughts = list(_t.thoughts)

        self.thoughts.sort(key=lambda k: k['id'], reverse=True)

    def _resolveReplies(self):
        ''' Store the parent's date on every reply.

        Save the date of the In-Reply-To thought in the
        in_reply_to_date key instead of the ID since we would much
        rather print the datestr out.  A reply whose parent is not in
        self.thoughts is left without the key, which the entry template
        and the feed both treat as "not a reply".
        '''
        dates = {}
        for thought in self.thoughts:
            # setdefault keeps the first match should an id repeat.
            dates.setdefault(thought['id'], thought['date'])

        for thought in self.thoughts:
            tId = thought.get('in-reply-to')
            if tId and tId in dates:
                thought['in_reply_to_date'] = dates[tId]

    def _writeIndex(self):
        ''' Render index_template to index.gmi in self.thoughtdir. '''
        now = datetime.datetime.now(zoneinfo.ZoneInfo('US/Eastern'))
        tmpl = jinja2.Template(
            index_template,
            trim_blocks=True,
            lstrip_blocks=True
        )

        outFile = os.path.join(self.thoughtdir, 'index.gmi')
        with open(outFile, 'w', encoding='utf-8') as fd:
            fd.write(tmpl.render({
                'build_time': now.strftime('%c %z'),
                'thoughts': self
            }))

    def _writeFeed(self):
        ''' Generate the atom feed as atom.xml in self.thoughtdir. '''
        feed = FeedGenerator()
        feed.id(URLBASE)
        feed.title('Thoughts from mernisse')
        feed.author({
            'name': 'mernisse',
            'email': 'matt@going-flying.com'
        })
        feed.link(href=URLBASE, rel='alternate')
        feed.link(href=URLBASE + 'atom.xml', rel='self')

        utc = zoneinfo.ZoneInfo('UTC')
        for entry in self.thoughts:
            # The thought id doubles as its UNIX timestamp.
            pubdate = datetime.datetime.fromtimestamp(entry['id'], tz=utc)

            message = ''
            if entry.get('in-reply-to') and entry.get('in_reply_to_date'):
                message += f'In-Reply-To: {entry["in_reply_to_date"]}\n'

            message += str(entry['message'])

            e = feed.add_entry()
            e.content(content=message, type='text')
            e.id(str(entry['id']))
            e.title('A brief thought from mernisse')
            e.link(
                href=f'{WEBBASE}{entry["id"]}.html',
                rel='alternate',
                type='text/html'
            )
            e.updated(pubdate)

        feed.atom_file(os.path.join(self.thoughtdir, 'atom.xml'))

    def _downloadAttachments(self, localdir, thought):
        ''' Fetch the attachments of thought into localdir.  Files
        that already exist locally are not downloaded again.
        '''
        for a in thought.get('attachment') or []:
            outFile = os.path.join(localdir, a['name'])
            if os.path.exists(outFile):
                continue

            resp = requests.get(
                self.attachurl + '/' + a['name'],
                timeout=self._timeout
            )
            resp.raise_for_status()
            with open(outFile, 'wb') as fd:
                fd.write(resp.content)

    def _processThought(self, thought):
        ''' Convert the thought's message to gemini text (in place)
        and file the rendered entry under its UTC year and month.
        '''
        dt = datetime.datetime.fromtimestamp(
            thought['id'],
            tz=datetime.timezone.utc
        )
        month = dt.strftime('%B')
        entries = self._years.setdefault(dt.year, {}).setdefault(month, [])

        thought['message'] = DeHTMLizer(thought['message'])
        entries.append(self.tmpl.render(entry=thought))

    @property
    def years(self):
        ''' Yield the years that have entries, in insertion order. '''
        yield from self._years

    def byYear(self, year):
        ''' Return the month names that have entries in year. '''
        return self._years[year].keys()

    def forMonth(self, year, month):
        ''' Return the rendered entries for the given year and month. '''
        return self._years[year][month]

class SuperNum(object):
    ''' Return given number as unicode superscript.

    The value is normalised through int(), so 12, '12' and ' 12 ' all
    render as '¹²'.  Negative numbers get a superscript minus sign.
    '''

    _u = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹']

    # Translation table for str.translate(); the minus sign is included
    # so negative values do not index _u with a negative offset.
    _table = str.maketrans('0123456789-', ''.join(_u) + '⁻')

    def __init__(self, val):
        ''' Raise ValueError unless val is a base 10 integer. '''
        if isinstance(val, float) and not val.is_integer():
            # int() would silently truncate the fraction.
            raise ValueError('Value must be a base 10 integer')

        try:
            number = int(val)
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError('Value must be a base 10 integer') from err

        self.val = str(number)

    def __str__(self):
        return self.val.translate(self._table)

class ThoughtApi(object):
    ''' Provide an interface to my Thoughts.

    ``since`` is the id after which thoughts are fetched; iterating the
    ``thoughts`` property advances it as thoughts are emitted.
    '''

    _ua = 'thought-to-gemini/1.0 (+matt@going-flying.com)'
    _url = 'https://vociferate.azurewebsites.net/api/thoughts'

    # Number of thoughts requested per page.  The pagination loop in
    # ``thoughts`` stops on the first page shorter than this.
    _pageSize = 25

    # Seconds to wait on the API before giving up, so a stalled
    # connection cannot hang the whole run.
    _timeout = 30

    def __init__(self, since=0):
        self.since = since

    def _request(self, url, params=None):
        ''' GET url and return the decoded JSON body.  Raises the
        requests exception for HTTP errors and timeouts.
        '''
        resp = requests.get(
            url,
            headers={'User-Agent': self._ua},
            params=params,
            timeout=self._timeout
        )
        resp.raise_for_status()
        return resp.json()

    @property
    def newest(self):
        ''' Return the ID of the newest thought. '''
        return self._request(f'{self._url}/latest')

    @property
    def oldest(self):
        ''' Return the ID of the oldest thought.  Raises IndexError
        if the API returns no thoughts at all.
        '''
        _t = self._get(1, since=0)[0]
        return _t['id']

    @property
    def thoughts(self):
        ''' Fetch the thoughts from the API and emit them. '''
        while True:
            page = self._getRange()
            for thought in page:
                # Remember how far we got so the next page request
                # continues after this thought.
                self.since = thought['id']
                yield thought

            # A short page means the API has nothing further.
            if len(page) < self._pageSize:
                return

    def _get(self, count=25, before=None, since=None):
        ''' Return up to count thoughts, sorted by ascending id,
        optionally bounded by the before and since ids.
        '''
        params = {'count': count, 'raw': True}
        if before is not None:
            params['before'] = before

        if since is not None:
            params['since'] = since

        thoughts = self._request(self._url, params=params)
        thoughts.sort(key=lambda k: k['id'])
        return thoughts

    def _getRange(self):
        ''' Return a range of _pageSize thoughts from self.since. '''
        return self._get(count=self._pageSize, since=self.since)

if __name__ == '__main__':
    # Exactly one argument is accepted: the output directory.
    if len(sys.argv) != 2:
        usage = (
            f'Usage: {os.path.basename(sys.argv[0])} path',
            '',
            'This will write all Thoughts to index.gmi at the given',
            'path and download all attachments there as well.',
        )
        print('\n'.join(usage))
        sys.exit(1)

    (localdir,) = sys.argv[1:]
    if not os.path.exists(localdir):
        print(f'{localdir} does not exist or is not readable.')
        sys.exit(1)

    # Constructing Thoughts performs the whole fetch / render / write
    # cycle, the instance itself is not needed afterwards.
    Thoughts(localdir)