💾 Archived View for going-flying.com › files › thoughts-to-gemini.py captured on 2022-04-29 at 11:38:00.
⬅️ Previous capture (2022-04-28)
-=-=-=-=-=-=-
#!/usr/bin/env python3 # -*- coding: UTF-8 -*- '''thoughts-to-gemini.py (c) 2020-2022 Matthew J Ernisse <matt@going-flying.com> All Rights Reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ''' import datetime import jinja2 import json import os import pytz import requests import sys import time from bs4 import BeautifulSoup from feedgen.feed import FeedGenerator entry_template = '''╒═════╣▒ {{ entry.date }} ▒╟──────────┘ {{ entry.message }} {% if 'attachment' in entry.keys() %} Attachments: {% for attachment in entry.attachment %} => {{ attachment.name|urlencode }} {{ attachment.type }} {% endfor %} {% endif %} ''' index_template = '''``` _______ __ __ __ |_ _|| |--..-----..--.--..-----.| |--.| |_ .-----. 
| | | || _ || | || _ || || _||__ --| |___| |__|__||_____||_____||___ ||__|__||____||_____| |_____|
{% for year in thoughts.years %}
{% for month in thoughts.byYear(year) %}
{% for entry in thoughts.forMonth(year, month) %}
{{ entry }}
{% endfor %}
{% endfor %}
{% endfor %}
Ω
🚀 © MMXX-MMXXII matt@going-flying.com
'''
# Base URL of the gemini capsule; used below as the Atom feed id, as the
# feed's 'alternate' link and (with 'atom.xml' appended) its 'self' link.
URLBASE = 'gemini://going-flying.com/thoughts/'
# Base URL of the HTTPS site; each feed entry links to '<id>.html' under it.
WEBBASE = 'https://www.going-flying.com/thoughts/'
class DeHTMLizer(object):
    '''Converter for the lightweight Thoughts HTML into gemini's
    markup language.

    After construction ``self.gemini`` holds the converted text and
    ``self.links`` holds every link target in document order.  ``str()``
    of the instance returns the text followed by a numbered trailer of
    ``=>`` link lines, or ``~ NO MESSAGE ~`` when nothing was converted.
    '''

    def __init__(self, s):
        ''' Given a HTML string, convert it into text/gemini '''
        soup = BeautifulSoup(s, 'lxml')
        self.gemini = ''
        self.links = []

        # find() returns None when the parser produced no <body> (for
        # example an empty message).  __str__ already knows how to
        # render an empty result, so leave self.gemini empty instead of
        # failing on None.contents.
        body = soup.find('body')
        if body is None:
            return

        self.gemini = ''.join(self.parseElement(el) for el in body.contents)

    def __str__(self):
        if not self.gemini:
            return '~ NO MESSAGE ~'

        if not self.links:
            return self.gemini

        # One gemini link line per collected href, numbered from 1 so the
        # numbers match the superscripts emitted by parseTag.
        trailer = ''.join(
            f'=> {link} [{n}] {link}\n'
            for n, link in enumerate(self.links, start=1)
        )
        return f'{self.gemini}\n\n{trailer}'

    @staticmethod
    def _text(tag):
        ''' Return the text of tag.

        tag.string is None whenever a tag has more than one child, which
        used to leak the literal text "None" into the output; fall back
        to the concatenated text of all descendants in that case.
        '''
        if tag.string is not None:
            return tag.string
        return tag.get_text()

    def parseElement(self, el):
        ''' Parse an Element from BeautifulSoup, this will recursively
        call parseTag on nested tags as needed.  It also handles
        the difference between a Tag and a NavigableString.
        '''
        if el.name is not None:
            return self.parseTag(el)

        if el.string is not None:
            return el.string

        return ''

    def parseTag(self, tag):
        ''' Convert HTML tags into various plain-text formatted
        elements.  Handle nested blockquote and p tags and create
        a list of links in self.links that can be used in any way
        the caller desires.

        Strips style and script elements completely.  Converts
        blockquote to >, pre to ```, strong to ** and a few more.
        Any other tag contributes its string, or nothing when it has
        no single string.
        '''
        nestable = ('blockquote', 'p')
        noprint = ('style', 'script')

        if tag.name == 'a':
            href = tag.get('href')
            if href is None:
                # An anchor without a target is not a link; keep its
                # text but do not register a numbered reference.
                return self._text(tag)

            self.links.append(href)
            sNum = SuperNum(len(self.links))
            # Fall back to the URL itself when the anchor has no text.
            label = self._text(tag) or href
            return f'«{label}»{sNum!s}'

        if tag.name == 'del':
            return f'{self._text(tag)}^W'

        if tag.name == 'pre':
            return f'```\n{self._text(tag)}\n```'

        if tag.name in nestable:
            buf = ''.join(self.parseElement(el) for el in tag.contents)
            if tag.name == 'blockquote':
                return f'> {buf}'
            return buf

        if tag.name in ('em', 'strong'):
            return f'*{self._text(tag)}*'

        if tag.name in noprint:
            return ''

        if tag.string is None:
            return ''

        return tag.string
class Thoughts(object):
    ''' Render Thoughts from the API and save state to disk.

    Constructing an instance does all of the work: it brings the cached
    thoughts.json in thoughtdir up to date from the API, renders every
    thought, downloads missing attachments and writes index.gmi and
    atom.xml into thoughtdir.
    '''
    attachurl = 'https://thoughtsassets.blob.core.windows.net/assets'
    # Seconds to wait on an attachment download.  requests has no default
    # timeout, so without one a stalled connection blocks the run forever.
    timeout = 30

    def __init__(self, thoughtdir):
        ''' Sync, render and publish all thoughts into thoughtdir.

        Raises ValueError when thoughtdir does not exist.
        '''
        self.api = ThoughtApi()
        self.thoughtdir = thoughtdir
        self.thoughts = []
        self._years = {}

        if not os.path.exists(thoughtdir):
            raise ValueError('Dir does not exist')

        self._syncThoughts(os.path.join(thoughtdir, 'thoughts.json'))

        self.tmpl = jinja2.Template(
            entry_template,
            trim_blocks=True,
            lstrip_blocks=True
        )

        for thought in self.thoughts:
            self._processThought(thought)
            self._downloadAttachments(thoughtdir, thought)

        self._writeIndex(os.path.join(thoughtdir, 'index.gmi'))
        self._writeFeed(os.path.join(thoughtdir, 'atom.xml'))

    def _syncThoughts(self, t_json):
        ''' Load the cache at t_json, fetch anything newer, write it back. '''
        if os.path.exists(t_json):
            with open(t_json, 'r', encoding='utf-8') as fd:
                self.thoughts = json.load(fd)

        if self.thoughts:
            # Use the largest id rather than trusting that the cache is
            # still sorted newest-first.
            local_newest = max(t['id'] for t in self.thoughts)
            if self.api.newest > local_newest:
                self.thoughts.extend(ThoughtApi(local_newest).thoughts)
        else:
            self.thoughts = list(ThoughtApi().thoughts)

        # Newest first; this is the order used for the index and feed.
        self.thoughts.sort(key=lambda k: k['id'], reverse=True)
        with open(t_json, 'w', encoding='utf-8') as fd:
            json.dump(
                self.thoughts,
                fd,
                ensure_ascii=False
            )

    def _writeIndex(self, outFile):
        ''' Render index_template with this object and write it to outFile. '''
        now = datetime.datetime.now(pytz.timezone('US/Eastern'))
        tmpl = jinja2.Template(
            index_template,
            trim_blocks=True,
            lstrip_blocks=True
        )

        with open(outFile, 'w', encoding='utf-8') as fd:
            fd.write(tmpl.render({
                'build_time': now.strftime('%c %z'),
                'thoughts': self
            }))

    def _writeFeed(self, outFile):
        ''' Generate the atom feed for all thoughts and write it to outFile. '''
        feed = FeedGenerator()
        feed.id(URLBASE)
        feed.title('Thoughts from mernisse')
        feed.author({
            'name': 'mernisse',
            'email': 'matt@going-flying.com'
        })
        feed.link(
            href=URLBASE,
            rel='alternate'
        )
        feed.link(
            href=URLBASE + 'atom.xml',
            rel='self'
        )

        for entry in self.thoughts:
            # The thought id is used as a UNIX timestamp.  Build an aware
            # UTC datetime directly; utcfromtimestamp() is deprecated.
            pubdate = datetime.datetime.fromtimestamp(
                entry['id'],
                tz=datetime.timezone.utc
            )

            e = feed.add_entry()
            # By now _processThought has replaced 'message' with a
            # DeHTMLizer, so str() yields the gemini text.
            e.content(content=str(entry['message']), type='text')
            e.id(str(entry['id']))
            e.title('A brief thought from mernisse')
            e.link(
                href=f'{WEBBASE}{entry["id"]}.html',
                rel='alternate',
                type='text/html'
            )
            e.updated(pubdate)

        feed.atom_file(outFile)

    def _downloadAttachments(self, localdir, thought):
        ''' Download the attachments of thought that are not yet in localdir.

        Attachment names come from the remote API and are used as local
        file names, so a name that is not a plain file name is skipped
        with a warning instead of being allowed to escape localdir.
        Raises requests.HTTPError when a download fails.
        '''
        if 'attachment' not in thought:
            return

        for a in thought['attachment']:
            name = a['name']
            if name in ('', '.', '..') or name != os.path.basename(name):
                print(
                    f'Skipping attachment with unsafe name: {name!r}',
                    file=sys.stderr
                )
                continue

            outFile = os.path.join(localdir, name)
            if os.path.exists(outFile):
                continue

            resp = requests.get(
                f'{self.attachurl}/{name}',
                timeout=self.timeout
            )
            resp.raise_for_status()
            with open(outFile, 'wb') as fd:
                fd.write(resp.content)

    def _processThought(self, thought):
        ''' Render thought and file it under its UTC year and month name.

        Replaces thought['message'] with a DeHTMLizer of the original
        HTML as a side effect.
        '''
        dt = datetime.datetime.fromtimestamp(
            thought['id'],
            tz=datetime.timezone.utc
        )
        month = dt.strftime('%B')
        entries = self._years.setdefault(dt.year, {}).setdefault(month, [])

        thought['message'] = DeHTMLizer(thought['message'])
        entries.append(self.tmpl.render(entry=thought))

    @property
    def years(self):
        ''' Yield each year that has thoughts, in insertion order. '''
        yield from self._years

    def byYear(self, year):
        ''' Return the month names that have thoughts in year. '''
        return self._years[year].keys()

    def forMonth(self, year, month):
        ''' Return the rendered thoughts for month of year. '''
        return self._years[year][month]
class SuperNum(object):
    ''' Return given number as unicode superscript.

    val may be anything int() accepts that denotes a whole number.  It
    is normalised through int() so that the rendered digits are exactly
    the digits that were validated; negative numbers get a superscript
    minus.  Raises ValueError for anything else.
    '''
    _u = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹']
    _minus = '⁻'

    def __init__(self, val):
        try:
            number = int(val)
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError('Value must be a base 10 integer') from err

        # int() silently truncates floats; 1.5 is not an integer.
        if isinstance(val, float) and val != number:
            raise ValueError('Value must be a base 10 integer')

        # Store the normalised form.  Storing str(val) let characters
        # such as '-' or '.' through, which then indexed _u with a
        # negative offset and produced the wrong digit.
        self.val = str(number)

    def __str__(self):
        return ''.join(
            self._minus if ch == '-' else self._u[int(ch)]
            for ch in self.val
        )
class ThoughtApi(object):
    ''' Provide an interface to my Thoughts.

    since is the thought id to start fetching after; it advances as the
    thoughts property is iterated.
    '''
    endpoint = 'https://vociferate.azurewebsites.net/api/thoughts'
    # Number of thoughts requested per page.  A page shorter than this
    # is treated as the last one.
    page_size = 25
    # Seconds to wait on the API.  requests has no default timeout, so
    # without one a stalled connection blocks the run forever.
    timeout = 30

    def __init__(self, since=0):
        self.since = since

    @property
    def newest(self):
        ''' Return the ID of the newest thought. '''
        _t = self._get(1, before=int(time.time()))[0]
        return _t['id']

    @property
    def oldest(self):
        ''' Return the ID of the oldest thought. '''
        _t = self._get(1, since=0)[0]
        return _t['id']

    @property
    def thoughts(self):
        ''' Fetch the thoughts from the API and emit them.

        Pages are requested starting from self.since, which is moved to
        the id of each thought as it is yielded, until a short page
        signals the end.
        '''
        while True:
            page = self._getRange()
            for thought in page:
                self.since = thought['id']
                yield thought

            if len(page) < self.page_size:
                return

    def _get(self, count=25, before=None, since=None):
        ''' Request up to count thoughts and return them sorted by id.

        before and since are passed to the API as query parameters only
        when given.  Raises requests.HTTPError on an error response.
        '''
        headers = {'User-Agent': 'thought-to-gemini/1.0'}
        params = {'count': count}
        if before is not None:
            params['before'] = before

        if since is not None:
            params['since'] = since

        resp = requests.get(
            self.endpoint,
            headers=headers,
            params=params,
            timeout=self.timeout
        )
        resp.raise_for_status()
        thoughts = resp.json()
        # Oldest first, so the last item of a page is the newest id seen.
        thoughts.sort(key=lambda k: k['id'])
        return thoughts

    def _getRange(self):
        ''' Return a range of page_size thoughts from self.since. '''
        return self._get(count=self.page_size, since=self.since)
if __name__ == '__main__':
    # Exactly one argument is expected: the output directory.
    if len(sys.argv) != 2:
        print(f'Usage: {os.path.basename(sys.argv[0])} path')
        print()
        print('This will write all Thoughts to index.gmi at the given')
        print('path and download all attachments there as well.')
        sys.exit(1)

    localdir = sys.argv[1]
    # isdir rather than exists: a path to a regular file passes an
    # exists() check and only fails later, when files are opened below it.
    if not os.path.isdir(localdir):
        print(f'{localdir} does not exist or is not a readable directory.')
        sys.exit(1)

    Thoughts(localdir)