gemini - kennedy.gemi.dev

💾 Archived View for gemini.ctrl-c.club › ~nttp › toys › eyeout.py captured on 2024-05-26 at 16:37:44.
-=-=-=-=-=-=-
#!/usr/bin/env python3
# coding=utf-8
# 
# Keep an EyeOut: a tool to eye the outline of various documents
# 2021-09-25 Felix Pleșoianu <https://felix.plesoianu.ro/>
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"a tool to eye the outline of various documents (and convert them)"

from __future__ import print_function

import re
import xml.dom.minidom
from xml.parsers.expat import ExpatError

# Banner printed by the -v/--version command-line option.
version_string = "Keep an EyeOut v1.0 beta (2021-09-25)"
# Accepted values for -i/--input; "auto" picks a format from the file name.
formats_in = ["auto", "org", "md", "gmi", "wiki", "opml"]
# Accepted values for -o/--output; "text" prints an indented tree.
formats_out = ["text", "org", "md", "gmi", "wiki", "opml"]

class MarkupParser(object):
	"""Line scanner for markup whose headings start with a repeated marker.
	
	The marker is a single character such as '#' or '*'; a heading of
	level N is a line that begins with N markers. The parser keeps a
	cursor into the list of lines. skipSection() and matchHeadline()
	advance it, and the text of the last matched heading is stored in
	the headline attribute.
	"""
	
	def __init__(self, lines, headChar):
		"""Set up a scan over lines, using headChar as the heading marker."""
		self.lines = lines
		
		self.metadata = {}
		
		self.cursor = 0
		self.headChar = headChar
		self.headline = None
	
	def parseMeta(self):
		"""Read document metadata; a no-op here, overridden by subclasses."""
		pass # Most formats lack inherent metadata.
	
	def skipSection(self):
		"""Move the cursor to the next line starting with the marker.
		
		Stops at the end of the input if no such line remains. Lines are
		tested with startswith() so that empty strings are skipped rather
		than raising IndexError. Always returns None.
		"""
		total = len(self.lines)
		while (self.cursor < total
				and not self.lines[self.cursor].startswith(self.headChar)):
			self.cursor += 1
	
	def matchHeadline(self, level = 0):
		"""Try to consume a heading of at least the given level.
		
		Returns True, stores the rest of the line (stripped) in
		self.headline and advances the cursor if the current line begins
		with level markers. Returns False and leaves the parser untouched
		otherwise, including at the end of the input.
		
		Only the first level characters are checked and removed, so a
		deeper heading also matches and keeps its extra markers in the
		headline text.
		"""
		if self.cursor >= len(self.lines):
			return False
		
		line = self.lines[self.cursor]
		prefix = self.headChar * level
		# A prefix test cannot run past the end of a short line, and it
		# also copes with level 0 (empty prefix matches any line).
		if not line.startswith(prefix):
			return False
		
		self.headline = line[len(prefix):].strip()
		self.cursor += 1
		
		return True

class OrgParser(MarkupParser):
	"""MarkupParser for Org files: '*' headings and '#+KEY: value' metadata."""
	
	def __init__(self, lines):
		"""Set up a scan over lines with '*' as the heading marker."""
		super(OrgParser, self).__init__(lines, '*')
		
		# Raw string: "\s" and "\+" are regex escapes, not string escapes,
		# and newer Python versions warn about them in a plain literal.
		self.re_meta = re.compile(
			r"^\s*#\+([A-Z]+):(.*)$", re.IGNORECASE)

	def parseMeta(self):
		"""Collect the leading run of '#+KEY: value' lines into self.metadata.
		
		Reading starts at the cursor and stops at the first line that is
		not a metadata line (or at the end of the input), leaving the
		cursor on that line. Keys are stored as written in the file; keys
		and values are stripped of surrounding whitespace.
		"""
		while self.cursor < len(self.lines):
			m = self.re_meta.match(self.lines[self.cursor])
			if m is None:
				break
			self.metadata[m.group(1).strip()] = m.group(2).strip()
			self.cursor += 1

def parseMarkup(parser, level = 0):
	"""Build the outline nested below the given heading level.
	
	Skips ahead to the next heading line, then consumes every heading
	of level + 1 in turn, recursing after each one to gather its
	children. Returns a list of {"text": ..., "children": [...]} dicts.
	"""
	parser.skipSection()
	
	depth = level + 1
	children = []
	while True:
		if not parser.matchHeadline(depth):
			return children
		# Grab the title now: the recursive call overwrites parser.headline.
		title = parser.headline
		children.append({
			"text": title,
			"children": parseMarkup(parser, depth)
		})

class OPMLoader:
	"""Load an OPML document and expose its head, body and metadata."""
	
	def __init__(self, source):
		"""Parse source (a file name or file object) with minidom.
		
		Raises xml.parsers.expat.ExpatError on malformed XML, and
		IndexError if the document has no <head> or no <body> element.
		"""
		data = xml.dom.minidom.parse(source)
		self.head = data.getElementsByTagName("head")[0]
		self.body = data.getElementsByTagName("body")[0]
		self.metadata = {}
	
	def parseMeta(self):
		"""Store the text of each element under <head> in self.metadata.
		
		Keys are the element names. An element with no text content
		(for example <ownerName/>) maps to an empty string.
		"""
		for child in self.head.childNodes:
			if child.nodeType != child.ELEMENT_NODE:
				continue
			# Join the text pieces instead of reading firstChild, which
			# is None for an empty element and may be a non-text node.
			text = "".join(
				part.data for part in child.childNodes
				if part.nodeType in (
					part.TEXT_NODE, part.CDATA_SECTION_NODE))
			self.metadata[child.nodeName] = text

def parseOPML(node):
	"""Convert the element children of node into a nested outline.
	
	Every child element becomes a {"text": ..., "children": [...]} dict,
	where "text" is its "text" attribute and "children" is the same
	conversion applied to that element. Non-element children (such as
	whitespace and comments) are ignored.
	"""
	return [
		{
			"text": child.getAttribute("text"),
			"children": parseOPML(child)
		}
		for child in node.childNodes
		if child.nodeType == child.ELEMENT_NODE
	]

def printOutline(outline, level = 0):
	"""Print outline as a plain-text tree, indented two spaces per level."""
	indent = '  ' * level
	deeper = level + 1
	for entry in outline:
		print(indent, "`-", entry["text"])
		printOutline(entry["children"], deeper)

def printMarkup(outline, headChar, level = 1):
	"""Print outline as headings made of level copies of headChar.
	
	Children are printed one level deeper than their parent.
	"""
	marker = headChar * level
	deeper = level + 1
	for entry in outline:
		print(marker, entry["text"])
		printMarkup(entry["children"], headChar, deeper)

def buildOutline(document, parent, outline):
	"""Append an <outline> element under parent for each outline entry.
	
	Each element gets the entry's text as its "text" attribute, and its
	own subtree is filled in before it is attached to parent.
	"""
	for entry in outline:
		element = document.createElement("outline")
		element.setAttribute("text", entry["text"])
		buildOutline(document, element, entry["children"])
		parent.appendChild(element)

def buildOPML(metadata, outline):
	"""Return a minidom document holding outline as OPML 2.0.
	
	If metadata has a "title" entry it is written as <title> inside
	<head>; no other metadata is emitted. The outline entries are
	placed under <body>.
	"""
	document = xml.dom.minidom.parseString(
		"<opml version='2.0'><head></head><body></body></opml>")
	
	if "title" in metadata:
		head = document.getElementsByTagName("head")[0]
		title_text = document.createTextNode(metadata["title"])
		title = document.createElement("title")
		title.appendChild(title_text)
		head.appendChild(title)
	
	body = document.getElementsByTagName("body")[0]
	buildOutline(document, body, outline)
	return document

# Command-line entry point: read a document, extract its outline and
# print it in the requested format.
if __name__ == "__main__":
	import argparse
	import sys
	
	cmdline = argparse.ArgumentParser(
		description="Eye the outline of various documents.")
	cmdline.add_argument("-v", "--version",
		action="version", version=version_string)
	cmdline.add_argument("-f", "--formats", action="store_true",
		help="list supported file formats and exit")

	cmdline.add_argument("-i", "--input",
		metavar="FORMAT", choices=formats_in, default="auto",
		help="format to read the input file as (default: auto)")
	cmdline.add_argument("-o", "--output",
		metavar="FORMAT", choices=formats_out, default="text",
		help="format to write the outline as (default: text)")
	cmdline.add_argument("-m", "--minimum",
		metavar="NUMBER", type=int, default=1,
		help="minimum heading level to read (default: 1)")
	cmdline.add_argument("-n", "--nudge",
		metavar="NUMBER", type=int, default=0,
		help="extra heading levels to add on output (default: 0)")
	
	cmdline.add_argument('source',
		type=argparse.FileType('r'), nargs='?', default="-",
		help="file to read from (default: standard input)")
	
	args = cmdline.parse_args()
	
	if args.formats:
		print("Input formats:", ", ".join(formats_in))
		print("Output formats:", ", ".join(formats_out))
		sys.exit(0)
	
	# Out-of-range values are clamped: --minimum to at least 1 and
	# --nudge to at least 0. parseMarkup() takes the level above the
	# first one to read, hence the "- 1"; printing starts at level 1,
	# hence the "+ 1".
	min_level = max(1, args.minimum) - 1
	pad_level = max(0, args.nudge) + 1
	
	# Turn "auto" into a concrete format based on the file extension.
	input_format = args.input
	if input_format == "auto":
		name = args.source.name
		for candidate in ("org", "md", "gmi", "opml"):
			if name.endswith("." + candidate):
				input_format = candidate
				break
		else:
			print("Can't detect format, please choose.", file=sys.stderr)
			sys.exit(1)
	
	# OPML is loaded through the XML parser below, so it gets no
	# line-based parser here.
	if input_format == "org":
		parser = OrgParser(args.source.readlines())
	elif input_format in ("md", "gmi"):
		parser = MarkupParser(args.source.readlines(), '#')
	elif input_format == "wiki":
		parser = MarkupParser(args.source.readlines(), '=')
	elif input_format == "opml":
		parser = None
	else:
		raise ValueError("Logic error, please file a bug.")

	if parser is None:
		try:
			parser = OPMLoader(args.source)
			parser.parseMeta()
			outline = parseOPML(parser.body)
		except (AttributeError, IndexError) as e:
			# Looking up a missing <head> or <body> with
			# getElementsByTagName(...)[0] raises IndexError.
			print("File missing head or body:",
				e, file=sys.stderr)
			sys.exit(2)
		except ExpatError as e:
			print("Bad XML in input file:",
				e, file=sys.stderr)
			sys.exit(2)
	else:
		parser.parseMeta()
		outline = parseMarkup(parser, min_level)
	
	if args.output == "org":
		if parser.metadata:
			for key in parser.metadata:
				print("#+{}: {}".format(
					key, parser.metadata[key]))
			print()
		printMarkup(outline, '*', pad_level)
	elif args.output in ("md", "gmi"):
		# Select on the *output* format; Markdown and Gemtext share
		# the '#' heading syntax.
		printMarkup(outline, '#', pad_level)
	elif args.output == "wiki":
		printMarkup(outline, '=', pad_level)
	elif args.output == "opml":
		markup = buildOPML(parser.metadata, outline)
		markup.writexml(sys.stdout, "", " ", "\n", encoding="UTF-8")
	else:
		print("---")
		if parser.metadata:
			for key in parser.metadata:
				print(key, '\t', parser.metadata[key])
			print("---")
		printOutline(outline, pad_level - 1)