💾 Archived View for gemini.ctrl-c.club › ~nttp › toys › eyeout.py captured on 2021-12-03 at 14:04:38.
-=-=-=-=-=-=-
#!/usr/bin/env python3
# coding=utf-8
#
# Keep an EyeOut: a tool to eye the outline of various documents
# 2021-09-25 Felix Pleșoianu <https://felix.plesoianu.ro/>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"a tool to eye the outline of various documents (and convert them)"

from __future__ import print_function

import re
import xml.dom.minidom
from xml.parsers.expat import ExpatError

version_string = "Keep an EyeOut v1.0 beta (2021-09-25)"

formats_in = ["auto", "org", "md", "gmi", "wiki", "opml"]
formats_out = ["text", "org", "md", "gmi", "wiki", "opml"]

# Heading marker used by each line-oriented markup format, shared by the
# reading and the writing side so the two cannot drift apart.
head_chars = {"org": '*', "md": '#', "gmi": '#', "wiki": '='}


class MarkupParser(object):
    """Cursor over a list of lines whose headings start with `headChar`.

    A heading of depth N is a line whose first N characters are all
    `headChar`; whatever follows is the heading text.
    """

    def __init__(self, lines, headChar):
        self.lines = lines
        self.metadata = {}
        self.cursor = 0  # index of the next line to examine
        self.headChar = headChar
        self.headline = None  # text of the last heading matched

    def parseMeta(self):
        pass # Most formats lack inherent metadata.

    def skipSection(self):
        """Advance the cursor to the next line starting with `headChar`.

        Stops at the end of input if there is no such line. Returns None.
        """
        # startswith() rather than [0] so an empty string in `lines`
        # counts as body text instead of raising IndexError.
        while (self.cursor < len(self.lines)
                and not self.lines[self.cursor].startswith(self.headChar)):
            self.cursor += 1

    def matchHeadline(self, level = 0):
        """Try to consume the current line as a heading of depth `level`.

        On success the heading text (leading markers and surrounding
        whitespace removed) is stored in `self.headline`, the cursor moves
        past the line and True is returned. Otherwise nothing changes and
        False is returned. A line with more than `level` markers still
        matches; the extra markers stay in the heading text.
        """
        if self.cursor >= len(self.lines):
            return False
        level = max(level, 0)
        line = self.lines[self.cursor]
        # Slice comparison: a line shorter than `level` simply fails to
        # match instead of raising IndexError.
        if line[:level] != self.headChar * level:
            return False
        self.headline = line[level:].strip()
        self.cursor += 1
        return True


class OrgParser(MarkupParser):
    """MarkupParser for '*' headings that also reads leading `#+KEY: value`
    lines into `metadata`."""

    def __init__(self, lines):
        super(OrgParser, self).__init__(lines, '*')
        # Raw string: \s and \+ are regex escapes, not string escapes.
        self.re_meta = re.compile(
            r"^\s*#\+([A-Z]+):(.*)$", re.IGNORECASE)

    def parseMeta(self):
        """Consume consecutive `#+KEY: value` lines starting at the cursor.

        Keys are stored exactly as written (case is not normalised). The
        cursor is left on the first line that is not a metadata line.
        """
        while self.cursor < len(self.lines):
            found = self.re_meta.match(self.lines[self.cursor])
            if found is None:
                break
            key = found.group(1).strip()
            value = found.group(2).strip()
            self.metadata[key] = value
            self.cursor += 1


def parseMarkup(parser, level = 0):
    """Build the outline below depth `level` from a MarkupParser.

    Returns a list of {"text": ..., "children": [...]} dicts, one for each
    heading of depth `level + 1` found from the parser's cursor onwards,
    each carrying its own sub-headings recursively.
    """
    parser.skipSection()
    subnodes = []
    while parser.matchHeadline(level + 1):
        # Read the headline before recursing: the recursive call
        # overwrites parser.headline.
        text = parser.headline
        subnodes.append({
            "text": text,
            "children": parseMarkup(parser, level + 1)
        })
    return subnodes


class OPMLoader:
    """Parses an OPML source and exposes its <head> and <body> elements."""

    def __init__(self, source):
        # Raises ExpatError on malformed XML and IndexError when the
        # document has no <head> or <body> element.
        data = xml.dom.minidom.parse(source)
        self.head = data.getElementsByTagName("head")[0]
        self.body = data.getElementsByTagName("body")[0]
        self.metadata = {}

    def parseMeta(self):
        """Copy each child element of <head> into `metadata` as
        tag name -> value of its first child node.

        An empty element such as <title/> is stored as an empty string.
        """
        for child in self.head.childNodes:
            if child.nodeType == child.ELEMENT_NODE:
                first = child.firstChild
                self.metadata[child.nodeName] = (
                    first.nodeValue if first is not None else "")


def parseOPML(node):
    """Convert the element children of `node` into outline dicts.

    Returns a list of {"text": ..., "children": [...]} built from each
    child element's "text" attribute, recursively.
    """
    subnodes = []
    for child in node.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            subnodes.append({
                "text": child.getAttribute("text"),
                "children": parseOPML(child)
            })
    return subnodes


def printOutline(outline, level = 0):
    """Print the outline as an indented tree, one "`-" entry per line."""
    for i in outline:
        print(' ' * level, "`-", i["text"])
        printOutline(i["children"], level + 1)


def printMarkup(outline, headChar, level = 1):
    """Print the outline as headings made of `level` copies of `headChar`,
    adding one more marker per nesting depth."""
    for i in outline:
        print(headChar * level, i["text"])
        printMarkup(i["children"], headChar, level + 1)


def buildOutline(document, parent, outline):
    """Append one <outline text="..."> element per outline entry to
    `parent`, recursively."""
    for i in outline:
        node = document.createElement("outline")
        node.setAttribute("text", i["text"])
        buildOutline(document, node, i["children"])
        parent.appendChild(node)


def buildOPML(metadata, outline):
    """Return a minidom Document holding the outline as OPML 2.0.

    If `metadata` has a title it becomes the <title> element in <head>.
    The key is matched case-insensitively because OrgParser keeps keys as
    written (e.g. "TITLE"); an exact "title" key takes precedence.
    """
    document = xml.dom.minidom.parseString(
        "<opml version='2.0'><head></head><body></body></opml>")

    title_text = metadata.get("title")
    if title_text is None:
        for key in metadata:
            if key.lower() == "title":
                title_text = metadata[key]
                break

    if title_text is not None:
        head = document.getElementsByTagName("head")[0]
        title = document.createElement("title")
        title.appendChild(document.createTextNode(title_text))
        head.appendChild(title)

    body = document.getElementsByTagName("body")[0]
    buildOutline(document, body, outline)
    return document


def detectFormat(name):
    """Guess the input format from a file name's extension.

    Returns "org", "md", "gmi" or "opml", or None when the extension is
    not one of those.
    """
    for fmt in ("org", "md", "gmi", "opml"):
        if name.endswith("." + fmt):
            return fmt
    return None


def newParser(fmt, lines):
    """Return the line parser for markup format `fmt` over `lines`.

    Raises ValueError for a format that has no line parser.
    """
    if fmt == "org":
        return OrgParser(lines)
    if fmt in head_chars:
        return MarkupParser(lines, head_chars[fmt])
    raise ValueError("Logic error, please file a bug.")


if __name__ == "__main__":
    import argparse
    import sys

    cmdline = argparse.ArgumentParser(
        description="Eye the outline of various documents.")
    cmdline.add_argument("-v", "--version", action="version",
        version=version_string)
    cmdline.add_argument("-f", "--formats", action="store_true",
        help="list supported file formats and exit")
    cmdline.add_argument("-i", "--input", metavar="FORMAT",
        choices=formats_in, default="auto",
        help="format to read the input file as (default: auto)")
    cmdline.add_argument("-o", "--output", metavar="FORMAT",
        choices=formats_out, default="text",
        help="format to write the outline as (default: text)")
    cmdline.add_argument("-m", "--minimum", metavar="NUMBER",
        type=int, default=1,
        help="minimum heading level to read (default: 1)")
    cmdline.add_argument("-n", "--nudge", metavar="NUMBER",
        type=int, default=0,
        help="extra heading levels to add on output (default: 0)")
    cmdline.add_argument('source', type=argparse.FileType('r'),
        nargs='?', default="-",
        help="file to read from (default: standard input)")
    args = cmdline.parse_args()

    if args.formats:
        print("Input formats:", ", ".join(formats_in))
        print("Output formats:", ", ".join(formats_out))
        sys.exit(0)

    min_level = max(1, args.minimum) - 1
    pad_level = max(0, args.nudge) + 1

    in_format = args.input
    if in_format == "auto":
        in_format = detectFormat(args.source.name)
        if in_format is None:
            print("Can't detect format, please choose.", file=sys.stderr)
            sys.exit(1)

    if in_format == "opml":
        try:
            parser = OPMLoader(args.source)
            parser.parseMeta()
            outline = parseOPML(parser.body)
        except (AttributeError, IndexError) as e:
            # IndexError is what a missing <head>/<body> actually raises.
            print("File missing head or body:", e, file=sys.stderr)
            sys.exit(2)
        except ExpatError as e:
            print("Bad XML in input file:", e, file=sys.stderr)
            sys.exit(2)
    else:
        parser = newParser(in_format, args.source.readlines())
        parser.parseMeta()
        outline = parseMarkup(parser, min_level)

    if args.output == "org":
        if len(parser.metadata) > 0:
            for i in parser.metadata:
                print("#+{}: {}".format(i, parser.metadata[i]))
            print()
        printMarkup(outline, '*', pad_level)
    elif args.output in head_chars:
        # md, gmi and wiki differ only in the heading character.
        printMarkup(outline, head_chars[args.output], pad_level)
    elif args.output == "opml":
        markup = buildOPML(parser.metadata, outline)
        markup.writexml(sys.stdout, "", " ", "\n", encoding="UTF-8")
    else:
        print("---")
        if len(parser.metadata) > 0:
            for i in parser.metadata:
                print(i, '\t', parser.metadata[i])
            print("---")
        printOutline(outline, pad_level - 1)