💾 Archived View for l-3.space › gmi2html.py captured on 2023-04-26 at 12:55:54.
⬅️ Previous capture (2021-12-03)
-=-=-=-=-=-=-
#!/usr/bin/env python3

# Convert gemtext to HTML, accepting HTML header and footer files
# Copyright 2021 hunterb

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see https://www.gnu.org/licenses/.

# Original code found at:
# https://github.com/huntingb/gemtext-html-converter

# Modified by tslil clingman, April 2021 in the following ways
# - added HTML escaping of all non-pre lines
# - added header and footer file input
# - added generation of <ul> ... </ul> for list items
# - fixed stripping of lines, no longer occurs in pre blocks, and
#   otherwise is rstrip only
# - fixed handling of links without titles
# - replace := syntax with something my version of python3 accepts
# - modified description below

"""
HUNTER'S SIMPLE GEMTEXT TO HTML CONVERTER
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A simple script that converts gemtext to HTML. Takes four command-line
arguments. The first two arguments are the names of files which will
be used as a header and footer for the generated output, in that
order. The next argument is the name of a gemtext file, and the last
argument is the name of the desired output file.

The output file consists of, in order: the header; the lines of
gemtext in the input file converted to their HTML equivalents: <h1>,
<h2>, <h3>, <p>, <a>, <blockquote>, <li>, and <pre> tags; the footer.
"""

# importing required libraries
import sys
import re
import html

# A dictionary that maps regex to match at the beginning of gmi lines
# to their corresponding HTML tag names. Used by
# convert_single_line().
tags_dict = {
    r"^# (.*)": "h1",
    r"^## (.*)": "h2",
    r"^### (.*)": "h3",
    r"^\* (.*)": "li",
    r"^> (.*)": "blockquote",
    r"^=>\s*(\S+)(\s+.*)?": "a",
}


def convert_single_line(gmi_line):
    """Convert one non-preformatted line of gemtext to a line of HTML.

    The line is tried against each pattern in ``tags_dict`` in order;
    the first match decides the tag. Lines matching no pattern become
    a ``<p>`` paragraph.

    For link lines (``=> URL [title]``) a leading ``gemini://`` is
    rewritten to ``https://`` and a trailing ``.gmi`` to ``.html``.
    If the title is missing or blank, the rewritten URL is used as the
    link text.

    All text taken from the input -- including the link target and the
    link text -- is HTML-escaped before being placed in the output.

    Args:
        gmi_line: a single line of gemtext (str).

    Returns:
        The HTML rendering of the line (str), without a trailing
        newline.
    """
    for pattern, tag in tags_dict.items():
        match = re.match(pattern, gmi_line)
        if not match:
            continue

        groups = match.groups()
        if tag != "a":
            inner_text = html.escape(groups[0].strip())
            return f"<{tag}>{inner_text}</{tag}>"

        href = re.sub("^gemini://", "https://", groups[0])
        href = re.sub(r"\.gmi$", ".html", href)
        # The title group is None when absent and may be pure
        # whitespace; in both cases fall back to showing the URL.
        inner_text = (groups[1] or "").strip() or href
        # The attribute is single-quoted, so quotes in the URL must be
        # escaped too (html.escape does this when quote=True).
        href = html.escape(href, quote=True)
        inner_text = html.escape(inner_text)
        return f"<p><{tag} href='{href}'>{inner_text}</{tag}></p>"

    gmi_line = html.escape(gmi_line)
    return f"<p>{gmi_line}</p>"

# Reads the contents of the input file line by line and outputs HTML.
# Renders text in preformat blocks (toggled by ```) as multiline <pre>
# tags.
def main(args):
    """Convert a gemtext file to HTML, wrapped in a header and footer.

    Lines between ``` fences are copied (HTML-escaped) into a <pre>
    element; any text after the opening fence becomes its ``title``
    attribute. Runs of consecutive list items ("* " lines) are wrapped
    in <ul> ... </ul>. Every other line is right-stripped and passed
    to convert_single_line().

    A list or preformatted block that is still open at the end of the
    input is closed before the footer is written, and a list is closed
    before a preformatted block begins, so the generated tags are
    always balanced.

    All files are read and written as UTF-8.

    Args:
        args: argv-style list. args[1] is the header file, args[2] the
            footer file, args[3] the gemtext input file and args[4]
            the HTML output file (args[0] is ignored).
    """
    with open(args[3], encoding="utf-8") as gmi, \
            open(args[4], "w", encoding="utf-8") as output:
        # Write header
        with open(args[1], encoding="utf-8") as header:
            output.write(header.read())

        # Parse gmitext
        pre = False
        listing = False
        for line in gmi:
            if line.startswith("```"):
                # A fence ends any list in progress; <pre> must not be
                # nested inside an unterminated <ul>.
                if listing:
                    listing = False
                    output.write("</ul>\n")
                pre = not pre
                if pre:
                    line = line.rstrip()
                    if len(line) > 3:
                        line = html.escape(line[3:])
                        output.write(f"<pre title=\"{line}\">\n")
                    else:
                        output.write("<pre>\n")
                else:
                    output.write("</pre>\n")
            elif pre:
                # Preformatted text keeps its whitespace and newline.
                output.write(html.escape(line))
            else:
                line = line.rstrip()
                # Use the same "* " marker that convert_single_line()
                # recognises, so only lines rendered as <li> end up
                # inside the <ul>.
                is_item = line.startswith("* ")
                if is_item and not listing:
                    listing = True
                    output.write("<ul>\n")
                elif not is_item and listing:
                    listing = False
                    output.write("</ul>\n")
                output_line = convert_single_line(line)
                output.write(output_line + "\n")

        # Close whatever is still open when the input ends.
        if pre:
            output.write("</pre>\n")
        if listing:
            output.write("</ul>\n")

        # Write footer
        with open(args[2], encoding="utf-8") as footer:
            output.write(footer.read())


# Main guard
if __name__ == "__main__":
    main(sys.argv)