gemini - kennedy.gemi.dev

💾 Archived View for l-3.space › gmi2html.py captured on 2023-04-19 at 22:31:32.
-=-=-=-=-=-=-
#!/usr/bin/env python3

# Convert gemtext to HTML, accepting HTML header and footer files

# Copyright 2021 hunterb
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see https://www.gnu.org/licenses/.

# Original code found at:
#   https://github.com/huntingb/gemtext-html-converter

# Modified by tslil clingman, April 2021 in the following ways
# - added HTML escaping of all non-pre lines
# - added header and footer file input
# - added generation of <ul> ... </ul> for list items
# - fixed stripping of lines, no longer occurs in pre blocks, and
#   otherwise is rstrip only
# - fixed handling of links without titles
# - replace := syntax with something my version of python3 accepts
# - modified description below

"""
HUNTER'S SIMPLE GEMTEXT TO HTML CONVERTER
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A simple script that converts gemtext to HTML.

Takes four command-line arguments. The first two arguments are the
names of files which will be used as a header and footer for the
generated output, in that order.

The next argument is the name of a gemtext file, and the last
argument is the name of the desired output file.

The output file consists of, in order: the header; the lines of
gemtext in the input file converted to their HTML equivalents: <h1>,
<h2>, <h3>, <p>, <a>, <blockquote>, <li>, and <pre> tags; the footer.
"""

# importing required libraries
import sys
import re
import html

# Maps a regex, matched at the beginning of a gemtext line, to the name
# of the HTML tag that line is rendered with. Used by
# convert_single_line(), which tries the patterns in insertion order.
# Every pattern captures the line's text in group 1; the link pattern
# additionally captures the optional label (with its leading
# whitespace) in group 2.
tags_dict = {
    r"^# (.*)": "h1",
    r"^## (.*)": "h2",
    r"^### (.*)": "h3",
    r"^\* (.*)": "li",
    r"^> (.*)": "blockquote",
    r"^=>\s*(\S+)(\s+.*)?": "a"
}


def convert_single_line(gmi_line):
    """Convert one line of gemtext (outside a preformatted block) to HTML.

    The line is matched against the patterns in ``tags_dict``; the first
    match decides the tag. Lines matching no pattern become a ``<p>``.
    All text taken from the input is HTML-escaped before being placed in
    the output, including link targets and link labels.

    For link lines, a leading ``gemini://`` scheme is rewritten to
    ``https://`` and a trailing ``.gmi`` to ``.html``. When the link has
    no label, the rewritten URL is used as the link text.

    Args:
        gmi_line: A single gemtext line without its trailing newline.

    Returns:
        The HTML fragment for the line, without a trailing newline.
    """
    for pattern, tag in tags_dict.items():
        match = re.match(pattern, gmi_line)
        if not match:
            continue
        groups = match.groups()
        if tag != "a":
            inner_text = html.escape(groups[0].strip())
            return f"<{tag}>{inner_text}</{tag}>"

        href = re.sub("^gemini://", "https://", groups[0])
        href = re.sub(r"\.gmi$", ".html", href)
        # The label group is None when absent and may be whitespace-only;
        # in both cases fall back to showing the URL itself.
        label = (groups[1] or "").strip() or href
        # quote=True also escapes "'" so the value cannot terminate the
        # single-quoted attribute it is placed in.
        safe_href = html.escape(href, quote=True)
        safe_label = html.escape(label)
        return f"<p><{tag} href='{safe_href}'>{safe_label}</{tag}></p>"

    return f"<p>{html.escape(gmi_line)}</p>"


def main(args):
    """Convert a gemtext file to HTML, wrapped in a header and footer.

    Reads the input file line by line and writes HTML. Text inside
    preformat blocks (toggled by lines starting with ```) is written as
    a multiline <pre> element; any text following the opening ``` is
    used as the element's title attribute. Consecutive list items are
    wrapped in <ul> ... </ul>. Every other line is rendered by
    convert_single_line().

    Args:
        args: Argument vector in sys.argv layout: args[1] is the header
            file, args[2] the footer file, args[3] the gemtext input
            and args[4] the output file.

    Raises:
        SystemExit: If fewer than four file arguments are supplied.
        OSError: If one of the files cannot be opened.
    """
    if len(args) < 5:
        prog = args[0] if args else "gmi2html.py"
        sys.exit(f"usage: {prog} HEADER FOOTER INPUT.gmi OUTPUT.html")

    # Read header and footer up front so that a missing or unreadable
    # file is reported before the output file is created or truncated.
    with open(args[1]) as header:
        header_text = header.read()
    with open(args[2]) as footer:
        footer_text = footer.read()

    with open(args[3]) as gmi, open(args[4], "w") as output:
        output.write(header_text)

        pre = False
        listing = False
        for line in gmi:
            if line.startswith("```"):
                pre = not pre
                if not pre:
                    output.write("</pre>\n")
                    continue
                # A <pre> may not live inside a <ul>; end the list first.
                if listing:
                    listing = False
                    output.write("</ul>\n")
                alt_text = line.rstrip()[3:]
                if alt_text:
                    alt_text = html.escape(alt_text)
                    output.write(f"<pre title=\"{alt_text}\">\n")
                else:
                    output.write("<pre>\n")
            elif pre:
                # Preformatted text keeps its whitespace and newline.
                output.write(html.escape(line))
            else:
                line = line.rstrip()
                # Use the same "* " prefix that convert_single_line()
                # renders as <li>, so only real list items enter a <ul>.
                is_item = line.startswith("* ")
                if is_item and not listing:
                    listing = True
                    output.write("<ul>\n")
                elif listing and not is_item:
                    listing = False
                    output.write("</ul>\n")
                output.write(convert_single_line(line) + "\n")

        # Close whatever is still open when the input ends.
        if listing:
            output.write("</ul>\n")
        if pre:
            output.write("</pre>\n")

        output.write(footer_text)

# Script entry point: run the conversion only when this file is
# executed directly, passing along the full argument vector.
if __name__ == "__main__":
    sys.exit(main(sys.argv))