💾 Archived View for gemini.zachdecook.com › usfm2gmi › usfm2gmi.py captured on 2023-09-28 at 16:10:44.
⬅️ Previous capture (2023-09-08)
-=-=-=-=-=-=-
#!/usr/bin/env python3
"""Convert usfm line-by-line into gemtext"""
__author__ = "Zach DeCook"
__email__ = "zachdecook@librem.one"
__copyright__ = "Copyright (C) 2021 Zach DeCook"
__license__ = "AGPL"
__version__ = "3"

import fileinput

# Replacement glyphs for 'a'..'z', indexed by offset from 'a'.
_SMALLCAPS = 'ᴀʙᴄᴅᴇғɢʜɪᴊᴋʟᴍɴᴏᴘǫʀsᴛᴜᴠᴡxʏᴢ'

# Replacement glyphs for the code points '0'..'W', indexed by offset from '0'.
# Characters without a replacement here (':', 'C', 'F', ...) map to themselves.
_SUPERSCRIPT = '⁰¹²³⁴⁵⁶⁷⁸⁹:;<=>?@ᴬᴮCᴰᴱFᴳᴴᶦᴶᴷᴸᴹᴺᴼᴾQᴿSᵀᵁⱽᵂ'

# Substitutions applied, in this order, to put whitespace around markers so
# that str.split() yields one token per marker.  Order matters: the first
# entry rewrites every backslash, and the later ones match on its result.
_SPACING = (
    ('\\', ' \\'),
    ('\\nd*', '\\nd* '),
    ('\\+nd*', '\\+nd* '),
    ('\\f*', '\\f* '),
    ('\\wj*', '\\wj* '),
    ('\\w*', ' \\w* '),
    ('\\+w*', '\\+w* '),
)

# Markers that turn the whole line into a heading when they come first.
_HEADINGS = {
    '\\mt1': '\n# ', '\\mt': '\n# ', '\\ms': '\n# ', '\\h': '\n# ',
    '\\mt2': '\n## ', '\\s': '\n## ', '\\s1': '\n## ',
    '\\mt3': '\n### ', '\\d': '\n### ', '\\sp': '\n### ',
}

# Tokens that emit a fixed piece of output and nothing else.
_LITERAL = {
    '\\p': '\n', '\\m': '\n',
    '\\pi': '\n\t', '\\pi1': '\n\t', '\\mi': '\n\t',
    '\\li1': '\n* ',
    '\\q': '\n> ', '\\q1': '\n> ',
    '\\q2': '\n>\t', '\\q22': '\n>\t',  # \q22 is bad input
    '\\q3': '\n>\t\t',
    '\\qs': '\t',
    '\\em': '*', '\\it': '*',
    '\\f*': ']',
    '\\x*': ')',
    # Opening punctuation: emitted without a trailing space.
    '“': '“', '(': '(', '‘': '‘',
}

# Markers that emit fixed output and also drop the following token (the
# footnote / cross-reference caller).
# Footnotes: https://ubsicap.github.io/usfm/notes_basic/fnotes.html
# Cross-references: https://ubsicap.github.io/usfm/notes_basic/xrefs.html
_LITERAL_THEN_SKIP = {'\\f': '[', '\\x': '('}

# Markers that emit nothing and drop the following token (identifiers,
# verse/chapter numbers, and the origin reference of inline notes).
_SKIP_NEXT = frozenset(['\\id', '\\ide', '\\v', '\\c', '\\fr', '\\xo'])

# Markers that are dropped without any output.
# TODO: support Endnotes (\fe and \fe*)
# TODO: fancy formatting of more types
_IGNORED = frozenset([
    '\\qs*', '\\wj', '\\wj*', '\\xt', '\\ft',
    '\\w', '\\w*', '\\+w', '\\+w*',  # words which appear in the glossary
    '\\nb',
])

# Closing punctuation: attached to the preceding word.
_CLOSING = frozenset([
    ',', '.', ';', '”', ',”', '.”', '?”', ')', ':', '!', '?', '.’', '.’”',
    '?’”', '?’', ';”', '!”', ');', '),',
])


def printf(string):
    """Print *string* to stdout without appending a newline."""
    print(string, end='')


def smallcaps(word):
    """Return *word* with each of 'a'..'z' replaced by a small-capital glyph.

    Every other character is passed through unchanged.
    """
    return ''.join(
        _SMALLCAPS[ord(c) - ord('a')] if 'a' <= c <= 'z' else c
        for c in word
    )


def superscript(word):
    """Return *word* with characters in '0'..'W' replaced from the table.

    Every other character is passed through unchanged.
    """
    # TODO: also superscript lowercase letters
    return ''.join(
        _SUPERSCRIPT[ord(c) - ord('0')] if '0' <= c <= 'W' else c
        for c in word
    )


def _tokenize(line):
    """Split a usfm line into whitespace-separated tokens."""
    # TODO: preserve the lack of whitespace before a backslash.
    for old, new in _SPACING:
        line = line.replace(old, new)
    return line.split()


def convert(line):
    """Convert one line of usfm into a string of gemtext.

    The line is split into tokens; markers are translated or dropped and
    ordinary words are joined with single spaces.  A line that starts with a
    heading marker becomes a gemtext heading whose text is the converted
    remainder of the line.  Returns '' for a blank line or a \\rem line, and
    a bare newline for \\b.
    """
    split = _tokenize(line)
    if not split:
        return ''

    first = split[0]
    if first in _HEADINGS:
        # TODO: parse as word for title tags in title line
        return _HEADINGS[first] + convert(' '.join(split[1:]))
    if first == '\\b':
        return '\n'
    if first == '\\rem':
        return ''

    out = ''
    nd = False  # inside \nd ... \nd*: words are rendered in small caps
    skip = 0    # number of upcoming tokens to drop
    for word in split:
        if skip > 0:
            skip -= 1
        elif word in _SKIP_NEXT:
            skip = 1
        elif word in _IGNORED:
            continue
        elif word in _LITERAL:
            out += _LITERAL[word]
        elif word in _LITERAL_THEN_SKIP:
            out += _LITERAL_THEN_SKIP[word]
            skip = 1
        elif word in ('\\em*', '\\it*'):
            # Close the emphasis directly against the last word.
            out = out.rstrip() + '*'
        elif word in ('\\nd', '\\+nd'):
            nd = True
        elif word in ('\\nd*', '\\+nd*'):
            nd = False
        elif '|strong="' in word:
            # Keep the word and drop its attribute; the number could be
            # rendered with superscript() but is currently omitted.
            out += word.split('|')[0] + ' '
        elif word in _CLOSING:
            # Remove those extra spaces that sneak in.  endswith() rather
            # than out[-1] so that a line starting with punctuation (empty
            # `out`) does not raise IndexError.
            if out.endswith(' '):
                out = out[:-1]
            out += word + ' '
        elif nd:
            out += smallcaps(word) + ' '
        else:
            out += word + ' '
    return out


def main():
    """Read usfm from stdin, output gemtext to stdout

    ./usfm2gmi <in.usfm >out.md
    """
    for line in fileinput.input():
        printf(convert(line))


if __name__ == '__main__':
    main()