💾 Archived View for gemini.ctrl-c.club › ~philaeni › files › lipu-pona.sh captured on 2024-09-29 at 03:33:45.

View Raw

More Information

⬅️ Previous capture (2023-09-08)

-=-=-=-=-=-=-

#!/bin/sh
# Fetch the markdown dictionary into ./dictionary.md, which the
# conversion pipeline below reads.
#
# -O forces the output file name.  Without it, wget keeps an existing
# dictionary.md and stores the fresh download as dictionary.md.1, so a
# second run of this script would silently rebuild from stale data.
# The download goes to a temporary name first so that a failed transfer
# never clobbers a previous good copy, and the script stops instead of
# converting a missing or partial file.
src_url=https://raw.githubusercontent.com/lipu-sona-pona/lipu-sona-pona.github.io/main/pages/dictionary.md
if ! wget -O dictionary.md.part "$src_url"; then
    rm -f dictionary.md.part
    echo "lipu-pona.sh: could not download dictionary.md" >&2
    exit 1
fi
mv -f dictionary.md.part dictionary.md || exit 1
# Convert ./dictionary.md into dictfmt source text (lipu-pona.dictsrc) and
# build the dict database named "lipu-pona" from it.
#
# Pipeline stages:
#   1. sed : strip markdown links and emphasis, turn "#### " headings into
#            entries introduced by a "_____" line, indent every other line
#            by three spaces and tag lines with "|" / "|*" markers.
#   2. fmt : re-wrap only the "   |*" (list item) lines.
#   3. sed : untag the list items so the next fmt leaves them alone, and
#            drop the ">" from example lines.
#   4. fmt : re-wrap only the "   |" lines.
#   5. sed : remove the markers and leftovers, prepend the 00-database-*
#            entries and rewrite the "%" metadata line.
#   6. cat : collapse runs of blank lines.
#
# NOTE(review): "\n" in replacement text, "\?", one-line "1i text" and
# `cat --squeeze-blank` look like GNU extensions, so GNU sed/coreutils are
# presumably required -- confirm before running elsewhere.
sed -e '# Remove markdown links
        s/\[\([^]]*\)\]([^)]*)/\1/;       # both [stand](alone)
        s/\[\([^]]*\)$/\1/;               # and [across
        s/^\([^]]*\)\]([^)]*)/\1/;        #   line](boundaries)
        # Remove inline markdown formatting
        # We do this *before* processing headings to also handle it
        # when it appears in the title of a new section
        s/\*\*//;                         # remove bold markers
        s/\*\([^*]*\)\*/\1/;              # remove italic markers
        s/\([0-9]\)\\\./\1./;             # fix \. numbers
        # Turn headings into dict entries
        s/^#### \(.*\)$/_____\n\nHEADWORD \1\n\1/;
        s/HEADWORD \([^(]*\) (nimi ku suli)\n/\1\n/;
        s/HEADWORD \(.*\)\n/\1\n/;
        # Make sure to indent all the other lines (i.e., lines that
        # do not represent the start of a dict entry)
        s/^[^_]/   &/;
        # Fix markdown-isms
        s/%warning%/Warning: /;
        s/%note%/Note: /;
        # Set up the text so it can be fed to a two-fmt pipeline
        s/   \([^*]\)/   |\1/;            # mark non-* lines for fmt
        s/   \* \(.*\)/   |* .\1\n   ;/;  # mark * lines for fmt
        s/|>\(.* -- .*\)/|>  \1/;         # indent examples further
        ' dictionary.md |\
    fmt -p '   |*' | \
    sed 's/^   |\*/   */;
         s/|> \?/|/' |\
    fmt -p '   |' | \
    sed '# Drop "|" fmt markers from the beginning of wrapped lines
         s/^   |/   /;
         # Fix definition lines - if starting with "* .", they
         # represent the start of the definition, otherwise the
         # continuation of the previous definition.
         # The first character of a continuation is captured and put
         # back; matching it without re-emitting it would cut the first
         # letter off every wrapped continuation line.
         s/^   \* \([^.]\)/     \1/;
         s/^   \* \./   * /;
         # ";" markers separate different definition lines, so that
         # fmt will see them as different paragraphs and not make
         # them run together
         /   ;/d;
         # Clean up some of the leftover stuff in the markdown file
         /   ---/d;
         /   Top page/d;
         # Clean up empty lines so they can be collapsed by cat
         s/   $//
         # Add special entries at the beginning of the file
         1i _____\n\n
         1i 00-database-url
         1i https://lipu-sona.pona.la/dictionary.html
         1i _____\n\n
         1i 00-database-short
         # Turns the (collapsed by fmt, thus made out of three
         # consecutive fields prefixed by "%") metadata at the top
         # of the markdown file into text that makes sense for the
         # database-short and database-info entries
         s/^   % \([^%]*\) % \([^%]*\) % \([^%]*\)/>\1 (\3)\
           ~\1, by \2 (\3)\n/;
         s/^>/     /;
         1s/ *~/_____\n\n00-database-info\n/;
    ' | cat --squeeze-blank > lipu-pona.dictsrc
# Build the database files from the generated source; headwords that
# contain "/" are split into alternative headwords.
dictfmt -t --headword-separator '/' lipu-pona < lipu-pona.dictsrc