💾 Archived View for gemini.susa.net › parse_irc.awk.gmi captured on 2022-03-01 at 15:19:32. Gemini links have been rewritten to link to archived content

View Raw

More Information

⬅️ Previous capture (2021-11-30)

-=-=-=-=-=-=-

Awk script to parse IRC log

This script parses out the useful bits of an IRC log and presents them in Gemini markup.

Example that re-parses the script from makeworld.gq

#!/usr/bin/awk -f

# Start-up: block_idx is the index of the next free slot in blocks[],
# i.e. the number of completed output blocks stored so far.
BEGIN { block_idx = 0 }

# Main rule: runs for every input line containing a "<...>" token.
#
# Expected line shape:
#     [YYYY-MM-DDThh:mm<rest of timestamp>] <nick> message text
#
# The message is appended to the block currently being built. Whenever the
# date or the speaker differs from the previous message, the current block is
# stored in blocks[] and a new one is started with Gemini headings
# ("## date" and/or "### hh:mm - nick"). Every URL found on the line is added
# to the block as a Gemini "=>" link line.
#
# Globals written: msg_date, msg_user, block, blocks[], block_idx.
#
# NOTE: the three-argument form of match() is a gawk extension, so the script
# has to be run with gawk even though the shebang names plain awk.
/<.+>/ {

    # arr[1] = date, arr[2] = hh:mm, arr[3] = nick, arr[4] = message text.
    # The remainder of the timestamp is matched with [^]]* (not a greedy .+)
    # so it stops at the FIRST "]". Otherwise a message that itself contains
    # "] <name>" (e.g. a pasted log quote) would be attributed to the quoted
    # name and lose everything before the quote.
    if(match($0, /\[([0-9]+-[0-9]+-[0-9]+)T([0-9]+:[0-9]+)[^]]*\] <([^>]+)>(.+)/, arr)) {

        headers = ""

        # New day: emit a date heading.
        if(msg_date != arr[1]) {
            msg_date = arr[1]
            headers = "\n## " msg_date "\n"
        }

        # New speaker: emit a "time - nick" sub-heading.
        if(msg_user != arr[3]) {
            msg_user = arr[3]
            headers = headers "\n### " arr[2] " - " msg_user
        }

        if(headers != "") {
            # Store any existing block and start a new one
            if(block != "") {
                blocks[block_idx++] = block
                block = ""
            }
            block = headers "\n"
        }

        block = block "" arr[4] "\n"

        # Store any gemini/http(s) links as Gemini link lines.
        for (idx=1; idx <= NF; idx++) {
            if(match($idx, /(gemini|http|https):\/\/.+/)) {
                # Emit only the URL itself: start at the scheme so leading
                # punctuation such as "(" or "<" does not end up in the link
                # target, which would make the "=>" line invalid.
                url = substr($idx, RSTART)

                # If the URL was wrapped (<url>, (url), [url]), drop the
                # matching closing character as well. A closer is removed
                # only when its opener directly precedes the URL, so URLs
                # that legitimately end in ")" are left alone.
                if(RSTART > 1) {
                    wrap = index("<([", substr($idx, RSTART - 1, 1))
                    if(wrap > 0 && length(url) > 1 && substr(url, length(url)) == substr(">)]", wrap, 1))
                        url = substr(url, 1, length(url) - 1)
                }

                block = block "=> " url " " msg_user "'s link to '" url "'\n"
            }
        }
    }
}

# Final rule: flush the block still being accumulated, then print every
# stored block. Blocks are printed in the order they were collected unless
# the variable "sort" is set to "desc" (e.g. with -v sort=desc), in which
# case they are printed newest-first.
END {
    # The last block is never followed by a header change, so store it here.
    if (block != "") {
        blocks[block_idx] = block
    }

    last = block_idx

    for (n = 0; n <= last; n++) {
        if (sort == "desc") {
            pos = last - n
        } else {
            pos = n
        }
        printf("%s", blocks[pos])
    }
}