#!/usr/bin/env bash
# encoding: utf-8
#
# Download the gemini:// pages linked from a gemtext index document and run
# gemtext2epub on every .gmi file that was newly downloaded by this run.
#
# Usage: <this-script> [index_url] [min_date]
#   index_url  gemtext document to read "=> gemini://..." links from
#              (default: gemini://gemini.cyberbot.space/gmisub.gmi)
#   min_date   optional YYYY-MM-DD; when given, only links that sit under a
#              "## YYYY-MM-DD" header whose date is >= min_date are fetched
#
# External tools: gemget, python3 (runs ./gemtext2epub relative to the current
# directory), grep with -P support, awk, sed, find, and optionally iconv.
#
# Failures are handled explicitly with `|| return` instead of `set -e`, so the
# status returned by main is the exit status of the script.
set -uo pipefail

# Print all arguments as one line on stderr.
log_error() {
  printf '%s\n' "$*" >&2
}

#######################################
# Turn a string into a lowercase, filesystem-friendly slug. Every run of
# characters other than ASCII letters, digits and periods becomes one "-",
# and leading/trailing dashes are removed.
# Adapted from https://gist.github.com/oneohthree/f528c7ae1e701ad990e6
# (periods are kept so domain names stay readable).
# Arguments: $1 - text to slugify
# Outputs:   the slug on stdout
#######################################
slugify() {
  local text=$1
  local ascii
  # Transliterate to ASCII when possible. If iconv is missing or rejects the
  # input, fall back to the raw text: the sed step below still replaces every
  # non-ASCII byte, so a usable slug is produced either way. iconv's stderr is
  # silenced on purpose because of that fallback.
  ascii="$(printf '%s' "$text" | iconv -t ascii//TRANSLIT 2>/dev/null)" \
    || ascii=$text
  # LC_ALL=C keeps the a-z/A-Z ranges and the case mapping strictly ASCII.
  printf '%s\n' "$ascii" \
    | LC_ALL=C sed -E -e 's/[^a-zA-Z0-9.]+/-/g' -e 's/^-+|-+$//g' \
    | LC_ALL=C tr '[:upper:]' '[:lower:]'
}

#######################################
# Filter a gemtext document down to the sections dated on or after a minimum
# date. A section starts at a "## YYYY-MM-DD" header line and runs until the
# next such header. Lines before the first dated header are always kept.
# Arguments: $1 - minimum date, YYYY-MM-DD (compared as a string)
# Inputs:    document on stdin
# Outputs:   the selected lines on stdout
#######################################
select_entries_since() {
  local min_date=$1
  # The ("" ...) concatenations force a string comparison in awk.
  awk -v min_date="$min_date" '
    /^##[ \t]+[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]/ {
      skip = (("" $2) < ("" min_date))
    }
    !skip { print }
  '
}

#######################################
# Extract the unique gemini:// URLs that follow a "=>" link marker.
# Inputs:  gemtext on stdin
# Outputs: sorted, de-duplicated URLs on stdout, one per line
#######################################
extract_gemini_links() {
  grep -oP '\s*=>\s+\K(gemini://\S+)' | sort -u
}

#######################################
# Print the host part of a gemini:// URL.
# Very simplified URI parsing; doesn't properly handle
# username:password@host, for example.
# Arguments: $1 - gemini:// URL
# Outputs:   host name on stdout (empty line if the URL has none)
#######################################
gemini_host() {
  local rest=${1#gemini://}
  # Cut at the first port, path, query or fragment delimiter.
  printf '%s\n' "${rest%%[:/?#]*}"
}

#######################################
# Entry point: fetch the index, download the linked pages into
# gmi_docs/<slugified-host>/, record the .gmi files that are newer than the
# previous list in gmi_docs/newly_added.txt, and convert each of them.
# Arguments: $1 - index URL (optional), $2 - minimum date (optional)
# Returns:   0 on success, non-zero if a required step failed
#######################################
main() {
  local url="${1:-gemini://gemini.cyberbot.space/gmisub.gmi}"
  local min_date="${2:-}"
  local output_gmi_directory="gmi_docs"
  local newly_added_list="$output_gmi_directory/newly_added.txt"
  local document document_selection
  local link domain_name slugified_domain_name target_dir gmi_file
  local -a gemini_links=()
  local status=0

  if ! command -v gemget >/dev/null; then
    log_error "error: gemget is required but was not found in PATH"
    return 127
  fi

  if ! document="$(gemget -o - "$url")"; then
    log_error "error: could not fetch $url"
    return 1
  fi

  if [[ -n "$min_date" ]]; then
    document_selection="$(select_entries_since "$min_date" <<<"$document")"
  else
    document_selection=$document
  fi

  mapfile -t gemini_links < <(extract_gemini_links <<<"$document_selection")

  # The directory must exist before the marker file can be touched; on a
  # first run nothing has created it yet.
  mkdir -p -- "$output_gmi_directory" || return
  # Refresh the marker's mtime so that `find -newer` below only reports files
  # written after this point.
  touch -- "$newly_added_list" || return

  if (( ${#gemini_links[@]} == 0 )); then
    log_error "warning: no gemini links found in $url"
  else
    for link in "${gemini_links[@]}"; do
      domain_name="$(gemini_host "$link")"
      slugified_domain_name="$(slugify "$domain_name")"
      if [[ -z "$slugified_domain_name" ]]; then
        log_error "warning: skipping link without a usable host: $link"
        continue
      fi
      target_dir="$output_gmi_directory/$slugified_domain_name"
      if ! mkdir -p -- "$target_dir"; then
        log_error "warning: cannot create $target_dir; skipping $link"
        continue
      fi
      # Best effort: one unreachable page must not abort the whole run.
      gemget --add-extension --directory "$target_dir" "$link" \
        || log_error "warning: download failed: $link"
    done
  fi

  # Build the new list in a temp file first; the list itself is the -newer
  # reference and must not be truncated while find is still reading its mtime.
  if find "$output_gmi_directory" -iname '*.gmi' -newer "$newly_added_list" \
    > "${newly_added_list}.tmp"; then
    mv -- "${newly_added_list}.tmp" "$newly_added_list" || return
  else
    rm -f -- "${newly_added_list}.tmp"
    log_error "error: could not list newly added files"
    return 1
  fi

  while IFS= read -r gmi_file; do
    [[ -n "$gmi_file" ]] || continue
    # stdin is redirected so the converter cannot consume the file list.
    if ! python3 gemtext2epub "$gmi_file" </dev/null; then
      log_error "warning: conversion failed: $gmi_file"
      status=1
    fi
  done < "$newly_added_list"

  return "$status"
}

main "$@"