💾 Archived View for fixato.org › gemsub_to_ebook.sh.txt captured on 2023-04-19 at 22:44:49.

View Raw

More Information

⬅️ Previous capture (2022-03-01)

-=-=-=-=-=-=-

#!/usr/bin/env bash
# encoding: utf-8
#
# Fetches a gemtext document with gemget, downloads the gemini:// links it
# lists into per-host directories and converts newly downloaded .gmi files
# with gemtext2epub.
#
# Usage: <script> [URL] [MIN_DATE]
#   URL       document to read links from
#             (default: gemini://gemini.cyberbot.space/gmisub.gmi)
#   MIN_DATE  optional YYYY-MM-DD value; compared as a string against the
#             dates in "## YYYY-MM-DD" headings to limit which links are used
url="${1:-gemini://gemini.cyberbot.space/gmisub.gmi}"
date="${2:-}"
output_gmi_directory="gmi_docs"
newly_added_list="$output_gmi_directory/newly_added.txt"

# Without the source document there is nothing to do, so stop here instead of
# silently continuing with an empty link list.
if ! document="$(gemget -o - "$url")"; then
	printf 'error: could not fetch %s with gemget\n' "$url" >&2
	exit 1
fi

# Turn a string into a lowercase, dash-separated ASCII slug.
# Arguments: $1 - text to slugify
# Outputs:   the slug on stdout; runs of characters other than ASCII letters,
#            digits and periods collapse into a single "-", and leading and
#            trailing dashes are removed.
#
# Taken from: https://gist.github.com/oneohthree/f528c7ae1e701ad990e6
# and slightly adapted to allow for periods
function slugify() {
	# printf instead of echo so inputs such as "-n" are treated as data.
	# The period is written without a backslash: inside a bracket expression
	# a backslash is a literal character, so "\." would also let backslashes
	# through into the slug.
	printf '%s\n' "$1" \
		| iconv -t ascii//TRANSLIT \
		| sed -E -e 's/[^a-zA-Z0-9.]+/-/g' -e 's/^-+|-+$//g' \
		| tr 'A-Z' 'a-z'
}

# Print stdin up to, but not including, the first "## YYYY-MM-DD" heading
# whose date sorts before $1 (plain string comparison).
# Assumes the newest headings come first and that each day's links follow
# their heading, so the links of the cut-off day itself are kept.
# If no heading is older than $1 (or there are no dated headings at all) the
# whole input is printed.
# The regex avoids \s and {n} intervals so it also works with non-GNU awk.
function _select_since_date() {
	awk -v min_date="$1" '
		/^##[ \t]+[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]/ && ($2 "") < (min_date "") { exit }
		{ print }
	'
}

# Print the sorted, de-duplicated gemini:// URLs of the link lines on stdin.
# A link line starts with "=>" (leading indentation is tolerated); whitespace
# between "=>" and the URL is optional. A "=>" in the middle of a text line is
# not treated as a link.
function _extract_gemini_links() {
	sed -nE 's/^[[:space:]]*=>[[:space:]]*(gemini:\/\/[^[:space:]]+).*/\1/p' | sort -u
}

if [[ "$date" ]]; then
	document_selection="$(printf '%s\n' "$document" | _select_since_date "$date")"
else
	document_selection="$document"
fi
mapfile -t gemini_links < <(printf '%s\n' "$document_selection" | _extract_gemini_links)

# The output directory must exist before the list file can be touched;
# otherwise the very first run fails here.
mkdir -p "$output_gmi_directory" || exit 1
# Refresh the list file's mtime before downloading; the `find -newer` step
# after the loop compares downloaded files against it.
touch "$newly_added_list" || exit 1
for link in "${gemini_links[@]}"
do
	# very simplified URI scheme parser; doesn't properly handle username:password@host for example.
	# Host = everything after "gemini://" up to the first ":" or "/".
	host_and_rest="${link#gemini://}"
	domain_name="${host_and_rest%%[:/]*}"
	slugified_domain_name="$(slugify "$domain_name")"

	# Links come from a remote document: an empty host, "." or ".." would make
	# the download land at the root of, or outside, the output directory.
	case "$slugified_domain_name" in
		'' | . | ..)
			printf 'warning: skipping link without usable host: %s\n' "$link" >&2
			continue
			;;
	esac

	target_dir="$output_gmi_directory/$slugified_domain_name"
	if ! mkdir -p "$target_dir"; then
		printf 'warning: cannot create %s, skipping %s\n' "$target_dir" "$link" >&2
		continue
	fi
	# One failed download must not stop the remaining ones.
	gemget --add-extension --directory "$target_dir" "$link" \
		|| printf 'warning: download failed: %s\n' "$link" >&2
done

# Rebuild the list of .gmi files that are newer than the list file itself.
# The result is written to a temporary file first because the list file is
# also the -newer reference while find is running.
# -type f keeps directories whose name happens to end in .gmi out of the list.
if find "$output_gmi_directory" -type f -iname '*.gmi' -newer "$newly_added_list" > "${newly_added_list}.tmp" \
	&& mv "${newly_added_list}.tmp" "${newly_added_list}"
then
	# Convert each listed file. A read loop passes every line verbatim, whereas
	# `xargs -I` would still interpret quotes and backslashes in file names.
	while IFS= read -r gmi_file
	do
		[[ -n "$gmi_file" ]] || continue
		# stdin is redirected so the converter cannot consume the rest of the list.
		python3 gemtext2epub "$gmi_file" < /dev/null
	done < "$newly_added_list"
else
	# Do not convert the stale list left over from a previous run.
	rm -f "${newly_added_list}.tmp"
	printf 'error: could not refresh %s\n' "$newly_added_list" >&2
	exit 1
fi