💾 Archived View for gamma.lyk.so › systems › food › scripts › scraping-foodista › cache-html.sh captured on 2024-07-09 at 00:19:17.
⬅️ Previous capture (2023-07-22)
-=-=-=-=-=-=-
#!/usr/bin/env sh
#
# cache-html.sh - download every URL in a list into a local directory tree.
#
# Usage: cache-html.sh <cache directory> <url list file>
#
# Each URL is stored at <cache directory>/<URL without its http(s):// scheme>.
# URLs whose target file already exists are skipped, so an interrupted run can
# simply be restarted. Up to 10 downloads run in parallel via xargs -P.

[ -n "$2" ] || {
  >&2 echo "usage: $0 <cache directory> <url list file>"
  # Non-zero so callers can tell a usage error from a successful run.
  exit 2
}

# Exported so the per-URL worker processes started by xargs can read it.
export CACHE_DIR="$1"

tmp="$(mktemp)" || exit 1

# Clean up the worker script on any exit path. Signals are converted into a
# normal exit so the script really stops and the EXIT trap fires exactly once.
trap 'rm -f -- "$tmp"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT

# Worker script: caches the single URL passed as $1. The quoted heredoc
# delimiter keeps the body literal; nothing is expanded until the worker runs.
cat > "$tmp" <<"EOF"
url="$1"

# Strip a leading http:// or https:// to get the path relative to the cache.
case "$url" in
  https://*) rel="${url#https://}" ;;
  http://*)  rel="${url#http://}" ;;
  *)         rel="$url" ;;
esac
path="$CACHE_DIR/$rel"

if [ -f "$path" ]; then
  echo "Already exists, skipping: $path"
else
  echo "Caching to $path"
  dir="$(dirname -- "$path")"
  mkdir -p -- "$dir"

  # Download to a side file and rename only on success, so a failed or
  # interrupted transfer never leaves a file that later runs would skip.
  part="$path.part.$$"
  if curl -fsS -o "$part" "$url"; then
    mv -f -- "$part" "$path"
  else
    rm -f -- "$part"
    >&2 echo "Failed to cache: $url"
  fi

  # rate limit, don't be *too* obnoxious
  sleep 1
fi
EOF

# Run the worker through sh explicitly: no shebang or execute bit is needed,
# and it also works when the temp directory is mounted noexec.
xargs -P 10 -n 1 sh "$tmp" < "$2"