💾 Archived View for beta.lyk.so › systems › food › scripts › scraping-foodista › cache-html.sh captured on 2023-06-14 at 14:13:10.

View Raw

More Information

⬅️ Previous capture (2021-12-04)

-=-=-=-=-=-=-

#!/usr/bin/env sh
#
# Download every URL in a list file into a local cache directory.
#
# usage: <this script> <cache directory> <url list file>
#
# Each URL is stored under the cache directory at the URL's host and path
# (the http:// or https:// scheme prefix is dropped). URLs whose cache file
# already exists are skipped, so the script can be re-run to resume. Up to
# 10 downloads run in parallel.

# Both arguments are required; testing $2 covers $1 as well. The message and
# the exit are separated by `;` so the script still stops if the write to
# stderr fails, and the explicit status makes a usage error visible to callers
# (a bare `exit` would reuse echo's status, i.e. report success).
[ "$2" ] || { >&2 echo "usage: $0 <cache directory> <url list file>"; exit 2; }

# Root of the cache tree. Exported because the worker below runs in separate
# processes started by xargs and reads the value from its environment.
export CACHE_DIR="$1"

# The per-URL worker lives in a temporary file so that xargs can run it.
tmp="$(mktemp)" || { >&2 echo "error: mktemp failed"; exit 1; }

# Delete the worker on every exit path. The signal traps call `exit` so that
# an interrupt really stops the script (a handler that simply returns lets
# execution continue) and so that the EXIT trap performs the single removal;
# `rm -f` keeps that removal quiet if the file is already gone.
cleanup() { rm -f -- "$tmp"; }
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT

# The delimiter is quoted, so nothing below is expanded while writing the
# file: every $... is evaluated later, inside the worker process.
cat > "$tmp" <<"EOF"
#!/usr/bin/env sh
# Worker: fetch the single URL given as $1 into "$CACHE_DIR/<host>/<path>".
# Exits 0 when the file was cached or already present, 1 when the fetch failed.
url="$1"

# xargs may invoke the worker with no argument when the list is empty.
[ -n "$url" ] || exit 0

# Drop a leading scheme using parameter expansion only: no sed process per
# URL and no reliance on the GNU-only `\?` basic-regex operator.
case "$url" in
  https://*) rel="${url#https://}" ;;
  http://*) rel="${url#http://}" ;;
  *) rel="$url" ;;
esac
path="$CACHE_DIR/$rel"

if [ -f "$path" ]; then
  printf '%s\n' "Already exists, skipping: $path"
  exit 0
fi

printf '%s\n' "Caching to $path"

dir="$(dirname "$path")"
mkdir -p "$dir"

# Download to a sibling file and rename only after success. An interrupted
# transfer or an HTTP error response (rejected by -f) must never land at
# $path, because later runs treat any file there as a finished download.
# The -d test stops mv from moving the download *into* an existing directory.
part="$path.part.$$"
if curl -fsS -o "$part" "$url" && [ ! -d "$path" ] && mv -f "$part" "$path"; then
  status=0
else
  rm -f "$part"
  printf '%s\n' "Failed to cache: $url" >&2
  status=1
fi

# rate limit, don't be *too* obnoxious (applies after failed requests too)
sleep 1
exit "$status"
EOF

# Fan the URL list out to the worker: one URL per invocation (-n 1), at most
# 10 invocations at a time (-P 10). The worker is run through `sh`, so it needs
# no execute bit and works even when the temp directory forbids execution.
# The list is attached by redirection instead of `cat |` so that an unreadable
# list file fails this command rather than handing xargs an empty stream.
# NOTE: xargs applies its default quote/backslash processing to the list, so a
# URL containing a raw quote character must be percent-encoded beforehand.
xargs -P 10 -n 1 sh "$tmp" < "$2"