💾 Archived View for gamma.lyk.so › systems › food › scripts › scraping-foodista › convert-to-yaml.sh captured on 2024-07-09 at 00:19:35.

View Raw

More Information

⬅️ Previous capture (2023-07-22)

-=-=-=-=-=-=-

#!/usr/bin/env sh

# Convert one saved recipe HTML page into a .yml file with YAML front matter,
# fetching the page's featured image alongside it.
#
# Usage: <this script> <recipe dir> <html source>
#   $1  output directory; images are stored under "$1/images"
#   $2  path to the saved HTML page to convert
#
# Dependencies: pup (HTML querying), curl (image download)

set -e

# Usage errors are diagnostics, so they go to stderr.
[ "$2" ] || { echo "usage: $0 <recipe dir> <html source>" >&2; exit 1; }
echo "Converting $2"

# Let a failing mkdir abort here (via set -e) with its own error message
# instead of surfacing later as an unexplained curl failure.
mkdir -p "$1/images"
img="$1/images/$(basename "$2").jpg"
imgurl="$(pup -f "$2" 'div.featured-image img attr{src}')"

# An image file already on disk is treated as cached and not fetched again.
if [ ! -f "$img" ]; then
  # Without a URL curl would fail anyway; say why instead of dying silently.
  [ "$imgurl" ] || { echo "$0: no featured image found in $2" >&2; exit 1; }

  # -f: fail on HTTP errors rather than saving the error page as the image.
  # -S: keep -s quiet on success but still report why a transfer failed.
  # On failure remove any partial file so the next run does not mistake it
  # for a cached image, then propagate curl's exit status.
  curl -fsS -o "$img" "$imgurl" || { rc=$?; rm -f "$img"; exit "$rc"; }
fi

# Basic page metadata, each pulled from the saved HTML with a pup selector.
title="$(pup -f "$2" '#page-title text{}')"
author="$(pup -f "$2" '.username text{}')"

# Image attribution: use the link text inside the featured-image block when
# there is one (with the last href found there as its URL); otherwise credit
# the recipe author and leave the URL empty.
imgcrediturl=""
imgcredit="$(pup -f "$2" 'div.featured-image a text{}')"
if [ -z "$imgcredit" ]; then
  imgcredit="$author"
else
  imgcrediturl="$(pup -f "$2" 'div.featured-image a attr{href}' | tail -n1)"
fi

# NOTE: the sed expressions below rely on GNU sed (-z, \+ and \n in the
# replacement text).

# Free-text description; runs of blank lines are collapsed to a single one.
# -z makes sed treat the whole text as one record so \n can be matched.
description="$(pup -f "$2" 'div.field-type-text-with-summary text{}' \
  | sed -z 's/\n\n\+/\n\n/g')"

# Ingredients as a "- item" list. The matched markup is joined onto one line,
# split again after every closing </div>, stripped of tags, trimmed of leading
# spaces, prefixed with "- ", and finally has repeated spaces squeezed.
# The selector is single-quoted so the inner double quotes reach pup intact
# (nested double quotes would silently terminate the shell string).
ingredients="$(pup -f "$2" 'div[itemprop="ingredients"]' \
  | tr -d "\n" \
  | sed 's|</div>|</div>\n|g; s|<[^>]\+>||g;' \
  | sed 's/^ \+//g; s/^/- /g' | tr -s ' ')"

# Directions go through the same pipeline, additionally dropping a leading
# "N. " step number from each line before the "- " prefix is added.
directions="$(pup -f "$2" 'div[itemprop="recipeInstructions"].step-body' \
  | tr -d "\n" \
  | sed 's|</div>|</div>\n|g; s|<[^>]\+>||g;' \
  | sed 's/^ \+//g; s/^[0-9]\+\. \+//g; s/^/- /g' | tr -s ' ')"

# Tags: one link text per line -> "a, b, c" (newlines become commas, the
# trailing comma is removed, then a space is added after each comma).
tags="$(pup -f "$2" 'div.field-type-taxonomy-term-reference a text{}' \
  | tr "\n" "," | sed 's/,$//g; s/,/, /g;')"

# Write the recipe to "<recipe dir>/<source basename>.yml": a front-matter
# section between the two "---" lines, followed by the description as body.
# &nbsp; entities in the directions and description are replaced by spaces.
# printf is used instead of echo so backslashes in the scraped text are never
# interpreted as escape sequences by the shell's echo.
# NOTE(review): values are emitted unquoted; a title or tag containing ": "
# or a leading special character would likely yield invalid YAML - confirm
# whether the consumer needs them quoted.
cat > "$1/$(basename "$2").yml" <<EOF
---

layout: recipe
title: $title
author: $author
license: https://creativecommons.org/licenses/by/3.0/
image: $img
image_credit: $imgcredit
image_credit_url: $imgcrediturl
tags: $tags

ingredients:
$ingredients

directions:
$(printf '%s\n' "$directions" | sed 's/&nbsp;/ /g')

---

$(printf '%s\n' "$description" | sed 's/&nbsp;/ /g')
EOF