💾 Archived View for gamma.lyk.so › systems › food › scripts › scraping-foodista › convert-to-yaml.sh captured on 2024-07-09 at 00:19:35.
⬅️ Previous capture (2023-07-22)
-=-=-=-=-=-=-
#!/usr/bin/env sh

# Convert one saved recipe HTML page into a recipe file with YAML front
# matter, and fetch the page's featured image next to it.
#
# Usage: <this script> <recipe dir> <html source>
#   <recipe dir>   output directory; receives "<name>.yml" and
#                  "images/<name>.jpg", where <name> is the basename of
#                  <html source>
#   <html source>  path to the HTML file to convert
#
# Dependency: pup (also uses curl, tr and GNU sed for `-z` and `\+`)

set -e

[ "$2" ] || { echo "usage: $0 <recipe dir> <html source>" >&2; exit 1; }

echo "Converting $2"

name="$(basename "$2")"

# A failure here is fatal on purpose: without the directory the image
# download below could not write its output file anyway.
mkdir -p "$1/images"

# Featured image: downloaded only when no local copy exists yet.
# Because of `set -e`, a failing curl aborts the whole conversion.
img="$1/images/$name.jpg"
imgurl="$(pup -f "$2" 'div.featured-image img attr{src}')"
[ -f "$img" ] || curl -s -o "$img" "$imgurl"

title="$(pup -f "$2" '#page-title text{}')"
author="$(pup -f "$2" '.username text{}')"

# Image credit: use the link inside the featured-image block when it has
# text; otherwise credit the recipe author and leave the URL empty.
imgcredit="$(pup -f "$2" 'div.featured-image a text{}')"
if [ "$imgcredit" ]; then
  imgcrediturl="$(pup -f "$2" 'div.featured-image a attr{href}' | tail -n1)"
else
  imgcrediturl=""
  imgcredit="$author"
fi

# Description text; `sed -z` treats the input as one record so that runs of
# three or more newlines can be squeezed down to a single blank line.
description="$(pup -f "$2" 'div.field-type-text-with-summary text{}' \
  | sed -z 's/\n\n\+/\n\n/g')"

# Ingredients: flatten the HTML to one line, break after every closing
# </div>, strip all tags, then turn each line into a "- item" list entry
# with runs of spaces squeezed.
ingredients="$(pup -f "$2" 'div[itemprop="ingredients"]' \
  | tr -d "\n" \
  | sed 's|</div>|</div>\n|g; s|<[^>]\+>||g;' \
  | sed 's/^ \+//g; s/^/- /g' \
  | tr -s ' ')"

# Directions: same treatment as ingredients, and additionally drop a leading
# "N. " step number from each line before adding the "- " list marker.
directions="$(pup -f "$2" 'div[itemprop="recipeInstructions"].step-body' \
  | tr -d "\n" \
  | sed 's|</div>|</div>\n|g; s|<[^>]\+>||g;' \
  | sed 's/^ \+//g; s/^[0-9]\+\. \+//g; s/^/- /g' \
  | tr -s ' ')"

# Tags: one link text per line from pup, joined into "a, b, c".
tags="$(pup -f "$2" 'div.field-type-taxonomy-term-reference a text{}' \
  | tr "\n" "," \
  | sed 's/,$//g; s/,/, /g;')"

# NOTE(review): as written, the two substitutions below replace a space with
# a space. Presumably the first character was meant to be a non-breaking
# space -- confirm against the upstream script before relying on it.
# printf is used instead of echo so backslashes in scraped text are emitted
# literally regardless of which sh implementation runs this script.
directions_out="$(printf '%s\n' "$directions" | sed 's/ / /g')"
description_out="$(printf '%s\n' "$description" | sed 's/ / /g')"

# NOTE(review): the line layout of the document below is a reconstruction;
# verify it against the expected recipe template.
cat > "$1/$name.yml" <<EOF
---
layout: recipe
title: $title
author: $author
license: https://creativecommons.org/licenses/by/3.0/
image: $img
image_credit: $imgcredit
image_credit_url: $imgcrediturl
tags: $tags
ingredients:
$ingredients
directions:
$directions_out
---

$description_out
EOF