scraping
HTML Scraping, Screen Scraping
Written by georg on October 31, 2019
#!/bin/bash
#
# Screen-scrape one heading and one table out of a remote HTML page and
# write them, wrapped in a minimal HTML skeleton and transliterated to
# ASCII, to TARGET.
#
# Configuration (edit below):
#   URL    - page to download
#   TARGET - file that receives the resulting HTML document
#
# The field numbers used in the awk calls are specific to the layout of
# the scraped page and must be adjusted if that page changes.

# Abort on the first failing command, on unset variables, and on a failure
# in any pipeline stage, so a failed download never overwrites TARGET.
set -euo pipefail

readonly URL="https://example.com/bla.html"
readonly TARGET="/path/to/file/output.html"

# Private scratch directory instead of fixed names in /tmp: avoids
# symlink/clobber races and collisions between concurrent runs.
WORKDIR=$(mktemp -d) || {
  printf 'error: could not create temporary directory\n' >&2
  exit 1
}
readonly WORKDIR
readonly FILE="${WORKDIR}/in"
readonly OUT="${WORKDIR}/out"
trap 'rm -rf -- "$WORKDIR"' EXIT

if ! wget -O "$FILE" "$URL"; then
  printf 'error: download of %s failed\n' "$URL" >&2
  exit 1
fi

# Flatten the page to a single line once, so awk sees the whole document
# as one record and can split it on tag names.
page=$(tr -d '\n' <"$FILE")

{
  printf '%s\n' "<html><head></head><body>"
  printf '%s\n' "<h1>"
  # 6th field when splitting on the literal "h1"; the two cuts then keep
  # only the text between the first '>' and the following '<'.
  # NOTE(review): presumably this is the text of the third <h1> element,
  # assuming "h1" occurs nowhere else before it - confirm against the page.
  printf '%s\n' "$page" \
    | awk -F"h1" '{print $6 }' \
    | cut -d'>' -f2 \
    | cut -d '<' -f1
  printf '%s\n' "</h1>"
  # 5th field when splitting on the literal "table"; the surrounding
  # "<table " and "table>" put back the separator text the split consumed.
  printf '%s\n' "$page" \
    | awk -F"table" '{print "<table "$5"table>" }'
  printf '%s\n' "</body></html>"
} >"$OUT"

# Replace non-ASCII characters with ASCII approximations.
iconv -f utf-8 -t ascii//TRANSLIT <"$OUT" >"$TARGET"
