#!/bin/bash
#
# Scan the wikiverse for HTTPS support.
# Reads a saved search results page (default: search.fed.wiki.org.html),
# extracts the wiki URLs, and writes a CSV of HTTP/HTTPS reachability and IPs.

html_file="${1:-search.fed.wiki.org.html}"

if [ ! -f "$html_file" ]; then
  echo "HTML file '$html_file' does not exist"
  exit 1
fi

process_result() {
  wiki=$1
  # Derive the farm (registered domain) from the wiki URL
  farm=$(echo "$wiki" | xargs tldextract | cut -d' ' -f2,3 | sed 's/ /./g')
  # Check plain HTTP, then the same URL over HTTPS (5-second timeout each)
  status_http=$(curl -I -q --silent -m 5 "$wiki" > /dev/null && echo -n "OK" || echo -n "FAIL")
  status_https=$(echo "$wiki" | sed 's/http:/https:/' | xargs curl -I -q --silent -m 5 > /dev/null && echo "OK" || echo "FAIL")
  # Resolve the wiki's hostname to its IP address
  ip=$(echo "$wiki" | sed -e 's;http://;;' -e 's;/;;' | xargs dig +short | tail -1)
  echo "$wiki,$farm,$status_http,$status_https,$ip"
}

echo "Wiki,Farm,HTTP?,HTTPS?,IP" > ./wikiverse-https-scan.csv

# Extract the result links from the saved page and check each wiki in parallel
for wiki in $(pup '#results a attr{href}' < "$html_file"); do
  process_result "$wiki" &
done >> wikiverse-https-scan.csv

wait $(jobs -p)