#!/bin/bash
#
# Scan the wikiverse for HTTPS support.
# Reads a saved search results page (default: search.fed.wiki.org.html),
# extracts the wiki URLs, and writes a CSV of HTTP/HTTPS reachability and IPs.

html_file="${1:-search.fed.wiki.org.html}"

if [ ! -f "$html_file" ]; then
  echo "HTML file '$html_file' does not exist"
  exit 1
fi

process_result() {
  wiki=$1
  # Derive the farm (registered domain) from the wiki URL
  farm=$(echo "$wiki" | xargs tldextract | cut -d' ' -f2,3 | sed 's/ /./g')
  # Check plain HTTP, then the same URL over HTTPS (5-second timeout each)
  status_http=$(curl -I -q --silent -m 5 "$wiki" > /dev/null && echo -n "OK" || echo -n "FAIL")
  status_https=$(echo "$wiki" | sed 's/http:/https:/' | xargs curl -I -q --silent -m 5 > /dev/null && echo "OK" || echo "FAIL")
  # Resolve the wiki's hostname to its IP address
  ip=$(echo "$wiki" | sed -e 's;http://;;' -e 's;/;;' | xargs dig +short | tail -1)
  echo "$wiki,$farm,$status_http,$status_https,$ip"
}

echo "Wiki,Farm,HTTP?,HTTPS?,IP" > ./wikiverse-https-scan.csv

# Extract the result links from the saved page and check each wiki in parallel
for wiki in $(pup '#results a attr{href}' < "$html_file"); do
  process_result "$wiki" &
done >> wikiverse-https-scan.csv

wait $(jobs -p)