wikiverse-analysis/wikiverse-scan.sh

26 lines
810 B
Bash
Executable File

#!/bin/bash
# Usage: wikiverse-scan.sh [HTML_FILE]
#
# HTML_FILE is the saved HTML page whose result links get scanned. When no
# argument is given it defaults to search.fed.wiki.org.html in the current
# directory.
html_file="${1:-search.fed.wiki.org.html}"

# Stop early when the input is missing. Testing for an empty string would be
# pointless here: the default above guarantees a non-empty value, so the thing
# that can actually go wrong is the path not existing.
if [[ ! -e "$html_file" ]]; then
  echo "HTML file '$html_file' does not exist" >&2
  exit 1
fi
#######################################
# Probe a single wiki and print one CSV row describing it.
# Arguments:
#   $1 - wiki URL (scheme optional; may carry a port and a path)
# Outputs:
#   One line on stdout: wiki,farm,http_status,https_status,ip
#     farm         - fields 2 and 3 of tldextract's space-separated output,
#                    joined with a dot
#     http_status  - "OK" if a curl HEAD request to the URL exits 0 within
#                    5 seconds, otherwise "FAIL"
#     https_status - same probe with a leading "http:" rewritten to "https:"
#     ip           - last line of `dig +short` for the URL's host; empty when
#                    dig prints nothing
# Returns:
#   Status of the final printf (probe failures are reported in the row).
#######################################
process_result() {
  local wiki=$1
  local farm status_http status_https https_url host ip

  # Pass the URL as a real argument instead of piping it through xargs, which
  # would re-parse quotes and backslashes inside it.
  farm=$(tldextract "$wiki" | cut -d' ' -f2,3 | sed 's/ /./g')

  # -q must be curl's first argument to have any effect (it skips curlrc).
  # NOTE: without --fail, curl exits 0 for any HTTP response, so "OK" means
  # "the server answered", not "the server answered 2xx".
  if curl -q -I --silent -m 5 "$wiki" > /dev/null; then
    status_http="OK"
  else
    status_http="FAIL"
  fi

  # Only a scheme at the very start of the URL is upgraded.
  https_url=${wiki/#http:/https:}
  if curl -q -I --silent -m 5 "$https_url" > /dev/null; then
    status_https="OK"
  else
    status_https="FAIL"
  fi

  # Reduce the URL to a bare host name for DNS: drop the scheme, then
  # everything from the first "/" (path), then any ":port" suffix.
  host=${wiki#*://}
  host=${host%%/*}
  host=${host%%:*}
  ip=$(dig +short "$host" | tail -n 1)

  printf '%s,%s,%s,%s,%s\n' \
    "$wiki" "$farm" "$status_http" "$status_https" "$ip"
}
# Report file, created in the current working directory.
output_csv="./wikiverse-https-scan.csv"

# Start the report with its header row; this truncates any earlier report.
echo "Wiki,Farm,HTTP?,HTTPS?,IP" > "$output_csv"

# Probe every link that pup extracts from the #results element of the saved
# page. Each probe runs as a background job so a slow host does not hold up
# the others; rows are therefore appended in completion order, not page order.
# Reading line by line keeps each href as one word, with no word splitting or
# glob expansion applied to it.
while IFS= read -r wiki; do
  # Skip blank lines, as the word-splitting loop this replaces did implicitly.
  [[ -n "$wiki" ]] || continue
  # Detach stdin so a probe can never swallow the remaining hrefs.
  process_result "$wiki" < /dev/null &
done < <(pup '#results a attr{href}' < "$html_file") >> "$output_csv"

# Barrier: do not exit until every background probe has written its row.
wait