2023-03-24 04:43:22 +00:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
# Path to the saved search-results HTML page. The first command-line
# argument overrides the default file name.
html_file="${1:-search.fed.wiki.org.html}"

# Abort early when the input is missing. Because of the default above the
# variable is never empty, so the path itself has to be tested rather than
# the string. Diagnostics go to stderr so they never mix with data output.
if [[ ! -e "$html_file" ]]; then
  printf "HTML file '%s' does not exist\n" "$html_file" >&2
  exit 1
fi
|
|
|
|
|
2023-03-24 04:20:11 +00:00
|
|
|
#######################################
# Probe a single wiki URL and emit one CSV record describing it.
# Arguments:
#   $1 - wiki URL as extracted from the search results page
# Outputs:
#   Writes "wiki,farm,http_status,https_status,ip" to stdout, where the
#   two status fields are either OK or FAIL.
# Requires:
#   tldextract, curl and dig on PATH.
#######################################
process_result() {
  local wiki=$1
  local farm status_http status_https host ip

  # The tldextract CLI prints "subdomain domain suffix"; fields 2 and 3
  # joined with a dot give the registered domain (the "farm").
  farm=$(tldextract "$wiki" | cut -d' ' -f2,3 | tr ' ' '.')

  # HEAD request against the URL exactly as given. -q must be the first
  # option for curl to actually skip the user's .curlrc.
  if curl -q -I --silent -m 5 "$wiki" > /dev/null; then
    status_http="OK"
  else
    status_http="FAIL"
  fi

  # Same probe with a leading "http:" scheme upgraded to "https:".
  if curl -q -I --silent -m 5 "${wiki/#http:/https:}" > /dev/null; then
    status_https="OK"
  else
    status_https="FAIL"
  fi

  # Reduce the URL to a bare host name for the DNS lookup.
  host=${wiki#*://}   # drop the scheme, whichever it is
  host=${host%%/*}    # drop the path
  host=${host%%:*}    # drop an explicit port

  # With +short, dig may list CNAMEs before addresses; keep the last line.
  ip=$(dig +short "$host" | tail -n 1)

  printf '%s,%s,%s,%s,%s\n' \
    "$wiki" "$farm" "$status_http" "$status_https" "$ip"
}
|
|
|
|
|
2023-03-25 19:59:24 +00:00
|
|
|
# Destination CSV; start it fresh with the header row.
out_csv=./wikiverse-https-scan.csv
printf '%s\n' "Wiki,Farm,HTTP?,HTTPS?,IP" > "$out_csv"

# Probe every link found under #results, one background job per link.
# Reading line by line (instead of `for x in $(...)`) keeps the shell from
# glob-expanding characters such as ? and * that are common in URLs.
# The jobs inherit the loop's stdout, so each record is appended to the CSV.
while read -r wiki || [[ -n "$wiki" ]]; do
  [[ -n "$wiki" ]] || continue   # skip blank lines
  process_result "$wiki" &
done < <(pup '#results a attr{href}' < "$html_file") >> "$out_csv"

# Barrier: do not exit until every background probe has written its row.
wait
|