Compare commits

...

2 Commits

Author SHA1 Message Date
3wc 4174f7d5d5 Add README 2023-03-24 00:43:42 -04:00
3wc 51d443b160 Add argument processing, update data 2023-03-24 00:43:22 -04:00
3 changed files with 795 additions and 779 deletions

7
README.md Normal file
View File

@ -0,0 +1,7 @@
# wikiverse-analysis
1. Visit this page in a web browser and save it to an HTML file: http://search.fed.wiki.org:3030/#/find=sites&within=sites&match=and&query=
2. Install [`pup`](https://github.com/EricChiang/pup)
3. Run `./wikiverse-scan.sh`, optionally providing the path to the HTML file as
an argument (if omitted, the script reads `search.fed.wiki.org.html` from the
current directory)
4. Results will appear in `wikiverse-https-scan.csv`

File diff suppressed because it is too large Load Diff

15
wikiverse-scan.sh Normal file → Executable file
View File

@ -1,3 +1,12 @@
#!/bin/bash
# Path to the saved search-results HTML page: the first command-line argument,
# or the default filename in the current directory when no argument is given.
html_file="${1:-search.fed.wiki.org.html}"

# Abort early when the input is missing. Test for the file itself rather than
# for an empty string: the default above means the variable is never empty,
# so an emptiness check could not catch a missing file.
if [[ ! -e "$html_file" ]]; then
  # Diagnostics go to stderr so they never mix with data written to stdout.
  echo "HTML file '$html_file' does not exist" >&2
  exit 1
fi
process_result() {
wiki=$1
farm=$(echo "$wiki" | xargs tldextract | cut -d' ' -f2,3 | sed 's/ /./g')
@ -6,10 +15,10 @@ process_result() {
echo "$wiki,$farm,$status_http,$status_https";
}
echo "Wiki,Farm,HTTP?,HTTPS?" > ./wikiverse-https-scan.txt.csv
echo "Wiki,Farm,HTTP?,HTTPS?" > ./wikiverse-https-scan.csv
for wiki in $(pup '#results a attr{href}' < search.fed.wiki.org.html); do
for wiki in $(pup '#results a attr{href}' < "$html_file"); do
process_result "$wiki" &
done >> wikiverse-https-scan.txt.csv
done >> wikiverse-https-scan.csv
wait $(jobs -p)