eulaurarien/fetch_resources.sh

46 lines
1.8 KiB
Bash
Raw Normal View History

2019-11-14 10:23:59 +00:00
#!/usr/bin/env bash
2019-12-07 17:45:48 +00:00
# Print a highlighted status message.
# Arguments: $@ - words of the message; they are joined with single spaces.
# Outputs:   the message on stdout, wrapped in the ANSI "yellow foreground"
#            (ESC[33m) and "reset" (ESC[0m) sequences, followed by a newline.
function log() {
    # Join the arguments explicitly with "$*" rather than embedding "$@" in a
    # string; pin IFS so the separator is always a space.
    local IFS=' '
    # %b expands backslash escapes in the message, as `echo -e` did.
    printf '\033[33m%b\033[0m\n' "$*"
}
# Download a URL into a local file.
# Arguments: $1 - URL to fetch
#            $2 - destination path (truncated and overwritten)
# Outputs:   a "Downloading ..." line on stdout; "Failed!" (plus curl's own
#            error message) on stderr when the transfer fails.
# Returns:   0 on success, curl's exit status otherwise.
function dl() {
    local url=$1
    local dest=$2
    local status=0

    echo "Downloading $url to $dest"
    # --fail makes HTTP error responses (4xx/5xx) a non-zero exit instead of
    # silently saving the server's error page as if it were the resource.
    # --show-error keeps curl's diagnostic visible despite --silent.
    curl --silent --show-error --fail "$url" > "$dest" || status=$?
    if [ "$status" -ne 0 ]
    then
        echo "Failed!" >&2
    fi
    # Propagate the outcome so callers can react to a failed download.
    return "$status"
}
log "Retrieving tests…"
# Discard test caches generated by a previous run.
rm -f tests/*.cache.csv
dl "https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv" \
    "temp/fukuda.csv"
# Build the test CSV: a fixed header line, then one row per input line after
# the first, made of "https://<field 2>/", an empty column, field 3 and
# field 5 of the comma-separated input.
awk -F, '
    BEGIN  { print "url,allow,deny,comment" }
    NR > 1 { print "https://" $2 "/,," $3 "," $5 }
' temp/fukuda.csv > tests/fukuda.cache.csv
log "Retrieving rules…"
# Remove cached rule files from every directory whose name starts with "rules".
rm -f rules*/*.cache.*
# Each download lands in rules_adblock/ under a *.cache.txt name.
dl "https://easylist.to/easylist/easyprivacy.txt" \
    "rules_adblock/easyprivacy.cache.txt"
dl "https://filters.adtidy.org/extension/chromium/filters/3.txt" \
    "rules_adblock/adguard.cache.txt"
log "Retrieving TLD list…"
# Fetch over HTTPS rather than plaintext HTTP so the list cannot be altered
# in transit.
dl https://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
# Drop lines starting with '#' and lower-case every remaining line.
awk '!/^#/ { print tolower($0) }' temp/all_tld.temp.list > temp/all_tld.list
log "Retrieving nameservers…"
# Store the downloaded nameserver list as a cache file under nameservers/.
dl "https://public-dns.info/nameservers.txt" \
    "nameservers/public-dns.cache.list"
log "Retrieving top subdomains…"
# Files involved in refreshing the Cisco Umbrella popularity list.
umbrella_zip=top-1m.csv.zip
umbrella_csv=top-1m.csv
umbrella_fresh=temp/cisco-umbrella_popularity.fresh.list
umbrella_merged=temp/cisco-umbrella_popularity.merged.list
umbrella_cache=subdomains/cisco-umbrella_popularity.cache.list

# Fetch over HTTPS rather than plaintext HTTP: the archive is unpacked locally.
dl https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip "$umbrella_zip"

# unzip -o overwrites a stale CSV left behind by an interrupted run instead of
# stopping at an interactive prompt. A bad archive (e.g. a failed download) or
# a missing CSV skips the update entirely, so an empty list never replaces
# the cache.
if unzip -o "$umbrella_zip" &&
    # Remove the leading run of digits and the comma after it from each line
    # (presumably the popularity rank). [0-9][0-9]* is the portable spelling
    # of the GNU-only \+ in a basic regular expression.
    sed 's|^[0-9][0-9]*,||' "$umbrella_csv" > "$umbrella_fresh"
then
    if [ -f "$umbrella_cache" ]
    then
        # Merge the existing cache with the fresh list, de-duplicated.
        # The result goes to a separate file and only replaces the cache when
        # the whole pipeline succeeded (pipefail also catches a failing or
        # missing pv), so a broken merge cannot truncate the cache.
        if (set -o pipefail; pv -f "$umbrella_cache" "$umbrella_fresh" | sort -u > "$umbrella_merged")
        then
            mv "$umbrella_merged" "$umbrella_cache"
        else
            echo "Failed to merge $umbrella_fresh into $umbrella_cache" >&2
        fi
    else
        # No cache yet: the fresh list becomes the cache as-is.
        mv "$umbrella_fresh" "$umbrella_cache"
    fi
else
    echo "Failed to extract $umbrella_csv from $umbrella_zip" >&2
fi
# Clean up whatever intermediate files exist, on success and on failure alike.
rm -f "$umbrella_csv" "$umbrella_zip" "$umbrella_fresh" "$umbrella_merged"