#!/usr/bin/env bash
#
# Refreshes the cached inputs used by this project: test cases, ad-block rule
# lists, the IANA TLD list, public nameservers and the Cisco Umbrella top-1m
# list. All paths are relative, so run it from the directory that contains
# tests/, rules_adblock/, nameservers/, subdomains/ and temp/.
#
# Requires: curl, awk, grep, sed, sort, unzip, pv.
#
# The script is best-effort: a failed download is reported but does not stop
# the remaining steps.

#######################################
# Prints a progress message in yellow.
# Arguments: the words of the message (joined with spaces)
# Outputs:   the coloured message on stdout
#######################################
log() {
  printf '\033[33m%s\033[0m\n' "$*"
}

#######################################
# Downloads a URL into a file.
# Arguments: $1 - URL to fetch
#            $2 - destination path (truncated before the download starts)
# Outputs:   a progress line on stdout; "Failed!" and curl's error on stderr
# Returns:   0 on success, curl's exit status otherwise
#######################################
dl() {
  local url=$1
  local dest=$2
  local status=0

  echo "Downloading ${url} to ${dest}…"
  # --fail turns HTTP error responses into a non-zero exit status instead of
  # saving the server's error page as if it were the requested data.
  curl --silent --show-error --fail "${url}" > "${dest}" || status=$?
  if (( status != 0 )); then
    echo "Failed!" >&2
  fi
  return "${status}"
}

# Several steps below write scratch files into temp/.
mkdir -p temp

log "Retrieving tests…"
rm -f tests/*.cache.csv
dl "https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv" \
  temp/fukuda.csv
# Skip the first line of the downloaded CSV and rewrite each remaining row as
# url,allow,deny,comment: column 2 becomes the URL, allow is left empty,
# column 3 goes to deny and column 5 to comment.
{
  echo "url,allow,deny,comment"
  tail -n +2 temp/fukuda.csv \
    | awk -F, '{ print "https://" $2 "/,," $3 "," $5 }'
} > tests/fukuda.cache.csv

log "Retrieving rules…"
rm -f rules*/*.cache.*
dl "https://easylist.to/easylist/easyprivacy.txt" \
  rules_adblock/easyprivacy.cache.txt
dl "https://filters.adtidy.org/extension/chromium/filters/3.txt" \
  rules_adblock/adguard.cache.txt

log "Retrieving TLD list…"
dl "http://data.iana.org/TLD/tlds-alpha-by-domain.txt" temp/all_tld.temp.list
# Drop lines starting with '#' and lowercase the rest.
grep -v '^#' temp/all_tld.temp.list \
  | awk '{print tolower($0)}' > temp/all_tld.list

log "Retrieving nameservers…"
dl "https://public-dns.info/nameservers.txt" nameservers/public-dns.cache.list

log "Retrieving top subdomains…"
dl "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" \
  top-1m.csv.zip
# -o overwrites a top-1m.csv left behind by an interrupted run instead of
# stopping to ask on the terminal.
unzip -o top-1m.csv.zip
# Strip the leading "<digits>," from every line. Written with the POSIX
# '[0-9][0-9]*' form because '\+' is a GNU extension in basic regular
# expressions.
sed 's|^[0-9][0-9]*,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list
rm top-1m.csv top-1m.csv.zip

if [[ -f subdomains/cisco-umbrella_popularity.cache.list ]]; then
  # Merge the fresh list into the existing cache. The cache is copied aside
  # first because the output redirection below truncates it before it could
  # be read.
  cp subdomains/cisco-umbrella_popularity.cache.list \
    temp/cisco-umbrella_popularity.old.list
  pv -f temp/cisco-umbrella_popularity.old.list \
    temp/cisco-umbrella_popularity.fresh.list \
    | sort -u > subdomains/cisco-umbrella_popularity.cache.list
  rm temp/cisco-umbrella_popularity.old.list \
    temp/cisco-umbrella_popularity.fresh.list
else
  mv temp/cisco-umbrella_popularity.fresh.list \
    subdomains/cisco-umbrella_popularity.cache.list
fi