Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

45 lines
1.8 KiB

2 years ago
  1. #!/usr/bin/env bash
  #######################################
  # Print a message on stdout in yellow (ANSI SGR code 33), then reset the
  # colour and end the line.
  # Arguments: $@ - words of the message; they are joined with single spaces.
  # Outputs:   the coloured message, followed by a newline, on stdout.
  #######################################
  function log() {
    # Force a space separator so "$*" joins the words the same way echo did,
    # whatever IFS the caller has set.
    local IFS=' '
    # "$*" (not "$@") keeps the whole message as ONE string between the colour
    # codes; %s prints it as data, so backslashes in the message stay literal.
    printf '\033[33m%s\033[0m\n' "$*"
  }
  #######################################
  # Download a URL into a local file with curl.
  # Arguments: $1 - URL to fetch
  #            $2 - path of the file to create or overwrite
  # Outputs:   a "Downloading ..." line on stdout; "Failed!" on stderr when
  #            the transfer fails.
  # Returns:   0 on success, curl's non-zero exit status otherwise.
  #######################################
  function dl() {
    local url=$1 dest=$2 status=0
    echo "Downloading $url to $dest"
    # --fail turns HTTP errors (404, 500, ...) into a curl failure instead of
    # silently saving the server's error page as if it were the data.
    curl --silent --fail "$url" > "$dest" || status=$?
    if [ "$status" -ne 0 ]; then
      echo "Failed!" >&2
    fi
    # Propagate the outcome so callers can react to a failed download.
    return "$status"
  }
  # ---- Test cases ----
  log "Retrieving tests…"
  # Discard test files generated by a previous run.
  rm -f tests/*.cache.csv
  dl https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv \
    temp/fukuda.csv
  # Build the test CSV: a fixed header line, then one row per upstream data
  # line (the first upstream line is skipped). Upstream field 2 becomes the
  # "url" (wrapped as https://<field>/), "allow" is left empty, field 3 goes
  # to "deny" and field 5 to "comment".
  {
    echo "url,allow,deny,comment"
    tail -n +2 temp/fukuda.csv \
      | awk -F, '{ print "https://" $2 "/,," $3 "," $5 }'
  } > tests/fukuda.cache.csv
  # ---- Ad-block rule lists ----
  log "Retrieving rules…"
  # Remove every cached rule file in the rules* directories before refetching.
  rm -f rules*/*.cache.*
  dl https://easylist.to/easylist/easyprivacy.txt \
    rules_adblock/easyprivacy.cache.txt
  dl https://filters.adtidy.org/extension/chromium/filters/3.txt \
    rules_adblock/adguard.cache.txt
  # ---- TLD list ----
  log "Retrieving TLD list…"
  # Fetched over HTTPS so the list cannot be altered in transit.
  dl https://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
  # Drop lines starting with '#' and lower-case everything else.
  awk '!/^#/ { print tolower($0) }' temp/all_tld.temp.list > temp/all_tld.list
  # ---- Public DNS resolvers ----
  log "Retrieving nameservers…"
  dl https://public-dns.info/nameservers.txt \
    nameservers/public-dns.cache.list
  # ---- Popular subdomains ----
  log "Retrieving top subdomains…"
  # Fetched over HTTPS so the archive cannot be altered in transit.
  dl https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip
  # -o: overwrite a stale top-1m.csv left behind by an interrupted run instead
  # of stopping at unzip's interactive "replace?" prompt.
  unzip -o top-1m.csv.zip
  # Strip the leading "<digits>," column (presumably the popularity rank).
  # -E selects extended regexes, where '+' is standard rather than the
  # GNU-only '\+' of basic regexes.
  sed -E 's|^[0-9]+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list
  rm top-1m.csv top-1m.csv.zip
  if [ -f subdomains/cisco-umbrella_popularity.cache.list ]; then
    # Merge the fresh entries into the existing cache. The cache is copied
    # aside first because the output redirection truncates it before the
    # pipeline gets to read it.
    cp subdomains/cisco-umbrella_popularity.cache.list temp/cisco-umbrella_popularity.old.list
    pv -f temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list \
      | sort -u > subdomains/cisco-umbrella_popularity.cache.list
    rm temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list
  else
    # No cache yet: the fresh list becomes the cache as-is.
    mv temp/cisco-umbrella_popularity.fresh.list subdomains/cisco-umbrella_popularity.cache.list
  fi