Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
2.8 KiB

2 years ago
  1. #!/usr/bin/env bash
  # Print a highlighted (yellow) status message on stdout.
  # Arguments: $@ - words of the message; they are joined with single spaces.
  # Outputs:   the message wrapped in ANSI yellow/reset codes, then a newline.
  log() {
    # Pin IFS so "$*" always joins with a space, whatever the caller has set.
    local IFS=' '
    # The colour codes live in the format string; the message goes through %s
    # so backslashes or percent signs inside it are printed literally.
    printf '\033[33m%s\033[0m\n' "$*"
  }
  # Download a URL into a local file.
  # Arguments: $1 - source URL
  #            $2 - destination path (created or truncated)
  # Outputs:   a progress line on stdout; "Failed!" on stderr on error.
  # Returns:   0 on success, otherwise curl's exit status.
  dl() {
    local url=$1 dest=$2
    local status=0
    echo "Downloading $url to $dest"
    # --fail:     treat HTTP 4xx/5xx as an error instead of saving the error page.
    # --location: follow redirects rather than saving the redirect response.
    curl --silent --fail --location "$url" > "$dest" || status=$?
    if [[ $status -ne 0 ]]; then
      echo "Failed!" >&2
    fi
    # Propagate the result so callers can react to a failed download.
    return "$status"
  }
  # Refresh the cached rule lists: drop stale caches, then fetch every source.
  log "Retrieving rules…"
  rm -f rules*/*.cache.*

  # One entry per source, written as "<url> <destination>". Disabled sources
  # stay in place as comments, next to the notes explaining why.
  rule_sources=(
    "https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt"
    # From firebog.net Tracking & Telemetry Lists
    # "https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list"
    # "https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt rules/notrack-blocklist.cache.list"
    # False positives: https://github.com/WaLLy3K/wally3k.github.io/issues/73 -> 69.media.tumblr.com chicdn.net
    "https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts rules_hosts/add2o7.cache.txt"
    "https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt rules_hosts/spy.cache.txt"
    # "https://raw.githubusercontent.com/Kees1958/WS3_annual_most_used_survey_blocklist/master/w3tech_hostfile.txt rules/w3tech.cache.list"
    # False positives: agreements.apple.com -> edgekey.net
    # "https://www.github.developerdan.com/hosts/lists/ads-and-tracking-extended.txt rules_hosts/ads-and-tracking-extended.cache.txt" # Lots of false-positives
    # "https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/android-tracking.txt rules_hosts/android-tracking.cache.txt"
    # "https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt rules_hosts/smart-tv.cache.txt"
    # "https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/AmazonFireTV.txt rules_hosts/amazon-fire-tv.cache.txt"
  )
  for rule_source in "${rule_sources[@]}"; do
    # Split the entry at its single space: URL before it, destination after.
    dl "${rule_source%% *}" "${rule_source#* }"
  done
  # Do not leave the helper variables behind for the rest of the script.
  unset rule_sources rule_source
  # Fetch the TLD list and store a comment-free, lower-cased copy of it.
  log "Retrieving TLD list…"
  dl http://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
  # Single awk pass: skip lines starting with '#', lower-case everything else.
  awk '!/^#/ { print tolower($0) }' temp/all_tld.temp.list > temp/all_tld.list
  # Rebuild the nameservers file: optional fixed head first, then the freshly
  # downloaded public resolvers in random order.
  log "Retrieving nameservers…"
  rm -f nameservers
  : > nameservers
  if [ -f nameservers.head ]; then
    cat nameservers.head >> nameservers
  fi
  dl https://public-dns.info/nameservers.txt nameservers.temp
  sort --random-sort nameservers.temp >> nameservers
  rm nameservers.temp
  # Refresh the Cisco Umbrella popularity list and merge it into the cached
  # list kept under subdomains/.
  log "Retrieving top subdomains…"
  dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip
  # -o: overwrite a top-1m.csv left behind by an interrupted run instead of
  # stopping at unzip's interactive "replace?" prompt.
  unzip -o top-1m.csv.zip
  # Drop the leading numeric column. -E gives a portable '+' quantifier; the
  # '\+' form is a GNU extension to basic regular expressions.
  sed -E 's|^[0-9]+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list
  rm top-1m.csv top-1m.csv.zip
  if [ -f subdomains/cisco-umbrella_popularity.cache.list ]; then
    # Work from a copy: the redirection below truncates the cache file before
    # sort gets to read it.
    cp subdomains/cisco-umbrella_popularity.cache.list temp/cisco-umbrella_popularity.old.list
    pv temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list | sort -u > subdomains/cisco-umbrella_popularity.cache.list
    rm temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list
  else
    # No cache yet: the fresh list becomes the cache as-is.
    mv temp/cisco-umbrella_popularity.fresh.list subdomains/cisco-umbrella_popularity.cache.list
  fi