Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

29 lines
1.3 KiB

  1. #!/usr/bin/env bash
  2. # Filter out the subdomains not pointing to a first-party tracker
  3. cat subdomains/*.list | sort -u > temp/all_subdomains.list
  4. ./filter_subdomains.py temp/all_subdomains.list > temp/all_toblock.list
  5. sort -u temp/all_toblock.list > dist/firstparty-trackers.txt
  6. # Format the blocklist so it can be used as a hostlist
  7. (
  8. echo "# First-party trackers host list"
  9. echo "#"
  10. echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
  11. echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
  12. echo "# Latest version of this list: https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt"
  13. echo "#"
  14. echo "# Generation date: $(date -Isec)"
  15. echo "# Generation version: eulaurarien $(git describe --tags)"
  16. echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
  17. echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
  18. echo "# Number of known trackers : $(python -c 'import regexes; print(len(regexes.REGEXES))')"
  19. echo "# Number of blocked subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
  20. echo
  21. cat dist/firstparty-trackers.txt | while read host;
  22. do
  23. echo "0.0.0.0 $host"
  24. done
  25. ) > dist/firstparty-trackers-hosts.txt