Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

99 lines
3.8KB

  1. #!/usr/bin/env bash
  #######################################
  # Print a message on stdout wrapped in yellow ANSI colour codes.
  # Arguments: $@ - words of the message; they are joined with single spaces.
  # Outputs:   the coloured message followed by a newline.
  # Note:      backslash escapes inside the message are expanded (%b), which
  #            is what the previous `echo -e` implementation did.
  #######################################
  log() {
    # "$*" joins the arguments on the first character of IFS; pin it locally
    # so the separator is always a space, whatever the caller's IFS is.
    local IFS=' '
    printf '\033[33m%b\033[0m\n' "$*"
  }
  # Gather the figures that are later printed in the header of each list.
  log "Calculating statistics…"

  # Numerically smallest line found across last_updates/*.txt; it is handed
  # to `date -d @…`, i.e. interpreted as seconds since the Unix epoch.
  oldest="$(sort -n last_updates/*.txt | head -n 1)"
  oldest_date=$(date -Isec -d "@${oldest}")

  # Timestamp and version (from git tags) of this generation run.
  gen_date=$(date -Isec)
  gen_software=$(git describe --tags)

  # Line counts of the source lists.
  number_websites=$(wc -l < temp/all_websites.list)
  number_subdomains=$(wc -l < temp/all_subdomains.list)

  # Number of lines mentioning NOERROR in the resolver output.
  number_dns=$(grep -c 'NOERROR' temp/all_resolved.txt)
  # grep prints nothing when the file cannot be read; keep reporting 0 then,
  # as the former `grep | wc -l` pipeline did.
  number_dns=${number_dns:-0}
  # Print statistics and build every list variant:
  #   {first,multi}-party  x  {trackers,only-trackers}
  # Each variant produces a plain list (dist/<party>party-<variant>.txt) and
  # a hosts-format file (dist/<party>party-<variant>-hosts.txt) that starts
  # with a descriptive comment header.
  for partyness in first multi; do
    # Flags live in arrays so that "no flag" expands to zero arguments
    # without relying on unquoted word-splitting.
    if [[ "$partyness" == "first" ]]; then
      partyness_flags=(--first-party)
    else
      partyness_flags=()
    fi

    rules_input=$(./export.py --count --base-rules "${partyness_flags[@]}")
    rules_found=$(./export.py --count --rules "${partyness_flags[@]}")
    rules_found_nd=$(./export.py --count --rules --no-dupplicates "${partyness_flags[@]}")

    echo
    echo "Statistics for ${partyness}-party trackers"
    echo "Input rules: $rules_input"
    echo "Subsequent rules: $rules_found"
    echo "Subsequent rules (no dupplicate): $rules_found_nd"
    echo "Output hostnames: $(./export.py --count "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates "${partyness_flags[@]}")"
    echo "Output hostnames (end-chain only): $(./export.py --count --end-chain "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain "${partyness_flags[@]}")"

    for trackerness in trackers only-trackers; do
      # The "only-trackers" variant is the export with --no-dupplicates.
      if [[ "$trackerness" == "trackers" ]]; then
        trackerness_flags=()
      else
        trackerness_flags=(--no-dupplicates)
      fi

      file_list="dist/${partyness}party-${trackerness}.txt"
      file_host="dist/${partyness}party-${trackerness}-hosts.txt"

      log "Generating lists for variant ${partyness}-party ${trackerness}…"

      # The actual export happens here.
      ./export.py "${partyness_flags[@]}" "${trackerness_flags[@]}" > "$file_list"
      # Sometimes a bit heavy to have the DB open and sort the output,
      # so this is done in two steps: export first, then sort in place.
      sort -u -o "$file_list" "$file_list"

      rules_output=$(./export.py --count "${partyness_flags[@]}" "${trackerness_flags[@]}")

      # Hosts file: comment header, a blank line, then one "0.0.0.0 <host>"
      # entry per line of the plain list.
      {
        echo "# First-party trackers host list"
        echo "# Variant: ${partyness}-party ${trackerness}"
        echo "#"
        echo "# About first-party trackers: https://hostfiles.frogeye.fr/#whats-a-first-party-tracker"
        echo "#"
        echo "# In case of false positives/negatives, or any other question,"
        echo "# contact me the way you like: https://geoffrey.frogeye.fr"
        echo "#"
        echo "# Latest versions and variants: https://hostfiles.frogeye.fr/#list-variants"
        echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
        echo "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE"
        echo "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements"
        echo "#"
        echo "# Generation software: eulaurarien $gen_software"
        echo "# List generation date: $gen_date"
        echo "# Oldest record: $oldest_date"
        echo "# Number of source websites: $number_websites"
        echo "# Number of source subdomains: $number_subdomains"
        echo "# Number of source DNS records: ~2E9 + $number_dns"
        echo "#"
        echo "# Input rules: $rules_input"
        echo "# Subsequent rules: $rules_found"
        echo "# … no dupplicates: $rules_found_nd"
        echo "# Output rules: $rules_output"
        echo "#"
        echo
        sed 's|^|0.0.0.0 |' "$file_list"
      } > "$file_host"
    done
  done
  # When an explanations/ directory is present, store a timestamped output
  # of `export.py --explain` in it and repoint explanations/latest.txt to it.
  if [[ -d explanations ]]; then
    filename="$(date -Isec).txt"
    explain_path="explanations/${filename}"
    ./export.py --explain > "$explain_path"
    # The link target is relative to explanations/, hence the bare filename.
    ln -sf "$filename" "explanations/latest.txt"
  fi