Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.0 KiB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
  1. #!/usr/bin/env python3
  2. # pylint: disable=C0103
  3. """
  4. From a list of subdomains, output only
  5. the ones resolving to a first-party tracker.
  6. """
  7. import re
  8. import sys
  9. import dns.resolver
  10. import dns.exception
  11. import progressbar
  12. import regexes
  13. DNS_TIMEOUT = 5.0
  14. def is_subdomain_matching(subdomain: str) -> bool:
  15. """
  16. Indicates if the subdomain redirects to a first-party tracker.
  17. """
  18. # TODO Look at the whole chain rather than the last one
  19. try:
  20. query = dns.resolver.query(subdomain, 'A', lifetime=DNS_TIMEOUT)
  21. except dns.resolver.NXDOMAIN:
  22. return False
  23. except dns.resolver.NoAnswer:
  24. return False
  25. except dns.resolver.YXDOMAIN:
  26. print(f"Query name too long for {subdomain}", file=sys.stderr)
  27. return False
  28. except dns.resolver.NoNameservers:
  29. print(f"All nameservers broken for {subdomain}", file=sys.stderr)
  30. return False
  31. except dns.exception.Timeout:
  32. print(f"Timeout for {subdomain}", file=sys.stderr)
  33. return False
  34. except dns.name.EmptyLabel:
  35. print(f"Empty label for {subdomain}", file=sys.stderr)
  36. return False
  37. canonical = query.canonical_name.to_text()
  38. for regex in regexes.REGEXES:
  39. if re.match(regex, canonical):
  40. return True
  41. return False
  42. def is_subdomain_matching_standalone(subdomain: str) -> None:
  43. """
  44. Print the subdomain if it redirects to a first-party tracker.
  45. """
  46. subdomain = subdomain.strip()
  47. if not subdomain:
  48. return
  49. if is_subdomain_matching(subdomain):
  50. print(subdomain)
  51. if __name__ == '__main__':
  52. assert len(sys.argv) <= 2
  53. filename = None
  54. if len(sys.argv) == 2 and sys.argv[1] != '-':
  55. filename = sys.argv[1]
  56. num_lines = sum(1 for line in open(filename))
  57. iterator = progressbar.progressbar(open(filename), max_value=num_lines)
  58. else:
  59. iterator = sys.stdin
  60. for line in iterator:
  61. is_subdomain_matching_standalone(line)
  62. if filename:
  63. iterator.close()