Generates a host list of first-party trackers for ad-blocking.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

59 lines
1.4 KiB

4 months ago
4 months ago
4 months ago
4 months ago
4 months ago
  1. #!/usr/bin/env python3
  2. # pylint: disable=C0103
  3. """
  4. Extract the domains to block as a whole
  5. from an AdBlock rules list.
  6. """
  7. import argparse
  8. import sys
  9. import typing
  10. import abp.filters
  11. def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
  12. if rule.options:
  13. return
  14. selector_type = rule.selector["type"]
  15. selector_value = rule.selector["value"]
  16. if (
  17. selector_type == "url-pattern"
  18. and selector_value.startswith("||")
  19. and selector_value.endswith("^")
  20. ):
  21. yield selector_value[2:-1]
  22. if __name__ == "__main__":
  23. # Parsing arguments
  24. parser = argparse.ArgumentParser(
  25. description="Extract whole domains from an AdBlock blocking list"
  26. )
  27. parser.add_argument(
  28. "-i",
  29. "--input",
  30. type=argparse.FileType("r"),
  31. default=sys.stdin,
  32. help="Input file with AdBlock rules",
  33. )
  34. parser.add_argument(
  35. "-o",
  36. "--output",
  37. type=argparse.FileType("w"),
  38. default=sys.stdout,
  39. help="Outptut file with one rule tracking subdomain per line",
  40. )
  41. args = parser.parse_args()
  42. # Reading rules
  43. rules = abp.filters.parse_filterlist(args.input)
  44. # Filtering
  45. for rule in rules:
  46. if not isinstance(rule, abp.filters.parser.Filter):
  47. continue
  48. for domain in get_domains(rule):
  49. print(domain, file=args.output)