Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.3 KiB

  1. #!/usr/bin/env python3
  2. # pylint: disable=C0103
  3. """
  4. Extract the domains to block as a whole
  5. from an AdBlock rules list.
  6. """
  7. import argparse
  8. import sys
  9. import typing
  10. import abp.filters
  11. def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
  12. for key, val in rule.options:
  13. if key not in ('third-party',):
  14. return
  15. selector_type = rule.selector['type']
  16. selector_value = rule.selector['value']
  17. if selector_type == 'url-pattern' \
  18. and selector_value.startswith('||') \
  19. and selector_value.endswith('^'):
  20. yield selector_value[2:-1]
  21. if __name__ == '__main__':
  22. # Parsing arguments
  23. parser = argparse.ArgumentParser(
  24. description="TODO")
  25. parser.add_argument(
  26. '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
  27. help="Input file with AdBlock rules")
  28. parser.add_argument(
  29. '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
  30. help="Outptut file with one rule tracking subdomain per line")
  31. args = parser.parse_args()
  32. # Reading rules
  33. rules = abp.filters.parse_filterlist(args.input)
  34. # Filtering
  35. for rule in rules:
  36. if not isinstance(rule, abp.filters.parser.Filter):
  37. continue
  38. for domain in get_domains(rule):
  39. print(domain, file=args.output)