Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.3KB

  1. #!/usr/bin/env python3
  2. # pylint: disable=C0103
  3. """
  4. Extract the domains to block as a whole
  5. from an AdBlock rules list.
  6. """
  7. import argparse
  8. import sys
  9. import typing
  10. import abp.filters
  11. def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
  12. if rule.options:
  13. return
  14. selector_type = rule.selector['type']
  15. selector_value = rule.selector['value']
  16. if selector_type == 'url-pattern' \
  17. and selector_value.startswith('||') \
  18. and selector_value.endswith('^'):
  19. yield selector_value[2:-1]
  20. if __name__ == '__main__':
  21. # Parsing arguments
  22. parser = argparse.ArgumentParser(
  23. description="Extract whole domains from an AdBlock blocking list")
  24. parser.add_argument(
  25. '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
  26. help="Input file with AdBlock rules")
  27. parser.add_argument(
  28. '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
  29. help="Outptut file with one rule tracking subdomain per line")
  30. args = parser.parse_args()
  31. # Reading rules
  32. rules = abp.filters.parse_filterlist(args.input)
  33. # Filtering
  34. for rule in rules:
  35. if not isinstance(rule, abp.filters.parser.Filter):
  36. continue
  37. for domain in get_domains(rule):
  38. print(domain, file=args.output)