#!/usr/bin/env python3
# pylint: disable=C0103

"""
Extract the domains to block as a whole from an AdBlock rules list.
"""

import argparse
import sys
import typing

import abp.filters

# Options that may be present on a rule without disqualifying it from being
# treated as a whole-domain block.
_ALLOWED_OPTIONS = ('third-party',)


def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
    """
    Yield the domain targeted by ``rule`` when it blocks that domain as a whole.

    A rule qualifies when:

    * every option it carries is listed in ``_ALLOWED_OPTIONS``, and
    * its selector has type ``'url-pattern'`` and a value of the form
      ``||<domain>^``.

    The yielded string is the selector value with the leading ``||`` and the
    trailing ``^`` removed. Rules that do not qualify yield nothing.
    """
    # Any option outside the allowed set narrows the rule, so skip it entirely.
    if any(key not in _ALLOWED_OPTIONS for key, _ in rule.options):
        return

    selector = rule.selector
    if selector['type'] != 'url-pattern':
        return

    pattern = selector['value']
    if pattern.startswith('||') and pattern.endswith('^'):
        domain = pattern[2:-1]
        # A degenerate pattern such as "||^" would otherwise emit a blank line.
        if domain:
            yield domain


def main() -> None:
    """
    Parse the command line, read AdBlock rules from the input and print one
    extracted domain per line to the output.
    """
    # Parsing arguments
    parser = argparse.ArgumentParser(
        description="Extract the domains to block as a whole "
                    "from an AdBlock rules list")
    parser.add_argument(
        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
        help="Input file with AdBlock rules")
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file with one domain per line")
    args = parser.parse_args()

    # Reading rules
    rules = abp.filters.parse_filterlist(args.input)

    # Filtering: only Filter entries are rules; other parsed line types
    # are skipped.
    for rule in rules:
        if not isinstance(rule, abp.filters.parser.Filter):
            continue
        for domain in get_domains(rule):
            print(domain, file=args.output)


if __name__ == '__main__':
    main()