2019-12-03 08:48:12 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# pylint: disable=C0103
|
|
|
|
|
|
|
|
"""
|
|
|
|
Extract the domains to block as a whole
|
|
|
|
from an AdBlock rules list.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import sys
|
|
|
|
import typing
|
|
|
|
|
|
|
|
import abp.filters
|
|
|
|
|
|
|
|
|
|
|
|
def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
    """
    Yield the domain blocked as a whole by an AdBlock filter, if any.

    A filter qualifies only when it is a blocking filter without options
    whose URL pattern has the form ``||domain^``. Exception filters
    (``@@||domain^``) are skipped: they allow traffic, so listing their
    domain as one to block would invert their meaning. Patterns whose
    body is empty or still contains pattern syntax (``*``, ``/``, ``^``,
    ``|``) are skipped as well, since they do not name a single domain.

    :param rule: Parsed filter exposing ``options``, ``action`` and
        ``selector`` (a mapping with ``'type'`` and ``'value'`` keys).
    :return: Iterable with zero or one domain name.
    """
    # Options restrict where the filter applies: not a whole-domain block
    if rule.options:
        return
    # Exception ("@@") rules whitelist; they must not end up in a blocklist
    if rule.action == 'allow':
        return
    selector_type = rule.selector['type']
    selector_value = rule.selector['value']
    if selector_type == 'url-pattern' \
            and selector_value.startswith('||') \
            and selector_value.endswith('^'):
        # Strip the leading '||' anchor and the trailing '^' separator
        domain = selector_value[2:-1]
        # Reject empty bodies and wildcard / path / anchor leftovers
        if domain and not any(char in domain for char in '*/^|'):
            yield domain
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: read AdBlock rules from the input stream
    # and print every domain returned by get_domains, one per line.

    # Parsing arguments
    parser = argparse.ArgumentParser(
        description="Extract whole domains from an AdBlock blocking list")
    parser.add_argument(
        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
        help="Input file with AdBlock rules")
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file with one domain per line")
    args = parser.parse_args()

    # Reading rules
    rules = abp.filters.parse_filterlist(args.input)

    # Filtering
    for rule in rules:
        # The parser also yields objects that are not filters;
        # only actual filters can name a domain
        if not isinstance(rule, abp.filters.parser.Filter):
            continue
        for domain in get_domains(rule):
            print(domain, file=args.output)
|