Generates a host list of first-party trackers for ad-blocking.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

49 lines
1.3 KiB

#!/usr/bin/env python3
# pylint: disable=C0103
"""
Extract the domains to block as a whole
from a AdBlock rules list.
"""
import argparse
import sys
import typing
import abp.filters
def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
for key, val in rule.options:
if key not in ('third-party',):
return
selector_type = rule.selector['type']
selector_value = rule.selector['value']
if selector_type == 'url-pattern' \
and selector_value.startswith('||') \
and selector_value.endswith('^'):
yield selector_value[2:-1]
if __name__ == '__main__':
# Parsing arguments
parser = argparse.ArgumentParser(
description="TODO")
parser.add_argument(
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
help="Input file with AdBlock rules")
parser.add_argument(
'-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
help="Outptut file with one rule tracking subdomain per line")
args = parser.parse_args()
# Reading rules
rules = abp.filters.parse_filterlist(args.input)
# Filtering
for rule in rules:
if not isinstance(rule, abp.filters.parser.Filter):
continue
for domain in get_domains(rule):
print(domain, file=args.output)