eulaurarien/adblock_to_domain_list.py

49 lines
1.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# pylint: disable=C0103
"""
Extract the domains to block as a whole
from an AdBlock rules list.
"""
import argparse
import sys
import typing
import abp.filters
def get_domains(rule: "abp.filters.parser.Filter") -> typing.Iterable[str]:
    """
    Yield the domain that an AdBlock rule blocks as a whole, if any.

    A rule qualifies only when it is a blocking rule, carries no options
    and its selector is a URL pattern of the form ``||<domain>^``.
    The text between the ``||`` prefix and the ``^`` suffix is yielded.
    Any other rule yields nothing.
    """
    # Exception rules (``@@...``, parsed with action 'allow') whitelist a
    # domain: they must never end up in a list of domains to block.
    if rule.action != 'block':
        return
    # Rules carrying options are skipped (presumably because options
    # narrow the rule so the domain is not blocked as a whole).
    if rule.options:
        return
    selector = rule.selector
    if selector['type'] != 'url-pattern':
        return
    pattern = selector['value']
    if pattern.startswith('||') and pattern.endswith('^'):
        domain = pattern[2:-1]
        # A bare '||^' pattern would otherwise emit an empty line.
        if domain:
            yield domain
if __name__ == '__main__':
    # Parsing arguments: both streams default to the standard ones so the
    # script can be used as a filter in a shell pipeline.
    parser = argparse.ArgumentParser(
        description="Extract whole domains from an AdBlock blocking list")
    parser.add_argument(
        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
        help="Input file with AdBlock rules")
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file with one rule tracking subdomain per line")
    args = parser.parse_args()

    # Reading rules
    rules = abp.filters.parse_filterlist(args.input)

    # Filtering: only Filter objects are handed to get_domains();
    # every other kind of parsed line is skipped.
    for rule in rules:
        if not isinstance(rule, abp.filters.parser.Filter):
            continue
        for domain in get_domains(rule):
            print(domain, file=args.output)