Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

59 lines
1.4 KiB

#!/usr/bin/env python3
# pylint: disable=C0103
"""
Extract the domains to block as a whole
from an AdBlock rules list.
"""
import argparse
import sys
import typing
import abp.filters
def get_domains(rule: abp.filters.parser.Filter) -> typing.Iterable[str]:
    """
    Yield the domain that an AdBlock filter blocks as a whole, if any.

    A rule qualifies only when it is a blocking rule without options whose
    URL pattern has the exact shape ``||domain^``. Exception rules
    (``@@||domain^``) are rejected through the ``action`` field, because
    the parser is expected to strip the ``@@`` marker from the selector
    value. Values that are empty or still contain pattern syntax (path
    separator, wildcard, anchors) are rejected as they are not bare domains.

    :param rule: Parsed filter; its ``options``, ``action`` and
        ``selector`` fields are read.
    :return: Iterable yielding zero or one domain name.
    """
    # Options (e.g. $third-party) narrow when the rule applies, so the
    # domain would not be blocked unconditionally.
    if rule.options:
        return
    # "block" is the string value of abp.filters.parser.FILTER_ACTION.BLOCK;
    # anything else (allow/hide/show) must not end up in a block list.
    if rule.action != "block":
        return
    selector = rule.selector
    if selector["type"] != "url-pattern":
        return
    pattern = selector["value"]
    if not (pattern.startswith("||") and pattern.endswith("^")):
        return
    # Strip the leading "||" anchor and the trailing "^" separator.
    domain = pattern[2:-1]
    # What remains must be a plain host name: no path, wildcard, extra
    # separator or anchor characters, and not empty.
    if not domain or any(char in domain for char in "/*^|"):
        return
    yield domain
if __name__ == "__main__":
    # Parsing arguments
    parser = argparse.ArgumentParser(
        description="Extract whole domains from an AdBlock blocking list"
    )
    parser.add_argument(
        "-i",
        "--input",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="Input file with AdBlock rules",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        default=sys.stdout,
        help="Output file with one rule tracking subdomain per line",
    )
    args = parser.parse_args()

    # Reading rules
    rules = abp.filters.parse_filterlist(args.input)

    # Filtering: the parsed list also contains non-filter lines, so only
    # actual Filter objects are handed to get_domains.
    for rule in rules:
        if not isinstance(rule, abp.filters.parser.Filter):
            continue
        for domain in get_domains(rule):
            # One extracted domain per output line.
            print(domain, file=args.output)