Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

83 lines
2.4 KiB

#!/usr/bin/env python3
# pylint: disable=C0103
"""
From a list of subdomains, output only
the ones resolving to a first-party tracker.
"""
import argparse
import sys
import progressbar
import csv
import typing
import adblockparser
# Options handed to every rules.should_block() call made by
# subdomain_matching(): the checked URL is declared as a third-party request.
OPTIONS = {"third-party": True}
def subdomain_matching(subdomain: str) -> bool:
    """
    Tell whether the blocking rules match the given subdomain.

    The subdomain is turned into the root HTTPS URL of that host, which is
    then checked against the module-level ``rules`` with ``OPTIONS``.
    """
    return rules.should_block(f"https://{subdomain}/", OPTIONS)
def get_matching(chain: typing.List[str], no_explicit: bool = False
                 ) -> typing.Iterable[str]:
    """
    Yield the first name of a DNS chain if the chain hits a blocking rule.

    chain: names of one resolution chain. The first item is the subdomain
        being examined, the last item is treated as the final destination
        and is not checked, and the items in between are the CNAME
        destinations.
    no_explicit: when true, yield nothing for a subdomain that is itself
        matched by the rules, so only the ones caught through one of their
        CNAME destinations are reported.

    Yields at most one item: ``chain[0]``.
    """
    # An empty chain (e.g. a blank input line) has nothing to report.
    if not chain:
        return

    initial = chain[0]
    cname_destinations = chain[1:-1]

    if subdomain_matching(initial):
        # The subdomain is explicitly covered by the rules: report it unless
        # the caller asked to leave explicitly blocked names out.
        if not no_explicit:
            yield initial
        return

    # Not matched directly: report it only if a CNAME destination is.
    if any(map(subdomain_matching, cname_destinations)):
        yield initial
if __name__ == '__main__':
    # Script entry point: read DNS chains as CSV rows, and write the first
    # name of every chain selected by get_matching(), one per line.

    # Parsing arguments
    parser = argparse.ArgumentParser(
        description="Filter first-party trackers from a list of subdomains")
    parser.add_argument(
        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
        help="Input file with DNS chains")
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
        help="Outptut file with one tracking subdomain per line")
    parser.add_argument(
        '-n', '--no-explicit', action='store_true',
        help="Don't output domains already blocked with rules without CNAME")
    parser.add_argument(
        '-r', '--rules', type=argparse.FileType('r'), default='rules',
        help="Rules file")
    args = parser.parse_args()

    # Reading rules
    # Kept at module level on purpose: subdomain_matching() reads `rules`.
    rules: adblockparser.AdblockRules = adblockparser.AdblockRules(args.rules)

    # Progress bar
    widgets = [
        ' ', progressbar.SimpleProgress(),
        ' ', progressbar.Bar(),
        ' ', progressbar.Timer(),
        ' ', progressbar.AdaptiveTransferSpeed(unit='req'),
        ' ', progressbar.AdaptiveETA(),
    ]
    progress = progressbar.ProgressBar(widgets=widgets)
    if args.input.seekable():
        # One chain per line: the line count gives the total for the bar.
        progress.max_value = sum(1 for _ in args.input)
        # Counting consumed the file; rewind so the CSV reader sees the rows.
        args.input.seek(0)

    # Cleaning input
    reader = csv.reader(args.input)

    # Filtering
    progress.start()
    for chain in reader:
        for match in get_matching(chain, no_explicit=args.no_explicit):
            print(match, file=args.output)
        # Advance once per chain, whether or not it produced a match.
        progress.update(progress.value + 1)
    progress.finish()