#!/usr/bin/env python3
# pylint: disable=C0103

"""
From a list of subdomains, output only
the ones resolving to a first-party tracker.

The input is read as CSV, one DNS chain per row: the first field is the
subdomain that was queried, the fields between the first and the last are
its CNAME destinations, and the last field is the final destination (which
is not checked against the rules).
"""

import argparse
import csv
import sys
import typing

import adblockparser
import progressbar

# Options handed to ``should_block`` for every lookup.
OPTIONS = {"third-party": True}


def subdomain_matching(subdomain: str) -> bool:
    """
    Tell whether the rules block the root HTTPS URL of ``subdomain``.

    Relies on the module-level ``rules`` object, which is only assigned
    when this file is run as a script (see the ``__main__`` section).
    """
    url = f"https://{subdomain}/"
    return rules.should_block(url, OPTIONS)


def get_matching(chain: typing.List[str], no_explicit: bool = False
                 ) -> typing.Iterable[str]:
    """
    Yield the first element of ``chain`` if the chain hits a blocking rule.

    ``chain`` is one DNS chain: the initial subdomain, then its CNAME
    destinations, then the final destination. The initial subdomain is
    yielded when it, or any of the CNAME destinations, is blocked.

    When ``no_explicit`` is true, nothing is yielded for chains whose
    initial subdomain is itself blocked (only the ones caught through
    a CNAME destination are kept).

    An empty chain (what ``csv.reader`` produces for a blank line)
    yields nothing.
    """
    if not chain:
        # Blank input line: nothing to check, and chain[0] would raise.
        return
    initial = chain[0]
    # The last element (the final destination) is deliberately left out.
    cname_destinations = chain[1:-1]
    initial_matching = subdomain_matching(initial)
    if no_explicit and initial_matching:
        return
    if initial_matching or any(map(subdomain_matching, cname_destinations)):
        yield initial


if __name__ == '__main__':

    # Parsing arguments
    parser = argparse.ArgumentParser(
        description="Filter first-party trackers from a list of subdomains")
    parser.add_argument(
        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
        help="Input file with DNS chains")
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file with one tracking subdomain per line")
    parser.add_argument(
        '-n', '--no-explicit', action='store_true',
        help="Don't output domains already blocked with rules without CNAME")
    parser.add_argument(
        '-r', '--rules', type=argparse.FileType('r'), default='rules',
        help="Rules file")
    args = parser.parse_args()

    # Reading rules
    rules: adblockparser.AdblockRules = adblockparser.AdblockRules(args.rules)

    # Progress bar
    widgets = [
        progressbar.Percentage(),
        ' ', progressbar.SimpleProgress(),
        ' ', progressbar.Bar(),
        ' ', progressbar.Timer(),
        ' ', progressbar.AdaptiveTransferSpeed(unit='req'),
        ' ', progressbar.AdaptiveETA(),
    ]
    progress = progressbar.ProgressBar(widgets=widgets)
    if args.input.seekable():
        # Count the lines without holding the whole input in memory,
        # then rewind so the CSV reader starts from the beginning.
        progress.max_value = sum(1 for _ in args.input)
        args.input.seek(0)

    # Cleaning input
    reader = csv.reader(args.input)

    # Filtering
    progress.start()
    for chain in reader:
        for match in get_matching(chain, no_explicit=args.no_explicit):
            print(match, file=args.output)
        progress.update(progress.value + 1)
    progress.finish()