#!/usr/bin/env python3
# pylint: disable=C0103

"""
From a list of subdomains, output only
the ones resolving to a first-party tracker.

Usage: pass the path of a file containing one subdomain per line as the
only argument, or pass ``-`` (or nothing) to read the list from stdin.
"""

import re
import sys

import dns.resolver
import progressbar

import regexes

# Resolver failures that mean "no usable answer for this name". They are
# treated as "not a tracker" so that one bad entry does not abort a whole
# batch run.
_LOOKUP_FAILURES = (
    dns.resolver.NXDOMAIN,
    dns.resolver.NoAnswer,
    dns.resolver.NoNameservers,
    dns.resolver.Timeout,
)


def is_subdomain_matching(subdomain: str) -> bool:
    """
    Indicates if the subdomain redirects to a first-party tracker.

    The subdomain is resolved with an ``A`` query and the canonical name of
    the answer is tested against every pattern in ``regexes.REGEXES``
    (anchored at the start of the name, as ``re.match`` does).

    Returns ``True`` if at least one pattern matches, ``False`` if none
    matches or if the lookup fails (nonexistent domain, no answer,
    no reachable nameserver, timeout).
    """
    # TODO Look at the whole chain rather than the last one
    try:
        query = dns.resolver.query(subdomain, 'A')
    except _LOOKUP_FAILURES:
        return False
    canonical = query.canonical_name.to_text()
    return any(re.match(regex, canonical) for regex in regexes.REGEXES)


def is_subdomain_matching_standalone(subdomain: str) -> None:
    """
    Print the subdomain if it redirects to a first-party tracker.

    Surrounding whitespace (including the trailing newline of an input
    line) is stripped first; blank entries are ignored.
    """
    subdomain = subdomain.strip()
    if not subdomain:
        return
    if is_subdomain_matching(subdomain):
        print(subdomain)


def main() -> None:
    """
    Filter the subdomains given in a file (with a progress bar) or on stdin.

    Exits with a usage message if more than one argument is supplied.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(sys.argv) > 2:
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))

    filename = None
    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]

    if filename is None:
        for line in sys.stdin:
            is_subdomain_matching_standalone(line)
        return

    # First pass: count the lines so the progress bar knows its maximum.
    with open(filename) as counted:
        num_lines = sum(1 for _ in counted)

    # Second pass: the `with` statement closes the file itself, rather than
    # relying on closing the progress-bar wrapper around it.
    with open(filename) as subdomains:
        for line in progressbar.progressbar(subdomains, max_value=num_lines):
            is_subdomain_matching_standalone(line)


if __name__ == '__main__':
    main()