#!/usr/bin/env python3
# pylint: disable=C0103

"""
From a list of subdomains, output only
the ones resolving to a first-party tracker.

Usage: pass a file name as the only argument to read subdomains from
that file (a progress bar is shown), or pass ``-`` / no argument to
read them from standard input. One subdomain per line.
"""

import re
import sys

import dns.exception
import dns.name
import dns.resolver
import progressbar

import regexes

# Value passed as ``lifetime`` to every DNS query.
DNS_TIMEOUT = 5.0


def is_subdomain_matching(subdomain: str) -> bool:
    """
    Indicates if the subdomain redirects to a first-party tracker.

    Performs an ``A`` query for ``subdomain`` and tests the canonical
    name of the answer against every pattern in ``regexes.REGEXES``
    using ``re.match`` (i.e. anchored at the start of the name).

    Returns ``True`` if at least one pattern matches, ``False`` if none
    does or if the query fails. Failures other than NXDOMAIN / NoAnswer
    are reported on stderr; none of the handled DNS errors propagate.
    """
    # TODO Look at the whole chain rather than the last one
    # NOTE(review): dnspython 2.x deprecates query() in favour of
    # resolve(); kept as-is for the library version in use -- confirm.
    try:
        query = dns.resolver.query(subdomain, 'A', lifetime=DNS_TIMEOUT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        # Silently treated as "not a match".
        return False
    except dns.resolver.YXDOMAIN:
        print(f"Query name too long for {subdomain}", file=sys.stderr)
        return False
    except dns.resolver.NoNameservers:
        print(f"All nameservers broken for {subdomain}", file=sys.stderr)
        return False
    except dns.exception.Timeout:
        print(f"Timeout for {subdomain}", file=sys.stderr)
        return False
    except dns.name.EmptyLabel:
        print(f"Empty label for {subdomain}", file=sys.stderr)
        return False

    canonical = query.canonical_name.to_text()
    return any(re.match(regex, canonical) for regex in regexes.REGEXES)


def is_subdomain_matching_standalone(subdomain: str) -> None:
    """
    Print the subdomain if it redirects to a first-party tracker.

    Surrounding whitespace (including the trailing newline of an input
    line) is stripped first; blank input is ignored.
    """
    subdomain = subdomain.strip()
    if not subdomain:
        return
    if is_subdomain_matching(subdomain):
        print(subdomain)


def main() -> None:
    """
    Read subdomains from the file given on the command line (or from
    stdin) and print the matching ones.
    """
    # Explicit check instead of ``assert``, which is stripped under -O.
    if len(sys.argv) > 2:
        sys.exit(f"Usage: {sys.argv[0]} [FILE|-]")

    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]
        # First pass: count lines so the progress bar knows its maximum.
        with open(filename) as handle:
            num_lines = sum(1 for _ in handle)
        # Second pass: actual processing; ``with`` guarantees the file
        # is closed even if a lookup raises.
        with open(filename) as handle:
            for line in progressbar.progressbar(handle, max_value=num_lines):
                is_subdomain_matching_standalone(line)
    else:
        for line in sys.stdin:
            is_subdomain_matching_standalone(line)


if __name__ == '__main__':
    main()