73 lines
1.9 KiB
Python
Executable file
73 lines
1.9 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# pylint: disable=C0103
|
|
|
|
"""
|
|
From a list of subdomains, output only
|
|
the ones resolving to a first-party tracker.
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
|
|
import dns.resolver
|
|
import dns.exception
|
|
import progressbar
|
|
|
|
import regexes
|
|
|
|
DNS_TIMEOUT = 5.0
|
|
|
|
def is_subdomain_matching(subdomain: str) -> bool:
    """
    Tell whether the subdomain resolves to a first-party tracker.

    An 'A' query is issued for the subdomain with a lifetime of
    DNS_TIMEOUT, and the canonical name of the answer is tested
    against every pattern of regexes.REGEXES.

    Returns True when at least one pattern matches the canonical name,
    and False when none does or when the query fails. Failures other
    than a non-existent domain or an empty answer are reported
    on standard error.
    """
    # TODO Look at the whole chain rather than the last one
    try:
        answer = dns.resolver.query(subdomain, 'A', lifetime=DNS_TIMEOUT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        # Nothing to resolve: not a match, and not worth a message
        return False
    except dns.resolver.YXDOMAIN:
        failure = f"Query name too long for {subdomain}"
    except dns.resolver.NoNameservers:
        failure = f"All nameservers broken for {subdomain}"
    except dns.exception.Timeout:
        failure = f"Timeout for {subdomain}"
    else:
        target = answer.canonical_name.to_text()
        # re.match anchors each pattern at the start of the name
        return any(re.match(pattern, target) for pattern in regexes.REGEXES)

    # Only the reported failures reach this point
    print(failure, file=sys.stderr)
    return False
|
|
|
|
|
|
def is_subdomain_matching_standalone(subdomain: str) -> None:
    """
    Write the subdomain to standard output if it is a match.

    Surrounding whitespace (such as a trailing newline) is removed
    first; an input that is empty after stripping is ignored without
    being checked.
    """
    cleaned = subdomain.strip()
    # The emptiness test short-circuits, so blank input is never looked up
    if cleaned and is_subdomain_matching(cleaned):
        print(cleaned)
|
|
|
|
|
|
if __name__ == '__main__':
    # Usage: script [FILE|-]
    # Subdomains are read one per line from FILE, or from standard input
    # when no argument (or '-') is given.

    # Explicit check instead of an assert, which is skipped under -O
    if len(sys.argv) > 2:
        sys.exit(f"Usage: {sys.argv[0]} [FILE|-]")

    filename = None
    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]

    if filename is not None:
        # First pass: count the lines so the progress bar has a maximum.
        # Context managers make sure both file handles get closed.
        with open(filename) as counted_file:
            num_lines = sum(1 for _ in counted_file)

        # Second pass: check every subdomain, showing the progress
        with open(filename) as subdomains_file:
            iterator = progressbar.progressbar(subdomains_file,
                                               max_value=num_lines)
            for line in iterator:
                is_subdomain_matching_standalone(line)
    else:
        # No progress bar here: the amount of input is not counted
        for line in sys.stdin:
            is_subdomain_matching_standalone(line)
|