|
|
@ -1,4 +1,5 @@ |
|
|
|
#!/usr/bin/env python3 |
|
|
|
# pylint: disable=C0103 |
|
|
|
|
|
|
|
""" |
|
|
|
From a list of subdomains, output only |
|
|
@ -9,6 +10,7 @@ import re |
|
|
|
import sys |
|
|
|
|
|
|
|
import dns.resolver |
|
|
|
import progressbar |
|
|
|
|
|
|
|
import regexes |
|
|
|
|
|
|
@ -18,7 +20,10 @@ def is_subdomain_matching(subdomain: str) -> bool: |
|
|
|
Indicates if the subdomain redirects to a first-party tracker. |
|
|
|
""" |
|
|
|
# TODO Look at the whole chain rather than the last one |
|
|
|
query = dns.resolver.query(subdomain, 'A') |
|
|
|
try: |
|
|
|
query = dns.resolver.query(subdomain, 'A') |
|
|
|
except dns.resolver.NXDOMAIN: |
|
|
|
return False |
|
|
|
canonical = query.canonical_name.to_text() |
|
|
|
for regex in regexes.REGEXES: |
|
|
|
if re.match(regex, canonical): |
|
|
@ -26,10 +31,29 @@ def is_subdomain_matching(subdomain: str) -> bool: |
|
|
|
return False |
|
|
|
|
|
|
|
|
|
|
|
def is_subdomain_matching_standalone(subdomain: str) -> None:
    """
    Print the subdomain if it redirects to a first-party tracker.

    Surrounding whitespace (such as the trailing newline of an input
    line) is stripped first; input that is blank after stripping is
    ignored without any lookup being made.
    """
    name = subdomain.strip()
    # Short-circuit keeps blank input from ever reaching the matcher.
    if name and is_subdomain_matching(name):
        print(name)
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Usage: script [FILE|-]
    # With a FILE argument, subdomains are read from that file and a
    # progress bar is shown; with '-' or no argument they are read from
    # standard input without a progress bar.
    if len(sys.argv) > 2:
        # Explicit check instead of `assert`, which is stripped under -O.
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))

    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]

        # First pass only counts the lines so the progress bar knows its
        # maximum; the handle is closed as soon as counting is done.
        with open(filename) as counted:
            num_lines = sum(1 for _ in counted)

        # Second pass does the actual work; the context manager guarantees
        # the file itself is closed, even if a lookup raises.
        with open(filename) as source:
            for line in progressbar.progressbar(source, max_value=num_lines):
                is_subdomain_matching_standalone(line)
    else:
        for line in sys.stdin:
            is_subdomain_matching_standalone(line)