diff --git a/fetch_resources.sh b/fetch_resources.sh index 5ff9d01..0f7bdb5 100755 --- a/fetch_resources.sh +++ b/fetch_resources.sh @@ -5,7 +5,7 @@ rm -f nameservers touch nameservers [ -f nameservers.head ] && cat nameservers.head >> nameservers -curl https://public-dns.info/nameservers.txt | sort -R | head -64 >> nameservers +curl https://public-dns.info/nameservers.txt | sort -R >> nameservers # Get top 1M subdomains diff --git a/filter_subdomains.py b/filter_subdomains.py index 34682ec..2ac5fab 100755 --- a/filter_subdomains.py +++ b/filter_subdomains.py @@ -7,7 +7,8 @@ the ones resolving to a first-party tracker. """ import logging -import multiprocessing +import threading +import queue import os import re import sys @@ -22,15 +23,17 @@ import regexes DNS_TIMEOUT = 5.0 -class DnsResolver(multiprocessing.Process): +# TODO: Trying again does not work because the sentinels get through first :/ + +class DnsResolver(threading.Thread): """ Worker process for a DNS resolver. Will resolve DNS to match first-party subdomains. """ def __init__(self, - in_queue: multiprocessing.Queue, - out_queue: multiprocessing.Queue, + in_queue: queue.Queue, + out_queue: queue.Queue, server: str): super(DnsResolver, self).__init__() self.log = logging.getLogger(server) @@ -92,8 +95,8 @@ class DnsResolver(multiprocessing.Process): def get_matching_subdomains(subdomains: typing.Iterable[str], nameservers: typing.List[str] = None, ) -> typing.Iterable[typing.Tuple[str, bool]]: - subdomains_queue: multiprocessing.Queue = multiprocessing.Queue() - results_queue: multiprocessing.Queue = multiprocessing.Queue() + subdomains_queue: queue.Queue = queue.Queue() + results_queue: queue.Queue = queue.Queue() """ Orchestrator of the different DnsResolver threads. 
""" @@ -113,14 +116,12 @@ def get_matching_subdomains(subdomains: typing.Iterable[str], # sentinel = None ~= EOF for _ in servers: subdomains_queue.put(None) - subdomains_queue.close() # Wait for one sentinel per worker # In the meantime output results for _ in servers: for result in iter(results_queue.get, None): yield result - results_queue.close() if __name__ == '__main__':