Use threads not subprocesses

You dumbo
newworkflow_parseropti
Geoffrey Frogeye 2019-11-14 12:57:06 +01:00
parent 32377229db
commit 08a8eaaada
2 changed files with 10 additions and 9 deletions

View File

@ -5,7 +5,7 @@
rm -f nameservers
touch nameservers
[ -f nameservers.head ] && cat nameservers.head >> nameservers
curl https://public-dns.info/nameservers.txt | sort -R | head -64 >> nameservers
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
# Get top 1M subdomains

View File

@ -7,7 +7,8 @@ the ones resolving to a first-party tracker.
"""
import logging
import multiprocessing
import threading
import queue
import os
import re
import sys
@ -22,15 +23,17 @@ import regexes
DNS_TIMEOUT = 5.0
class DnsResolver(multiprocessing.Process):
# TODO Trying again does not work because the sentinel gets through first :/
class DnsResolver(threading.Thread):
"""
Worker thread for a DNS resolver.
Will resolve DNS to match first-party subdomains.
"""
def __init__(self,
in_queue: multiprocessing.Queue,
out_queue: multiprocessing.Queue,
in_queue: queue.Queue,
out_queue: queue.Queue,
server: str):
super(DnsResolver, self).__init__()
self.log = logging.getLogger(server)
@ -92,8 +95,8 @@ class DnsResolver(multiprocessing.Process):
def get_matching_subdomains(subdomains: typing.Iterable[str],
nameservers: typing.List[str] = None,
) -> typing.Iterable[typing.Tuple[str, bool]]:
subdomains_queue: multiprocessing.Queue = multiprocessing.Queue()
results_queue: multiprocessing.Queue = multiprocessing.Queue()
subdomains_queue: queue.Queue = queue.Queue()
results_queue: queue.Queue = queue.Queue()
"""
Orchestrator of the different DnsResolver threads.
"""
@ -113,14 +116,12 @@ def get_matching_subdomains(subdomains: typing.Iterable[str],
# sentinel = None ~= EOF
for _ in servers:
subdomains_queue.put(None)
subdomains_queue.close()
# Wait for one sentinel per worker
# In the meantime output results
for _ in servers:
for result in iter(results_queue.get, None):
yield result
results_queue.close()
if __name__ == '__main__':