Compare commits

..

No commits in common. "08a8eaaada9f444b9ed740b27c5d6c9f14f255b6" and "7df00fc859fdac5357d4f3c3245c4509a8ca27cf" have entirely different histories.

4 changed files with 21 additions and 35 deletions

View file

@ -2,7 +2,6 @@
# Main script for eulaurarien
./fetch_resources.sh
./collect_subdomains.sh
./filter_subdomains.sh

View file

@ -1,16 +0,0 @@
#!/usr/bin/env bash
# Get a list of nameservers
rm -f nameservers
touch nameservers
[ -f nameservers.head ] && cat nameservers.head >> nameservers
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
# Get top 1M subdomains
wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip
unzip top-1m.csv.zip
sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list
rm top-1m.csv top-1m.csv.zip

View file

@ -7,8 +7,7 @@ the ones resolving to a first-party tracker.
"""
import logging
import threading
import queue
import multiprocessing
import os
import re
import sys
@ -22,18 +21,19 @@ import progressbar
import regexes
DNS_TIMEOUT = 5.0
MAX_NAMESERVERS = 512
# TODO Try again does not work because sentinel get through first :/
# TODO Retry failed requests
class DnsResolver(threading.Thread):
class DnsResolver(multiprocessing.Process):
"""
Worker process for a DNS resolver.
Will resolve DNS to match first-party subdomains.
"""
def __init__(self,
in_queue: queue.Queue,
out_queue: queue.Queue,
in_queue: multiprocessing.Queue,
out_queue: multiprocessing.Queue,
server: str):
super(DnsResolver, self).__init__()
self.log = logging.getLogger(server)
@ -44,7 +44,7 @@ class DnsResolver(threading.Thread):
self.resolver = dns.resolver.Resolver()
self.resolver.nameservers = [server]
def is_subdomain_matching(self, subdomain: str) -> typing.Optional[bool]:
def is_subdomain_matching(self, subdomain: str) -> bool:
"""
Indicates if the subdomain redirects to a first-party tracker.
"""
@ -61,10 +61,10 @@ class DnsResolver(threading.Thread):
return False
except dns.resolver.NoNameservers:
self.log.warning("All nameservers broken for %s", subdomain)
return None
return False
except dns.exception.Timeout:
self.log.warning("Timeout for %s", subdomain)
return None
return False
except dns.name.EmptyLabel:
self.log.warning("Empty label for %s", subdomain)
return False
@ -78,13 +78,6 @@ class DnsResolver(threading.Thread):
self.log.info("Started")
for subdomain in iter(self.in_queue.get, None):
matching = self.is_subdomain_matching(subdomain)
# If issue, retry
if matching is None:
# matching = False
self.in_queue.put(subdomain)
continue
result = (subdomain, matching)
# self.log.debug("%s", result)
self.out_queue.put(result)
@ -95,14 +88,15 @@ class DnsResolver(threading.Thread):
def get_matching_subdomains(subdomains: typing.Iterable[str],
nameservers: typing.List[str] = None,
) -> typing.Iterable[typing.Tuple[str, bool]]:
subdomains_queue: queue.Queue = queue.Queue()
results_queue: queue.Queue = queue.Queue()
subdomains_queue: multiprocessing.Queue = multiprocessing.Queue()
results_queue: multiprocessing.Queue = multiprocessing.Queue()
"""
Orchestrator of the different DnsResolver threads.
"""
# Use internal resolver by default
servers = nameservers or dns.resolver.Resolver().nameservers
servers = servers[:MAX_NAMESERVERS]
# Create workers
for server in servers:
@ -116,12 +110,14 @@ def get_matching_subdomains(subdomains: typing.Iterable[str],
# sentinel = None ~= EOF
for _ in servers:
subdomains_queue.put(None)
subdomains_queue.close()
# Wait for one sentinel per worker
# In the meantime output results
for _ in servers:
for result in iter(results_queue.get, None):
yield result
results_queue.close()
if __name__ == '__main__':

View file

@ -1,5 +1,12 @@
#!/usr/bin/env bash
# Get a list of nameservers
rm -f nameservers
touch nameservers
[ -f nameservers.head ] && cat nameservers.head >> nameservers
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
# Filter out the subdomains not pointing to a first-party tracker
cat subdomains/*.list | sort -u > temp/all_subdomains.list