Compare commits
3 commits
7df00fc859
...
08a8eaaada
Author | SHA1 | Date | |
---|---|---|---|
Geoffrey Frogeye | 08a8eaaada | ||
Geoffrey Frogeye | 32377229db | ||
Geoffrey Frogeye | 04fe454d99 |
|
@ -2,6 +2,7 @@
|
|||
|
||||
# Main script for eulaurarien
|
||||
|
||||
./fetch_resources.sh
|
||||
./collect_subdomains.sh
|
||||
./filter_subdomains.sh
|
||||
|
||||
|
|
16
fetch_resources.sh
Executable file
16
fetch_resources.sh
Executable file
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Get a list of nameservers
|
||||
|
||||
rm -f nameservers
|
||||
touch nameservers
|
||||
[ -f nameservers.head ] && cat nameservers.head >> nameservers
|
||||
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
|
||||
|
||||
# Get top 1M subdomains
|
||||
|
||||
wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip
|
||||
unzip top-1m.csv.zip
|
||||
sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list
|
||||
rm top-1m.csv top-1m.csv.zip
|
||||
|
|
@ -7,7 +7,8 @@ the ones resolving to a first-party tracker.
|
|||
"""
|
||||
|
||||
import logging
|
||||
import multiprocessing
|
||||
import threading
|
||||
import queue
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
@ -21,19 +22,18 @@ import progressbar
|
|||
import regexes
|
||||
|
||||
DNS_TIMEOUT = 5.0
|
||||
MAX_NAMESERVERS = 512
|
||||
|
||||
# TODO Retry failed requests
|
||||
# TODO Retrying does not work because the sentinel gets through first :/
|
||||
|
||||
class DnsResolver(multiprocessing.Process):
|
||||
class DnsResolver(threading.Thread):
|
||||
"""
|
||||
Worker thread for a DNS resolver.
|
||||
Will resolve DNS to match first-party subdomains.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_queue: multiprocessing.Queue,
|
||||
out_queue: multiprocessing.Queue,
|
||||
in_queue: queue.Queue,
|
||||
out_queue: queue.Queue,
|
||||
server: str):
|
||||
super(DnsResolver, self).__init__()
|
||||
self.log = logging.getLogger(server)
|
||||
|
@ -44,7 +44,7 @@ class DnsResolver(multiprocessing.Process):
|
|||
self.resolver = dns.resolver.Resolver()
|
||||
self.resolver.nameservers = [server]
|
||||
|
||||
def is_subdomain_matching(self, subdomain: str) -> bool:
|
||||
def is_subdomain_matching(self, subdomain: str) -> typing.Optional[bool]:
|
||||
"""
|
||||
Indicates if the subdomain redirects to a first-party tracker, or None if the lookup failed (broken nameservers, timeout) and should be retried.
|
||||
"""
|
||||
|
@ -61,10 +61,10 @@ class DnsResolver(multiprocessing.Process):
|
|||
return False
|
||||
except dns.resolver.NoNameservers:
|
||||
self.log.warning("All nameservers broken for %s", subdomain)
|
||||
return False
|
||||
return None
|
||||
except dns.exception.Timeout:
|
||||
self.log.warning("Timeout for %s", subdomain)
|
||||
return False
|
||||
return None
|
||||
except dns.name.EmptyLabel:
|
||||
self.log.warning("Empty label for %s", subdomain)
|
||||
return False
|
||||
|
@ -78,6 +78,13 @@ class DnsResolver(multiprocessing.Process):
|
|||
self.log.info("Started")
|
||||
for subdomain in iter(self.in_queue.get, None):
|
||||
matching = self.is_subdomain_matching(subdomain)
|
||||
|
||||
# If issue, retry
|
||||
if matching is None:
|
||||
# matching = False
|
||||
self.in_queue.put(subdomain)
|
||||
continue
|
||||
|
||||
result = (subdomain, matching)
|
||||
# self.log.debug("%s", result)
|
||||
self.out_queue.put(result)
|
||||
|
@ -88,15 +95,14 @@ class DnsResolver(multiprocessing.Process):
|
|||
def get_matching_subdomains(subdomains: typing.Iterable[str],
|
||||
nameservers: typing.List[str] = None,
|
||||
) -> typing.Iterable[typing.Tuple[str, bool]]:
|
||||
subdomains_queue: multiprocessing.Queue = multiprocessing.Queue()
|
||||
results_queue: multiprocessing.Queue = multiprocessing.Queue()
|
||||
subdomains_queue: queue.Queue = queue.Queue()
|
||||
results_queue: queue.Queue = queue.Queue()
|
||||
"""
|
||||
Orchestrator of the different DnsResolver threads.
|
||||
"""
|
||||
|
||||
# Use internal resolver by default
|
||||
servers = nameservers or dns.resolver.Resolver().nameservers
|
||||
servers = servers[:MAX_NAMESERVERS]
|
||||
|
||||
# Create workers
|
||||
for server in servers:
|
||||
|
@ -110,14 +116,12 @@ def get_matching_subdomains(subdomains: typing.Iterable[str],
|
|||
# sentinel = None ~= EOF
|
||||
for _ in servers:
|
||||
subdomains_queue.put(None)
|
||||
subdomains_queue.close()
|
||||
|
||||
# Wait for one sentinel per worker
|
||||
# In the meantime output results
|
||||
for _ in servers:
|
||||
for result in iter(results_queue.get, None):
|
||||
yield result
|
||||
results_queue.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -1,12 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Get a list of nameservers
|
||||
|
||||
rm -f nameservers
|
||||
touch nameservers
|
||||
[ -f nameservers.head ] && cat nameservers.head >> nameservers
|
||||
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
|
||||
|
||||
# Filter out the subdomains not pointing to a first-party tracker
|
||||
|
||||
cat subdomains/*.list | sort -u > temp/all_subdomains.list
|
||||
|
|
Loading…
Reference in a new issue