Compare commits
3 commits
7df00fc859
...
08a8eaaada
Author | SHA1 | Date | |
---|---|---|---|
Geoffrey Frogeye | 08a8eaaada | ||
Geoffrey Frogeye | 32377229db | ||
Geoffrey Frogeye | 04fe454d99 |
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
# Main script for eulaurarien
|
# Main script for eulaurarien
|
||||||
|
|
||||||
|
./fetch_resources.sh
|
||||||
./collect_subdomains.sh
|
./collect_subdomains.sh
|
||||||
./filter_subdomains.sh
|
./filter_subdomains.sh
|
||||||
|
|
||||||
|
|
16
fetch_resources.sh
Executable file
16
fetch_resources.sh
Executable file
|
@ -0,0 +1,16 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Get a list of nameservers
|
||||||
|
|
||||||
|
rm -f nameservers
|
||||||
|
touch nameservers
|
||||||
|
[ -f nameservers.head ] && cat nameservers.head >> nameservers
|
||||||
|
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
|
||||||
|
|
||||||
|
# Get top 1M subdomains
|
||||||
|
|
||||||
|
wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip
|
||||||
|
unzip top-1m.csv.zip
|
||||||
|
sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list
|
||||||
|
rm top-1m.csv top-1m.csv.zip
|
||||||
|
|
|
@ -7,7 +7,8 @@ the ones resolving to a first-party tracker.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import threading
|
||||||
|
import queue
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
@ -21,19 +22,18 @@ import progressbar
|
||||||
import regexes
|
import regexes
|
||||||
|
|
||||||
DNS_TIMEOUT = 5.0
|
DNS_TIMEOUT = 5.0
|
||||||
MAX_NAMESERVERS = 512
|
|
||||||
|
|
||||||
# TODO Retry failed requests
|
# TODO Retrying does not work because the sentinels get through first :/
|
||||||
|
|
||||||
class DnsResolver(multiprocessing.Process):
|
class DnsResolver(threading.Thread):
|
||||||
"""
|
"""
|
||||||
Worker process for a DNS resolver.
|
Worker process for a DNS resolver.
|
||||||
Will resolve DNS to match first-party subdomains.
|
Will resolve DNS to match first-party subdomains.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
in_queue: multiprocessing.Queue,
|
in_queue: queue.Queue,
|
||||||
out_queue: multiprocessing.Queue,
|
out_queue: queue.Queue,
|
||||||
server: str):
|
server: str):
|
||||||
super(DnsResolver, self).__init__()
|
super(DnsResolver, self).__init__()
|
||||||
self.log = logging.getLogger(server)
|
self.log = logging.getLogger(server)
|
||||||
|
@ -44,7 +44,7 @@ class DnsResolver(multiprocessing.Process):
|
||||||
self.resolver = dns.resolver.Resolver()
|
self.resolver = dns.resolver.Resolver()
|
||||||
self.resolver.nameservers = [server]
|
self.resolver.nameservers = [server]
|
||||||
|
|
||||||
def is_subdomain_matching(self, subdomain: str) -> bool:
|
def is_subdomain_matching(self, subdomain: str) -> typing.Optional[bool]:
|
||||||
"""
|
"""
|
||||||
Indicates if the subdomain redirects to a first-party tracker.
|
Indicates if the subdomain redirects to a first-party tracker.
|
||||||
"""
|
"""
|
||||||
|
@ -61,10 +61,10 @@ class DnsResolver(multiprocessing.Process):
|
||||||
return False
|
return False
|
||||||
except dns.resolver.NoNameservers:
|
except dns.resolver.NoNameservers:
|
||||||
self.log.warning("All nameservers broken for %s", subdomain)
|
self.log.warning("All nameservers broken for %s", subdomain)
|
||||||
return False
|
return None
|
||||||
except dns.exception.Timeout:
|
except dns.exception.Timeout:
|
||||||
self.log.warning("Timeout for %s", subdomain)
|
self.log.warning("Timeout for %s", subdomain)
|
||||||
return False
|
return None
|
||||||
except dns.name.EmptyLabel:
|
except dns.name.EmptyLabel:
|
||||||
self.log.warning("Empty label for %s", subdomain)
|
self.log.warning("Empty label for %s", subdomain)
|
||||||
return False
|
return False
|
||||||
|
@ -78,6 +78,13 @@ class DnsResolver(multiprocessing.Process):
|
||||||
self.log.info("Started")
|
self.log.info("Started")
|
||||||
for subdomain in iter(self.in_queue.get, None):
|
for subdomain in iter(self.in_queue.get, None):
|
||||||
matching = self.is_subdomain_matching(subdomain)
|
matching = self.is_subdomain_matching(subdomain)
|
||||||
|
|
||||||
|
# If issue, retry
|
||||||
|
if matching is None:
|
||||||
|
# matching = False
|
||||||
|
self.in_queue.put(subdomain)
|
||||||
|
continue
|
||||||
|
|
||||||
result = (subdomain, matching)
|
result = (subdomain, matching)
|
||||||
# self.log.debug("%s", result)
|
# self.log.debug("%s", result)
|
||||||
self.out_queue.put(result)
|
self.out_queue.put(result)
|
||||||
|
@ -88,15 +95,14 @@ class DnsResolver(multiprocessing.Process):
|
||||||
def get_matching_subdomains(subdomains: typing.Iterable[str],
|
def get_matching_subdomains(subdomains: typing.Iterable[str],
|
||||||
nameservers: typing.List[str] = None,
|
nameservers: typing.List[str] = None,
|
||||||
) -> typing.Iterable[typing.Tuple[str, bool]]:
|
) -> typing.Iterable[typing.Tuple[str, bool]]:
|
||||||
subdomains_queue: multiprocessing.Queue = multiprocessing.Queue()
|
subdomains_queue: queue.Queue = queue.Queue()
|
||||||
results_queue: multiprocessing.Queue = multiprocessing.Queue()
|
results_queue: queue.Queue = queue.Queue()
|
||||||
"""
|
"""
|
||||||
Orchestrator of the different DnsResolver threads.
|
Orchestrator of the different DnsResolver threads.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Use internal resolver by default
|
# Use internal resolver by default
|
||||||
servers = nameservers or dns.resolver.Resolver().nameservers
|
servers = nameservers or dns.resolver.Resolver().nameservers
|
||||||
servers = servers[:MAX_NAMESERVERS]
|
|
||||||
|
|
||||||
# Create workers
|
# Create workers
|
||||||
for server in servers:
|
for server in servers:
|
||||||
|
@ -110,14 +116,12 @@ def get_matching_subdomains(subdomains: typing.Iterable[str],
|
||||||
# sentinel = None ~= EOF
|
# sentinel = None ~= EOF
|
||||||
for _ in servers:
|
for _ in servers:
|
||||||
subdomains_queue.put(None)
|
subdomains_queue.put(None)
|
||||||
subdomains_queue.close()
|
|
||||||
|
|
||||||
# Wait for one sentinel per worker
|
# Wait for one sentinel per worker
|
||||||
# In the meantime output results
|
# In the meantime output results
|
||||||
for _ in servers:
|
for _ in servers:
|
||||||
for result in iter(results_queue.get, None):
|
for result in iter(results_queue.get, None):
|
||||||
yield result
|
yield result
|
||||||
results_queue.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,12 +1,5 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
# Get a list of nameservers
|
|
||||||
|
|
||||||
rm -f nameservers
|
|
||||||
touch nameservers
|
|
||||||
[ -f nameservers.head ] && cat nameservers.head >> nameservers
|
|
||||||
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
|
|
||||||
|
|
||||||
# Filter out the subdomains not pointing to a first-party tracker
|
# Filter out the subdomains not pointing to a first-party tracker
|
||||||
|
|
||||||
cat subdomains/*.list | sort -u > temp/all_subdomains.list
|
cat subdomains/*.list | sort -u > temp/all_subdomains.list
|
||||||
|
|
Loading…
Reference in a new issue