# Patch summary: move resource fetching into a dedicated fetch_resources.sh step.
#
# Files touched by this patch:
#   eulaurarien.sh       - adds a `./fetch_resources.sh` call ahead of
#                          `./collect_subdomains.sh` and `./filter_subdomains.sh`.
#   fetch_resources.sh   - new file, mode 100755. It:
#                            1. recreates `nameservers` (rm -f, touch), appends
#                               `nameservers.head` when that file exists, then
#                               appends 64 lines taken from the `sort -R` output
#                               of https://public-dns.info/nameservers.txt;
#                            2. downloads top-1m.csv.zip with wget, unzips it,
#                               strips the leading "<digits>," prefix from each
#                               line with sed, writes the result to
#                               subdomains/cisco-umbrella_popularity.cache.list
#                               and deletes top-1m.csv and top-1m.csv.zip.
#   filter_subdomains.py - removes the MAX_NAMESERVERS = 512 constant and the
#                          `servers = servers[:MAX_NAMESERVERS]` truncation.
#   filter_subdomains.sh - removes the nameserver download block; that version
#                          appended the whole `sort -R` output with no `head`.
#
# NOTE(review): the Python-side cap is presumably dropped because the list is
#   now limited to 64 entries at fetch time - confirm that no other producer
#   of `nameservers` relies on the old 512 limit.
# NOTE(review): fetch_resources.sh enables no `set -e` / `pipefail`, so a failed
#   curl or wget does not appear to stop the script - confirm this is intended.
# NOTE(review): the top-1m archive is fetched over plain http://, while the
#   nameserver list uses https:// - confirm whether https is available.
# NOTE(review): `sort -R` and the `\+` in the sed expression look like GNU
#   extensions - verify on the platforms this is meant to run on.
# NOTE(review): the script assumes the subdomains/ directory exists and that no
#   stale top-1m.csv / top-1m.csv.zip is left from an interrupted run - TODO
#   confirm.
# NOTE(review): in this view the patch text below sits on one physical line
#   (line breaks, blank context lines and indentation are not visible), so
#   `git apply` / `patch` will presumably reject it as-is. Regenerate it from
#   the repository rather than hand-editing: context and removed lines must
#   match the target files exactly.
diff --git a/eulaurarien.sh b/eulaurarien.sh index 88622f5..0cb45d7 100755 --- a/eulaurarien.sh +++ b/eulaurarien.sh @@ -2,6 +2,7 @@ # Main script for eulaurarien +./fetch_resources.sh ./collect_subdomains.sh ./filter_subdomains.sh diff --git a/fetch_resources.sh b/fetch_resources.sh new file mode 100755 index 0000000..5ff9d01 --- /dev/null +++ b/fetch_resources.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Get a list of nameservers + +rm -f nameservers +touch nameservers +[ -f nameservers.head ] && cat nameservers.head >> nameservers +curl https://public-dns.info/nameservers.txt | sort -R | head -64 >> nameservers + +# Get top 1M subdomains + +wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip +unzip top-1m.csv.zip +sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list +rm top-1m.csv top-1m.csv.zip + diff --git a/filter_subdomains.py b/filter_subdomains.py index 3a3fc73..788b2fc 100755 --- a/filter_subdomains.py +++ b/filter_subdomains.py @@ -21,7 +21,6 @@ import progressbar import regexes DNS_TIMEOUT = 5.0 -MAX_NAMESERVERS = 512 # TODO Retry failed requests @@ -96,7 +95,6 @@ def get_matching_subdomains(subdomains: typing.Iterable[str], # Use interal resolver by default servers = nameservers or dns.resolver.Resolver().nameservers - servers = servers[:MAX_NAMESERVERS] # Create workers for server in servers: diff --git a/filter_subdomains.sh b/filter_subdomains.sh index 08ae897..d576b8c 100755 --- a/filter_subdomains.sh +++ b/filter_subdomains.sh @@ -1,12 +1,5 @@ #!/usr/bin/env bash -# Get a list of nameservers - -rm -f nameservers -touch nameservers -[ -f nameservers.head ] && cat nameservers.head >> nameservers -curl https://public-dns.info/nameservers.txt | sort -R >> nameservers - # Filter out the subdomains not pointing to a first-party tracker cat subdomains/*.list | sort -u > temp/all_subdomains.list