Browse Source

Automatically get top 1M subdomains

newworkflow_parseropti
Geoffrey Frogeye 3 years ago
parent
commit
04fe454d99
  1. 1
      eulaurarien.sh
  2. 16
      fetch_resources.sh
  3. 2
      filter_subdomains.py
  4. 7
      filter_subdomains.sh

1
eulaurarien.sh

@ -2,6 +2,7 @@
# Main script for eulaurarien
./fetch_resources.sh
./collect_subdomains.sh
./filter_subdomains.sh

16
fetch_resources.sh

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Fetch the external resources used by the other scripts:
#   - nameservers: a list of DNS resolvers
#   - subdomains/cisco-umbrella_popularity.cache.list: the top 1M list

# Abort on the first failing command, so that a failed download or
# extraction can never reach the `sed ... >` redirection below and
# truncate a previously fetched cache list.
set -e

# Get a list of nameservers
# Start from an empty file, prepend the contents of nameservers.head when
# that file exists, then append 64 randomly picked public resolvers.
rm -f nameservers
touch nameservers
[ -f nameservers.head ] && cat nameservers.head >> nameservers
curl https://public-dns.info/nameservers.txt | sort -R | head -64 >> nameservers

# Get top 1M subdomains
# The output directory might not exist on a fresh checkout.
mkdir -p subdomains
# -O: overwrite a leftover archive from an interrupted run instead of
# letting wget save the new download as top-1m.csv.zip.1
wget -O top-1m.csv.zip http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip
# -o: overwrite a leftover top-1m.csv instead of prompting interactively
unzip -o top-1m.csv.zip
# Drop the leading numeric column ("<digits>,") and keep the rest of the line
sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list
rm top-1m.csv top-1m.csv.zip

2
filter_subdomains.py

@ -21,7 +21,6 @@ import progressbar
import regexes
DNS_TIMEOUT = 5.0
MAX_NAMESERVERS = 512
# TODO Retry failed requests
@ -96,7 +95,6 @@ def get_matching_subdomains(subdomains: typing.Iterable[str],
# Use internal resolver by default
servers = nameservers or dns.resolver.Resolver().nameservers
servers = servers[:MAX_NAMESERVERS]
# Create workers
for server in servers:

7
filter_subdomains.sh

@ -1,12 +1,5 @@
#!/usr/bin/env bash
# Get a list of nameservers
rm -f nameservers
touch nameservers
[ -f nameservers.head ] && cat nameservers.head >> nameservers
curl https://public-dns.info/nameservers.txt | sort -R >> nameservers
# Filter out the subdomains not pointing to a first-party tracker
cat subdomains/*.list | sort -u > temp/all_subdomains.list

Loading…
Cancel
Save