From c65ae948926ceb5eea6f32b3769345bb2dba3b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geoffrey=20=E2=80=9CFrogeye=E2=80=9D=20Preud=27homme?= Date: Tue, 24 Dec 2019 15:08:18 +0100 Subject: [PATCH] Added ability to use Rapid7 API Closes #11 --- .env.default | 3 +++ .gitignore | 1 + README.md | 1 + eulaurarien.sh | 3 ++- import_rapid7.sh | 47 ++++++++++++++++++++++++++++--------------- resolve_subdomains.sh | 5 ++++- 6 files changed, 42 insertions(+), 18 deletions(-) create mode 100644 .env.default diff --git a/.env.default b/.env.default new file mode 100644 index 0000000..7090df7 --- /dev/null +++ b/.env.default @@ -0,0 +1,3 @@ +RAPID7_API_KEY= +CACHE_SIZE=536870912 +MASSDNS_HASHMAP_SIZE=1000 diff --git a/.gitignore b/.gitignore index e6abf3c..fd77ed4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.log *.p +.env diff --git a/README.md b/README.md index 5776bb4..2bde314 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in - [Python 3.4+](https://www.python.org/) - [coloredlogs](https://pypi.org/project/coloredlogs/) (sorry I can't help myself) - [numpy](https://www.numpy.org/) +- [jq](http://stedolan.github.io/jq/) (only if you have a Rapid7 API key) - [massdns](https://github.com/blechschmidt/massdns) in your `$PATH` (only if you have subdomains as a source) - [Firefox](https://www.mozilla.org/firefox/) (only if you have websites as a source) - [selenium (Python bindings)](https://pypi.python.org/pypi/selenium) (only if you have websites as a source) diff --git a/eulaurarien.sh b/eulaurarien.sh index a0cf887..a78ae27 100755 --- a/eulaurarien.sh +++ b/eulaurarien.sh @@ -4,6 +4,7 @@ ./fetch_resources.sh ./collect_subdomains.sh +./import_rules.sh ./resolve_subdomains.sh -./filter_subdomains.sh +./import_rapid7.sh diff --git a/import_rapid7.sh b/import_rapid7.sh index 4b5714f..f4a59bc 100755 --- a/import_rapid7.sh +++ b/import_rapid7.sh @@ -1,26 +1,41 @@ #!/usr/bin/env bash +source .env.default +source .env + function log() { echo -e "\033[33m$@\033[0m" } -function feed_rapid7_fdns { # dataset - dataset=$1 - line=$(curl -s https://opendata.rapid7.com/sonar.fdns_v2/ | grep "href=\".\+-fdns_$dataset.json.gz\"") - link="https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)" - log "Reading $(echo "$dataset" | awk '{print toupper($0)}') records from $link" - curl -L "$link" | gunzip +function api_call { + curl -s -H "X-Api-Key: $RAPID7_API_KEY" "https://us.api.insight.rapid7.com/opendata/studies/$1/" } -function feed_rapid7_rdns { - dataset=$1 - line=$(curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | grep "href=\".\+-rdns.json.gz\"") - link="https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)" - log "Reading PTR records from $link" - curl -L "$link" | gunzip +function get_download_url { # study, dataset + study="$1" + dataset="$2" + if [ -z "$RAPID7_API_KEY" ] + then + line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1) + echo "https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)" + else + filename=$(api_call "$study" | jq '.sonarfile_set[]' -r | grep "${dataset}.json.gz" | sort | tail -1) + echo "$filename" + api_call "$study/$filename/download" | jq '.url' -r + fi } -feed_rapid7_rdns | ./feed_dns.py rapid7 -feed_rapid7_fdns a | ./feed_dns.py rapid7 --ip4-cache 536870912 -# feed_rapid7_fdns aaaa | ./feed_dns.py rapid7 --ip6-cache 536870912 -feed_rapid7_fdns cname | ./feed_dns.py rapid7 +function feed_rapid7 { # study, dataset + study="$1" + dataset="$2" + shift; shift + link="$(get_download_url $study $dataset)" + log "Reading $dataset dataset from $link…" + curl -L "$link" | gunzip | ./feed_dns.py rapid7 $@ +} + +feed_rapid7 sonar.rdns_v2 rdns +feed_rapid7 sonar.fdns_v2 fdns_a --ip4-cache "$CACHE_SIZE" +feed_rapid7 sonar.fdns_v2 fdns_aaaa --ip6-cache "$CACHE_SIZE" +feed_rapid7 sonar.fdns_v2 fdns_cname + diff --git a/resolve_subdomains.sh b/resolve_subdomains.sh index b5b2079..d163b77 100755 --- a/resolve_subdomains.sh +++ b/resolve_subdomains.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +source .env.default +source .env + function log() { echo -e "\033[33m$@\033[0m" } @@ -13,7 +16,7 @@ log "Compiling subdomains…" pv subdomains/*.list | ./validate_list.py --domain | rev | sort -u | rev > temp/all_subdomains.list log "Resolving subdomain…" -massdns --output Snrql --retry REFUSED,SERVFAIL --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list +massdns --output Snrql --retry REFUSED,SERVFAIL --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list log "Importing into database…" pv temp/all_resolved.txt | ./feed_dns.py massdns