Added ability to use Rapid7 API

Closes #11
master
Geoffrey Frogeye 2019-12-24 15:08:18 +01:00
parent 7d1c1a1d54
commit c65ae94892
Signed by: geoffrey
GPG Key ID: D8A7ECA00A8CD3DD
6 changed files with 42 additions and 18 deletions

3
.env.default Normal file
View File

@ -0,0 +1,3 @@
RAPID7_API_KEY=
CACHE_SIZE=536870912
MASSDNS_HASHMAP_SIZE=1000

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
*.log
*.p
.env

View File

@ -39,6 +39,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in
- [Python 3.4+](https://www.python.org/)
- [coloredlogs](https://pypi.org/project/coloredlogs/) (sorry I can't help myself)
- [numpy](https://www.numpy.org/)
- [jq](http://stedolan.github.io/jq/) (only if you have a Rapid7 API key)
- [massdns](https://github.com/blechschmidt/massdns) in your `$PATH` (only if you have subdomains as a source)
- [Firefox](https://www.mozilla.org/firefox/) (only if you have websites as a source)
- [selenium (Python bindings)](https://pypi.python.org/pypi/selenium) (only if you have websites as a source)

View File

@ -4,6 +4,7 @@
./fetch_resources.sh
./collect_subdomains.sh
./import_rules.sh
./resolve_subdomains.sh
./filter_subdomains.sh
./import_rapid7.sh

View File

@ -1,26 +1,41 @@
#!/usr/bin/env bash
source .env.default
source .env
function log() {
    # Print a status message in yellow on stdout.
    #   $@ - message words; they are joined with single spaces.
    # Backslash escapes inside the message are interpreted (echo -e).
    # "$*" is used instead of embedding "$@" in a larger string, which would
    # expand to several separate words; IFS is pinned locally so the words are
    # always joined with a space, whatever the caller's IFS is.
    local IFS=' '
    echo -e "\033[33m$*\033[0m"
}
function feed_rapid7_fdns { # dataset
dataset=$1
line=$(curl -s https://opendata.rapid7.com/sonar.fdns_v2/ | grep "href=\".\+-fdns_$dataset.json.gz\"")
link="https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)"
log "Reading $(echo "$dataset" | awk '{print toupper($0)}') records from $link"
curl -L "$link" | gunzip
function api_call {
    # Send an authenticated request to the Rapid7 Open Data API and write the
    # response to stdout.
    #   $1 - path below /opendata/studies/ (a trailing slash is appended)
    # The API key is read from RAPID7_API_KEY.
    local endpoint="https://us.api.insight.rapid7.com/opendata/studies/$1/"
    local auth_header="X-Api-Key: $RAPID7_API_KEY"
    curl -s -H "$auth_header" "$endpoint"
}
function feed_rapid7_rdns {
dataset=$1
line=$(curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | grep "href=\".\+-rdns.json.gz\"")
link="https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)"
log "Reading PTR records from $link"
curl -L "$link" | gunzip
function get_download_url { # study, dataset
    # Print on stdout the download URL of one Rapid7 Open Data file.
    #   $1 - study name, e.g. sonar.fdns_v2
    #   $2 - dataset suffix, e.g. fdns_a (files are matched as "<dataset>.json.gz")
    # Without RAPID7_API_KEY the public index page of the study is scraped and
    # the first matching link is used. With a key, the file list returned by
    # the API is filtered and the entry that sorts last is used (presumably the
    # most recent one, assuming file names start with a date - to be confirmed).
    # Returns 1 and prints nothing on stdout when no matching file is found.
    local study="$1"
    local dataset="$2"
    local line filename
    if [ -z "$RAPID7_API_KEY" ]
    then
        line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
        if [ -z "$line" ]
        then
            echo "No $dataset file found for study $study" >&2
            return 1
        fi
        # The link target is taken as the second double-quote-delimited field
        # of the matching line.
        echo "https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)"
    else
        # -F: the file name suffix is a literal string, not a regular expression.
        filename=$(api_call "$study" | jq -r '.sonarfile_set[]' | grep -F -- "${dataset}.json.gz" | sort | tail -1)
        if [ -z "$filename" ]
        then
            echo "No $dataset file found for study $study" >&2
            return 1
        fi
        # The selected file name is only informational: it goes to stderr so
        # that stdout carries nothing but the URL, which callers capture.
        echo "$filename" >&2
        api_call "$study/$filename/download" | jq -r '.url'
    fi
}
feed_rapid7_rdns | ./feed_dns.py rapid7
feed_rapid7_fdns a | ./feed_dns.py rapid7 --ip4-cache 536870912
# feed_rapid7_fdns aaaa | ./feed_dns.py rapid7 --ip6-cache 536870912
feed_rapid7_fdns cname | ./feed_dns.py rapid7
function feed_rapid7 { # study, dataset
    # Download one Rapid7 dataset, decompress it and stream it into
    # "./feed_dns.py rapid7".
    #   $1 - study name, passed to get_download_url
    #   $2 - dataset name, passed to get_download_url
    #   remaining arguments - forwarded unchanged to feed_dns.py
    # Returns 1 without downloading anything when no URL could be determined.
    local study="$1"
    local dataset="$2"
    shift; shift
    local link
    link="$(get_download_url "$study" "$dataset")"
    if [ -z "$link" ]
    then
        log "No download URL found for $dataset dataset"
        return 1
    fi
    log "Reading $dataset dataset from $link"
    # "$@" is quoted so that each extra option reaches feed_dns.py as exactly
    # one argument, even if it contains whitespace.
    curl -L "$link" | gunzip | ./feed_dns.py rapid7 "$@"
}
# Import the Rapid7 datasets one after the other: the rdns dataset of the
# sonar.rdns_v2 study first, then three datasets of the sonar.fdns_v2 study.
feed_rapid7 sonar.rdns_v2 rdns
# CACHE_SIZE is forwarded to feed_dns.py as the value of its --ip4-cache and
# --ip6-cache options.
feed_rapid7 sonar.fdns_v2 fdns_a --ip4-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 fdns_aaaa --ip6-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 fdns_cname

View File

@ -1,5 +1,8 @@
#!/usr/bin/env bash
source .env.default
source .env
function log() {
    # Print a status message in yellow on stdout.
    #   $@ - message words; they are joined with single spaces.
    # Backslash escapes inside the message are interpreted (echo -e).
    # "$*" is used instead of embedding "$@" in a larger string, which would
    # expand to several separate words; IFS is pinned locally so the words are
    # always joined with a space, whatever the caller's IFS is.
    local IFS=' '
    echo -e "\033[33m$*\033[0m"
}
@ -13,7 +16,7 @@ log "Compiling subdomains…"
# Concatenate every subdomain list, pass it through validate_list.py --domain
# and de-duplicate it. The rev | sort -u | rev sandwich sorts the names by
# their reversed text before restoring them, so names sharing a suffix end up
# next to each other in temp/all_subdomains.list.
pv subdomains/*.list | ./validate_list.py --domain | rev | sort -u | rev > temp/all_subdomains.list
log "Resolving subdomain…"
# Resolve the compiled list with massdns, using the resolvers listed in
# temp/all_nameservers_ip4.list and writing the results to temp/all_resolved.txt.
massdns --output Snrql --retry REFUSED,SERVFAIL --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list
# Same invocation, with the hash map size taken from MASSDNS_HASHMAP_SIZE.
massdns --output Snrql --retry REFUSED,SERVFAIL --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list
log "Importing into database…"
# Stream the massdns output into the importer, with pv showing progress.
pv temp/all_resolved.txt | ./feed_dns.py massdns