2019-12-17 15:04:19 +01:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
2019-12-24 15:08:18 +01:00
|
|
|
# Load configuration: defaults first, then .env, whose assignments override
# the defaults because it is sourced last.
# The explicit ./ prefix matters: for a name without a slash, `source`
# searches $PATH before the current directory.
source ./.env.default
source ./.env
|
|
|
|
|
2019-12-17 15:04:19 +01:00
|
|
|
# Print a message on stdout in yellow (ANSI colour 33), followed by a newline.
# Arguments: $@ - message words, joined with single spaces
function log() {
    # Pin IFS so "$*" always joins with a space, whatever the caller's IFS is.
    local IFS=' '
    # The message goes through %s so that backslashes and leading dashes in it
    # are printed literally instead of being interpreted.
    printf '\033[33m%s\033[0m\n' "$*"
}
|
|
|
|
|
2019-12-24 15:08:18 +01:00
|
|
|
# Query the Rapid7 Open Data API and print the response body on stdout.
# Globals:   RAPID7_API_KEY (read) - sent in the X-Api-Key header
# Arguments: $1 - path below /opendata/studies/ (a trailing slash is appended)
# Returns:   curl's exit status
function api_call {
    local endpoint="$1"
    # --fail makes curl exit non-zero and print no body on HTTP errors, so an
    # error document is never handed to the caller as if it were data;
    # --show-error keeps the reason visible on stderr despite --silent.
    curl --silent --show-error --fail \
        -H "X-Api-Key: $RAPID7_API_KEY" \
        "https://us.api.insight.rapid7.com/opendata/studies/${endpoint}/"
}
|
|
|
|
|
2019-12-25 14:54:57 +01:00
|
|
|
# Print the timestamp embedded in the file name of a Rapid7 dataset.
#
# Without RAPID7_API_KEY the HTML listing at opendata.rapid7.com is scraped and
# the first matching link is used; with a key, the file names returned by the
# API are sorted and the last matching one is used. In both cases the timestamp
# is the 4th dash-separated field of the file name.
#
# Globals:   RAPID7_API_KEY (read)
# Arguments: $1 - study, $2 - dataset
# Outputs:   the timestamp on stdout; nothing when none could be determined
# Returns:   0 when a numeric timestamp was found, 1 otherwise
function get_timestamp { # study, dataset
    local study="$1"
    local dataset="$2"
    local line filename timestamp
    if [ -z "${RAPID7_API_KEY:-}" ]
    then
        line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
        # The href value is the first double-quoted string on the line; the
        # file name is its third /-separated component.
        filename=$(printf '%s\n' "$line" | cut -d'"' -f2 | cut -d'/' -f3)
    else
        # -F: the dataset name and the dots are literal text, not a regex.
        filename=$(api_call "$study" | jq -r '.sonarfile_set[]' | grep -F -- "${dataset}.json.gz" | sort | tail -1)
    fi
    timestamp=$(printf '%s\n' "$filename" | cut -d'-' -f4)
    # Callers compare this value numerically, so anything that is not a plain
    # number (no match, unexpected file name) is reported as a failure.
    if ! [[ "$timestamp" =~ ^[0-9]+$ ]]
    then
        return 1
    fi
    printf '%s\n' "$timestamp"
}
|
|
|
|
|
2019-12-24 15:08:18 +01:00
|
|
|
# Print the download URL of a Rapid7 dataset.
#
# Without RAPID7_API_KEY the HTML listing at opendata.rapid7.com is scraped and
# the first matching link is turned into an absolute URL; with a key, the file
# names returned by the API are sorted, the last matching one is selected and
# its "download" endpoint is asked for the URL.
#
# Globals:   RAPID7_API_KEY (read)
# Arguments: $1 - study, $2 - dataset
# Outputs:   the URL on stdout; nothing when no matching file was found
# Returns:   1 when no matching file was found, otherwise the status of the
#            last command run
function get_download_url { # study, dataset
    local study="$1"
    local dataset="$2"
    local line path filename
    if [ -z "${RAPID7_API_KEY:-}" ]
    then
        line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
        # Without this guard a bare "https://opendata.rapid7.com" would be
        # printed and later downloaded as if it were the dataset.
        if [ -z "$line" ]
        then
            return 1
        fi
        # The href value is the first double-quoted string on the line.
        path=$(printf '%s\n' "$line" | cut -d'"' -f2)
        printf '%s\n' "https://opendata.rapid7.com${path}"
    else
        # -F: the dataset name and the dots are literal text, not a regex.
        filename=$(api_call "$study" | jq -r '.sonarfile_set[]' | grep -F -- "${dataset}.json.gz" | sort | tail -1)
        if [ -z "$filename" ]
        then
            return 1
        fi
        # "// empty" prints nothing instead of the string "null" when the
        # response carries no url.
        api_call "$study/$filename/download" | jq -r '.url // empty'
    fi
}
|
|
|
|
|
2020-01-04 10:54:46 +01:00
|
|
|
# Import one Rapid7 dataset through ./feed_dns.py when it is out of date.
#
# The dataset will be imported if:
#   none of this dataset was ever imported
#   or
#   the last dataset imported is older than the one to be imported
#   or
#   the rule_file is newer than when the last dataset was imported
#
# (note the difference between the age of the dataset itself and
# the date when it is imported)
#
# The timestamp of the last imported dataset is kept in
# last_updates/rapid7_<study>_<dataset>.txt; the modification time of that
# file is what the rule_file is compared against.
#
# Globals:   SINGLE_PROCESS (read) - when 1, --single-process is passed on
#            EXTRA_ARGS (read)     - otherwise, its words are passed on
# Arguments: $1 - study, $2 - dataset, $3 - rule_file,
#            remaining arguments are handed to ./feed_dns.py
# Returns:   0 when the dataset was imported or was already up to date,
#            1 when the timestamp or URL is unknown or the import failed
function feed_rapid7 { # study, dataset, rule_file, ./feed_dns args
    local study="$1"
    local dataset="$2"
    local rule_file="$3"
    shift; shift; shift

    local old_ts_file="last_updates/rapid7_${study}_${dataset}.txt"
    local new_ts old_ts link status
    local -a extra_args=()
    local -a statuses=()

    new_ts="$(get_timestamp "$study" "$dataset")"
    # Without a numeric timestamp the comparison below is meaningless and an
    # empty value would end up in the state file.
    if ! [[ "$new_ts" =~ ^[0-9]+$ ]]
    then
        log "Could not determine the latest timestamp of $dataset, not importing"
        return 1
    fi

    old_ts="0"
    if [ -f "$old_ts_file" ]
    then
        old_ts=$(cat "$old_ts_file")
        # A damaged state file counts as "never imported".
        if ! [[ "$old_ts" =~ ^[0-9]+$ ]]
        then
            old_ts="0"
        fi
    fi

    # -nt is also true when the state file does not exist yet.
    if [ "$new_ts" -gt "$old_ts" ] || [ "$rule_file" -nt "$old_ts_file" ]
    then
        link="$(get_download_url "$study" "$dataset")"
        if [ -z "$link" ]
        then
            log "Could not determine the download URL of $dataset, not importing"
            return 1
        fi
        log "Reading $dataset dataset from $link ($old_ts -> $new_ts)…"

        if [[ "${SINGLE_PROCESS:-0}" -eq 1 ]]
        then
            extra_args=(--single-process)
        elif [ -n "${EXTRA_ARGS:-}" ]
        then
            # Split on whitespace, as the unquoted expansion used to do.
            read -r -a extra_args <<<"$EXTRA_ARGS"
        fi

        curl -L "$link" | gunzip | ./feed_dns.py rapid7 "$@" "${extra_args[@]}"
        # $? would only report ./feed_dns.py; a failed download or a corrupt
        # archive must not be recorded as a successful import either.
        statuses=("${PIPESTATUS[@]}")
        for status in "${statuses[@]}"
        do
            if [ "$status" -ne 0 ]
            then
                log "Import of $dataset failed, keeping timestamp $old_ts"
                return 1
            fi
        done

        mkdir -p "${old_ts_file%/*}"
        printf '%s\n' "$new_ts" > "$old_ts_file"
    else
        log "Skipping $dataset as there is no new version since $old_ts"
    fi
}
|
|
|
|
|
2020-01-04 10:54:46 +01:00
|
|
|
# Feed the Rapid7 datasets one after the other; each call decides on its own
# whether an import is actually needed.
feed_rapid7 sonar.rdns_v2 rdns rules_asn/first-party.list
feed_rapid7 sonar.fdns_v2 fdns_a rules_asn/first-party.list \
    --ip4-cache "$CACHE_SIZE"
# The AAAA dataset is currently not imported:
# feed_rapid7 sonar.fdns_v2 fdns_aaaa rules_asn/first-party.list --ip6-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 fdns_cname rules/first-party.list
|
2019-12-24 15:08:18 +01:00
|
|
|
|