Re-import Rapid7 datasets when rules have been updated

This commit is contained in:
Geoffrey Frogeye 2020-01-04 10:54:46 +01:00
parent 708c53041e
commit 0cc18303fd
Signed by: geoffrey
GPG key ID: D8A7ECA00A8CD3DD
3 changed files with 19 additions and 8 deletions

View file

@ -146,7 +146,8 @@ Then, run `./import_rapid7.sh`.
This will download about 35 GiB of data the first time, but only the matching records will be stored (about a few MiB for the tracking rules).
Note the download speed will most likely be limited by the database operation thoughput (a quick RAM will help).
The script remembers which were the last sets downloaded, and will only import newer sets.
The script remembers which were the last sets downloaded, and will only newer sets.
If the first-party rules changed, the corresponding sets will be re-imported anyway.
If you want to force re-importing, run `rm last_updates/rapid7_*.txt`.
### Export the lists

View file

@ -37,10 +37,20 @@ function get_download_url { # study, dataset
fi
}
function feed_rapid7 { # study, dataset
function feed_rapid7 { # study, dataset, rule_file, ./feed_dns args
# The dataset will be imported if:
# none of this dataset was ever imported
# or
# the last dataset imported is older than the one to be imported
# or
# the rule_file is newer than when the last dataset was imported
#
# (note the difference between the age oft the dataset itself and
# the date when it is imported)
study="$1"
dataset="$2"
shift; shift
rule_file="$3"
shift; shift; shift
new_ts="$(get_timestamp $study $dataset)"
old_ts_file="last_updates/rapid7_${study}_${dataset}.txt"
if [ -f "$old_ts_file" ]
@ -49,7 +59,7 @@ function feed_rapid7 { # study, dataset
else
old_ts="0"
fi
if [ $new_ts -gt $old_ts ]
if [ $new_ts -gt $old_ts ] || [ $rule_file -nt $old_ts_file ]
then
link="$(get_download_url $study $dataset)"
log "Reading $dataset dataset from $link ($old_ts -> $new_ts)…"
@ -64,8 +74,8 @@ function feed_rapid7 { # study, dataset
fi
}
feed_rapid7 sonar.rdns_v2 rdns
feed_rapid7 sonar.fdns_v2 fdns_a --ip4-cache "$CACHE_SIZE"
# feed_rapid7 sonar.fdns_v2 fdns_aaaa --ip6-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 fdns_cname
feed_rapid7 sonar.rdns_v2 rdns rules_asn/first-party.list
feed_rapid7 sonar.fdns_v2 fdns_a rules_asn/first-party.list --ip4-cache "$CACHE_SIZE"
# feed_rapid7 sonar.fdns_v2 fdns_aaaa rules_asn/first-party.list --ip6-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 rules/first-party.list fdns_cname