From 0cc18303fdfd793b6fbd6a53346f021903829f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geoffrey=20=E2=80=9CFrogeye=E2=80=9D=20Preud=27homme?= Date: Sat, 4 Jan 2020 10:54:46 +0100 Subject: [PATCH] Re-import Rapid7 datasets when rules have been updated --- README.md | 3 ++- import_rapid7.sh | 24 +++++++++++++++++------- rules_ip/first-party.txt | 0 3 files changed, 19 insertions(+), 8 deletions(-) delete mode 100644 rules_ip/first-party.txt diff --git a/README.md b/README.md index 68b9f4d..2579e1f 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,8 @@ Then, run `./import_rapid7.sh`. This will download about 35 GiB of data the first time, but only the matching records will be stored (about a few MiB for the tracking rules). Note the download speed will most likely be limited by the database operation thoughput (a quick RAM will help). -The script remembers which were the last sets downloaded, and will only import newer sets. +The script remembers which were the last sets downloaded, and will only import newer sets. +If the first-party rules changed, the corresponding sets will be re-imported anyway. If you want to force re-importing, run `rm last_updates/rapid7_*.txt`. 
### Export the lists diff --git a/import_rapid7.sh b/import_rapid7.sh index a44e822..1e90851 100755 --- a/import_rapid7.sh +++ b/import_rapid7.sh @@ -37,10 +37,20 @@ function get_download_url { # study, dataset fi } -function feed_rapid7 { # study, dataset +function feed_rapid7 { # study, dataset, rule_file, ./feed_dns args + # The dataset will be imported if: + # none of this dataset was ever imported + # or + # the last dataset imported is older than the one to be imported + # or + # the rule_file is newer than when the last dataset was imported + # + # (note the difference between the age of the dataset itself and + # the date when it is imported) study="$1" dataset="$2" - shift; shift + rule_file="$3" + shift; shift; shift new_ts="$(get_timestamp $study $dataset)" old_ts_file="last_updates/rapid7_${study}_${dataset}.txt" if [ -f "$old_ts_file" ] then @@ -49,7 +59,7 @@ function feed_rapid7 { # study, dataset else old_ts="0" fi - if [ $new_ts -gt $old_ts ] + if [ $new_ts -gt $old_ts ] || [ $rule_file -nt $old_ts_file ] then link="$(get_download_url $study $dataset)" log "Reading $dataset dataset from $link ($old_ts -> $new_ts)…" @@ -64,8 +74,8 @@ function feed_rapid7 { # study, dataset fi } -feed_rapid7 sonar.rdns_v2 rdns -feed_rapid7 sonar.fdns_v2 fdns_a --ip4-cache "$CACHE_SIZE" -# feed_rapid7 sonar.fdns_v2 fdns_aaaa --ip6-cache "$CACHE_SIZE" -feed_rapid7 sonar.fdns_v2 fdns_cname +feed_rapid7 sonar.rdns_v2 rdns rules_asn/first-party.list +feed_rapid7 sonar.fdns_v2 fdns_a rules_asn/first-party.list --ip4-cache "$CACHE_SIZE" +# feed_rapid7 sonar.fdns_v2 fdns_aaaa rules_asn/first-party.list --ip6-cache "$CACHE_SIZE" +feed_rapid7 sonar.fdns_v2 fdns_cname rules/first-party.list diff --git a/rules_ip/first-party.txt b/rules_ip/first-party.txt deleted file mode 100644 index e69de29..0000000