Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

81 lines
2.7 KiB

#!/usr/bin/env bash
source .env.default
source .env
function log() {
  # Print a message to stdout in yellow (ANSI SGR 33).
  # Uses printf instead of `echo -e` (whose escape handling is
  # non-portable), and "$*" to join all arguments into one message
  # instead of embedding $@ inside a quoted string.
  printf '\033[33m%s\033[0m\n' "$*"
}
# Fetch metadata JSON for a Rapid7 OpenData study.
# $1 - study identifier (e.g. sonar.fdns_v2)
# Reads RAPID7_API_KEY for authentication; writes the response to stdout.
function api_call {
  local study="$1"
  curl --silent \
    --header "X-Api-Key: $RAPID7_API_KEY" \
    "https://us.api.insight.rapid7.com/opendata/studies/$study/"
}
function get_timestamp { # study, dataset
  # Print the timestamp of the newest available file for a study/dataset
  # pair. Without an API key, scrapes the public opendata HTML listing;
  # with a key, queries the JSON API via api_call.
  # NOTE(review): both paths assume filenames of the form
  # <...>-<...>-<...>-<timestamp>-...<dataset>.json.gz, where the 4th
  # '-'-separated field is the timestamp — confirm against current
  # Rapid7 file naming.
  local study="$1"
  local dataset="$2"
  local line filename
  if [ -z "$RAPID7_API_KEY" ]
  then
    line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
    echo "$line" | cut -d'"' -f2 | cut -d'/' -f3 | cut -d'-' -f4
  else
    filename=$(api_call "$study" | jq '.sonarfile_set[]' -r | grep "${dataset}.json.gz" | sort | tail -1)
    # Quoted to prevent word-splitting/globbing of the filename.
    echo "$filename" | cut -d'-' -f4
  fi
}
function get_download_url { # study, dataset
  # Print a download URL for the newest file of a study/dataset pair.
  # Without an API key, scrapes the public opendata HTML listing;
  # with a key, asks the JSON API for a (signed) download URL.
  local study="$1"
  local dataset="$2"
  local line filename
  if [ -z "$RAPID7_API_KEY" ]
  then
    line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
    echo "https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)"
  else
    filename=$(api_call "$study" | jq '.sonarfile_set[]' -r | grep "${dataset}.json.gz" | sort | tail -1)
    api_call "$study/$filename/download" | jq '.url' -r
  fi
}
function feed_rapid7 { # study, dataset, rule_file, ./feed_dns args
  # Download a Rapid7 dataset and pipe it into ./feed_dns.py.
  #
  # The dataset will be imported if:
  # none of this dataset was ever imported
  # or
  # the last dataset imported is older than the one to be imported
  # or
  # the rule_file is newer than when the last dataset was imported
  #
  # (note the difference between the age of the dataset itself and
  # the date when it is imported)
  local study="$1"
  local dataset="$2"
  local rule_file="$3"
  shift 3
  local new_ts old_ts old_ts_file link extra_args
  new_ts="$(get_timestamp "$study" "$dataset")"
  old_ts_file="last_updates/rapid7_${study}_${dataset}.txt"
  if [ -f "$old_ts_file" ]
  then
    old_ts=$(cat "$old_ts_file")
  else
    old_ts="0"
  fi
  # A failed timestamp lookup leaves new_ts empty; without this guard
  # the numeric comparison below would error out and the rule_file
  # check alone could still trigger a bogus import.
  if [ -z "$new_ts" ]
  then
    log "Could not determine latest $dataset timestamp, skipping"
    return 1
  fi
  if [ "$new_ts" -gt "$old_ts" ] || [ "$rule_file" -nt "$old_ts_file" ]
  then
    link="$(get_download_url "$study" "$dataset")"
    log "Reading $dataset dataset from $link ($old_ts -> $new_ts)…"
    extra_args=""
    [ "$SINGLE_PROCESS" -eq 1 ] && extra_args="--single-process"
    # pipefail (scoped to the subshell) makes the pipeline fail if curl
    # or gunzip fails, so a truncated download never records the new
    # timestamp; previously only feed_dns.py's exit code was checked.
    # extra_args is deliberately unquoted: empty means "no extra arg".
    if (set -o pipefail; curl -L "$link" | gunzip | ./feed_dns.py rapid7 "$@" $extra_args)
    then
      echo "$new_ts" > "$old_ts_file"
    fi
  else
    log "Skipping $dataset as there is no new version since $old_ts"
  fi
}
# Import the Sonar forward-DNS datasets (A records and CNAMEs).
# NOTE(review): the reverse-DNS and AAAA imports below are commented
# out — presumably disabled intentionally to save time/bandwidth.
# feed_rapid7 sonar.rdns_v2 rdns rules_asn/first-party.list
feed_rapid7 sonar.fdns_v2 fdns_a rules_asn/first-party.list --ip4-cache "$CACHE_SIZE"
# feed_rapid7 sonar.fdns_v2 fdns_aaaa rules_asn/first-party.list --ip6-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 fdns_cname rules/first-party.list