Generates a host list of first-party trackers for ad-blocking.

#!/usr/bin/env bash

# Load configuration: defaults first, then local overrides
# (a sample .env sketch follows the script)
source .env.default
source .env

function log() {
    echo -e "\033[33m$*\033[0m"
}

function api_call { # path under /opendata/studies/
    curl -s -H "X-Api-Key: $RAPID7_API_KEY" "https://us.api.insight.rapid7.com/opendata/studies/$1/"
}

function get_timestamp { # study, dataset
    study="$1"
    dataset="$2"
    if [ -z "$RAPID7_API_KEY" ]
    then
        # No API key: scrape the public listing page for a matching file
        line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
        echo "$line" | cut -d'"' -f2 | cut -d'/' -f3 | cut -d'-' -f4
    else
        # API key available: query the Open Data API and take the last matching filename after sorting
        filename=$(api_call "$study" | jq '.sonarfile_set[]' -r | grep "${dataset}.json.gz" | sort | tail -1)
        echo "$filename" | cut -d'-' -f4
    fi
}

function get_download_url { # study, dataset
    study="$1"
    dataset="$2"
    if [ -z "$RAPID7_API_KEY" ]
    then
        line=$(curl -s "https://opendata.rapid7.com/$study/" | grep "href=\".\+-$dataset.json.gz\"" | head -1)
        echo "https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)"
    else
        filename=$(api_call "$study" | jq '.sonarfile_set[]' -r | grep "${dataset}.json.gz" | sort | tail -1)
        api_call "$study/$filename/download" | jq '.url' -r
    fi
}

function feed_rapid7 { # study, dataset, rule_file, extra ./feed_dns.py args
    # The dataset will be imported if:
    # - none of this dataset was ever imported, or
    # - the last imported dataset is older than the one to be imported, or
    # - the rule_file is newer than when the last dataset was imported.
    # (Note the difference between the age of the dataset itself and
    # the date when it was imported.)
    study="$1"
    dataset="$2"
    rule_file="$3"
    shift 3
    new_ts="$(get_timestamp "$study" "$dataset")"
    old_ts_file="last_updates/rapid7_${study}_${dataset}.txt"
    if [ -f "$old_ts_file" ]
    then
        old_ts=$(cat "$old_ts_file")
    else
        old_ts="0"
    fi
    if [ "$new_ts" -gt "$old_ts" ] || [ "$rule_file" -nt "$old_ts_file" ]
    then
        link="$(get_download_url "$study" "$dataset")"
        log "Reading $dataset dataset from $link ($old_ts -> $new_ts)…"
        [ "$SINGLE_PROCESS" -eq 1 ] && EXTRA_ARGS="--single-process"
        # $EXTRA_ARGS is left unquoted on purpose so it disappears when empty
        curl -L "$link" | gunzip | ./feed_dns.py rapid7 "$@" $EXTRA_ARGS
        if [ $? -eq 0 ]
        then
            # Remember the timestamp of the dataset that was just imported
            echo "$new_ts" > "$old_ts_file"
        fi
    else
        log "Skipping $dataset as there is no new version since $old_ts"
    fi
}

# feed_rapid7 sonar.rdns_v2 rdns rules_asn/first-party.list
feed_rapid7 sonar.fdns_v2 fdns_a rules_asn/first-party.list --ip4-cache "$CACHE_SIZE"
# feed_rapid7 sonar.fdns_v2 fdns_aaaa rules_asn/first-party.list --ip6-cache "$CACHE_SIZE"
feed_rapid7 sonar.fdns_v2 fdns_cname rules/first-party.list
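
The configuration comes from the two files sourced at the top, .env.default and .env. Below is a minimal sketch of what a local .env might contain, based only on the variables the script reads (RAPID7_API_KEY, CACHE_SIZE, SINGLE_PROCESS); the values are placeholders, and the actual defaults are expected to live in .env.default.

# .env: local overrides for .env.default (sketch, placeholder values only)
RAPID7_API_KEY=your-opendata-api-key  # optional: when empty, the script scrapes opendata.rapid7.com instead of the API
#CACHE_SIZE=0                         # forwarded to ./feed_dns.py as --ip4-cache / --ip6-cache
#SINGLE_PROCESS=0                     # 1 adds --single-process to the ./feed_dns.py call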