#!/usr/bin/env bash
#
# Generate the published tracker lists.
#
# For each party scope (first, multi) and each list variant (trackers,
# only-trackers), this script:
#   1. prints rule/hostname counts obtained from `./export.py --count`,
#   2. writes the sorted, de-duplicated raw list to
#      dist/<party>party-<variant>.txt,
#   3. writes dist/<party>party-<variant>-hosts.txt: a commented header
#      followed by every raw entry prefixed with "0.0.0.0 ".
#
# Reads: temp/all_websites.list, temp/all_subdomains.list,
#        temp/all_resolved.txt
# All paths are relative, so run it from the directory that contains
# ./export.py, temp/ and dist/.

#######################################
# Print a message on stdout, coloured yellow (ANSI SGR 33).
# Arguments: the message words; they are joined with single spaces.
# Outputs:   the coloured message followed by a newline.
#######################################
log() {
  # printf with %s keeps backslashes in the message literal, unlike echo -e.
  printf '\033[33m%s\033[0m\n' "$*"
}

#######################################
# Print the public URL of one hosts-file variant.
# Globals:   partyness, trackerness (read) - the variant currently being
#            generated; a matching URL is suffixed with " (this one)".
# Arguments: $1 - party scope of the linked list (first | multi)
#            $2 - list variant of the linked list (trackers | only-trackers)
# Outputs:   the URL on stdout.
#######################################
link() {
  local url
  # Build the URL from the *requested* variant ($1/$2), not from the variant
  # being generated, so each header line points at a different file.
  url="https://hostfiles.frogeye.fr/${1}party-${2}-hosts.txt"
  if [[ "$1" == "$partyness" && "$2" == "$trackerness" ]]; then
    url="$url (this one)"
  fi
  printf '%s\n' "$url"
}

log "Calculating statistics…"
gen_date=$(date -Iseconds)
gen_software=$(git describe --tags)
number_websites=$(wc -l < temp/all_websites.list)
number_subdomains=$(wc -l < temp/all_subdomains.list)
# Counts the empty lines of the resolver output.
number_dns=$(grep -c '^$' temp/all_resolved.txt)

# Every output file lives in dist/; create it so redirections cannot all fail.
mkdir -p dist

for partyness in first multi; do
  # Flags are kept in arrays so that "no flag" expands to zero arguments
  # without relying on unquoted word-splitting.
  if [[ "$partyness" == "first" ]]; then
    partyness_flags=(--first-party)
  else
    partyness_flags=()
  fi

  echo "Statistics for ${partyness}-party trackers"
  echo "Input rules: $(./export.py --count --base-rules "${partyness_flags[@]}")"
  echo "Subsequent rules: $(./export.py --count --rules "${partyness_flags[@]}")"
  echo "Subsequent rules (no dupplicate): $(./export.py --count --rules --no-dupplicates "${partyness_flags[@]}")"
  echo "Output hostnames: $(./export.py --count "${partyness_flags[@]}")"
  echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates "${partyness_flags[@]}")"
  echo "Output hostnames (end-chain only): $(./export.py --count --end-chain "${partyness_flags[@]}")"
  echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain "${partyness_flags[@]}")"
  echo

  for trackerness in trackers only-trackers; do
    if [[ "$trackerness" == "trackers" ]]; then
      trackerness_flags=()
    else
      trackerness_flags=(--end-chain --no-dupplicates)
    fi

    file_list="dist/${partyness}party-${trackerness}.txt"
    file_host="dist/${partyness}party-${trackerness}-hosts.txt"

    log "Generating lists for variant ${partyness}-party ${trackerness}…"

    # The actual export.
    ./export.py "${partyness_flags[@]}" "${trackerness_flags[@]}" > "$file_list"
    # Keeping the DB open while sorting the output is sometimes a bit heavy,
    # so sorting is done as a separate, in-place step.
    sort -u "$file_list" -o "$file_list"

    rules_input=$(./export.py --count --base-rules "${partyness_flags[@]}")
    rules_found=$(./export.py --count --rules "${partyness_flags[@]}")
    rules_output=$(./export.py --count "${partyness_flags[@]}" "${trackerness_flags[@]}")

    # Hosts file: commented header, one blank line, then the raw list with
    # every line prefixed by the 0.0.0.0 sink address.
    {
      printf '%s\n' \
        "# First-party trackers host list" \
        "# Variant: ${partyness}-party ${trackerness}" \
        "#" \
        "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker" \
        "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien" \
        "#" \
        "# In case of false positives/negatives, or any other question," \
        "# contact me the way you like: https://geoffrey.frogeye.fr" \
        "#" \
        "# Latest versions:" \
        "# - First-party trackers : $(link first trackers)" \
        "# - … excluding redirected: $(link first only-trackers)" \
        "# - First and third party : $(link multi trackers)" \
        "# - … excluding redirected: $(link multi only-trackers)" \
        '# (you can remove `-hosts` to get the raw list)' \
        "#" \
        "# Generation date: $gen_date" \
        "# Generation software: eulaurarien $gen_software" \
        "# Number of source websites: $number_websites" \
        "# Number of source subdomains: $number_subdomains" \
        "# Number of source DNS records: ~2E9 + $number_dns" \
        "#" \
        "# Input rules: $rules_input" \
        "# Subsequent rules: $rules_found" \
        "# Output rules: $rules_output" \
        "#" \
        ""
      sed 's|^|0.0.0.0 |' "$file_list"
    } > "$file_host"
  done
done