#!/usr/bin/env bash
#
# Exports the tracker lists with ./export.py, records rule/record counts,
# de-duplicates the lists and builds a hosts-file variant of each of them.
#
# Paths are relative to the current directory. The script writes into dist/
# and temp/, and reads temp/all_websites.list, temp/all_subdomains.list and
# temp/all_resolved.json for the statistics shown in the hosts-file header.

# Abort on the first failing step so that a broken export is never sorted
# and published as if it were a valid list.
set -euo pipefail

#######################################
# Prints a progress message in yellow.
# Arguments: the words of the message (joined with single spaces)
# Outputs:   the coloured message on stdout
#######################################
log() {
  # "$*" joins the arguments into one word; printf keeps the escape handling
  # in the format string instead of in the message itself.
  printf '\033[33m%s\033[0m\n' "$*"
}

#######################################
# Prints the number of lines of a file, without any padding.
# Arguments: $1 - path of the file to count
# Outputs:   the line count on stdout (no trailing newline)
#######################################
count_lines() {
  local count
  # Reading from stdin makes wc print the number alone (no file name).
  count=$(wc -l < "$1")
  # Some wc implementations left-pad the number; strip any whitespace.
  printf '%s' "${count//[[:space:]]/}"
}

#######################################
# Writes dist/<name>-hosts.txt: a commented header followed by every line of
# dist/<name>.txt prefixed with "0.0.0.0 ".
# Arguments:
#   $1 - list name, without directory nor extension
#   $2 - first description line of the header
#   $3 - second description line of the header (may be empty)
# Outputs:   nothing on stdout; (over)writes dist/<name>-hosts.txt
#######################################
generate_hosts() {
  local list_name=$1
  local description=$2
  local description2=${3:-}

  # NOTE(review): the title line below says "First-party" even when this is
  # called for the multi-party lists; kept as-is so published output does not
  # change. Confirm whether that is intended.
  # TODO (carried over from the original, attached to the "DNS records"
  # line): presumably about the hard-coded ~2E9 estimate. Confirm.
  {
    cat <<EOF
# First-party trackers host list
# ${description}
# ${description2}
#
# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker
# Source code: https://git.frogeye.fr/geoffrey/eulaurarien
#
# In case of false positives/negatives, or any other question,
# contact me the way you like: https://geoffrey.frogeye.fr
#
# Latest version:
# - First-party trackers : https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt
# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt
# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt
# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt
# (you can remove \`-hosts\` to get the raw list)
#
# Generation date: $(date -Isec)
# Generation software: eulaurarien $(git describe --tags)
# Number of source websites: $(count_lines temp/all_websites.list)
# Number of source subdomains: $(count_lines temp/all_subdomains.list)
# Number of source DNS records: ~2E9 + $(count_lines temp/all_resolved.json)
#
# Known first-party trackers: $(<temp/count_rules_firstparty.txt)
# Found first-party trackers: $(<temp/count_recs_firstparty.txt)
# Number of first-party hostnames: $(count_lines dist/firstparty-trackers.txt)
# … excluding redirected: $(count_lines dist/firstparty-only-trackers.txt)
#
# Known multi-party trackers: $(<temp/count_rules_multiparty.txt)
# Found multi-party trackers: $(<temp/count_recs_multiparty.txt)
# Number of multi-party hostnames: $(count_lines dist/multiparty-trackers.txt)
# … excluding redirected: $(count_lines dist/multiparty-only-trackers.txt)

EOF
    sed 's|^|0.0.0.0 |' "dist/${list_name}.txt"
  } > "dist/${list_name}-hosts.txt"
}

log "Exporting lists…"
./export.py --first-party --output dist/firstparty-trackers.txt
./export.py --first-party --end-chain --output dist/firstparty-only-trackers.txt
./export.py --output dist/multiparty-trackers.txt
./export.py --end-chain --output dist/multiparty-only-trackers.txt

log "Generating statistics…"
./export.py --count --first-party > temp/count_recs_firstparty.txt
./export.py --count > temp/count_recs_multiparty.txt
./export.py --rules --count --first-party > temp/count_rules_firstparty.txt
./export.py --rules --count > temp/count_rules_multiparty.txt

log "Sorting lists…"
for list_name in \
  firstparty-trackers \
  firstparty-only-trackers \
  multiparty-trackers \
  multiparty-only-trackers; do
  # Sort and de-duplicate in place; -o lets sort safely overwrite its input.
  sort -u "dist/${list_name}.txt" -o "dist/${list_name}.txt"
done

log "Generating hosts lists…"
generate_hosts "firstparty-trackers" "Generated from a curated list of first-party trackers" ""
generate_hosts "firstparty-only-trackers" "Generated from a curated list of first-party trackers" "Only contain the first chain of redirection."
generate_hosts "multiparty-trackers" "Generated from known third-party trackers." "Also contains trackers used as third-party."
generate_hosts "multiparty-only-trackers" "Generated from known third-party trackers." "Do not contain trackers used in third-party. Use in combination with third-party lists."