2019-11-11 11:19:46 +01:00
#!/usr/bin/env bash
2019-12-07 18:45:48 +01:00
# Print a progress message on stdout, wrapped in ANSI yellow (ESC[33m) and
# followed by an attribute reset (ESC[0m).
# Arguments: $@ - words of the message; they end up separated by single
#                 spaces. Backslash escapes in the message are interpreted
#                 because echo runs with -e.
log() {
    local message="$*"
    echo -e " \033[33m ${message} \033[0m "
}
2019-12-13 00:11:21 +01:00
# Export the four raw tracker lists into dist/.
# Each invocation passes the destination path as the value of --output, so
# the path must come immediately after that flag.
# NOTE(review): "--no-dupplicates" is spelled exactly as the other call site
# spells it; presumably that is how export.py declares the option, so do not
# "correct" it here without changing export.py as well.
log "Exporting lists…"
./export.py --first-party --output dist/firstparty-trackers.txt
./export.py --first-party --end-chain --no-dupplicates --output dist/firstparty-only-trackers.txt
./export.py --output dist/multiparty-trackers.txt
# Fixed: --output was previously followed by --no-dupplicates instead of the
# file name, leaving the path as a stray positional argument.
./export.py --end-chain --no-dupplicates --output dist/multiparty-only-trackers.txt
2019-12-05 19:15:24 +01:00
2019-12-17 13:29:02 +01:00
# Capture the stdout of four `export.py --count` runs, one file per
# combination of (records | rules) x (first-party | multi-party).
# The files are read back with `cat` when the hosts-file headers are built.
log "Generating statistics…"
stats_dir=temp
./export.py --count --first-party         > "${stats_dir}/count_recs_firstparty.txt"
./export.py --count                       > "${stats_dir}/count_recs_multiparty.txt"
./export.py --rules --count --first-party > "${stats_dir}/count_rules_firstparty.txt"
./export.py --rules --count               > "${stats_dir}/count_rules_multiparty.txt"
2019-12-17 13:29:02 +01:00
# Sort every exported list in place and drop duplicate lines.
# `sort -o` may safely name the input file as its output.
log "Sorting lists…"
for list_name in \
    firstparty-trackers \
    firstparty-only-trackers \
    multiparty-trackers \
    multiparty-only-trackers
do
    sort -u "dist/${list_name}.txt" -o "dist/${list_name}.txt"
done
log "Generating hosts lists…"
2019-11-15 08:57:31 +01:00
# Build dist/<name>-hosts.txt from dist/<name>.txt.
#
# The output starts with a commented header (descriptions, project links,
# generation metadata and statistics), then a blank line, then every line of
# the raw list prefixed with "0.0.0.0 ".
#
# Arguments:
#   $1 - list name without directory or extension, e.g. "firstparty-trackers"
#   $2 - first description line of the header
#   $3 - second description line of the header (optional, may be empty)
# Reads:
#   dist/<name>.txt
#   dist/{firstparty,multiparty}{,-only}-trackers.txt      (line counts)
#   temp/all_websites.list, temp/all_subdomains.list,
#   temp/all_resolved.json                                 (line counts)
#   temp/count_{rules,recs}_{firstparty,multiparty}.txt    (contents)
# Outputs:
#   Overwrites dist/<name>-hosts.txt. Nothing is written to stdout.
generate_hosts() {
    # Plain assignments: no spaces around '=' (otherwise the shell runs a
    # command called "basename"/"description" instead of assigning).
    local list_name="$1"
    local description="$2"
    local description2="${3:-}"

    {
        echo "# First-party trackers host list"
        echo "# $description"
        echo "# $description2"
        echo "#"
        echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
        echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
        echo "#"
        echo "# In case of false positives/negatives, or any other question,"
        echo "# contact me the way you like: https://geoffrey.frogeye.fr"
        echo "#"
        echo "# Latest version:"
        echo "# - First-party trackers : https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt"
        echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
        echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
        echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
        # Single quotes keep the backticks literal.
        echo '# (you can remove `-hosts` to get the raw list)'
        echo "#"
        echo "# Generation date: $(date -Iseconds)"
        echo "# Generation software: eulaurarien $(git describe --tags)"
        # `wc -l FILE` prints "<count> FILE"; cut keeps only the count.
        echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
        echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
        echo "# Number of source DNS records: ~2E9 + $(wc -l temp/all_resolved.json | cut -d' ' -f1)" # TODO
        echo "#"
        echo "# Known first-party trackers: $(cat temp/count_rules_firstparty.txt)"
        echo "# Found first-party trackers: $(cat temp/count_recs_firstparty.txt)"
        echo "# Number of first-party hostnames: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
        echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
        echo "#"
        echo "# Known multi-party trackers: $(cat temp/count_rules_multiparty.txt)"
        echo "# Found multi-party trackers: $(cat temp/count_recs_multiparty.txt)"
        echo "# Number of multi-party hostnames: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
        echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
        echo
        # Turn each hostname into a hosts-file entry.
        sed 's|^|0.0.0.0 |' "dist/${list_name}.txt"
    } > "dist/${list_name}-hosts.txt"
}
2019-12-05 19:15:24 +01:00
# Produce one hosts file per exported list.
# Arguments per call: list name, first header description, second header
# description (empty for the plain first-party list).
generate_hosts \
    "firstparty-trackers" \
    "Generated from a curated list of first-party trackers" \
    ""
generate_hosts \
    "firstparty-only-trackers" \
    "Generated from a curated list of first-party trackers" \
    "Only contain the first chain of redirection."
generate_hosts \
    "multiparty-trackers" \
    "Generated from known third-party trackers." \
    "Also contains trackers used as third-party."
generate_hosts \
    "multiparty-only-trackers" \
    "Generated from known third-party trackers." \
    "Do not contain trackers used in third-party. Use in combination with third-party lists."