Generates a host list of first-party trackers for ad-blocking.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

72 lines
3.9 KiB

#!/usr/bin/env bash
function log() {
echo -e "\033[33m$@\033[0m"
log "Exporting lists…"
./ --first-party --output dist/firstparty-trackers.txt
./ --first-party --end-chain --no-dupplicates --output dist/firstparty-only-trackers.txt
./ --output dist/multiparty-trackers.txt
./ --end-chain --no-dupplicates --output dist/multiparty-only-trackers.txt
log "Generating statistics…"
./ --count --first-party > temp/count_recs_firstparty.txt
./ --count > temp/count_recs_multiparty.txt
./ --rules --count --first-party > temp/count_rules_firstparty.txt
./ --rules --count > temp/count_rules_multiparty.txt
log "Sorting lists…"
sort -u dist/firstparty-trackers.txt -o dist/firstparty-trackers.txt
sort -u dist/firstparty-only-trackers.txt -o dist/firstparty-only-trackers.txt
sort -u dist/multiparty-trackers.txt -o dist/multiparty-trackers.txt
sort -u dist/multiparty-only-trackers.txt -o dist/multiparty-only-trackers.txt
log "Generating hosts lists…"
function generate_hosts {
echo "# First-party trackers host list"
echo "# $description"
echo "# $description2"
echo "#"
echo "# About first-party trackers:"
echo "# Source code:"
echo "#"
echo "# In case of false positives/negatives, or any other question,"
echo "# contact me the way you like:"
echo "#"
echo "# Latest version:"
echo "# - First-party trackers :"
echo "# - … excluding redirected:"
echo "# - First and third party :"
echo "# - … excluding redirected:"
echo '# (you can remove `-hosts` to get the raw list)'
echo "#"
echo "# Generation date: $(date -Isec)"
echo "# Generation software: eulaurarien $(git describe --tags)"
echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
echo "# Number of source DNS records: ~2E9 + $(wc -l temp/all_resolved.json | cut -d' ' -f1)" # TODO
echo "#"
echo "# Known first-party trackers: $(cat temp/count_rules_firstparty.txt)"
echo "# Found first-party trackers: $(cat temp/count_recs_firstparty.txt)"
echo "# Number of first-party hostnames: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
echo "#"
echo "# Known multi-party trackers: $(cat temp/count_rules_multiparty.txt)"
echo "# Found multi-party trackers: $(cat temp/count_recs_multiparty.txt)"
echo "# Number of multi-party hostnames: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
sed 's|^| |' "dist/$basename.txt"
) > "dist/$basename-hosts.txt"
generate_hosts "firstparty-trackers" "Generated from a curated list of first-party trackers" ""
generate_hosts "firstparty-only-trackers" "Generated from a curated list of first-party trackers" "Only contain the first chain of redirection."
generate_hosts "multiparty-trackers" "Generated from known third-party trackers." "Also contains trackers used as third-party."
generate_hosts "multiparty-only-trackers" "Generated from known third-party trackers." "Do not contain trackers used in third-party. Use in combination with third-party lists."