#!/usr/bin/env bash
#
# Prints statistics about the collected data, then exports every list variant
# ({first,multi}-party x {trackers,only-trackers}) into dist/, both as a plain
# list and as a hosts file (each entry prefixed with "0.0.0.0 ").
# When an explanations/ directory exists, the output of
# `./export.py --explain` is also stored there.
#
# All paths are relative to the current directory: the script reads
# last_updates/*.txt and temp/*, runs ./export.py and `git describe`,
# and writes to dist/ (and explanations/).

# Prints all of its arguments, joined by spaces, to stdout in yellow
# (ANSI SGR 33), followed by a reset sequence and a newline.
log() {
    printf '\033[33m%s\033[0m\n' "$*"
}

# Stores the output of `./export.py --explain` in a timestamped file inside
# explanations/ and points explanations/latest.txt at it.
# Does nothing (and returns 0) when the explanations/ directory is absent.
export_explanations() {
    local dir="explanations"
    local filename

    [[ -d "$dir" ]] || return 0

    filename="$(date -Isec).txt"
    ./export.py --explain > "$dir/$filename"
    # The link target is relative to the link's own directory, hence the bare
    # file name. The link must live in the same directory as the dump.
    ln --force --symbolic "$filename" "$dir/latest.txt"
}

log "Calculating statistics…"

# Numerically smallest line found across all last_updates/*.txt files.
oldest="$(sort -n last_updates/*.txt | head -n 1)"
oldest_date=$(date -Isec -d "@${oldest}")
gen_date=$(date -Isec)
gen_software=$(git describe --tags)
number_websites=$(wc -l < temp/all_websites.list)
number_subdomains=$(wc -l < temp/all_subdomains.list)
# Number of lines containing NOERROR.
number_dns=$(grep -c 'NOERROR' temp/all_resolved.txt)

for partyness in first multi; do
    # Flags are kept in arrays so that an empty set expands to zero arguments
    # and no word-splitting of unquoted strings is needed.
    if [[ "$partyness" == "first" ]]; then
        partyness_flags=(--first-party)
    else
        partyness_flags=()
    fi

    rules_input=$(./export.py --count --base-rules "${partyness_flags[@]}")
    rules_found=$(./export.py --count --rules "${partyness_flags[@]}")
    rules_found_nd=$(./export.py --count --rules --no-dupplicates "${partyness_flags[@]}")

    echo
    echo "Statistics for ${partyness}-party trackers"
    echo "Input rules: $rules_input"
    echo "Subsequent rules: $rules_found"
    echo "Subsequent rules (no dupplicate): $rules_found_nd"
    echo "Output hostnames: $(./export.py --count "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates "${partyness_flags[@]}")"
    echo "Output hostnames (end-chain only): $(./export.py --count --end-chain "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain "${partyness_flags[@]}")"

    for trackerness in trackers only-trackers; do
        if [[ "$trackerness" == "trackers" ]]; then
            trackerness_flags=()
        else
            trackerness_flags=(--end-chain --no-dupplicates)
        fi

        file_list="dist/${partyness}party-${trackerness}.txt"
        file_host="dist/${partyness}party-${trackerness}-hosts.txt"

        log "Generating lists for variant ${partyness}-party ${trackerness}…"

        # The actual export happens here.
        ./export.py "${partyness_flags[@]}" "${trackerness_flags[@]}" > "$file_list"
        # Keeping the DB open while sorting the output is sometimes a bit
        # heavy, so sorting/deduplication is done as a second, in-place step.
        sort -u "$file_list" -o "$file_list"

        rules_output=$(./export.py --count "${partyness_flags[@]}" "${trackerness_flags[@]}")

        # Hosts file: a commented header, a blank line, then every exported
        # entry prefixed with "0.0.0.0 ".
        {
            printf '%s\n' \
                "# First-party trackers host list" \
                "# Variant: ${partyness}-party ${trackerness}" \
                "#" \
                "# About first-party trackers: https://hostfiles.frogeye.fr/#whats-a-first-party-tracker" \
                "#" \
                "# In case of false positives/negatives, or any other question," \
                "# contact me the way you like: https://geoffrey.frogeye.fr" \
                "#" \
                "# Latest versions and variants: https://hostfiles.frogeye.fr/#list-variants" \
                "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien" \
                "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE" \
                "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements" \
                "#" \
                "# Generation software: eulaurarien $gen_software" \
                "# List generation date: $gen_date" \
                "# Oldest record: $oldest_date" \
                "# Number of source websites: $number_websites" \
                "# Number of source subdomains: $number_subdomains" \
                "# Number of source DNS records: ~2E9 + $number_dns" \
                "#" \
                "# Input rules: $rules_input" \
                "# Subsequent rules: $rules_found" \
                "# … no dupplicates: $rules_found_nd" \
                "# Output rules: $rules_output" \
                "#" \
                ""
            sed 's|^|0.0.0.0 |' "$file_list"
        } > "$file_host"
    done
done

export_explanations