eulaurarien/export_lists.sh

97 lines
3.7 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
2019-12-07 18:45:48 +01:00
#######################################
# Print a progress message in yellow on stdout.
# Arguments: the message; several arguments are joined with single spaces.
# Outputs:   ESC[33m + message + ESC[0m + newline, on stdout.
#######################################
log() {
    # Join with a space regardless of the caller's IFS, as echo would.
    local IFS=' '
    # The message goes through %s so backslashes in it are printed verbatim;
    # only the colour escapes in the format string are interpreted.
    printf '\033[33m%s\033[0m\n' "$*"
}
2019-12-18 01:03:08 +01:00
log "Calculating statistics…"

# Smallest numeric value found across all last_updates/*.txt files,
# handed to date(1) as "@<value>" to render it as an ISO 8601 timestamp.
oldest="$(sort -n last_updates/*.txt | head -1)"
oldest_date=$(date -Isec -d "@$oldest")

# Values embedded later in the header of every generated hosts file.
gen_date=$(date -Isec)
gen_software=$(git describe --tags)
number_websites=$(wc -l < temp/all_websites.list)
number_subdomains=$(wc -l < temp/all_subdomains.list)
# Number of lines of temp/all_resolved.txt containing NOERROR.
number_dns=$(grep -c 'NOERROR' temp/all_resolved.txt)
2019-12-18 01:03:08 +01:00
# Print statistics and generate every list variant.
#
# For each partyness (first, multi) the rule/hostname counts reported by
# ./export.py are printed on stdout. Then, for each trackerness
# (trackers, only-trackers), two files are written:
#   dist/<partyness>party-<trackerness>.txt        sorted, de-duplicated export
#   dist/<partyness>party-<trackerness>-hosts.txt  commented header followed by
#                                                   each entry prefixed with
#                                                   "0.0.0.0 "
# Reads the globals gen_software, gen_date, oldest_date, number_websites,
# number_subdomains and number_dns for the header.
for partyness in first multi; do
    # Flags live in arrays so they expand to zero or more separate
    # arguments without relying on unquoted word splitting.
    partyness_flags=()
    if [[ "$partyness" == "first" ]]; then
        partyness_flags=(--first-party)
    fi

    rules_input=$(./export.py --count --base-rules "${partyness_flags[@]}")
    rules_found=$(./export.py --count --rules "${partyness_flags[@]}")
    rules_found_nd=$(./export.py --count --rules --no-dupplicates "${partyness_flags[@]}")

    echo
    echo "Statistics for ${partyness}-party trackers"
    echo "Input rules: $rules_input"
    echo "Subsequent rules: $rules_found"
    echo "Subsequent rules (no dupplicate): $rules_found_nd"
    echo "Output hostnames: $(./export.py --count "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates "${partyness_flags[@]}")"
    echo "Output hostnames (end-chain only): $(./export.py --count --end-chain "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain "${partyness_flags[@]}")"

    for trackerness in trackers only-trackers; do
        trackerness_flags=()
        if [[ "$trackerness" != "trackers" ]]; then
            trackerness_flags=(--end-chain --no-dupplicates)
        fi
        file_list="dist/${partyness}party-${trackerness}.txt"
        file_host="dist/${partyness}party-${trackerness}-hosts.txt"

        log "Generating lists for variant ${partyness}-party ${trackerness}"

        # The actual export.
        ./export.py "${partyness_flags[@]}" "${trackerness_flags[@]}" > "$file_list"
        # Sometimes a bit heavy to have the DB open and sort the output,
        # so this is done in two steps: dump first, then sort in place.
        sort -u -o "$file_list" "$file_list"
        rules_output=$(./export.py --count "${partyness_flags[@]}" "${trackerness_flags[@]}")

        # Hosts-file variant: informational header, a blank line, then
        # every exported entry prefixed with "0.0.0.0 ".
        {
            printf '%s\n' \
                "# First-party trackers host list" \
                "# Variant: ${partyness}-party ${trackerness}" \
                "#" \
                "# About first-party trackers: https://hostfiles.frogeye.fr/#whats-a-first-party-tracker" \
                "#" \
                "# In case of false positives/negatives, or any other question," \
                "# contact me the way you like: https://geoffrey.frogeye.fr" \
                "#" \
                "# Latest versions and variants: https://hostfiles.frogeye.fr/#list-variants" \
                "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien" \
                "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE" \
                "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements" \
                "#" \
                "# Generation software: eulaurarien $gen_software" \
                "# List generation date: $gen_date" \
                "# Oldest record: $oldest_date" \
                "# Number of source websites: $number_websites" \
                "# Number of source subdomains: $number_subdomains" \
                "# Number of source DNS records: ~2E9 + $number_dns" \
                "#" \
                "# Input rules: $rules_input" \
                "# Subsequent rules: $rules_found" \
                "# … no dupplicates: $rules_found_nd" \
                "# Output rules: $rules_output" \
                "#" \
                ""
            sed 's|^|0.0.0.0 |' "$file_list"
        } > "$file_host"
    done
done
2019-12-27 15:35:30 +01:00
# Optional step: when an "explanations" directory exists, store the output
# of ./export.py --explain there in a file named after the current
# ISO 8601 timestamp.
if [[ -d explanations ]]; then
    explain_target="explanations/$(date -Isec).txt"
    ./export.py --explain > "$explain_target"
fi