Geoffrey Frogeye
38cf532854
Split in two actually (program and list). Closes #3 Also, Closes #1 Because I forgot to do it earlier.
99 lines
3.9 KiB
Bash
Executable file
99 lines
3.9 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
|
|
# Print a progress message in yellow on stdout.
# Arguments: $@ - words of the message; they are joined with single spaces
# Outputs:   the message wrapped in the ANSI "yellow" / "reset" sequences,
#            followed by a newline
function log() {
    # Force a space separator so "$*" joins the words the same way echo did,
    # whatever IFS the caller is using.
    local IFS=' '
    # printf '%s' prints the message verbatim: backslashes contained in the
    # message are no longer interpreted as escape sequences.
    printf '\033[33m%s\033[0m\n' "$*"
}
|
|
|
|
log "Calculating statistics…"

# Values below are gathered once and reused in the header of every
# generated hosts file.

# Generation timestamp, ISO 8601 with seconds precision
gen_date=$(date -Isec)
# Version of the generator, as described by the closest git tag
gen_software=$(git describe --tags)
# Line counts of the source lists
number_websites=$(wc -l < temp/all_websites.list)
number_subdomains=$(wc -l < temp/all_subdomains.list)
# Number of empty lines in the resolver output
# NOTE(review): presumably there is one empty line per DNS record — confirm
# against whatever produces temp/all_resolved.txt
number_dns=$(grep -c '^$' temp/all_resolved.txt)
|
|
|
|
# Print the public URL of the hosts file of a given variant, marking the
# variant that is currently being generated.
# Globals:   partyness, trackerness (read) - variant currently generated
# Arguments: $1 - partyness of the linked variant (first, multi)
#            $2 - trackerness of the linked variant (trackers, only-trackers)
# Outputs:   the URL on stdout, followed by " (this one)" when both arguments
#            match the variant currently generated
function link() {
    local url="https://hostfiles.frogeye.fr/${1}party-${2}-hosts.txt"
    if [[ "$1" == "$partyness" && "$2" == "$trackerness" ]]
    then
        url="$url (this one)"
    fi
    printf '%s\n' "$url"
}

# For each partyness, print the statistics then generate every variant:
# a raw list (dist/*party-*.txt) and a hosts file (dist/*party-*-hosts.txt).
for partyness in first multi
do
    # Flags are stored in arrays so that an empty set expands to no argument
    # at all, without relying on word splitting of an unquoted string.
    if [[ "$partyness" == "first" ]]
    then
        partyness_flags=(--first-party)
    else
        partyness_flags=()
    fi

    echo "Statistics for ${partyness}-party trackers"
    # These two counts only depend on the partyness: they are computed once
    # here and reused in the header of each variant below.
    # NOTE(review): assumes export.py does not modify the database — confirm
    rules_input=$(./export.py --count --base-rules "${partyness_flags[@]}")
    echo "Input rules: $rules_input"
    rules_found=$(./export.py --count --rules "${partyness_flags[@]}")
    echo "Subsequent rules: $rules_found"
    echo "Subsequent rules (no dupplicate): $(./export.py --count --rules --no-dupplicates "${partyness_flags[@]}")"
    echo "Output hostnames: $(./export.py --count "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates "${partyness_flags[@]}")"
    echo "Output hostnames (end-chain only): $(./export.py --count --end-chain "${partyness_flags[@]}")"
    echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain "${partyness_flags[@]}")"
    echo

    for trackerness in trackers only-trackers
    do
        if [[ "$trackerness" == "trackers" ]]
        then
            trackerness_flags=()
        else
            trackerness_flags=(--end-chain --no-dupplicates)
        fi
        file_list="dist/${partyness}party-${trackerness}.txt"
        file_host="dist/${partyness}party-${trackerness}-hosts.txt"

        log "Generating lists for variant ${partyness}-party ${trackerness}…"

        # Actual export of the raw list
        ./export.py "${partyness_flags[@]}" "${trackerness_flags[@]}" > "$file_list"
        # Sometimes a bit heavy to have the DB open and sort the output
        # so this is done in two steps
        sort -u "$file_list" -o "$file_list"

        rules_output=$(./export.py --count "${partyness_flags[@]}" "${trackerness_flags[@]}")

        # Hosts file: commented header followed by the raw list, each entry
        # prefixed with 0.0.0.0
        {
            echo "# First-party trackers host list"
            echo "# Variant: ${partyness}-party ${trackerness}"
            echo "#"
            echo "# About first-party trackers: TODO"
            echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
            echo "#"
            echo "# In case of false positives/negatives, or any other question,"
            echo "# contact me the way you like: https://geoffrey.frogeye.fr"
            echo "#"
            echo "# Latest versions and variants:"
            echo "# - First-party trackers : $(link first trackers)"
            echo "# - … excluding redirected: $(link first only-trackers)"
            echo "# - First and third party : $(link multi trackers)"
            echo "# - … excluding redirected: $(link multi only-trackers)"
            echo '# (variants informations: TODO)'
            echo '# (you can remove `-hosts` to get the raw list)'
            echo "#"
            echo "# Generation date: $gen_date"
            echo "# Generation software: eulaurarien $gen_software"
            echo "# Number of source websites: $number_websites"
            echo "# Number of source subdomains: $number_subdomains"
            echo "# Number of source DNS records: ~2E9 + $number_dns"
            echo "#"
            echo "# Input rules: $rules_input"
            echo "# Subsequent rules: $rules_found"
            echo "# Output rules: $rules_output"
            echo "#"
            echo
            sed 's|^|0.0.0.0 |' "$file_list"
        } > "$file_host"

    done
done
|