Geoffrey Frogeye
3b6f7a58b3
Rapid7 changed their privacy / pricing model, and as a result I no longer have access to their massive DNS dataset, even after asking. The list has been frozen since 2022-01-02 while I looked for an alternative, but I couldn't find one. To get the list updating again with the DNS sources I still have, I now use the last version of the list generated with the Rapid7 dataset as an input for subdomains, which will be resolved with MassDNS.
99 lines
3.8 KiB
Bash
Executable file
99 lines
3.8 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
|
|
#######################################
# Print a progress message highlighted in yellow.
# Arguments: the words of the message; they are joined with the first
#            character of IFS (a space by default).
# Outputs:   the colored message followed by a newline, on stdout.
#######################################
log() {
  # "$*" rather than "$@": the arguments form one string. The message is
  # passed as data to %s, so backslashes and percent signs in it are
  # printed verbatim instead of being interpreted.
  printf '\033[33m%s\033[0m\n' "$*"
}
|
|
|
|
log "Calculating statistics…"

# Numerically smallest line found across last_updates/*.txt; it is handed to
# `date -d @…`, i.e. interpreted as seconds since the Unix epoch.
oldest="$(sort -n -- last_updates/*.txt | head -n 1)"
oldest_date="$(date -Isec -d "@${oldest}")"

# Generation time and the version of this repository (nearest git tag).
gen_date="$(date -Isec)"
gen_software="$(git describe --tags)"

# Line counts of the intermediate files.
number_websites="$(wc -l < temp/all_websites.list)"
number_subdomains="$(wc -l < temp/all_subdomains.list)"

# Number of lines mentioning NOERROR. grep -c exits non-zero when nothing
# matches or the file cannot be read; fall back to 0 in both cases, which is
# what counting the (empty) matches would yield.
number_dns="$(grep -c 'NOERROR' temp/all_resolved.txt)" || number_dns=0
|
|
|
|
# For each party scope (first, multi): print rule/hostname statistics, then
# for each tracker scope (trackers, only-trackers) write the plain list
# dist/<party>party-<tracker>.txt and its hosts-file counterpart
# dist/<party>party-<tracker>-hosts.txt.
for partyness in first multi; do
  # Extra export.py options live in arrays so that "no option" expands to
  # zero words without relying on unquoted word splitting.
  if [[ "$partyness" == "first" ]]; then
    partyness_flags=(--first-party)
  else
    partyness_flags=()
  fi

  rules_input="$(./export.py --count --base-rules "${partyness_flags[@]}")"
  rules_found="$(./export.py --count --rules "${partyness_flags[@]}")"
  rules_found_nd="$(./export.py --count --rules --no-dupplicates "${partyness_flags[@]}")"

  printf '\n'
  printf '%s\n' "Statistics for ${partyness}-party trackers"
  printf '%s\n' "Input rules: $rules_input"
  printf '%s\n' "Subsequent rules: $rules_found"
  printf '%s\n' "Subsequent rules (no dupplicate): $rules_found_nd"
  printf '%s\n' "Output hostnames: $(./export.py --count "${partyness_flags[@]}")"
  printf '%s\n' "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates "${partyness_flags[@]}")"
  printf '%s\n' "Output hostnames (end-chain only): $(./export.py --count --end-chain "${partyness_flags[@]}")"
  printf '%s\n' "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain "${partyness_flags[@]}")"

  for trackerness in trackers only-trackers; do
    # The "only-trackers" variant is the export without duplicates.
    if [[ "$trackerness" == "trackers" ]]; then
      trackerness_flags=()
    else
      trackerness_flags=(--no-dupplicates)
    fi
    file_list="dist/${partyness}party-${trackerness}.txt"
    file_host="dist/${partyness}party-${trackerness}-hosts.txt"

    log "Generating lists for variant ${partyness}-party ${trackerness}…"

    # The actual export. Sorting is a separate, in-place step because keeping
    # the database open while sorting the output is sometimes a bit heavy.
    ./export.py "${partyness_flags[@]}" "${trackerness_flags[@]}" > "$file_list"
    sort -u -o "$file_list" "$file_list"

    rules_output="$(./export.py --count "${partyness_flags[@]}" "${trackerness_flags[@]}")"

    # Hosts-file variant: a commented header with provenance and statistics,
    # a blank line, then every exported entry prefixed with 0.0.0.0.
    {
      printf '%s\n' \
        "# First-party trackers host list" \
        "# Variant: ${partyness}-party ${trackerness}" \
        "#" \
        "# About first-party trackers: https://hostfiles.frogeye.fr/#whats-a-first-party-tracker" \
        "#" \
        "# In case of false positives/negatives, or any other question," \
        "# contact me the way you like: https://geoffrey.frogeye.fr" \
        "#" \
        "# Latest versions and variants: https://hostfiles.frogeye.fr/#list-variants" \
        "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien" \
        "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE" \
        "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements" \
        "#" \
        "# Generation software: eulaurarien $gen_software" \
        "# List generation date: $gen_date" \
        "# Oldest record: $oldest_date" \
        "# Number of source websites: $number_websites" \
        "# Number of source subdomains: $number_subdomains" \
        "# Number of source DNS records: $number_dns" \
        "#" \
        "# Input rules: $rules_input" \
        "# Subsequent rules: $rules_found" \
        "# … no dupplicates: $rules_found_nd" \
        "# Output rules: $rules_output" \
        "#" \
        ""
      sed 's|^|0.0.0.0 |' "$file_list"
    } > "$file_host"
  done
done
|
|
|
|
# When an explanations/ directory exists, store a timestamped dump of
# `export.py --explain` in it and repoint explanations/latest.txt at the
# newly written file.
if [[ -d explanations ]]; then
  explanation_name="$(date -Isec).txt"
  ./export.py --explain > "explanations/${explanation_name}"
  # The link target is relative, so it resolves inside explanations/.
  ln -sf "${explanation_name}" "explanations/latest.txt"
fi
|