Better list output
This commit is contained in:
parent
fd8bfee088
commit
2b97ee4cb9
2
dist/README.md
vendored
2
dist/README.md
vendored
|
@ -70,7 +70,7 @@ In the other hand, they might protect against first-party tracker that we're not
|
||||||
|
|
||||||
This is the same list as above, albeit not containing the hostnames under the tracking company domains (e.g. `website1.trackercompany.com`).
|
This is the same list as above, albeit not containing the hostnames under the tracking company domains (e.g. `website1.trackercompany.com`).
|
||||||
While those are technically third-party trackers, they cannot be blocked at once by some ad blockers (e.g. Pi-hole).
|
While those are technically third-party trackers, they cannot be blocked at once by some ad blockers (e.g. Pi-hole).
|
||||||
Use only with an ad blocker able to import regular expressions and in conjunction with other block lists.
|
Use only with an ad blocker able to import regular expressions and in conjunction with other block lists, especially the ones in the previous section.
|
||||||
|
|
||||||
## Meta
|
## Meta
|
||||||
|
|
||||||
|
|
|
@ -5,11 +5,13 @@ function log() {
|
||||||
}
|
}
|
||||||
|
|
||||||
log "Calculating statistics…"
|
log "Calculating statistics…"
|
||||||
|
oldest="$(cat last_updates/*.txt | sort -n | head -1)"
|
||||||
|
oldest_date=$(date -Isec -d @$oldest)
|
||||||
gen_date=$(date -Isec)
|
gen_date=$(date -Isec)
|
||||||
gen_software=$(git describe --tags)
|
gen_software=$(git describe --tags)
|
||||||
number_websites=$(wc -l < temp/all_websites.list)
|
number_websites=$(wc -l < temp/all_websites.list)
|
||||||
number_subdomains=$(wc -l < temp/all_subdomains.list)
|
number_subdomains=$(wc -l < temp/all_subdomains.list)
|
||||||
number_dns=$(grep '^$' temp/all_resolved.txt | wc -l)
|
number_dns=$(grep 'NOERROR' temp/all_resolved.txt | wc -l)
|
||||||
|
|
||||||
for partyness in {first,multi}
|
for partyness in {first,multi}
|
||||||
do
|
do
|
||||||
|
@ -20,15 +22,19 @@ do
|
||||||
partyness_flags=""
|
partyness_flags=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
rules_input=$(./export.py --count --base-rules $partyness_flags)
|
||||||
|
rules_found=$(./export.py --count --rules $partyness_flags)
|
||||||
|
rules_found_nd=$(./export.py --count --rules --no-dupplicates $partyness_flags)
|
||||||
|
|
||||||
|
echo
|
||||||
echo "Statistics for ${partyness}-party trackers"
|
echo "Statistics for ${partyness}-party trackers"
|
||||||
echo "Input rules: $(./export.py --count --base-rules $partyness_flags)"
|
echo "Input rules: $rules_input"
|
||||||
echo "Subsequent rules: $(./export.py --count --rules $partyness_flags)"
|
echo "Subsequent rules: $rules_found"
|
||||||
echo "Subsequent rules (no dupplicate): $(./export.py --count --rules --no-dupplicates $partyness_flags)"
|
echo "Subsequent rules (no dupplicate): $rules_found_nd"
|
||||||
echo "Output hostnames: $(./export.py --count $partyness_flags)"
|
echo "Output hostnames: $(./export.py --count $partyness_flags)"
|
||||||
echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates $partyness_flags)"
|
echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates $partyness_flags)"
|
||||||
echo "Output hostnames (end-chain only): $(./export.py --count --end-chain $partyness_flags)"
|
echo "Output hostnames (end-chain only): $(./export.py --count --end-chain $partyness_flags)"
|
||||||
echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain $partyness_flags)"
|
echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain $partyness_flags)"
|
||||||
echo
|
|
||||||
|
|
||||||
for trackerness in {trackers,only-trackers}
|
for trackerness in {trackers,only-trackers}
|
||||||
do
|
do
|
||||||
|
@ -49,50 +55,32 @@ do
|
||||||
# so this is done in two steps
|
# so this is done in two steps
|
||||||
sort -u $file_list -o $file_list
|
sort -u $file_list -o $file_list
|
||||||
|
|
||||||
rules_input=$(./export.py --count --base-rules $partyness_flags)
|
|
||||||
rules_found=$(./export.py --count --rules $partyness_flags)
|
|
||||||
rules_output=$(./export.py --count $partyness_flags $trackerness_flags)
|
rules_output=$(./export.py --count $partyness_flags $trackerness_flags)
|
||||||
|
|
||||||
function link() { # link partyness, link trackerness
|
|
||||||
url="https://hostfiles.frogeye.fr/${1}party-${2}-hosts.txt"
|
|
||||||
if [ "$1" = "$partyness" ] && [ "$2" = "$trackerness" ]
|
|
||||||
then
|
|
||||||
url="$url (this one)"
|
|
||||||
fi
|
|
||||||
echo $url
|
|
||||||
}
|
|
||||||
|
|
||||||
(
|
(
|
||||||
echo "# First-party trackers host list"
|
echo "# First-party trackers host list"
|
||||||
echo "# Variant: ${partyness}-party ${trackerness}"
|
echo "# Variant: ${partyness}-party ${trackerness}"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# About first-party trackers: "
|
echo "# About first-party trackers: https://hostfiles.frogeye.fr/#whats-a-first-party-tracker"
|
||||||
echo "# https://hostfiles.frogeye.fr/#whats-a-first-party-tracker"
|
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# In case of false positives/negatives, or any other question,"
|
echo "# In case of false positives/negatives, or any other question,"
|
||||||
echo "# contact me the way you like: https://geoffrey.frogeye.fr"
|
echo "# contact me the way you like: https://geoffrey.frogeye.fr"
|
||||||
echo "#"
|
echo "#"
|
||||||
|
echo "# Latest versions and variants: https://hostfiles.frogeye.fr/#list-variants"
|
||||||
echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
|
echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
|
||||||
echo "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE"
|
echo "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE"
|
||||||
echo "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements"
|
echo "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Latest versions and variants:"
|
|
||||||
echo "# - First-party trackers : $(link first trackers)"
|
|
||||||
echo "# - … excluding redirected: $(link first only-trackers)"
|
|
||||||
echo "# - First and third party : $(link multi trackers)"
|
|
||||||
echo "# - … excluding redirected: $(link multi only-trackers)"
|
|
||||||
echo '# (you can remove `-hosts` to get the raw list)'
|
|
||||||
echo '# Information about the variants:'
|
|
||||||
echo '# https://hostfiles.frogeye.fr/#list-variants'
|
|
||||||
echo "#"
|
|
||||||
echo "# Generation date: $gen_date"
|
|
||||||
echo "# Generation software: eulaurarien $gen_software"
|
echo "# Generation software: eulaurarien $gen_software"
|
||||||
|
echo "# List generation date: $gen_date"
|
||||||
|
echo "# Oldest record: $oldest_date"
|
||||||
echo "# Number of source websites: $number_websites"
|
echo "# Number of source websites: $number_websites"
|
||||||
echo "# Number of source subdomains: $number_subdomains"
|
echo "# Number of source subdomains: $number_subdomains"
|
||||||
echo "# Number of source DNS records: ~2E9 + $number_dns"
|
echo "# Number of source DNS records: ~2E9 + $number_dns"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Input rules: $rules_input"
|
echo "# Input rules: $rules_input"
|
||||||
echo "# Subsequent rules: $rules_found"
|
echo "# Subsequent rules: $rules_found"
|
||||||
|
echo "# … no dupplicates: $rules_found_nd"
|
||||||
echo "# Output rules: $rules_output"
|
echo "# Output rules: $rules_output"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo
|
echo
|
||||||
|
|
|
@ -17,7 +17,7 @@ pv -f subdomains/*.list | ./validate_list.py --domain | rev | sort -u | rev > te
|
||||||
|
|
||||||
log "Resolving subdomain…"
|
log "Resolving subdomain…"
|
||||||
date +%s > "last_updates/massdns.txt"
|
date +%s > "last_updates/massdns.txt"
|
||||||
"$MASSDNS_BINARY" --output Snrql --retry REFUSED,SERVFAIL --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list
|
"$MASSDNS_BINARY" --output Snrql --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list
|
||||||
|
|
||||||
log "Importing into database…"
|
log "Importing into database…"
|
||||||
[ $SINGLE_PROCESS -eq 1 ] && EXTRA_ARGS="--single-process"
|
[ $SINGLE_PROCESS -eq 1 ] && EXTRA_ARGS="--single-process"
|
||||||
|
|
Loading…
Reference in a new issue