Browse Source

Better list output

tags/v2.2
Geoffrey Frogeye 3 months ago
parent
commit
2b97ee4cb9
Signed by: geoffrey <geoffrey@frogeye.fr> GPG Key ID: D8A7ECA00A8CD3DD
3 changed files with 18 additions and 30 deletions
  1. +1
    -1
      dist/README.md
  2. +16
    -28
      export_lists.sh
  3. +1
    -1
      resolve_subdomains.sh

+ 1
- 1
dist/README.md View File

@@ -70,7 +70,7 @@ In the other hand, they might protect against first-party tracker that we're not

This is the same list as above, albeit not containing the hostnames under the tracking company domains (e.g. `website1.trackercompany.com`).
While those are technically third-party trackers, they cannot be blocked at once by some ad blockers (e.g. Pi-hole).
Use only with an ad blocker able to import regular expressions and in conjunction with other block lists.
Use only with an ad blocker able to import regular expressions and in conjunction with other block lists, especially the ones in the previous section.

## Meta



+ 16
- 28
export_lists.sh View File

@@ -5,11 +5,13 @@ function log() {
}

# Collect the dates, version string and line counts stored in the variables below.
log "Calculating statistics…"
# Smallest value found across all last_updates/*.txt files
# (numeric sort, first line kept).
oldest="$(cat last_updates/*.txt | sort -n | head -1)"
# NOTE(review): the @ prefix means $oldest is treated as seconds since the
# epoch — confirm every last_updates/*.txt file holds such a timestamp.
oldest_date=$(date -Isec -d @$oldest)
# Current date, in the same -I (ISO 8601) format as oldest_date.
gen_date=$(date -Isec)
# Output of `git describe --tags` for the current checkout.
gen_software=$(git describe --tags)
# Line counts of the intermediate list files.
number_websites=$(wc -l < temp/all_websites.list)
number_subdomains=$(wc -l < temp/all_subdomains.list)
# NOTE(review): number_dns is assigned twice in a row, so only the second
# value survives. The first counts empty lines of temp/all_resolved.txt,
# the second counts lines containing NOERROR.
number_dns=$(grep '^$' temp/all_resolved.txt | wc -l)
number_dns=$(grep 'NOERROR' temp/all_resolved.txt | wc -l)

for partyness in {first,multi}
do
@@ -20,15 +22,19 @@ do
partyness_flags=""
fi

rules_input=$(./export.py --count --base-rules $partyness_flags)
rules_found=$(./export.py --count --rules $partyness_flags)
rules_found_nd=$(./export.py --count --rules --no-dupplicates $partyness_flags)

echo
echo "Statistics for ${partyness}-party trackers"
echo "Input rules: $(./export.py --count --base-rules $partyness_flags)"
echo "Subsequent rules: $(./export.py --count --rules $partyness_flags)"
echo "Subsequent rules (no dupplicate): $(./export.py --count --rules --no-dupplicates $partyness_flags)"
echo "Input rules: $rules_input"
echo "Subsequent rules: $rules_found"
echo "Subsequent rules (no dupplicate): $rules_found_nd"
echo "Output hostnames: $(./export.py --count $partyness_flags)"
echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates $partyness_flags)"
echo "Output hostnames (end-chain only): $(./export.py --count --end-chain $partyness_flags)"
echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain $partyness_flags)"
echo

for trackerness in {trackers,only-trackers}
do
@@ -49,50 +55,32 @@ do
# so this is done in two steps
sort -u $file_list -o $file_list

rules_input=$(./export.py --count --base-rules $partyness_flags)
rules_found=$(./export.py --count --rules $partyness_flags)
rules_output=$(./export.py --count $partyness_flags $trackerness_flags)

# Print the download URL of one host-list variant, marking the variant
# currently being generated.
# Globals:   partyness, trackerness (read) - variant being generated
# Arguments: $1 - partyness of the variant to link
#            $2 - trackerness of the variant to link
# Outputs:   the URL on stdout, followed by " (this one)" when both
#            arguments equal the corresponding globals
function link() { # link partyness, link trackerness
  # Declared local so the scratch variable does not leak into (or clobber)
  # a `url` variable of the caller.
  local url="https://hostfiles.frogeye.fr/${1}party-${2}-hosts.txt"
  if [ "$1" = "$partyness" ] && [ "$2" = "$trackerness" ]
  then
    url="$url (this one)"
  fi
  # Quoted so the URL is printed verbatim, without word splitting or globbing.
  printf '%s\n' "$url"
}

(
echo "# First-party trackers host list"
echo "# Variant: ${partyness}-party ${trackerness}"
echo "#"
echo "# About first-party trackers: "
echo "# https://hostfiles.frogeye.fr/#whats-a-first-party-tracker"
echo "# About first-party trackers: https://hostfiles.frogeye.fr/#whats-a-first-party-tracker"
echo "#"
echo "# In case of false positives/negatives, or any other question,"
echo "# contact me the way you like: https://geoffrey.frogeye.fr"
echo "#"
echo "# Latest versions and variants: https://hostfiles.frogeye.fr/#list-variants"
echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
echo "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE"
echo "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements"
echo "#"
echo "# Latest versions and variants:"
echo "# - First-party trackers : $(link first trackers)"
echo "# - … excluding redirected: $(link first only-trackers)"
echo "# - First and third party : $(link multi trackers)"
echo "# - … excluding redirected: $(link multi only-trackers)"
echo '# (you can remove `-hosts` to get the raw list)'
echo '# Information about the variants:'
echo '# https://hostfiles.frogeye.fr/#list-variants'
echo "#"
echo "# Generation date: $gen_date"
echo "# Generation software: eulaurarien $gen_software"
echo "# List generation date: $gen_date"
echo "# Oldest record: $oldest_date"
echo "# Number of source websites: $number_websites"
echo "# Number of source subdomains: $number_subdomains"
echo "# Number of source DNS records: ~2E9 + $number_dns"
echo "#"
echo "# Input rules: $rules_input"
echo "# Subsequent rules: $rules_found"
echo "# … no dupplicates: $rules_found_nd"
echo "# Output rules: $rules_output"
echo "#"
echo


+ 1
- 1
resolve_subdomains.sh View File

@@ -17,7 +17,7 @@ pv -f subdomains/*.list | ./validate_list.py --domain | rev | sort -u | rev > te

log "Resolving subdomain…"
# Store the current time (seconds since the epoch) before resolution starts.
date +%s > "last_updates/massdns.txt"
# NOTE(review): the resolver is invoked twice in a row with the same input
# list and the same --outfile; the two commands differ only in the
# `--retry REFUSED,SERVFAIL` option. Confirm that only one is intended.
"$MASSDNS_BINARY" --output Snrql --retry REFUSED,SERVFAIL --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list
"$MASSDNS_BINARY" --output Snrql --hashmap-size "$MASSDNS_HASHMAP_SIZE" --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list

log "Importing into database…"
# EXTRA_ARGS is only set when SINGLE_PROCESS equals 1; otherwise it keeps
# whatever value it already had.
[ $SINGLE_PROCESS -eq 1 ] && EXTRA_ARGS="--single-process"


Loading…
Cancel
Save