#!/usr/bin/env bash
#
# Build the first-party tracker blocklists and their hosts-file variants.
#
# Reads:  subdomains/*.list, rules/*.txt
# Writes: temp/all_subdomains.list, temp/all_resolved.csv,
#         temp/all_resolved_sorted.csv, temp/all_rules.txt,
#         dist/firstparty-trackers.txt, dist/firstparty-only-trackers.txt,
#         dist/firstparty-trackers-hosts.txt,
#         dist/firstparty-only-trackers-hosts.txt
#
# All paths are relative: run it from the directory that contains
# resolve_subdomains.py and filter_subdomains.py.

# Stop at the first failing step so a broken resolve/filter run never
# overwrites the published lists with partial data.
set -euo pipefail

# Every intermediate and final artifact lives in these two directories.
mkdir -p temp dist

# Resolve the CNAME chain of all the known subdomains for later analysis
sort -u subdomains/*.list > temp/all_subdomains.list
./resolve_subdomains.py \
  --input temp/all_subdomains.list \
  --output temp/all_resolved.csv
sort -u temp/all_resolved.csv > temp/all_resolved_sorted.csv

# Filter out the subdomains not pointing to a first-party tracker
# Lines starting with "!" or "[Adblock" are dropped from the rule files
# (presumably Adblock-syntax comments and header -- confirm against rules/).
# If nothing survives the filter, grep exits non-zero and the run aborts.
cat rules/*.txt \
  | grep -v -e '^!' -e '^\[Adblock' \
  | sort -u > temp/all_rules.txt
./filter_subdomains.py \
  --rules temp/all_rules.txt \
  --input temp/all_resolved_sorted.csv \
  --output dist/firstparty-trackers.txt
./filter_subdomains.py \
  --rules temp/all_rules.txt \
  --input temp/all_resolved_sorted.csv \
  --no-explicit \
  --output dist/firstparty-only-trackers.txt

#######################################
# Print the number of lines in a file as a bare integer.
# Reading through stdin keeps the file name out of wc's output, and the
# arithmetic expansion strips the left padding some wc implementations add.
# Arguments:
#   $1 - path of the file to count
# Outputs:
#   The line count on stdout; nothing if the file cannot be read.
#######################################
_count_lines() {
  local count
  count=$(wc -l < "$1") || return
  echo "$((count))"
}

#######################################
# Format the blocklist so it can be used as a hostlist.
# Emits a commented header (links, generation date/version, statistics)
# followed by one "0.0.0.0 <host>" line per line of the input list.
# Arguments:
#   $1 - list base name: reads dist/$1.txt, writes dist/$1-hosts.txt
#   $2 - one-line description placed in the header
# Outputs:
#   dist/$1-hosts.txt (overwritten)
#######################################
generate_hosts() {
  local list_name="$1"
  local description="$2"
  local host

  {
    echo "# First-party trackers host list"
    printf '# %s\n' "$description"
    echo "#"
    echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
    echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
    echo "#"
    echo "# Latest version:"
    echo "# - With third-party trackers: https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt"
    echo "# - First-party trackers only: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
    echo "#"
    echo "# Generation date: $(date -Isec)"
    echo "# Generation version: eulaurarien $(git describe --tags)"
    # NOTE(review): temp/all_websites.list is not produced anywhere in this
    # script; it has to exist already or this count is left blank.
    echo "# Number of source websites: $(_count_lines temp/all_websites.list)"
    echo "# Number of source subdomains: $(_count_lines temp/all_subdomains.list)"
    echo "# Number of trackers identification rules : $(_count_lines temp/all_rules.txt)"
    echo "# Number of tracker subdomains: $(_count_lines dist/firstparty-trackers.txt)"
    echo "# Number of first-party subdomains: $(_count_lines dist/firstparty-only-trackers.txt)"
    echo
    # -r keeps backslashes literal; the extra test keeps a final line that
    # has no trailing newline.
    while read -r host || [[ -n "$host" ]]; do
      printf '0.0.0.0 %s\n' "$host"
    done < "dist/$list_name.txt"
  } > "dist/$list_name-hosts.txt"
}

generate_hosts "firstparty-trackers" "Also contains trackers used in third-party"
generate_hosts "firstparty-only-trackers" "Do not contain trackers used in third-party. Use in conjuction with EasyPrivacy."