Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

45 lines
2.4 KiB

#!/usr/bin/env bash
# Resolve the CNAME chain of all the known subdomains for later analysis
# Merge every subdomains/*.list file into one sorted, de-duplicated list.
cat subdomains/*.list | sort -u > temp/all_subdomains.list
# NOTE(review): the executable name after "./" appears to be missing here; as
# written the shell would try to execute the current directory and fail.
# Presumably a resolver script consumes the list and writes the CSV — TODO
# confirm and restore its name.
./ --input temp/all_subdomains.list --output temp/all_resolved.csv
# Sort and de-duplicate the resolved CSV; the filtering commands below read
# this sorted copy.
sort -u temp/all_resolved.csv > temp/all_resolved_sorted.csv
# Filter out the subdomains not pointing to a first-party tracker
# Merge rules/*.txt, dropping lines that start with '!' and lines that start
# with '[Adblock' (presumably Adblock-syntax comments and the list header),
# then sort and de-duplicate what is left.
cat rules/*.txt | grep -v '^!' | grep -v '^\[Adblock' | sort -u > temp/all_rules.txt
# NOTE(review): the executable name after "./" is missing on the next two
# lines as well — TODO confirm and restore it.
# Both invocations take the same rules and input; the second adds
# --no-explicit and writes to a separate "only" output file.
./ --rules temp/all_rules.txt --input temp/all_resolved_sorted.csv --output dist/firstparty-trackers.txt
./ --rules temp/all_rules.txt --input temp/all_resolved_sorted.csv --no-explicit --output dist/firstparty-only-trackers.txt
# Format the blocklist so it can be used as a hostlist
#######################################
# Write a hosts-file formatted block list from a plain list of hostnames.
#
# Arguments:
#   $1 - base name of the list: reads dist/$1.txt, writes dist/$1-hosts.txt
#   $2 - one-line description printed in the header of the generated file
# Reads (only to report their line counts in the header):
#   temp/all_websites.list, temp/all_subdomains.list, temp/all_rules.txt,
#   dist/firstparty-trackers.txt, dist/firstparty-only-trackers.txt
# Outputs:
#   Overwrites dist/$1-hosts.txt; prints nothing on stdout.
#######################################
function generate_hosts {
    local basename="$1"
    local description="$2"
    local host

    # Group header and entries so a single redirection writes the whole file.
    {
        echo "# First-party trackers host list"
        echo "# $description"
        echo "#"
        # NOTE(review): the next labels look like they were meant to be
        # followed by URLs that are not present here — TODO confirm.
        echo "# About first-party trackers:"
        echo "# Source code:"
        echo "#"
        echo "# Latest version:"
        echo "# - With third-party trackers:"
        echo "# - First-party trackers only:"
        echo "#"
        echo "# Generation date: $(date -Iseconds)"
        echo "# Generation version: eulaurarien $(git describe --tags)"
        # Reading from stdin makes wc print the bare count (no file name), so
        # no field splitting is needed afterwards.
        echo "# Number of source websites: $(wc -l < temp/all_websites.list)"
        echo "# Number of source subdomains: $(wc -l < temp/all_subdomains.list)"
        echo "# Number of trackers identification rules : $(wc -l < temp/all_rules.txt)"
        echo "# Number of tracker subdomains: $(wc -l < dist/firstparty-trackers.txt)"
        echo "# Number of first-party subdomains: $(wc -l < dist/firstparty-only-trackers.txt)"
        # A hosts-file entry is "<address> <hostname>"; 0.0.0.0 is used as the
        # sink address so listed hosts resolve to a non-routable target.
        while read -r host; do
            printf '0.0.0.0 %s\n' "$host"
        done < "dist/$basename.txt"
    } > "dist/$basename-hosts.txt"
}
# Produce the two published host lists: the full list, and the variant built
# from dist/firstparty-only-trackers.txt.
generate_hosts "firstparty-trackers" "Also contains trackers used in third-party"
generate_hosts "firstparty-only-trackers" "Do not contain trackers used in third-party. Use in conjunction with EasyPrivacy."