Generates a host list of first-party trackers for ad-blocking.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

30 lines
1.4 KiB

#!/usr/bin/env bash
#
# Builds the first-party tracker blocklist and a hosts-style variant of it.
#
# Reads:  subdomains/*.list, temp/all_websites.list
# Writes: temp/all_subdomains.list, temp/all_toblock.list,
#         dist/firstparty-trackers.txt, dist/firstparty-trackers-hosts.txt
# Must be run from the directory that contains subdomains/, temp/ and dist/,
# since every path below is relative.

# Filter out the subdomains not pointing to a first-party tracker
# sort reads the input files directly; no separate cat process is needed.
sort -u subdomains/*.list > temp/all_subdomains.list
# NOTE(review): the program name after "./" is missing, so this line tries to
# execute the current directory and fails. Restore the filter program's name.
./ --input temp/all_subdomains.list --output temp/all_toblock.list
sort -u temp/all_toblock.list > dist/firstparty-trackers.txt

# Format the blocklist so it can be used as a hostlist
# Everything printed inside the parentheses is captured into the hosts file
# by the single redirection on the closing line.
(
  echo "# First-party trackers host list"
  echo "#"
  echo "# About first-party trackers:"
  echo "# Source code:"
  echo "# Latest version of this list:"
  echo "#"
  echo "# Generation date: $(date -Isec)"
  echo "# Generation version: eulaurarien $(git describe --tags)"
  # Reading from stdin makes wc print only the count (no file name), which
  # removes the need to trim the output with cut.
  echo "# Number of source websites: $(wc -l < temp/all_websites.list)"
  echo "# Number of source subdomains: $(wc -l < temp/all_subdomains.list)"
  echo "# Number of known trackers : $(python -c 'import regexes; print(len(regexes.REGEXES))')"
  echo "# Number of blocked subdomains: $(wc -l < dist/firstparty-trackers.txt)"
  # NOTE(review): the program name after "./" is missing here as well, and a
  # bare "wc" prints line, word and byte counts rather than one number. Confirm
  # the intended command.
  echo "# Number of first-party subdomains: $(./ dist/firstparty-trackers.txt | wc)"
  # One output line per blocked host.
  # NOTE(review): a hosts-file entry normally starts with an address; the text
  # before the space in the string below appears to be missing. Confirm and
  # restore it.
  while IFS= read -r host; do
    echo " $host"
  done < dist/firstparty-trackers.txt
) > dist/firstparty-trackers-hosts.txt