Geoffrey Frogeye
7d01d016a5
It's not very performant by itself, especially since pyre2 isn't maintained nor really compilable/installable anymore. The performance seems to have decreased from 200 req/s to 0.2 req/s when using 512 threads, and to 80 req/s when using 64 threads. This might or might not be related, as the CPU doesn't seem to be the bottleneck. I will probably add support for host-based rules, matching the subdomains of such hosts (as for now there doesn't seem to be any other pattern for first-party trackers than subdomains, and this would be a very broad performance / compatibility-with-existing-lists improvement), and convert the AdBlock lists to this format, only keeping domain-only rules.
44 lines
2.2 KiB
Bash
Executable file
44 lines
2.2 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Builds the tracker lists in dist/ from subdomains/*.list and rules/*.txt,
# using temp/ for the intermediate files.

# Stop at the first failing step (including any stage of a pipeline) and on
# unset variables, so a crashed filter cannot leave a stale or truncated list
# in dist/ that then gets published.
set -euo pipefail

# Filter out the subdomains not pointing to a first-party tracker

# sort reads and merges the files itself; no cat needed.
sort -u subdomains/*.list > temp/all_subdomains.list

# Drop comment lines ("!...") and the "[Adblock ...]" header line.
# Note: grep exits non-zero when nothing is left, which under pipefail aborts
# the script rather than continuing with an empty rule set.
cat rules/*.txt | grep -v -e '^!' -e '^\[Adblock' | sort -u > temp/all_rules.txt

./filter_subdomains.py \
  --rules temp/all_rules.txt \
  --input temp/all_subdomains.list \
  --output temp/all_toblock.list

sort -u temp/all_toblock.list > dist/firstparty-trackers.txt

./filter_out_explicit.py \
  --rules temp/all_rules.txt \
  --input dist/firstparty-trackers.txt \
  --output dist/firstparty-only-trackers.txt
|
|
|
|
# Format the blocklist so it can be used as a hostlist
|
|
|
|
# Print the number of newline-terminated lines of file $1, digits only.
# Feeding the file on stdin keeps wc from printing the file name, and
# stripping whitespace removes the left padding BSD/macOS wc emits (which made
# the previous `wc -l FILE | cut -d' ' -f1` yield an empty field there).
_count_lines() {
  wc -l < "$1" | tr -d '[:space:]'
}

#######################################
# Write dist/<name>-hosts.txt: a commented header followed by one
# "0.0.0.0 <host>" line for every line of dist/<name>.txt.
# Arguments:
#   $1 - base name of the list in dist/, without the .txt extension
#   $2 - one-line description inserted in the header
# Reads:
#   temp/all_websites.list, temp/all_subdomains.list, temp/all_rules.txt,
#   dist/firstparty-trackers.txt, dist/firstparty-only-trackers.txt,
#   dist/<name>.txt
# Outputs:
#   Overwrites dist/<name>-hosts.txt
# Returns:
#   Exit status of the final awk step (non-zero if dist/<name>.txt is
#   unreadable).
#######################################
generate_hosts() {
  # Locals, so the caller's environment is not polluted.
  local list_name="$1"
  local description="$2"

  {
    printf '%s\n' \
      "# First-party trackers host list" \
      "# $description" \
      "#" \
      "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker" \
      "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien" \
      "#" \
      "# Latest version:" \
      "# - With third-party trackers: https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt" \
      "# - First-party trackers only: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt" \
      "#" \
      "# Generation date: $(date -Iseconds)" \
      "# Generation version: eulaurarien $(git describe --tags)" \
      "# Number of source websites: $(_count_lines temp/all_websites.list)" \
      "# Number of source subdomains: $(_count_lines temp/all_subdomains.list)" \
      "# Number of trackers identification rules : $(_count_lines temp/all_rules.txt)" \
      "# Number of tracker subdomains: $(_count_lines dist/firstparty-trackers.txt)" \
      "# Number of first-party subdomains: $(_count_lines dist/firstparty-only-trackers.txt)"
    printf '\n'

    # One awk pass instead of a shell read loop: much faster on large lists,
    # leaves backslashes alone, and does not drop a final line that lacks a
    # trailing newline. Leading/trailing blanks are trimmed, as `read` did.
    awk '{
      sub(/^[ \t]+/, "")
      sub(/[ \t]+$/, "")
      print "0.0.0.0 " $0
    }' "dist/$list_name.txt"
  } > "dist/$list_name-hosts.txt"
}
|
|
|
|
# Published hosts files, as (list base name, header description) pairs.
hosts_variants=(
  "firstparty-trackers"
  "Also contains trackers used in third-party"
  "firstparty-only-trackers"
  "Do not contain trackers used in third-party. Use in conjuction with EasyPrivacy."
)

# Walk the table two entries at a time and build each hosts file in order.
for ((i = 0; i < ${#hosts_variants[@]}; i += 2)); do
  generate_hosts "${hosts_variants[i]}" "${hosts_variants[i + 1]}"
done
|