parent
1c20963ffd
commit
025370bbbe
|
@ -11,25 +11,35 @@ echo "Compiling rules..." > /dev/stderr
|
||||||
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | sort -u > temp/all_rules_adblock.txt
|
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | sort -u > temp/all_rules_adblock.txt
|
||||||
./adblock_to_domain_list.py --input temp/all_rules_adblock.txt --output rules/from_adblock.cache.list
|
./adblock_to_domain_list.py --input temp/all_rules_adblock.txt --output rules/from_adblock.cache.list
|
||||||
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 > rules/from_hosts.cache.list
|
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 > rules/from_hosts.cache.list
|
||||||
cat rules/*.list | grep -v '^#' | grep -v '^$' | sort -u > temp/all_rules.list
|
cat rules/*.list | grep -v '^#' | grep -v '^$' | sort -u > temp/all_rules_multi.list
|
||||||
|
cat rules/first-party.list | grep -v '^#' | grep -v '^$' | sort -u > temp/all_rules_first.list
|
||||||
|
|
||||||
# Filter out the subdomains not pointing to a first-party tracker
|
echo "Filtering first-party tracking domains..." > /dev/stderr
|
||||||
echo "Filtering tracking domains..." > /dev/stderr
|
./filter_subdomains.py --rules temp/all_rules_first.list --input temp/all_resolved_sorted.csv --output temp/firstparty-trackers.list
|
||||||
./filter_subdomains.py --rules temp/all_rules.list --input temp/all_resolved_sorted.csv --output temp/firstparty-trackers.list
|
|
||||||
sort -u temp/firstparty-trackers.list > dist/firstparty-trackers.txt
|
sort -u temp/firstparty-trackers.list > dist/firstparty-trackers.txt
|
||||||
|
|
||||||
echo "Filtering first-party only tracking domains..." > /dev/stderr
|
echo "Filtering first-party curated tracking domains..." > /dev/stderr
|
||||||
./filter_subdomains.py --rules temp/all_rules.list --input temp/all_resolved_sorted.csv --no-explicit --output temp/firstparty-only-trackers.list
|
./filter_subdomains.py --rules temp/all_rules_first.list --input temp/all_resolved_sorted.csv --no-explicit --output temp/firstparty-only-trackers.list
|
||||||
sort -u temp/firstparty-only-trackers.list > dist/firstparty-only-trackers.txt
|
sort -u temp/firstparty-only-trackers.list > dist/firstparty-only-trackers.txt
|
||||||
|
|
||||||
|
echo "Filtering multi-party tracking domains..." > /dev/stderr
|
||||||
|
./filter_subdomains.py --rules temp/all_rules_multi.list --input temp/all_resolved_sorted.csv --output temp/multiparty-trackers.list
|
||||||
|
sort -u temp/multiparty-trackers.list > dist/multiparty-trackers.txt
|
||||||
|
|
||||||
|
echo "Filtering multi-party curated tracking domains..." > /dev/stderr
|
||||||
|
./filter_subdomains.py --rules temp/all_rules_multi.list --input temp/all_resolved_sorted.csv --no-explicit --output temp/multiparty-only-trackers.list
|
||||||
|
sort -u temp/multiparty-only-trackers.list > dist/multiparty-only-trackers.txt
|
||||||
|
|
||||||
# Format the blocklist so it can be used as a hostlist
|
# Format the blocklist so it can be used as a hostlist
|
||||||
function generate_hosts {
|
function generate_hosts {
|
||||||
basename="$1"
|
basename="$1"
|
||||||
description="$2"
|
description="$2"
|
||||||
|
# Second header line: taken from the THIRD argument (callers pass basename, description, description2).
description2="$3"
|
||||||
|
|
||||||
(
|
(
|
||||||
echo "# First-party trackers host list"
|
echo "# First-party trackers host list"
|
||||||
echo "# $description"
|
echo "# $description"
|
||||||
|
echo "# $description2"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
|
echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
|
||||||
echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
|
echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
|
||||||
|
@ -38,16 +48,21 @@ function generate_hosts {
|
||||||
echo "# contact me the way you like: https://geoffrey.frogeye.fr"
|
echo "# contact me the way you like: https://geoffrey.frogeye.fr"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Latest version:"
|
echo "# Latest version:"
|
||||||
echo "# - With third-party trackers: https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt"
|
echo "# - First-party trackers : https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt"
|
||||||
echo "# - First-party trackers only: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
|
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
|
||||||
|
echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
|
||||||
|
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Generation date: $(date -Isec)"
|
echo "# Generation date: $(date -Isec)"
|
||||||
echo "# Generation software: eulaurarien $(git describe --tags)"
|
echo "# Generation software: eulaurarien $(git describe --tags)"
|
||||||
echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
|
echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
|
||||||
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
|
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
|
||||||
echo "# Number of trackers identification rules : $(wc -l temp/all_rules.list | cut -d' ' -f1)"
|
echo "# Number of known first-party trackers: $(wc -l temp/all_rules_first.list | cut -d' ' -f1)"
|
||||||
echo "# Number of tracker subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
echo "# Number of known multi-party trackers: $(wc -l temp/all_rules_multi.list | cut -d' ' -f1)"
|
||||||
echo "# Number of first-party subdomains: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
|
echo "# Number of first-party subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
||||||
|
echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
|
||||||
|
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
||||||
|
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
||||||
echo
|
echo
|
||||||
cat "dist/$basename.txt" | while read host;
|
cat "dist/$basename.txt" | while read host;
|
||||||
do
|
do
|
||||||
|
@ -56,5 +71,7 @@ function generate_hosts {
|
||||||
) > "dist/$basename-hosts.txt"
|
) > "dist/$basename-hosts.txt"
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_hosts "firstparty-trackers" "Also contains trackers used as third-party."
|
generate_hosts "firstparty-trackers" "Generated from a curated list of first-party trackers" ""
|
||||||
generate_hosts "firstparty-only-trackers" "Do not contain trackers used in third-party. Use in combination with third-party lists."
|
generate_hosts "firstparty-only-trackers" "Generated from a curated list of first-party trackers" "Only contain the first chain of redirection."
|
||||||
|
generate_hosts "multiparty-trackers" "Generated from known third-party trackers." "Also contains trackers used as third-party."
|
||||||
|
generate_hosts "multiparty-only-trackers" "Generated from known third-party trackers." "Do not contain trackers used in third-party. Use in combination with third-party lists."
|
||||||
|
|
|
@ -1,8 +1,15 @@
|
||||||
|
# Eulerian
|
||||||
|
eulerian.net
|
||||||
# Xiti (AT Internet)
|
# Xiti (AT Internet)
|
||||||
ati-host.net
|
ati-host.net
|
||||||
at-o.net
|
at-o.net
|
||||||
# NP6
|
# NP6
|
||||||
bp01.net
|
bp01.net
|
||||||
# Criteo
|
# Criteo
|
||||||
|
criteo.com
|
||||||
dnsdelegation.io
|
dnsdelegation.io
|
||||||
storetail.io
|
storetail.io
|
||||||
|
# Keyade
|
||||||
|
keyade.com
|
||||||
|
# Adobe Experience Cloud
|
||||||
|
omtrdc.net
|
||||||
|
|
Loading…
Reference in a new issue