Compare commits
4 commits
2c0286e36b
...
cec96b7e50
Author | SHA1 | Date | |
---|---|---|---|
Geoffrey Frogeye | cec96b7e50 | ||
Geoffrey Frogeye | eb1fcefd49 | ||
Geoffrey Frogeye | 0ecb431728 | ||
Geoffrey Frogeye | c1619b3cff |
|
@ -34,6 +34,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in
|
||||||
|
|
||||||
- [Bash](https://www.gnu.org/software/bash/bash.html)
|
- [Bash](https://www.gnu.org/software/bash/bash.html)
|
||||||
- [Coreutils](https://www.gnu.org/software/coreutils/)
|
- [Coreutils](https://www.gnu.org/software/coreutils/)
|
||||||
|
- [Gawk](https://www.gnu.org/software/gawk/)
|
||||||
- [curl](https://curl.haxx.se)
|
- [curl](https://curl.haxx.se)
|
||||||
- [pv](http://www.ivarch.com/programs/pv.shtml)
|
- [pv](http://www.ivarch.com/programs/pv.shtml)
|
||||||
- [Python 3.4+](https://www.python.org/)
|
- [Python 3.4+](https://www.python.org/)
|
||||||
|
|
9
dist/README.md
vendored
9
dist/README.md
vendored
|
@ -24,9 +24,12 @@ This list is an inventory of every `somestring.website1.com` found to allow non
|
||||||
|
|
||||||
### Learn more
|
### Learn more
|
||||||
|
|
||||||
- [CNAME Cloaking, the dangerous disguise of third-party trackers](https://medium.com/nextdns/cname-cloaking-the-dangerous-disguise-of-third-party-trackers-195205dc522a)
|
- [CNAME Cloaking, the dangerous disguise of third-party trackers](https://medium.com/nextdns/cname-cloaking-the-dangerous-disguise-of-third-party-trackers-195205dc522a) from NextDNS
|
||||||
- [Trackers first-party](https://blog.imirhil.fr/2019/11/13/first-party-tracker.html) (french)
|
- [Trackers first-party](https://blog.imirhil.fr/2019/11/13/first-party-tracker.html) from Aeris, in French
|
||||||
- [uBlock Origin issue](https://github.com/uBlockOrigin/uBlock-issues/issues/780)
|
- [uBlock Origin issue](https://github.com/uBlockOrigin/uBlock-issues/issues/780)
|
||||||
|
- [CNAME Cloaking and Bounce Tracking Defense](https://webkit.org/blog/11338/cname-cloaking-and-bounce-tracking-defense/) on WebKit's blog
|
||||||
|
- [Characterizing CNAME cloaking-based tracking](https://blog.apnic.net/2020/08/04/characterizing-cname-cloaking-based-tracking/) on APNIC's website
|
||||||
|
- [Characterizing CNAME Cloaking-Based Tracking on the Web](https://tma.ifip.org/2020/wp-content/uploads/sites/9/2020/06/tma2020-camera-paper66.pdf) is a research paper from Sokendai and ANSSI
|
||||||
|
|
||||||
## List variants
|
## List variants
|
||||||
|
|
||||||
|
@ -93,6 +96,7 @@ Some of the first-party tracker included in this list have been found by:
|
||||||
- [Aeris](https://imirhil.fr/)
|
- [Aeris](https://imirhil.fr/)
|
||||||
- NextDNS and [their blocklist](https://github.com/nextdns/cname-cloaking-blocklist)'s contributors
|
- NextDNS and [their blocklist](https://github.com/nextdns/cname-cloaking-blocklist)'s contributors
|
||||||
- Yuki2718 from [Wilders Security Forums](https://www.wilderssecurity.com/threads/ublock-a-lean-and-fast-blocker.365273/page-168#post-2880361)
|
- Yuki2718 from [Wilders Security Forums](https://www.wilderssecurity.com/threads/ublock-a-lean-and-fast-blocker.365273/page-168#post-2880361)
|
||||||
|
- Ha Dao, Johan Mazel, and Kensuke Fukuda, ["Characterizing CNAME Cloaking-Based Tracking on the Web", Proceedings of IFIP/IEEE Traffic Measurement Analysis Conference (TMA), 9 pages, 2020.](https://tma.ifip.org/2020/wp-content/uploads/sites/9/2020/06/tma2020-camera-paper66.pdf)
|
||||||
|
|
||||||
The list was generated using data from
|
The list was generated using data from
|
||||||
|
|
||||||
|
@ -100,6 +104,7 @@ The list was generated using data from
|
||||||
- [Cisco Umbrella Popularity List](http://s3-us-west-1.amazonaws.com/umbrella-static/index.html)
|
- [Cisco Umbrella Popularity List](http://s3-us-west-1.amazonaws.com/umbrella-static/index.html)
|
||||||
- [Public DNS Server List](https://public-dns.info/)
|
- [Public DNS Server List](https://public-dns.info/)
|
||||||
|
|
||||||
|
|
||||||
Similar projects:
|
Similar projects:
|
||||||
|
|
||||||
- [NextDNS blocklist](https://github.com/nextdns/cname-cloaking-blocklist): for DNS-aware ad blockers
|
- [NextDNS blocklist](https://github.com/nextdns/cname-cloaking-blocklist): for DNS-aware ad blockers
|
||||||
|
|
|
@ -13,10 +13,15 @@ function dl() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log "Retrieving tests…"
|
||||||
|
rm -f tests/*.cache.csv
|
||||||
|
dl https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv temp/fukuda.csv
|
||||||
|
(echo "url,allow,deny,comment"; tail -n +2 temp/fukuda.csv | awk -F, '{ print "https://" $2 "/,," $3 "," $5 }') > tests/fukuda.cache.csv
|
||||||
|
|
||||||
log "Retrieving rules…"
|
log "Retrieving rules…"
|
||||||
rm -f rules*/*.cache.*
|
rm -f rules*/*.cache.*
|
||||||
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
|
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
|
||||||
|
dl https://filters.adtidy.org/extension/chromium/filters/3.txt rules_adblock/adguard.cache.txt
|
||||||
|
|
||||||
log "Retrieving TLD list…"
|
log "Retrieving TLD list…"
|
||||||
dl http://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
|
dl http://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
|
||||||
|
|
63
run_tests.py
63
run_tests.py
|
@ -5,30 +5,67 @@ import os
|
||||||
import logging
|
import logging
|
||||||
import csv
|
import csv
|
||||||
|
|
||||||
TESTS_DIR = 'tests'
|
TESTS_DIR = "tests"
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
|
|
||||||
DB = database.Database()
|
DB = database.Database()
|
||||||
log = logging.getLogger('tests')
|
log = logging.getLogger("tests")
|
||||||
|
|
||||||
for filename in os.listdir(TESTS_DIR):
|
for filename in os.listdir(TESTS_DIR):
|
||||||
|
if not filename.lower().endswith(".csv"):
|
||||||
|
continue
|
||||||
log.info("")
|
log.info("")
|
||||||
log.info("Running tests from %s", filename)
|
log.info("Running tests from %s", filename)
|
||||||
path = os.path.join(TESTS_DIR, filename)
|
path = os.path.join(TESTS_DIR, filename)
|
||||||
with open(path, 'rt') as fdesc:
|
with open(path, "rt") as fdesc:
|
||||||
|
count_ent = 0
|
||||||
|
count_all = 0
|
||||||
|
count_den = 0
|
||||||
|
pass_ent = 0
|
||||||
|
pass_all = 0
|
||||||
|
pass_den = 0
|
||||||
reader = csv.DictReader(fdesc)
|
reader = csv.DictReader(fdesc)
|
||||||
for test in reader:
|
for test in reader:
|
||||||
log.info("Testing %s (%s)", test['url'], test['comment'])
|
log.debug("Testing %s (%s)", test["url"], test["comment"])
|
||||||
|
count_ent += 1
|
||||||
|
passed = True
|
||||||
|
|
||||||
for white in test['white'].split(':'):
|
for allow in test["allow"].split(":"):
|
||||||
if not white:
|
if not allow:
|
||||||
continue
|
continue
|
||||||
if any(DB.get_domain(white)):
|
count_all += 1
|
||||||
log.error("False positive: %s", white)
|
if any(DB.get_domain(allow)):
|
||||||
|
log.error("False positive: %s", allow)
|
||||||
|
passed = False
|
||||||
|
else:
|
||||||
|
pass_all += 1
|
||||||
|
|
||||||
for black in test['black'].split(':'):
|
for deny in test["deny"].split(":"):
|
||||||
if not black:
|
if not deny:
|
||||||
continue
|
continue
|
||||||
if not any(DB.get_domain(black)):
|
count_den += 1
|
||||||
log.error("False negative: %s", black)
|
if not any(DB.get_domain(deny)):
|
||||||
|
log.error("False negative: %s", deny)
|
||||||
|
passed = False
|
||||||
|
else:
|
||||||
|
pass_den += 1
|
||||||
|
|
||||||
|
if passed:
|
||||||
|
pass_ent += 1
|
||||||
|
perc_ent = (100 * pass_ent / count_ent) if count_ent else 100
|
||||||
|
perc_all = (100 * pass_all / count_all) if count_all else 100
|
||||||
|
perc_den = (100 * pass_den / count_den) if count_den else 100
|
||||||
|
log.info(
|
||||||
|
"%s: Entries %d/%d (%.2f%%) | Allow %d/%d (%.2f%%) | Deny %d/%d (%.2f%%)",
|
||||||
|
filename,
|
||||||
|
pass_ent,
|
||||||
|
count_ent,
|
||||||
|
perc_ent,
|
||||||
|
pass_all,
|
||||||
|
count_all,
|
||||||
|
perc_all,
|
||||||
|
pass_den,
|
||||||
|
count_den,
|
||||||
|
perc_den,
|
||||||
|
)
|
||||||
|
|
1
tests/.gitignore
vendored
Normal file
1
tests/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
*.cache.csv
|
|
@ -1,4 +1,4 @@
|
||||||
url,white,black,comment
|
url,allow,deny,comment
|
||||||
https://support.apple.com,support.apple.com,,EdgeKey / AkamaiEdge
|
https://support.apple.com,support.apple.com,,EdgeKey / AkamaiEdge
|
||||||
https://www.pinterest.fr/,i.pinimg.com,,Cedexis
|
https://www.pinterest.fr/,i.pinimg.com,,Cedexis
|
||||||
https://www.tumblr.com/,66.media.tumblr.com,,ChiCDN
|
https://www.tumblr.com/,66.media.tumblr.com,,ChiCDN
|
||||||
|
|
|
|
@ -1,4 +1,4 @@
|
||||||
url,white,black,comment
|
url,allow,deny,comment
|
||||||
https://www.red-by-sfr.fr/,static.s-sfr.fr,nrg.red-by-sfr.fr,Eulerian
|
https://www.red-by-sfr.fr/,static.s-sfr.fr,nrg.red-by-sfr.fr,Eulerian
|
||||||
https://www.cbc.ca/,,smetrics.cbc.ca,2o7 | Omniture | Adobe Experience Cloud
|
https://www.cbc.ca/,,smetrics.cbc.ca,2o7 | Omniture | Adobe Experience Cloud
|
||||||
https://www.discover.com/,,content.discover.com,ThreatMetrix
|
https://www.discover.com/,,content.discover.com,ThreatMetrix
|
||||||
|
|
|
Loading…
Reference in a new issue