Add Fukuda & co research paper to test suite

Geoffrey Frogeye 2020-12-06 22:13:05 +01:00
parent eb1fcefd49
commit cec96b7e50
Signed by: geoffrey
GPG key ID: C72403E7F82E6AD8
4 changed files with 50 additions and 7 deletions


@@ -34,6 +34,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in
 - [Bash](https://www.gnu.org/software/bash/bash.html)
 - [Coreutils](https://www.gnu.org/software/coreutils/)
+- [Gawk](https://www.gnu.org/software/gawk/)
 - [curl](https://curl.haxx.se)
 - [pv](http://www.ivarch.com/programs/pv.shtml)
 - [Python 3.4+](https://www.python.org/)


@@ -13,6 +13,10 @@ function dl() {
     fi
 }
+log "Retrieving tests…"
+rm -f tests/*.cache.csv
+dl https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv temp/fukuda.csv
+(echo "url,allow,deny,comment"; tail -n +2 temp/fukuda.csv | awk -F, '{ print "https://" $2 "/,," $3 "," $5 }') > tests/fukuda.cache.csv
 log "Retrieving rules…"
 rm -f rules*/*.cache.*
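
The awk one-liner added above rewrites the downloaded temp/fukuda.csv into the test-suite format (url,allow,deny,comment): field 2 becomes the URL host, field 3 the deny entry, field 5 the comment, and the allow column is left empty. Below is a minimal Python sketch of the same conversion; the column layout is inferred from the awk program, not confirmed against the upstream file, so treat the field indices as assumptions.

#!/usr/bin/env python3
# Sketch of the awk conversion above; assumes temp/fukuda.csv has already
# been downloaded and keeps the column order implied by the awk program
# (field 2 -> host, field 3 -> deny entry, field 5 -> comment).
import csv

with open("temp/fukuda.csv", newline="") as src, \
        open("tests/fukuda.cache.csv", "w", newline="") as dst:
    reader = csv.reader(src)
    writer = csv.writer(dst)
    next(reader)  # skip the upstream header, like `tail -n +2`
    writer.writerow(["url", "allow", "deny", "comment"])
    for row in reader:
        # awk fields are 1-based: $2 -> row[1], $3 -> row[2], $5 -> row[4]
        writer.writerow([f"https://{row[1]}/", "", row[2], row[4]])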


@@ -5,30 +5,67 @@ import os
 import logging
 import csv
 
-TESTS_DIR = 'tests'
+TESTS_DIR = "tests"
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     DB = database.Database()
-    log = logging.getLogger('tests')
+    log = logging.getLogger("tests")
     for filename in os.listdir(TESTS_DIR):
+        if not filename.lower().endswith(".csv"):
+            continue
         log.info("")
         log.info("Running tests from %s", filename)
         path = os.path.join(TESTS_DIR, filename)
-        with open(path, 'rt') as fdesc:
+        with open(path, "rt") as fdesc:
+            count_ent = 0
+            count_all = 0
+            count_den = 0
+            pass_ent = 0
+            pass_all = 0
+            pass_den = 0
             reader = csv.DictReader(fdesc)
             for test in reader:
-                log.info("Testing %s (%s)", test['url'], test['comment'])
-                for allow in test['allow'].split(':'):
+                log.debug("Testing %s (%s)", test["url"], test["comment"])
+                count_ent += 1
+                passed = True
+                for allow in test["allow"].split(":"):
                     if not allow:
                         continue
+                    count_all += 1
                     if any(DB.get_domain(allow)):
                         log.error("False positive: %s", allow)
-                for deny in test['deny'].split(':'):
+                        passed = False
+                    else:
+                        pass_all += 1
+                for deny in test["deny"].split(":"):
                     if not deny:
                         continue
+                    count_den += 1
                     if not any(DB.get_domain(deny)):
                         log.error("False negative: %s", deny)
+                        passed = False
+                    else:
+                        pass_den += 1
+                if passed:
+                    pass_ent += 1
+            perc_ent = (100 * pass_ent / count_ent) if count_ent else 100
+            perc_all = (100 * pass_all / count_all) if count_all else 100
+            perc_den = (100 * pass_den / count_den) if count_den else 100
+            log.info(
+                "%s: Entries %d/%d (%.2f%%) | Allow %d/%d (%.2f%%) | Deny %d/%d (%.2f%%)",
+                filename,
+                pass_ent,
+                count_ent,
+                perc_ent,
+                pass_all,
+                count_all,
+                perc_all,
+                pass_den,
+                count_den,
+                perc_den,
+            )
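
For reference, the allow and deny columns of each test CSV hold colon-separated domain lists: an allow domain that the database matches counts as a false positive, and a deny domain it misses counts as a false negative. Here is a small sketch of how one row is parsed; the domains are made-up examples, not entries from the paper.

import csv
import io

# Illustrative row in the tests/*.cache.csv format produced above;
# the domains are placeholders, not data from the fukuda-lab CSV.
sample = io.StringIO(
    "url,allow,deny,comment\n"
    "https://shop.example.com/,example.com,track.example.net:cdn.tracker.example,CNAME cloaking\n"
)

for test in csv.DictReader(sample):
    # Split the colon-separated lists and drop empty entries,
    # mirroring the `if not allow: continue` checks in the runner.
    allows = [d for d in test["allow"].split(":") if d]
    denies = [d for d in test["deny"].split(":") if d]
    print(allows)  # ['example.com']
    print(denies)  # ['track.example.net', 'cdn.tracker.example']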

tests/.gitignore (new file)

@@ -0,0 +1 @@
+*.cache.csv