From cec96b7e5052d692735b73e53d50967febfe9a91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geoffrey=20=E2=80=9CFrogeye=E2=80=9D=20Preud=27homme?= Date: Sun, 6 Dec 2020 22:13:05 +0100 Subject: [PATCH] Add Fukuda & co research paper to test suite --- README.md | 1 + fetch_resources.sh | 4 ++++ run_tests.py | 51 +++++++++++++++++++++++++++++++++++++++------- tests/.gitignore | 1 + 4 files changed, 50 insertions(+), 7 deletions(-) create mode 100644 tests/.gitignore diff --git a/README.md b/README.md index 2579e1f..0985a9d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in - [Bash](https://www.gnu.org/software/bash/bash.html) - [Coreutils](https://www.gnu.org/software/coreutils/) +- [Gawk](https://www.gnu.org/software/gawk/) - [curl](https://curl.haxx.se) - [pv](http://www.ivarch.com/programs/pv.shtml) - [Python 3.4+](https://www.python.org/) diff --git a/fetch_resources.sh b/fetch_resources.sh index 4cc06e4..b9ab599 100755 --- a/fetch_resources.sh +++ b/fetch_resources.sh @@ -13,6 +13,10 @@ function dl() { fi } +log "Retrieving tests…" +rm -f tests/*.cache.csv +dl https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv temp/fukuda.csv +(echo "url,allow,deny,comment"; tail -n +2 temp/fukuda.csv | awk -F, '{ print "https://" $2 "/,," $3 "," $5 }') > tests/fukuda.cache.csv log "Retrieving rules…" rm -f rules*/*.cache.* diff --git a/run_tests.py b/run_tests.py index 9584cb8..12964e3 100755 --- a/run_tests.py +++ b/run_tests.py @@ -5,30 +5,67 @@ import os import logging import csv -TESTS_DIR = 'tests' +TESTS_DIR = "tests" -if __name__ == '__main__': +if __name__ == "__main__": DB = database.Database() - log = logging.getLogger('tests') + log = logging.getLogger("tests") for filename in os.listdir(TESTS_DIR): + if not filename.lower().endswith(".csv"): + continue log.info("") log.info("Running tests from %s", filename) path = os.path.join(TESTS_DIR, filename) - with open(path, 'rt') as fdesc: + with open(path, "rt") as fdesc: + count_ent = 0 + count_all = 0 + count_den = 0 + pass_ent = 0 + pass_all = 0 + pass_den = 0 reader = csv.DictReader(fdesc) for test in reader: - log.info("Testing %s (%s)", test['url'], test['comment']) + log.debug("Testing %s (%s)", test["url"], test["comment"]) + count_ent += 1 + passed = True - for allow in test['allow'].split(':'): + for allow in test["allow"].split(":"): if not allow: continue + count_all += 1 if any(DB.get_domain(allow)): log.error("False positive: %s", allow) + passed = False + else: + pass_all += 1 - for deny in test['deny'].split(':'): + for deny in test["deny"].split(":"): if not deny: continue + count_den += 1 if not any(DB.get_domain(deny)): log.error("False negative: %s", deny) + passed = False + else: + pass_den += 1 + + if passed: + pass_ent += 1 + perc_ent = (100 * pass_ent / count_ent) if count_ent else 100 + perc_all = (100 * pass_all / count_all) if count_all else 100 + perc_den = (100 * pass_den / count_den) if count_den else 100 + log.info( + "%s: Entries %d/%d (%.2f%%) | Allow %d/%d (%.2f%%) | Deny %d/%d (%.2f%%)", + filename, + pass_ent, + count_ent, + perc_ent, + pass_all, + count_all, + perc_all, + pass_den, + count_den, + perc_den, + ) diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..109a9bf --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +*.cache.csv