Add Fukuda & co research paper to test suite

Geoffrey Frogeye 2020-12-06 22:13:05 +01:00
parent eb1fcefd49
commit cec96b7e50
Signed by: geoffrey
GPG key ID: C72403E7F82E6AD8
4 changed files with 50 additions and 7 deletions


@@ -34,6 +34,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in
 - [Bash](https://www.gnu.org/software/bash/bash.html)
 - [Coreutils](https://www.gnu.org/software/coreutils/)
+- [Gawk](https://www.gnu.org/software/gawk/)
 - [curl](https://curl.haxx.se)
 - [pv](http://www.ivarch.com/programs/pv.shtml)
 - [Python 3.4+](https://www.python.org/)


@@ -13,6 +13,10 @@ function dl() {
     fi
 }
+log "Retrieving tests…"
+rm -f tests/*.cache.csv
+dl https://raw.githubusercontent.com/fukuda-lab/cname_cloaking/master/Subdomain_CNAME-cloaking-based-tracking.csv temp/fukuda.csv
+(echo "url,allow,deny,comment"; tail -n +2 temp/fukuda.csv | awk -F, '{ print "https://" $2 "/,," $3 "," $5 }') > tests/fukuda.cache.csv
 log "Retrieving rules…"
 rm -f rules*/*.cache.*
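
The awk one-liner added above rewrites the downloaded temp/fukuda.csv into the test-suite format (url,allow,deny,comment): field 2 becomes the URL host, field 3 the deny entry, field 5 the comment, and the allow column is left empty. Below is a minimal Python sketch of the same conversion; the column layout is inferred from the awk program, not confirmed against the upstream file, so treat the field indices as assumptions.

#!/usr/bin/env python3
# Sketch of the awk conversion above; assumes temp/fukuda.csv has already
# been downloaded and keeps the column order implied by the awk program
# (field 2 -> host, field 3 -> deny entry, field 5 -> comment).
import csv

with open("temp/fukuda.csv", newline="") as src, \
        open("tests/fukuda.cache.csv", "w", newline="") as dst:
    reader = csv.reader(src)
    writer = csv.writer(dst)
    next(reader)  # skip the upstream header, like `tail -n +2`
    writer.writerow(["url", "allow", "deny", "comment"])
    for row in reader:
        # awk fields are 1-based: $2 -> row[1], $3 -> row[2], $5 -> row[4]
        writer.writerow([f"https://{row[1]}/", "", row[2], row[4]])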


@@ -5,30 +5,67 @@ import os
 import logging
 import csv
 
-TESTS_DIR = 'tests'
+TESTS_DIR = "tests"
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     DB = database.Database()
-    log = logging.getLogger('tests')
+    log = logging.getLogger("tests")
     for filename in os.listdir(TESTS_DIR):
+        if not filename.lower().endswith(".csv"):
+            continue
         log.info("")
         log.info("Running tests from %s", filename)
         path = os.path.join(TESTS_DIR, filename)
-        with open(path, 'rt') as fdesc:
+        with open(path, "rt") as fdesc:
+            count_ent = 0
+            count_all = 0
+            count_den = 0
+            pass_ent = 0
+            pass_all = 0
+            pass_den = 0
             reader = csv.DictReader(fdesc)
             for test in reader:
-                log.info("Testing %s (%s)", test['url'], test['comment'])
-                for allow in test['allow'].split(':'):
+                log.debug("Testing %s (%s)", test["url"], test["comment"])
+                count_ent += 1
+                passed = True
+                for allow in test["allow"].split(":"):
                     if not allow:
                         continue
+                    count_all += 1
                     if any(DB.get_domain(allow)):
                         log.error("False positive: %s", allow)
-                for deny in test['deny'].split(':'):
+                        passed = False
+                    else:
+                        pass_all += 1
+                for deny in test["deny"].split(":"):
                     if not deny:
                         continue
+                    count_den += 1
                     if not any(DB.get_domain(deny)):
                         log.error("False negative: %s", deny)
+                        passed = False
+                    else:
+                        pass_den += 1
+                if passed:
+                    pass_ent += 1
+            perc_ent = (100 * pass_ent / count_ent) if count_ent else 100
+            perc_all = (100 * pass_all / count_all) if count_all else 100
+            perc_den = (100 * pass_den / count_den) if count_den else 100
+            log.info(
+                "%s: Entries %d/%d (%.2f%%) | Allow %d/%d (%.2f%%) | Deny %d/%d (%.2f%%)",
+                filename,
+                pass_ent,
+                count_ent,
+                perc_ent,
+                pass_all,
+                count_all,
+                perc_all,
+                pass_den,
+                count_den,
+                perc_den,
+            )
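
For reference, the allow and deny columns of each test CSV hold colon-separated domain lists: an allow domain that the database matches counts as a false positive, and a deny domain it misses counts as a false negative. Here is a small sketch of how one row is parsed; the domains are made-up examples, not entries from the paper.

import csv
import io

# Illustrative row in the tests/*.cache.csv format produced above;
# the domains are placeholders, not data from the fukuda-lab CSV.
sample = io.StringIO(
    "url,allow,deny,comment\n"
    "https://shop.example.com/,example.com,track.example.net:cdn.tracker.example,CNAME cloaking\n"
)

for test in csv.DictReader(sample):
    # Split the colon-separated lists and drop empty entries,
    # mirroring the `if not allow: continue` checks in the runner.
    allows = [d for d in test["allow"].split(":") if d]
    denies = [d for d in test["deny"].split(":") if d]
    print(allows)  # ['example.com']
    print(denies)  # ['track.example.net', 'cdn.tracker.example']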

tests/.gitignore (new file)

@@ -0,0 +1 @@
+*.cache.csv