Workflow: OOP and individual tables per type

Mostly for performance reasons.
The first change is meant to make implementing threading easier later.
The second is meant to speed up the dichotomy (binary search),
but it doesn't seem that much better so far.
Geoffrey Frogeye 2019-12-13 00:11:21 +01:00
parent 1484733a90
commit 57416b6e2c
Signed by: geoffrey
GPG key ID: D8A7ECA00A8CD3DD
10 changed files with 525 additions and 360 deletions
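
The "individual tables per type" part concerns the Python database layer (database.py) rather than the shell workflow diffed below. A minimal sketch of the idea, with hypothetical names (Table, Database, subdomains, ip4networks are illustrations, not the actual classes in this commit): keep one sorted table per record type, search each with its own dichotomy, and give each table its own lock so per-table threads could be added later.

# Minimal sketch, not the repository's actual classes: one sorted table
# per record type, each with its own dichotomy (bisection) and lock.
import bisect
import threading


class Table:
    """Sorted keys for a single record type."""

    def __init__(self) -> None:
        self.keys: list[str] = []
        self.lock = threading.Lock()  # would allow one worker thread per table later

    def add(self, key: str) -> None:
        with self.lock:
            i = bisect.bisect_left(self.keys, key)
            if i == len(self.keys) or self.keys[i] != key:
                self.keys.insert(i, key)

    def __contains__(self, key: str) -> bool:
        # Dichotomy only over keys of this type, not over one big mixed table
        i = bisect.bisect_left(self.keys, key)
        return i < len(self.keys) and self.keys[i] == key


class Database:
    """One table per record type instead of a single shared one."""

    def __init__(self) -> None:
        self.subdomains = Table()
        self.ip4networks = Table()

Keeping the tables separate means each bisection runs over a shorter list, though, as the commit message notes, the measured gain is modest so far.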

@@ -5,18 +5,20 @@ function log() {
}
log "Preparing database…"
./database.py --refresh
./database.py --expire
log "Compiling rules…"
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py subdomains
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py subdomains
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py subdomains
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
# NOTE: Ensure first-party sources are last
cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py subdomains --first-party
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party
./import_rules.sh
# TODO Fetch 'em
log "Reading PTR records…"
pv ptr.json.gz | gunzip | ./feed_dns.py
log "Reading A records…"
pv a.json.gz | gunzip | ./feed_dns.py
log "Reading CNAME records…"
pv cname.json.gz | gunzip | ./feed_dns.py
log "Pruning old data…"
./database.py --prune
./filter_subdomains.sh
# log "Reading A records…"
# pv a.json.gz | gunzip | ./feed_dns.py
# log "Reading CNAME records…"
# pv cname.json.gz | gunzip | ./feed_dns.py
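
For context, the pv … | gunzip | ./feed_dns.py pipelines above only show that records arrive on stdin; the record format itself is not part of this diff. A purely hypothetical Python sketch of such a stdin feeder, assuming one JSON object per line with "type" and "name" fields (an assumption for illustration, not the real schema), grouping records into one table per type:

# Hypothetical illustration only: the diff pipes gunzipped JSON into
# ./feed_dns.py, but the record format is not shown here, so the
# "type" and "name" fields below are assumptions, not the real schema.
import json
import sys
from collections import defaultdict


def feed(tables: dict) -> None:
    """Append each record's name to the table for its record type."""
    for line in sys.stdin:
        record = json.loads(line)
        tables[record["type"]].append(record["name"])


if __name__ == "__main__":
    tables: dict = defaultdict(list)  # one table per record type
    feed(tables)
    for rtype, names in tables.items():
        print(rtype, len(names))

Reading from stdin keeps such a script usable at the end of the existing gunzip pipelines shown above.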