Workflow: Some modifications

This commit is contained in:
Geoffrey Frogeye 2019-12-14 16:04:19 +01:00
parent 5023b85d7c
commit d7c239a6f6
6 changed files with 27 additions and 13 deletions

2
.gitignore vendored
View file

@@ -3,5 +3,3 @@
*.db-journal
nameservers
nameservers.head
*.o
*.so

View file

@@ -149,6 +149,8 @@ class Database():
total = 0
for i, octet in enumerate(address.split('.')):
total += int(octet) << (3-i)*8
if total > 0xFFFFFFFF:
raise ValueError
return total
# return '{:02x}{:02x}{:02x}{:02x}'.format(
# *[int(c) for c in address.split('.')])
@@ -192,10 +194,13 @@ class Database():
'(SELECT count(*) FROM rules '
'WHERE source=r.id)')
def prune(self, before: int) -> None:
def prune(self, before: int, base_only: bool = False) -> None:
self.enter_step('prune')
cursor = self.conn.cursor()
cursor.execute('DELETE FROM rules WHERE updated<?', (before,))
cmd = 'DELETE FROM rules WHERE updated<?'
if base_only:
cmd += ' AND level=0'
cursor.execute(cmd, (before,))
def explain(self, entry: int) -> str:
# Format current
@@ -541,7 +546,14 @@ if __name__ == '__main__':
help="Reconstruct the whole database")
parser.add_argument(
'-p', '--prune', action='store_true',
help="Remove old (+6 months) entries from database")
help="Remove old entries from database")
parser.add_argument(
'-b', '--prune-base', action='store_true',
help="TODO")
parser.add_argument(
'-s', '--prune-before', type=int,
default=(int(time.time()) - 60*60*24*31*6),
help="TODO")
parser.add_argument(
'-r', '--references', action='store_true',
help="Update the reference count")
@@ -552,8 +564,8 @@ if __name__ == '__main__':
if args.initialize:
DB.initialize()
if args.prune:
DB.prune(before=int(time.time()) - 60*60*24*31*6)
if args.references and not args.prune:
DB.prune(before=args.prune_before, base_only=args.prune_base)
if args.references:
DB.update_references()
DB.close()

View file

@@ -37,20 +37,21 @@ if __name__ == '__main__':
DB.enter_step('feed_switch')
if dtype == 'a':
for rule in DB.get_ip4(value):
if not list(DB.get_domain_in_zone(name)):
if not any(DB.get_domain_in_zone(name)):
DB.set_hostname(name, source=rule,
updated=int(timestamp))
# updated=int(data['timestamp']))
elif dtype == 'c':
for rule in DB.get_domain(value):
if not list(DB.get_domain_in_zone(name)):
if not any(DB.get_domain_in_zone(name)):
DB.set_hostname(name, source=rule,
updated=int(timestamp))
# updated=int(data['timestamp']))
elif dtype == 'p':
for rule in DB.get_domain(value):
if not list(DB.get_ip4_in_network(name)):
if not any(DB.get_ip4_in_network(name)):
log.debug('%s matched by %d: add %s', value, rule, name)
DB.set_ip4address(name, source=rule,
updated=int(timestamp))
# updated=int(data['timestamp']))

View file

@@ -18,7 +18,7 @@ log "Retrieving rules…"
rm -f rules*/*.cache.*
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
# From firebog.net Tracking & Telemetry Lists
dl https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list
# dl https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list
# dl https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt rules/notrack-blocklist.cache.list
# False positives: https://github.com/WaLLy3K/wally3k.github.io/issues/73 -> 69.media.tumblr.com chicdn.net
dl https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts rules_hosts/add2o7.cache.txt

View file

@@ -5,6 +5,7 @@ function log() {
}
log "Importing rules…"
BEFORE="$(date +%s)"
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
@@ -17,3 +18,5 @@ cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py as
./feed_asn.py
log "Pruning old rules…"
./database.py --prune --prune-before "$BEFORE" --prune-base

View file

@@ -27,10 +27,10 @@ if __name__ == '__main__':
data = json.loads(line)
try:
writer.writerow([
data['type'][0],
data['type'][0], # First letter, will need to do something special for AAAA
data['timestamp'],
data['name'],
data['value']])
except IndexError:
except (KeyError, json.decoder.JSONDecodeError):
log.error('Could not parse line: %s', line)
pass