Workflow: Small tweaks

This commit is contained in:
Geoffrey Frogeye 2019-12-09 18:21:08 +01:00
parent 55877be891
commit 1484733a90
2 changed files with 52 additions and 47 deletions

View file

@ -13,7 +13,9 @@ import ctypes
Utility functions to interact with the database.
"""
VERSION = 1
# TODO Rule level and source priority
VERSION = 2
# Database file name; a plain literal because there is nothing to interpolate.
PATH = "blocking.db"
# Connection and cursor placeholders: both are None here and are presumably
# assigned elsewhere in the module (not visible in this hunk).
CONN = None
C = None # Cursor
@ -100,11 +102,11 @@ def refresh() -> None:
# Upsert statement for a domain-tree rule row. The INSERT column list must
# name the schema column "firstparty", the same column the ON CONFLICT clause
# updates; the misspelling "firstpart" would make SQLite reject the statement.
RULE_SUBDOMAIN_COMMAND = \
'INSERT INTO blocking (key, type, updated, firstparty) ' \
f'VALUES (?, {RowType.DomainTree.value}, 1, ?) ' \
'INSERT INTO blocking (key, type, updated, firstparty, level) ' \
f'VALUES (?, {RowType.DomainTree.value}, 1, ?, 0) ' \
'ON CONFLICT(key)' \
f'DO UPDATE SET source=null, type={RowType.DomainTree.value}, ' \
'updated=1, firstparty=?'
'updated=1, firstparty=?, level=0'
def feed_rule_subdomains(subdomain: str, first_party: bool = False) -> None:
@ -138,11 +140,11 @@ def ip4_flat(address: bytes) -> typing.Optional[str]:
# Upsert statement for an IPv4-network rule row: inserts the key with
# updated=1, or on a key conflict clears source and resets type, updated and
# firstparty. The lines carrying "level" set that column to 0 as well.
RULE_IP4NETWORK_COMMAND = \
'INSERT INTO blocking (key, type, updated, firstparty) ' \
f'VALUES (?, {RowType.IPv4Network.value}, 1, ?) ' \
'INSERT INTO blocking (key, type, updated, firstparty, level) ' \
f'VALUES (?, {RowType.IPv4Network.value}, 1, ?, 0) ' \
'ON CONFLICT(key)' \
f'DO UPDATE SET source=null, type={RowType.IPv4Network.value}, ' \
'updated=1, firstparty=?'
'updated=1, firstparty=?, level=0'
def feed_rule_ip4network(network: ipaddress.IPv4Network,
@ -156,10 +158,12 @@ def feed_rule_ip4network(network: ipaddress.IPv4Network,
# Query for IPv4-network rule rows whose key sorts at or below the bound
# parameter, returned with the highest key first. feed_a binds the flattened
# address string produced by ip4_flat() to the placeholders.
FEED_A_COMMAND_FETCH = \
'SELECT key, firstparty FROM blocking ' \
'WHERE key<=? ' \
'AND updated=1 ' \
'AND instr(?, key) > 0 ' \
f'AND type={RowType.IPv4Network.value} ' \
'ORDER BY key DESC ' \
'LIMIT 1'
'ORDER BY key DESC '
# UPSERTs are issued rarely compared to FETCHes,
# so merging the two might be counterproductive
FEED_A_COMMAND_UPSERT = \
'INSERT INTO blocking (key, source, type, updated, firstparty) ' \
@ -177,35 +181,39 @@ def feed_a(name: bytes, value_ip: bytes) -> None:
value_dec = ip4_flat(value_ip)
if value_dec is None:
# Malformed IPs
time_step('a_malformed')
return
time_step('a_fetch')
C.execute(FEED_A_COMMAND_FETCH, (value_dec,))
C.execute(FEED_A_COMMAND_FETCH, (value_dec, value_dec))
base = C.fetchone()
time_step('a_fetch_confirm')
if not base:
return
b_key, b_firstparty = base
if not value_dec.startswith(b_key):
return
name = name[::-1]
time_step('a_upsert')
C.execute(FEED_A_COMMAND_UPSERT,
(name, b_key, b_firstparty, # Insert
b_key, b_firstparty, b_firstparty) # Update
)
time_step('other')
for b_key, b_firstparty in C:
time_step('a_upsert')
C.execute(FEED_A_COMMAND_UPSERT,
(name, b_key, b_firstparty, # Insert
b_key, b_firstparty, b_firstparty) # Update
)
time_step('a_fetch_confirm')
time_step('a_end')
# Query for domain and domain-tree rule rows whose key sorts at or below the
# bound parameter, returned with the highest key first. The caller still has
# to confirm that the fetched key really matches the looked-up value.
FEED_CNAME_COMMAND_FETCH = \
'SELECT key, type, firstparty FROM blocking ' \
'WHERE key<=? ' \
f'AND (type={RowType.DomainTree.value} OR type={RowType.Domain.value}) ' \
'AND updated=1 ' \
'ORDER BY key DESC ' \
'LIMIT 1'
# Optimisations that render the index unused
# (and thus counterproductive until fixed):
# 'AND instr(?, key) > 0 ' \
# f'WHERE ((type={RowType.DomainTree.value} AND key<=?) OR ' \
# f'(type={RowType.Domain.value} AND key=?)) ' \
# This optimisation is counterproductive.
# It might be fixable by using multiple SELECTs and a JOIN.
# In the meantime the confirm step is very light, so it's OK.
FEED_CNAME_COMMAND_UPSERT = \
'INSERT INTO blocking (key, source, type, updated, firstparty) ' \
@ -224,28 +232,25 @@ def feed_cname(name: bytes, value: bytes) -> None:
value_dec = value.decode()
time_step('cname_fetch')
C.execute(FEED_CNAME_COMMAND_FETCH, (value_dec,))
base = C.fetchone()
time_step('cname_fetch_confirm')
if not base:
# Should only happen at an extremum of the database
return
b_key, b_type, b_firstparty = base
matching = b_key == value_dec[:len(b_key)] and (
len(value_dec) == len(b_key)
or (
b_type == RowType.DomainTree.value
and value_dec[len(b_key)] == '.'
for b_key, b_type, b_firstparty in C:
matching = b_key == value_dec[:len(b_key)] and (
len(value_dec) == len(b_key)
or (
b_type == RowType.DomainTree.value
and value_dec[len(b_key)] == '.'
)
)
)
if not matching:
return
name = name[::-1]
time_step('cname_upsert')
C.execute(FEED_CNAME_COMMAND_UPSERT,
(name, b_key, b_firstparty, # Insert
b_key, b_firstparty, b_firstparty) # Update
)
time_step('other')
if not matching:
continue
name = name[::-1]
time_step('cname_upsert')
C.execute(FEED_CNAME_COMMAND_UPSERT,
(name, b_key, b_firstparty, # Insert
b_key, b_firstparty, b_firstparty) # Update
)
time_step('cname_fetch_confirm')
time_step('cname_end')
if __name__ == '__main__':

View file

@ -7,12 +7,12 @@ CREATE TABLE blocking (
type INTEGER, -- Type of the field: 1: AS, 2: domain tree, 3: domain, 4: IPv4 network, 6: IPv6 network
updated INTEGER, -- If the row was updated during last data import (0: No, 1: Yes)
firstparty INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
-- refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party) (used for -only lists)
level INTEGER, -- Level of recursion to the original rule (used for source priority)
FOREIGN KEY (source) REFERENCES blocking(key) ON DELETE CASCADE
);
CREATE INDEX "blocking_type_updated_key" ON "blocking" (
CREATE INDEX "blocking_type_key" ON "blocking" (
"type",
"updated",
"key" DESC
);