Workflow: Small tweaks

This commit is contained in:
Geoffrey Frogeye 2019-12-09 18:21:08 +01:00
parent 55877be891
commit 1484733a90
2 changed files with 52 additions and 47 deletions

View file

@ -13,7 +13,9 @@ import ctypes
Utility functions to interact with the database. Utility functions to interact with the database.
""" """
VERSION = 1 # TODO Rule level and source priority
VERSION = 2
PATH = f"blocking.db" PATH = f"blocking.db"
CONN = None CONN = None
C = None # Cursor C = None # Cursor
@ -100,11 +102,11 @@ def refresh() -> None:
RULE_SUBDOMAIN_COMMAND = \ RULE_SUBDOMAIN_COMMAND = \
'INSERT INTO blocking (key, type, updated, firstparty) ' \ 'INSERT INTO blocking (key, type, updated, firstparty, level) ' \
f'VALUES (?, {RowType.DomainTree.value}, 1, ?) ' \ f'VALUES (?, {RowType.DomainTree.value}, 1, ?, 0) ' \
'ON CONFLICT(key)' \ 'ON CONFLICT(key)' \
f'DO UPDATE SET source=null, type={RowType.DomainTree.value}, ' \ f'DO UPDATE SET source=null, type={RowType.DomainTree.value}, ' \
'updated=1, firstparty=?' 'updated=1, firstparty=?, level=0'
def feed_rule_subdomains(subdomain: str, first_party: bool = False) -> None: def feed_rule_subdomains(subdomain: str, first_party: bool = False) -> None:
@ -138,11 +140,11 @@ def ip4_flat(address: bytes) -> typing.Optional[str]:
RULE_IP4NETWORK_COMMAND = \ RULE_IP4NETWORK_COMMAND = \
'INSERT INTO blocking (key, type, updated, firstparty) ' \ 'INSERT INTO blocking (key, type, updated, firstparty, level) ' \
f'VALUES (?, {RowType.IPv4Network.value}, 1, ?) ' \ f'VALUES (?, {RowType.IPv4Network.value}, 1, ?, 0) ' \
'ON CONFLICT(key)' \ 'ON CONFLICT(key)' \
f'DO UPDATE SET source=null, type={RowType.IPv4Network.value}, ' \ f'DO UPDATE SET source=null, type={RowType.IPv4Network.value}, ' \
'updated=1, firstparty=?' 'updated=1, firstparty=?, level=0'
def feed_rule_ip4network(network: ipaddress.IPv4Network, def feed_rule_ip4network(network: ipaddress.IPv4Network,
@ -156,10 +158,12 @@ def feed_rule_ip4network(network: ipaddress.IPv4Network,
FEED_A_COMMAND_FETCH = \ FEED_A_COMMAND_FETCH = \
'SELECT key, firstparty FROM blocking ' \ 'SELECT key, firstparty FROM blocking ' \
'WHERE key<=? ' \ 'WHERE key<=? ' \
'AND updated=1 ' \ 'AND instr(?, key) > 0 ' \
f'AND type={RowType.IPv4Network.value} ' \ f'AND type={RowType.IPv4Network.value} ' \
'ORDER BY key DESC ' \ 'ORDER BY key DESC '
'LIMIT 1'
# UPSERTs are issued rarely relative to FETCHes,
# so merging the two might be counterproductive
FEED_A_COMMAND_UPSERT = \ FEED_A_COMMAND_UPSERT = \
'INSERT INTO blocking (key, source, type, updated, firstparty) ' \ 'INSERT INTO blocking (key, source, type, updated, firstparty) ' \
@ -177,35 +181,39 @@ def feed_a(name: bytes, value_ip: bytes) -> None:
value_dec = ip4_flat(value_ip) value_dec = ip4_flat(value_ip)
if value_dec is None: if value_dec is None:
# Malformed IPs # Malformed IPs
time_step('a_malformed')
return return
time_step('a_fetch') time_step('a_fetch')
C.execute(FEED_A_COMMAND_FETCH, (value_dec,)) C.execute(FEED_A_COMMAND_FETCH, (value_dec, value_dec))
base = C.fetchone() base = C.fetchone()
time_step('a_fetch_confirm') time_step('a_fetch_confirm')
if not base:
return
b_key, b_firstparty = base
if not value_dec.startswith(b_key):
return
name = name[::-1] name = name[::-1]
time_step('a_upsert') for b_key, b_firstparty in C:
C.execute(FEED_A_COMMAND_UPSERT, time_step('a_upsert')
(name, b_key, b_firstparty, # Insert C.execute(FEED_A_COMMAND_UPSERT,
b_key, b_firstparty, b_firstparty) # Update (name, b_key, b_firstparty, # Insert
) b_key, b_firstparty, b_firstparty) # Update
time_step('other') )
time_step('a_fetch_confirm')
time_step('a_end')
FEED_CNAME_COMMAND_FETCH = \ FEED_CNAME_COMMAND_FETCH = \
'SELECT key, type, firstparty FROM blocking ' \ 'SELECT key, type, firstparty FROM blocking ' \
'WHERE key<=? ' \ 'WHERE key<=? ' \
f'AND (type={RowType.DomainTree.value} OR type={RowType.Domain.value}) ' \ f'AND (type={RowType.DomainTree.value} OR type={RowType.Domain.value}) ' \
'AND updated=1 ' \
'ORDER BY key DESC ' \ 'ORDER BY key DESC ' \
'LIMIT 1' 'LIMIT 1'
# Optimisations that render the index unused
# (and thus counterproductive until fixed):
# 'AND instr(?, key) > 0 ' \
# f'WHERE ((type={RowType.DomainTree.value} AND key<=?) OR ' \ # f'WHERE ((type={RowType.DomainTree.value} AND key<=?) OR ' \
# f'(type={RowType.Domain.value} AND key=?)) ' \ # f'(type={RowType.Domain.value} AND key=?)) ' \
# This optimisation is counterproductive
# Might be fixable by using multiple SELECTs and a JOIN
# In the meantime the confirm step is very light, so it's OK
FEED_CNAME_COMMAND_UPSERT = \ FEED_CNAME_COMMAND_UPSERT = \
'INSERT INTO blocking (key, source, type, updated, firstparty) ' \ 'INSERT INTO blocking (key, source, type, updated, firstparty) ' \
@ -224,28 +232,25 @@ def feed_cname(name: bytes, value: bytes) -> None:
value_dec = value.decode() value_dec = value.decode()
time_step('cname_fetch') time_step('cname_fetch')
C.execute(FEED_CNAME_COMMAND_FETCH, (value_dec,)) C.execute(FEED_CNAME_COMMAND_FETCH, (value_dec,))
base = C.fetchone()
time_step('cname_fetch_confirm') time_step('cname_fetch_confirm')
if not base: for b_key, b_type, b_firstparty in C:
# Should only happen at an extremum of the database matching = b_key == value_dec[:len(b_key)] and (
return len(value_dec) == len(b_key)
b_key, b_type, b_firstparty = base or (
matching = b_key == value_dec[:len(b_key)] and ( b_type == RowType.DomainTree.value
len(value_dec) == len(b_key) and value_dec[len(b_key)] == '.'
or ( )
b_type == RowType.DomainTree.value
and value_dec[len(b_key)] == '.'
) )
) if not matching:
if not matching: continue
return name = name[::-1]
name = name[::-1] time_step('cname_upsert')
time_step('cname_upsert') C.execute(FEED_CNAME_COMMAND_UPSERT,
C.execute(FEED_CNAME_COMMAND_UPSERT, (name, b_key, b_firstparty, # Insert
(name, b_key, b_firstparty, # Insert b_key, b_firstparty, b_firstparty) # Update
b_key, b_firstparty, b_firstparty) # Update )
) time_step('cname_fetch_confirm')
time_step('other') time_step('cname_end')
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -7,12 +7,12 @@ CREATE TABLE blocking (
type INTEGER, -- Type of the field: 1: AS, 2: domain tree, 3: domain, 4: IPv4 network, 6: IPv6 network type INTEGER, -- Type of the field: 1: AS, 2: domain tree, 3: domain, 4: IPv4 network, 6: IPv6 network
updated INTEGER, -- If the row was updated during last data import (0: No, 1: Yes) updated INTEGER, -- If the row was updated during last data import (0: No, 1: Yes)
firstparty INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party) firstparty INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
-- refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party) refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party) (used for -only lists)
level INTEGER, -- Level of recursion to the original rule (used for source priority)
FOREIGN KEY (source) REFERENCES blocking(key) ON DELETE CASCADE FOREIGN KEY (source) REFERENCES blocking(key) ON DELETE CASCADE
); );
CREATE INDEX "blocking_type_updated_key" ON "blocking" ( CREATE INDEX "blocking_type_key" ON "blocking" (
"type", "type",
"updated",
"key" DESC "key" DESC
); );