diff --git a/database.py b/database.py
index bdb92b0..4fb5463 100755
--- a/database.py
+++ b/database.py
@@ -13,7 +13,9 @@ import ctypes
 Utility functions to interact with the database.
 """
 
-VERSION = 1
+# TODO Rule level and source priority
+
+VERSION = 2
 PATH = f"blocking.db"
 CONN = None
 C = None  # Cursor
@@ -100,11 +102,11 @@ def refresh() -> None:
 
 
 RULE_SUBDOMAIN_COMMAND = \
-    'INSERT INTO blocking (key, type, updated, firstparty) ' \
-    f'VALUES (?, {RowType.DomainTree.value}, 1, ?) ' \
+    'INSERT INTO blocking (key, type, updated, firstparty, level) ' \
+    f'VALUES (?, {RowType.DomainTree.value}, 1, ?, 0) ' \
     'ON CONFLICT(key)' \
     f'DO UPDATE SET source=null, type={RowType.DomainTree.value}, ' \
-    'updated=1, firstparty=?'
+    'updated=1, firstparty=?, level=0'
 
 
 def feed_rule_subdomains(subdomain: str, first_party: bool = False) -> None:
@@ -138,11 +140,11 @@ def ip4_flat(address: bytes) -> typing.Optional[str]:
 
 
 RULE_IP4NETWORK_COMMAND = \
-    'INSERT INTO blocking (key, type, updated, firstparty) ' \
-    f'VALUES (?, {RowType.IPv4Network.value}, 1, ?) ' \
+    'INSERT INTO blocking (key, type, updated, firstparty, level) ' \
+    f'VALUES (?, {RowType.IPv4Network.value}, 1, ?, 0) ' \
     'ON CONFLICT(key)' \
     f'DO UPDATE SET source=null, type={RowType.IPv4Network.value}, ' \
-    'updated=1, firstparty=?'
+    'updated=1, firstparty=?, level=0'
 
 
 def feed_rule_ip4network(network: ipaddress.IPv4Network,
@@ -156,10 +158,12 @@ def feed_rule_ip4network(network: ipaddress.IPv4Network,
 FEED_A_COMMAND_FETCH = \
     'SELECT key, firstparty FROM blocking ' \
     'WHERE key<=? ' \
-    'AND updated=1 ' \
+    'AND instr(?, key) = 1 ' \
     f'AND type={RowType.IPv4Network.value} ' \
-    'ORDER BY key DESC ' \
-    'LIMIT 1'
+    'ORDER BY key DESC '
+
+# UPSERT are not issued often relative to FETCH,
+# merging the both might be counterproductive
 
 FEED_A_COMMAND_UPSERT = \
     'INSERT INTO blocking (key, source, type, updated, firstparty) ' \
@@ -177,35 +181,39 @@ def feed_a(name: bytes, value_ip: bytes) -> None:
     value_dec = ip4_flat(value_ip)
     if value_dec is None:
         # Malformed IPs
+        time_step('a_malformed')
         return
     time_step('a_fetch')
-    C.execute(FEED_A_COMMAND_FETCH, (value_dec,))
-    base = C.fetchone()
+    C.execute(FEED_A_COMMAND_FETCH, (value_dec, value_dec))
+    rows = C.fetchall()
     time_step('a_fetch_confirm')
-    if not base:
-        return
-    b_key, b_firstparty = base
-    if not value_dec.startswith(b_key):
-        return
     name = name[::-1]
-    time_step('a_upsert')
-    C.execute(FEED_A_COMMAND_UPSERT,
-              (name, b_key, b_firstparty,  # Insert
-               b_key, b_firstparty, b_firstparty)  # Update
-              )
-    time_step('other')
+    for b_key, b_firstparty in rows:
+        time_step('a_upsert')
+        C.execute(FEED_A_COMMAND_UPSERT,
+                  (name, b_key, b_firstparty,  # Insert
+                   b_key, b_firstparty, b_firstparty)  # Update
+                  )
+        time_step('a_fetch_confirm')
+    time_step('a_end')
 
 
 FEED_CNAME_COMMAND_FETCH = \
     'SELECT key, type, firstparty FROM blocking ' \
     'WHERE key<=? ' \
     f'AND (type={RowType.DomainTree.value} OR type={RowType.Domain.value}) ' \
-    'AND updated=1 ' \
     'ORDER BY key DESC ' \
     'LIMIT 1'
+# Optimisations that renders the index unused
+# (and thus counterproductive until fixed):
+
+# 'AND instr(?, key) > 0 ' \
+
 # f'WHERE ((type={RowType.DomainTree.value} AND key<=?) OR ' \
 # f'(type={RowType.Domain.value} AND key=?)) ' \
-# This optimisation is counter productive
+
+# Might be fixable by using multiple SELECT and a JOIN
+# In the meantime the confirm is very light so it's ok
 
 FEED_CNAME_COMMAND_UPSERT = \
     'INSERT INTO blocking (key, source, type, updated, firstparty) ' \
@@ -224,28 +232,25 @@ def feed_cname(name: bytes, value: bytes) -> None:
     value_dec = value.decode()
     time_step('cname_fetch')
     C.execute(FEED_CNAME_COMMAND_FETCH, (value_dec,))
-    base = C.fetchone()
     time_step('cname_fetch_confirm')
-    if not base:
-        # Should only happen at an extremum of the database
-        return
-    b_key, b_type, b_firstparty = base
-    matching = b_key == value_dec[:len(b_key)] and (
-        len(value_dec) == len(b_key)
-        or (
-            b_type == RowType.DomainTree.value
-            and value_dec[len(b_key)] == '.'
+    for b_key, b_type, b_firstparty in C.fetchall():
+        matching = b_key == value_dec[:len(b_key)] and (
+            len(value_dec) == len(b_key)
+            or (
+                b_type == RowType.DomainTree.value
+                and value_dec[len(b_key)] == '.'
+            )
         )
-    )
-    if not matching:
-        return
-    name = name[::-1]
-    time_step('cname_upsert')
-    C.execute(FEED_CNAME_COMMAND_UPSERT,
-              (name, b_key, b_firstparty,  # Insert
-               b_key, b_firstparty, b_firstparty)  # Update
-              )
-    time_step('other')
+        if not matching:
+            continue
+        name = name[::-1]
+        time_step('cname_upsert')
+        C.execute(FEED_CNAME_COMMAND_UPSERT,
+                  (name, b_key, b_firstparty,  # Insert
+                   b_key, b_firstparty, b_firstparty)  # Update
+                  )
+        time_step('cname_fetch_confirm')
+    time_step('cname_end')
 
 
 if __name__ == '__main__':
diff --git a/database_schema.sql b/database_schema.sql
index 1985281..833338d 100644
--- a/database_schema.sql
+++ b/database_schema.sql
@@ -7,12 +7,12 @@ CREATE TABLE blocking (
     type INTEGER, -- Type of the field: 1: AS, 2: domain tree, 3: domain, 4: IPv4 network, 6: IPv6 network
     updated INTEGER, -- If the row was updated during last data import (0: No, 1: Yes)
     firstparty INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
-    -- refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
+    refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party) (used for -only lists)
+    level INTEGER, -- Level of recursion to the original rule (used for source priority)
     FOREIGN KEY (source) REFERENCES blocking(key) ON DELETE CASCADE
 );
 
-CREATE INDEX "blocking_type_updated_key" ON "blocking" (
+CREATE INDEX "blocking_type_key" ON "blocking" (
     "type",
-    "updated",
     "key" DESC
 );