IP parsing C accelerated, use bytes everywhere

This commit is contained in:
Geoffrey Frogeye 2019-12-09 08:55:34 +01:00
parent 7937496882
commit 55877be891
Signed by: geoffrey
GPG key ID: D8A7ECA00A8CD3DD
6 changed files with 73 additions and 21 deletions

2
.gitignore vendored
View file

@ -3,3 +3,5 @@
*.db-journal *.db-journal
nameservers nameservers
nameservers.head nameservers.head
*.o
*.so

5
Makefile Normal file
View file

@ -0,0 +1,5 @@
# Build libaccel.so, the shared library compiled from accel.c.
# The compiler and optimisation level can be overridden on the command line,
# e.g. `make CC=gcc CFLAGS=-O2`.
CC = clang
CFLAGS = -O3

# Link the shared object (recipe lines must start with a tab).
libaccel.so: accel.o
	$(CC) -shared -Wl,-soname,libaccel.so -o libaccel.so accel.o

# Compile as position-independent code so it can go into a shared object.
accel.o: accel.c
	$(CC) -c -fPIC $(CFLAGS) accel.c -o accel.o

37
accel.c Normal file
View file

@ -0,0 +1,37 @@
#include <stdlib.h>
/*
 * Convert a dotted-decimal IPv4 address into its 32-character binary form.
 *
 * value: NUL-terminated string such as "192.168.1.10".
 * flat:  output buffer of at least 32 wchar_t elements; on success it holds
 *        32 characters, each '0' or '1', most significant bit first.
 *        No terminator is written.
 *
 * Returns 0 on success and 1 when the input is malformed: a NULL pointer,
 * a character other than a digit or '.', an octet without digits, an octet
 * greater than 255, or a number of octets different from four.
 * On failure the contents of flat are unspecified (octets parsed before the
 * error may already have been written).
 */
int ip4_flat(char* value, wchar_t* flat)
{
    /* Read through unsigned char so comparisons do not depend on whether
     * plain char is signed on the target platform. */
    const unsigned char* cursor = (const unsigned char*) value;
    unsigned int octet_index = 0;  /* which of the four octets is being read */
    unsigned int octet_value = 0;  /* numeric value accumulated so far */
    unsigned int octet_digits = 0; /* digits seen in the current octet */
    unsigned char chara;
    int bit;

    if (value == NULL || flat == NULL) {
        return 1;
    }

    /* A pointer walk has no index to wrap around on inputs longer than 255
     * characters; every iteration either advances or returns. */
    for (;; cursor++) {
        chara = *cursor;

        if (chara >= '0' && chara <= '9') {
            octet_value = octet_value * 10 + (unsigned int) (chara - '0');
            if (octet_value > 255) {
                /* Reject instead of silently wrapping around. */
                return 1;
            }
            octet_digits++;
            continue;
        }

        if (chara != '.' && chara != '\0') {
            return 1;
        }
        if (octet_digits == 0) {
            /* Empty octet, e.g. "1..2.3" or a trailing dot. */
            return 1;
        }
        if (chara == '.' && octet_index >= 3) {
            /* A fourth separator would push writes past flat[31]. */
            return 1;
        }
        if (chara == '\0' && octet_index != 3) {
            /* Fewer than four octets. */
            return 1;
        }

        /* Emit the octet as 8 characters, filling from the least
         * significant bit backwards. */
        for (bit = 7; bit >= 0; bit--) {
            flat[octet_index * 8 + (unsigned int) bit] =
                (wchar_t) (L'0' + (octet_value & 1));
            octet_value >>= 1;
        }

        if (chara == '\0') {
            return 0;
        }
        octet_index++;
        octet_value = 0;
        octet_digits = 0;
    }
}

View file

@ -7,7 +7,7 @@ import typing
import ipaddress import ipaddress
import enum import enum
import time import time
import pprint import ctypes
""" """
Utility functions to interact with the database. Utility functions to interact with the database.
@ -20,6 +20,8 @@ C = None # Cursor
TIME_DICT: typing.Dict[str, float] = dict() TIME_DICT: typing.Dict[str, float] = dict()
TIME_LAST = time.perf_counter() TIME_LAST = time.perf_counter()
TIME_STEP = 'start' TIME_STEP = 'start'
ACCEL = ctypes.cdll.LoadLibrary('./libaccel.so')
ACCEL_IP4_BUF = ctypes.create_unicode_buffer('Z'*32, 32)
def time_step(step: str) -> None: def time_step(step: str) -> None:
@ -127,9 +129,12 @@ def ip_flat(address: ipaddress.IPv4Address) -> str:
return ''.join(map(str, ip_get_bits(address))) return ''.join(map(str, ip_get_bits(address)))
def ip4_flat(address: str) -> str: def ip4_flat(address: bytes) -> typing.Optional[str]:
return '{:08b}{:08b}{:08b}{:08b}'.format( carg = ctypes.c_char_p(address)
*[int(c) for c in address.split('.')]) ret = ACCEL.ip4_flat(carg, ACCEL_IP4_BUF)
if ret != 0:
return None
return ACCEL_IP4_BUF.value
RULE_IP4NETWORK_COMMAND = \ RULE_IP4NETWORK_COMMAND = \
@ -165,23 +170,22 @@ FEED_A_COMMAND_UPSERT = \
'WHERE updated=0 OR firstparty<?' 'WHERE updated=0 OR firstparty<?'
def feed_a(name: str, value_ip: str) -> None: def feed_a(name: bytes, value_ip: bytes) -> None:
assert C assert C
assert CONN assert CONN
time_step('a_flat') time_step('a_flat')
try: value_dec = ip4_flat(value_ip)
value = ip4_flat(value_ip) if value_dec is None:
except (ValueError, IndexError):
# Malformed IPs # Malformed IPs
return return
time_step('a_fetch') time_step('a_fetch')
C.execute(FEED_A_COMMAND_FETCH, (value,)) C.execute(FEED_A_COMMAND_FETCH, (value_dec,))
base = C.fetchone() base = C.fetchone()
time_step('a_fetch_confirm') time_step('a_fetch_confirm')
if not base: if not base:
return return
b_key, b_firstparty = base b_key, b_firstparty = base
if not value.startswith(b_key): if not value_dec.startswith(b_key):
return return
name = name[::-1] name = name[::-1]
time_step('a_upsert') time_step('a_upsert')
@ -212,23 +216,25 @@ FEED_CNAME_COMMAND_UPSERT = \
'WHERE updated=0 OR firstparty<?' 'WHERE updated=0 OR firstparty<?'
def feed_cname(name: str, value: str) -> None: def feed_cname(name: bytes, value: bytes) -> None:
assert C assert C
assert CONN assert CONN
time_step('cname_decode')
value = value[::-1] value = value[::-1]
value_dec = value.decode()
time_step('cname_fetch') time_step('cname_fetch')
C.execute(FEED_CNAME_COMMAND_FETCH, (value,)) C.execute(FEED_CNAME_COMMAND_FETCH, (value_dec,))
base = C.fetchone() base = C.fetchone()
time_step('cname_fetch_confirm') time_step('cname_fetch_confirm')
if not base: if not base:
# Should only happen at an extremum of the database # Should only happen at an extremum of the database
return return
b_key, b_type, b_firstparty = base b_key, b_type, b_firstparty = base
matching = b_key == value[:len(b_key)] and ( matching = b_key == value_dec[:len(b_key)] and (
len(value) == len(b_key) len(value_dec) == len(b_key)
or ( or (
b_type == RowType.DomainTree.value b_type == RowType.DomainTree.value
and value[len(b_key)] == '.' and value_dec[len(b_key)] == '.'
) )
) )
if not matching: if not matching:

View file

@ -2,11 +2,12 @@
-- in database.py on changes to this file -- in database.py on changes to this file
CREATE TABLE blocking ( CREATE TABLE blocking (
key text PRIMARY KEY, -- Contains the reversed domain name or IP in binary form key TEXT PRIMARY KEY, -- Contains the reversed domain name or IP in binary form
source TEXT, -- The rule this one is based on source TEXT, -- The rule this one is based on
type INTEGER, -- Type of the field: 1: AS, 2: domain tree, 3: domain, 4: IPv4 network, 6: IPv6 network type INTEGER, -- Type of the field: 1: AS, 2: domain tree, 3: domain, 4: IPv4 network, 6: IPv6 network
updated INTEGER, -- If the row was updated during last data import (0: No, 1: Yes) updated INTEGER, -- If the row was updated during last data import (0: No, 1: Yes)
firstparty INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party) firstparty INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
-- refs INTEGER, -- Which blocking list this row is issued from (0: first-party, 1: multi-party)
FOREIGN KEY (source) REFERENCES blocking(key) ON DELETE CASCADE FOREIGN KEY (source) REFERENCES blocking(key) ON DELETE CASCADE
); );
CREATE INDEX "blocking_type_updated_key" ON "blocking" ( CREATE INDEX "blocking_type_updated_key" ON "blocking" (
@ -17,6 +18,6 @@ CREATE INDEX "blocking_type_updated_key" ON "blocking" (
-- Store various things -- Store various things
CREATE TABLE meta ( CREATE TABLE meta (
key text PRIMARY KEY, key TEXT PRIMARY KEY,
value integer value integer
); );

View file

@ -5,8 +5,8 @@ import argparse
import sys import sys
FUNCTION_MAP = { FUNCTION_MAP = {
'a': database.feed_a, b'a': database.feed_a,
'cname': database.feed_cname, b'cname': database.feed_cname,
} }
if __name__ == '__main__': if __name__ == '__main__':
@ -15,7 +15,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="TODO") description="TODO")
parser.add_argument( parser.add_argument(
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin, '-i', '--input', type=argparse.FileType('rb'), default=sys.stdin.buffer,
help="TODO") help="TODO")
args = parser.parse_args() args = parser.parse_args()
@ -23,9 +23,10 @@ if __name__ == '__main__':
try: try:
database.time_step('iowait') database.time_step('iowait')
line: bytes
for line in args.input: for line in args.input:
database.time_step('feed_json_parse') database.time_step('feed_json_parse')
split = line.split('"') split = line.split(b'"')
name = split[7] name = split[7]
dtype = split[11] dtype = split[11]
value = split[15] value = split[15]