Workflow: Multiprocess

Still trying.
It's better than multithreading, though.

Merge branch 'newworkflow' into newworkflow_threaded
Geoffrey Frogeye 2019-12-14 17:27:46 +01:00
commit 189deeb559
Signed by: geoffrey
GPG key ID: D8A7ECA00A8CD3DD
15 changed files with 512 additions and 279 deletions
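For context on the commit message's multiprocess-over-multithread claim: the shape this diff moves to is a three-stage multiprocessing pipeline, where the main process parses stdin into blocks, several Reader processes match entries against the database, and a single Writer process applies every mutation. A minimal sketch of that queue-and-sentinel shape, independent of this repo's database module (all names below are illustrative only):

import multiprocessing

def consume(q: multiprocessing.Queue) -> None:
    # iter(q.get, None) yields items until the None sentinel arrives
    for item in iter(q.get, None):
        print('got', item)

if __name__ == '__main__':
    q: multiprocessing.Queue = multiprocessing.Queue(maxsize=100)
    workers = [multiprocessing.Process(target=consume, args=(q,))
               for _ in range(2)]
    for w in workers:
        w.start()
    for item in range(10):
        q.put(item)
    for _ in workers:
        q.put(None)  # one sentinel per consumer, as feed_dns.py does
    for w in workers:
        w.join()

Unlike threads, each process gets its own interpreter, so the GIL stops being the bottleneck; the price is that everything crossing a Queue must be picklable.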

feed_dns.py (Normal file → Executable file, 184 changed lines)

@@ -1,23 +1,43 @@
 #!/usr/bin/env python3

-import database
 import argparse
-import sys
+import database
 import json
 import logging
-import threading
-import queue
+import sys
 import typing
+import multiprocessing

-NUMBER_THREADS = 8
+NUMBER_THREADS = 2
+BLOCK_SIZE = 100
+
+# select, confirm, write
+FUNCTION_MAP: typing.Any = {
+    'a': (
+        database.Database.get_ip4,
+        database.Database.get_domain_in_zone,
+        database.Database.set_hostname,
+    ),
+    'cname': (
+        database.Database.get_domain,
+        database.Database.get_domain_in_zone,
+        database.Database.set_hostname,
+    ),
+    'ptr': (
+        database.Database.get_domain,
+        database.Database.get_ip4_in_network,
+        database.Database.set_ip4address,
+    ),
+}

-class Worker(threading.Thread):
+class Reader(multiprocessing.Process):
     def __init__(self,
-                 lines_queue: queue.Queue,
-                 write_queue: queue.Queue,
+                 lines_queue: multiprocessing.Queue,
+                 write_queue: multiprocessing.Queue,
                  index: int = 0):
-        super(Worker, self).__init__()
-        self.log = logging.getLogger(f'worker{index:03d}')
+        super(Reader, self).__init__()
+        self.log = logging.getLogger(f'rd{index:03d}')
         self.lines_queue = lines_queue
         self.write_queue = write_queue
         self.index = index
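A note on the FUNCTION_MAP introduced above: it replaces the old if/elif chain with a lookup table of unbound methods, so every entry must be called with the Database instance passed explicitly as the first argument. A toy version of that dispatch style (the Store class here is invented for illustration):

class Store:
    def get(self, key: str) -> str:
        return key.upper()

    def put(self, key: str, value: str) -> None:
        print(key, '=', value)

# unbound methods keyed by record type; the instance is supplied at call time
ACTIONS = {'a': (Store.get, Store.put)}

store = Store()
select, write = ACTIONS['a']
write(store, 'name', select(store, 'value'))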
@@ -25,45 +45,51 @@ class Worker(threading.Thread):
     def run(self) -> None:
         self.db = database.Database(write=False)
         self.db.log = logging.getLogger(f'db{self.index:03d}')
-        self.db.enter_step('wait_line')
-        line: str
-        for line in iter(self.lines_queue.get, None):
-            self.db.enter_step('feed_json_parse')
-            # split = line.split(b'"')
-            split = line.split('"')
-            try:
-                name = split[7]
-                dtype = split[11]
-                value = split[15]
-            except IndexError:
-                log.error("Invalid JSON: %s", line)
-                continue
-            # DB.enter_step('feed_json_assert')
-            # data = json.loads(line)
-            # assert dtype == data['type']
-            # assert name == data['name']
-            # assert value == data['value']
-            self.db.enter_step('feed_switch')
-            if dtype == 'a':
-                for rule in self.db.get_ip4(value):
-                    self.db.enter_step('wait_put')
-                    self.write_queue.put(
-                        (database.Database.set_hostname, name, rule))
-            elif dtype == 'cname':
-                for rule in self.db.get_domain(value):
-                    self.db.enter_step('wait_put')
-                    self.write_queue.put(
-                        (database.Database.set_hostname, name, rule))
-            elif dtype == 'ptr':
-                for rule in self.db.get_domain(value):
-                    self.db.enter_step('wait_put')
-                    self.write_queue.put(
-                        (database.Database.set_ip4address, name, rule))
-            self.db.enter_step('wait_line')
+        self.db.enter_step('line_wait')
+        block: typing.List[str]
+        try:
+            for block in iter(self.lines_queue.get, None):
+                for line in block:
+                    dtype, updated, name, value = line
+                    self.db.enter_step('feed_switch')
+                    select, confirm, write = FUNCTION_MAP[dtype]
+                    for rule in select(self.db, value):
+                        if not any(confirm(self.db, name)):
+                            self.db.enter_step('wait_put')
+                            self.write_queue.put((write, name, updated))
+                    self.db.enter_step('line_wait')
+        except KeyboardInterrupt:
+            self.log.error('Interrupted')
+        self.db.enter_step('end')
         self.db.close()
+
+
+class Writer(multiprocessing.Process):
+    def __init__(self,
+                 write_queue: multiprocessing.Queue,
+                 ):
+        super(Writer, self).__init__()
+        self.log = logging.getLogger(f'wr ')
+        self.write_queue = write_queue
+
+    def run(self) -> None:
+        self.db = database.Database(write=True)
+        self.db.log = logging.getLogger(f'dbw ')
+        self.db.enter_step('line_wait')
+        block: typing.List[str]
+        try:
+            fun: typing.Callable
+            name: str
+            updated: int
+            for fun, name, updated in iter(self.write_queue.get, None):
+                self.db.enter_step('exec')
+                fun(self.db, name, updated)
+                self.db.enter_step('line_wait')
+        except KeyboardInterrupt:
+            self.log.error('Interrupted')
+        self.db.enter_step('end')
+        self.write_queue.put(None)
+        self.db.close()
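The single Writer process above is the serialization point: only it opens the database with write=True, and the Readers ship it (function, name, updated) tuples rather than writing themselves. Stripped of the instrumentation, the writer loop is essentially the following (a sketch; set_value stands in for the real database methods):

import multiprocessing

def set_value(name: str, updated: int) -> None:
    print('write', name, updated)  # stand-in for a real database mutation

def writer(q: multiprocessing.Queue) -> None:
    # apply queued mutations one at a time; a single process owns the write handle
    for fun, name, updated in iter(q.get, None):
        fun(name, updated)

if __name__ == '__main__':
    q: multiprocessing.Queue = multiprocessing.Queue()
    p = multiprocessing.Process(target=writer, args=(q,))
    p.start()
    q.put((set_value, 'example.com', 0))
    q.put(None)
    p.join()

Functions sent through a Queue must be picklable, hence module-level functions (or the unbound methods stored in FUNCTION_MAP) rather than closures.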
@@ -80,42 +106,52 @@ if __name__ == '__main__':
     args = parser.parse_args()

     DB = database.Database(write=False)  # Not needed, just for timing
-    DB.log = logging.getLogger('dbf')
-    DBW = database.Database(write=True)
-    DBW.log = logging.getLogger('dbw')
+    DB.log = logging.getLogger('db ')

-    lines_queue: queue.Queue = queue.Queue(maxsize=NUMBER_THREADS)
-    write_queue: queue.Queue = queue.Queue(maxsize=NUMBER_THREADS)
+    lines_queue: multiprocessing.Queue = multiprocessing.Queue(maxsize=100)
+    write_queue: multiprocessing.Queue = multiprocessing.Queue(maxsize=100)

-    def fill_lines_queue() -> None:
+    DB.enter_step('proc_create')
+    readers: typing.List[Reader] = list()
+    for w in range(NUMBER_THREADS):
+        readers.append(Reader(lines_queue, write_queue, w))
+    writer = Writer(write_queue)
+
+    DB.enter_step('proc_start')
+    for reader in readers:
+        reader.start()
+    writer.start()
+
+    try:
+        block: typing.List[str] = list()
         DB.enter_step('iowait')
         for line in args.input:
-            DB.enter_step('wait_put')
-            lines_queue.put(line)
+            DB.enter_step('block_append')
+            DB.enter_step('feed_json_parse')
+            data = json.loads(line)
+            line = (data['type'],
+                    int(data['timestamp']),
+                    data['name'],
+                    data['value'])
+            block.append(line)
+            if len(block) >= BLOCK_SIZE:
+                DB.enter_step('wait_put')
+                lines_queue.put(block)
+                block = list()
             DB.enter_step('iowait')
+        DB.enter_step('wait_put')
+        lines_queue.put(block)

         DB.enter_step('end_put')
         for _ in range(NUMBER_THREADS):
             lines_queue.put(None)
+        write_queue.put(None)

-    for w in range(NUMBER_THREADS):
-        Worker(lines_queue, write_queue, w).start()
+        DB.enter_step('proc_join')
+        for reader in readers:
+            reader.join()
+        writer.join()
+    except KeyboardInterrupt:
+        log.error('Interrupted')

-    threading.Thread(target=fill_lines_queue).start()
-
-    for _ in range(NUMBER_THREADS):
-        fun: typing.Callable
-        name: str
-        source: int
-        DBW.enter_step('wait_fun')
-        for fun, name, source in iter(write_queue.get, None):
-            DBW.enter_step('exec_fun')
-            fun(DBW, name, source=source)
-        DBW.enter_step('commit')
-        DBW.conn.commit()
-        DBW.enter_step('wait_fun')
-
-    DBW.enter_step('end')
-    DBW.close()
     DB.close()
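Two details of the feeding loop above are worth spelling out. First, the BLOCK_SIZE batching exists because every Queue.put pickles its payload and pays an IPC round trip; shipping 100 parsed tuples per put amortizes that overhead. The loop reduces to roughly this (a sketch, with the JSON parsing elided):

BLOCK_SIZE = 100

def feed(lines, q):
    block = []
    for line in lines:
        block.append(line)        # in feed_dns.py: a (type, timestamp, name, value) tuple
        if len(block) >= BLOCK_SIZE:
            q.put(block)          # one IPC transfer per BLOCK_SIZE lines
            block = []
    q.put(block)                  # flush the final partial block

Second, shutdown ordering: the main process posts the Writer's None sentinel right after feeding ends, before the Readers are joined, so writes still in flight when the Writer dequeues its sentinel can be dropped — possibly part of what "still trying" refers to.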