Workflow: Multiprocess
Still trying. It's better than multithreading, though.

Merge branch 'newworkflow' into newworkflow_threaded
Commit 189deeb559
15 changed files with 512 additions and 279 deletions
feed_dns.py (184 changes, Normal file → Executable file)
@@ -1,23 +1,43 @@
 #!/usr/bin/env python3
 
-import database
 import argparse
-import sys
+import database
 import json
 import logging
-import threading
-import queue
+import sys
 import typing
+import multiprocessing
 
-NUMBER_THREADS = 8
+NUMBER_THREADS = 2
+BLOCK_SIZE = 100
+
+# select, confirm, write
+FUNCTION_MAP: typing.Any = {
+    'a': (
+        database.Database.get_ip4,
+        database.Database.get_domain_in_zone,
+        database.Database.set_hostname,
+    ),
+    'cname': (
+        database.Database.get_domain,
+        database.Database.get_domain_in_zone,
+        database.Database.set_hostname,
+    ),
+    'ptr': (
+        database.Database.get_domain,
+        database.Database.get_ip4_in_network,
+        database.Database.set_ip4address,
+    ),
+}
 
 
-class Worker(threading.Thread):
+class Reader(multiprocessing.Process):
     def __init__(self,
-                 lines_queue: queue.Queue,
-                 write_queue: queue.Queue,
+                 lines_queue: multiprocessing.Queue,
+                 write_queue: multiprocessing.Queue,
                  index: int = 0):
-        super(Worker, self).__init__()
-        self.log = logging.getLogger(f'worker{index:03d}')
+        super(Reader, self).__init__()
+        self.log = logging.getLogger(f'rd{index:03d}')
         self.lines_queue = lines_queue
         self.write_queue = write_queue
         self.index = index
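A note on the new FUNCTION_MAP: it collapses the worker's old if/elif chain into one (select, confirm, write) tuple per DNS record type, so handling a record becomes a single dictionary lookup. Below is a minimal, runnable sketch of the same dispatch pattern; the stub functions are hypothetical stand-ins for the real database.Database methods, not part of this repository.

import typing

def select_ip4(value: str) -> typing.Iterator[str]:
    yield 'some-rule'  # pretend a matching rule was found

def confirm_domain(name: str) -> typing.Iterator[bool]:
    yield False  # pretend the name is not already covered

def write_hostname(name: str, updated: int) -> None:
    print(f'would record {name} (updated {updated})')

# select, confirm, write -- same shape as FUNCTION_MAP in the diff
FUNCTION_MAP = {
    'a': (select_ip4, confirm_domain, write_hostname),
}

def handle(dtype: str, updated: int, name: str, value: str) -> None:
    select, confirm, write = FUNCTION_MAP[dtype]  # one lookup, no if/elif
    for _rule in select(value):
        if not any(confirm(name)):
            write(name, updated)

handle('a', 1570000000, 'example.com', '93.184.216.34')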
@@ -25,45 +45,51 @@ class Worker(threading.Thread):
     def run(self) -> None:
         self.db = database.Database(write=False)
         self.db.log = logging.getLogger(f'db{self.index:03d}')
-        self.db.enter_step('wait_line')
-        line: str
-        for line in iter(self.lines_queue.get, None):
-            self.db.enter_step('feed_json_parse')
-            # split = line.split(b'"')
-            split = line.split('"')
-            try:
-                name = split[7]
-                dtype = split[11]
-                value = split[15]
-            except IndexError:
-                log.error("Invalid JSON: %s", line)
-                continue
-            # DB.enter_step('feed_json_assert')
-            # data = json.loads(line)
-            # assert dtype == data['type']
-            # assert name == data['name']
-            # assert value == data['value']
-
-            self.db.enter_step('feed_switch')
-            if dtype == 'a':
-                for rule in self.db.get_ip4(value):
-                    self.db.enter_step('wait_put')
-                    self.write_queue.put(
-                        (database.Database.set_hostname, name, rule))
-            elif dtype == 'cname':
-                for rule in self.db.get_domain(value):
-                    self.db.enter_step('wait_put')
-                    self.write_queue.put(
-                        (database.Database.set_hostname, name, rule))
-            elif dtype == 'ptr':
-                for rule in self.db.get_domain(value):
-                    self.db.enter_step('wait_put')
-                    self.write_queue.put(
-                        (database.Database.set_ip4address, name, rule))
-            self.db.enter_step('wait_line')
+        self.db.enter_step('line_wait')
+        block: typing.List[str]
+        try:
+            for block in iter(self.lines_queue.get, None):
+                for line in block:
+                    dtype, updated, name, value = line
+                    self.db.enter_step('feed_switch')
+                    select, confirm, write = FUNCTION_MAP[dtype]
+                    for rule in select(self.db, value):
+                        if not any(confirm(self.db, name)):
+                            self.db.enter_step('wait_put')
+                            self.write_queue.put((write, name, updated))
+                self.db.enter_step('line_wait')
+        except KeyboardInterrupt:
+            self.log.error('Interrupted')
 
         self.db.enter_step('end')
         self.db.close()
 
 
+class Writer(multiprocessing.Process):
+    def __init__(self,
+                 write_queue: multiprocessing.Queue,
+                 ):
+        super(Writer, self).__init__()
+        self.log = logging.getLogger(f'wr ')
+        self.write_queue = write_queue
+
+    def run(self) -> None:
+        self.db = database.Database(write=True)
+        self.db.log = logging.getLogger(f'dbw ')
+        self.db.enter_step('line_wait')
+        block: typing.List[str]
+        try:
+            fun: typing.Callable
+            name: str
+            updated: int
+            for fun, name, updated in iter(self.write_queue.get, None):
+                self.db.enter_step('exec')
+                fun(self.db, name, updated)
+                self.db.enter_step('line_wait')
+        except KeyboardInterrupt:
+            self.log.error('Interrupted')
+
+        self.db.enter_step('end')
+        self.write_queue.put(None)
+        self.db.close()
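The hunk above sets up a multi-producer, single-consumer layout: several Reader processes pull blocks from lines_queue and emit (function, name, updated) operations onto write_queue, while a single Writer holds the only write-enabled database handle, keeping all writes on one connection. Here is a minimal sketch of the same queue-and-sentinel protocol using only the standard library; the worker bodies are placeholders, not the project's logic. iter(q.get, None) loops until a None sentinel arrives.

import multiprocessing

def reader(lines_queue: multiprocessing.Queue,
           write_queue: multiprocessing.Queue) -> None:
    # iter(get, None) keeps consuming until a None sentinel arrives
    for item in iter(lines_queue.get, None):
        write_queue.put(item * 2)  # placeholder for the select/confirm work

def writer(write_queue: multiprocessing.Queue) -> None:
    for item in iter(write_queue.get, None):
        print('write', item)  # placeholder for the single write-enabled handle

if __name__ == '__main__':
    lines_queue: multiprocessing.Queue = multiprocessing.Queue()
    write_queue: multiprocessing.Queue = multiprocessing.Queue()
    readers = [multiprocessing.Process(target=reader,
                                       args=(lines_queue, write_queue))
               for _ in range(2)]
    wr = multiprocessing.Process(target=writer, args=(write_queue,))
    for p in readers:
        p.start()
    wr.start()
    for n in range(10):
        lines_queue.put(n)
    for _ in readers:        # one sentinel per reader
        lines_queue.put(None)
    for p in readers:
        p.join()
    write_queue.put(None)    # readers are done; now stop the writer
    wr.join()

One sentinel per reader ends the read loops; the writer's sentinel can only go in once no reader can still produce, which is why it is posted after the reader joins here.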
@@ -80,42 +106,52 @@ if __name__ == '__main__':
     args = parser.parse_args()
 
     DB = database.Database(write=False)  # Not needed, just for timing
-    DB.log = logging.getLogger('dbf')
-    DBW = database.Database(write=True)
-    DBW.log = logging.getLogger('dbw')
+    DB.log = logging.getLogger('db ')
 
-    lines_queue: queue.Queue = queue.Queue(maxsize=NUMBER_THREADS)
-    write_queue: queue.Queue = queue.Queue(maxsize=NUMBER_THREADS)
+    lines_queue: multiprocessing.Queue = multiprocessing.Queue(maxsize=100)
+    write_queue: multiprocessing.Queue = multiprocessing.Queue(maxsize=100)
 
-    def fill_lines_queue() -> None:
+    DB.enter_step('proc_create')
+    readers: typing.List[Reader] = list()
+    for w in range(NUMBER_THREADS):
+        readers.append(Reader(lines_queue, write_queue, w))
+    writer = Writer(write_queue)
+
+    DB.enter_step('proc_start')
+    for reader in readers:
+        reader.start()
+    writer.start()
+
+    try:
+        block: typing.List[str] = list()
         DB.enter_step('iowait')
         for line in args.input:
-            DB.enter_step('wait_put')
-            lines_queue.put(line)
+            DB.enter_step('feed_json_parse')
+            data = json.loads(line)
+            line = (data['type'],
+                    int(data['timestamp']),
+                    data['name'],
+                    data['value'])
+            DB.enter_step('block_append')
+            block.append(line)
+            if len(block) >= BLOCK_SIZE:
+                DB.enter_step('wait_put')
+                lines_queue.put(block)
+                block = list()
             DB.enter_step('iowait')
+        DB.enter_step('wait_put')
+        lines_queue.put(block)
 
         DB.enter_step('end_put')
         for _ in range(NUMBER_THREADS):
             lines_queue.put(None)
-        write_queue.put(None)
 
-    for w in range(NUMBER_THREADS):
-        Worker(lines_queue, write_queue, w).start()
+        DB.enter_step('proc_join')
+        for reader in readers:
+            reader.join()
+        writer.join()
+    except KeyboardInterrupt:
+        log.error('Interrupted')
 
-    threading.Thread(target=fill_lines_queue).start()
-
-    for _ in range(NUMBER_THREADS):
-        fun: typing.Callable
-        name: str
-        source: int
-        DBW.enter_step('wait_fun')
-        for fun, name, source in iter(write_queue.get, None):
-            DBW.enter_step('exec_fun')
-            fun(DBW, name, source=source)
-            DBW.enter_step('commit')
-            DBW.conn.commit()
-            DBW.enter_step('wait_fun')
-
-        DBW.enter_step('end')
-
-    DBW.close()
     DB.close()
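The main loop above no longer enqueues one line at a time: it parses each JSON line into a (type, timestamp, name, value) tuple and ships BLOCK_SIZE of them per queue operation, amortizing the pickling and locking overhead of multiprocessing.Queue. A minimal sketch of that batching step, assuming the same one-JSON-object-per-line input; the sample record and function name are illustrative only.

import json
import multiprocessing
import typing

BLOCK_SIZE = 100

def feed_blocks(lines: typing.Iterable[str],
                lines_queue: multiprocessing.Queue) -> None:
    block: typing.List[typing.Tuple[str, int, str, str]] = list()
    for line in lines:
        data = json.loads(line)
        block.append((data['type'],
                      int(data['timestamp']),
                      data['name'],
                      data['value']))
        if len(block) >= BLOCK_SIZE:
            lines_queue.put(block)  # one queue operation per BLOCK_SIZE lines
            block = list()
    lines_queue.put(block)          # flush the final partial block

if __name__ == '__main__':
    q: multiprocessing.Queue = multiprocessing.Queue()
    sample = ['{"type": "a", "timestamp": "1570000000",'
              ' "name": "example.com", "value": "93.184.216.34"}']
    feed_blocks(sample, q)
    print(q.get())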