#!/usr/bin/env python3
import argparse
import database
import logging
import sys
import typing
import multiprocessing
import time
# A parsed DNS record ready for the writer process:
# (select callback, write callback, updated timestamp, name, value).
Record = typing.Tuple[typing.Callable, typing.Callable, int, str, str]

# Maps a lowercase DNS record type to the pair of database callbacks
# used to process it: (select, write).
FUNCTION_MAP: typing.Any = {
    'a': (
        database.Database.get_ip4,
        database.Database.set_hostname,
    ),
    'cname': (
        database.Database.get_domain,
        database.Database.set_hostname,
    ),
    'ptr': (
        database.Database.get_domain,
        database.Database.set_ip4address,
    ),
}
class Writer(multiprocessing.Process):
    """Worker process that drains blocks of parsed records from a queue
    and feeds them into the database, optionally autosaving on a timer.

    A ``None`` sentinel on the queue signals end of input.
    """

    def __init__(self,
                 recs_queue: multiprocessing.Queue,
                 autosave_interval: int = 0,
                 ip4_cache: int = 0,
                 ):
        super().__init__()
        self.log = logging.getLogger('wr')
        self.recs_queue = recs_queue
        # Seconds between periodic database saves; 0 disables autosaving.
        self.autosave_interval = autosave_interval
        # Size passed to the database's in-RAM IPv4 cache.
        self.ip4_cache = ip4_cache

    def run(self) -> None:
        # The database is opened here, in the child process, rather than
        # in __init__, so its state never crosses the process boundary.
        self.db = database.Database()
        self.db.log = logging.getLogger('wr')
        self.db.fill_ip4cache(max_size=self.ip4_cache)
        if self.autosave_interval > 0:
            next_save = time.time() + self.autosave_interval
        else:
            next_save = 0

        self.db.enter_step('block_wait')
        block: typing.List[Record]
        # iter() with a sentinel: loop until a None block is received.
        for block in iter(self.recs_queue.get, None):
            record: Record
            for record in block:
                select, write, updated, name, value = record
                self.db.enter_step('feed_switch')
                try:
                    # For every source matching the record's value,
                    # store the record's name against that source.
                    for source in select(self.db, value):
                        write(self.db, name, updated, source=source)
                except ValueError:
                    self.log.exception("Cannot execute: %s", record)

            # Autosave between blocks, never mid-block.
            if next_save > 0 and time.time() > next_save:
                self.log.info("Saving database...")
                self.db.save()
                self.log.info("Done!")
                next_save = time.time() + self.autosave_interval

            self.db.enter_step('block_wait')
        self.db.enter_step('end')
        self.db.save()
class Parser():
    """Base class for input parsers.

    Reads lines from *buf*, converts them into ``Record`` tuples and
    ships them to the writer queue in blocks of *block_size* records
    (to amortize queue overhead).
    """

    def __init__(self,
                 buf: typing.Any,
                 recs_queue: multiprocessing.Queue,
                 block_size: int,
                 ):
        super().__init__()
        self.buf = buf
        self.log = logging.getLogger('pr')
        self.recs_queue = recs_queue
        # Records accumulate here until a full block is flushed.
        self.block: typing.List[Record] = []
        self.block_size = block_size
        self.prof = database.Profiler()
        self.prof.log = logging.getLogger('pr')

    def register(self, record: Record) -> None:
        """Buffer one parsed record, flushing the block when full."""
        self.prof.enter_step('register')
        self.block.append(record)
        if len(self.block) >= self.block_size:
            self.prof.enter_step('put_block')
            self.recs_queue.put(self.block)
            self.block = []

    def run(self) -> None:
        """Parse the whole input, then flush the final partial block."""
        self.consume()
        self.recs_queue.put(self.block)
        self.prof.profile()

    def consume(self) -> None:
        """Subclasses implement the actual line parsing here."""
        raise NotImplementedError
class Rapid7Parser(Parser):
    """Parses Rapid7 Open Data forward-DNS JSON lines.

    Fields are extracted positionally from the quote-split line rather
    than with a JSON parser, for speed.
    """

    def consume(self) -> None:
        # NOTE(review): data is deliberately kept across lines, so a line
        # missing a field inherits the previous line's value for that key
        # — confirm this matches the input's guarantees.
        data = dict()
        for line in self.buf:
            self.prof.enter_step('parse_rapid7')
            split = line.split('"')
            try:
                # Keys sit at quote-indices 1, 5, 9, 13; each value two
                # quote-splits later.
                for k in range(1, 14, 4):
                    key = split[k]
                    val = split[k+2]
                    data[key] = val
                select, writer = FUNCTION_MAP[data['type']]
                record = (
                    select,
                    writer,
                    int(data['timestamp']),
                    data['name'],
                    data['value']
                )
            except (IndexError, KeyError):
                # Bug fix: previously fell through and re-registered the
                # previous line's record (or raised NameError on the first
                # line). Skip malformed/unknown-type lines instead.
                # KeyError also covers record types absent from FUNCTION_MAP.
                self.log.exception("Cannot parse: %s", line)
                continue
            self.register(record)
class MassDnsParser(Parser):
    """Parses massdns output in 'Snrql' format
    (massdns --output Snrql
     --retry REFUSED,SERVFAIL --resolvers nameservers-ipv4).
    """

    # type -> (select, write, name slice end, value slice end).
    # The -1 offsets presumably strip a trailing dot from domain names;
    # None keeps the field whole (IPv4 values) — TODO confirm against
    # actual massdns output.
    TYPES = {
        'A': (FUNCTION_MAP['a'][0], FUNCTION_MAP['a'][1], -1, None),
        # 'AAAA': (FUNCTION_MAP['aaaa'][0], FUNCTION_MAP['aaaa'][1], -1, None),
        'CNAME': (FUNCTION_MAP['cname'][0], FUNCTION_MAP['cname'][1], -1, -1),
    }

    def consume(self) -> None:
        self.prof.enter_step('parse_massdns')
        timestamp = 0
        header = True
        for raw_line in self.buf:
            stripped = raw_line[:-1]  # drop the trailing newline
            if not stripped:
                # Blank line separates entries; the next line is a header.
                header = True
                continue
            fields = stripped.split(' ')
            try:
                if header:
                    # Header line carries the entry's timestamp.
                    timestamp = int(fields[1])
                    header = False
                else:
                    select, write, name_end, value_end = \
                        MassDnsParser.TYPES[fields[1]]
                    self.register((
                        select,
                        write,
                        timestamp,
                        fields[0][:name_end],
                        fields[2][:value_end],
                    ))
                    self.prof.enter_step('parse_massdns')
            except KeyError:
                # Record type not in TYPES: skip this line.
                continue
# Input-format name (the CLI 'parser' argument) -> parser implementation.
PARSERS = {
    'rapid7': Rapid7Parser,
    'massdns': MassDnsParser,
}
if __name__ == '__main__':
    # Parse command-line arguments.
    args_parser = argparse.ArgumentParser(
        description="Read DNS records and import "
        "tracking-relevant data into the database")
    args_parser.add_argument(
        'parser',
        choices=PARSERS.keys(),
        help="Input format")
    args_parser.add_argument(
        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
        help="Input file")
    args_parser.add_argument(
        '-b', '--block-size', type=int, default=1024,
        help="Performance tuning value")
    args_parser.add_argument(
        '-q', '--queue-size', type=int, default=128,
        help="Performance tuning value")
    args_parser.add_argument(
        '-a', '--autosave-interval', type=int, default=900,
        help="Interval to which the database will save in seconds. "
        "0 to disable.")
    args_parser.add_argument(
        '-4', '--ip4-cache', type=int, default=0,
        help="RAM cache for faster IPv4 lookup. "
        "Maximum useful value: 512 MiB (536870912). "
        "Warning: Depending on the rules, this might already "
        "be a memory-heavy process, even without the cache.")
    args = args_parser.parse_args()

    # Bounded queue provides back-pressure between parser and writer.
    recs_queue: multiprocessing.Queue = multiprocessing.Queue(
        maxsize=args.queue_size)

    writer = Writer(recs_queue,
                    autosave_interval=args.autosave_interval,
                    ip4_cache=args.ip4_cache
                    )
    writer.start()

    try:
        parser = PARSERS[args.parser](args.input, recs_queue, args.block_size)
        parser.run()
    finally:
        # Always send the end-of-stream sentinel and reap the writer,
        # even if parsing failed — otherwise the writer process would
        # block forever on the queue and never save.
        recs_queue.put(None)
        writer.join()