diff --git a/db.py b/db.py index 4ecec6b..91d00c5 100755 --- a/db.py +++ b/db.py @@ -18,14 +18,16 @@ if __name__ == '__main__': help="Remove old entries from database") parser.add_argument( '-b', '--prune-base', action='store_true', - help="TODO") + help="With --prune, only prune base rules " + "(the ones added by ./feed_rules.py)") parser.add_argument( '-s', '--prune-before', type=int, default=(int(time.time()) - 60*60*24*31*6), - help="TODO") + help="With --prune, only rules updated before " + "this UNIX timestamp will be deleted") parser.add_argument( '-r', '--references', action='store_true', - help="Update the reference count") + help="DEBUG: Update the reference count") args = parser.parse_args() if not args.initialize: diff --git a/export.py b/export.py index 8dcf2c5..c5eefb2 100755 --- a/export.py +++ b/export.py @@ -9,31 +9,36 @@ if __name__ == '__main__': # Parsing arguments parser = argparse.ArgumentParser( - description="TODO") + description="Export the hostnames rules stored " + "in the Database as plain text") parser.add_argument( '-o', '--output', type=argparse.FileType('w'), default=sys.stdout, - help="TODO") + help="Output file, one rule per line") parser.add_argument( '-f', '--first-party', action='store_true', - help="TODO") + help="Only output rules issued from first-party sources") parser.add_argument( '-e', '--end-chain', action='store_true', - help="TODO") + help="Only output rules that are not referenced by any other") parser.add_argument( '-r', '--rules', action='store_true', - help="TODO") + help="Output all kinds of rules, not just hostnames") parser.add_argument( '-b', '--base-rules', action='store_true', - help="TODO implies rules") + help="Output base rules " + "(the ones added by ./feed_rules.py) " + "(implies --rules)") parser.add_argument( '-d', '--no-dupplicates', action='store_true', - help="TODO") + help="Do not output rules that already match a zone/network rule " + "(e.g. dummy.example.com when there's a zone example.com rule)") parser.add_argument( '-x', '--explain', action='store_true', - help="TODO") + help="Show the chain of rules leading to one " + "(and the number of references they have)") parser.add_argument( '-c', '--count', action='store_true', - help="TODO") + help="Show the number of rules per type instead of listing them") args = parser.parse_args() DB = database.Database() diff --git a/feed_asn.py b/feed_asn.py index 6acfba7..25a35e2 100755 --- a/feed_asn.py +++ b/feed_asn.py @@ -36,7 +36,7 @@ if __name__ == '__main__': # Parsing arguments parser = argparse.ArgumentParser( - description="TODO") + description="Add the IP ranges associated to the AS in the database") args = parser.parse_args() DB = database.Database() diff --git a/feed_dns.py b/feed_dns.py index 03b9429..74fe1dd 100755 --- a/feed_dns.py +++ b/feed_dns.py @@ -184,35 +184,40 @@ if __name__ == '__main__': # Parsing arguments log = logging.getLogger('feed_dns') args_parser = argparse.ArgumentParser( - description="TODO") + description="Read DNS records and import " + "tracking-relevant data into the database") args_parser.add_argument( 'parser', choices=PARSERS.keys(), - help="TODO") + help="Input format") args_parser.add_argument( '-i', '--input', type=argparse.FileType('r'), default=sys.stdin, - help="TODO") - args_parser.add_argument( - '-j', '--workers', type=int, default=4, - help="TODO") + help="Input file") args_parser.add_argument( '-b', '--block-size', type=int, default=1024, - help="TODO") + help="Performance tuning value") args_parser.add_argument( '-q', '--queue-size', type=int, default=128, - help="TODO") + help="Performance tuning value") args_parser.add_argument( '-a', '--autosave-interval', type=int, default=900, - help="TODO seconds") + help="Interval to which the database will save in seconds. " + "0 to disable.") args_parser.add_argument( '-4', '--ip4-cache', type=int, default=0, - help="TODO bytes max 512 MiB") + help="RAM cache for faster IPv4 lookup. " + "Maximum useful value: 512 MiB (536870912). " + "Warning: Depending on the rules, this might already " + "be a memory-heavy process, even without the cache.") args = args_parser.parse_args() recs_queue: multiprocessing.Queue = multiprocessing.Queue( - maxsize=args.queue_size) + maxsize=args.queue_size) - writer = Writer(recs_queue, autosave_interval=args.autosave_interval, ip4_cache=args.ip4_cache) + writer = Writer(recs_queue, + autosave_interval=args.autosave_interval, + ip4_cache=args.ip4_cache + ) writer.start() parser = PARSERS[args.parser](args.input, recs_queue, args.block_size) diff --git a/feed_rules.py b/feed_rules.py index 0889900..9d0365f 100755 --- a/feed_rules.py +++ b/feed_rules.py @@ -7,22 +7,24 @@ import time FUNCTION_MAP = { 'zone': database.Database.set_zone, - 'ip4network': database.Database.set_ip4network, + 'hostname': database.Database.set_hostname, 'asn': database.Database.set_asn, + 'ip4network': database.Database.set_ip4network, + 'ip4address': database.Database.set_ip4address, } if __name__ == '__main__': # Parsing arguments parser = argparse.ArgumentParser( - description="TODO") + description="Import base rules to the database") parser.add_argument( 'type', choices=FUNCTION_MAP.keys(), help="Type of rule inputed") parser.add_argument( '-i', '--input', type=argparse.FileType('r'), default=sys.stdin, - help="List of domains domains to block (with their subdomains)") + help="File with one rule per line") parser.add_argument( '-f', '--first-party', action='store_true', help="The input only comes from verified first-party sources") diff --git a/import_rapid7.sh b/import_rapid7.sh index 993bfe7..4b5714f 100755 --- a/import_rapid7.sh +++ b/import_rapid7.sh @@ -12,7 +12,7 @@ function feed_rapid7_fdns { # dataset curl -L "$link" | gunzip } -function feed_rapid7_rdns { # dataset +function feed_rapid7_rdns { dataset=$1 line=$(curl -s https://opendata.rapid7.com/sonar.rdns_v2/ | grep "href=\".\+-rdns.json.gz\"") link="https://opendata.rapid7.com$(echo "$line" | cut -d'"' -f2)" diff --git a/validate_list.py b/validate_list.py index 62301c2..23e46d7 100755 --- a/validate_list.py +++ b/validate_list.py @@ -13,16 +13,16 @@ if __name__ == '__main__': # Parsing arguments parser = argparse.ArgumentParser( - description="Filter out invalid domain names.") + description="Filter out invalid domain name/ip addresses from a list.") parser.add_argument( '-i', '--input', type=argparse.FileType('r'), default=sys.stdin, - help="TODO") + help="Input file, one element per line") parser.add_argument( '-o', '--output', type=argparse.FileType('w'), default=sys.stdout, - help="TODO") + help="Output file, one element per line") parser.add_argument( '-d', '--domain', action='store_true', - help="Can be domain") + help="Can be domain name") parser.add_argument( '-4', '--ip4', action='store_true', help="Can be IP4")