Integrated DNS resolving into the workflow
Since the bigger datasets are only updated once a month, this might help for quick updates.
This commit is contained in:
parent
9050a84670
commit
8d94b80fd0
|
@ -12,12 +12,11 @@ import queue
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import typing
|
import typing
|
||||||
import csv
|
import time
|
||||||
|
|
||||||
import coloredlogs
|
import coloredlogs
|
||||||
import dns.exception
|
import dns.exception
|
||||||
import dns.resolver
|
import dns.resolver
|
||||||
import progressbar
|
|
||||||
|
|
||||||
DNS_TIMEOUT = 5.0
|
DNS_TIMEOUT = 5.0
|
||||||
NUMBER_THREADS = 512
|
NUMBER_THREADS = 512
|
||||||
|
@ -26,8 +25,6 @@ NUMBER_TRIES = 5
|
||||||
# TODO Not all the domains get treated,
|
# TODO Not all the domains get treated,
|
||||||
# so it exits with 4-5 subdomains left unresolved
|
# so it exits with 4-5 subdomains left unresolved
|
||||||
|
|
||||||
glob = None
|
|
||||||
|
|
||||||
|
|
||||||
class Worker(threading.Thread):
|
class Worker(threading.Thread):
|
||||||
"""
|
"""
|
||||||
|
@ -60,7 +57,7 @@ class Worker(threading.Thread):
|
||||||
|
|
||||||
def resolve_subdomain(self, subdomain: str) -> typing.Optional[
|
def resolve_subdomain(self, subdomain: str) -> typing.Optional[
|
||||||
typing.List[
|
typing.List[
|
||||||
str
|
dns.rrset.RRset
|
||||||
]
|
]
|
||||||
]:
|
]:
|
||||||
"""
|
"""
|
||||||
|
@ -93,18 +90,7 @@ class Worker(threading.Thread):
|
||||||
except dns.name.EmptyLabel:
|
except dns.name.EmptyLabel:
|
||||||
self.log.warning("Empty label for %s", subdomain)
|
self.log.warning("Empty label for %s", subdomain)
|
||||||
return None
|
return None
|
||||||
resolved = list()
|
return query.response.answer
|
||||||
last = len(query.response.answer) - 1
|
|
||||||
for a, answer in enumerate(query.response.answer):
|
|
||||||
if answer.rdtype == dns.rdatatype.CNAME:
|
|
||||||
assert a < last
|
|
||||||
resolved.append(answer.items[0].to_text()[:-1])
|
|
||||||
elif answer.rdtype == dns.rdatatype.A:
|
|
||||||
assert a == last
|
|
||||||
resolved.append(answer.items[0].address)
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
return resolved
|
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
self.log.info("Started")
|
self.log.info("Started")
|
||||||
|
@ -124,7 +110,6 @@ class Worker(threading.Thread):
|
||||||
self.log.error("Gave up on %s", subdomain)
|
self.log.error("Gave up on %s", subdomain)
|
||||||
resolved = []
|
resolved = []
|
||||||
|
|
||||||
resolved.insert(0, subdomain)
|
|
||||||
assert isinstance(resolved, list)
|
assert isinstance(resolved, list)
|
||||||
self.orchestrator.results_queue.put(resolved)
|
self.orchestrator.results_queue.put(resolved)
|
||||||
|
|
||||||
|
@ -182,7 +167,23 @@ class Orchestrator():
|
||||||
for _ in range(NUMBER_THREADS):
|
for _ in range(NUMBER_THREADS):
|
||||||
self.subdomains_queue.put(None)
|
self.subdomains_queue.put(None)
|
||||||
|
|
||||||
def run(self) -> typing.Iterable[typing.List[str]]:
|
@staticmethod
|
||||||
|
def format_rrset(rrset: dns.rrset.RRset) -> typing.Iterable[str]:
|
||||||
|
if rrset.rdtype == dns.rdatatype.CNAME:
|
||||||
|
dtype = 'cname'
|
||||||
|
elif rrset.rdtype == dns.rdatatype.A:
|
||||||
|
dtype = 'a'
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
name = rrset.name.to_text()[:-1]
|
||||||
|
for item in rrset.items:
|
||||||
|
value = item.to_text()
|
||||||
|
if rrset.rdtype == dns.rdatatype.CNAME:
|
||||||
|
value = value[:-1]
|
||||||
|
yield '{"timestamp":"' + str(int(time.time())) + '","name":"' + \
|
||||||
|
name + '","type":"' + dtype + '","value":"' + value + '"}\n'
|
||||||
|
|
||||||
|
def run(self) -> typing.Iterable[str]:
|
||||||
"""
|
"""
|
||||||
Yield the results.
|
Yield the results.
|
||||||
"""
|
"""
|
||||||
|
@ -197,9 +198,10 @@ class Orchestrator():
|
||||||
# Wait for one sentinel per worker
|
# Wait for one sentinel per worker
|
||||||
# In the meantime output results
|
# In the meantime output results
|
||||||
for _ in range(NUMBER_THREADS):
|
for _ in range(NUMBER_THREADS):
|
||||||
result: typing.List[str]
|
resolved: typing.List[dns.rrset.RRset]
|
||||||
for result in iter(self.results_queue.get, None):
|
for resolved in iter(self.results_queue.get, None):
|
||||||
yield result
|
for rrset in resolved:
|
||||||
|
yield from self.format_rrset(rrset)
|
||||||
|
|
||||||
self.log.info("Waiting for reader thread")
|
self.log.info("Waiting for reader thread")
|
||||||
fill_thread.join()
|
fill_thread.join()
|
||||||
|
@ -214,16 +216,14 @@ def main() -> None:
|
||||||
the last CNAME resolved and the IP address it resolves to.
|
the last CNAME resolved and the IP address it resolves to.
|
||||||
Takes as an input a filename (or nothing, for stdin),
|
Takes as an input a filename (or nothing, for stdin),
|
||||||
and as an output a filename (or nothing, for stdout).
|
and as an output a filename (or nothing, for stdout).
|
||||||
The input must be a subdomain per line, the output is a comma-sep
|
The input must be a subdomain per line, the output is one JSON object per line.
|
||||||
file with the columns source CNAME and A.
|
|
||||||
Use the file `nameservers` as the list of nameservers
|
Use the file `nameservers` as the list of nameservers
|
||||||
to use, or else it will use the system defaults.
|
to use, or else it will use the system defaults.
|
||||||
Also shows a nice progressbar.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Initialization
|
# Initialization
|
||||||
coloredlogs.install(
|
coloredlogs.install(
|
||||||
level='DEBUG',
|
# level='DEBUG',
|
||||||
fmt='%(asctime)s %(name)s %(levelname)s %(message)s'
|
fmt='%(asctime)s %(name)s %(levelname)s %(message)s'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -244,20 +244,6 @@ def main() -> None:
|
||||||
# help="Number of threads to use")
|
# help="Number of threads to use")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Progress bar
|
|
||||||
widgets = [
|
|
||||||
progressbar.Percentage(),
|
|
||||||
' ', progressbar.SimpleProgress(),
|
|
||||||
' ', progressbar.Bar(),
|
|
||||||
' ', progressbar.Timer(),
|
|
||||||
' ', progressbar.AdaptiveTransferSpeed(unit='req'),
|
|
||||||
' ', progressbar.AdaptiveETA(),
|
|
||||||
]
|
|
||||||
progress = progressbar.ProgressBar(widgets=widgets)
|
|
||||||
if args.input.seekable():
|
|
||||||
progress.max_value = len(args.input.readlines())
|
|
||||||
args.input.seek(0)
|
|
||||||
|
|
||||||
# Cleaning input
|
# Cleaning input
|
||||||
iterator = iter(args.input)
|
iterator = iter(args.input)
|
||||||
iterator = map(str.strip, iterator)
|
iterator = map(str.strip, iterator)
|
||||||
|
@ -269,15 +255,8 @@ def main() -> None:
|
||||||
servers = open('nameservers').readlines()
|
servers = open('nameservers').readlines()
|
||||||
servers = list(filter(None, map(str.strip, servers)))
|
servers = list(filter(None, map(str.strip, servers)))
|
||||||
|
|
||||||
writer = csv.writer(args.output)
|
for resolved in Orchestrator(iterator, servers).run():
|
||||||
|
args.output.write(resolved)
|
||||||
progress.start()
|
|
||||||
global glob
|
|
||||||
glob = Orchestrator(iterator, servers)
|
|
||||||
for resolved in glob.run():
|
|
||||||
progress.update(progress.value + 1)
|
|
||||||
writer.writerow(resolved)
|
|
||||||
progress.finish()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -4,11 +4,9 @@ function log() {
|
||||||
echo -e "\033[33m$@\033[0m"
|
echo -e "\033[33m$@\033[0m"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Resolve the CNAME chain of all the known subdomains for later analysis
|
log "Compiling locally known subdomain…"
|
||||||
log "Compiling subdomain lists..."
|
|
||||||
pv subdomains/*.list | sort -u > temp/all_subdomains.list
|
|
||||||
# Sort by last character to utilize the DNS server caching mechanism
|
# Sort by last character to utilize the DNS server caching mechanism
|
||||||
pv temp/all_subdomains.list | rev | sort | rev > temp/all_subdomains_reversort.list
|
pv subdomains/*.list | rev | sort -u | rev > temp/all_subdomains.list
|
||||||
./resolve_subdomains.py --input temp/all_subdomains_reversort.list --output temp/all_resolved.csv
|
log "Resolving locally known subdomain…"
|
||||||
sort -u temp/all_resolved.csv > temp/all_resolved_sorted.csv
|
pv temp/all_subdomains.list | ./resolve_subdomains.py --output temp/all_resolved.json
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue