2019-11-10 18:14:25 +01:00
|
|
|
#!/usr/bin/env python3
|
2019-11-10 21:59:06 +01:00
|
|
|
# pylint: disable=C0103
|
2019-11-10 18:14:25 +01:00
|
|
|
|
|
|
|
"""
|
|
|
|
From a list of URLs, output the subdomains
|
|
|
|
accessed by the websites.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import typing
|
|
|
|
import urllib.parse
|
2019-11-14 06:29:24 +01:00
|
|
|
import time
|
2019-11-10 18:14:25 +01:00
|
|
|
|
2019-11-10 21:59:06 +01:00
|
|
|
import progressbar
|
2019-11-10 18:14:25 +01:00
|
|
|
import selenium.webdriver.firefox.options
|
|
|
|
import seleniumwire.webdriver
|
|
|
|
|
|
|
|
|
|
|
|
def subdomain_from_url(url: str) -> str:
    """
    Extract the host name from a URL.

    Only the host itself is returned: any ``user:password@`` prefix and
    ``:port`` suffix of the network location are dropped and the name
    is lower-cased, so the same host always yields the same string.

    Returns an empty string when the URL has no network location
    (for example a bare path or an empty string).
    """
    parsed = urllib.parse.urlparse(url)
    # `netloc` would keep credentials and port; `hostname` strips both.
    # It is None when the URL carries no host, hence the '' fallback.
    return parsed.hostname or ''
|
|
|
|
|
|
|
|
|
|
|
|
def collect_subdomains(url: str, wait: float = 10) -> typing.Iterable[str]:
    """
    Load a URL into a headless Firefox and yield the domain of every
    request it made that received a response.

    url: address to open in the browser.
    wait: seconds to sleep after the initial page load before reading
        the captured requests, so that late requests are included.

    This is a generator: the browser is started on the first iteration
    and shut down when the generator finishes, is closed, or raises.
    """
    options = selenium.webdriver.firefox.options.Options()
    options.add_argument('-headless')
    driver = seleniumwire.webdriver.Firefox(
        executable_path='geckodriver', options=options)

    try:
        driver.get(url)
        time.sleep(wait)
        for request in driver.requests:
            # Requests without a response are skipped.
            if request.response:
                # NOTE(review): relies on `request.path` holding the full
                # URL; newer selenium-wire releases may expose that as
                # `request.url` instead - confirm against the version in use.
                yield subdomain_from_url(request.path)
    finally:
        # quit() (not close()) ends the whole browser session, and the
        # finally clause guarantees that even if loading fails or the
        # caller stops iterating early.
        driver.quit()
|
|
|
|
|
|
|
|
|
2019-11-10 21:59:06 +01:00
|
|
|
def collect_subdomains_standalone(url: str) -> None:
    """
    Print, one per line, the domains accessed while loading a URL.

    Surrounding whitespace is stripped from `url`; a blank value is
    ignored. Processing is best-effort: a failure while collecting is
    reported on stderr and does not propagate, so one bad URL does not
    abort a whole batch.
    """
    url = url.strip()
    if not url:
        return

    try:
        for subdomain in collect_subdomains(url):
            print(subdomain)
    except Exception as err:  # pylint: disable=W0703
        # Deliberately broad to stay best-effort, but not a bare
        # `except:` so that Ctrl-C and SystemExit still stop the program.
        print('Error while processing {}: {!r}'.format(url, err),
              file=sys.stderr)
|
2019-11-10 21:59:06 +01:00
|
|
|
|
|
|
|
|
2019-11-10 18:14:25 +01:00
|
|
|
if __name__ == '__main__':
    # Usage: script [FILE|-]
    # With a FILE argument, URLs are read from it (one per line) and a
    # progress bar is shown; with no argument or '-', they come from stdin.
    if len(sys.argv) > 2:
        # Explicit check instead of `assert`, which is stripped under -O.
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))

    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]
        with open(filename) as url_file:
            # First pass only counts lines so the progress bar knows its
            # maximum; then rewind and process the same handle.
            num_lines = sum(1 for _ in url_file)
            url_file.seek(0)
            for line in progressbar.progressbar(url_file,
                                                max_value=num_lines):
                collect_subdomains_standalone(line)
    else:
        for line in sys.stdin:
            collect_subdomains_standalone(line)
|