48 lines
1.1 KiB
Python
48 lines
1.1 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
"""
|
||
|
From a list of URLs, output the subdomains
|
||
|
accessed by the websites.
|
||
|
"""
|
||
|
|
||
|
import sys
|
||
|
import typing
|
||
|
import urllib.parse
|
||
|
|
||
|
import selenium.webdriver.firefox.options
|
||
|
import seleniumwire.webdriver
|
||
|
|
||
|
|
||
|
def subdomain_from_url(url: str) -> str:
    """
    Return the network-location (``netloc``) component of *url*.

    The value is taken verbatim from ``urllib.parse.urlparse``, so any
    port or ``user:password@`` prefix present in the URL is kept. An
    input without a ``//`` authority section (for example a bare path)
    yields an empty string.
    """
    return urllib.parse.urlparse(url).netloc
|
||
|
|
||
|
|
||
|
def collect_subdomains(url: str) -> typing.Iterable[str]:
    """
    Load an URL into a headless browser and yield the hosts it contacted.

    A new headless Firefox session is started through selenium-wire for
    every call. Once ``driver.get`` returns, the network location of each
    captured request that received a response is yielded, in the order
    selenium-wire reports them. Duplicates are not removed.

    This is a generator: the browser is only started when iteration
    begins, and it is shut down when iteration finishes, when an error
    occurs, or when the generator is closed before being exhausted.
    """
    options = selenium.webdriver.firefox.options.Options()
    options.add_argument('-headless')
    driver = seleniumwire.webdriver.Firefox(
        executable_path='geckodriver', options=options)

    try:
        driver.get(url)
        for request in driver.requests:
            # Requests that never received a response are skipped.
            if not request.response:
                continue
            # Newer selenium-wire releases expose the full URL as
            # ``request.url`` and make ``request.path`` the bare path
            # (whose netloc would be empty). Older releases only have
            # ``request.path`` holding the full URL, so fall back to it.
            full_url = getattr(request, 'url', None) or request.path
            yield subdomain_from_url(full_url)
    finally:
        # quit() rather than close(): close() only closes the current
        # window, while quit() ends the whole browser/driver session.
        # The finally clause guarantees this runs even if loading the
        # page fails or the consumer stops iterating early.
        driver.quit()
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Every non-blank line on stdin is treated as one URL to visit; each
    # host reported for that URL is printed on its own line.
    for raw_line in sys.stdin:
        target = raw_line.strip()
        if target:
            for host in collect_subdomains(target):
                print(host)
|