#!/usr/bin/env python3
"""
From a list of URLs, output the subdomains accessed by the websites.

URLs are read from standard input, one per line (blank lines are skipped).
For every request that received a response while loading a page, the
network location of that request is printed on its own line.
"""

import sys
import typing
import urllib.parse

import selenium.webdriver.firefox.options
import seleniumwire.webdriver


def subdomain_from_url(url: str) -> str:
    """
    Extract the network-location part from a URL.

    This is the ``netloc`` component as returned by
    ``urllib.parse.urlparse``; it is an empty string when the URL has no
    ``//`` authority section (e.g. a bare path).
    """
    return urllib.parse.urlparse(url).netloc


def collect_subdomains(url: str) -> typing.Iterable[str]:
    """
    Load a URL into a headless browser and yield the domains it accessed.

    One value is yielded per captured request that has a response, in
    capture order; duplicates are not removed.  The browser is shut down
    when the generator finishes, is closed early, or an error occurs.
    """
    options = selenium.webdriver.firefox.options.Options()
    options.add_argument('-headless')
    driver = seleniumwire.webdriver.Firefox(
        executable_path='geckodriver', options=options)
    try:
        driver.get(url)
        for request in driver.requests:
            if request.response:
                # NOTE(review): newer selenium-wire releases appear to expose
                # the full URL as ``request.url`` and only the path component
                # as ``request.path``; older ones kept the full URL in
                # ``request.path``.  Prefer ``url`` when present so the netloc
                # is not empty -- confirm against the installed version.
                request_url = getattr(request, 'url', None) or request.path
                yield subdomain_from_url(request_url)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        driver.quit()


def main() -> None:
    """
    Read URLs from stdin and print every domain each page accessed.
    """
    for line in sys.stdin:
        url = line.strip()
        if not url:
            continue
        for subdomain in collect_subdomains(url):
            print(subdomain)


if __name__ == '__main__':
    main()