|
|
@ -14,6 +14,29 @@ import time |
|
|
|
import progressbar |
|
|
|
import selenium.webdriver.firefox.options |
|
|
|
import seleniumwire.webdriver |
|
|
|
import logging |
|
|
|
|
|
|
|
# Module-level logger, registered under the name 'cs'.
log = logging.getLogger('cs')

# Shared browser instance. collect_subdomains() creates it on first use via
# new_driver() and resets it to None after quitting the browser on error.
DRIVER = None

# Total time, in seconds, spent sleeping across one full scroll pass:
# collect_subdomains() sleeps SCROLL_TIME/SCROLL_STEPS after every step.
SCROLL_TIME = 10.0

# Number of scroll steps executed per page.
SCROLL_STEPS = 100

# JavaScript run once per step: scrolls down by 1/SCROLL_STEPS of the
# document body's scrollHeight.
SCROLL_CMD = f'window.scrollBy(0,document.body.scrollHeight/{SCROLL_STEPS})'
|
|
|
|
|
|
|
|
|
|
|
def new_driver() -> seleniumwire.webdriver.browser.Firefox:
    """
    Create a Selenium Wire Firefox driver with a custom profile.

    The profile switches off the Firefox tracking-protection preferences
    listed below and sets 'network.cookie.cookieBehavior' to 0
    (presumably "accept all cookies" -- confirm against the Firefox
    preference documentation). The browser is started through the
    'geckodriver' executable.
    """
    # Applied in this order, one set_preference() call per entry.
    preferences = (
        ('privacy.trackingprotection.enabled', False),
        ('network.cookie.cookieBehavior', 0),
        ('privacy.trackingprotection.pbmode.enabled', False),
        ('privacy.trackingprotection.cryptomining.enabled', False),
        ('privacy.trackingprotection.fingerprinting.enabled', False),
    )
    firefox_profile = selenium.webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences:
        firefox_profile.set_preference(pref_name, pref_value)

    # The '-headless' argument is not passed, so no headless mode is
    # requested through these options.
    firefox_options = selenium.webdriver.firefox.options.Options()
    return seleniumwire.webdriver.Firefox(
        firefox_profile,
        executable_path='geckodriver',
        options=firefox_options)
|
|
|
|
|
|
|
|
|
|
|
def subdomain_from_url(url: str) -> str: |
|
|
@ -29,28 +52,30 @@ def collect_subdomains(url: str) -> typing.Iterable[str]: |
|
|
|
Load a URL into a headless browser and return all the domains |
|
|
|
it tried to access. |
|
|
|
""" |
|
|
|
options = selenium.webdriver.firefox.options.Options() |
|
|
|
options.add_argument('-headless') |
|
|
|
driver = seleniumwire.webdriver.Firefox( |
|
|
|
executable_path='geckodriver', options=options) |
|
|
|
global DRIVER |
|
|
|
if not DRIVER: |
|
|
|
DRIVER = new_driver() |
|
|
|
|
|
|
|
driver.get(url) |
|
|
|
time.sleep(10) |
|
|
|
for request in driver.requests: |
|
|
|
if request.response: |
|
|
|
yield subdomain_from_url(request.path) |
|
|
|
driver.close() |
|
|
|
try: |
|
|
|
DRIVER.get(url) |
|
|
|
for s in range(SCROLL_STEPS): |
|
|
|
DRIVER.execute_script(SCROLL_CMD) |
|
|
|
time.sleep(SCROLL_TIME/SCROLL_STEPS) |
|
|
|
for request in DRIVER.requests: |
|
|
|
if request.response: |
|
|
|
yield subdomain_from_url(request.path) |
|
|
|
except: |
|
|
|
log.exception("Error") |
|
|
|
DRIVER.quit() |
|
|
|
DRIVER = None |
|
|
|
|
|
|
|
|
|
|
|
def collect_subdomains_standalone(url: str) -> None:
    """
    Print every subdomain collected for one URL, one per line.

    Leading and trailing whitespace is stripped from ``url`` first; if
    nothing is left, the function returns without doing anything.
    Exceptions raised by collect_subdomains() propagate to the caller.
    """
    url = url.strip()
    if not url:
        # Blank input line: nothing to load.
        return

    # Collect exactly once. A second pass would load the page again and
    # print every subdomain twice, and wrapping the pass in a bare
    # ``except: pass`` would silently hide every failure.
    for subdomain in collect_subdomains(url):
        print(subdomain)
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
@ -66,5 +91,8 @@ if __name__ == '__main__': |
|
|
|
for line in iterator: |
|
|
|
collect_subdomains_standalone(line) |
|
|
|
|
|
|
|
if DRIVER: |
|
|
|
DRIVER.quit() |
|
|
|
|
|
|
|
if filename: |
|
|
|
iterator.close() |