Added a delay when collecting website subdomains

Some websites load their trackers after the page is done loading.
newworkflow_parseropti
Geoffrey Frogeye 2019-11-14 06:29:24 +01:00
parent 56374e3223
commit 00a0020914
1 changed file with 2 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ accessed by the websites.
import sys import sys
import typing import typing
import urllib.parse import urllib.parse
import time
import progressbar import progressbar
import selenium.webdriver.firefox.options import selenium.webdriver.firefox.options
@@ -34,6 +35,7 @@ def collect_subdomains(url: str) -> typing.Iterable[str]:
executable_path='geckodriver', options=options) executable_path='geckodriver', options=options)
driver.get(url) driver.get(url)
time.sleep(10)
for request in driver.requests: for request in driver.requests:
if request.response: if request.response:
yield subdomain_from_url(request.path) yield subdomain_from_url(request.path)