Compare commits

..

2 commits

Author SHA1 Message Date
Geoffrey Frogeye 708c53041e
Added two japanese trackers 2020-01-03 22:09:16 +01:00
Geoffrey Frogeye 808e36dde3
Improvements to subdomain collection
I use this for tracker identification, so it's not perfect, but it's still
a bit better.
2020-01-03 22:08:06 +01:00
4 changed files with 50 additions and 16 deletions

View file

@ -151,7 +151,7 @@ If you want to force re-importing, run `rm last_updates/rapid7_*.txt`.
### Export the lists ### Export the lists
For the tracking list, use `./export_lists.sh`, the output will be in the `dist` forlder (please change the links before distributing them). For the tracking list, use `./export_lists.sh`, the output will be in the `dist` folder (please change the links before distributing them).
For other purposes, tinker with the `./export.py` program. For other purposes, tinker with the `./export.py` program.
#### Explanations #### Explanations

View file

@ -14,6 +14,29 @@ import time
import progressbar import progressbar
import selenium.webdriver.firefox.options import selenium.webdriver.firefox.options
import seleniumwire.webdriver import seleniumwire.webdriver
import logging
log = logging.getLogger('cs')
DRIVER = None
SCROLL_TIME = 10.0
SCROLL_STEPS = 100
SCROLL_CMD = f'window.scrollBy(0,document.body.scrollHeight/{SCROLL_STEPS})'
def new_driver() -> seleniumwire.webdriver.browser.Firefox:
    """
    Build a Firefox driver whose profile has the tracking protection
    preferences set to False and the cookie behavior preference set to 0.
    """
    # Preference name / value pairs, applied to the profile in this order.
    preferences = (
        ('privacy.trackingprotection.enabled', False),
        ('network.cookie.cookieBehavior', 0),
        ('privacy.trackingprotection.pbmode.enabled', False),
        ('privacy.trackingprotection.cryptomining.enabled', False),
        ('privacy.trackingprotection.fingerprinting.enabled', False),
    )
    firefox_profile = selenium.webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences:
        firefox_profile.set_preference(pref_name, pref_value)

    # The '-headless' argument is not added here: the options object is
    # handed to Firefox without any extra arguments.
    firefox_options = selenium.webdriver.firefox.options.Options()
    return seleniumwire.webdriver.Firefox(
        firefox_profile,
        executable_path='geckodriver',
        options=firefox_options,
    )
def subdomain_from_url(url: str) -> str: def subdomain_from_url(url: str) -> str:
@ -29,28 +52,30 @@ def collect_subdomains(url: str) -> typing.Iterable[str]:
Load an URL into an headless browser and return all the domains Load an URL into an headless browser and return all the domains
it tried to access. it tried to access.
""" """
options = selenium.webdriver.firefox.options.Options() global DRIVER
options.add_argument('-headless') if not DRIVER:
driver = seleniumwire.webdriver.Firefox( DRIVER = new_driver()
executable_path='geckodriver', options=options)
driver.get(url) try:
time.sleep(10) DRIVER.get(url)
for request in driver.requests: for s in range(SCROLL_STEPS):
DRIVER.execute_script(SCROLL_CMD)
time.sleep(SCROLL_TIME/SCROLL_STEPS)
for request in DRIVER.requests:
if request.response: if request.response:
yield subdomain_from_url(request.path) yield subdomain_from_url(request.path)
driver.close() except:
log.exception("Error")
DRIVER.quit()
DRIVER = None
def collect_subdomains_standalone(url: str) -> None: def collect_subdomains_standalone(url: str) -> None:
url = url.strip() url = url.strip()
if not url: if not url:
return return
try:
for subdomain in collect_subdomains(url): for subdomain in collect_subdomains(url):
print(subdomain) print(subdomain)
except:
pass
if __name__ == '__main__': if __name__ == '__main__':
@ -66,5 +91,8 @@ if __name__ == '__main__':
for line in iterator: for line in iterator:
collect_subdomains_standalone(line) collect_subdomains_standalone(line)
if DRIVER:
DRIVER.quit()
if filename: if filename:
iterator.close() iterator.close()

1
dist/README.md vendored
View file

@ -90,3 +90,4 @@ The list was generated using data from
- [Rapid7 OpenData](https://opendata.rapid7.com/sonar.fdns_v2/), who kindly provided a free account - [Rapid7 OpenData](https://opendata.rapid7.com/sonar.fdns_v2/), who kindly provided a free account
- [Cisco Umbrella Popularity List](http://s3-us-west-1.amazonaws.com/umbrella-static/index.html) - [Cisco Umbrella Popularity List](http://s3-us-west-1.amazonaws.com/umbrella-static/index.html)
- [Public DNS Server List](https://public-dns.info/) - [Public DNS Server List](https://public-dns.info/)
- Yuki2718 from [Wilders Security Forums](https://www.wilderssecurity.com/threads/ublock-a-lean-and-fast-blocker.365273/page-168#post-2880361) (by the way, I'd appreciate it if someone from this forum could contact a moderator to get [me](https://www.wilderssecurity.com/members/geoffrey-frogeye.162660/) confirmed!)

View file

@ -32,3 +32,8 @@ affex.org
# TraceDock # TraceDock
a88045584548111e997c60ac8a4ec150-1610510072.eu-central-1.elb.amazonaws.com a88045584548111e997c60ac8a4ec150-1610510072.eu-central-1.elb.amazonaws.com
afc4d9aa2a91d11e997c60ac8a4ec150-2082092489.eu-central-1.elb.amazonaws.com afc4d9aa2a91d11e997c60ac8a4ec150-2082092489.eu-central-1.elb.amazonaws.com
# A8
trck.a8.net
# Ebis
# https://prtimes.jp/main/html/rd/p/000000215.000009812.html
ebis.ne.jp