Added progressbar and ETA
This commit is contained in:
parent
d49a7803e9
commit
2f1af3c850
|
@ -42,6 +42,7 @@ Just to build the list, you can find an already-built list in the releases.
|
|||
- Selenium
|
||||
- seleniumwire
|
||||
- dnspython
|
||||
- [progressbar2](https://pypi.org/project/progressbar2/)
|
||||
|
||||
And then just run `eulaurarien.sh`.
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
# pylint: disable=C0103
|
||||
|
||||
"""
|
||||
From a list of URLs, output the subdomains
|
||||
|
@ -9,6 +10,7 @@ import sys
|
|||
import typing
|
||||
import urllib.parse
|
||||
|
||||
import progressbar
|
||||
import selenium.webdriver.firefox.options
|
||||
import seleniumwire.webdriver
|
||||
|
||||
|
@ -38,10 +40,26 @@ def collect_subdomains(url: str) -> typing.Iterable[str]:
|
|||
driver.close()
|
||||
|
||||
|
||||
def collect_subdomains_standalone(url: str) -> None:
    """
    Print each item yielded by collect_subdomains for the URL, one per line.

    Leading and trailing whitespace is stripped from the URL first;
    input that is empty after stripping is ignored.
    """
    cleaned = url.strip()
    if cleaned:
        for found in collect_subdomains(cleaned):
            print(found)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: collect_subdomains.py [FILE|-]
    #
    # With a FILE argument the URLs are read from that file and a progress
    # bar (with ETA) is displayed. With no argument, or with '-', the URLs
    # are read from standard input and no progress bar is shown, since the
    # total number of lines cannot be known in advance.
    if len(sys.argv) > 2:
        # Explicit check instead of `assert`, which is stripped under -O
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))

    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]

        # First pass: count the lines so the bar knows its maximum.
        # The context manager guarantees the handle is closed.
        with open(filename) as source:
            num_lines = sum(1 for _ in source)

        # Second pass: process every line while updating the bar.
        with open(filename) as source:
            for line in progressbar.progressbar(source, max_value=num_lines):
                collect_subdomains_standalone(line)
    else:
        for line in sys.stdin:
            collect_subdomains_standalone(line)
|
||||
|
|
|
@ -3,11 +3,11 @@
|
|||
# Main script for eulaurarien
|
||||
|
||||
# Get all subdomains accessed by each website in the website list
|
||||
cat websites.list | ./collect_subdomains.py > subdomains.list
|
||||
./collect_subdomains.py websites.list > subdomains.list
|
||||
sort -u subdomains.list > subdomains.sorted.list
|
||||
|
||||
# Filter out the subdomains not pointing to a first-party tracker
|
||||
cat subdomains.sorted.list | ./filter_subdomains.py > toblock.list
|
||||
./filter_subdomains.py subdomains.sorted.list > toblock.list
|
||||
sort -u toblock.list > toblock.sorted.list
|
||||
|
||||
# Format the blocklist so it can be used as a hostlist
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
# pylint: disable=C0103
|
||||
|
||||
"""
|
||||
From a list of subdomains, output only
|
||||
|
@ -9,6 +10,7 @@ import re
|
|||
import sys
|
||||
|
||||
import dns.resolver
|
||||
import progressbar
|
||||
|
||||
import regexes
|
||||
|
||||
|
@ -18,7 +20,10 @@ def is_subdomain_matching(subdomain: str) -> bool:
|
|||
Indicates if the subdomain redirects to a first-party tracker.
|
||||
"""
|
||||
# TODO Look at the whole chain rather than the last one
|
||||
query = dns.resolver.query(subdomain, 'A')
|
||||
try:
|
||||
query = dns.resolver.query(subdomain, 'A')
|
||||
except dns.resolver.NXDOMAIN:
|
||||
return False
|
||||
canonical = query.canonical_name.to_text()
|
||||
for regex in regexes.REGEXES:
|
||||
if re.match(regex, canonical):
|
||||
|
@ -26,10 +31,29 @@ def is_subdomain_matching(subdomain: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def is_subdomain_matching_standalone(subdomain: str) -> None:
    """
    Print the subdomain if it redirects to a first-party tracker.

    Leading and trailing whitespace is stripped before the check;
    input that is empty after stripping is ignored.
    """
    candidate = subdomain.strip()
    if candidate and is_subdomain_matching(candidate):
        print(candidate)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: filter_subdomains.py [FILE|-]
    #
    # With a FILE argument the subdomains are read from that file and a
    # progress bar (with ETA) is displayed. With no argument, or with '-',
    # the subdomains are read from standard input and no progress bar is
    # shown, since the total number of lines cannot be known in advance.
    if len(sys.argv) > 2:
        # Explicit check instead of `assert`, which is stripped under -O
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))

    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]

        # First pass: count the lines so the bar knows its maximum.
        # The context manager guarantees the handle is closed.
        with open(filename) as source:
            num_lines = sum(1 for _ in source)

        # Second pass: process every line while updating the bar.
        with open(filename) as source:
            for line in progressbar.progressbar(source, max_value=num_lines):
                is_subdomain_matching_standalone(line)
    else:
        for line in sys.stdin:
            is_subdomain_matching_standalone(line)
|
||||
|
|
Loading…
Reference in a new issue