Added progressbar and ETA
This commit is contained in:
parent
d49a7803e9
commit
2f1af3c850
|
@ -42,6 +42,7 @@ Just to build the list, you can find an already-built list in the releases.
|
||||||
- Selenium
|
- Selenium
|
||||||
- seleniumwire
|
- seleniumwire
|
||||||
- dnspython
|
- dnspython
|
||||||
|
- [progressbar2](https://pypi.org/project/progressbar2/)
|
||||||
|
|
||||||
And then just run `eulaurarien.sh`.
|
And then just run `eulaurarien.sh`.
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# pylint: disable=C0103
|
||||||
|
|
||||||
"""
|
"""
|
||||||
From a list of URLs, output the subdomains
|
From a list of URLs, output the subdomains
|
||||||
|
@ -9,6 +10,7 @@ import sys
|
||||||
import typing
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
import progressbar
|
||||||
import selenium.webdriver.firefox.options
|
import selenium.webdriver.firefox.options
|
||||||
import seleniumwire.webdriver
|
import seleniumwire.webdriver
|
||||||
|
|
||||||
|
@ -38,10 +40,26 @@ def collect_subdomains(url: str) -> typing.Iterable[str]:
|
||||||
driver.close()
|
driver.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def collect_subdomains_standalone(url: str) -> None:
|
||||||
for line in sys.stdin:
|
url = url.strip()
|
||||||
line = line.strip()
|
if not url:
|
||||||
if not line:
|
return
|
||||||
continue
|
for subdomain in collect_subdomains(url):
|
||||||
for subdomain in collect_subdomains(line):
|
|
||||||
print(subdomain)
|
print(subdomain)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
assert len(sys.argv) <= 2
|
||||||
|
filename = None
|
||||||
|
if len(sys.argv) == 2 and sys.argv[1] != '-':
|
||||||
|
filename = sys.argv[1]
|
||||||
|
num_lines = sum(1 for line in open(filename))
|
||||||
|
iterator = progressbar.progressbar(open(filename), max_value=num_lines)
|
||||||
|
else:
|
||||||
|
iterator = sys.stdin
|
||||||
|
|
||||||
|
for line in iterator:
|
||||||
|
collect_subdomains_standalone(line)
|
||||||
|
|
||||||
|
if filename:
|
||||||
|
iterator.close()
|
||||||
|
|
|
@ -3,11 +3,11 @@
|
||||||
# Main script for eulaurarien
|
# Main script for eulaurarien
|
||||||
|
|
||||||
# Get all subdomains accessed by each website in the website list
|
# Get all subdomains accessed by each website in the website list
|
||||||
cat websites.list | ./collect_subdomains.py > subdomains.list
|
./collect_subdomains.py websites.list > subdomains.list
|
||||||
sort -u subdomains.list > subdomains.sorted.list
|
sort -u subdomains.list > subdomains.sorted.list
|
||||||
|
|
||||||
# Filter out the subdomains not pointing to a first-party tracker
|
# Filter out the subdomains not pointing to a first-party tracker
|
||||||
cat subdomains.sorted.list | ./filter_subdomains.py > toblock.list
|
./filter_subdomains.py subdomains.sorted.list > toblock.list
|
||||||
sort -u toblock.list > toblock.sorted.list
|
sort -u toblock.list > toblock.sorted.list
|
||||||
|
|
||||||
# Format the blocklist so it can be used as a hostlist
|
# Format the blocklist so it can be used as a hostlist
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# pylint: disable=C0103
|
||||||
|
|
||||||
"""
|
"""
|
||||||
From a list of subdomains, output only
|
From a list of subdomains, output only
|
||||||
|
@ -9,6 +10,7 @@ import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import dns.resolver
|
import dns.resolver
|
||||||
|
import progressbar
|
||||||
|
|
||||||
import regexes
|
import regexes
|
||||||
|
|
||||||
|
@ -18,7 +20,10 @@ def is_subdomain_matching(subdomain: str) -> bool:
|
||||||
Indicates if the subdomain redirects to a first-party tracker.
|
Indicates if the subdomain redirects to a first-party tracker.
|
||||||
"""
|
"""
|
||||||
# TODO Look at the whole chain rather than the last one
|
# TODO Look at the whole chain rather than the last one
|
||||||
|
try:
|
||||||
query = dns.resolver.query(subdomain, 'A')
|
query = dns.resolver.query(subdomain, 'A')
|
||||||
|
except dns.resolver.NXDOMAIN:
|
||||||
|
return False
|
||||||
canonical = query.canonical_name.to_text()
|
canonical = query.canonical_name.to_text()
|
||||||
for regex in regexes.REGEXES:
|
for regex in regexes.REGEXES:
|
||||||
if re.match(regex, canonical):
|
if re.match(regex, canonical):
|
||||||
|
@ -26,10 +31,29 @@ def is_subdomain_matching(subdomain: str) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_subdomain_matching_standalone(subdomain: str) -> None:
|
||||||
|
"""
|
||||||
|
Print the subdomain if it redirects to a first-party tracker.
|
||||||
|
"""
|
||||||
|
subdomain = subdomain.strip()
|
||||||
|
if not subdomain:
|
||||||
|
return
|
||||||
|
if is_subdomain_matching(subdomain):
|
||||||
|
print(subdomain)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
for line in sys.stdin:
|
assert len(sys.argv) <= 2
|
||||||
line = line.strip()
|
filename = None
|
||||||
if not line:
|
if len(sys.argv) == 2 and sys.argv[1] != '-':
|
||||||
continue
|
filename = sys.argv[1]
|
||||||
if is_subdomain_matching(line):
|
num_lines = sum(1 for line in open(filename))
|
||||||
print(line)
|
iterator = progressbar.progressbar(open(filename), max_value=num_lines)
|
||||||
|
else:
|
||||||
|
iterator = sys.stdin
|
||||||
|
|
||||||
|
for line in iterator:
|
||||||
|
is_subdomain_matching_standalone(line)
|
||||||
|
|
||||||
|
if filename:
|
||||||
|
iterator.close()
|
||||||
|
|
Loading…
Reference in a new issue