Compare commits
No commits in common. "b343893c72db2f1647b8ccb23906d4e4da816caa" and "bdc691e647e9e63156e57b9d7d39f39e43304c0e" have entirely different histories.
b343893c72
...
bdc691e647
|
@ -1,45 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# pylint: disable=C0103
|
|
||||||
|
|
||||||
"""
|
|
||||||
From a list of subdomains to block,
|
|
||||||
filter out the ones explicitly matching a regex.
|
|
||||||
It should be already handled by the ad blocker.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import multiprocessing
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import typing
|
|
||||||
|
|
||||||
import regexes
|
|
||||||
|
|
||||||
|
|
||||||
def explicitely_match(subdomain: str) -> bool:
    """
    Tell whether the subdomain is matched by one of the known regexes.

    A trailing dot is appended to the subdomain before it is tested
    against every pattern of regexes.REGEXES; re.match anchors each
    pattern at the beginning of the string.
    """
    candidate = subdomain + '.'
    return any(re.match(pattern, candidate) for pattern in regexes.REGEXES)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':

    # Parsing arguments: at most one optional positional argument, the input
    # file. No argument, or '-', means reading from standard input.
    if len(sys.argv) > 2:
        # Explicit check instead of an assert, which is stripped by python -O
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))

    from_file = len(sys.argv) == 2 and sys.argv[1] != '-'
    textio = open(sys.argv[1]) if from_file else sys.stdin

    try:
        # Cleaning input: strip surrounding whitespace and drop empty lines
        subdomains = filter(None, map(str.strip, textio))

        # Print only the subdomains that none of the regexes match
        for subdomain in subdomains:
            if not explicitely_match(subdomain):
                print(subdomain)
    finally:
        # Close only the handle opened here; standard input is left alone
        if from_file:
            textio.close()
|
|
|
@ -21,7 +21,6 @@ sort -u temp/all_toblock.list > dist/firstparty-trackers.txt
|
||||||
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
|
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
|
||||||
echo "# Number of known trackers : $(python -c 'import regexes; print(len(regexes.REGEXES))')"
|
echo "# Number of known trackers : $(python -c 'import regexes; print(len(regexes.REGEXES))')"
|
||||||
echo "# Number of blocked subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
echo "# Number of blocked subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
||||||
echo "# Number of first-party subdomains: $(./filter_out_explicit.py dist/firstparty-trackers.txt | wc)"
|
|
||||||
echo
|
echo
|
||||||
cat dist/firstparty-trackers.txt | while read host;
|
cat dist/firstparty-trackers.txt | while read host;
|
||||||
do
|
do
|
||||||
|
|
Loading…
Reference in a new issue