eulaurarien/filter_out_explicit.py

46 lines
938 B
Python
Executable file

#!/usr/bin/env python3
# pylint: disable=C0103
"""
From a list of subdomains to block,
filter out the ones explicitly matching a regex.
Those should already be handled by the ad blocker.
"""
import logging
import multiprocessing
import re
import sys
import typing
import regexes
def explicitely_match(subdomain: str) -> bool:
    """
    Tell whether the subdomain matches one of the known regexes.

    A trailing dot is appended to the subdomain before each pattern
    from regexes.REGEXES is tried against it with re.match (so the
    pattern is anchored at the start of the string).
    Return True as soon as one pattern matches, False if none does.
    """
    candidate = subdomain + '.'
    return any(
        re.match(pattern, candidate)
        for pattern in regexes.REGEXES
    )
if __name__ == '__main__':
    # Parsing arguments: at most one optional argument, the input file.
    # No argument, or '-', means reading from standard input.
    if len(sys.argv) > 2:
        # Explicit check rather than assert: asserts are stripped when
        # Python runs with -O, which would silently skip the validation.
        sys.exit('usage: {} [FILE|-]'.format(sys.argv[0]))
    filename = None
    if len(sys.argv) == 2 and sys.argv[1] != '-':
        filename = sys.argv[1]
        textio = open(filename)
    else:
        textio = sys.stdin

    try:
        # Cleaning input: strip surrounding whitespace, drop empty lines
        iterator = iter(textio)
        iterator = map(str.strip, iterator)
        iterator = filter(None, iterator)

        # Only output the subdomains not matched by any regex
        for subdomain in iterator:
            if not explicitely_match(subdomain):
                print(subdomain)
    finally:
        # Close the file we opened ourselves; leave sys.stdin alone.
        if filename is not None:
            textio.close()