@ -12,14 +12,22 @@ import progressbar
import csv
import typing
import adblockparser
# Options dictionary in the form passed to adblockparser's
# AdblockRules.should_block() as its second argument.
OPTIONS = { " third-party " : True }
# A node of the rule tree is either a bool (a verdict) or a dict holding
# further nodes. The fully recursive spelling would be
#     typing.Union[bool, typing.Dict[str, 'DomainRule']]
# but the alias below uses a bare typing.Dict instead
# (NOTE(review): presumably because the type checker in use does not accept
# recursive aliases - confirm).
DomainRule = typing . Union [ bool , typing . Dict ]
# Root of the rule tree. register_rule() inserts into it and
# subdomain_matching() reads it, both keyed by domain part in reversed order.
RULES_DICT : DomainRule = dict ( )
def subdomain_matching(subdomain: str, *,
                       tree: typing.Optional[dict] = None) -> bool:
    """Tell whether *subdomain* is covered by a registered rule.

    The rule tree is keyed by domain labels from the right-most label
    (the TLD) inwards. A ``bool`` found while walking the tree is the verdict
    for that domain and for everything below it, so a rule for
    ``example.com`` also matches ``a.b.example.com``.

    Args:
        subdomain: Host name to test, e.g. ``"ads.example.com"``.
        tree: Rule tree to search. Defaults to the module-level
            ``RULES_DICT``.

    Returns:
        The verdict stored in the tree when the walk ends on a ``bool``;
        ``False`` when it stops on an unknown label or on an inner node
        (i.e. only more specific names are registered).
    """
    # The previous implementation returned the adblockparser verdict before
    # ever reaching the tree walk; the tree is the only source of truth here.
    node = RULES_DICT if tree is None else tree
    # Labels are separated by a bare dot and matched right to left.
    for label in reversed(subdomain.split('.')):
        if isinstance(node, bool) or label not in node:
            break
        node = node[label]
    return node if isinstance(node, bool) else False
def get_matching ( chain : typing . List [ str ] , no_explicit : bool = False
@ -35,6 +43,21 @@ def get_matching(chain: typing.List[str], no_explicit: bool = False
yield initial
def register_rule(subdomain: str, *,
                  tree: typing.Optional[dict] = None) -> None:
    """Insert *subdomain* into the rule tree as a blocked domain.

    The domain is split into labels and stored from the right-most label
    inwards; the node reached by the left-most label is set to ``True``.
    Registering a domain replaces any more specific rules already stored
    beneath it, and a domain whose ancestor is already registered is
    ignored since the ancestor covers it.

    Args:
        subdomain: Domain to register, e.g. ``"example.com"``. An empty
            string is ignored.
        tree: Rule tree to update in place. Defaults to the module-level
            ``RULES_DICT``.
    """
    # A blank rule would otherwise create an empty-label entry at the root,
    # which then matches every host name written with a trailing dot.
    if not subdomain:
        return
    node = RULES_DICT if tree is None else tree
    # Labels are separated by a bare dot; walk them right to left.
    *parents, leaf = reversed(subdomain.split('.'))
    for label in parents:
        node = node.setdefault(label, {})
        if isinstance(node, bool):
            # An ancestor domain is already a terminal rule.
            return
    node[leaf] = True
if __name__ == ' __main__ ' :
# Parsing arguments
@ -54,9 +77,6 @@ if __name__ == '__main__':
help = " Rules file " )
args = parser . parse_args ( )
# Reading rules
rules : adblockparser . AdblockRules = adblockparser . AdblockRules ( args . rules )
# Progress bar
widgets = [
progressbar . Percentage ( ) ,
@ -67,14 +87,17 @@ if __name__ == '__main__':
' ' , progressbar . AdaptiveETA ( ) ,
]
progress = progressbar . ProgressBar ( widgets = widgets )
# Reading rules
for rule in args . rules :
register_rule ( rule . strip ( ) )
# Reading domains to filter
if args . input . seekable ( ) :
progress . max_value = len ( args . input . readlines ( ) )
args . input . seek ( 0 )
# Cleaning input
reader = csv . reader ( args . input )
# Filtering
progress . start ( )
for chain in reader :
for match in get_matching ( chain , no_explicit = args . no_explicit ) :