Added first_party tracking
Well, tracking if a rule is from a first or a multi rule... Hope I did not do any mistake
This commit is contained in:
parent
c3bf102289
commit
8f6e01c857
135
database.py
135
database.py
|
@ -27,7 +27,17 @@ class Path():
|
|||
|
||||
class RulePath(Path):
|
||||
def __str__(self) -> str:
|
||||
return '(rules)'
|
||||
return '(rule)'
|
||||
|
||||
|
||||
class RuleFirstPath(RulePath):
|
||||
def __str__(self) -> str:
|
||||
return '(first-party rule)'
|
||||
|
||||
|
||||
class RuleMultiPath(RulePath):
|
||||
def __str__(self) -> str:
|
||||
return '(multi-party rule)'
|
||||
|
||||
|
||||
class DomainPath(Path):
|
||||
|
@ -67,14 +77,18 @@ class Ip4Path(Path):
|
|||
|
||||
class Match():
|
||||
def __init__(self) -> None:
|
||||
self.updated: int = 0
|
||||
self.level: int = 0
|
||||
self.source: typing.Optional[Path] = None
|
||||
self.references: int = 0
|
||||
# FP dupplicate args
|
||||
self.updated: int = 0
|
||||
|
||||
def active(self) -> bool:
|
||||
return self.updated > 0
|
||||
# Cache
|
||||
self.level: int = 0
|
||||
self.first_party: bool = False
|
||||
self.references: int = 0
|
||||
|
||||
def active(self, first_party: bool = None) -> bool:
|
||||
if self.updated == 0 or (first_party and not self.first_party):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class AsnNode(Match):
|
||||
|
@ -133,13 +147,21 @@ class Profiler():
|
|||
|
||||
|
||||
class Database(Profiler):
|
||||
VERSION = 14
|
||||
VERSION = 17
|
||||
PATH = "blocking.p"
|
||||
|
||||
def initialize(self) -> None:
|
||||
self.log.warning(
|
||||
"Creating database version: %d ",
|
||||
Database.VERSION)
|
||||
# Dummy match objects that everything refer to
|
||||
self.rules: typing.List[Match] = list()
|
||||
for first_party in (False, True):
|
||||
m = Match()
|
||||
m.updated = 1
|
||||
m.level = 0
|
||||
m.first_party = first_party
|
||||
self.rules.append(m)
|
||||
self.domtree = DomainTreeNode()
|
||||
self.asns: typing.Dict[Asn, AsnNode] = dict()
|
||||
self.ip4tree = IpTreeNode()
|
||||
|
@ -150,7 +172,7 @@ class Database(Profiler):
|
|||
with open(self.PATH, 'rb') as db_fdsec:
|
||||
version, data = pickle.load(db_fdsec)
|
||||
if version == Database.VERSION:
|
||||
self.domtree, self.asns, self.ip4tree = data
|
||||
self.rules, self.domtree, self.asns, self.ip4tree = data
|
||||
return
|
||||
self.log.warning(
|
||||
"Outdated database version found: %d, "
|
||||
|
@ -167,7 +189,7 @@ class Database(Profiler):
|
|||
def save(self) -> None:
|
||||
self.enter_step('save')
|
||||
with open(self.PATH, 'wb') as db_fdsec:
|
||||
data = self.domtree, self.asns, self.ip4tree
|
||||
data = self.rules, self.domtree, self.asns, self.ip4tree
|
||||
pickle.dump((self.VERSION, data), db_fdsec)
|
||||
self.profile()
|
||||
|
||||
|
@ -232,8 +254,10 @@ class Database(Profiler):
|
|||
return '.'.join(map(str, octets)) + '/' + str(network.prefixlen)
|
||||
|
||||
def get_match(self, path: Path) -> Match:
|
||||
if isinstance(path, RulePath):
|
||||
return Match()
|
||||
if isinstance(path, RuleMultiPath):
|
||||
return self.rules[0]
|
||||
elif isinstance(path, RuleFirstPath):
|
||||
return self.rules[1]
|
||||
elif isinstance(path, AsnPath):
|
||||
return self.asns[path.asn]
|
||||
elif isinstance(path, DomainPath):
|
||||
|
@ -275,7 +299,6 @@ class Database(Profiler):
|
|||
except TypeError: # not iterable
|
||||
pass
|
||||
|
||||
|
||||
def exec_each_domain(self,
|
||||
callback: MatchCallable,
|
||||
arg: typing.Any = None,
|
||||
|
@ -374,8 +397,8 @@ class Database(Profiler):
|
|||
pass
|
||||
|
||||
def increment_references_cb(path: Path,
|
||||
match: Match, _: typing.Any
|
||||
) -> None:
|
||||
match: Match, _: typing.Any
|
||||
) -> None:
|
||||
if match.source:
|
||||
source = self.get_match(match.source)
|
||||
source.references += 1
|
||||
|
@ -387,9 +410,7 @@ class Database(Profiler):
|
|||
|
||||
def explain(self, path: Path) -> str:
|
||||
match = self.get_match(path)
|
||||
string = f'{path}'
|
||||
if not isinstance(path, RulePath):
|
||||
string += f' #{match.references}'
|
||||
string = f'{path} #{match.references}'
|
||||
if match.source:
|
||||
string += f' ← {self.explain(match.source)}'
|
||||
return string
|
||||
|
@ -399,14 +420,14 @@ class Database(Profiler):
|
|||
end_chain_only: bool = False,
|
||||
explain: bool = False,
|
||||
) -> typing.Iterable[str]:
|
||||
if first_party_only:
|
||||
raise NotImplementedError
|
||||
|
||||
def export_cb(path: Path, match: Match, _: typing.Any
|
||||
) -> typing.Iterable[str]:
|
||||
assert isinstance(path, DomainPath)
|
||||
if not isinstance(path, HostnamePath):
|
||||
return
|
||||
if first_party_only and not match.first_party:
|
||||
return
|
||||
if end_chain_only and match.references > 0:
|
||||
return
|
||||
if explain:
|
||||
|
@ -419,11 +440,11 @@ class Database(Profiler):
|
|||
def list_rules(self,
|
||||
first_party_only: bool = False,
|
||||
) -> typing.Iterable[str]:
|
||||
if first_party_only:
|
||||
raise NotImplementedError
|
||||
|
||||
def list_rules_cb(path: Path, match: Match, _: typing.Any
|
||||
) -> typing.Iterable[str]:
|
||||
if first_party_only and not match.first_party:
|
||||
return
|
||||
if isinstance(path, ZonePath) \
|
||||
or (isinstance(path, Ip4Path) and path.prefixlen < 32):
|
||||
# if match.level == 0:
|
||||
|
@ -465,10 +486,10 @@ class Database(Profiler):
|
|||
dic = self.ip4tree
|
||||
for i in range(31, 31-ip4.prefixlen, -1):
|
||||
bit = (ip4.value >> i) & 0b1
|
||||
# TODO PERF copy value and slide once every loop
|
||||
if dic.active():
|
||||
self.enter_step('get_ip4_yield')
|
||||
a = Ip4Path(ip4.value >> (i+1) << (i+1), 31-i)
|
||||
yield a
|
||||
yield Ip4Path(ip4.value >> (i+1) << (i+1), 31-i)
|
||||
self.enter_step('get_ip4_brws')
|
||||
next_dic = dic.one if bit else dic.zero
|
||||
if next_dic is None:
|
||||
|
@ -478,50 +499,58 @@ class Database(Profiler):
|
|||
self.enter_step('get_ip4_yield')
|
||||
yield ip4
|
||||
|
||||
def set_match(self,
|
||||
match: Match,
|
||||
updated: int,
|
||||
source: Path,
|
||||
) -> None:
|
||||
new_source = self.get_match(source)
|
||||
new_level = new_source.level + 1
|
||||
if updated > match.updated or new_level > match.level:
|
||||
def _set_match(self,
|
||||
match: Match,
|
||||
updated: int,
|
||||
source: Path,
|
||||
source_match: Match = None,
|
||||
) -> None:
|
||||
# source_match is in parameters because most of the time
|
||||
# its parent function needs it too,
|
||||
# so it can pass it to save a traversal
|
||||
source_match = source_match or self.get_match(source)
|
||||
new_level = source_match.level + 1
|
||||
if updated > match.updated or new_level < match.level \
|
||||
or source_match.first_party > match.first_party:
|
||||
# NOTE FP and level of matches referencing this one
|
||||
# won't be updated until run or prune
|
||||
if match.source:
|
||||
old_source = self.get_match(match.source)
|
||||
old_source.references -= 1
|
||||
match.updated = updated
|
||||
match.level = new_level
|
||||
match.first_party = source_match.first_party
|
||||
match.source = source
|
||||
new_source.references += 1
|
||||
# FP dupplicate function
|
||||
source_match.references += 1
|
||||
|
||||
def _set_domain(self,
|
||||
hostname: bool,
|
||||
domain_str: str,
|
||||
updated: int,
|
||||
is_first_party: bool = None,
|
||||
source: Path = None) -> None:
|
||||
source: Path) -> None:
|
||||
self.enter_step('set_domain_pack')
|
||||
if is_first_party:
|
||||
raise NotImplementedError
|
||||
domain = self.pack_domain(domain_str)
|
||||
self.enter_step('set_domain_fp')
|
||||
source_match = self.get_match(source)
|
||||
is_first_party = source_match.first_party
|
||||
self.enter_step('set_domain_brws')
|
||||
dic = self.domtree
|
||||
for part in domain.parts:
|
||||
if part not in dic.children:
|
||||
dic.children[part] = DomainTreeNode()
|
||||
dic = dic.children[part]
|
||||
if dic.match_zone.active():
|
||||
if dic.match_zone.active(is_first_party):
|
||||
# Refuse to add domain whose zone is already matching
|
||||
return
|
||||
if hostname:
|
||||
match = dic.match_hostname
|
||||
else:
|
||||
match = dic.match_zone
|
||||
self.set_match(
|
||||
self._set_match(
|
||||
match,
|
||||
updated,
|
||||
source or RulePath(),
|
||||
source,
|
||||
source_match=source_match,
|
||||
)
|
||||
|
||||
def set_hostname(self,
|
||||
|
@ -537,30 +566,27 @@ class Database(Profiler):
|
|||
def set_asn(self,
|
||||
asn_str: str,
|
||||
updated: int,
|
||||
is_first_party: bool = None,
|
||||
source: Path = None) -> None:
|
||||
source: Path) -> None:
|
||||
self.enter_step('set_asn')
|
||||
if is_first_party:
|
||||
raise NotImplementedError
|
||||
path = self.pack_asn(asn_str)
|
||||
if path.asn in self.asns:
|
||||
match = self.asns[path.asn]
|
||||
else:
|
||||
match = AsnNode()
|
||||
self.asns[path.asn] = match
|
||||
self.set_match(
|
||||
self._set_match(
|
||||
match,
|
||||
updated,
|
||||
source or RulePath(),
|
||||
source,
|
||||
)
|
||||
|
||||
def _set_ip4(self,
|
||||
ip4: Ip4Path,
|
||||
updated: int,
|
||||
is_first_party: bool = None,
|
||||
source: Path = None) -> None:
|
||||
if is_first_party:
|
||||
raise NotImplementedError
|
||||
source: Path) -> None:
|
||||
self.enter_step('set_ip4_fp')
|
||||
source_match = self.get_match(source)
|
||||
is_first_party = source_match.first_party
|
||||
self.enter_step('set_ip4_brws')
|
||||
dic = self.ip4tree
|
||||
for i in range(31, 31-ip4.prefixlen, -1):
|
||||
|
@ -573,13 +599,14 @@ class Database(Profiler):
|
|||
else:
|
||||
dic.zero = next_dic
|
||||
dic = next_dic
|
||||
if dic.active():
|
||||
if dic.active(is_first_party):
|
||||
# Refuse to add ip4* whose network is already matching
|
||||
return
|
||||
self.set_match(
|
||||
self._set_match(
|
||||
dic,
|
||||
updated,
|
||||
source or RulePath(),
|
||||
source,
|
||||
source_match=source_match,
|
||||
)
|
||||
|
||||
def set_ip4address(self,
|
||||
|
|
|
@ -32,10 +32,16 @@ if __name__ == '__main__':
|
|||
|
||||
fun = FUNCTION_MAP[args.type]
|
||||
|
||||
source: database.RulePath
|
||||
if args.first_party:
|
||||
source = database.RuleFirstPath()
|
||||
else:
|
||||
source = database.RuleMultiPath()
|
||||
|
||||
for rule in args.input:
|
||||
fun(DB,
|
||||
rule.strip(),
|
||||
# is_first_party=args.first_party,
|
||||
source=source,
|
||||
updated=int(time.time()),
|
||||
)
|
||||
|
||||
|
|
|
@ -6,11 +6,11 @@ function log() {
|
|||
|
||||
log "Importing rules…"
|
||||
BEFORE="$(date +%s)"
|
||||
# cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone
|
||||
# cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
||||
# cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
||||
# cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
|
||||
# cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn
|
||||
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone
|
||||
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
||||
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
||||
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
|
||||
cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn
|
||||
|
||||
cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party
|
||||
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party
|
||||
|
|
Loading…
Reference in a new issue