Added first_party tracking
More precisely, tracking whether a rule comes from a first-party or a multi-party rule... Hope I did not make any mistakes
This commit is contained in:
		
							parent
							
								
									c3bf102289
								
							
						
					
					
						commit
						8f6e01c857
					
				
					 3 changed files with 93 additions and 60 deletions
				
			
		
							
								
								
									
										135
									
								
								database.py
									
										
									
									
									
								
							
							
						
						
									
										135
									
								
								database.py
									
										
									
									
									
								
							|  | @ -27,7 +27,17 @@ class Path(): | ||||||
| 
 | 
 | ||||||
| class RulePath(Path): | class RulePath(Path): | ||||||
|     def __str__(self) -> str: |     def __str__(self) -> str: | ||||||
|         return '(rules)' |         return '(rule)' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class RuleFirstPath(RulePath): | ||||||
|  |     def __str__(self) -> str: | ||||||
|  |         return '(first-party rule)' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class RuleMultiPath(RulePath): | ||||||
|  |     def __str__(self) -> str: | ||||||
|  |         return '(multi-party rule)' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class DomainPath(Path): | class DomainPath(Path): | ||||||
|  | @ -67,14 +77,18 @@ class Ip4Path(Path): | ||||||
| 
 | 
 | ||||||
| class Match(): | class Match(): | ||||||
|     def __init__(self) -> None: |     def __init__(self) -> None: | ||||||
|         self.updated: int = 0 |  | ||||||
|         self.level: int = 0 |  | ||||||
|         self.source: typing.Optional[Path] = None |         self.source: typing.Optional[Path] = None | ||||||
|         self.references: int = 0 |         self.updated: int = 0 | ||||||
|         # FP dupplicate args |  | ||||||
| 
 | 
 | ||||||
|     def active(self) -> bool: |         # Cache | ||||||
|         return self.updated > 0 |         self.level: int = 0 | ||||||
|  |         self.first_party: bool = False | ||||||
|  |         self.references: int = 0 | ||||||
|  | 
 | ||||||
|  |     def active(self, first_party: bool = None) -> bool: | ||||||
|  |         if self.updated == 0 or (first_party and not self.first_party): | ||||||
|  |             return False | ||||||
|  |         return True | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class AsnNode(Match): | class AsnNode(Match): | ||||||
|  | @ -133,13 +147,21 @@ class Profiler(): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Database(Profiler): | class Database(Profiler): | ||||||
|     VERSION = 14 |     VERSION = 17 | ||||||
|     PATH = "blocking.p" |     PATH = "blocking.p" | ||||||
| 
 | 
 | ||||||
|     def initialize(self) -> None: |     def initialize(self) -> None: | ||||||
|         self.log.warning( |         self.log.warning( | ||||||
|             "Creating database version: %d ", |             "Creating database version: %d ", | ||||||
|             Database.VERSION) |             Database.VERSION) | ||||||
|  |         # Dummy match objects that everything refers to | ||||||
|  |         self.rules: typing.List[Match] = list() | ||||||
|  |         for first_party in (False, True): | ||||||
|  |             m = Match() | ||||||
|  |             m.updated = 1 | ||||||
|  |             m.level = 0 | ||||||
|  |             m.first_party = first_party | ||||||
|  |             self.rules.append(m) | ||||||
|         self.domtree = DomainTreeNode() |         self.domtree = DomainTreeNode() | ||||||
|         self.asns: typing.Dict[Asn, AsnNode] = dict() |         self.asns: typing.Dict[Asn, AsnNode] = dict() | ||||||
|         self.ip4tree = IpTreeNode() |         self.ip4tree = IpTreeNode() | ||||||
|  | @ -150,7 +172,7 @@ class Database(Profiler): | ||||||
|             with open(self.PATH, 'rb') as db_fdsec: |             with open(self.PATH, 'rb') as db_fdsec: | ||||||
|                 version, data = pickle.load(db_fdsec) |                 version, data = pickle.load(db_fdsec) | ||||||
|                 if version == Database.VERSION: |                 if version == Database.VERSION: | ||||||
|                     self.domtree, self.asns, self.ip4tree = data |                     self.rules, self.domtree, self.asns, self.ip4tree = data | ||||||
|                     return |                     return | ||||||
|                 self.log.warning( |                 self.log.warning( | ||||||
|                     "Outdated database version found: %d, " |                     "Outdated database version found: %d, " | ||||||
|  | @ -167,7 +189,7 @@ class Database(Profiler): | ||||||
|     def save(self) -> None: |     def save(self) -> None: | ||||||
|         self.enter_step('save') |         self.enter_step('save') | ||||||
|         with open(self.PATH, 'wb') as db_fdsec: |         with open(self.PATH, 'wb') as db_fdsec: | ||||||
|             data = self.domtree, self.asns, self.ip4tree |             data = self.rules, self.domtree, self.asns, self.ip4tree | ||||||
|             pickle.dump((self.VERSION, data), db_fdsec) |             pickle.dump((self.VERSION, data), db_fdsec) | ||||||
|         self.profile() |         self.profile() | ||||||
| 
 | 
 | ||||||
|  | @ -232,8 +254,10 @@ class Database(Profiler): | ||||||
|         return '.'.join(map(str, octets)) + '/' + str(network.prefixlen) |         return '.'.join(map(str, octets)) + '/' + str(network.prefixlen) | ||||||
| 
 | 
 | ||||||
|     def get_match(self, path: Path) -> Match: |     def get_match(self, path: Path) -> Match: | ||||||
|         if isinstance(path, RulePath): |         if isinstance(path, RuleMultiPath): | ||||||
|             return Match() |             return self.rules[0] | ||||||
|  |         elif isinstance(path, RuleFirstPath): | ||||||
|  |             return self.rules[1] | ||||||
|         elif isinstance(path, AsnPath): |         elif isinstance(path, AsnPath): | ||||||
|             return self.asns[path.asn] |             return self.asns[path.asn] | ||||||
|         elif isinstance(path, DomainPath): |         elif isinstance(path, DomainPath): | ||||||
|  | @ -275,7 +299,6 @@ class Database(Profiler): | ||||||
|                 except TypeError:  # not iterable |                 except TypeError:  # not iterable | ||||||
|                     pass |                     pass | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
|     def exec_each_domain(self, |     def exec_each_domain(self, | ||||||
|                          callback: MatchCallable, |                          callback: MatchCallable, | ||||||
|                          arg: typing.Any = None, |                          arg: typing.Any = None, | ||||||
|  | @ -374,8 +397,8 @@ class Database(Profiler): | ||||||
|             pass |             pass | ||||||
| 
 | 
 | ||||||
|         def increment_references_cb(path: Path, |         def increment_references_cb(path: Path, | ||||||
|                                 match: Match, _: typing.Any |                                     match: Match, _: typing.Any | ||||||
|                                 ) -> None: |                                     ) -> None: | ||||||
|             if match.source: |             if match.source: | ||||||
|                 source = self.get_match(match.source) |                 source = self.get_match(match.source) | ||||||
|                 source.references += 1 |                 source.references += 1 | ||||||
|  | @ -387,9 +410,7 @@ class Database(Profiler): | ||||||
| 
 | 
 | ||||||
|     def explain(self, path: Path) -> str: |     def explain(self, path: Path) -> str: | ||||||
|         match = self.get_match(path) |         match = self.get_match(path) | ||||||
|         string = f'{path}' |         string = f'{path} #{match.references}' | ||||||
|         if not isinstance(path, RulePath): |  | ||||||
|             string += f' #{match.references}' |  | ||||||
|         if match.source: |         if match.source: | ||||||
|             string += f' ← {self.explain(match.source)}' |             string += f' ← {self.explain(match.source)}' | ||||||
|         return string |         return string | ||||||
|  | @ -399,14 +420,14 @@ class Database(Profiler): | ||||||
|                end_chain_only: bool = False, |                end_chain_only: bool = False, | ||||||
|                explain: bool = False, |                explain: bool = False, | ||||||
|                ) -> typing.Iterable[str]: |                ) -> typing.Iterable[str]: | ||||||
|         if first_party_only: |  | ||||||
|             raise NotImplementedError |  | ||||||
| 
 | 
 | ||||||
|         def export_cb(path: Path, match: Match, _: typing.Any |         def export_cb(path: Path, match: Match, _: typing.Any | ||||||
|                       ) -> typing.Iterable[str]: |                       ) -> typing.Iterable[str]: | ||||||
|             assert isinstance(path, DomainPath) |             assert isinstance(path, DomainPath) | ||||||
|             if not isinstance(path, HostnamePath): |             if not isinstance(path, HostnamePath): | ||||||
|                 return |                 return | ||||||
|  |             if first_party_only and not match.first_party: | ||||||
|  |                 return | ||||||
|             if end_chain_only and match.references > 0: |             if end_chain_only and match.references > 0: | ||||||
|                 return |                 return | ||||||
|             if explain: |             if explain: | ||||||
|  | @ -419,11 +440,11 @@ class Database(Profiler): | ||||||
|     def list_rules(self, |     def list_rules(self, | ||||||
|                    first_party_only: bool = False, |                    first_party_only: bool = False, | ||||||
|                    ) -> typing.Iterable[str]: |                    ) -> typing.Iterable[str]: | ||||||
|         if first_party_only: |  | ||||||
|             raise NotImplementedError |  | ||||||
| 
 | 
 | ||||||
|         def list_rules_cb(path: Path, match: Match, _: typing.Any |         def list_rules_cb(path: Path, match: Match, _: typing.Any | ||||||
|                           ) -> typing.Iterable[str]: |                           ) -> typing.Iterable[str]: | ||||||
|  |             if first_party_only and not match.first_party: | ||||||
|  |                 return | ||||||
|             if isinstance(path, ZonePath) \ |             if isinstance(path, ZonePath) \ | ||||||
|                     or (isinstance(path, Ip4Path) and path.prefixlen < 32): |                     or (isinstance(path, Ip4Path) and path.prefixlen < 32): | ||||||
|                 # if match.level == 0: |                 # if match.level == 0: | ||||||
|  | @ -465,10 +486,10 @@ class Database(Profiler): | ||||||
|         dic = self.ip4tree |         dic = self.ip4tree | ||||||
|         for i in range(31, 31-ip4.prefixlen, -1): |         for i in range(31, 31-ip4.prefixlen, -1): | ||||||
|             bit = (ip4.value >> i) & 0b1 |             bit = (ip4.value >> i) & 0b1 | ||||||
|  |             # TODO PERF copy value and slide once every loop | ||||||
|             if dic.active(): |             if dic.active(): | ||||||
|                 self.enter_step('get_ip4_yield') |                 self.enter_step('get_ip4_yield') | ||||||
|                 a = Ip4Path(ip4.value >> (i+1) << (i+1), 31-i) |                 yield Ip4Path(ip4.value >> (i+1) << (i+1), 31-i) | ||||||
|                 yield a |  | ||||||
|                 self.enter_step('get_ip4_brws') |                 self.enter_step('get_ip4_brws') | ||||||
|             next_dic = dic.one if bit else dic.zero |             next_dic = dic.one if bit else dic.zero | ||||||
|             if next_dic is None: |             if next_dic is None: | ||||||
|  | @ -478,50 +499,58 @@ class Database(Profiler): | ||||||
|             self.enter_step('get_ip4_yield') |             self.enter_step('get_ip4_yield') | ||||||
|             yield ip4 |             yield ip4 | ||||||
| 
 | 
 | ||||||
|     def set_match(self, |     def _set_match(self, | ||||||
|                   match: Match, |                    match: Match, | ||||||
|                   updated: int, |                    updated: int, | ||||||
|                   source: Path, |                    source: Path, | ||||||
|                   ) -> None: |                    source_match: Match = None, | ||||||
|         new_source = self.get_match(source) |                    ) -> None: | ||||||
|         new_level = new_source.level + 1 |         # source_match is in parameters because most of the time | ||||||
|         if updated > match.updated or new_level > match.level: |         # its parent function needs it too, | ||||||
|  |         # so it can pass it to save a traversal | ||||||
|  |         source_match = source_match or self.get_match(source) | ||||||
|  |         new_level = source_match.level + 1 | ||||||
|  |         if updated > match.updated or new_level < match.level \ | ||||||
|  |                 or source_match.first_party > match.first_party: | ||||||
|  |             # NOTE FP and level of matches referencing this one | ||||||
|  |             # won't be updated until run or prune | ||||||
|             if match.source: |             if match.source: | ||||||
|                 old_source = self.get_match(match.source) |                 old_source = self.get_match(match.source) | ||||||
|                 old_source.references -= 1 |                 old_source.references -= 1 | ||||||
|             match.updated = updated |             match.updated = updated | ||||||
|             match.level = new_level |             match.level = new_level | ||||||
|  |             match.first_party = source_match.first_party | ||||||
|             match.source = source |             match.source = source | ||||||
|             new_source.references += 1 |             source_match.references += 1 | ||||||
|         # FP dupplicate function |  | ||||||
| 
 | 
 | ||||||
|     def _set_domain(self, |     def _set_domain(self, | ||||||
|                     hostname: bool, |                     hostname: bool, | ||||||
|                     domain_str: str, |                     domain_str: str, | ||||||
|                     updated: int, |                     updated: int, | ||||||
|                     is_first_party: bool = None, |                     source: Path) -> None: | ||||||
|                     source: Path = None) -> None: |  | ||||||
|         self.enter_step('set_domain_pack') |         self.enter_step('set_domain_pack') | ||||||
|         if is_first_party: |  | ||||||
|             raise NotImplementedError |  | ||||||
|         domain = self.pack_domain(domain_str) |         domain = self.pack_domain(domain_str) | ||||||
|  |         self.enter_step('set_domain_fp') | ||||||
|  |         source_match = self.get_match(source) | ||||||
|  |         is_first_party = source_match.first_party | ||||||
|         self.enter_step('set_domain_brws') |         self.enter_step('set_domain_brws') | ||||||
|         dic = self.domtree |         dic = self.domtree | ||||||
|         for part in domain.parts: |         for part in domain.parts: | ||||||
|             if part not in dic.children: |             if part not in dic.children: | ||||||
|                 dic.children[part] = DomainTreeNode() |                 dic.children[part] = DomainTreeNode() | ||||||
|             dic = dic.children[part] |             dic = dic.children[part] | ||||||
|             if dic.match_zone.active(): |             if dic.match_zone.active(is_first_party): | ||||||
|                 # Refuse to add domain whose zone is already matching |                 # Refuse to add domain whose zone is already matching | ||||||
|                 return |                 return | ||||||
|         if hostname: |         if hostname: | ||||||
|             match = dic.match_hostname |             match = dic.match_hostname | ||||||
|         else: |         else: | ||||||
|             match = dic.match_zone |             match = dic.match_zone | ||||||
|         self.set_match( |         self._set_match( | ||||||
|             match, |             match, | ||||||
|             updated, |             updated, | ||||||
|             source or RulePath(), |             source, | ||||||
|  |             source_match=source_match, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def set_hostname(self, |     def set_hostname(self, | ||||||
|  | @ -537,30 +566,27 @@ class Database(Profiler): | ||||||
|     def set_asn(self, |     def set_asn(self, | ||||||
|                 asn_str: str, |                 asn_str: str, | ||||||
|                 updated: int, |                 updated: int, | ||||||
|                 is_first_party: bool = None, |                 source: Path) -> None: | ||||||
|                 source: Path = None) -> None: |  | ||||||
|         self.enter_step('set_asn') |         self.enter_step('set_asn') | ||||||
|         if is_first_party: |  | ||||||
|             raise NotImplementedError |  | ||||||
|         path = self.pack_asn(asn_str) |         path = self.pack_asn(asn_str) | ||||||
|         if path.asn in self.asns: |         if path.asn in self.asns: | ||||||
|             match = self.asns[path.asn] |             match = self.asns[path.asn] | ||||||
|         else: |         else: | ||||||
|             match = AsnNode() |             match = AsnNode() | ||||||
|             self.asns[path.asn] = match |             self.asns[path.asn] = match | ||||||
|         self.set_match( |         self._set_match( | ||||||
|             match, |             match, | ||||||
|             updated, |             updated, | ||||||
|             source or RulePath(), |             source, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _set_ip4(self, |     def _set_ip4(self, | ||||||
|                  ip4: Ip4Path, |                  ip4: Ip4Path, | ||||||
|                  updated: int, |                  updated: int, | ||||||
|                  is_first_party: bool = None, |                  source: Path) -> None: | ||||||
|                  source: Path = None) -> None: |         self.enter_step('set_ip4_fp') | ||||||
|         if is_first_party: |         source_match = self.get_match(source) | ||||||
|             raise NotImplementedError |         is_first_party = source_match.first_party | ||||||
|         self.enter_step('set_ip4_brws') |         self.enter_step('set_ip4_brws') | ||||||
|         dic = self.ip4tree |         dic = self.ip4tree | ||||||
|         for i in range(31, 31-ip4.prefixlen, -1): |         for i in range(31, 31-ip4.prefixlen, -1): | ||||||
|  | @ -573,13 +599,14 @@ class Database(Profiler): | ||||||
|                 else: |                 else: | ||||||
|                     dic.zero = next_dic |                     dic.zero = next_dic | ||||||
|             dic = next_dic |             dic = next_dic | ||||||
|             if dic.active(): |             if dic.active(is_first_party): | ||||||
|                 # Refuse to add ip4* whose network is already matching |                 # Refuse to add ip4* whose network is already matching | ||||||
|                 return |                 return | ||||||
|         self.set_match( |         self._set_match( | ||||||
|             dic, |             dic, | ||||||
|             updated, |             updated, | ||||||
|             source or RulePath(), |             source, | ||||||
|  |             source_match=source_match, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def set_ip4address(self, |     def set_ip4address(self, | ||||||
|  |  | ||||||
|  | @ -32,10 +32,16 @@ if __name__ == '__main__': | ||||||
| 
 | 
 | ||||||
|     fun = FUNCTION_MAP[args.type] |     fun = FUNCTION_MAP[args.type] | ||||||
| 
 | 
 | ||||||
|  |     source: database.RulePath | ||||||
|  |     if args.first_party: | ||||||
|  |         source = database.RuleFirstPath() | ||||||
|  |     else: | ||||||
|  |         source = database.RuleMultiPath() | ||||||
|  | 
 | ||||||
|     for rule in args.input: |     for rule in args.input: | ||||||
|         fun(DB, |         fun(DB, | ||||||
|             rule.strip(), |             rule.strip(), | ||||||
|             # is_first_party=args.first_party, |             source=source, | ||||||
|             updated=int(time.time()), |             updated=int(time.time()), | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -6,11 +6,11 @@ function log() { | ||||||
| 
 | 
 | ||||||
| log "Importing rules…" | log "Importing rules…" | ||||||
| BEFORE="$(date +%s)" | BEFORE="$(date +%s)" | ||||||
| # cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone | cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone | ||||||
| # cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone | cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone | ||||||
| # cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone | cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone | ||||||
| # cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network | cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network | ||||||
| # cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn | cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn | ||||||
| 
 | 
 | ||||||
| cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party | cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party | ||||||
| cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party | cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue