Implement pruning
This commit is contained in:
parent
1a6e64da3d
commit
7d1c1a1d54
|
@ -47,7 +47,9 @@ Depending on the sources you'll be using to generate the list, you'll need to in
|
|||
### Create a new database
|
||||
|
||||
The so-called database (in the form of `blocking.p`) is a file storing all the matching entities (ASN, IPs, hostnames, zones…) and every entity leading to it.
|
||||
For now there's no way to remove data from it, so here's the command to recreate it: `./db.py --initialize`.
|
||||
It exists because the list cannot be generated in one pass, as the links of a DNS redirection chain do not have to be input in order.
|
||||
You can purge old data from the database using `./db.py --prune --prune-before TIMESTAMP`;
|
||||
`TIMESTAMP` can be generated using `date +%s`.
|
||||
|
||||
### Gather external sources
|
||||
|
||||
|
@ -79,6 +81,7 @@ In each folder:
|
|||
- `*.custom.ext` are for sources that you don't want committed
|
||||
|
||||
Then, run `./import_rules.sh`.
|
||||
Note that removed rules and every record depending on them will be automatically pruned.
|
||||
|
||||
### Add subdomains
|
||||
|
||||
|
|
65
database.py
65
database.py
|
@ -95,6 +95,9 @@ class Match():
|
|||
return False
|
||||
return True
|
||||
|
||||
def disable(self) -> None:
|
||||
self.updated = 0
|
||||
|
||||
|
||||
class AsnNode(Match):
|
||||
def __init__(self) -> None:
|
||||
|
@ -478,15 +481,61 @@ class Database(Profiler):
|
|||
for _ in self.exec_each(increment_references_cb):
|
||||
pass
|
||||
|
||||
def _clean_deps(self) -> None:
|
||||
# Disable the matches that depends on the targeted
|
||||
# matches until all disabled matches reference count = 0
|
||||
did_something = True
|
||||
|
||||
def clean_deps_cb(path: Path,
|
||||
match: Match
|
||||
) -> None:
|
||||
nonlocal did_something
|
||||
if not match.source:
|
||||
return
|
||||
source = self.get_match(match.source)
|
||||
if not source.active():
|
||||
self._unset_match(match)
|
||||
elif match.first_party > source.first_party:
|
||||
match.first_party = source.first_party
|
||||
else:
|
||||
return
|
||||
did_something = True
|
||||
|
||||
while did_something:
|
||||
did_something = False
|
||||
self.enter_step('pass_clean_deps')
|
||||
for _ in self.exec_each(clean_deps_cb):
|
||||
pass
|
||||
|
||||
def prune(self, before: int, base_only: bool = False) -> None:
|
||||
raise NotImplementedError
|
||||
# Disable the matches targeted
|
||||
def prune_cb(path: Path,
|
||||
match: Match
|
||||
) -> None:
|
||||
if base_only and match.level > 1:
|
||||
return
|
||||
if match.updated > before:
|
||||
return
|
||||
self._unset_match(match)
|
||||
self.log.debug("Print: disabled %s", path)
|
||||
|
||||
self.enter_step('pass_prune')
|
||||
for _ in self.exec_each(prune_cb):
|
||||
pass
|
||||
|
||||
self._clean_deps()
|
||||
|
||||
# Remove branches with no match
|
||||
# TODO
|
||||
|
||||
def explain(self, path: Path) -> str:
|
||||
match = self.get_match(path)
|
||||
string = str(path)
|
||||
if isinstance(match, AsnNode):
|
||||
string = f'{path} ({match.name}) #{match.references}'
|
||||
else:
|
||||
string = f'{path} #{match.references}'
|
||||
string += f' ({match.name})'
|
||||
party_char = 'F' if match.first_party else 'M'
|
||||
dup_char = 'D' if match.dupplicate else '_'
|
||||
string += f' {match.level}{party_char}{dup_char}{match.references}'
|
||||
if match.source:
|
||||
string += f' ← {self.explain(match.source)}'
|
||||
return string
|
||||
|
@ -598,6 +647,14 @@ class Database(Profiler):
|
|||
self.enter_step('get_ip4_yield')
|
||||
yield ip4
|
||||
|
||||
def _unset_match(self,
|
||||
match: Match,
|
||||
) -> None:
|
||||
match.disable()
|
||||
if match.source:
|
||||
source_match = self.get_match(match.source)
|
||||
source_match.references -= 1
|
||||
|
||||
def _set_match(self,
|
||||
match: Match,
|
||||
updated: int,
|
||||
|
|
|
@ -18,5 +18,5 @@ cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py as
|
|||
|
||||
./feed_asn.py
|
||||
|
||||
# log "Pruning old rules…"
|
||||
# ./db.py --prune --prune-before "$BEFORE" --prune-base
|
||||
log "Pruning old rules…"
|
||||
./db.py --prune --prune-before "$BEFORE" --prune-base
|
||||
|
|
|
@ -7,7 +7,7 @@ function log() {
|
|||
log "Compiling nameservers…"
|
||||
pv nameservers/*.list | ./validate_list.py --ip4 | sort -u > temp/all_nameservers_ip4.list
|
||||
|
||||
log "Compiling subdomain…"
|
||||
log "Compiling subdomains…"
|
||||
# Sort by last character to utilize the DNS server caching mechanism
|
||||
# (not as efficient with massdns but it's almost free so why not)
|
||||
pv subdomains/*.list | ./validate_list.py --domain | rev | sort -u | rev > temp/all_subdomains.list
|
||||
|
|
Loading…
Reference in a new issue