Workflow: Some modifications
This commit is contained in:
parent
5023b85d7c
commit
d7c239a6f6
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -3,5 +3,3 @@
|
|||
*.db-journal
|
||||
nameservers
|
||||
nameservers.head
|
||||
*.o
|
||||
*.so
|
||||
|
|
22
database.py
22
database.py
|
@ -149,6 +149,8 @@ class Database():
|
|||
total = 0
|
||||
for i, octet in enumerate(address.split('.')):
|
||||
total += int(octet) << (3-i)*8
|
||||
if total > 0xFFFFFFFF:
|
||||
raise ValueError
|
||||
return total
|
||||
# return '{:02x}{:02x}{:02x}{:02x}'.format(
|
||||
# *[int(c) for c in address.split('.')])
|
||||
|
@ -192,10 +194,13 @@ class Database():
|
|||
'(SELECT count(*) FROM rules '
|
||||
'WHERE source=r.id)')
|
||||
|
||||
def prune(self, before: int) -> None:
|
||||
def prune(self, before: int, base_only: bool = False) -> None:
|
||||
self.enter_step('prune')
|
||||
cursor = self.conn.cursor()
|
||||
cursor.execute('DELETE FROM rules WHERE updated<?', (before,))
|
||||
cmd = 'DELETE FROM rules WHERE updated<?'
|
||||
if base_only:
|
||||
cmd += ' AND level=0'
|
||||
cursor.execute(cmd, (before,))
|
||||
|
||||
def explain(self, entry: int) -> str:
|
||||
# Format current
|
||||
|
@ -541,7 +546,14 @@ if __name__ == '__main__':
|
|||
help="Reconstruct the whole database")
|
||||
parser.add_argument(
|
||||
'-p', '--prune', action='store_true',
|
||||
help="Remove old (+6 months) entries from database")
|
||||
help="Remove old entries from database")
|
||||
parser.add_argument(
|
||||
'-b', '--prune-base', action='store_true',
|
||||
help="TODO")
|
||||
parser.add_argument(
|
||||
'-s', '--prune-before', type=int,
|
||||
default=(int(time.time()) - 60*60*24*31*6),
|
||||
help="TODO")
|
||||
parser.add_argument(
|
||||
'-r', '--references', action='store_true',
|
||||
help="Update the reference count")
|
||||
|
@ -552,8 +564,8 @@ if __name__ == '__main__':
|
|||
if args.initialize:
|
||||
DB.initialize()
|
||||
if args.prune:
|
||||
DB.prune(before=int(time.time()) - 60*60*24*31*6)
|
||||
if args.references and not args.prune:
|
||||
DB.prune(before=args.prune_before, base_only=args.prune_base)
|
||||
if args.references:
|
||||
DB.update_references()
|
||||
|
||||
DB.close()
|
||||
|
|
|
@ -37,20 +37,21 @@ if __name__ == '__main__':
|
|||
DB.enter_step('feed_switch')
|
||||
if dtype == 'a':
|
||||
for rule in DB.get_ip4(value):
|
||||
if not list(DB.get_domain_in_zone(name)):
|
||||
if not any(DB.get_domain_in_zone(name)):
|
||||
|
||||
DB.set_hostname(name, source=rule,
|
||||
updated=int(timestamp))
|
||||
# updated=int(data['timestamp']))
|
||||
elif dtype == 'c':
|
||||
for rule in DB.get_domain(value):
|
||||
if not list(DB.get_domain_in_zone(name)):
|
||||
if not any(DB.get_domain_in_zone(name)):
|
||||
DB.set_hostname(name, source=rule,
|
||||
updated=int(timestamp))
|
||||
# updated=int(data['timestamp']))
|
||||
elif dtype == 'p':
|
||||
for rule in DB.get_domain(value):
|
||||
if not list(DB.get_ip4_in_network(name)):
|
||||
if not any(DB.get_ip4_in_network(name)):
|
||||
log.debug('%s matched by %d: add %s', value, rule, name)
|
||||
DB.set_ip4address(name, source=rule,
|
||||
updated=int(timestamp))
|
||||
# updated=int(data['timestamp']))
|
||||
|
|
|
@ -18,7 +18,7 @@ log "Retrieving rules…"
|
|||
rm -f rules*/*.cache.*
|
||||
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
|
||||
# From firebog.net Tracking & Telemetry Lists
|
||||
dl https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list
|
||||
# dl https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list
|
||||
# dl https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt rules/notrack-blocklist.cache.list
|
||||
# False positives: https://github.com/WaLLy3K/wally3k.github.io/issues/73 -> 69.media.tumblr.com chicdn.net
|
||||
dl https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts rules_hosts/add2o7.cache.txt
|
||||
|
|
|
@ -5,6 +5,7 @@ function log() {
|
|||
}
|
||||
|
||||
log "Importing rules…"
|
||||
BEFORE="$(date +%s)"
|
||||
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone
|
||||
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
||||
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
||||
|
@ -17,3 +18,5 @@ cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py as
|
|||
|
||||
./feed_asn.py
|
||||
|
||||
log "Pruning old rules…"
|
||||
./database.py --prune --prune-before "$BEFORE" --prune-base
|
||||
|
|
|
@ -27,10 +27,10 @@ if __name__ == '__main__':
|
|||
data = json.loads(line)
|
||||
try:
|
||||
writer.writerow([
|
||||
data['type'][0],
|
||||
data['type'][0], # First letter, will need to do something special for AAAA
|
||||
data['timestamp'],
|
||||
data['name'],
|
||||
data['value']])
|
||||
except IndexError:
|
||||
except (KeyError, json.decoder.JSONDecodeError):
|
||||
log.error('Could not parse line: %s', line)
|
||||
pass
|
||||
|
|
Loading…
Reference in a new issue