Workflow: Some modifications
This commit is contained in:
parent
5023b85d7c
commit
d7c239a6f6
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -3,5 +3,3 @@
|
||||||
*.db-journal
|
*.db-journal
|
||||||
nameservers
|
nameservers
|
||||||
nameservers.head
|
nameservers.head
|
||||||
*.o
|
|
||||||
*.so
|
|
||||||
|
|
22
database.py
22
database.py
|
@ -149,6 +149,8 @@ class Database():
|
||||||
total = 0
|
total = 0
|
||||||
for i, octet in enumerate(address.split('.')):
|
for i, octet in enumerate(address.split('.')):
|
||||||
total += int(octet) << (3-i)*8
|
total += int(octet) << (3-i)*8
|
||||||
|
if total > 0xFFFFFFFF:
|
||||||
|
raise ValueError
|
||||||
return total
|
return total
|
||||||
# return '{:02x}{:02x}{:02x}{:02x}'.format(
|
# return '{:02x}{:02x}{:02x}{:02x}'.format(
|
||||||
# *[int(c) for c in address.split('.')])
|
# *[int(c) for c in address.split('.')])
|
||||||
|
@ -192,10 +194,13 @@ class Database():
|
||||||
'(SELECT count(*) FROM rules '
|
'(SELECT count(*) FROM rules '
|
||||||
'WHERE source=r.id)')
|
'WHERE source=r.id)')
|
||||||
|
|
||||||
def prune(self, before: int) -> None:
|
def prune(self, before: int, base_only: bool = False) -> None:
|
||||||
self.enter_step('prune')
|
self.enter_step('prune')
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
cursor.execute('DELETE FROM rules WHERE updated<?', (before,))
|
cmd = 'DELETE FROM rules WHERE updated<?'
|
||||||
|
if base_only:
|
||||||
|
cmd += ' AND level=0'
|
||||||
|
cursor.execute(cmd, (before,))
|
||||||
|
|
||||||
def explain(self, entry: int) -> str:
|
def explain(self, entry: int) -> str:
|
||||||
# Format current
|
# Format current
|
||||||
|
@ -541,7 +546,14 @@ if __name__ == '__main__':
|
||||||
help="Reconstruct the whole database")
|
help="Reconstruct the whole database")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-p', '--prune', action='store_true',
|
'-p', '--prune', action='store_true',
|
||||||
help="Remove old (+6 months) entries from database")
|
help="Remove old entries from database")
|
||||||
|
parser.add_argument(
|
||||||
|
'-b', '--prune-base', action='store_true',
|
||||||
|
help="TODO")
|
||||||
|
parser.add_argument(
|
||||||
|
'-s', '--prune-before', type=int,
|
||||||
|
default=(int(time.time()) - 60*60*24*31*6),
|
||||||
|
help="TODO")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-r', '--references', action='store_true',
|
'-r', '--references', action='store_true',
|
||||||
help="Update the reference count")
|
help="Update the reference count")
|
||||||
|
@ -552,8 +564,8 @@ if __name__ == '__main__':
|
||||||
if args.initialize:
|
if args.initialize:
|
||||||
DB.initialize()
|
DB.initialize()
|
||||||
if args.prune:
|
if args.prune:
|
||||||
DB.prune(before=int(time.time()) - 60*60*24*31*6)
|
DB.prune(before=args.prune_before, base_only=args.prune_base)
|
||||||
if args.references and not args.prune:
|
if args.references:
|
||||||
DB.update_references()
|
DB.update_references()
|
||||||
|
|
||||||
DB.close()
|
DB.close()
|
||||||
|
|
|
@ -37,20 +37,21 @@ if __name__ == '__main__':
|
||||||
DB.enter_step('feed_switch')
|
DB.enter_step('feed_switch')
|
||||||
if dtype == 'a':
|
if dtype == 'a':
|
||||||
for rule in DB.get_ip4(value):
|
for rule in DB.get_ip4(value):
|
||||||
if not list(DB.get_domain_in_zone(name)):
|
if not any(DB.get_domain_in_zone(name)):
|
||||||
|
|
||||||
DB.set_hostname(name, source=rule,
|
DB.set_hostname(name, source=rule,
|
||||||
updated=int(timestamp))
|
updated=int(timestamp))
|
||||||
# updated=int(data['timestamp']))
|
# updated=int(data['timestamp']))
|
||||||
elif dtype == 'c':
|
elif dtype == 'c':
|
||||||
for rule in DB.get_domain(value):
|
for rule in DB.get_domain(value):
|
||||||
if not list(DB.get_domain_in_zone(name)):
|
if not any(DB.get_domain_in_zone(name)):
|
||||||
DB.set_hostname(name, source=rule,
|
DB.set_hostname(name, source=rule,
|
||||||
updated=int(timestamp))
|
updated=int(timestamp))
|
||||||
# updated=int(data['timestamp']))
|
# updated=int(data['timestamp']))
|
||||||
elif dtype == 'p':
|
elif dtype == 'p':
|
||||||
for rule in DB.get_domain(value):
|
for rule in DB.get_domain(value):
|
||||||
if not list(DB.get_ip4_in_network(name)):
|
if not any(DB.get_ip4_in_network(name)):
|
||||||
|
log.debug('%s matched by %d: add %s', value, rule, name)
|
||||||
DB.set_ip4address(name, source=rule,
|
DB.set_ip4address(name, source=rule,
|
||||||
updated=int(timestamp))
|
updated=int(timestamp))
|
||||||
# updated=int(data['timestamp']))
|
# updated=int(data['timestamp']))
|
||||||
|
|
|
@ -18,7 +18,7 @@ log "Retrieving rules…"
|
||||||
rm -f rules*/*.cache.*
|
rm -f rules*/*.cache.*
|
||||||
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
|
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
|
||||||
# From firebog.net Tracking & Telemetry Lists
|
# From firebog.net Tracking & Telemetry Lists
|
||||||
dl https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list
|
# dl https://v.firebog.net/hosts/Prigent-Ads.txt rules/prigent-ads.cache.list
|
||||||
# dl https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt rules/notrack-blocklist.cache.list
|
# dl https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt rules/notrack-blocklist.cache.list
|
||||||
# False positives: https://github.com/WaLLy3K/wally3k.github.io/issues/73 -> 69.media.tumblr.com chicdn.net
|
# False positives: https://github.com/WaLLy3K/wally3k.github.io/issues/73 -> 69.media.tumblr.com chicdn.net
|
||||||
dl https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts rules_hosts/add2o7.cache.txt
|
dl https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts rules_hosts/add2o7.cache.txt
|
||||||
|
|
|
@ -5,6 +5,7 @@ function log() {
|
||||||
}
|
}
|
||||||
|
|
||||||
log "Importing rules…"
|
log "Importing rules…"
|
||||||
|
BEFORE="$(date +%s)"
|
||||||
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone
|
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_domain_list.py | ./feed_rules.py zone
|
||||||
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
||||||
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
||||||
|
@ -17,3 +18,5 @@ cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py as
|
||||||
|
|
||||||
./feed_asn.py
|
./feed_asn.py
|
||||||
|
|
||||||
|
log "Pruning old rules…"
|
||||||
|
./database.py --prune --prune-before "$BEFORE" --prune-base
|
||||||
|
|
|
@ -27,10 +27,10 @@ if __name__ == '__main__':
|
||||||
data = json.loads(line)
|
data = json.loads(line)
|
||||||
try:
|
try:
|
||||||
writer.writerow([
|
writer.writerow([
|
||||||
data['type'][0],
|
data['type'][0], # First letter, will need to do something special for AAAA
|
||||||
data['timestamp'],
|
data['timestamp'],
|
||||||
data['name'],
|
data['name'],
|
||||||
data['value']])
|
data['value']])
|
||||||
except IndexError:
|
except (KeyError, json.decoder.JSONDecodeError):
|
||||||
log.error('Could not parse line: %s', line)
|
log.error('Could not parse line: %s', line)
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in a new issue