Workflow: Fixed rules counts
This commit is contained in:
parent
ab7ef609dd
commit
269b8278b5
20
database.py
20
database.py
|
@ -23,7 +23,7 @@ DbValue = typing.Union[None, int, float, str, bytes]
|
|||
|
||||
|
||||
class Database():
|
||||
VERSION = 4
|
||||
VERSION = 5
|
||||
PATH = "blocking.db"
|
||||
|
||||
def open(self) -> None:
|
||||
|
@ -250,6 +250,24 @@ class Database():
|
|||
else:
|
||||
yield val
|
||||
|
||||
def count_rules(self,
                first_party_only: bool = False,
                ) -> str:
    """Summarize how many level-0 rules exist for each kind of entry.

    For each of the ``asn``, ``ip4network``, ``ip4address``, ``zone`` and
    ``hostname`` tables, count the rows of ``rules`` with ``level = 0``
    that are joined to that table through its ``entry`` column.

    Args:
        first_party_only: When true, only rules whose ``first_party``
            column equals 1 are counted.

    Returns:
        A comma-separated summary such as ``"asn: 3, zone: 12"``.
        Tables with a count of zero are omitted, so the result is an
        empty string when nothing matches.
    """
    tables = ('asn', 'ip4network', 'ip4address', 'zone', 'hostname')

    # The filter is identical for every table, so build it once.
    # The column is qualified so it cannot become ambiguous should a
    # joined table ever gain a column of the same name.
    condition = 'WHERE rules.level = 0'
    if first_party_only:
        condition += ' AND rules.first_party = 1'

    counts: typing.List[str] = []
    cursor = self.conn.cursor()
    try:
        for table in tables:
            # Table names come from the fixed tuple above, never from
            # user input; identifiers cannot be bound as SQL parameters.
            cursor.execute(
                'SELECT count(*) FROM rules '
                f'INNER JOIN {table} ON rules.id = {table}.entry '
                f'{condition}'
            )
            count, = cursor.fetchone()
            if count > 0:
                counts.append(f'{table}: {count}')
    finally:
        # Release the cursor even if one of the queries fails.
        cursor.close()

    return ', '.join(counts)
|
||||
|
||||
def get_domain(self, domain: str) -> typing.Iterable[int]:
|
||||
self.enter_step('get_domain_prepare')
|
||||
domain_prep = self.pack_hostname(domain)
|
||||
|
|
|
@ -11,6 +11,8 @@ CREATE TABLE rules (
|
|||
FOREIGN KEY (source) REFERENCES rules(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX rules_source ON rules (source); -- for references recounting
|
||||
CREATE INDEX rules_updated ON rules (updated); -- for pruning
|
||||
CREATE INDEX rules_level_firstparty ON rules (level, first_party); -- for counting rules
|
||||
|
||||
CREATE TABLE asn (
|
||||
val INTEGER PRIMARY KEY,
|
||||
|
|
13
export.py
13
export.py
|
@ -22,10 +22,23 @@ if __name__ == '__main__':
|
|||
parser.add_argument(
|
||||
'-x', '--explain', action='store_true',
|
||||
help="TODO")
|
||||
parser.add_argument(
|
||||
'-r', '--rules', action='store_true',
|
||||
help="TODO")
|
||||
parser.add_argument(
|
||||
'-c', '--count', action='store_true',
|
||||
help="TODO")
|
||||
args = parser.parse_args()
|
||||
|
||||
DB = database.Database()
|
||||
|
||||
if args.rules:
|
||||
if not args.count:
|
||||
raise NotImplementedError
|
||||
print(DB.count_rules(first_party_only=args.first_party))
|
||||
else:
|
||||
if args.count:
|
||||
raise NotImplementedError
|
||||
for domain in DB.export(
|
||||
first_party_only=args.first_party,
|
||||
end_chain_only=args.end_chain,
|
||||
|
|
|
@ -4,6 +4,9 @@ function log() {
|
|||
echo -e "\033[33m$@\033[0m"
|
||||
}
|
||||
|
||||
log "Pruning old data…"
|
||||
./database.py --prune
|
||||
|
||||
log "Recounting references…"
|
||||
./database.py --references
|
||||
|
||||
|
@ -14,6 +17,8 @@ log "Exporting lists…"
|
|||
./export.py --end-chain --output dist/multiparty-only-trackers.txt
|
||||
|
||||
log "Generating hosts lists…"
|
||||
./export.py --rules --count --first-party > temp/count_rules_firstparty.txt
|
||||
./export.py --rules --count > temp/count_rules_multiparty.txt
|
||||
function generate_hosts {
|
||||
basename="$1"
|
||||
description="$2"
|
||||
|
@ -39,15 +44,16 @@ function generate_hosts {
|
|||
echo "#"
|
||||
echo "# Generation date: $(date -Isec)"
|
||||
echo "# Generation software: eulaurarien $(git describe --tags)"
|
||||
echo "# Number of source websites: TODO"
|
||||
echo "# Number of source subdomains: TODO"
|
||||
echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
|
||||
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
|
||||
echo "# Number of source DNS records: ~2M + $(wc -l temp/all_resolved.json | cut -d' ' -f1)"
|
||||
echo "#"
|
||||
echo "# Number of known first-party trackers: TODO"
|
||||
echo "# Number of first-party subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
||||
echo "# Known first-party trackers: $(cat temp/count_rules_firstparty.txt)"
|
||||
echo "# Number of first-party hostnames: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
||||
echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
|
||||
echo "#"
|
||||
echo "# Number of known multi-party trackers: TODO"
|
||||
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
||||
echo "# Known multi-party trackers: $(cat temp/count_rules_multiparty.txt)"
|
||||
echo "# Number of multi-party hostnames: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
||||
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
||||
echo
|
||||
sed 's|^|0.0.0.0 |' "dist/$basename.txt"
|
||||
|
|
|
@ -4,9 +4,7 @@ function log() {
|
|||
echo -e "\033[33m$@\033[0m"
|
||||
}
|
||||
|
||||
log "Preparing database…"
|
||||
./database.py --expire
|
||||
|
||||
./fetch_resources.sh
|
||||
./import_rules.sh
|
||||
|
||||
# TODO Fetch 'em
|
||||
|
|
Loading…
Reference in a new issue