Worflow: Fixed rules counts

This commit is contained in:
Geoffrey Frogeye 2019-12-13 18:36:08 +01:00
parent ab7ef609dd
commit 269b8278b5
Signed by: geoffrey
GPG key ID: D8A7ECA00A8CD3DD
5 changed files with 53 additions and 16 deletions

View file

@ -23,7 +23,7 @@ DbValue = typing.Union[None, int, float, str, bytes]
class Database(): class Database():
VERSION = 4 VERSION = 5
PATH = "blocking.db" PATH = "blocking.db"
def open(self) -> None: def open(self) -> None:
@ -250,6 +250,24 @@ class Database():
else: else:
yield val yield val
def count_rules(self,
first_party_only: bool = False,
) -> str:
counts: typing.List[str] = list()
cursor = self.conn.cursor()
for table in ['asn', 'ip4network', 'ip4address', 'zone', 'hostname']:
command = f'SELECT count(*) FROM rules ' \
f'INNER JOIN {table} ON rules.id = {table}.entry ' \
'WHERE rules.level = 0'
if first_party_only:
command += ' AND first_party=1'
cursor.execute(command)
count, = cursor.fetchone()
if count > 0:
counts.append(f'{table}: {count}')
return ', '.join(counts)
def get_domain(self, domain: str) -> typing.Iterable[int]: def get_domain(self, domain: str) -> typing.Iterable[int]:
self.enter_step('get_domain_prepare') self.enter_step('get_domain_prepare')
domain_prep = self.pack_hostname(domain) domain_prep = self.pack_hostname(domain)

View file

@ -11,6 +11,8 @@ CREATE TABLE rules (
FOREIGN KEY (source) REFERENCES rules(id) ON DELETE CASCADE FOREIGN KEY (source) REFERENCES rules(id) ON DELETE CASCADE
); );
CREATE INDEX rules_source ON rules (source); -- for references recounting CREATE INDEX rules_source ON rules (source); -- for references recounting
CREATE INDEX rules_updated ON rules (updated); -- for pruning
CREATE INDEX rules_level_firstparty ON rules (level, first_party); -- for counting rules
CREATE TABLE asn ( CREATE TABLE asn (
val INTEGER PRIMARY KEY, val INTEGER PRIMARY KEY,

View file

@ -22,10 +22,23 @@ if __name__ == '__main__':
parser.add_argument( parser.add_argument(
'-x', '--explain', action='store_true', '-x', '--explain', action='store_true',
help="TODO") help="TODO")
parser.add_argument(
'-r', '--rules', action='store_true',
help="TODO")
parser.add_argument(
'-c', '--count', action='store_true',
help="TODO")
args = parser.parse_args() args = parser.parse_args()
DB = database.Database() DB = database.Database()
if args.rules:
if not args.count:
raise NotImplementedError
print(DB.count_rules(first_party_only=args.first_party))
else:
if args.count:
raise NotImplementedError
for domain in DB.export( for domain in DB.export(
first_party_only=args.first_party, first_party_only=args.first_party,
end_chain_only=args.end_chain, end_chain_only=args.end_chain,

View file

@ -4,6 +4,9 @@ function log() {
echo -e "\033[33m$@\033[0m" echo -e "\033[33m$@\033[0m"
} }
log "Pruning old data…"
./database.py --prune
log "Recounting references…" log "Recounting references…"
./database.py --references ./database.py --references
@ -14,6 +17,8 @@ log "Exporting lists…"
./export.py --end-chain --output dist/multiparty-only-trackers.txt ./export.py --end-chain --output dist/multiparty-only-trackers.txt
log "Generating hosts lists…" log "Generating hosts lists…"
./export.py --rules --count --first-party > temp/count_rules_firstparty.txt
./export.py --rules --count > temp/count_rules_multiparty.txt
function generate_hosts { function generate_hosts {
basename="$1" basename="$1"
description="$2" description="$2"
@ -39,15 +44,16 @@ function generate_hosts {
echo "#" echo "#"
echo "# Generation date: $(date -Isec)" echo "# Generation date: $(date -Isec)"
echo "# Generation software: eulaurarien $(git describe --tags)" echo "# Generation software: eulaurarien $(git describe --tags)"
echo "# Number of source websites: TODO" echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
echo "# Number of source subdomains: TODO" echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
echo "# Number of source DNS records: ~2M + $(wc -l temp/all_resolved.json | cut -d' ' -f1)"
echo "#" echo "#"
echo "# Number of known first-party trackers: TODO" echo "# Known first-party trackers: $(cat temp/count_rules_firstparty.txt)"
echo "# Number of first-party subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)" echo "# Number of first-party hostnames: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)" echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
echo "#" echo "#"
echo "# Number of known multi-party trackers: TODO" echo "# Known multi-party trackers: $(cat temp/count_rules_multiparty.txt)"
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)" echo "# Number of multi-party hostnames: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)" echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
echo echo
sed 's|^|0.0.0.0 |' "dist/$basename.txt" sed 's|^|0.0.0.0 |' "dist/$basename.txt"

View file

@ -4,9 +4,7 @@ function log() {
echo -e "\033[33m$@\033[0m" echo -e "\033[33m$@\033[0m"
} }
log "Preparing database…" ./fetch_resources.sh
./database.py --expire
./import_rules.sh ./import_rules.sh
# TODO Fetch 'em # TODO Fetch 'em