Workflow: Fixed rules counts
This commit is contained in:
parent
ab7ef609dd
commit
269b8278b5
20
database.py
20
database.py
|
@ -23,7 +23,7 @@ DbValue = typing.Union[None, int, float, str, bytes]
|
||||||
|
|
||||||
|
|
||||||
class Database():
|
class Database():
|
||||||
VERSION = 4
|
VERSION = 5
|
||||||
PATH = "blocking.db"
|
PATH = "blocking.db"
|
||||||
|
|
||||||
def open(self) -> None:
|
def open(self) -> None:
|
||||||
|
@ -250,6 +250,24 @@ class Database():
|
||||||
else:
|
else:
|
||||||
yield val
|
yield val
|
||||||
|
|
||||||
|
def count_rules(self,
|
||||||
|
first_party_only: bool = False,
|
||||||
|
) -> str:
|
||||||
|
counts: typing.List[str] = list()
|
||||||
|
cursor = self.conn.cursor()
|
||||||
|
for table in ['asn', 'ip4network', 'ip4address', 'zone', 'hostname']:
|
||||||
|
command = f'SELECT count(*) FROM rules ' \
|
||||||
|
f'INNER JOIN {table} ON rules.id = {table}.entry ' \
|
||||||
|
'WHERE rules.level = 0'
|
||||||
|
if first_party_only:
|
||||||
|
command += ' AND first_party=1'
|
||||||
|
cursor.execute(command)
|
||||||
|
count, = cursor.fetchone()
|
||||||
|
if count > 0:
|
||||||
|
counts.append(f'{table}: {count}')
|
||||||
|
|
||||||
|
return ', '.join(counts)
|
||||||
|
|
||||||
def get_domain(self, domain: str) -> typing.Iterable[int]:
|
def get_domain(self, domain: str) -> typing.Iterable[int]:
|
||||||
self.enter_step('get_domain_prepare')
|
self.enter_step('get_domain_prepare')
|
||||||
domain_prep = self.pack_hostname(domain)
|
domain_prep = self.pack_hostname(domain)
|
||||||
|
|
|
@ -11,6 +11,8 @@ CREATE TABLE rules (
|
||||||
FOREIGN KEY (source) REFERENCES rules(id) ON DELETE CASCADE
|
FOREIGN KEY (source) REFERENCES rules(id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
CREATE INDEX rules_source ON rules (source); -- for references recounting
|
CREATE INDEX rules_source ON rules (source); -- for references recounting
|
||||||
|
CREATE INDEX rules_updated ON rules (updated); -- for pruning
|
||||||
|
CREATE INDEX rules_level_firstparty ON rules (level, first_party); -- for counting rules
|
||||||
|
|
||||||
CREATE TABLE asn (
|
CREATE TABLE asn (
|
||||||
val INTEGER PRIMARY KEY,
|
val INTEGER PRIMARY KEY,
|
||||||
|
|
25
export.py
25
export.py
|
@ -22,15 +22,28 @@ if __name__ == '__main__':
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-x', '--explain', action='store_true',
|
'-x', '--explain', action='store_true',
|
||||||
help="TODO")
|
help="TODO")
|
||||||
|
parser.add_argument(
|
||||||
|
'-r', '--rules', action='store_true',
|
||||||
|
help="TODO")
|
||||||
|
parser.add_argument(
|
||||||
|
'-c', '--count', action='store_true',
|
||||||
|
help="TODO")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
DB = database.Database()
|
DB = database.Database()
|
||||||
|
|
||||||
for domain in DB.export(
|
if args.rules:
|
||||||
first_party_only=args.first_party,
|
if not args.count:
|
||||||
end_chain_only=args.end_chain,
|
raise NotImplementedError
|
||||||
explain=args.explain,
|
print(DB.count_rules(first_party_only=args.first_party))
|
||||||
):
|
else:
|
||||||
print(domain, file=args.output)
|
if args.count:
|
||||||
|
raise NotImplementedError
|
||||||
|
for domain in DB.export(
|
||||||
|
first_party_only=args.first_party,
|
||||||
|
end_chain_only=args.end_chain,
|
||||||
|
explain=args.explain,
|
||||||
|
):
|
||||||
|
print(domain, file=args.output)
|
||||||
|
|
||||||
DB.close()
|
DB.close()
|
||||||
|
|
|
@ -4,6 +4,9 @@ function log() {
|
||||||
echo -e "\033[33m$@\033[0m"
|
echo -e "\033[33m$@\033[0m"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log "Pruning old data…"
|
||||||
|
./database.py --prune
|
||||||
|
|
||||||
log "Recounting references…"
|
log "Recounting references…"
|
||||||
./database.py --references
|
./database.py --references
|
||||||
|
|
||||||
|
@ -14,6 +17,8 @@ log "Exporting lists…"
|
||||||
./export.py --end-chain --output dist/multiparty-only-trackers.txt
|
./export.py --end-chain --output dist/multiparty-only-trackers.txt
|
||||||
|
|
||||||
log "Generating hosts lists…"
|
log "Generating hosts lists…"
|
||||||
|
./export.py --rules --count --first-party > temp/count_rules_firstparty.txt
|
||||||
|
./export.py --rules --count > temp/count_rules_multiparty.txt
|
||||||
function generate_hosts {
|
function generate_hosts {
|
||||||
basename="$1"
|
basename="$1"
|
||||||
description="$2"
|
description="$2"
|
||||||
|
@ -39,15 +44,16 @@ function generate_hosts {
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Generation date: $(date -Isec)"
|
echo "# Generation date: $(date -Isec)"
|
||||||
echo "# Generation software: eulaurarien $(git describe --tags)"
|
echo "# Generation software: eulaurarien $(git describe --tags)"
|
||||||
echo "# Number of source websites: TODO"
|
echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
|
||||||
echo "# Number of source subdomains: TODO"
|
echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
|
||||||
|
echo "# Number of source DNS records: ~2M + $(wc -l temp/all_resolved.json | cut -d' ' -f1)"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Number of known first-party trackers: TODO"
|
echo "# Known first-party trackers: $(cat temp/count_rules_firstparty.txt)"
|
||||||
echo "# Number of first-party subdomains: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
echo "# Number of first-party hostnames: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
|
||||||
echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
|
echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Number of known multi-party trackers: TODO"
|
echo "# Known multi-party trackers: $(cat temp/count_rules_multiparty.txt)"
|
||||||
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
echo "# Number of multi-party hostnames: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
||||||
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
||||||
echo
|
echo
|
||||||
sed 's|^|0.0.0.0 |' "dist/$basename.txt"
|
sed 's|^|0.0.0.0 |' "dist/$basename.txt"
|
||||||
|
|
|
@ -4,9 +4,7 @@ function log() {
|
||||||
echo -e "\033[33m$@\033[0m"
|
echo -e "\033[33m$@\033[0m"
|
||||||
}
|
}
|
||||||
|
|
||||||
log "Preparing database…"
|
./fetch_resources.sh
|
||||||
./database.py --expire
|
|
||||||
|
|
||||||
./import_rules.sh
|
./import_rules.sh
|
||||||
|
|
||||||
# TODO Fetch 'em
|
# TODO Fetch 'em
|
||||||
|
|
Loading…
Reference in a new issue