parent
57416b6e2c
commit
e19f666331
43
database.py
43
database.py
|
@ -33,6 +33,9 @@ class Database():
|
||||||
# self.conn.create_function("prepare_ip4address", 1,
|
# self.conn.create_function("prepare_ip4address", 1,
|
||||||
# Database.prepare_ip4address,
|
# Database.prepare_ip4address,
|
||||||
# deterministic=True)
|
# deterministic=True)
|
||||||
|
self.conn.create_function("unpack_domain", 1,
|
||||||
|
lambda s: s[:-1][::-1],
|
||||||
|
deterministic=True)
|
||||||
|
|
||||||
def execute(self, cmd: str, args: typing.Union[
|
def execute(self, cmd: str, args: typing.Union[
|
||||||
typing.Tuple[DbValue, ...],
|
typing.Tuple[DbValue, ...],
|
||||||
|
@ -123,6 +126,13 @@ class Database():
|
||||||
def prepare_zone(self, zone: str) -> str:
|
def prepare_zone(self, zone: str) -> str:
|
||||||
return self.prepare_hostname(zone)
|
return self.prepare_hostname(zone)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def prepare_asn(asn: str) -> int:
|
||||||
|
asn = asn.upper()
|
||||||
|
if asn.startswith('AS'):
|
||||||
|
asn = asn[2:]
|
||||||
|
return int(asn)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def prepare_ip4address(address: str) -> int:
|
def prepare_ip4address(address: str) -> int:
|
||||||
total = 0
|
total = 0
|
||||||
|
@ -169,7 +179,7 @@ class Database():
|
||||||
|
|
||||||
def export(self, first_party_only: bool = False,
|
def export(self, first_party_only: bool = False,
|
||||||
end_chain_only: bool = False) -> typing.Iterable[str]:
|
end_chain_only: bool = False) -> typing.Iterable[str]:
|
||||||
command = 'SELECT val FROM rules ' \
|
command = 'SELECT unpack_domain(val) FROM rules ' \
|
||||||
'INNER JOIN hostname ON rules.id = hostname.entry'
|
'INNER JOIN hostname ON rules.id = hostname.entry'
|
||||||
restrictions: typing.List[str] = list()
|
restrictions: typing.List[str] = list()
|
||||||
if first_party_only:
|
if first_party_only:
|
||||||
|
@ -178,9 +188,10 @@ class Database():
|
||||||
restrictions.append('rules.refs = 0')
|
restrictions.append('rules.refs = 0')
|
||||||
if restrictions:
|
if restrictions:
|
||||||
command += ' WHERE ' + ' AND '.join(restrictions)
|
command += ' WHERE ' + ' AND '.join(restrictions)
|
||||||
|
command += ' ORDER BY unpack_domain(val) ASC'
|
||||||
self.execute(command)
|
self.execute(command)
|
||||||
for val, in self.cursor:
|
for val, in self.cursor:
|
||||||
yield val[:-1][::-1]
|
yield val
|
||||||
|
|
||||||
def get_domain(self, domain: str) -> typing.Iterable[int]:
|
def get_domain(self, domain: str) -> typing.Iterable[int]:
|
||||||
self.enter_step('get_domain_prepare')
|
self.enter_step('get_domain_prepare')
|
||||||
|
@ -235,6 +246,13 @@ class Database():
|
||||||
self.enter_step('get_ip4_yield')
|
self.enter_step('get_ip4_yield')
|
||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
|
def list_asn(self) -> typing.Iterable[typing.Tuple[str, int]]:
|
||||||
|
self.enter_step('list_asn_select')
|
||||||
|
self.enter_step('get_domain_select')
|
||||||
|
self.execute('SELECT val, entry FROM asn')
|
||||||
|
for val, entry in self.cursor:
|
||||||
|
yield f'AS{val}', entry
|
||||||
|
|
||||||
def _set_generic(self,
|
def _set_generic(self,
|
||||||
table: str,
|
table: str,
|
||||||
select_query: str,
|
select_query: str,
|
||||||
|
@ -325,8 +343,29 @@ class Database():
|
||||||
*args, **kwargs
|
*args, **kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def set_asn(self, asn: str,
|
||||||
|
*args: typing.Any, **kwargs: typing.Any) -> None:
|
||||||
|
self.enter_step('set_asn_prepare')
|
||||||
|
try:
|
||||||
|
asn_prep = self.prepare_asn(asn)
|
||||||
|
except ValueError:
|
||||||
|
self.log.error("Invalid asn: %s", asn)
|
||||||
|
return
|
||||||
|
prep: typing.Dict[str, DbValue] = {
|
||||||
|
'val': asn_prep,
|
||||||
|
}
|
||||||
|
self._set_generic(
|
||||||
|
'asn',
|
||||||
|
'SELECT entry FROM asn WHERE val=:val',
|
||||||
|
'INSERT INTO asn (val, entry) '
|
||||||
|
'VALUES (:val, :entry)',
|
||||||
|
prep,
|
||||||
|
*args, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
def set_ip4address(self, ip4address: str,
|
def set_ip4address(self, ip4address: str,
|
||||||
*args: typing.Any, **kwargs: typing.Any) -> None:
|
*args: typing.Any, **kwargs: typing.Any) -> None:
|
||||||
|
# TODO Do not add if already in ip4network
|
||||||
self.enter_step('set_ip4add_prepare')
|
self.enter_step('set_ip4add_prepare')
|
||||||
try:
|
try:
|
||||||
ip4address_prep = self.prepare_ip4address(ip4address)
|
ip4address_prep = self.prepare_ip4address(ip4address)
|
||||||
|
|
|
@ -3,8 +3,12 @@
|
||||||
import database
|
import database
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
import ipaddress
|
|
||||||
|
|
||||||
|
FUNCTION_MAP = {
|
||||||
|
'zone': database.Database.set_zone,
|
||||||
|
'ip4network': database.Database.set_ip4network,
|
||||||
|
'asn': database.Database.set_asn,
|
||||||
|
}
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
@ -13,7 +17,7 @@ if __name__ == '__main__':
|
||||||
description="TODO")
|
description="TODO")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'type',
|
'type',
|
||||||
choices={'zone', 'ip4network'},
|
choices=FUNCTION_MAP.keys(),
|
||||||
help="Type of rule inputed")
|
help="Type of rule inputed")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
|
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
|
||||||
|
@ -25,14 +29,9 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
DB = database.Database()
|
DB = database.Database()
|
||||||
|
|
||||||
FUNCTION_MAP = {
|
|
||||||
'zone': DB.set_zone,
|
|
||||||
'ip4network': DB.set_ip4network,
|
|
||||||
}
|
|
||||||
|
|
||||||
fun = FUNCTION_MAP[args.type]
|
fun = FUNCTION_MAP[args.type]
|
||||||
|
|
||||||
for rule in args.input:
|
for rule in args.input:
|
||||||
fun(rule.strip(), is_first_party=args.first_party)
|
fun(DB, rule.strip(), is_first_party=args.first_party)
|
||||||
|
|
||||||
DB.close()
|
DB.close()
|
||||||
|
|
|
@ -4,16 +4,13 @@ function log() {
|
||||||
echo -e "\033[33m$@\033[0m"
|
echo -e "\033[33m$@\033[0m"
|
||||||
}
|
}
|
||||||
|
|
||||||
log "Updating references…"
|
|
||||||
./database.py --references
|
|
||||||
|
|
||||||
log "Exporting lists…"
|
log "Exporting lists…"
|
||||||
./export.py --first-party | sort -u > dist/firstparty-trackers.txt
|
./export.py --first-party --output dist/firstparty-trackers.txt
|
||||||
./export.py --first-party --end-chain | sort -u > dist/firstparty-only-trackers.txt
|
./export.py --first-party --end-chain --output dist/firstparty-only-trackers.txt
|
||||||
./export.py | sort -u > dist/multiparty-trackers.txt
|
./export.py --output dist/multiparty-trackers.txt
|
||||||
./export.py --end-chain | sort -u > dist/multiparty-only-trackers.txt
|
./export.py --end-chain --output dist/multiparty-only-trackers.txt
|
||||||
|
|
||||||
# Format the blocklist so it can be used as a hostlist
|
log "Generating hosts lists…"
|
||||||
function generate_hosts {
|
function generate_hosts {
|
||||||
basename="$1"
|
basename="$1"
|
||||||
description="$2"
|
description="$2"
|
||||||
|
@ -35,6 +32,7 @@ function generate_hosts {
|
||||||
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
|
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
|
||||||
echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
|
echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
|
||||||
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
|
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
|
||||||
|
echo '# (you can remove `-hosts` to get the raw list)'
|
||||||
echo "#"
|
echo "#"
|
||||||
echo "# Generation date: $(date -Isec)"
|
echo "# Generation date: $(date -Isec)"
|
||||||
echo "# Generation software: eulaurarien $(git describe --tags)"
|
echo "# Generation software: eulaurarien $(git describe --tags)"
|
||||||
|
@ -49,10 +47,7 @@ function generate_hosts {
|
||||||
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
||||||
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
||||||
echo
|
echo
|
||||||
cat "dist/$basename.txt" | while read host;
|
sed 's|^|0.0.0.0 |' "dist/$basename.txt"
|
||||||
do
|
|
||||||
echo "0.0.0.0 $host"
|
|
||||||
done
|
|
||||||
) > "dist/$basename-hosts.txt"
|
) > "dist/$basename-hosts.txt"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,11 @@ cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_dom
|
||||||
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
||||||
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
||||||
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
|
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
|
||||||
|
cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn
|
||||||
|
|
||||||
cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party
|
cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party
|
||||||
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party
|
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party
|
||||||
|
cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn --first-party
|
||||||
|
|
||||||
|
./feed_asn.py
|
||||||
|
|
||||||
|
|
2
rules_asn/.gitignore
vendored
Normal file
2
rules_asn/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
*.custom.txt
|
||||||
|
*.cache.txt
|
10
rules_asn/first-party.txt
Normal file
10
rules_asn/first-party.txt
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# Eulerian
|
||||||
|
AS50234
|
||||||
|
# Criteo
|
||||||
|
AS44788
|
||||||
|
AS19750
|
||||||
|
AS55569
|
||||||
|
# ThreatMetrix
|
||||||
|
AS30286
|
||||||
|
# Webtrekk
|
||||||
|
AS60164
|
|
@ -1,51 +0,0 @@
|
||||||
# Eulerian (AS50234 EULERIAN TECHNOLOGIES S.A.S.)
|
|
||||||
109.232.192.0/21
|
|
||||||
# Criteo (AS44788 Criteo SA)
|
|
||||||
91.199.242.0/24
|
|
||||||
91.212.98.0/24
|
|
||||||
178.250.0.0/21
|
|
||||||
178.250.0.0/24
|
|
||||||
178.250.1.0/24
|
|
||||||
178.250.2.0/24
|
|
||||||
178.250.3.0/24
|
|
||||||
178.250.4.0/24
|
|
||||||
178.250.6.0/24
|
|
||||||
185.235.84.0/24
|
|
||||||
# Criteo (AS19750 Criteo Corp.)
|
|
||||||
74.119.116.0/22
|
|
||||||
74.119.117.0/24
|
|
||||||
74.119.118.0/24
|
|
||||||
74.119.119.0/24
|
|
||||||
91.199.242.0/24
|
|
||||||
185.235.85.0/24
|
|
||||||
199.204.168.0/22
|
|
||||||
199.204.168.0/24
|
|
||||||
199.204.169.0/24
|
|
||||||
199.204.170.0/24
|
|
||||||
199.204.171.0/24
|
|
||||||
178.250.0.0/21
|
|
||||||
91.212.98.0/24
|
|
||||||
91.199.242.0/24
|
|
||||||
185.235.84.0/24
|
|
||||||
# Criteo (AS55569 Criteo APAC)
|
|
||||||
91.199.242.0/24
|
|
||||||
116.213.20.0/22
|
|
||||||
116.213.20.0/24
|
|
||||||
116.213.21.0/24
|
|
||||||
182.161.72.0/22
|
|
||||||
182.161.72.0/24
|
|
||||||
182.161.73.0/24
|
|
||||||
185.235.86.0/24
|
|
||||||
185.235.87.0/24
|
|
||||||
# ThreatMetrix (AS30286 ThreatMetrix Inc.)
|
|
||||||
69.84.176.0/24
|
|
||||||
173.254.179.0/24
|
|
||||||
185.32.240.0/23
|
|
||||||
185.32.242.0/23
|
|
||||||
192.225.156.0/22
|
|
||||||
199.101.156.0/23
|
|
||||||
199.101.158.0/23
|
|
||||||
# Webtrekk (AS60164 Webtrekk GmbH)
|
|
||||||
185.54.148.0/22
|
|
||||||
185.54.150.0/24
|
|
||||||
185.54.151.0/24
|
|
Loading…
Reference in a new issue