parent
57416b6e2c
commit
e19f666331
43
database.py
43
database.py
|
@ -33,6 +33,9 @@ class Database():
|
|||
# self.conn.create_function("prepare_ip4address", 1,
|
||||
# Database.prepare_ip4address,
|
||||
# deterministic=True)
|
||||
self.conn.create_function("unpack_domain", 1,
|
||||
lambda s: s[:-1][::-1],
|
||||
deterministic=True)
|
||||
|
||||
def execute(self, cmd: str, args: typing.Union[
|
||||
typing.Tuple[DbValue, ...],
|
||||
|
@ -123,6 +126,13 @@ class Database():
|
|||
def prepare_zone(self, zone: str) -> str:
    """Return the stored form of a zone name.

    A zone is prepared exactly like a hostname: the work is delegated
    to ``prepare_hostname`` and its result is returned unchanged.

    :param zone: Zone name as given by the caller.
    :return: Whatever ``prepare_hostname`` returns for ``zone``.
    """
    prepared = self.prepare_hostname(zone)
    return prepared
|
||||
|
||||
@staticmethod
|
||||
def prepare_asn(asn: str) -> int:
|
||||
asn = asn.upper()
|
||||
if asn.startswith('AS'):
|
||||
asn = asn[2:]
|
||||
return int(asn)
|
||||
|
||||
@staticmethod
|
||||
def prepare_ip4address(address: str) -> int:
|
||||
total = 0
|
||||
|
@ -169,7 +179,7 @@ class Database():
|
|||
|
||||
def export(self, first_party_only: bool = False,
|
||||
end_chain_only: bool = False) -> typing.Iterable[str]:
|
||||
command = 'SELECT val FROM rules ' \
|
||||
command = 'SELECT unpack_domain(val) FROM rules ' \
|
||||
'INNER JOIN hostname ON rules.id = hostname.entry'
|
||||
restrictions: typing.List[str] = list()
|
||||
if first_party_only:
|
||||
|
@ -178,9 +188,10 @@ class Database():
|
|||
restrictions.append('rules.refs = 0')
|
||||
if restrictions:
|
||||
command += ' WHERE ' + ' AND '.join(restrictions)
|
||||
command += ' ORDER BY unpack_domain(val) ASC'
|
||||
self.execute(command)
|
||||
for val, in self.cursor:
|
||||
yield val[:-1][::-1]
|
||||
yield val
|
||||
|
||||
def get_domain(self, domain: str) -> typing.Iterable[int]:
|
||||
self.enter_step('get_domain_prepare')
|
||||
|
@ -235,6 +246,13 @@ class Database():
|
|||
self.enter_step('get_ip4_yield')
|
||||
yield entry
|
||||
|
||||
def list_asn(self) -> typing.Iterable[typing.Tuple[str, int]]:
    """Iterate over every row of the ``asn`` table.

    :return: ``(name, entry)`` pairs, where ``name`` is the stored
        ``val`` rendered with an ``AS`` prefix (e.g. ``'AS50234'``)
        and ``entry`` is the row's ``entry`` column.
    """
    # Enter only this method's own step before running the query, so
    # the SELECT is not recorded under another method's step name.
    self.enter_step('list_asn_select')
    self.execute('SELECT val, entry FROM asn')
    for val, entry in self.cursor:
        yield f'AS{val}', entry
|
||||
|
||||
def _set_generic(self,
|
||||
table: str,
|
||||
select_query: str,
|
||||
|
@ -325,8 +343,29 @@ class Database():
|
|||
*args, **kwargs
|
||||
)
|
||||
|
||||
def set_asn(self, asn: str,
            *args: typing.Any, **kwargs: typing.Any) -> None:
    """Record an AS number through ``_set_generic``.

    The text is first converted with ``prepare_asn``. If that raises
    ``ValueError`` the problem is logged and nothing is stored.

    :param asn: Textual AS number, as accepted by ``prepare_asn``.
    :param args: Extra positional arguments forwarded to
        ``_set_generic``.
    :param kwargs: Extra keyword arguments forwarded to
        ``_set_generic``.
    """
    self.enter_step('set_asn_prepare')
    select_sql = 'SELECT entry FROM asn WHERE val=:val'
    insert_sql = 'INSERT INTO asn (val, entry) VALUES (:val, :entry)'
    try:
        number = self.prepare_asn(asn)
    except ValueError:
        self.log.error("Invalid asn: %s", asn)
    else:
        # Named parameters bound into both SQL statements.
        bindings: typing.Dict[str, DbValue] = {'val': number}
        self._set_generic(
            'asn', select_sql, insert_sql, bindings, *args, **kwargs
        )
|
||||
|
||||
def set_ip4address(self, ip4address: str,
|
||||
*args: typing.Any, **kwargs: typing.Any) -> None:
|
||||
# TODO Do not add if already in ip4network
|
||||
self.enter_step('set_ip4add_prepare')
|
||||
try:
|
||||
ip4address_prep = self.prepare_ip4address(ip4address)
|
||||
|
|
|
@ -3,8 +3,12 @@
|
|||
import database
|
||||
import argparse
|
||||
import sys
|
||||
import ipaddress
|
||||
|
||||
FUNCTION_MAP = {
|
||||
'zone': database.Database.set_zone,
|
||||
'ip4network': database.Database.set_ip4network,
|
||||
'asn': database.Database.set_asn,
|
||||
}
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
@ -13,7 +17,7 @@ if __name__ == '__main__':
|
|||
description="TODO")
|
||||
parser.add_argument(
|
||||
'type',
|
||||
choices={'zone', 'ip4network'},
|
||||
choices=FUNCTION_MAP.keys(),
|
||||
help="Type of rule inputed")
|
||||
parser.add_argument(
|
||||
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
|
||||
|
@ -25,14 +29,9 @@ if __name__ == '__main__':
|
|||
|
||||
DB = database.Database()
|
||||
|
||||
FUNCTION_MAP = {
|
||||
'zone': DB.set_zone,
|
||||
'ip4network': DB.set_ip4network,
|
||||
}
|
||||
|
||||
fun = FUNCTION_MAP[args.type]
|
||||
|
||||
for rule in args.input:
|
||||
fun(rule.strip(), is_first_party=args.first_party)
|
||||
fun(DB, rule.strip(), is_first_party=args.first_party)
|
||||
|
||||
DB.close()
|
||||
|
|
|
@ -4,16 +4,13 @@ function log() {
|
|||
echo -e "\033[33m$@\033[0m"
|
||||
}
|
||||
|
||||
log "Updating references…"
|
||||
./database.py --references
|
||||
|
||||
log "Exporting lists…"
|
||||
./export.py --first-party | sort -u > dist/firstparty-trackers.txt
|
||||
./export.py --first-party --end-chain | sort -u > dist/firstparty-only-trackers.txt
|
||||
./export.py | sort -u > dist/multiparty-trackers.txt
|
||||
./export.py --end-chain | sort -u > dist/multiparty-only-trackers.txt
|
||||
./export.py --first-party --output dist/firstparty-trackers.txt
|
||||
./export.py --first-party --end-chain --output dist/firstparty-only-trackers.txt
|
||||
./export.py --output dist/multiparty-trackers.txt
|
||||
./export.py --end-chain --output dist/multiparty-only-trackers.txt
|
||||
|
||||
# Format the blocklist so it can be used as a hostlist
|
||||
log "Generating hosts lists…"
|
||||
function generate_hosts {
|
||||
basename="$1"
|
||||
description="$2"
|
||||
|
@ -35,6 +32,7 @@ function generate_hosts {
|
|||
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
|
||||
echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
|
||||
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
|
||||
echo '# (you can remove `-hosts` to get the raw list)'
|
||||
echo "#"
|
||||
echo "# Generation date: $(date -Isec)"
|
||||
echo "# Generation software: eulaurarien $(git describe --tags)"
|
||||
|
@ -49,10 +47,7 @@ function generate_hosts {
|
|||
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
|
||||
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
|
||||
echo
|
||||
cat "dist/$basename.txt" | while read host;
|
||||
do
|
||||
echo "0.0.0.0 $host"
|
||||
done
|
||||
sed 's|^|0.0.0.0 |' "dist/$basename.txt"
|
||||
) > "dist/$basename-hosts.txt"
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,11 @@ cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_dom
|
|||
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
|
||||
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
|
||||
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
|
||||
cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn
|
||||
|
||||
cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party
|
||||
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party
|
||||
cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn --first-party
|
||||
|
||||
./feed_asn.py
|
||||
|
||||
|
|
2
rules_asn/.gitignore
vendored
Normal file
2
rules_asn/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.custom.txt
|
||||
*.cache.txt
|
10
rules_asn/first-party.txt
Normal file
10
rules_asn/first-party.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Eulerian
|
||||
AS50234
|
||||
# Criteo
|
||||
AS44788
|
||||
AS19750
|
||||
AS55569
|
||||
# ThreatMetrix
|
||||
AS30286
|
||||
# Webtrekk
|
||||
AS60164
|
|
@ -1,51 +0,0 @@
|
|||
# Eulerian (AS50234 EULERIAN TECHNOLOGIES S.A.S.)
|
||||
109.232.192.0/21
|
||||
# Criteo (AS44788 Criteo SA)
|
||||
91.199.242.0/24
|
||||
91.212.98.0/24
|
||||
178.250.0.0/21
|
||||
178.250.0.0/24
|
||||
178.250.1.0/24
|
||||
178.250.2.0/24
|
||||
178.250.3.0/24
|
||||
178.250.4.0/24
|
||||
178.250.6.0/24
|
||||
185.235.84.0/24
|
||||
# Criteo (AS19750 Criteo Corp.)
|
||||
74.119.116.0/22
|
||||
74.119.117.0/24
|
||||
74.119.118.0/24
|
||||
74.119.119.0/24
|
||||
91.199.242.0/24
|
||||
185.235.85.0/24
|
||||
199.204.168.0/22
|
||||
199.204.168.0/24
|
||||
199.204.169.0/24
|
||||
199.204.170.0/24
|
||||
199.204.171.0/24
|
||||
178.250.0.0/21
|
||||
91.212.98.0/24
|
||||
91.199.242.0/24
|
||||
185.235.84.0/24
|
||||
# Criteo (AS55569 Criteo APAC)
|
||||
91.199.242.0/24
|
||||
116.213.20.0/22
|
||||
116.213.20.0/24
|
||||
116.213.21.0/24
|
||||
182.161.72.0/22
|
||||
182.161.72.0/24
|
||||
182.161.73.0/24
|
||||
185.235.86.0/24
|
||||
185.235.87.0/24
|
||||
# ThreatMetrix (AS30286 ThreatMetrix Inc.)
|
||||
69.84.176.0/24
|
||||
173.254.179.0/24
|
||||
185.32.240.0/23
|
||||
185.32.242.0/23
|
||||
192.225.156.0/22
|
||||
199.101.156.0/23
|
||||
199.101.158.0/23
|
||||
# Webtrekk (AS60164 Webtrekk GmbH)
|
||||
185.54.148.0/22
|
||||
185.54.150.0/24
|
||||
185.54.151.0/24
|
Loading…
Reference in a new issue