Browse Source

Workflow: Automatically import IP ranges from ASN

Closes #9
tags/v2.1-beta
Geoffrey Frogeye 2 months ago
parent
commit
e19f666331
Signed by: geoffrey <geoffrey@frogeye.fr> GPG Key ID: D8A7ECA00A8CD3DD
7 changed files with 72 additions and 73 deletions
  1. +41
    -2
      database.py
  2. +7
    -8
      feed_rules.py
  3. +7
    -12
      filter_subdomains.sh
  4. +5
    -0
      import_rules.sh
  5. +2
    -0
      rules_asn/.gitignore
  6. +10
    -0
      rules_asn/first-party.txt
  7. +0
    -51
      rules_ip/first-party.txt

+ 41
- 2
database.py View File

@@ -33,6 +33,9 @@ class Database():
# self.conn.create_function("prepare_ip4address", 1,
# Database.prepare_ip4address,
# deterministic=True)
self.conn.create_function("unpack_domain", 1,
lambda s: s[:-1][::-1],
deterministic=True)

def execute(self, cmd: str, args: typing.Union[
typing.Tuple[DbValue, ...],
@@ -123,6 +126,13 @@ class Database():
def prepare_zone(self, zone: str) -> str:
return self.prepare_hostname(zone)

@staticmethod
def prepare_asn(asn: str) -> int:
    """
    Convert a textual AS number into its integer form.

    The number may be given bare ("50234") or prefixed with "AS" in any
    letter case ("AS50234", "as50234"). Surrounding whitespace is ignored.

    Raises ValueError when the remaining text is not a plain sequence of
    ASCII digits, or when the number does not fit in the 32-bit AS number
    space.
    """
    digits = asn.strip().upper()
    if digits.startswith('AS'):
        digits = digits[2:].strip()
    # int() on its own would also accept signs, underscores and non-ASCII
    # digits ("AS-1", "1_000"), none of which denote an AS number.
    if not (digits.isascii() and digits.isdigit()):
        raise ValueError(f"Invalid AS number: {asn!r}")
    number = int(digits)
    # AS numbers are unsigned 32-bit values.
    if number > 0xFFFFFFFF:
        raise ValueError(f"AS number out of range: {asn!r}")
    return number

@staticmethod
def prepare_ip4address(address: str) -> int:
total = 0
@@ -169,7 +179,7 @@ class Database():

def export(self, first_party_only: bool = False,
end_chain_only: bool = False) -> typing.Iterable[str]:
command = 'SELECT val FROM rules ' \
command = 'SELECT unpack_domain(val) FROM rules ' \
'INNER JOIN hostname ON rules.id = hostname.entry'
restrictions: typing.List[str] = list()
if first_party_only:
@@ -178,9 +188,10 @@ class Database():
restrictions.append('rules.refs = 0')
if restrictions:
command += ' WHERE ' + ' AND '.join(restrictions)
command += ' ORDER BY unpack_domain(val) ASC'
self.execute(command)
for val, in self.cursor:
yield val[:-1][::-1]
yield val

def get_domain(self, domain: str) -> typing.Iterable[int]:
self.enter_step('get_domain_prepare')
@@ -235,6 +246,13 @@ class Database():
self.enter_step('get_ip4_yield')
yield entry

def list_asn(self) -> typing.Iterable[typing.Tuple[str, int]]:
    """
    Iterate over every row of the `asn` table.

    Yields (name, entry) pairs, where name is the stored `val` prefixed
    with "AS" and entry is the row's `entry` column.
    """
    # A single step label for this query; entering a second, unrelated
    # label right after it would relabel the query under another
    # method's step name.
    self.enter_step('list_asn_select')
    self.execute('SELECT val, entry FROM asn')
    for val, entry in self.cursor:
        yield f'AS{val}', entry

def _set_generic(self,
table: str,
select_query: str,
@@ -325,8 +343,29 @@ class Database():
*args, **kwargs
)

def set_asn(self, asn: str,
            *args: typing.Any, **kwargs: typing.Any) -> None:
    """
    Record an AS number in the `asn` table.

    `asn` is normalised with `prepare_asn`; if that raises ValueError the
    problem is logged and nothing is stored. Any extra positional and
    keyword arguments are forwarded untouched to `_set_generic`.
    """
    self.enter_step('set_asn_prepare')
    try:
        number = self.prepare_asn(asn)
    except ValueError:
        self.log.error("Invalid asn: %s", asn)
    else:
        # Queries handed to the generic setter for the `asn` table.
        select_sql = 'SELECT entry FROM asn WHERE val=:val'
        insert_sql = ('INSERT INTO asn (val, entry) '
                      'VALUES (:val, :entry)')
        params: typing.Dict[str, DbValue] = {'val': number}
        self._set_generic('asn', select_sql, insert_sql, params,
                          *args, **kwargs)

def set_ip4address(self, ip4address: str,
*args: typing.Any, **kwargs: typing.Any) -> None:
# TODO Do not add if already in ip4network
self.enter_step('set_ip4add_prepare')
try:
ip4address_prep = self.prepare_ip4address(ip4address)


+ 7
- 8
feed_rules.py View File

@@ -3,8 +3,12 @@
import database
import argparse
import sys
import ipaddress

FUNCTION_MAP = {
'zone': database.Database.set_zone,
'ip4network': database.Database.set_ip4network,
'asn': database.Database.set_asn,
}

if __name__ == '__main__':

@@ -13,7 +17,7 @@ if __name__ == '__main__':
description="TODO")
parser.add_argument(
'type',
choices={'zone', 'ip4network'},
choices=FUNCTION_MAP.keys(),
help="Type of rule inputed")
parser.add_argument(
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
@@ -25,14 +29,9 @@ if __name__ == '__main__':

DB = database.Database()

FUNCTION_MAP = {
'zone': DB.set_zone,
'ip4network': DB.set_ip4network,
}

fun = FUNCTION_MAP[args.type]

for rule in args.input:
fun(rule.strip(), is_first_party=args.first_party)
fun(DB, rule.strip(), is_first_party=args.first_party)

DB.close()

+ 7
- 12
filter_subdomains.sh View File

@@ -4,16 +4,13 @@ function log() {
echo -e "\033[33m$@\033[0m"
}

log "Updating references…"
./database.py --references

log "Exporting lists…"
./export.py --first-party | sort -u > dist/firstparty-trackers.txt
./export.py --first-party --end-chain | sort -u > dist/firstparty-only-trackers.txt
./export.py | sort -u > dist/multiparty-trackers.txt
./export.py --end-chain | sort -u > dist/multiparty-only-trackers.txt
./export.py --first-party --output dist/firstparty-trackers.txt
./export.py --first-party --end-chain --output dist/firstparty-only-trackers.txt
./export.py --output dist/multiparty-trackers.txt
./export.py --end-chain --output dist/multiparty-only-trackers.txt

# Format the blocklist so it can be used as a hostlist
log "Generating hosts lists…"
function generate_hosts {
basename="$1"
description="$2"
@@ -35,6 +32,7 @@ function generate_hosts {
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
echo '# (you can remove `-hosts` to get the raw list)'
echo "#"
echo "# Generation date: $(date -Isec)"
echo "# Generation software: eulaurarien $(git describe --tags)"
@@ -49,10 +47,7 @@ function generate_hosts {
echo "# Number of multi-party subdomains: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
echo
cat "dist/$basename.txt" | while read host;
do
echo "0.0.0.0 $host"
done
sed 's|^|0.0.0.0 |' "dist/$basename.txt"
) > "dist/$basename-hosts.txt"
}



+ 5
- 0
import_rules.sh View File

@@ -9,6 +9,11 @@ cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | ./adblock_to_dom
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 | ./feed_rules.py zone
cat rules/*.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network
cat rules_asn/*.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn

cat rules/first-party.list | grep -v '^#' | grep -v '^$' | ./feed_rules.py zone --first-party
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py ip4network --first-party
cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py asn --first-party

./feed_asn.py


+ 2
- 0
rules_asn/.gitignore View File

@@ -0,0 +1,2 @@
*.custom.txt
*.cache.txt

+ 10
- 0
rules_asn/first-party.txt View File

@@ -0,0 +1,10 @@
# Eulerian
AS50234
# Criteo
AS44788
AS19750
AS55569
# ThreatMetrix
AS30286
# Webtrekk
AS60164

+ 0
- 51
rules_ip/first-party.txt View File

@@ -1,51 +0,0 @@
# Eulerian (AS50234 EULERIAN TECHNOLOGIES S.A.S.)
109.232.192.0/21
# Criteo (AS44788 Criteo SA)
91.199.242.0/24
91.212.98.0/24
178.250.0.0/21
178.250.0.0/24
178.250.1.0/24
178.250.2.0/24
178.250.3.0/24
178.250.4.0/24
178.250.6.0/24
185.235.84.0/24
# Criteo (AS19750 Criteo Corp.)
74.119.116.0/22
74.119.117.0/24
74.119.118.0/24
74.119.119.0/24
91.199.242.0/24
185.235.85.0/24
199.204.168.0/22
199.204.168.0/24
199.204.169.0/24
199.204.170.0/24
199.204.171.0/24
178.250.0.0/21
91.212.98.0/24
91.199.242.0/24
185.235.84.0/24
# Criteo (AS55569 Criteo APAC)
91.199.242.0/24
116.213.20.0/22
116.213.20.0/24
116.213.21.0/24
182.161.72.0/22
182.161.72.0/24
182.161.73.0/24
185.235.86.0/24
185.235.87.0/24
# ThreatMetrix (AS30286 ThreatMetrix Inc.)
69.84.176.0/24
173.254.179.0/24
185.32.240.0/23
185.32.242.0/23
192.225.156.0/22
199.101.156.0/23
199.101.158.0/23
# Webtrekk (AS60164 Webtrekk GmbH)
185.54.148.0/22
185.54.150.0/24
185.54.151.0/24

Loading…
Cancel
Save