Harder verification before adding entries to DB
This commit is contained in:
parent
747fe46ad0
commit
dce35cb299
60
database.py
60
database.py
|
@ -10,6 +10,8 @@ import logging
|
||||||
import coloredlogs
|
import coloredlogs
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
|
TLD_LIST: typing.Set[str] = set()
|
||||||
|
|
||||||
coloredlogs.install(
|
coloredlogs.install(
|
||||||
level='DEBUG',
|
level='DEBUG',
|
||||||
fmt='%(asctime)s %(name)s %(levelname)s %(message)s'
|
fmt='%(asctime)s %(name)s %(levelname)s %(message)s'
|
||||||
|
@ -200,6 +202,27 @@ class Database(Profiler):
|
||||||
self.log = logging.getLogger('db')
|
self.log = logging.getLogger('db')
|
||||||
self.load()
|
self.load()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def populate_tld_list() -> None:
    """Fill the module-level ``TLD_LIST`` set from ``temp/all_tld.list``.

    The file is read as one top-level domain per line. Surrounding
    whitespace is stripped; blank lines and ``#`` comment lines are
    skipped so they never become entries of the set.

    Raises:
        OSError: if the list file cannot be opened.
    """
    with open('temp/all_tld.list', 'r', encoding='utf-8') as tld_fdesc:
        for line in tld_fdesc:
            tld = line.strip()
            # Blank and comment lines are not TLDs; storing them would add
            # bogus members (such as the empty string) to the lookup set.
            if not tld or tld.startswith('#'):
                continue
            TLD_LIST.add(tld)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def validate_domain(path: str) -> bool:
    """Tell whether *path* is an acceptable dotted domain name.

    A name is accepted when all of the following hold:

    * it is at most 255 characters long;
    * its right-most label is a member of ``TLD_LIST`` (the set is
      loaded on first use through ``Database.populate_tld_list()``);
    * every dot-separated label is between 1 and 63 characters long.

    Args:
        path: the domain in its usual textual form, e.g.
            ``'www.example.com'`` (not reversed).

    Returns:
        True if the name passes every check, False otherwise.
    """
    if len(path) > 255:
        return False
    labels = path.split('.')
    if not TLD_LIST:
        Database.populate_tld_list()
    # The TLD is the LAST label of the textual form ('com' in
    # 'www.example.com'); the first label is the deepest subdomain.
    if labels[-1] not in TLD_LIST:
        return False
    # Empty labels (leading/trailing/double dots) fail the lower bound.
    return all(1 <= len(label) <= 63 for label in labels)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pack_domain(domain: str) -> DomainPath:
|
def pack_domain(domain: str) -> DomainPath:
|
||||||
return DomainPath(domain.split('.')[::-1])
|
return DomainPath(domain.split('.')[::-1])
|
||||||
|
@ -219,6 +242,19 @@ class Database(Profiler):
|
||||||
def unpack_asn(asn: AsnPath) -> str:
|
def unpack_asn(asn: AsnPath) -> str:
|
||||||
return f'AS{asn.asn}'
|
return f'AS{asn.asn}'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def validate_ip4address(path: str) -> bool:
    """Tell whether *path* is a dotted-quad IPv4 address.

    The string must consist of exactly four dot-separated parts, each
    made only of ASCII digits and with a value between 0 and 255.

    Args:
        path: candidate address, e.g. ``'192.0.2.1'``.

    Returns:
        True if the address is well formed, False otherwise.
    """
    octets = path.split('.')
    if len(octets) != 4:
        return False
    for octet in octets:
        # int() alone is too forgiving: it accepts surrounding whitespace,
        # a sign, underscores and non-ASCII digits (' 1', '+1', '1_0').
        if not octet or not all(char in '0123456789' for char in octet):
            return False
        try:
            if int(octet) > 255:
                return False
        except ValueError:
            # Recent Pythons refuse to convert absurdly long digit strings.
            return False
    return True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pack_ip4address(address: str) -> Ip4Path:
|
def pack_ip4address(address: str) -> Ip4Path:
|
||||||
addr = 0
|
addr = 0
|
||||||
|
@ -237,6 +273,21 @@ class Database(Profiler):
|
||||||
addr >>= 8
|
addr >>= 8
|
||||||
return '.'.join(map(str, octets))
|
return '.'.join(map(str, octets))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def validate_ip4network(path: str) -> bool:
    """Tell whether *path* is an IPv4 network in ``address/prefixlen`` form.

    The string must contain exactly one ``/``. The part after it must be
    made only of ASCII digits with a value between 0 and 32, and the part
    before it must satisfy ``Database.validate_ip4address``.

    This is deliberately a bit generous: nothing here requires the host
    bits beyond the prefix length to be zero.

    Args:
        path: candidate network, e.g. ``'192.0.2.0/24'``.

    Returns:
        True if the network is well formed, False otherwise.
    """
    parts = path.split('/')
    if len(parts) != 2:
        return False
    address, prefixlen = parts
    # Check the prefix first: it is the cheaper test. Digits only, because
    # int() would also accept whitespace, signs and underscores.
    if not prefixlen or not all(char in '0123456789' for char in prefixlen):
        return False
    try:
        if int(prefixlen) > 32:
            return False
    except ValueError:
        # Recent Pythons refuse to convert absurdly long digit strings.
        return False
    return Database.validate_ip4address(address)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pack_ip4network(network: str) -> Ip4Path:
|
def pack_ip4network(network: str) -> Ip4Path:
|
||||||
address, prefixlen_str = network.split('/')
|
address, prefixlen_str = network.split('/')
|
||||||
|
@ -549,6 +600,9 @@ class Database(Profiler):
|
||||||
domain_str: str,
|
domain_str: str,
|
||||||
updated: int,
|
updated: int,
|
||||||
source: Path) -> None:
|
source: Path) -> None:
|
||||||
|
self.enter_step('set_domain_val')
|
||||||
|
if not Database.validate_domain(domain_str):
|
||||||
|
raise ValueError(f"Invalid domain: {domain_str}")
|
||||||
self.enter_step('set_domain_pack')
|
self.enter_step('set_domain_pack')
|
||||||
domain = self.pack_domain(domain_str)
|
domain = self.pack_domain(domain_str)
|
||||||
self.enter_step('set_domain_fp')
|
self.enter_step('set_domain_fp')
|
||||||
|
@ -636,6 +690,9 @@ class Database(Profiler):
|
||||||
ip4address_str: str,
|
ip4address_str: str,
|
||||||
*args: typing.Any, **kwargs: typing.Any
|
*args: typing.Any, **kwargs: typing.Any
|
||||||
) -> None:
|
) -> None:
|
||||||
|
self.enter_step('set_ip4add_val')
|
||||||
|
if not Database.validate_ip4address(ip4address_str):
|
||||||
|
raise ValueError(f"Invalid ip4address: {ip4address_str}")
|
||||||
self.enter_step('set_ip4add_pack')
|
self.enter_step('set_ip4add_pack')
|
||||||
ip4 = self.pack_ip4address(ip4address_str)
|
ip4 = self.pack_ip4address(ip4address_str)
|
||||||
self._set_ip4(ip4, *args, **kwargs)
|
self._set_ip4(ip4, *args, **kwargs)
|
||||||
|
@ -644,6 +701,9 @@ class Database(Profiler):
|
||||||
ip4network_str: str,
|
ip4network_str: str,
|
||||||
*args: typing.Any, **kwargs: typing.Any
|
*args: typing.Any, **kwargs: typing.Any
|
||||||
) -> None:
|
) -> None:
|
||||||
|
self.enter_step('set_ip4net_val')
|
||||||
|
if not Database.validate_ip4network(ip4network_str):
|
||||||
|
raise ValueError(f"Invalid ip4network: {ip4network_str}")
|
||||||
self.enter_step('set_ip4net_pack')
|
self.enter_step('set_ip4net_pack')
|
||||||
ip4 = self.pack_ip4network(ip4network_str)
|
ip4 = self.pack_ip4network(ip4network_str)
|
||||||
self._set_ip4(ip4, *args, **kwargs)
|
self._set_ip4(ip4, *args, **kwargs)
|
||||||
|
|
|
@ -8,7 +8,7 @@ log "Exporting lists…"
|
||||||
./export.py --first-party --output dist/firstparty-trackers.txt
|
./export.py --first-party --output dist/firstparty-trackers.txt
|
||||||
./export.py --first-party --end-chain --no-dupplicates --output dist/firstparty-only-trackers.txt
|
./export.py --first-party --end-chain --no-dupplicates --output dist/firstparty-only-trackers.txt
|
||||||
./export.py --output dist/multiparty-trackers.txt
|
./export.py --output dist/multiparty-trackers.txt
|
||||||
./export.py --end-chain --output --no-dupplicates dist/multiparty-only-trackers.txt
|
./export.py --end-chain --no-dupplicates --output dist/multiparty-only-trackers.txt
|
||||||
|
|
||||||
log "Generating statistics…"
|
log "Generating statistics…"
|
||||||
./export.py --count --first-party > temp/count_recs_firstparty.txt
|
./export.py --count --first-party > temp/count_recs_firstparty.txt
|
||||||
|
|
|
@ -30,6 +30,10 @@ dl https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hos
|
||||||
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt rules_hosts/smart-tv.cache.txt
|
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt rules_hosts/smart-tv.cache.txt
|
||||||
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/AmazonFireTV.txt rules_hosts/amazon-fire-tv.cache.txt
|
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/AmazonFireTV.txt rules_hosts/amazon-fire-tv.cache.txt
|
||||||
|
|
||||||
|
log "Retrieving TLD list…"
|
||||||
|
dl http://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
|
||||||
|
grep -v '^#' temp/all_tld.temp.list | awk '{print tolower($0)}' > temp/all_tld.list
|
||||||
|
|
||||||
log "Retrieving nameservers…"
|
log "Retrieving nameservers…"
|
||||||
rm -f nameservers
|
rm -f nameservers
|
||||||
touch nameservers
|
touch nameservers
|
||||||
|
@ -51,4 +55,3 @@ then
|
||||||
else
|
else
|
||||||
mv temp/cisco-umbrella_popularity.fresh.list subdomains/cisco-umbrella_popularity.cache.list
|
mv temp/cisco-umbrella_popularity.fresh.list subdomains/cisco-umbrella_popularity.cache.list
|
||||||
fi
|
fi
|
||||||
dl https://www.orwell1984.today/cname/eulerian.net.txt subdomains/orwell-eulerian-cname-list.cache.list
|
|
||||||
|
|
Loading…
Reference in a new issue