Harder verification before adding entries to DB
This commit is contained in:
parent
747fe46ad0
commit
dce35cb299
60
database.py
60
database.py
|
@ -10,6 +10,8 @@ import logging
|
|||
import coloredlogs
|
||||
import pickle
|
||||
|
||||
TLD_LIST: typing.Set[str] = set()
|
||||
|
||||
coloredlogs.install(
|
||||
level='DEBUG',
|
||||
fmt='%(asctime)s %(name)s %(levelname)s %(message)s'
|
||||
|
@ -200,6 +202,27 @@ class Database(Profiler):
|
|||
self.log = logging.getLogger('db')
|
||||
self.load()
|
||||
|
||||
@staticmethod
def populate_tld_list() -> None:
    """Fill the module-level TLD_LIST set from 'temp/all_tld.list'.

    Each line of the file is stripped of surrounding whitespace and
    added to TLD_LIST as one entry.
    """
    with open('temp/all_tld.list', 'r') as tld_file:
        TLD_LIST.update(line.strip() for line in tld_file)
|
||||
|
||||
@staticmethod
def validate_domain(path: str) -> bool:
    """Tell whether ``path`` looks like a valid domain name.

    A name is accepted when it is at most 255 characters long, its last
    dot-separated label is present in TLD_LIST, and every label is
    between 1 and 63 characters long. TLD_LIST is populated on first
    use through Database.populate_tld_list().

    Returns True when all checks pass, False otherwise.
    """
    if len(path) > 255:
        return False
    labels = path.split('.')
    if not TLD_LIST:
        Database.populate_tld_list()
    # The TLD is the right-most label of a dotted name ('com' in
    # 'www.example.com'). Checking the first label instead would
    # reject almost every real domain.
    if labels[-1] not in TLD_LIST:
        return False
    return all(1 <= len(label) <= 63 for label in labels)
|
||||
|
||||
@staticmethod
def pack_domain(domain: str) -> DomainPath:
    """Build a DomainPath from the labels of ``domain`` in reverse order."""
    labels = domain.split('.')
    labels.reverse()
    return DomainPath(labels)
|
||||
|
@ -219,6 +242,19 @@ class Database(Profiler):
|
|||
def unpack_asn(asn: AsnPath) -> str:
|
||||
return f'AS{asn.asn}'
|
||||
|
||||
@staticmethod
|
||||
def validate_ip4address(path: str) -> bool:
|
||||
splits = path.split('.')
|
||||
if len(splits) != 4:
|
||||
return False
|
||||
for split in splits:
|
||||
try:
|
||||
if not 0 <= int(split) <= 255:
|
||||
return False
|
||||
except ValueError:
|
||||
return False
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def pack_ip4address(address: str) -> Ip4Path:
|
||||
addr = 0
|
||||
|
@ -237,6 +273,21 @@ class Database(Profiler):
|
|||
addr >>= 8
|
||||
return '.'.join(map(str, octets))
|
||||
|
||||
@staticmethod
def validate_ip4network(path: str) -> bool:
    """Tell whether ``path`` looks like an IPv4 network in CIDR form.

    The string must split on '/' into exactly two parts: an address
    accepted by Database.validate_ip4address and a prefix length that
    parses as an integer between 0 and 32.

    Returns True when both parts are acceptable, False otherwise.
    """
    # A bit generous but ok for our usage
    parts = path.split('/')
    if len(parts) != 2:
        return False
    address, prefixlen = parts
    if not Database.validate_ip4address(address):
        return False
    try:
        return 0 <= int(prefixlen) <= 32
    except ValueError:
        # The prefix length is not an integer at all.
        return False
|
||||
|
||||
@staticmethod
|
||||
def pack_ip4network(network: str) -> Ip4Path:
|
||||
address, prefixlen_str = network.split('/')
|
||||
|
@ -549,6 +600,9 @@ class Database(Profiler):
|
|||
domain_str: str,
|
||||
updated: int,
|
||||
source: Path) -> None:
|
||||
self.enter_step('set_domain_val')
|
||||
if not Database.validate_domain(domain_str):
|
||||
raise ValueError(f"Invalid domain: {domain_str}")
|
||||
self.enter_step('set_domain_pack')
|
||||
domain = self.pack_domain(domain_str)
|
||||
self.enter_step('set_domain_fp')
|
||||
|
@ -636,6 +690,9 @@ class Database(Profiler):
|
|||
ip4address_str: str,
|
||||
*args: typing.Any, **kwargs: typing.Any
|
||||
) -> None:
|
||||
self.enter_step('set_ip4add_val')
|
||||
if not Database.validate_ip4address(ip4address_str):
|
||||
raise ValueError(f"Invalid ip4address: {ip4address_str}")
|
||||
self.enter_step('set_ip4add_pack')
|
||||
ip4 = self.pack_ip4address(ip4address_str)
|
||||
self._set_ip4(ip4, *args, **kwargs)
|
||||
|
@ -644,6 +701,9 @@ class Database(Profiler):
|
|||
ip4network_str: str,
|
||||
*args: typing.Any, **kwargs: typing.Any
|
||||
) -> None:
|
||||
self.enter_step('set_ip4net_val')
|
||||
if not Database.validate_ip4network(ip4network_str):
|
||||
raise ValueError(f"Invalid ip4network: {ip4network_str}")
|
||||
self.enter_step('set_ip4net_pack')
|
||||
ip4 = self.pack_ip4network(ip4network_str)
|
||||
self._set_ip4(ip4, *args, **kwargs)
|
||||
|
|
|
@ -8,7 +8,7 @@ log "Exporting lists…"
|
|||
./export.py --first-party --output dist/firstparty-trackers.txt
|
||||
./export.py --first-party --end-chain --no-dupplicates --output dist/firstparty-only-trackers.txt
|
||||
./export.py --output dist/multiparty-trackers.txt
|
||||
./export.py --end-chain --output --no-dupplicates dist/multiparty-only-trackers.txt
|
||||
./export.py --end-chain --no-dupplicates --output dist/multiparty-only-trackers.txt
|
||||
|
||||
log "Generating statistics…"
|
||||
./export.py --count --first-party > temp/count_recs_firstparty.txt
|
||||
|
|
|
@ -30,6 +30,10 @@ dl https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hos
|
|||
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt rules_hosts/smart-tv.cache.txt
|
||||
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/AmazonFireTV.txt rules_hosts/amazon-fire-tv.cache.txt
|
||||
|
||||
log "Retrieving TLD list…"
# Fetch over HTTPS like the other downloads in this script, so the list
# cannot be altered in transit.
dl https://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
# Drop '#' comment lines and lowercase every remaining entry.
grep -v '^#' temp/all_tld.temp.list | awk '{print tolower($0)}' > temp/all_tld.list
|
||||
|
||||
log "Retrieving nameservers…"
|
||||
rm -f nameservers
|
||||
touch nameservers
|
||||
|
@ -51,4 +55,3 @@ then
|
|||
else
|
||||
mv temp/cisco-umbrella_popularity.fresh.list subdomains/cisco-umbrella_popularity.cache.list
|
||||
fi
|
||||
dl https://www.orwell1984.today/cname/eulerian.net.txt subdomains/orwell-eulerian-cname-list.cache.list
|
||||
|
|
Loading…
Reference in a new issue