Generates a host list of first-party trackers for ad-blocking.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

68 lines
1.9 KiB

#!/usr/bin/env python3
# pylint: disable=C0103
"""
From a list of subdomains to block,
filter out the ones explicitely matching a regex.
It should be already handled by the ad blocker.
"""
import argparse
import sys
import progressbar
import adblockparser
OPTIONS = {"third-party": True}
def explicitely_match(subdomain: str) -> bool:
url = f"https://{subdomain}/"
return rules.should_block(url, OPTIONS)
if __name__ == '__main__':
# Parsing arguments
parser = argparse.ArgumentParser(
description="Filter first-party trackers from a list of subdomains")
parser.add_argument(
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
help="Input file with one subdomain per line")
parser.add_argument(
'-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
help="Outptut file with one tracking subdomain per line")
parser.add_argument(
'-r', '--rules', type=argparse.FileType('r'), default='rules',
help="Rules file")
args = parser.parse_args()
# Reading rules
rules: adblockparser.AdblockRules = adblockparser.AdblockRules(args.rules)
# Progress bar
widgets = [
progressbar.Percentage(),
' ', progressbar.SimpleProgress(),
' ', progressbar.Bar(),
' ', progressbar.Timer(),
' ', progressbar.AdaptiveTransferSpeed(unit='req'),
' ', progressbar.AdaptiveETA(),
]
progress = progressbar.ProgressBar(widgets=widgets)
if args.input.seekable():
progress.max_value = len(args.input.readlines())
args.input.seek(0)
# Cleaning input
iterator = iter(args.input)
iterator = map(str.strip, iterator)
iterator = filter(None, iterator)
# Filtering
progress.start()
for subdomain in iterator:
progress.update(progress.value + 1)
if not explicitely_match(subdomain):
print(subdomain, file=args.output)
progress.finish()