From c609b903904e194d635c11514eb029d731439e86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geoffrey=20=E2=80=9CFrogeye=E2=80=9D=20Preud=27homme?= Date: Tue, 3 Dec 2019 09:02:59 +0100 Subject: [PATCH] Append top 1M subdomains rather than replacing it --- adblock_to_domain_list.py | 2 +- fetch_resources.sh | 6 ++++-- resolve_subdomains.sh | 0 3 files changed, 5 insertions(+), 3 deletions(-) mode change 100644 => 100755 resolve_subdomains.sh diff --git a/adblock_to_domain_list.py b/adblock_to_domain_list.py index 7c8bf82..4a4c017 100755 --- a/adblock_to_domain_list.py +++ b/adblock_to_domain_list.py @@ -29,7 +29,7 @@ if __name__ == '__main__': # Parsing arguments parser = argparse.ArgumentParser( - description="TODO") + description="Extract whole domains from an AdBlock blocking list") parser.add_argument( '-i', '--input', type=argparse.FileType('r'), default=sys.stdin, help="Input file with AdBlock rules") diff --git a/fetch_resources.sh b/fetch_resources.sh index 705b0c4..5b023d5 100755 --- a/fetch_resources.sh +++ b/fetch_resources.sh @@ -22,8 +22,10 @@ sort -R nameservers.temp >> nameservers rm nameservers.temp # Get top 1M subdomains -wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip +dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip unzip top-1m.csv.zip -sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list +sed 's|^[0-9]\+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list rm top-1m.csv top-1m.csv.zip +cp subdomains/cisco-umbrella_popularity.cache.list temp/cisco-umbrella_popularity.old.list +cat temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list | sort -u > subdomains/cisco-umbrella_popularity.cache.list diff --git a/resolve_subdomains.sh b/resolve_subdomains.sh old mode 100644 new mode 100755