diff --git a/adblock_to_domain_list.py b/adblock_to_domain_list.py index 7c8bf82..4a4c017 100755 --- a/adblock_to_domain_list.py +++ b/adblock_to_domain_list.py @@ -29,7 +29,7 @@ if __name__ == '__main__': # Parsing arguments parser = argparse.ArgumentParser( - description="TODO") + description="Extract whole domains from an AdBlock blocking list") parser.add_argument( '-i', '--input', type=argparse.FileType('r'), default=sys.stdin, help="Input file with AdBlock rules") diff --git a/fetch_resources.sh b/fetch_resources.sh index 705b0c4..5b023d5 100755 --- a/fetch_resources.sh +++ b/fetch_resources.sh @@ -22,8 +22,10 @@ sort -R nameservers.temp >> nameservers rm nameservers.temp # Get top 1M subdomains -wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip +dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip unzip top-1m.csv.zip -sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list +sed 's|^[0-9]\+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list rm top-1m.csv top-1m.csv.zip +cp subdomains/cisco-umbrella_popularity.cache.list temp/cisco-umbrella_popularity.old.list +cat temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list | sort -u > subdomains/cisco-umbrella_popularity.cache.list diff --git a/resolve_subdomains.sh b/resolve_subdomains.sh old mode 100644 new mode 100755