Browse Source

Append top 1M subdomains to the cached list rather than replacing it

newworkflow_parseropti
Geoffrey Frogeye 3 years ago
parent
commit
c609b90390
  1. 2
      adblock_to_domain_list.py
  2. 6
      fetch_resources.sh
  3. 0
      resolve_subdomains.sh

2
adblock_to_domain_list.py

@@ -29,7 +29,7 @@ if __name__ == '__main__':
 # Parsing arguments
 parser = argparse.ArgumentParser(
-description="TODO")
+description="Extract whole domains from an AdBlock blocking list")
 parser.add_argument(
 '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
 help="Input file with AdBlock rules")

6
fetch_resources.sh

@@ -22,8 +22,10 @@ sort -R nameservers.temp >> nameservers
 rm nameservers.temp
 # Get top 1M subdomains
-wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip
+dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip
 unzip top-1m.csv.zip
-sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list
+sed 's|^[0-9]\+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list
 rm top-1m.csv top-1m.csv.zip
+cp subdomains/cisco-umbrella_popularity.cache.list temp/cisco-umbrella_popularity.old.list
+cat temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list | sort -u > subdomains/cisco-umbrella_popularity.cache.list

0
resolve_subdomains.sh

Loading…
Cancel
Save