Append to the top 1M subdomains list rather than replacing it

This commit is contained in:
Geoffrey Frogeye 2019-12-03 09:02:59 +01:00
parent 69b82d29fd
commit c609b90390
3 changed files with 5 additions and 3 deletions

View file

@ -29,7 +29,7 @@ if __name__ == '__main__':
# Parsing arguments # Parsing arguments
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="TODO") description="Extract whole domains from an AdBlock blocking list")
parser.add_argument( parser.add_argument(
'-i', '--input', type=argparse.FileType('r'), default=sys.stdin, '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
help="Input file with AdBlock rules") help="Input file with AdBlock rules")

View file

@ -22,8 +22,10 @@ sort -R nameservers.temp >> nameservers
rm nameservers.temp rm nameservers.temp
# Get top 1M subdomains # Get top 1M subdomains
wget http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip
unzip top-1m.csv.zip unzip top-1m.csv.zip
sed 's|^[0-9]\+,||' top-1m.csv > subdomains/cisco-umbrella_popularity.cache.list sed 's|^[0-9]\+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list
rm top-1m.csv top-1m.csv.zip rm top-1m.csv top-1m.csv.zip
cp subdomains/cisco-umbrella_popularity.cache.list temp/cisco-umbrella_popularity.old.list
cat temp/cisco-umbrella_popularity.old.list temp/cisco-umbrella_popularity.fresh.list | sort -u > subdomains/cisco-umbrella_popularity.cache.list

0
resolve_subdomains.sh Normal file → Executable file
View file