Fix log in scripts

Closes #8
newworkflow_parseropti
Geoffrey Frogeye 2019-12-07 18:45:48 +01:00
parent 0b2eb000c3
commit 2b0a723c30
Signed by: geoffrey
GPG Key ID: D8A7ECA00A8CD3DD
4 changed files with 27 additions and 10 deletions

View File

@ -1,5 +1,9 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Print a highlighted progress message.
# Arguments: $@ - words of the message; they are joined with single spaces.
# Outputs:   the message on stdout, wrapped in the ANSI colour sequence
#            ESC[33m ... ESC[0m. Because echo -e is used, backslash escapes
#            inside the message itself are expanded as well.
function log() {
    # Join the arguments into ONE word with "$*". Embedding "$@" inside a
    # longer quoted string glues the colour prefix to the first argument and
    # the reset suffix to the last one (ShellCheck SC2145).
    # A local IFS keeps the separator a single space whatever the caller set.
    local IFS=' '
    echo -e "\033[33m$*\033[0m"
}
# Get all subdomains accessed by each website in the website list # Get all subdomains accessed by each website in the website list
cat websites/*.list | sort -u > temp/all_websites.list cat websites/*.list | sort -u > temp/all_websites.list

View File

@ -1,7 +1,11 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Print a highlighted progress message.
# Arguments: $@ - words of the message; they are joined with single spaces.
# Outputs:   the message on stdout, wrapped in the ANSI colour sequence
#            ESC[33m ... ESC[0m. Because echo -e is used, backslash escapes
#            inside the message itself are expanded as well.
function log() {
    # Join the arguments into ONE word with "$*". Embedding "$@" inside a
    # longer quoted string glues the colour prefix to the first argument and
    # the reset suffix to the last one (ShellCheck SC2145).
    # A local IFS keeps the separator a single space whatever the caller set.
    local IFS=' '
    echo -e "\033[33m$*\033[0m"
}
function dl() { function dl() {
echo "Downloading $1 to $2..." echo "Downloading $1 to $2"
curl --silent "$1" > "$2" curl --silent "$1" > "$2"
if [ $? -ne 0 ] if [ $? -ne 0 ]
then then
@ -9,7 +13,8 @@ function dl() {
fi fi
} }
echo "Retrieving rules..." > /dev/stderr
log "Retrieving rules…"
rm -f rules*/*.cache.* rm -f rules*/*.cache.*
dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt dl https://easylist.to/easylist/easyprivacy.txt rules_adblock/easyprivacy.cache.txt
# From firebog.net Tracking & Telemetry Lists # From firebog.net Tracking & Telemetry Lists
@ -25,7 +30,7 @@ dl https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hos
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt rules_hosts/smart-tv.cache.txt # dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt rules_hosts/smart-tv.cache.txt
# dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/AmazonFireTV.txt rules_hosts/amazon-fire-tv.cache.txt # dl https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/AmazonFireTV.txt rules_hosts/amazon-fire-tv.cache.txt
echo "Retrieving nameservers..." > /dev/stderr log "Retrieving nameservers…"
rm -f nameservers rm -f nameservers
touch nameservers touch nameservers
[ -f nameservers.head ] && cat nameservers.head >> nameservers [ -f nameservers.head ] && cat nameservers.head >> nameservers
@ -33,7 +38,7 @@ dl https://public-dns.info/nameservers.txt nameservers.temp
sort -R nameservers.temp >> nameservers sort -R nameservers.temp >> nameservers
rm nameservers.temp rm nameservers.temp
echo "Retrieving top subdomains..." > /dev/stderr log "Retrieving top subdomains…"
dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip
unzip top-1m.csv.zip unzip top-1m.csv.zip
sed 's|^[0-9]\+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list sed 's|^[0-9]\+,||' top-1m.csv > temp/cisco-umbrella_popularity.fresh.list

View File

@ -1,5 +1,9 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Print a highlighted progress message.
# Arguments: $@ - words of the message; they are joined with single spaces.
# Outputs:   the message on stdout, wrapped in the ANSI colour sequence
#            ESC[33m ... ESC[0m. Because echo -e is used, backslash escapes
#            inside the message itself are expanded as well.
function log() {
    # Join the arguments into ONE word with "$*". Embedding "$@" inside a
    # longer quoted string glues the colour prefix to the first argument and
    # the reset suffix to the last one (ShellCheck SC2145).
    # A local IFS keeps the separator a single space whatever the caller set.
    local IFS=' '
    echo -e "\033[33m$*\033[0m"
}
if [ ! -f temp/all_resolved.csv ] if [ ! -f temp/all_resolved.csv ]
then then
echo "Run ./resolve_subdomains.sh first!" echo "Run ./resolve_subdomains.sh first!"
@ -7,7 +11,7 @@ then
fi fi
# Gather all the rules for filtering # Gather all the rules for filtering
echo "Compiling rules..." > /dev/stderr log "Compiling rules…"
cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | sort -u > temp/all_rules_adblock.txt cat rules_adblock/*.txt | grep -v '^!' | grep -v '^\[Adblock' | sort -u > temp/all_rules_adblock.txt
./adblock_to_domain_list.py --input temp/all_rules_adblock.txt --output rules/from_adblock.cache.list ./adblock_to_domain_list.py --input temp/all_rules_adblock.txt --output rules/from_adblock.cache.list
cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 > rules/from_hosts.cache.list cat rules_hosts/*.txt | grep -v '^#' | grep -v '^$' | cut -d ' ' -f2 > rules/from_hosts.cache.list
@ -16,19 +20,19 @@ cat rules/first-party.list | grep -v '^#' | grep -v '^$' | sort -u > temp/all_ru
cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | sort -u > temp/all_ip_rules_multi.txt cat rules_ip/*.txt | grep -v '^#' | grep -v '^$' | sort -u > temp/all_ip_rules_multi.txt
cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | sort -u > temp/all_ip_rules_first.txt cat rules_ip/first-party.txt | grep -v '^#' | grep -v '^$' | sort -u > temp/all_ip_rules_first.txt
echo "Filtering first-party tracking domains..." > /dev/stderr log "Filtering first-party tracking domains…"
./filter_subdomains.py --rules temp/all_rules_first.list --rules-ip temp/all_ip_rules_first.txt --input temp/all_resolved_sorted.csv --output temp/firstparty-trackers.list ./filter_subdomains.py --rules temp/all_rules_first.list --rules-ip temp/all_ip_rules_first.txt --input temp/all_resolved_sorted.csv --output temp/firstparty-trackers.list
sort -u temp/firstparty-trackers.list > dist/firstparty-trackers.txt sort -u temp/firstparty-trackers.list > dist/firstparty-trackers.txt
echo "Filtering first-party curated tracking domains..." > /dev/stderr log "Filtering first-party curated tracking domains…"
./filter_subdomains.py --rules temp/all_rules_first.list --rules-ip temp/all_ip_rules_first.txt --input temp/all_resolved_sorted.csv --no-explicit --output temp/firstparty-only-trackers.list ./filter_subdomains.py --rules temp/all_rules_first.list --rules-ip temp/all_ip_rules_first.txt --input temp/all_resolved_sorted.csv --no-explicit --output temp/firstparty-only-trackers.list
sort -u temp/firstparty-only-trackers.list > dist/firstparty-only-trackers.txt sort -u temp/firstparty-only-trackers.list > dist/firstparty-only-trackers.txt
echo "Filtering multi-party tracking domains..." > /dev/stderr log "Filtering multi-party tracking domains…"
./filter_subdomains.py --rules temp/all_rules_multi.list --rules-ip temp/all_ip_rules_multi.txt --input temp/all_resolved_sorted.csv --output temp/multiparty-trackers.list ./filter_subdomains.py --rules temp/all_rules_multi.list --rules-ip temp/all_ip_rules_multi.txt --input temp/all_resolved_sorted.csv --output temp/multiparty-trackers.list
sort -u temp/multiparty-trackers.list > dist/multiparty-trackers.txt sort -u temp/multiparty-trackers.list > dist/multiparty-trackers.txt
echo "Filtering multi-party curated tracking domains..." > /dev/stderr log "Filtering multi-party curated tracking domains…"
./filter_subdomains.py --rules temp/all_rules_multi.list --rules-ip temp/all_ip_rules_multi.txt --input temp/all_resolved_sorted.csv --no-explicit --output temp/multiparty-only-trackers.list ./filter_subdomains.py --rules temp/all_rules_multi.list --rules-ip temp/all_ip_rules_multi.txt --input temp/all_resolved_sorted.csv --no-explicit --output temp/multiparty-only-trackers.list
sort -u temp/multiparty-only-trackers.list > dist/multiparty-only-trackers.txt sort -u temp/multiparty-only-trackers.list > dist/multiparty-only-trackers.txt

View File

@ -1,7 +1,11 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Print a highlighted progress message.
# Arguments: $@ - words of the message; they are joined with single spaces.
# Outputs:   the message on stdout, wrapped in the ANSI colour sequence
#            ESC[33m ... ESC[0m. Because echo -e is used, backslash escapes
#            inside the message itself are expanded as well.
function log() {
    # Join the arguments into ONE word with "$*". Embedding "$@" inside a
    # longer quoted string glues the colour prefix to the first argument and
    # the reset suffix to the last one (ShellCheck SC2145).
    # A local IFS keeps the separator a single space whatever the caller set.
    local IFS=' '
    echo -e "\033[33m$*\033[0m"
}
# Resolve the CNAME chain of all the known subdomains for later analysis # Resolve the CNAME chain of all the known subdomains for later analysis
echo "Compiling subdomain lists..." > /dev/stderr log "Compiling subdomain lists..."
pv subdomains/*.list | sort -u > temp/all_subdomains.list pv subdomains/*.list | sort -u > temp/all_subdomains.list
# Sort by last character to utilize the DNS server caching mechanism # Sort by last character to utilize the DNS server caching mechanism
pv temp/all_subdomains.list | rev | sort | rev > temp/all_subdomains_reversort.list pv temp/all_subdomains.list | rev | sort | rev > temp/all_subdomains_reversort.list