Fixed scripting around

2019-12-18 01:03:08 +01:00 · 2019-12-18 01:03:08 +01:00 · aca5023c3f
parent dce35cb299
commit aca5023c3f
12 changed files with 212 additions and 286 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,2 @@
 *.log
 *.p
-nameservers
-nameservers.head
--- a/database.py
+++ b/database.py
@ -216,7 +216,7 @@ class Database(Profiler):
        splits = path.split('.')
        if not TLD_LIST:
            Database.populate_tld_list()
-        if splits[0] not in TLD_LIST:
+        if splits[-1] not in TLD_LIST:
            return False
        for split in splits:
            if not 1 <= len(split) <= 63:
@ -460,62 +460,56 @@ class Database(Profiler):
            string += f' ← {self.explain(match.source)}'
        return string

-    def export(self,
-               first_party_only: bool = False,
-               end_chain_only: bool = False,
-               no_dupplicates: bool = False,
-               explain: bool = False,
-               ) -> typing.Iterable[str]:
+    def list_records(self,
+                     first_party_only: bool = False,
+                     end_chain_only: bool = False,
+                     no_dupplicates: bool = False,
+                     rules_only: bool = False,
+                     hostnames_only: bool = False,
+                     explain: bool = False,
+                     ) -> typing.Iterable[str]:

        def export_cb(path: Path, match: Match
                      ) -> typing.Iterable[str]:
-            assert isinstance(path, DomainPath)
-            if not isinstance(path, HostnamePath):
-                return
            if first_party_only and not match.first_party:
                return
            if end_chain_only and match.references > 0:
                return
            if no_dupplicates and match.dupplicate:
                return
+            if rules_only and match.level > 1:
+                return
+            if hostnames_only and not isinstance(path, HostnamePath):
+                return
+
            if explain:
                yield self.explain(path)
            else:
-                yield self.unpack_domain(path)
+                yield str(path)

-        yield from self.exec_each_domain(export_cb)
-
-    def list_rules(self,
-                   first_party_only: bool = False,
-                   ) -> typing.Iterable[str]:
-
-        def list_rules_cb(path: Path, match: Match
-                          ) -> typing.Iterable[str]:
-            if first_party_only and not match.first_party:
-                return
-            if isinstance(path, ZonePath) \
-                    or (isinstance(path, Ip4Path) and path.prefixlen < 32):
-                # if match.level == 1:
-                # It should be the latter condition but it is more
-                # useful when using the former
-                yield self.explain(path)
-
-        yield from self.exec_each(list_rules_cb)
+        yield from self.exec_each(export_cb)

    def count_records(self,
                      first_party_only: bool = False,
-                      rules_only: bool = False,
+                      end_chain_only: bool = False,
                      no_dupplicates: bool = False,
+                      rules_only: bool = False,
+                      hostnames_only: bool = False,
                      ) -> str:
        memo: typing.Dict[str, int] = dict()

        def count_records_cb(path: Path, match: Match) -> None:
            if first_party_only and not match.first_party:
                return
-            if rules_only and match.level > 1:
+            if end_chain_only and match.references > 0:
                return
            if no_dupplicates and match.dupplicate:
                return
+            if rules_only and match.level > 1:
+                return
+            if hostnames_only and not isinstance(path, HostnamePath):
+                return
+
            try:
                memo[path.__class__.__name__] += 1
            except KeyError:
@ -523,9 +517,10 @@ class Database(Profiler):

        for _ in self.exec_each(count_records_cb):
            pass
+
        split: typing.List[str] = list()
        for key, value in sorted(memo.items(), key=lambda s: s[0]):
-            split.append(f'{key[:-4]}: {value}')
+            split.append(f'{key[:-4].lower()}s: {value}')
        return ', '.join(split)

    def get_domain(self, domain_str: str) -> typing.Iterable[DomainPath]:
--- a/export.py
+++ b/export.py
@ -19,15 +19,18 @@ if __name__ == '__main__':
    parser.add_argument(
        '-e', '--end-chain', action='store_true',
        help="TODO")
-    parser.add_argument(
-        '-x', '--explain', action='store_true',
-        help="TODO")
    parser.add_argument(
        '-r', '--rules', action='store_true',
        help="TODO")
+    parser.add_argument(
+        '-b', '--base-rules', action='store_true',
+        help="TODO implies rules")
    parser.add_argument(
        '-d', '--no-dupplicates', action='store_true',
        help="TODO")
+    parser.add_argument(
+        '-x', '--explain', action='store_true',
+        help="TODO")
    parser.add_argument(
        '-c', '--count', action='store_true',
        help="TODO")
@ -36,19 +39,21 @@ if __name__ == '__main__':
    DB = database.Database()

    if args.count:
+        assert not args.explain
        print(DB.count_records(
-            first_party_only=args.first_party,
-            rules_only=args.rules,
-            no_dupplicates=args.no_dupplicates,
-            ))
-    else:
-        if args.rules:
-            for line in DB.list_rules():
-                print(line)
-        for domain in DB.export(
            first_party_only=args.first_party,
            end_chain_only=args.end_chain,
            no_dupplicates=args.no_dupplicates,
+            rules_only=args.base_rules,
+            hostnames_only=not (args.rules or args.base_rules),
+        ))
+    else:
+        for domain in DB.list_records(
+            first_party_only=args.first_party,
+            end_chain_only=args.end_chain,
+            no_dupplicates=args.no_dupplicates,
+            rules_only=args.base_rules,
+            hostnames_only=not (args.rules or args.base_rules),
            explain=args.explain,
        ):
            print(domain, file=args.output)
--- a/export_lists.sh
+++ b/export_lists.sh
@ -4,69 +4,94 @@ function log() {
    echo -e "\033[33m$@\033[0m"
 }

-log "Exporting lists…"
-./export.py --first-party --output dist/firstparty-trackers.txt
-./export.py --first-party --end-chain --no-dupplicates --output dist/firstparty-only-trackers.txt
-./export.py --output dist/multiparty-trackers.txt
-./export.py --end-chain --no-dupplicates --output dist/multiparty-only-trackers.txt
+log "Calculating statistics…"
+gen_date=$(date -Isec)
+gen_software=$(git describe --tags)
+number_websites=$(wc -l < temp/all_websites.list)
+number_subdomains=$(wc -l < temp/all_subdomains.list)
+number_dns=$(grep '^$' temp/all_resolved.txt | wc -l)

-log "Generating statistics…"
-./export.py --count --first-party > temp/count_recs_firstparty.txt
-./export.py --count > temp/count_recs_multiparty.txt
-./export.py --rules --count --first-party > temp/count_rules_firstparty.txt
-./export.py --rules --count > temp/count_rules_multiparty.txt
+for partyness in {first,multi}
+do
+    if [ $partyness = "first" ]
+    then
+        partyness_flags="--first-party"
+    else
+        partyness_flags=""
+    fi

-log "Sorting lists…"
-sort -u dist/firstparty-trackers.txt -o dist/firstparty-trackers.txt
-sort -u dist/firstparty-only-trackers.txt -o dist/firstparty-only-trackers.txt
-sort -u dist/multiparty-trackers.txt -o dist/multiparty-trackers.txt
-sort -u dist/multiparty-only-trackers.txt -o dist/multiparty-only-trackers.txt
+    echo "Statistics for ${partyness}-party trackers"
+    echo "Input rules: $(./export.py --count --base-rules $partyness_flags)"
+    echo "Subsequent rules: $(./export.py --count --rules $partyness_flags)"
+    echo "Subsequent rules (no dupplicate): $(./export.py --count --rules --no-dupplicates $partyness_flags)"
+    echo "Output hostnames: $(./export.py --count $partyness_flags)"
+    echo "Output hostnames (no dupplicate): $(./export.py --count --no-dupplicates $partyness_flags)"
+    echo "Output hostnames (end-chain only): $(./export.py --count --end-chain $partyness_flags)"
+    echo "Output hostnames (no dupplicate, end-chain only): $(./export.py --count --no-dupplicates --end-chain $partyness_flags)"
+    echo

-log "Generating hosts lists…"
-function generate_hosts {
-    basename="$1"
-    description="$2"
-    description2="$3"
+    for trackerness in {trackers,only-trackers}
+    do
+        if [ $trackerness = "trackers" ]
+        then
+            trackerness_flags=""
+        else
+            trackerness_flags="--end-chain --no-dupplicates"
+        fi
+        file_list="dist/${partyness}party-${trackerness}.txt"
+        file_host="dist/${partyness}party-${trackerness}-hosts.txt"

-    (
-        echo "# First-party trackers host list"
-        echo "# $description"
-        echo "# $description2"
-        echo "#"
-        echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
-        echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
-        echo "#"
-        echo "# In case of false positives/negatives, or any other question,"
-        echo "# contact me the way you like: https://geoffrey.frogeye.fr"
-        echo "#"
-        echo "# Latest version:"
-        echo "# - First-party trackers  : https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt"
-        echo "# - … excluding redirected: https://hostfiles.frogeye.fr/firstparty-only-trackers-hosts.txt"
-        echo "# - First and third party : https://hostfiles.frogeye.fr/multiparty-trackers-hosts.txt"
-        echo "# - … excluding redirected: https://hostfiles.frogeye.fr/multiparty-only-trackers-hosts.txt"
-        echo '# (you can remove `-hosts` to get the raw list)'
-        echo "#"
-        echo "# Generation date: $(date -Isec)"
-        echo "# Generation software: eulaurarien $(git describe --tags)"
-        echo "# Number of source websites: $(wc -l temp/all_websites.list | cut -d' ' -f1)"
-        echo "# Number of source subdomains: $(wc -l temp/all_subdomains.list | cut -d' ' -f1)"
-        echo "# Number of source DNS records: ~2E9 + $(wc -l temp/all_resolved.json | cut -d' ' -f1)" # TODO
-        echo "#"
-        echo "# Known first-party trackers: $(cat temp/count_rules_firstparty.txt)"
-        echo "# Found first-party trackers: $(cat temp/count_recs_firstparty.txt)"
-        echo "# Number of first-party hostnames: $(wc -l dist/firstparty-trackers.txt | cut -d' ' -f1)"
-        echo "# … excluding redirected: $(wc -l dist/firstparty-only-trackers.txt | cut -d' ' -f1)"
-        echo "#"
-        echo "# Known multi-party trackers: $(cat temp/count_rules_multiparty.txt)"
-        echo "# Found multi-party trackers: $(cat temp/count_recs_multiparty.txt)"
-        echo "# Number of multi-party hostnames: $(wc -l dist/multiparty-trackers.txt | cut -d' ' -f1)"
-        echo "# … excluding redirected: $(wc -l dist/multiparty-only-trackers.txt | cut -d' ' -f1)"
-        echo
-        sed 's|^|0.0.0.0 |' "dist/$basename.txt"
-    ) > "dist/$basename-hosts.txt"
-}
+        log "Generating lists for variant ${partyness}-party ${trackerness}…"

-generate_hosts "firstparty-trackers" "Generated from a curated list of first-party trackers" ""
-generate_hosts "firstparty-only-trackers" "Generated from a curated list of first-party trackers" "Only contain the first chain of redirection."
-generate_hosts "multiparty-trackers" "Generated from known third-party trackers." "Also contains trackers used as third-party."
-generate_hosts "multiparty-only-trackers" "Generated from known third-party trackers." "Do not contain trackers used in third-party. Use in combination with third-party lists."
+        # Real export heeere
+        ./export.py $partyness_flags $trackerness_flags > $file_list
+        # Sometimes a bit heavy to have the DB open and sort the output
+        # so this is done in two steps
+        sort -u $file_list -o $file_list
+
+        rules_input=$(./export.py --count --base-rules $partyness_flags)
+        rules_found=$(./export.py --count --rules $partyness_flags)
+        rules_output=$(./export.py --count $partyness_flags $trackerness_flags)
+
+        function link() { # $1: partyness, $2: trackerness of the list to link to
+            url="https://hostfiles.frogeye.fr/${1}party-${2}-hosts.txt" # from the arguments, not the loop variables
+            if [ "$1" = "$partyness" ] && [ "$2" = "$trackerness" ] # the variant currently being generated
+            then
+                url="$url (this one)"
+            fi
+            echo "$url"
+        }
+
+        (
+            echo "# First-party trackers host list"
+            echo "# Variant: ${partyness}-party ${trackerness}"
+            echo "#"
+            echo "# About first-party trackers: https://git.frogeye.fr/geoffrey/eulaurarien#whats-a-first-party-tracker"
+            echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien"
+            echo "#"
+            echo "# In case of false positives/negatives, or any other question,"
+            echo "# contact me the way you like: https://geoffrey.frogeye.fr"
+            echo "#"
+            echo "# Latest versions:"
+            echo "# - First-party trackers  : $(link first trackers)"
+            echo "# - … excluding redirected: $(link first only-trackers)"
+            echo "# - First and third party : $(link multi trackers)"
+            echo "# - … excluding redirected: $(link multi only-trackers)"
+            echo '# (you can remove `-hosts` to get the raw list)'
+            echo "#"
+            echo "# Generation date: $gen_date"
+            echo "# Generation software: eulaurarien $gen_software"
+            echo "# Number of source websites: $number_websites"
+            echo "# Number of source subdomains: $number_subdomains"
+            echo "# Number of source DNS records: ~2E9 + $number_dns"
+            echo "#"
+            echo "# Input rules: $rules_input"
+            echo "# Subsequent rules: $rules_found"
+            echo "# Output rules: $rules_output"
+            echo "#"
+            echo
+            sed 's|^|0.0.0.0 |' "$file_list"
+        ) > "$file_host"
+
+    done
+done
--- a/feed_dns.py
+++ b/feed_dns.py
@ -130,8 +130,8 @@ class Rapid7Parser(Parser):
            self.register(record)


-class DnsMassParser(Parser):
-    # dnsmass --output Snrql
+class MassDnsParser(Parser):
+    # massdns --output Snrql
    # --retry REFUSED,SERVFAIL --resolvers nameservers-ipv4
    TYPES = {
        'A': (FUNCTION_MAP['a'][0], FUNCTION_MAP['a'][1], -1, None),
@ -140,7 +140,7 @@ class DnsMassParser(Parser):
    }

    def consume(self) -> None:
-        self.prof.enter_step('parse_dnsmass')
+        self.prof.enter_step('parse_massdns')
        timestamp = 0
        header = True
        for line in self.buf:
@ -156,7 +156,7 @@ class DnsMassParser(Parser):
                    header = False
                else:
                    select, write, name_offset, value_offset = \
-                        DnsMassParser.TYPES[split[1]]
+                        MassDnsParser.TYPES[split[1]]
                    record = (
                        select,
                        write,
@ -165,14 +165,14 @@ class DnsMassParser(Parser):
                        split[2][:value_offset],
                    )
                    self.register(record)
-                    self.prof.enter_step('parse_dnsmass')
+                    self.prof.enter_step('parse_massdns')
            except KeyError:
                continue


 PARSERS = {
    'rapid7': Rapid7Parser,
-    'dnsmass': DnsMassParser,
+    'massdns': MassDnsParser,
 }

 if __name__ == '__main__':
--- a/fetch_resources.sh
+++ b/fetch_resources.sh
@ -35,12 +35,7 @@ dl http://data.iana.org/TLD/tlds-alpha-by-domain.txt temp/all_tld.temp.list
 grep -v '^#' temp/all_tld.temp.list | awk '{print tolower($0)}' > temp/all_tld.list

 log "Retrieving nameservers…"
-rm -f nameservers
-touch nameservers
-[ -f nameservers.head ] && cat nameservers.head >> nameservers
-dl https://public-dns.info/nameservers.txt nameservers.temp
-sort -R nameservers.temp >> nameservers
-rm nameservers.temp
+dl https://public-dns.info/nameservers.txt nameservers/public-dns.list

 log "Retrieving top subdomains…"
 dl http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip top-1m.csv.zip
--- a/filter_subdomains.py
+++ b/filter_subdomains.py
@ -1,160 +0,0 @@
-#!/usr/bin/env python3
-# pylint: disable=C0103
-
-"""
-From a list of subdomains, output only
-the ones resolving to a first-party tracker.
-"""
-
-import argparse
-import sys
-import progressbar
-import csv
-import typing
-import ipaddress
-
-# DomainRule = typing.Union[bool, typing.Dict[str, 'DomainRule']]
-DomainRule = typing.Union[bool, typing.Dict]
-# IpRule = typing.Union[bool, typing.Dict[int, 'DomainRule']]
-IpRule = typing.Union[bool, typing.Dict]
-
-RULES_DICT: DomainRule = dict()
-RULES_IP_DICT: IpRule = dict()
-
-
-def get_bits(address: ipaddress.IPv4Address) -> typing.Iterator[int]:
-    for char in address.packed:
-        for i in range(7, -1, -1):
-            yield (char >> i) & 0b1
-
-
-def subdomain_matching(subdomain: str) -> bool:
-    parts = subdomain.split('.')
-    parts.reverse()
-    dic = RULES_DICT
-    for part in parts:
-        if isinstance(dic, bool) or part not in dic:
-            break
-        dic = dic[part]
-    if isinstance(dic, bool):
-        return dic
-    return False
-
-
-def ip_matching(ip_str: str) -> bool:
-    ip = ipaddress.ip_address(ip_str)
-    dic = RULES_IP_DICT
-    i = 0
-    for bit in get_bits(ip):
-        i += 1
-        if isinstance(dic, bool) or bit not in dic:
-            break
-        dic = dic[bit]
-    if isinstance(dic, bool):
-        return dic
-    return False
-
-
-def get_matching(chain: typing.List[str], no_explicit: bool = False
-                 ) -> typing.Iterable[str]:
-    if len(chain) <= 1:
-        return
-    initial = chain[0]
-    cname_destinations = chain[1:-1]
-    a_destination = chain[-1]
-    initial_matching = subdomain_matching(initial)
-    if no_explicit and initial_matching:
-        return
-    cname_matching = any(map(subdomain_matching, cname_destinations))
-    if cname_matching or initial_matching or ip_matching(a_destination):
-        yield initial
-
-
-def register_rule(subdomain: str) -> None:
-    # Make a tree with domain parts
-    parts = subdomain.split('.')
-    parts.reverse()
-    dic = RULES_DICT
-    last_part = len(parts) - 1
-    for p, part in enumerate(parts):
-        if isinstance(dic, bool):
-            return
-        if p == last_part:
-            dic[part] = True
-        else:
-            dic.setdefault(part, dict())
-            dic = dic[part]
-
-
-def register_rule_ip(network: str) -> None:
-    net = ipaddress.ip_network(network)
-    ip = net.network_address
-    dic = RULES_IP_DICT
-    last_bit = net.prefixlen - 1
-    for b, bit in enumerate(get_bits(ip)):
-        if isinstance(dic, bool):
-            return
-        if b == last_bit:
-            dic[bit] = True
-        else:
-            dic.setdefault(bit, dict())
-            dic = dic[bit]
-
-
-if __name__ == '__main__':
-
-    # Parsing arguments
-    parser = argparse.ArgumentParser(
-        description="Filter first-party trackers from a list of subdomains")
-    parser.add_argument(
-        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
-        help="Input file with DNS chains")
-    parser.add_argument(
-        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
-        help="Outptut file with one tracking subdomain per line")
-    parser.add_argument(
-        '-n', '--no-explicit', action='store_true',
-        help="Don't output domains already blocked with rules without CNAME")
-    parser.add_argument(
-        '-r', '--rules', type=argparse.FileType('r'),
-        help="List of domains domains to block (with their subdomains)")
-    parser.add_argument(
-        '-p', '--rules-ip', type=argparse.FileType('r'),
-        help="List of IPs ranges to block")
-    args = parser.parse_args()
-
-    # Progress bar
-    widgets = [
-        progressbar.Percentage(),
-        ' ', progressbar.SimpleProgress(),
-        ' ', progressbar.Bar(),
-        ' ', progressbar.Timer(),
-        ' ', progressbar.AdaptiveTransferSpeed(unit='req'),
-        ' ', progressbar.AdaptiveETA(),
-    ]
-    progress = progressbar.ProgressBar(widgets=widgets)
-
-    # Reading rules
-    if args.rules:
-        for rule in args.rules:
-            register_rule(rule.strip())
-    if args.rules_ip:
-        for rule in args.rules_ip:
-            register_rule_ip(rule.strip())
-
-    # Approximating line count
-    if args.input.seekable():
-        lines = 0
-        for line in args.input:
-            lines += 1
-        progress.max_value = lines
-        args.input.seek(0)
-
-    # Reading domains to filter
-    reader = csv.reader(args.input)
-    progress.start()
-    for chain in reader:
-        for match in get_matching(chain, no_explicit=args.no_explicit):
-            print(match, file=args.output)
-        progress.update(progress.value + 1)
-    progress.finish()
--- a/import_rules.sh
+++ b/import_rules.sh
@ -18,5 +18,5 @@ cat rules_asn/first-party.txt | grep -v '^#' | grep -v '^$' | ./feed_rules.py as

 ./feed_asn.py

-log "Pruning old rules…"
-./db.py --prune --prune-before "$BEFORE" --prune-base
+# log "Pruning old rules…"
+# ./db.py --prune --prune-before "$BEFORE" --prune-base
--- a/nameservers/.gitignore
+++ b/nameservers/.gitignore
@ -0,0 +1,2 @@
+*.custom.list
+*.cache.list
--- a/nameservers/popular.list
+++ b/nameservers/popular.list
@ -0,0 +1,24 @@
+8.8.8.8
+8.8.4.4
+2001:4860:4860:0:0:0:0:8888
+2001:4860:4860:0:0:0:0:8844
+208.67.222.222
+208.67.220.220
+2620:119:35::35
+2620:119:53::53
+4.2.2.1
+4.2.2.2
+8.26.56.26
+8.20.247.20
+84.200.69.80
+84.200.70.40
+2001:1608:10:25:0:0:1c04:b12f
+2001:1608:10:25:0:0:9249:d69b
+9.9.9.10
+149.112.112.10
+2620:fe::10
+2620:fe::fe:10
+1.1.1.1
+1.0.0.1
+2606:4700:4700::1111
+2606:4700:4700::1001
--- a/resolve_subdomains.sh
+++ b/resolve_subdomains.sh
@ -4,9 +4,16 @@ function log() {
    echo -e "\033[33m$@\033[0m"
 }

-log "Compiling locally known subdomain…"
-# Sort by last character to utilize the DNS server caching mechanism
-pv subdomains/*.list | sed 's/\r$//' | rev | sort -u | rev > temp/all_subdomains.list
-log "Resolving locally known subdomain…"
-pv temp/all_subdomains.list | ./resolve_subdomains.py --output temp/all_resolved.csv
+log "Compiling nameservers…"
+pv nameservers/*.list | ./validate_list.py --ip4 | sort -u > temp/all_nameservers_ip4.list

+log "Compiling subdomain…"
+# Sort by last character to utilize the DNS server caching mechanism
+# (not as efficient with massdns but it's almost free so why not)
+pv subdomains/*.list | ./validate_list.py --domain | rev | sort -u | rev > temp/all_subdomains.list
+
+log "Resolving subdomain…"
+massdns --output Snrql --retry REFUSED,SERVFAIL --resolvers temp/all_nameservers_ip4.list --outfile temp/all_resolved.txt temp/all_subdomains.list
+
+log "Importing into database…"
+pv temp/all_resolved.txt | ./feed_dns.py massdns
--- a/validate_list.py
+++ b/validate_list.py
@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+# pylint: disable=C0103
+
+"""
+Keep only valid domain names (--domain) and/or IPv4 addresses (--ip4)
+"""
+
+import database
+import argparse
+import sys
+
+if __name__ == '__main__':
+
+    # Parsing arguments
+    parser = argparse.ArgumentParser(
+        description="Filter out invalid domain names and IPv4 addresses.")
+    parser.add_argument(
+        '-i', '--input', type=argparse.FileType('r'), default=sys.stdin,
+        help="Input file with one entry per line (default: stdin)")
+    parser.add_argument(
+        '-o', '--output', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file for the accepted entries (default: stdout)")
+    parser.add_argument(
+        '-d', '--domain', action='store_true',
+        help="Accept valid domain names")
+    parser.add_argument(
+        '-4', '--ip4', action='store_true',
+        help="Accept valid IPv4 addresses")
+    args = parser.parse_args()
+
+    for line in args.input:  # with neither -d nor -4, no line is kept
+        line = line.strip()  # validators get the entry without the newline
+        if (args.domain and database.Database.validate_domain(line)) or \
+                (args.ip4 and database.Database.validate_ip4address(line)):
+            print(line, file=args.output)