From a4a908955a1eb1290b3ac3045775ecc9ae64bb73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geoffrey=20=E2=80=9CFrogeye=E2=80=9D=20Preud=27homme?= Date: Fri, 27 Dec 2019 15:21:33 +0100 Subject: [PATCH] Added index webpage --- README.md | 5 +++-- dist/.gitignore | 1 + dist/README.md | 4 +++- dist/markdown7.min.css | 2 ++ eulaurarien.sh | 1 + export_lists.sh | 6 +++--- generate_index.py | 27 +++++++++++++++++++++++++++ 7 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 dist/markdown7.min.css create mode 100755 generate_index.py diff --git a/README.md b/README.md index ebd578e..4cdbdd5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This program is able to generate a list of every hostnames being a DNS redirection to a list of DNS zones and IP networks. -It is primarilyy used to generate [Geoffrey Frogeye's block list of first-party trackers](https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/dist/README.md) (learn about first-party trackers by following this link). +It is primarily used to generate [Geoffrey Frogeye's block list of first-party trackers](https://hostfiles.frogeye.fr) (learn about first-party trackers by following this link). If you want to contribute but don't want to create an account on this forge, contact me the way you like: @@ -24,7 +24,7 @@ Those subdomains can either be provided as is, come from [Cisco Umbrella Popular ## Usage -Remember you can get an already generated and up-to-date list of first-party trackers from [here](https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/dist/README.md). +Remember you can get an already generated and up-to-date list of first-party trackers from [here](https://hostfiles.frogeye.fr). The following is for the people wanting to build their own list. 
@@ -45,6 +45,7 @@ Depending on the sources you'll be using to generate the list, you'll need to in - [Firefox](https://www.mozilla.org/firefox/) (only if you have websites as a source) - [selenium (Python bindings)](https://pypi.python.org/pypi/selenium) (only if you have websites as a source) - [selenium-wire](https://pypi.org/project/selenium-wire/) (only if you have websites as a source) +- [markdown2](https://pypi.org/project/markdown2/) (only if you intend to generate the index webpage) ### Create a new database diff --git a/dist/.gitignore b/dist/.gitignore index 2211df6..4dc9e88 100644 --- a/dist/.gitignore +++ b/dist/.gitignore @@ -1 +1,2 @@ *.txt +*.html diff --git a/dist/README.md b/dist/README.md index 7ccd06d..28a76cc 100644 --- a/dist/README.md +++ b/dist/README.md @@ -22,7 +22,7 @@ Unfortunately, most don't support those blocking methods as they are not DNS-awa This list is an inventory of every `somestring.website1.com` found to allow non DNS-aware ad blocker to still block first-party trackers. 
-### Litterature +### Learn more - [CNAME Cloaking, the dangerous disguise of third-party trackers](https://medium.com/nextdns/cname-cloaking-the-dangerous-disguise-of-third-party-trackers-195205dc522a) - [Trackers first-party](https://blog.imirhil.fr/2019/11/13/first-party-tracker.html) (french) @@ -81,10 +81,12 @@ The software used to generate this list is available here: :first-child{margin-top:0!important}body>:last-child{margin-bottom:0!important}a{color:#4183c4}a.absent{color:#c00}a.anchor{display:block;padding-left:30px;margin-left:-30px;cursor:pointer;position:absolute;top:0;left:0;bottom:0}h1,h2,h3,h4,h5,h6{margin:20px 0 10px;padding:0;font-weight:700;-webkit-font-smoothing:antialiased;cursor:text;position:relative}h1:hover a.anchor,h2:hover a.anchor,h3:hover a.anchor,h4:hover a.anchor,h5:hover a.anchor,h6:hover a.anchor{text-decoration:none}h1 code,h1 tt{font-size:inherit}h2 code,h2 tt{font-size:inherit}h3 code,h3 tt{font-size:inherit}h4 code,h4 tt{font-size:inherit}h5 code,h5 tt{font-size:inherit}h6 code,h6 tt{font-size:inherit}h1{font-size:28px;color:#000}h2{font-size:24px;border-bottom:1px solid #ccc;color:#000}h3{font-size:18px}h4{font-size:16px}h5{font-size:14px}h6{color:#777;font-size:14px}blockquote,dl,li,ol,p,pre,table,ul{margin:15px 0}hr{border:0 none;color:#ccc;height:4px;padding:0}body>h2:first-child{margin-top:0;padding-top:0}body>h1:first-child{margin-top:0;padding-top:0}body>h1:first-child+h2{margin-top:0;padding-top:0}body>h3:first-child,body>h4:first-child,body>h5:first-child,body>h6:first-child{margin-top:0;padding-top:0}a:first-child h1,a:first-child h2,a:first-child h3,a:first-child h4,a:first-child h5,a:first-child h6{margin-top:0;padding-top:0}h1 p,h2 p,h3 p,h4 p,h5 p,h6 p{margin-top:0}li p.first{display:inline-block}li{margin:0}ol,ul{padding-left:30px}ol :first-child,ul :first-child{margin-top:0}dl{padding:0}dl dt{font-size:14px;font-weight:700;font-style:italic;padding:0;margin:15px 0 5px}dl dt:first-child{padding:0}dl 
dt>:first-child{margin-top:0}dl dt>:last-child{margin-bottom:0}dl dd{margin:0 0 15px;padding:0 15px}dl dd>:first-child{margin-top:0}dl dd>:last-child{margin-bottom:0}blockquote{border-left:4px solid #ddd;padding:0 15px;color:#777}blockquote>:first-child{margin-top:0}blockquote>:last-child{margin-bottom:0}table{padding:0;border-collapse:collapse}table tr{border-top:1px solid #ccc;background-color:#fff;margin:0;padding:0}table tr:nth-child(2n){background-color:#f8f8f8}table tr th{font-weight:700;border:1px solid #ccc;margin:0;padding:6px 13px}table tr td{border:1px solid #ccc;margin:0;padding:6px 13px}table tr td :first-child,table tr th :first-child{margin-top:0}table tr td :last-child,table tr th :last-child{margin-bottom:0}img{max-width:100%}span.frame{display:block;overflow:hidden}span.frame>span{border:1px solid #ddd;display:block;float:left;overflow:hidden;margin:13px 0 0;padding:7px;width:auto}span.frame span img{display:block;float:left}span.frame span span{clear:both;color:#333;display:block;padding:5px 0 0}span.align-center{display:block;overflow:hidden;clear:both}span.align-center>span{display:block;overflow:hidden;margin:13px auto 0;text-align:center}span.align-center span img{margin:0 auto;text-align:center}span.align-right{display:block;overflow:hidden;clear:both}span.align-right>span{display:block;overflow:hidden;margin:13px 0 0;text-align:right}span.align-right span img{margin:0;text-align:right}span.float-left{display:block;margin-right:13px;overflow:hidden;float:left}span.float-left span{margin:13px 0 0}span.float-right{display:block;margin-left:13px;overflow:hidden;float:right}span.float-right>span{display:block;overflow:hidden;margin:13px auto 0;text-align:right}code,tt{margin:0 2px;padding:0 5px;white-space:nowrap;border:1px solid #eaeaea;background-color:#f8f8f8;border-radius:3px}pre code{margin:0;padding:0;white-space:pre;border:none;background:0 0}.highlight pre{background-color:#f8f8f8;border:1px solid 
#ccc;font-size:13px;line-height:19px;overflow:auto;padding:6px 10px;border-radius:3px}pre{background-color:#f8f8f8;border:1px solid #ccc;font-size:13px;line-height:19px;overflow:auto;padding:6px 10px;border-radius:3px}pre code,pre tt{background-color:transparent;border:none}sup{font-size:.83em;vertical-align:super;line-height:0}*{-webkit-print-color-adjust:exact}@media screen and (min-width:914px){body{width:854px;margin:0 auto}}@media print{pre,table{page-break-inside:avoid}pre{word-wrap:break-word}} diff --git a/eulaurarien.sh b/eulaurarien.sh index 9adbac5..cb8aba3 100755 --- a/eulaurarien.sh +++ b/eulaurarien.sh @@ -11,4 +11,5 @@ ./import_rapid7.sh ./prune.sh ./export_lists.sh +./generate_index.py diff --git a/export_lists.sh b/export_lists.sh index cdfc759..bdd4365 100755 --- a/export_lists.sh +++ b/export_lists.sh @@ -67,14 +67,14 @@ do echo "# Variant: ${partyness}-party ${trackerness}" echo "#" echo "# About first-party trackers: " - echo "# https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/dist/README.md#whats-a-first-party-tracker" + echo "# https://hostfiles.frogeye.fr/#whats-a-first-party-tracker" echo "#" echo "# In case of false positives/negatives, or any other question," echo "# contact me the way you like: https://geoffrey.frogeye.fr" echo "#" echo "# Source code: https://git.frogeye.fr/geoffrey/eulaurarien" echo "# License: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/LICENSE" - echo "# Acknowledgements: https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/dist/README.md#acknowledgements" + echo "# Acknowledgements: https://hostfiles.frogeye.fr/#acknowledgements" echo "#" echo "# Latest versions and variants:" echo "# - First-party trackers : $(link first trackers)" @@ -83,7 +83,7 @@ do echo "# - … excluding redirected: $(link multi only-trackers)" echo '# (you can remove `-hosts` to get the raw list)' echo '# Information about the variants:' - echo '# 
https://git.frogeye.fr/geoffrey/eulaurarien/src/branch/master/dist/README.md#list-variants'
+ echo '# https://hostfiles.frogeye.fr/#list-variants'
 echo "#"
 echo "# Generation date: $gen_date"
 echo "# Generation software: eulaurarien $gen_software"
diff --git a/generate_index.py b/generate_index.py
new file mode 100755
index 0000000..9a5a03e
--- /dev/null
+++ b/generate_index.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+"""Render dist/README.md into the dist/index.html webpage."""
+import markdown2
+
+# "header-ids" gives each heading an id so links like /#list-variants resolve.
+extras = ["header-ids"]
+
+# Explicit UTF-8 on both files: open() otherwise uses the locale's encoding.
+with open('dist/README.md', 'r', encoding='utf-8') as fdesc:
+    body = markdown2.markdown(fdesc.read(), extras=extras)
+
+output = f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<title>Geoffrey Frogeye's block list of first-party trackers</title>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link rel="stylesheet" type="text/css" href="markdown7.min.css">
+</head>
+<body>
+{body}
+</body>
+</html>
+"""
+
+with open('dist/index.html', 'w', encoding='utf-8') as fdesc:
+    fdesc.write(output)