Merge remote-tracking branch 'origin/master'

Geoffrey Frogeye 2021-12-28 19:03:58 +01:00
commit 95f568ebb9
1 changed file with 76 additions and 57 deletions


@@ -14,6 +14,7 @@ import logging
import os
import pickle
import random
import requests
import re
import subprocess
import sys
@@ -22,7 +23,6 @@ import typing
import urllib.parse
import urllib.request
import urllib.error
from xml.dom import minidom
import coloredlogs
import configargparse
@@ -33,6 +33,7 @@ log = logging.getLogger(__name__)
# TODO Lockfile, or a way to parallel watch and download
# TODO Save ytdl infos and view info separately
def configure_logging(args: configargparse.Namespace) -> None:
# Configure logging
if args.verbosity:
@@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
self.rvelement.update_post_download(info)
return [], info
def parse_duration(string: str) -> int:
DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
@@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
return lambda d: comparator(d, duration)
def format_duration(duration: int) -> str:
return time.strftime("%H:%M:%S", time.gmtime(duration))
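As a quick sanity check, format_duration renders a number of seconds as HH:MM:SS (a sketch; note that time.gmtime wraps durations of 24 hours or more):

    assert format_duration(3725) == "01:02:05"  # 1 h, 2 min, 5 s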
class RVElement:
parent: "RVDatabase"
item: minidom.Element
item: dict
downloaded_filepath: typing.Optional[str]
watched: bool
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
def __init__(self, parent: "RVDatabase", item: dict) -> None:
self.parent = parent
self.item = item
self.downloaded_filepath = None
self.watched = False
def get_tag_data(self, tag_name: str) -> str:
nodes = self.item.getElementsByTagName(tag_name)
if len(nodes) != 1:
raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
children = nodes[0].childNodes
if len(children) != 1:
raise KeyError(
f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
)
return children[0].data
@property
def title(self) -> str:
return self.get_tag_data("title")
return self.item["title"]
@property
def link(self) -> str:
return self.get_tag_data("link")
return self.item["canonical"][0]["href"]
@property
def creator(self) -> typing.Optional[str]:
try:
return self.get_tag_data("dc:creator")
except KeyError:
return None
@property
def description(self) -> str:
# TODO Testing
return self.get_tag_data("description")
@property
def date(self) -> str:
# TODO datetime format
return self.get_tag_data("pubDate")
def creator(self) -> str:
return self.item["origin"]["title"]
@property
def guid(self) -> int:
return int(self.get_tag_data("guid"))
return int(self.item["timestampUsec"])
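The Reader API's timestampUsec field is microseconds since the Unix epoch, which is why it can double as a stable, ordered identifier. A sketch with a made-up value:

    import datetime

    usec = 1640714638000000  # hypothetical timestampUsec
    datetime.datetime.fromtimestamp(usec / 1e6, tz=datetime.timezone.utc)
    # -> datetime.datetime(2021, 12, 28, 18, 3, 58, tzinfo=datetime.timezone.utc)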
@property
def is_researched(self) -> bool:
@@ -283,9 +262,7 @@ class RVElement:
log.debug(f"Not a video: {self}")
return False
if args.duration and not compare_duration(args.duration)(self.duration):
log.debug(
f"Duration {self.duration} not matching {args.duration}: {self}"
)
log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
return False
return True
@@ -340,6 +317,10 @@ class RVDatabase:
pass
return None
def salvage_cache_pre(self, cache: "RVDatabase") -> None:
if "auth_headers" in cache.__dict__:
self.auth_headers = cache.auth_headers
def salvage_cache(self, cache: "RVDatabase") -> None:
log.debug(f"Salvaging cache")
cache_els = dict()
@@ -361,22 +342,55 @@
def import_cache(self, cache: "RVDatabase") -> None:
log.debug(f"Importing cache")
self.feed_xml = cache.feed_xml
self.read_feed()
self.build_list([element.item for element in cache.elements])
@functools.cached_property
def feed_xml(self) -> minidom.Document:
log.info("Fetching RSS feed")
with urllib.request.urlopen(self.args.feed) as request:
return minidom.parse(request)
def auth_headers(self) -> dict[str, str]:
r = requests.get(
f"{self.args.url}/accounts/ClientLogin",
params={"Email": self.args.email, "Passwd": self.args.passwd},
)
r.raise_for_status()
for line in r.text.split("\n"):
if line.lower().startswith("auth="):
val = "=".join(line.split("=")[1:])
return {"Authorization": f"GoogleLogin auth={val}"}
raise RuntimeError("Couldn't find auth= key")
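The loop above assumes a ClientLogin-style response body made of plain key=value lines (SID=, LSID=, Auth=); rejoining on "=" keeps any "=" characters inside the token itself. A minimal sketch with a made-up body:

    body = "SID=...\nLSID=...\nAuth=abc=123"  # hypothetical response text
    for line in body.split("\n"):
        if line.lower().startswith("auth="):
            token = "=".join(line.split("=")[1:])
    assert token == "abc=123"  # '=' inside the token survives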
def read_feed(self) -> None:
def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
log.info("Fetching RSS feed")
continuation: typing.Optional[str] = None
with requests.Session() as s:
def next_page() -> typing.Generator[dict, None, None]:
nonlocal continuation
r = s.get(
f"{self.args.url}/reader/api/0/stream/contents",
params={
"xt": "user/-/state/com.google/read",
"c": continuation,
},
headers=self.auth_headers,
)
r.raise_for_status()
json = r.json()
yield from json["items"]
continuation = json.get("continuation")
yield from next_page()
while continuation:
yield from next_page()
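Each stream/contents page is a JSON document whose items carry the fields RVElement reads above; roughly the following shape, with made-up values:

    page = {
        "items": [
            {
                "title": "Some video",
                "canonical": [{"href": "https://example.com/watch?v=..."}],
                "origin": {"title": "Some channel"},
                "timestampUsec": "1640714638000000",
            },
        ],
        "continuation": "CqJB...",  # absent on the last page
    }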
def build_list(self, items: typing.Iterable[dict]) -> None:
self.elements = []
for item in self.feed_xml.getElementsByTagName("item"):
for item in items:
element = RVElement(self, item)
self.elements.insert(0, element)
log.debug(f"Known: {element}")
def read_feed(self) -> None:
self.build_list(self.fetch_feed_elements())
def clean(self) -> None:
log.debug("Cleaning")
filenames = set()
@@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace:
)
parser = configargparse.ArgParser(
description="Download videos linked in "
+ "a RSS feed (e.g. an unread feed from "
+ "an RSS aggregator",
description="Download videos in unread articles from a feed aggregator",
default_config_files=[defaultConfigPath],
)
@@ -504,9 +516,21 @@ def get_args() -> configargparse.Namespace:
# Input/Output
parser.add(
"--feed",
help="URL of the RSS feed (must be public for now)",
env_var="RSS_VIDEOS_FEED",
"--url",
help="URL of the Google Reader API of the aggregator",
env_var="RSS_VIDEOS_URL",
required=True,
)
parser.add(
"--email",
help="E-mail / user to connect to the aggregator",
env_var="RSS_VIDEOS_EMAIL",
required=True,
)
parser.add(
"--passwd",
help="Password to connect to the aggregator",
env_var="RSS_VIDEOS_PASSWD",
required=True,
)
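Since each of these options declares an env_var, credentials can also come from the environment instead of flags; a hypothetical wiring (values made up):

    import os

    os.environ["RSS_VIDEOS_URL"] = "https://reader.example.com/api/greader"
    os.environ["RSS_VIDEOS_EMAIL"] = "user@example.com"
    os.environ["RSS_VIDEOS_PASSWD"] = "app-password"
    args = get_args()  # configargparse merges flags, env vars, and config file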
parser.add(
@@ -550,13 +574,6 @@ def get_args() -> configargparse.Namespace:
help="Use videos that fit under the total given",
)
# TODO Environment variables
parser.add(
"--max-duration",
help="(Deprecated, use --duration instead)",
env_var="RSS_VIDEOS_MAX_DURATION",
type=int,
default=0,
)
# TODO Allow to ask
# How to download
@@ -607,11 +624,13 @@ def main() -> None:
database = RVDatabase(args)
cache = RVDatabase.load()
feed_fetched = False
if cache:
database.salvage_cache_pre(cache)
if args.refresh:
try:
database.read_feed()
feed_fetched = True
except urllib.error.URLError as err:
except requests.ConnectionError as err:
if args.action == "download":
raise RuntimeError("Couldn't fetch feed, refusing to download")
# This is a quirky failsafe in case of no internet connection,