rssVideos: Use GReader API

commit 406263b560
parent 7292e8ea88
@@ -14,6 +14,7 @@ import logging
 import os
 import pickle
 import random
+import requests
 import re
 import subprocess
 import sys
@@ -22,7 +23,6 @@ import typing
 import urllib.parse
 import urllib.request
 import urllib.error
-from xml.dom import minidom
 
 import coloredlogs
 import configargparse
@@ -33,6 +33,7 @@ log = logging.getLogger(__name__)
 # TODO Lockfile, or a way to parallel watch and download
 # TODO Save ytdl infos and view info separately
+
 
 def configure_logging(args: configargparse.Namespace) -> None:
     # Configure logging
     if args.verbosity:
@@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
         self.rvelement.update_post_download(info)
         return [], info
 
+
 def parse_duration(string: str) -> int:
     DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
 
@@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
 
     return lambda d: comparator(d, duration)
 
 
 def format_duration(duration: int) -> str:
     return time.strftime("%H:%M:%S", time.gmtime(duration))
 
 
 class RVElement:
     parent: "RVDatabase"
-    item: minidom.Element
+    item: dict
     downloaded_filepath: typing.Optional[str]
     watched: bool
 
-    def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
+    def __init__(self, parent: "RVDatabase", item: dict) -> None:
         self.parent = parent
         self.item = item
         self.downloaded_filepath = None
         self.watched = False
 
-    def get_tag_data(self, tag_name: str) -> str:
-        nodes = self.item.getElementsByTagName(tag_name)
-        if len(nodes) != 1:
-            raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
-        children = nodes[0].childNodes
-        if len(children) != 1:
-            raise KeyError(
-                f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
-            )
-        return children[0].data
-
     @property
     def title(self) -> str:
-        return self.get_tag_data("title")
+        return self.item["title"]
 
     @property
     def link(self) -> str:
-        return self.get_tag_data("link")
+        return self.item["canonical"][0]["href"]
 
     @property
-    def creator(self) -> typing.Optional[str]:
-        try:
-            return self.get_tag_data("dc:creator")
-        except KeyError:
-            return None
-
-    @property
-    def description(self) -> str:
-        # TODO Testing
-        return self.get_tag_data("description")
-
-    @property
-    def date(self) -> str:
-        # TODO datetime format
-        return self.get_tag_data("pubDate")
+    def creator(self) -> str:
+        return self.item["origin"]["title"]
 
     @property
     def guid(self) -> int:
-        return int(self.get_tag_data("guid"))
+        return int(self.item["timestampUsec"])
 
     @property
     def is_researched(self) -> bool:
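Note: the rewritten properties assume the item layout served by GReader-compatible stream endpoints. A hypothetical (abridged) item, with made-up values, showing only the fields read above:

    item = {
        "title": "Some video title",
        "canonical": [{"href": "https://example.com/watch?v=abc123"}],
        "origin": {"title": "Some channel"},
        "timestampUsec": "1638316800000000",
    }
    # guid becomes int(item["timestampUsec"]); link is item["canonical"][0]["href"]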
@@ -283,9 +262,7 @@ class RVElement:
             log.debug(f"Not a video: {self}")
             return False
         if args.duration and not compare_duration(args.duration)(self.duration):
-            log.debug(
-                f"Duration {self.duration} not matching {args.duration}: {self}"
-            )
+            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
             return False
 
         return True
@@ -340,6 +317,10 @@ class RVDatabase:
                 pass
         return None
 
+    def salvage_cache_pre(self, cache: "RVDatabase") -> None:
+        if "auth_headers" in cache.__dict__:
+            self.auth_headers = cache.auth_headers
+
     def salvage_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Salvaging cache")
         cache_els = dict()
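Note: salvage_cache_pre leans on how functools.cached_property works: the computed value is stored in the instance __dict__ under the property's name, so `"auth_headers" in cache.__dict__` tests whether a login ever happened, and plain assignment pre-seeds the new database so it won't re-run ClientLogin. A minimal sketch of that mechanism (Demo and value are illustrative names):

    import functools

    class Demo:
        @functools.cached_property
        def value(self) -> int:
            print("computing")
            return 42

    d = Demo()
    d.value                # prints "computing"; stores 42 in d.__dict__["value"]
    fresh = Demo()
    fresh.value = d.value  # pre-seeding: assignment fills the cache directly
    fresh.value            # 42, nothing printed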
@@ -361,22 +342,55 @@ class RVDatabase:
 
     def import_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Importing cache")
-        self.feed_xml = cache.feed_xml
-        self.read_feed()
+        self.build_list([element.item for element in cache.elements])
 
     @functools.cached_property
-    def feed_xml(self) -> minidom.Document:
-        log.info("Fetching RSS feed")
-        with urllib.request.urlopen(self.args.feed) as request:
-            return minidom.parse(request)
+    def auth_headers(self) -> dict[str, str]:
+        r = requests.get(
+            f"{self.args.url}/accounts/ClientLogin",
+            params={"Email": self.args.email, "Passwd": self.args.passwd},
+        )
+        r.raise_for_status()
+        for line in r.text.split("\n"):
+            if line.lower().startswith("auth="):
+                val = "=".join(line.split("=")[1:])
+                return {"Authorization": f"GoogleLogin auth={val}"}
+        raise RuntimeError("Couldn't find auth= key")
 
-    def read_feed(self) -> None:
+    def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
+        log.info("Fetching RSS feed")
+        continuation: typing.Optional[str] = None
+        with requests.Session() as s:
+
+            def next_page() -> typing.Generator[dict, None, None]:
+                nonlocal continuation
+                r = s.get(
+                    f"{self.args.url}/reader/api/0/stream/contents",
+                    params={
+                        "xt": "user/-/state/com.google/read",
+                        "c": continuation,
+                    },
+                    headers=self.auth_headers,
+                )
+                r.raise_for_status()
+                json = r.json()
+                yield from json["items"]
+                continuation = json.get("continuation")
+
+            yield from next_page()
+            while continuation:
+                yield from next_page()
+
+    def build_list(self, items: typing.Iterable[dict]) -> None:
         self.elements = []
-        for item in self.feed_xml.getElementsByTagName("item"):
+        for item in items:
             element = RVElement(self, item)
             self.elements.insert(0, element)
             log.debug(f"Known: {element}")
 
+    def read_feed(self) -> None:
+        self.build_list(self.fetch_feed_elements())
+
     def clean(self) -> None:
         log.debug("Cleaning")
         filenames = set()
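Note: the two endpoints above follow the classic Google Reader API convention: accounts/ClientLogin answers with newline-separated key=value pairs (SID, LSID, Auth) of which only Auth is needed, and reader/api/0/stream/contents pages through items via a continuation token, with xt=user/-/state/com.google/read excluding items already marked read. A standalone sketch of the same flow, where BASE, the e-mail, and the password are placeholders:

    import requests

    BASE = "https://aggregator.example.com"  # placeholder aggregator URL

    # Exchange credentials for a GoogleLogin token.
    r = requests.get(
        f"{BASE}/accounts/ClientLogin",
        params={"Email": "user@example.com", "Passwd": "secret"},
    )
    r.raise_for_status()
    tokens = dict(line.split("=", 1) for line in r.text.splitlines() if "=" in line)
    headers = {"Authorization": f"GoogleLogin auth={tokens['Auth']}"}

    # Page through unread items until no continuation token is returned.
    params = {"xt": "user/-/state/com.google/read"}
    while True:
        r = requests.get(
            f"{BASE}/reader/api/0/stream/contents", params=params, headers=headers
        )
        r.raise_for_status()
        data = r.json()
        for item in data["items"]:
            print(item["title"])
        if "continuation" not in data:
            break
        params["c"] = data["continuation"]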
@@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace:
     )
 
     parser = configargparse.ArgParser(
-        description="Download videos linked in "
-        + "a RSS feed (e.g. an unread feed from "
-        + "an RSS aggregator",
+        description="Download videos in unread articles from a feed aggregator",
         default_config_files=[defaultConfigPath],
     )
 
@@ -504,9 +516,21 @@ def get_args() -> configargparse.Namespace:
 
     # Input/Output
     parser.add(
-        "--feed",
-        help="URL of the RSS feed (must be public for now)",
-        env_var="RSS_VIDEOS_FEED",
+        "--url",
+        help="URL of the Google Reader API of the aggregator",
+        env_var="RSS_VIDEOS_URL",
+        required=True,
+    )
+    parser.add(
+        "--email",
+        help="E-mail / user to connect to the aggregator",
+        env_var="RSS_VIDEOS_EMAIL",
+        required=True,
+    )
+    parser.add(
+        "--passwd",
+        help="Password to connect to the aggregator",
+        env_var="RSS_VIDEOS_PASSWD",
         required=True,
     )
     parser.add(
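Note: with configargparse, each of these options can come from the command line, the environment variable named by env_var, or the default config file; the command line wins over the environment, which wins over the config file. A minimal sketch:

    import configargparse

    parser = configargparse.ArgParser()
    parser.add("--url", env_var="RSS_VIDEOS_URL", required=True)
    # With RSS_VIDEOS_URL exported, --url may be omitted entirely.
    options = parser.parse_args()
    print(options.url)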
@@ -550,13 +574,6 @@ def get_args() -> configargparse.Namespace:
         help="Use videos that fit under the total given",
     )
     # TODO Envrionment variables
-    parser.add(
-        "--max-duration",
-        help="(Deprecated, use --duration instead)",
-        env_var="RSS_VIDEOS_MAX_DURATION",
-        type=int,
-        default=0,
-    )
     # TODO Allow to ask
 
     # How to download
@@ -607,11 +624,13 @@ def main() -> None:
     database = RVDatabase(args)
     cache = RVDatabase.load()
     feed_fetched = False
+    if cache:
+        database.salvage_cache_pre(cache)
     if args.refresh:
         try:
             database.read_feed()
             feed_fetched = True
-        except urllib.error.URLError as err:
+        except requests.ConnectionError as err:
             if args.action == "download":
                 raise RuntimeError("Couldn't fetch feed, refusing to download")
             # This is a quirky failsafe in case of no internet connection,
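Note: only network-level failures are caught here; requests.ConnectionError covers DNS errors and refused connections, while HTTP 4xx/5xx responses surface as requests.HTTPError from raise_for_status() and still propagate. A small sketch of the assumed error surface, using a guaranteed-invalid hostname:

    import requests

    try:
        requests.get("http://aggregator.invalid/reader/api/0/stream/contents")
    except requests.ConnectionError as err:
        print(f"Couldn't fetch feed, falling back to cache: {err}")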