diff --git a/config/scripts/rssVideos b/config/scripts/rssVideos index 109b85c..4796d95 100755 --- a/config/scripts/rssVideos +++ b/config/scripts/rssVideos @@ -14,6 +14,7 @@ import logging import os import pickle import random +import requests import re import subprocess import sys @@ -22,7 +23,6 @@ import typing import urllib.parse import urllib.request import urllib.error -from xml.dom import minidom import coloredlogs import configargparse @@ -33,6 +33,7 @@ log = logging.getLogger(__name__) # TODO Lockfile, or a way to parallel watch and download # TODO Save ytdl infos and view info separately + def configure_logging(args: configargparse.Namespace) -> None: # Configure logging if args.verbosity: @@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor): self.rvelement.update_post_download(info) return [], info + def parse_duration(string: str) -> int: DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1} @@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]: return lambda d: comparator(d, duration) + def format_duration(duration: int) -> str: return time.strftime("%H:%M:%S", time.gmtime(duration)) class RVElement: parent: "RVDatabase" - item: minidom.Element + item: dict downloaded_filepath: typing.Optional[str] watched: bool - def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None: + def __init__(self, parent: "RVDatabase", item: dict) -> None: self.parent = parent self.item = item self.downloaded_filepath = None self.watched = False - def get_tag_data(self, tag_name: str) -> str: - nodes = self.item.getElementsByTagName(tag_name) - if len(nodes) != 1: - raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.") - children = nodes[0].childNodes - if len(children) != 1: - raise KeyError( - f"Exepected 1 children for tag `{tag_name}`, got {len(children)}." - ) - return children[0].data - @property def title(self) -> str: - return self.get_tag_data("title") + return self.item["title"] @property def link(self) -> str: - return self.get_tag_data("link") + return self.item["canonical"][0]["href"] @property - def creator(self) -> typing.Optional[str]: - try: - return self.get_tag_data("dc:creator") - except KeyError: - return None - - @property - def description(self) -> str: - # TODO Testing - return self.get_tag_data("description") - - @property - def date(self) -> str: - # TODO datetime format - return self.get_tag_data("pubDate") + def creator(self) -> str: + return self.item["origin"]["title"] @property def guid(self) -> int: - return int(self.get_tag_data("guid")) + return int(self.item["timestampUsec"]) @property def is_researched(self) -> bool: @@ -283,9 +262,7 @@ class RVElement: log.debug(f"Not a video: {self}") return False if args.duration and not compare_duration(args.duration)(self.duration): - log.debug( - f"Duration {self.duration} not matching {args.duration}: {self}" - ) + log.debug(f"Duration {self.duration} not matching {args.duration}: {self}") return False return True @@ -340,6 +317,10 @@ class RVDatabase: pass return None + def salvage_cache_pre(self, cache: "RVDatabase") -> None: + if "auth_headers" in cache.__dict__: + self.auth_headers = cache.auth_headers + def salvage_cache(self, cache: "RVDatabase") -> None: log.debug(f"Salvaging cache") cache_els = dict() @@ -361,22 +342,55 @@ class RVDatabase: def import_cache(self, cache: "RVDatabase") -> None: log.debug(f"Importing cache") - self.feed_xml = cache.feed_xml - self.read_feed() + self.build_list([element.item for element in cache.elements]) @functools.cached_property - def feed_xml(self) -> minidom.Document: - log.info("Fetching RSS feed") - with urllib.request.urlopen(self.args.feed) as request: - return minidom.parse(request) + def auth_headers(self) -> dict[str, str]: + r = requests.get( + f"{self.args.url}/accounts/ClientLogin", + params={"Email": self.args.email, "Passwd": self.args.passwd}, + ) + r.raise_for_status() + for line in r.text.split("\n"): + if line.lower().startswith("auth="): + val = "=".join(line.split("=")[1:]) + return {"Authorization": f"GoogleLogin auth={val}"} + raise RuntimeError("Couldn't find auth= key") - def read_feed(self) -> None: + def fetch_feed_elements(self) -> typing.Generator[dict, None, None]: + log.info("Fetching RSS feed") + continuation: typing.Optional[str] = None + with requests.Session() as s: + + def next_page() -> typing.Generator[dict, None, None]: + nonlocal continuation + r = s.get( + f"{self.args.url}/reader/api/0/stream/contents", + params={ + "xt": "user/-/state/com.google/read", + "c": continuation, + }, + headers=self.auth_headers, + ) + r.raise_for_status() + json = r.json() + yield from json["items"] + continuation = json.get("continuation") + + yield from next_page() + while continuation: + yield from next_page() + + def build_list(self, items: typing.Iterable[dict]) -> None: self.elements = [] - for item in self.feed_xml.getElementsByTagName("item"): + for item in items: element = RVElement(self, item) self.elements.insert(0, element) log.debug(f"Known: {element}") + def read_feed(self) -> None: + self.build_list(self.fetch_feed_elements()) + def clean(self) -> None: log.debug("Cleaning") filenames = set() @@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace: ) parser = configargparse.ArgParser( - description="Download videos linked in " - + "a RSS feed (e.g. an unread feed from " - + "an RSS aggregator", + description="Download videos in unread articles from a feed aggregator", default_config_files=[defaultConfigPath], ) @@ -504,9 +516,21 @@ def get_args() -> configargparse.Namespace: # Input/Output parser.add( - "--feed", - help="URL of the RSS feed (must be public for now)", - env_var="RSS_VIDEOS_FEED", + "--url", + help="URL of the Google Reader API of the aggregator", + env_var="RSS_VIDEOS_URL", + required=True, + ) + parser.add( + "--email", + help="E-mail / user to connect to the aggregator", + env_var="RSS_VIDEOS_EMAIL", + required=True, + ) + parser.add( + "--passwd", + help="Password to connect to the aggregator", + env_var="RSS_VIDEOS_PASSWD", required=True, ) parser.add( @@ -550,13 +574,6 @@ def get_args() -> configargparse.Namespace: help="Use videos that fit under the total given", ) # TODO Envrionment variables - parser.add( - "--max-duration", - help="(Deprecated, use --duration instead)", - env_var="RSS_VIDEOS_MAX_DURATION", - type=int, - default=0, - ) # TODO Allow to ask # How to download @@ -607,11 +624,13 @@ def main() -> None: database = RVDatabase(args) cache = RVDatabase.load() feed_fetched = False + if cache: + database.salvage_cache_pre(cache) if args.refresh: try: database.read_feed() feed_fetched = True - except urllib.error.URLError as err: + except requests.ConnectionError as err: if args.action == "download": raise RuntimeError("Couldn't fetch feed, refusing to download") # This is a quirky failsafe in case of no internet connection,