rssVideos: Use GReader API

Author: Geoffrey Frogeye
Date:   2021-12-28 12:35:08 +01:00
Parent: 7292e8ea88
Commit: 406263b560
Signed by: geoffrey (GPG key ID: C72403E7F82E6AD8)


@@ -14,6 +14,7 @@ import logging
 import os
 import pickle
 import random
+import requests
 import re
 import subprocess
 import sys
@@ -22,7 +23,6 @@ import typing
 import urllib.parse
 import urllib.request
 import urllib.error
-from xml.dom import minidom
 
 import coloredlogs
 import configargparse
@@ -33,6 +33,7 @@ log = logging.getLogger(__name__)
 # TODO Lockfile, or a way to parallel watch and download
 # TODO Save ytdl infos and view info separately
 
+
 def configure_logging(args: configargparse.Namespace) -> None:
     # Configure logging
     if args.verbosity:
@@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
         self.rvelement.update_post_download(info)
         return [], info
 
+
 def parse_duration(string: str) -> int:
     DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
@@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
     return lambda d: comparator(d, duration)
 
 
 def format_duration(duration: int) -> str:
     return time.strftime("%H:%M:%S", time.gmtime(duration))
 
 
 class RVElement:
     parent: "RVDatabase"
-    item: minidom.Element
+    item: dict
    downloaded_filepath: typing.Optional[str]
     watched: bool
 
-    def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
+    def __init__(self, parent: "RVDatabase", item: dict) -> None:
         self.parent = parent
         self.item = item
         self.downloaded_filepath = None
         self.watched = False
 
-    def get_tag_data(self, tag_name: str) -> str:
-        nodes = self.item.getElementsByTagName(tag_name)
-        if len(nodes) != 1:
-            raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
-        children = nodes[0].childNodes
-        if len(children) != 1:
-            raise KeyError(
-                f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
-            )
-        return children[0].data
-
     @property
     def title(self) -> str:
-        return self.get_tag_data("title")
+        return self.item["title"]
 
     @property
     def link(self) -> str:
-        return self.get_tag_data("link")
+        return self.item["canonical"][0]["href"]
 
     @property
-    def creator(self) -> typing.Optional[str]:
-        try:
-            return self.get_tag_data("dc:creator")
-        except KeyError:
-            return None
-
-    @property
-    def description(self) -> str:
-        # TODO Testing
-        return self.get_tag_data("description")
-
-    @property
-    def date(self) -> str:
-        # TODO datetime format
-        return self.get_tag_data("pubDate")
+    def creator(self) -> str:
+        return self.item["origin"]["title"]
 
     @property
     def guid(self) -> int:
-        return int(self.get_tag_data("guid"))
+        return int(self.item["timestampUsec"])
 
     @property
     def is_researched(self) -> bool:
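
The new RVElement accessors read straight from a Google Reader JSON item rather than walking RSS XML. A sketch of the item shape they assume (field names as used in the diff; the values here are made up):

    item = {
        "title": "Some video title",                         # .title
        "canonical": [{"href": "https://example.com/v/1"}],  # .link
        "origin": {"title": "Some channel"},                 # .creator
        "timestampUsec": "1640691308000000",                 # .guid (as int)
    }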
@@ -283,9 +262,7 @@ class RVElement:
             log.debug(f"Not a video: {self}")
             return False
         if args.duration and not compare_duration(args.duration)(self.duration):
-            log.debug(
-                f"Duration {self.duration} not matching {args.duration}: {self}"
-            )
+            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
             return False
         return True
@@ -340,6 +317,10 @@ class RVDatabase:
                 pass
         return None
 
+    def salvage_cache_pre(self, cache: "RVDatabase") -> None:
+        if "auth_headers" in cache.__dict__:
+            self.auth_headers = cache.auth_headers
+
     def salvage_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Salvaging cache")
         cache_els = dict()
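
salvage_cache_pre works because functools.cached_property stores its computed value in the instance __dict__ under the property's name, so a previous run's token can be detected with "auth_headers" in cache.__dict__ and re-seeded by plain assignment, skipping a fresh ClientLogin round-trip. A minimal sketch of that mechanism (hypothetical class, not from this codebase):

    import functools

    class Session:
        @functools.cached_property
        def token(self) -> str:
            print("logging in...")  # expensive call, runs only on first access
            return "abc123"

    old = Session()
    old.token               # prints "logging in...", caches in old.__dict__["token"]
    fresh = Session()
    fresh.token = old.token # pre-seed the cache by plain assignment
    fresh.token             # returns "abc123" without logging in again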
@@ -361,22 +342,55 @@
     def import_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Importing cache")
-        self.feed_xml = cache.feed_xml
-        self.read_feed()
+        self.build_list([element.item for element in cache.elements])
 
     @functools.cached_property
-    def feed_xml(self) -> minidom.Document:
-        log.info("Fetching RSS feed")
-        with urllib.request.urlopen(self.args.feed) as request:
-            return minidom.parse(request)
+    def auth_headers(self) -> dict[str, str]:
+        r = requests.get(
+            f"{self.args.url}/accounts/ClientLogin",
+            params={"Email": self.args.email, "Passwd": self.args.passwd},
+        )
+        r.raise_for_status()
+        for line in r.text.split("\n"):
+            if line.lower().startswith("auth="):
+                val = "=".join(line.split("=")[1:])
+                return {"Authorization": f"GoogleLogin auth={val}"}
+        raise RuntimeError("Couldn't find auth= key")
 
-    def read_feed(self) -> None:
+    def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
+        log.info("Fetching RSS feed")
+        continuation: typing.Optional[str] = None
+        with requests.Session() as s:
+
+            def next_page() -> typing.Generator[dict, None, None]:
+                nonlocal continuation
+                r = s.get(
+                    f"{self.args.url}/reader/api/0/stream/contents",
+                    params={
+                        "xt": "user/-/state/com.google/read",
+                        "c": continuation,
+                    },
+                    headers=self.auth_headers,
+                )
+                r.raise_for_status()
+                json = r.json()
+                yield from json["items"]
+                continuation = json.get("continuation")
+
+            yield from next_page()
+            while continuation:
+                yield from next_page()
+
+    def build_list(self, items: typing.Iterable[dict]) -> None:
         self.elements = []
-        for item in self.feed_xml.getElementsByTagName("item"):
+        for item in items:
             element = RVElement(self, item)
             self.elements.insert(0, element)
             log.debug(f"Known: {element}")
 
+    def read_feed(self) -> None:
+        self.build_list(self.fetch_feed_elements())
+
     def clean(self) -> None:
         log.debug("Cleaning")
         filenames = set()
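
The two new methods implement the Google Reader API flow: ClientLogin returns a plain-text body containing an Auth=<token> line, and /reader/api/0/stream/contents serves items page by page, excluding already-read ones via the xt parameter and chaining pages with a continuation token. A standalone sketch of the same exchange (base URL and credentials are hypothetical):

    import requests

    BASE = "https://rss.example.com/api/greader.php"  # hypothetical aggregator endpoint

    # Step 1: ClientLogin yields a plain-text "Auth=<token>" line
    r = requests.get(
        f"{BASE}/accounts/ClientLogin",
        params={"Email": "user@example.com", "Passwd": "secret"},
    )
    r.raise_for_status()
    token = next(
        line.split("=", 1)[1]
        for line in r.text.splitlines()
        if line.lower().startswith("auth=")
    )
    headers = {"Authorization": f"GoogleLogin auth={token}"}

    # Step 2: page through unread items until no continuation token is returned
    params = {"xt": "user/-/state/com.google/read"}  # exclude read items
    while True:
        r = requests.get(
            f"{BASE}/reader/api/0/stream/contents", params=params, headers=headers
        )
        r.raise_for_status()
        page = r.json()
        for item in page["items"]:
            print(item["title"])
        if "continuation" not in page:
            break
        params["c"] = page["continuation"]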
@@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace:
     )
     parser = configargparse.ArgParser(
-        description="Download videos linked in "
-        + "a RSS feed (e.g. an unread feed from "
-        + "an RSS aggregator",
+        description="Download videos in unread articles from a feed aggregator",
         default_config_files=[defaultConfigPath],
     )
@@ -504,9 +516,21 @@
     # Input/Output
     parser.add(
-        "--feed",
-        help="URL of the RSS feed (must be public for now)",
-        env_var="RSS_VIDEOS_FEED",
+        "--url",
+        help="URL of the Google Reader API of the aggregator",
+        env_var="RSS_VIDEOS_URL",
+        required=True,
+    )
+    parser.add(
+        "--email",
+        help="E-mail / user to connect to the aggregator",
+        env_var="RSS_VIDEOS_EMAIL",
+        required=True,
+    )
+    parser.add(
+        "--passwd",
+        help="Password to connect to the aggregator",
+        env_var="RSS_VIDEOS_PASSWD",
         required=True,
     )
     parser.add(
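
With these options, the aggregator credentials can be supplied as flags, environment variables, or config-file entries (configargparse accepts all three). A hypothetical invocation, assuming a FreshRSS instance (which exposes the Google Reader API under /api/greader.php):

    RSS_VIDEOS_PASSWD=secret rssVideos \
        --url https://rss.example.com/api/greader.php \
        --email user@example.com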
@@ -550,13 +574,6 @@
         help="Use videos that fit under the total given",
     )
     # TODO Envrionment variables
-    parser.add(
-        "--max-duration",
-        help="(Deprecated, use --duration instead)",
-        env_var="RSS_VIDEOS_MAX_DURATION",
-        type=int,
-        default=0,
-    )
     # TODO Allow to ask
 
     # How to download
@@ -607,11 +624,13 @@ def main() -> None:
     database = RVDatabase(args)
     cache = RVDatabase.load()
     feed_fetched = False
+    if cache:
+        database.salvage_cache_pre(cache)
     if args.refresh:
         try:
             database.read_feed()
             feed_fetched = True
-        except urllib.error.URLError as err:
+        except requests.ConnectionError as err:
             if args.action == "download":
                 raise RuntimeError("Couldn't fetch feed, refusing to download")
             # This is a quirky failsafe in case of no internet connection,
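
Since fetching now goes through requests, a machine without connectivity raises requests.ConnectionError rather than urllib.error.URLError. Errors raised by raise_for_status() are requests.HTTPError, a sibling subclass of requests.RequestException, so a rejected login is not swallowed by this failsafe. A sketch of the distinction (hypothetical URL):

    import requests

    try:
        r = requests.get("https://rss.example.com/", timeout=5)
        r.raise_for_status()
    except requests.ConnectionError:
        pass  # offline: the script falls back to its cached feed
    # requests.HTTPError from raise_for_status() is deliberately not
    # caught, so an auth or server error still surfaces.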