rssVideos: Use GReader API

parent 7292e8ea88
commit 406263b560
@@ -14,6 +14,7 @@ import logging
 import os
 import pickle
 import random
+import requests
 import re
 import subprocess
 import sys
@@ -22,7 +23,6 @@ import typing
 import urllib.parse
 import urllib.request
 import urllib.error
-from xml.dom import minidom

 import coloredlogs
 import configargparse
@@ -33,6 +33,7 @@ log = logging.getLogger(__name__)
 # TODO Lockfile, or a way to parallel watch and download
 # TODO Save ytdl infos and view info separately

+
 def configure_logging(args: configargparse.Namespace) -> None:
     # Configure logging
     if args.verbosity:
@@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
         self.rvelement.update_post_download(info)
         return [], info

+
 def parse_duration(string: str) -> int:
     DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}

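For context, parse_duration and compare_duration (both untouched by this commit) power the --duration filter used further down in the diff. A minimal sketch of how such helpers can work, assuming comparison strings like "<30m" or ">1h"; the script's actual operator handling may differ:

import operator
import typing

DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}


def parse_duration(string: str) -> int:
    # "30m" -> 1800; a bare number is read as seconds
    suffix = string[-1] if string[-1] in DURATION_MULTIPLIERS else ""
    number = string.removesuffix(suffix)
    return int(number) * DURATION_MULTIPLIERS[suffix]


def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
    # "<30m" -> a predicate that is True for durations under 30 minutes
    comparator = {"<": operator.lt, ">": operator.gt}[compstr[0]]
    duration = parse_duration(compstr[1:])
    return lambda d: comparator(d, duration)


assert compare_duration("<30m")(parse_duration("10m"))  # 600 < 1800 -> True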
@@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]:

     return lambda d: comparator(d, duration)


 def format_duration(duration: int) -> str:
     return time.strftime("%H:%M:%S", time.gmtime(duration))


 class RVElement:
     parent: "RVDatabase"
-    item: minidom.Element
+    item: dict
     downloaded_filepath: typing.Optional[str]
     watched: bool

-    def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
+    def __init__(self, parent: "RVDatabase", item: dict) -> None:
         self.parent = parent
         self.item = item
         self.downloaded_filepath = None
         self.watched = False

-    def get_tag_data(self, tag_name: str) -> str:
-        nodes = self.item.getElementsByTagName(tag_name)
-        if len(nodes) != 1:
-            raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
-        children = nodes[0].childNodes
-        if len(children) != 1:
-            raise KeyError(
-                f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
-            )
-        return children[0].data
-
     @property
     def title(self) -> str:
-        return self.get_tag_data("title")
+        return self.item["title"]

     @property
     def link(self) -> str:
-        return self.get_tag_data("link")
+        return self.item["canonical"][0]["href"]

     @property
-    def creator(self) -> typing.Optional[str]:
-        try:
-            return self.get_tag_data("dc:creator")
-        except KeyError:
-            return None
-
-    @property
-    def description(self) -> str:
-        # TODO Testing
-        return self.get_tag_data("description")
-
-    @property
-    def date(self) -> str:
-        # TODO datetime format
-        return self.get_tag_data("pubDate")
+    def creator(self) -> str:
+        return self.item["origin"]["title"]

     @property
     def guid(self) -> int:
-        return int(self.get_tag_data("guid"))
+        return int(self.item["timestampUsec"])

     @property
     def is_researched(self) -> bool:
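The rewritten accessors assume the item shape served by Google Reader-compatible APIs such as FreshRSS. A hypothetical item, trimmed to the fields RVElement now reads (real items carry many more keys):

item = {
    "title": "Some video title",
    "canonical": [{"href": "https://example.com/watch?v=abc123"}],
    "origin": {"title": "Channel or feed name"},
    "timestampUsec": "1650000000000000",  # microseconds as a string, hence int() in guid
}

Note that guid is now derived from timestampUsec instead of the feed's own guid tag: it stays an int, but identifies items by their crawl timestamp.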
@@ -283,9 +262,7 @@ class RVElement:
             log.debug(f"Not a video: {self}")
             return False
         if args.duration and not compare_duration(args.duration)(self.duration):
-            log.debug(
-                f"Duration {self.duration} not matching {args.duration}: {self}"
-            )
+            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
             return False

         return True
@@ -340,6 +317,10 @@ class RVDatabase:
             pass
         return None

+    def salvage_cache_pre(self, cache: "RVDatabase") -> None:
+        if "auth_headers" in cache.__dict__:
+            self.auth_headers = cache.auth_headers
+
     def salvage_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Salvaging cache")
         cache_els = dict()
@@ -361,22 +342,55 @@ class RVDatabase:

     def import_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Importing cache")
-        self.feed_xml = cache.feed_xml
-        self.read_feed()
+        self.build_list([element.item for element in cache.elements])

     @functools.cached_property
-    def feed_xml(self) -> minidom.Document:
-        log.info("Fetching RSS feed")
-        with urllib.request.urlopen(self.args.feed) as request:
-            return minidom.parse(request)
+    def auth_headers(self) -> dict[str, str]:
+        r = requests.get(
+            f"{self.args.url}/accounts/ClientLogin",
+            params={"Email": self.args.email, "Passwd": self.args.passwd},
+        )
+        r.raise_for_status()
+        for line in r.text.split("\n"):
+            if line.lower().startswith("auth="):
+                val = "=".join(line.split("=")[1:])
+                return {"Authorization": f"GoogleLogin auth={val}"}
+        raise RuntimeError("Couldn't find auth= key")

-    def read_feed(self) -> None:
+    def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
+        log.info("Fetching RSS feed")
+        continuation: typing.Optional[str] = None
+        with requests.Session() as s:
+
+            def next_page() -> typing.Generator[dict, None, None]:
+                nonlocal continuation
+                r = s.get(
+                    f"{self.args.url}/reader/api/0/stream/contents",
+                    params={
+                        "xt": "user/-/state/com.google/read",
+                        "c": continuation,
+                    },
+                    headers=self.auth_headers,
+                )
+                r.raise_for_status()
+                json = r.json()
+                yield from json["items"]
+                continuation = json.get("continuation")
+
+            yield from next_page()
+            while continuation:
+                yield from next_page()
+
+    def build_list(self, items: typing.Iterable[dict]) -> None:
         self.elements = []
-        for item in self.feed_xml.getElementsByTagName("item"):
+        for item in items:
             element = RVElement(self, item)
             self.elements.insert(0, element)
             log.debug(f"Known: {element}")

+    def read_feed(self) -> None:
+        self.build_list(self.fetch_feed_elements())
+
     def clean(self) -> None:
         log.debug("Cleaning")
         filenames = set()
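Together, auth_headers and fetch_feed_elements implement the usual GReader handshake: exchange credentials at accounts/ClientLogin for a GoogleLogin token, then page through reader/api/0/stream/contents, excluding read items via xt=user/-/state/com.google/read and following continuation tokens until exhausted. A standalone sketch of the same flow, using the endpoint paths from the diff but a placeholder base URL; it assumes the token arrives as an Auth=... line in the response body:

import typing

import requests


def greader_unread(url: str, email: str, passwd: str) -> typing.Iterator[dict]:
    # Exchange credentials for a GoogleLogin token
    r = requests.get(
        f"{url}/accounts/ClientLogin",
        params={"Email": email, "Passwd": passwd},
    )
    r.raise_for_status()
    fields = dict(
        line.split("=", 1) for line in r.text.splitlines() if "=" in line
    )
    headers = {"Authorization": f"GoogleLogin auth={fields['Auth']}"}

    # Page through unread items, following continuation tokens
    continuation: typing.Optional[str] = None
    while True:
        r = requests.get(
            f"{url}/reader/api/0/stream/contents",
            params={"xt": "user/-/state/com.google/read", "c": continuation},
            headers=headers,
        )
        r.raise_for_status()
        json = r.json()
        yield from json["items"]
        continuation = json.get("continuation")
        if not continuation:
            break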
@@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace:
     )

     parser = configargparse.ArgParser(
-        description="Download videos linked in "
-        + "a RSS feed (e.g. an unread feed from "
-        + "an RSS aggregator",
+        description="Download videos in unread articles from a feed aggregator",
         default_config_files=[defaultConfigPath],
     )

@@ -504,9 +516,21 @@ def get_args() -> configargparse.Namespace:

     # Input/Output
     parser.add(
-        "--feed",
-        help="URL of the RSS feed (must be public for now)",
-        env_var="RSS_VIDEOS_FEED",
+        "--url",
+        help="URL of the Google Reader API of the aggregator",
+        env_var="RSS_VIDEOS_URL",
         required=True,
     )
+    parser.add(
+        "--email",
+        help="E-mail / user to connect to the aggregator",
+        env_var="RSS_VIDEOS_EMAIL",
+        required=True,
+    )
+    parser.add(
+        "--passwd",
+        help="Password to connect to the aggregator",
+        env_var="RSS_VIDEOS_PASSWD",
+        required=True,
+    )
     parser.add(
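With --feed gone, the script now needs three required settings. A hypothetical config file for the new options (configargparse can also take them from RSS_VIDEOS_URL, RSS_VIDEOS_EMAIL and RSS_VIDEOS_PASSWD); the FreshRSS path is just one example of a GReader-compatible endpoint:

# rssVideos config file, placeholder values
url = https://freshrss.example.com/api/greader.php
email = alice@example.com
passwd = app-password-here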
@@ -550,13 +574,6 @@ def get_args() -> configargparse.Namespace:
         help="Use videos that fit under the total given",
     )
     # TODO Envrionment variables
-    parser.add(
-        "--max-duration",
-        help="(Deprecated, use --duration instead)",
-        env_var="RSS_VIDEOS_MAX_DURATION",
-        type=int,
-        default=0,
-    )
     # TODO Allow to ask

     # How to download
@@ -607,11 +624,13 @@ def main() -> None:
     database = RVDatabase(args)
     cache = RVDatabase.load()
     feed_fetched = False
+    if cache:
+        database.salvage_cache_pre(cache)
     if args.refresh:
         try:
             database.read_feed()
             feed_fetched = True
-        except urllib.error.URLError as err:
+        except requests.ConnectionError as err:
             if args.action == "download":
                 raise RuntimeError("Couldn't fetch feed, refusing to download")
             # This is a quirky failsafe in case of no internet connection,
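The reworked main() flow: cached auth headers are salvaged before any network call, and a failed refresh only aborts when a download was requested. A minimal sketch of that fallback logic, reusing the names from the diff:

import typing

import requests


def try_refresh(database: "RVDatabase", cache: typing.Optional["RVDatabase"], args) -> bool:
    # Returns True when the feed was fetched fresh from the aggregator
    if cache:
        database.salvage_cache_pre(cache)  # reuse the cached GoogleLogin token
    if args.refresh:
        try:
            database.read_feed()
            return True
        except requests.ConnectionError:
            # Offline failsafe: only a download strictly requires fresh data
            if args.action == "download":
                raise RuntimeError("Couldn't fetch feed, refusing to download")
    return False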