rssVideos: Use GReader API

commit 406263b560
parent 7292e8ea88
@@ -14,6 +14,7 @@ import logging
 import os
 import pickle
 import random
+import requests
 import re
 import subprocess
 import sys
@@ -22,7 +23,6 @@ import typing
 import urllib.parse
 import urllib.request
 import urllib.error
-from xml.dom import minidom
 
 import coloredlogs
 import configargparse
@@ -33,6 +33,7 @@ log = logging.getLogger(__name__)
 # TODO Lockfile, or a way to parallel watch and download
 # TODO Save ytdl infos and view info separately
+
 
 def configure_logging(args: configargparse.Namespace) -> None:
     # Configure logging
     if args.verbosity:
@@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
         self.rvelement.update_post_download(info)
         return [], info
 
+
 def parse_duration(string: str) -> int:
     DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
 
@@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
 
     return lambda d: comparator(d, duration)
 
 
 def format_duration(duration: int) -> str:
     return time.strftime("%H:%M:%S", time.gmtime(duration))
 
 
 class RVElement:
     parent: "RVDatabase"
-    item: minidom.Element
+    item: dict
     downloaded_filepath: typing.Optional[str]
     watched: bool
 
-    def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
+    def __init__(self, parent: "RVDatabase", item: dict) -> None:
         self.parent = parent
         self.item = item
         self.downloaded_filepath = None
         self.watched = False
 
-    def get_tag_data(self, tag_name: str) -> str:
-        nodes = self.item.getElementsByTagName(tag_name)
-        if len(nodes) != 1:
-            raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
-        children = nodes[0].childNodes
-        if len(children) != 1:
-            raise KeyError(
-                f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
-            )
-        return children[0].data
-
     @property
     def title(self) -> str:
-        return self.get_tag_data("title")
+        return self.item["title"]
 
     @property
     def link(self) -> str:
-        return self.get_tag_data("link")
+        return self.item["canonical"][0]["href"]
 
     @property
-    def creator(self) -> typing.Optional[str]:
-        try:
-            return self.get_tag_data("dc:creator")
-        except KeyError:
-            return None
-
-    @property
-    def description(self) -> str:
-        # TODO Testing
-        return self.get_tag_data("description")
-
-    @property
-    def date(self) -> str:
-        # TODO datetime format
-        return self.get_tag_data("pubDate")
+    def creator(self) -> str:
+        return self.item["origin"]["title"]
 
     @property
     def guid(self) -> int:
-        return int(self.get_tag_data("guid"))
+        return int(self.item["timestampUsec"])
 
     @property
     def is_researched(self) -> bool:
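Note: the rewritten properties assume the item layout served by GReader-compatible stream endpoints. A hypothetical (abridged) item, with made-up values, showing only the fields read above:

    item = {
        "title": "Some video title",
        "canonical": [{"href": "https://example.com/watch?v=abc123"}],
        "origin": {"title": "Some channel"},
        "timestampUsec": "1638316800000000",
    }
    # guid becomes int(item["timestampUsec"]); link is item["canonical"][0]["href"]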
@@ -283,9 +262,7 @@ class RVElement:
             log.debug(f"Not a video: {self}")
             return False
         if args.duration and not compare_duration(args.duration)(self.duration):
-            log.debug(
-                f"Duration {self.duration} not matching {args.duration}: {self}"
-            )
+            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
             return False
 
         return True
@@ -340,6 +317,10 @@ class RVDatabase:
                 pass
         return None
 
+    def salvage_cache_pre(self, cache: "RVDatabase") -> None:
+        if "auth_headers" in cache.__dict__:
+            self.auth_headers = cache.auth_headers
+
     def salvage_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Salvaging cache")
         cache_els = dict()
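Note: salvage_cache_pre leans on how functools.cached_property works: the computed value is stored in the instance __dict__ under the property's name, so `"auth_headers" in cache.__dict__` tests whether a login ever happened, and plain assignment pre-seeds the new database so it won't re-run ClientLogin. A minimal sketch of that mechanism (Demo and value are illustrative names):

    import functools

    class Demo:
        @functools.cached_property
        def value(self) -> int:
            print("computing")
            return 42

    d = Demo()
    d.value                # prints "computing"; stores 42 in d.__dict__["value"]
    fresh = Demo()
    fresh.value = d.value  # pre-seeding: assignment fills the cache directly
    fresh.value            # 42, nothing printed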
@@ -361,22 +342,55 @@ class RVDatabase:
 
     def import_cache(self, cache: "RVDatabase") -> None:
         log.debug(f"Importing cache")
-        self.feed_xml = cache.feed_xml
-        self.read_feed()
+        self.build_list([element.item for element in cache.elements])
 
     @functools.cached_property
-    def feed_xml(self) -> minidom.Document:
-        log.info("Fetching RSS feed")
-        with urllib.request.urlopen(self.args.feed) as request:
-            return minidom.parse(request)
+    def auth_headers(self) -> dict[str, str]:
+        r = requests.get(
+            f"{self.args.url}/accounts/ClientLogin",
+            params={"Email": self.args.email, "Passwd": self.args.passwd},
+        )
+        r.raise_for_status()
+        for line in r.text.split("\n"):
+            if line.lower().startswith("auth="):
+                val = "=".join(line.split("=")[1:])
+                return {"Authorization": f"GoogleLogin auth={val}"}
+        raise RuntimeError("Couldn't find auth= key")
 
-    def read_feed(self) -> None:
+    def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
+        log.info("Fetching RSS feed")
+        continuation: typing.Optional[str] = None
+        with requests.Session() as s:
+
+            def next_page() -> typing.Generator[dict, None, None]:
+                nonlocal continuation
+                r = s.get(
+                    f"{self.args.url}/reader/api/0/stream/contents",
+                    params={
+                        "xt": "user/-/state/com.google/read",
+                        "c": continuation,
+                    },
+                    headers=self.auth_headers,
+                )
+                r.raise_for_status()
+                json = r.json()
+                yield from json["items"]
+                continuation = json.get("continuation")
+
+            yield from next_page()
+            while continuation:
+                yield from next_page()
+
+    def build_list(self, items: typing.Iterable[dict]) -> None:
         self.elements = []
-        for item in self.feed_xml.getElementsByTagName("item"):
+        for item in items:
             element = RVElement(self, item)
             self.elements.insert(0, element)
             log.debug(f"Known: {element}")
 
+    def read_feed(self) -> None:
+        self.build_list(self.fetch_feed_elements())
+
     def clean(self) -> None:
         log.debug("Cleaning")
         filenames = set()
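Note: the two endpoints above follow the classic Google Reader API convention: accounts/ClientLogin answers with newline-separated key=value pairs (SID, LSID, Auth) of which only Auth is needed, and reader/api/0/stream/contents pages through items via a continuation token, with xt=user/-/state/com.google/read excluding items already marked read. A standalone sketch of the same flow, where BASE, the e-mail, and the password are placeholders:

    import requests

    BASE = "https://aggregator.example.com"  # placeholder aggregator URL

    # Exchange credentials for a GoogleLogin token.
    r = requests.get(
        f"{BASE}/accounts/ClientLogin",
        params={"Email": "user@example.com", "Passwd": "secret"},
    )
    r.raise_for_status()
    tokens = dict(line.split("=", 1) for line in r.text.splitlines() if "=" in line)
    headers = {"Authorization": f"GoogleLogin auth={tokens['Auth']}"}

    # Page through unread items until no continuation token is returned.
    params = {"xt": "user/-/state/com.google/read"}
    while True:
        r = requests.get(
            f"{BASE}/reader/api/0/stream/contents", params=params, headers=headers
        )
        r.raise_for_status()
        data = r.json()
        for item in data["items"]:
            print(item["title"])
        if "continuation" not in data:
            break
        params["c"] = data["continuation"]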
@@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace:
     )
 
     parser = configargparse.ArgParser(
-        description="Download videos linked in "
-        + "a RSS feed (e.g. an unread feed from "
-        + "an RSS aggregator",
+        description="Download videos in unread articles from a feed aggregator",
         default_config_files=[defaultConfigPath],
     )
 
@@ -504,9 +516,21 @@ def get_args() -> configargparse.Namespace:
 
     # Input/Output
     parser.add(
-        "--feed",
-        help="URL of the RSS feed (must be public for now)",
-        env_var="RSS_VIDEOS_FEED",
+        "--url",
+        help="URL of the Google Reader API of the aggregator",
+        env_var="RSS_VIDEOS_URL",
+        required=True,
+    )
+    parser.add(
+        "--email",
+        help="E-mail / user to connect to the aggregator",
+        env_var="RSS_VIDEOS_EMAIL",
+        required=True,
+    )
+    parser.add(
+        "--passwd",
+        help="Password to connect to the aggregator",
+        env_var="RSS_VIDEOS_PASSWD",
         required=True,
     )
     parser.add(
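Note: with configargparse, each of these options can come from the command line, the environment variable named by env_var, or the default config file; the command line wins over the environment, which wins over the config file. A minimal sketch:

    import configargparse

    parser = configargparse.ArgParser()
    parser.add("--url", env_var="RSS_VIDEOS_URL", required=True)
    # With RSS_VIDEOS_URL exported, --url may be omitted entirely.
    options = parser.parse_args()
    print(options.url)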
@@ -550,13 +574,6 @@ def get_args() -> configargparse.Namespace:
         help="Use videos that fit under the total given",
     )
     # TODO Envrionment variables
-    parser.add(
-        "--max-duration",
-        help="(Deprecated, use --duration instead)",
-        env_var="RSS_VIDEOS_MAX_DURATION",
-        type=int,
-        default=0,
-    )
     # TODO Allow to ask
 
     # How to download
@@ -607,11 +624,13 @@ def main() -> None:
     database = RVDatabase(args)
     cache = RVDatabase.load()
     feed_fetched = False
+    if cache:
+        database.salvage_cache_pre(cache)
     if args.refresh:
         try:
             database.read_feed()
             feed_fetched = True
-        except urllib.error.URLError as err:
+        except requests.ConnectionError as err:
             if args.action == "download":
                 raise RuntimeError("Couldn't fetch feed, refusing to download")
             # This is a quirky failsafe in case of no internet connection,
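Note: only network-level failures are caught here; requests.ConnectionError covers DNS errors and refused connections, while HTTP 4xx/5xx responses surface as requests.HTTPError from raise_for_status() and still propagate. A small sketch of the assumed error surface, using a guaranteed-invalid hostname:

    import requests

    try:
        requests.get("http://aggregator.invalid/reader/api/0/stream/contents")
    except requests.ConnectionError as err:
        print(f"Couldn't fetch feed, falling back to cache: {err}")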