Merge remote-tracking branch 'origin/master'

Geoffrey Frogeye 2021-12-28 19:03:58 +01:00
commit 95f568ebb9
1 changed file with 76 additions and 57 deletions


@@ -14,6 +14,7 @@ import logging
import os
import pickle
import random
import requests
import re
import subprocess
import sys
@@ -22,7 +23,6 @@ import typing
import urllib.parse
import urllib.request
import urllib.error
from xml.dom import minidom
import coloredlogs
import configargparse
@@ -33,6 +33,7 @@ log = logging.getLogger(__name__)
# TODO Lockfile, or a way to parallel watch and download
# TODO Save ytdl infos and view info separately
def configure_logging(args: configargparse.Namespace) -> None:
# Configure logging
if args.verbosity:
@@ -62,6 +63,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
self.rvelement.update_post_download(info)
return [], info
def parse_duration(string: str) -> int:
DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
@@ -102,61 +104,38 @@ def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
return lambda d: comparator(d, duration)
def format_duration(duration: int) -> str:
return time.strftime("%H:%M:%S", time.gmtime(duration))
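As a quick sanity check, format_duration renders a number of seconds as HH:MM:SS (a sketch; note that time.gmtime wraps durations of 24 hours or more):

    assert format_duration(3725) == "01:02:05"  # 1 h, 2 min, 5 s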
class RVElement:
parent: "RVDatabase"
item: minidom.Element
item: dict
downloaded_filepath: typing.Optional[str]
watched: bool
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
def __init__(self, parent: "RVDatabase", item: dict) -> None:
self.parent = parent
self.item = item
self.downloaded_filepath = None
self.watched = False
def get_tag_data(self, tag_name: str) -> str:
nodes = self.item.getElementsByTagName(tag_name)
if len(nodes) != 1:
raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
children = nodes[0].childNodes
if len(children) != 1:
raise KeyError(
f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
)
return children[0].data
@property
def title(self) -> str:
return self.get_tag_data("title")
return self.item["title"]
@property
def link(self) -> str:
return self.get_tag_data("link")
return self.item["canonical"][0]["href"]
@property
def creator(self) -> typing.Optional[str]:
try:
return self.get_tag_data("dc:creator")
except KeyError:
return None
@property
def description(self) -> str:
# TODO Testing
return self.get_tag_data("description")
@property
def date(self) -> str:
# TODO datetime format
return self.get_tag_data("pubDate")
def creator(self) -> str:
return self.item["origin"]["title"]
@property
def guid(self) -> int:
return int(self.get_tag_data("guid"))
return int(self.item["timestampUsec"])
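The Reader API's timestampUsec field is microseconds since the Unix epoch, which is why it can double as a stable, ordered identifier. A sketch with a made-up value:

    import datetime

    usec = 1640714638000000  # hypothetical timestampUsec
    datetime.datetime.fromtimestamp(usec / 1e6, tz=datetime.timezone.utc)
    # -> datetime.datetime(2021, 12, 28, 18, 3, 58, tzinfo=datetime.timezone.utc)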
@property
def is_researched(self) -> bool:
@@ -283,9 +262,7 @@ class RVElement:
log.debug(f"Not a video: {self}")
return False
if args.duration and not compare_duration(args.duration)(self.duration):
log.debug(
f"Duration {self.duration} not matching {args.duration}: {self}"
)
log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
return False
return True
@@ -340,6 +317,10 @@ class RVDatabase:
pass
return None
def salvage_cache_pre(self, cache: "RVDatabase") -> None:
if "auth_headers" in cache.__dict__:
self.auth_headers = cache.auth_headers
def salvage_cache(self, cache: "RVDatabase") -> None:
log.debug(f"Salvaging cache")
cache_els = dict()
@@ -361,22 +342,55 @@
def import_cache(self, cache: "RVDatabase") -> None:
log.debug(f"Importing cache")
self.feed_xml = cache.feed_xml
self.read_feed()
self.build_list([element.item for element in cache.elements])
@functools.cached_property
def feed_xml(self) -> minidom.Document:
log.info("Fetching RSS feed")
with urllib.request.urlopen(self.args.feed) as request:
return minidom.parse(request)
def auth_headers(self) -> dict[str, str]:
r = requests.get(
f"{self.args.url}/accounts/ClientLogin",
params={"Email": self.args.email, "Passwd": self.args.passwd},
)
r.raise_for_status()
for line in r.text.split("\n"):
if line.lower().startswith("auth="):
val = "=".join(line.split("=")[1:])
return {"Authorization": f"GoogleLogin auth={val}"}
raise RuntimeError("Couldn't find auth= key")
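The loop above assumes a ClientLogin-style response body made of plain key=value lines (SID=, LSID=, Auth=); rejoining on "=" keeps any "=" characters inside the token itself. A minimal sketch with a made-up body:

    body = "SID=...\nLSID=...\nAuth=abc=123"  # hypothetical response text
    for line in body.split("\n"):
        if line.lower().startswith("auth="):
            token = "=".join(line.split("=")[1:])
    assert token == "abc=123"  # '=' inside the token survives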
def read_feed(self) -> None:
def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
log.info("Fetching RSS feed")
continuation: typing.Optional[str] = None
with requests.Session() as s:
def next_page() -> typing.Generator[dict, None, None]:
nonlocal continuation
r = s.get(
f"{self.args.url}/reader/api/0/stream/contents",
params={
"xt": "user/-/state/com.google/read",
"c": continuation,
},
headers=self.auth_headers,
)
r.raise_for_status()
json = r.json()
yield from json["items"]
continuation = json.get("continuation")
yield from next_page()
while continuation:
yield from next_page()
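Each stream/contents page is a JSON document whose items carry the fields RVElement reads above; roughly the following shape, with made-up values:

    page = {
        "items": [
            {
                "title": "Some video",
                "canonical": [{"href": "https://example.com/watch?v=..."}],
                "origin": {"title": "Some channel"},
                "timestampUsec": "1640714638000000",
            },
        ],
        "continuation": "CqJB...",  # absent on the last page
    }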
def build_list(self, items: typing.Iterable[dict]) -> None:
self.elements = []
for item in self.feed_xml.getElementsByTagName("item"):
for item in items:
element = RVElement(self, item)
self.elements.insert(0, element)
log.debug(f"Known: {element}")
def read_feed(self) -> None:
self.build_list(self.fetch_feed_elements())
def clean(self) -> None:
log.debug("Cleaning")
filenames = set()
@@ -476,9 +490,7 @@ def get_args() -> configargparse.Namespace:
)
parser = configargparse.ArgParser(
description="Download videos linked in "
+ "a RSS feed (e.g. an unread feed from "
+ "an RSS aggregator",
description="Download videos in unread articles from a feed aggregator",
default_config_files=[defaultConfigPath],
)
@@ -504,9 +516,21 @@ def get_args() -> configargparse.Namespace:
# Input/Output
parser.add(
"--feed",
help="URL of the RSS feed (must be public for now)",
env_var="RSS_VIDEOS_FEED",
"--url",
help="URL of the Google Reader API of the aggregator",
env_var="RSS_VIDEOS_URL",
required=True,
)
parser.add(
"--email",
help="E-mail / user to connect to the aggregator",
env_var="RSS_VIDEOS_EMAIL",
required=True,
)
parser.add(
"--passwd",
help="Password to connect to the aggregator",
env_var="RSS_VIDEOS_PASSWD",
required=True,
)
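Since each of these options declares an env_var, credentials can also come from the environment instead of flags; a hypothetical wiring (values made up):

    import os

    os.environ["RSS_VIDEOS_URL"] = "https://reader.example.com/api/greader"
    os.environ["RSS_VIDEOS_EMAIL"] = "user@example.com"
    os.environ["RSS_VIDEOS_PASSWD"] = "app-password"
    args = get_args()  # configargparse merges flags, env vars, and config file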
parser.add(
@@ -550,13 +574,6 @@ def get_args() -> configargparse.Namespace:
help="Use videos that fit under the total given",
)
# TODO Environment variables
parser.add(
"--max-duration",
help="(Deprecated, use --duration instead)",
env_var="RSS_VIDEOS_MAX_DURATION",
type=int,
default=0,
)
# TODO Allow to ask
# How to download
@@ -607,11 +624,13 @@ def main() -> None:
database = RVDatabase(args)
cache = RVDatabase.load()
feed_fetched = False
if cache:
database.salvage_cache_pre(cache)
if args.refresh:
try:
database.read_feed()
feed_fetched = True
except urllib.error.URLError as err:
except requests.ConnectionError as err:
if args.action == "download":
raise RuntimeError("Couldn't fetch feed, refusing to download")
# This is a quirky failsafe in case of no internet connection,