Compare commits

6 commits

@@ -17,6 +17,7 @@ import random
 import re
 import subprocess
 import sys
+import time
 import typing
 import urllib.parse
 import urllib.request
@@ -25,11 +26,12 @@ from xml.dom import minidom
 import coloredlogs
 import configargparse
-import yt_dlp as youtube_dl
+import yt_dlp

 log = logging.getLogger(__name__)

 # TODO Lockfile, or a way to parallel watch and download
+# TODO Save ytdl infos and view info separately


 def configure_logging(args: configargparse.Namespace) -> None:
     # Configure logging
@@ -44,16 +46,76 @@ def configure_logging(args: configargparse.Namespace) -> None:
     )


+class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
+    """
+    yt_dlp.process_ie_result() doesn't return a completely updated info dict,
+    notably the extension is still the one before it realizes the files cannot
+    be merged. So we use this PostProcessor to catch the info dict in its final
+    form and save what we need from it (it's not serializable in this state).
+    """
+
+    def __init__(self, rvelement: "RVElement") -> None:
+        self.rvelement = rvelement
+        super().__init__()
+
+    def run(self, info: dict) -> tuple[list, dict]:
+        self.rvelement.update_post_download(info)
+        return [], info
+
+
+def parse_duration(string: str) -> int:
+    DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
+
+    mult_index = string[-1].lower()
+    if mult_index.isdigit():
+        mult_index = ""
+    else:
+        string = string[:-1]
+    try:
+        multiplier = DURATION_MULTIPLIERS[mult_index]
+    except KeyError:
+        raise ValueError(f"Unknown duration multiplier: {mult_index}")
+    return int(string) * multiplier
+
+
+def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
+    DURATION_COMPARATORS = {
+        "<": int.__lt__,
+        "-": int.__lt__,
+        ">": int.__gt__,
+        "+": int.__gt__,
+        "=": int.__eq__,
+        "": int.__le__,
+    }
+
+    comp_index = compstr[0]
+    if comp_index.isdigit():
+        comp_index = ""
+    else:
+        compstr = compstr[1:]
+    try:
+        comparator = DURATION_COMPARATORS[comp_index]
+    except KeyError:
+        raise ValueError(f"Unknown duration comparator: {comp_index}")
+    duration = parse_duration(compstr)
+    return lambda d: comparator(d, duration)
+
+
+def format_duration(duration: int) -> str:
+    return time.strftime("%H:%M:%S", time.gmtime(duration))
+
+
 class RVElement:
     parent: "RVDatabase"
     item: minidom.Element
-    was_downloaded: bool
+    downloaded_filepath: typing.Optional[str]
     watched: bool

     def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
         self.parent = parent
         self.item = item
-        self.was_downloaded = False
+        self.downloaded_filepath = None
         self.watched = False

     def get_tag_data(self, tag_name: str) -> str:
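
As a quick reference, here is how the duration helpers added above behave (an illustrative sketch assuming the definitions from this hunk; the sample values are made up):

# Illustration only, using parse_duration/compare_duration/format_duration as defined above.
assert parse_duration("90") == 90            # bare number: seconds
assert parse_duration("30m") == 30 * 60      # "s", "m", "h" suffixes
under_half_hour = compare_duration("<30m")   # "<"/"-", ">"/"+", "=", or no prefix (default: <=)
assert under_half_hour(15 * 60)
assert not under_half_hour(45 * 60)
assert format_duration(3725) == "01:02:05"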
@@ -101,16 +163,25 @@ class RVElement:
         return "ytdl_infos" in self.__dict__

     def salvage_cache(self, cache: "RVElement") -> None:
-        if cache.is_researched:
+        if not self.parent.args.research and cache.is_researched:
             self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
             log.debug(f"From cache: {self}")
-        if cache.was_downloaded:
-            self.was_downloaded = True
+        if cache.downloaded_filepath:
+            self.downloaded_filepath = cache.downloaded_filepath
         if cache.watched:
             self.watched = True

     def __str__(self) -> str:
-        return f"{self.guid}: {self.creator} {self.title} {self.link}"
+        str = f"{self.guid}: {self.creator if self.creator else '?'} {self.title}"
+        if self.is_researched:
+            if self.is_video:
+                str += f" ({format_duration(self.duration)})"
+            else:
+                str += " (N/A)"
+        else:
+            str += " (?)"
+        str += f" {self.link}"
+        return str

     @property
     def downloaded(self) -> bool:
@@ -122,21 +193,15 @@ class RVElement:
     def ytdl_infos(self) -> typing.Optional[dict]:
         log.info(f"Researching: {self}")
         try:
-            infos = self.parent.ytdl_dry.extract_info(self.link)
+            infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
         except KeyboardInterrupt as e:
             raise e
-        except youtube_dl.utils.DownloadError as e:
+        except yt_dlp.utils.DownloadError as e:
             # TODO Still raise in case of temporary network issue
             log.warning(e)
             infos = None
-        # Apparently that thing is transformed from a LazyList
-        # somewhere in the normal yt_dlp process
-        if (
-            infos
-            and "thumbnails" in infos
-            and isinstance(infos["thumbnails"], youtube_dl.utils.LazyList)
-        ):
-            infos["thumbnails"] = infos["thumbnails"].exhaust()
+        if infos:
+            infos = self.parent.ytdl_dry.sanitize_info(infos)
         # Save database once it's been computed
         self.__dict__["ytdl_infos"] = infos
         self.parent.save()
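
The sanitize_info() call introduced above is what makes the manual LazyList handling unnecessary: it returns a plain, JSON/pickle-friendly copy of the info dict. Roughly, the dry-run pattern looks like this (sketch only; the URL is a placeholder):

# Sketch: extract metadata without downloading, then make the info dict serializable.
import json
import yt_dlp

with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
    info = ydl.extract_info("https://example.invalid/some-video", download=False)
    info = ydl.sanitize_info(info)  # plain dict, safe to json.dumps/pickle
    print(json.dumps(info)[:80])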
@@ -156,7 +221,8 @@ class RVElement:
     @property
     def filepath(self) -> str:
         assert self.is_video
-        # TODO This doesn't change the extension to mkv when the formats are incomaptible
+        if self.downloaded_filepath:
+            return self.downloaded_filepath
         return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)

     @property
@@ -168,10 +234,18 @@ class RVElement:
         assert self.is_video
         log.info(f"Downloading: {self}")
         if not self.parent.args.dryrun:
-            self.parent.ytdl.process_ie_result(self.ytdl_infos, True, {})
-            self.was_downloaded = True
+            with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
+                ydl.add_post_processor(SaveInfoPP(self))
+                ydl.process_ie_result(self.ytdl_infos, download=True)
         self.parent.save()

+    def update_post_download(self, info: dict) -> None:
+        self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
+
+    @property
+    def was_downloaded(self) -> bool:
+        return self.downloaded_filepath is not None
+
     def preload(self) -> None:
         assert self.is_video
         if self.downloaded:
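
For context, SaveInfoPP relies on yt-dlp's standard post-processor hook: run() receives the final info dict and returns a (files_to_delete, info) pair. A stripped-down sketch of the same pattern, independent of this script (class name and URL are illustrative):

# Minimal post-processor sketch: capture the final filepath after a download.
import yt_dlp

class CapturePathPP(yt_dlp.postprocessor.common.PostProcessor):
    def __init__(self, store: dict) -> None:
        super().__init__()
        self.store = store

    def run(self, info: dict) -> tuple[list, dict]:
        self.store["filepath"] = info.get("filepath")
        return [], info  # nothing to delete, info passed through unchanged

captured: dict = {}
with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
    ydl.add_post_processor(CapturePathPP(captured))
    # ydl.download(["https://example.invalid/some-video"])  # placeholder URL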
@@ -182,18 +256,8 @@ class RVElement:
             return
         self.download()

-    MATCHES_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, None: 1}
-    MATCHES_DURATION_COMPARATORS = {
-        "<": int.__lt__,
-        "-": int.__lt__,
-        ">": int.__gt__,
-        "+": int.__gt__,
-        "=": int.__eq__,
-        None: int.__le__,
-    }
-
     def matches_filter(self, args: configargparse.Namespace) -> bool:
-        # Inexpensive filters
         if args.seen != "any" and (args.seen == "seen") != self.watched:
             log.debug(f"Not {args.seen}: {self}")
             return False
@@ -206,39 +270,22 @@ class RVElement:
         if args.link and not re.search(args.link, self.link):
             log.debug(f"Link not matching {args.link}: {self}")
             return False
-        if args.creator and (not self.creator or not re.search(args.creator, self.creator)):
+        if args.creator and (
+            not self.creator or not re.search(args.creator, self.creator)
+        ):
             log.debug(f"Creator not matching {args.creator}: {self}")
             return False
-        # Expensive filters
         if not self.is_video:
             log.debug(f"Not a video: {self}")
             return False
-        if args.duration:
-            dur = args.duration
-            mult_index = dur[-1].lower()
-            if mult_index.isdigit():
-                mult_index = None
-            else:
-                dur = dur[:-1]
-            try:
-                multiplier = self.MATCHES_DURATION_MULTIPLIERS[mult_index]
-            except IndexError:
-                raise ValueError(f"Unknown duration multiplier: {mult_index}")
-            comp_index = dur[0]
-            if comp_index.isdigit():
-                comp_index = None
-            else:
-                dur = dur[1:]
-            try:
-                comparator = self.MATCHES_DURATION_COMPARATORS[comp_index]
-            except IndexError:
-                raise ValueError(f"Unknown duration comparator: {comp_index}")
-            duration = int(dur)
-            if not comparator(self.duration, duration * multiplier):
-                log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
-                return False
+        if args.duration and not compare_duration(args.duration)(self.duration):
+            log.debug(
+                f"Duration {self.duration} not matching {args.duration}: {self}"
+            )
+            return False
         return True

     def watch(self) -> None:
@@ -365,28 +412,60 @@ class RVDatabase:
     @property
     def ytdl_dry_opts(self) -> dict:
         opts = self.ytdl_opts.copy()
-        opts.update({"simulate": True, "quiet": True})
+        opts.update({"quiet": True})
         return opts

     @property
-    def ytdl(self) -> youtube_dl.YoutubeDL:
-        return youtube_dl.YoutubeDL(self.ytdl_opts)
-
-    @property
-    def ytdl_dry(self) -> youtube_dl.YoutubeDL:
-        return youtube_dl.YoutubeDL(self.ytdl_dry_opts)
+    def ytdl_dry(self) -> yt_dlp.YoutubeDL:
+        return yt_dlp.YoutubeDL(self.ytdl_dry_opts)

     def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
         elements: typing.Iterable[RVElement]
-        if args.order == "old":
-            elements = self.elements
-        elif args.order == "new":
+        # Inexpensive sort
+        if args.order == "new":
             elements = reversed(self.elements)
+        elif args.order == "title":
+            elements = sorted(self.elements, key=lambda el: el.title)
+        elif args.order == "creator":
+            elements = sorted(self.elements, key=lambda el: el.creator or "")
+        elif args.order == "link":
+            elements = sorted(self.elements, key=lambda el: el.link)
         elif args.order == "random":
             elements_random = self.elements.copy()
             random.shuffle(elements_random)
             elements = elements_random
-        return filter(lambda el: el.matches_filter(args), elements)
+        else:
+            elements = self.elements
+        # Possibly expensive filtering
+        elements = filter(lambda el: el.matches_filter(args), elements)
+        # Expensive sort
+        if args.order == "short":
+            elements = sorted(
+                elements, key=lambda el: el.duration if el.is_video else 0
+            )
+        elif args.order == "long":
+            elements = sorted(
+                elements, key=lambda el: el.duration if el.is_video else 0, reverse=True
+            )
+        # Post sorting filtering
+        if args.total_duration:
+            rem = parse_duration(args.total_duration)
+            old_els = list(elements)
+            elements = list()
+            while rem > 0:
+                for el in old_els:
+                    if el.duration < rem:
+                        elements.append(el)
+                        rem -= el.duration
+                        old_els.remove(el)
+                        break
+                else:
+                    break
+        return elements


 def get_args() -> configargparse.Namespace:
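
The new --total-duration pass at the end of filter() is a greedy first-fit selection: it repeatedly takes the first remaining video that still fits under the leftover budget and stops when nothing fits. A self-contained model of just that loop (toy durations, not from the script):

# Standalone model of the greedy first-fit selection used for --total-duration.
def pick_under_budget(durations: list[int], budget: int) -> list[int]:
    remaining, chosen, pool = budget, [], list(durations)
    while remaining > 0:
        for d in pool:
            if d < remaining:  # strictly under the remaining budget
                chosen.append(d)
                remaining -= d
                pool.remove(d)
                break
        else:  # nothing fits anymore
            break
    return chosen

print(pick_under_budget([40 * 60, 25 * 60, 20 * 60, 10 * 60], 60 * 60))
# [2400, 600]: first fit, not necessarily an optimal packing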
@@ -428,6 +507,17 @@ def get_args() -> configargparse.Namespace:
         env_var="RSS_VIDEOS_FEED",
         required=True,
     )
+    parser.add(
+        "--research",
+        help="Fetch video info again",
+        action="store_true",
+    )
+    parser.add(
+        "--no-refresh",
+        dest="refresh",
+        help="Don't fetch feed",
+        action="store_false",
+    )
     parser.add(
         "--videos",
         help="Directory to store videos",
@@ -438,7 +528,7 @@ def get_args() -> configargparse.Namespace:
     # Which videos
     parser.add(
         "--order",
-        choices=("old", "new", "random"),
+        choices=("old", "new", "title", "creator", "link", "short", "long", "random"),
         default="old",
         help="Sorting mechanism",
     )
@@ -447,7 +537,16 @@ def get_args() -> configargparse.Namespace:
     parser.add("--title", help="Regex to filter by title")
     parser.add("--link", help="Regex to filter by link")
     parser.add("--duration", help="Comparative to filter by duration")
-    parser.add("--seen", choices=("seen","unseen","any"), default="unseen", help="Only include seen/unseen/any videos")
+    parser.add(
+        "--seen",
+        choices=("seen", "unseen", "any"),
+        default="unseen",
+        help="Only include seen/unseen/any videos",
+    )
+    parser.add(
+        "--total-duration",
+        help="Use videos that fit under the total given",
+    )
     # TODO Envrionment variables
     parser.add(
         "--max-duration",
@@ -476,7 +575,15 @@ def get_args() -> configargparse.Namespace:
     parser.add(
         "action",
         nargs="?",
-        choices=("download", "list", "watch", "binge", "clean"),
+        choices=(
+            "download",
+            "list",
+            "watch",
+            "binge",
+            "clean",
+            "seen",
+            "unseen",
+        ),
         default="download",
     )
@@ -497,14 +604,22 @@ def main() -> None:
     database = RVDatabase(args)
     cache = RVDatabase.load()
-    try:
-        database.read_feed()
-    except urllib.error.URLError as err:
-        if args.action == "download" or not cache:
-            raise err
-        else:
-            log.warning("Cannot fetch RSS feed, using cached feed.", err)
+    feed_fetched = False
+    if args.refresh:
+        try:
+            database.read_feed()
+            feed_fetched = True
+        except urllib.error.URLError as err:
+            if args.action == "download":
+                raise RuntimeError("Couldn't fetch feed, refusing to download")
+    # This is a quirky failsafe in case of no internet connection,
+    # so the script doesn't go noting that no element is a video.
+    if not feed_fetched:
+        if cache:
+            log.warning("Using cached feed.")
             database.import_cache(cache)
+        else:
+            raise FileNotFoundError("Feed not fetched and no cached feed.")
     if cache:
         database.salvage_cache(cache)
         database.clean_cache(cache)
@@ -514,7 +629,7 @@ def main() -> None:
     if args.action == "clean":
         database.clean()
     else:
-        database.attempt_clean()
+        duration = 0
         for element in database.filter(args):
             if args.action == "download":
                 element.preload()
@@ -522,8 +637,20 @@ def main() -> None:
                 print(element)
             elif args.action in ("watch", "binge"):
                 element.watch()
                 if args.action == "watch":
                     break
elif args.action == "seen":
if not element.watched:
log.info(f"Maked as seen: {element}")
element.watched = True
elif args.action == "unseen":
if element.watched:
log.info(f"Maked as unseen: {element}")
element.watched = False
+            else:
+                raise NotImplementedError(f"Unimplemented action: {args.action}")
+            duration += element.duration if element.is_video else 0
+        log.info(f"Total duration: {format_duration(duration)}")
     database.attempt_clean()
     database.save()