rssVideos: Better sanitization of ytdl info

This commit is contained in:
Geoffrey Frogeye 2021-12-20 18:57:13 +01:00
parent 5b7926df8f
commit 105bd9461c
Signed by: geoffrey
GPG key ID: C72403E7F82E6AD8

View file

@ -51,7 +51,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
yt_dlp.process_ie_result() doesn't return a completely updated info dict,
notably the extension is still the one before it realizes the files cannot
be merged. So we use this PostProcessor to catch the info dict in its final
form and save it.
form and save what we need from it (it's not serializable in this state).
"""
def __init__(self, rvelement: "RVElement") -> None:
@ -59,7 +59,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
super().__init__()
# Post-processor entry point of SaveInfoPP: receives the info dict, records
# data on self.rvelement, and returns an empty list together with the same
# info dict.
# NOTE(review): this text looks like a flattened diff hunk, so the two
# statements below are presumably the removed and the added version of the
# same step rather than code that runs back to back -- confirm against the
# actual file.
def run(self, info: dict) -> tuple[list, dict]:
# Stores the whole info dict on the element's ytdl_infos attribute.
self.rvelement.ytdl_infos = info
# Hands the info dict to the element's update_post_download().
self.rvelement.update_post_download(info)
# presumably the empty list means "no files to delete" in the yt_dlp
# PostProcessor contract -- verify against the yt_dlp documentation.
return [], info
def parse_duration(string: str) -> int:
@ -109,13 +109,13 @@ def format_duration(duration: int) -> str:
class RVElement:
parent: "RVDatabase"
item: minidom.Element
was_downloaded: bool
downloaded_filepath: typing.Optional[str]
watched: bool
# Initializes an element from its parent database and its minidom item, and
# resets the per-element download / watched state.
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
self.parent = parent
self.item = item
# NOTE(review): was_downloaded is also defined further down in this text as a
# property without a setter; if both coexisted, this assignment would raise
# AttributeError. Presumably this line is the removed side of the diff --
# confirm against the actual file.
self.was_downloaded = False
# No download path is known yet; the was_downloaded property treats None as
# "not downloaded".
self.downloaded_filepath = None
self.watched = False
def get_tag_data(self, tag_name: str) -> str:
@ -166,8 +166,8 @@ class RVElement:
if not self.parent.args.research and cache.is_researched:
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
log.debug(f"From cache: {self}")
# if cache.was_downloaded:
# self.was_downloaded = True
if cache.downloaded_filepath:
self.downloaded_filepath = cache.downloaded_filepath
if cache.watched:
self.watched = True
@ -191,7 +191,6 @@ class RVElement:
@functools.cached_property
def ytdl_infos(self) -> typing.Optional[dict]:
# TODO Sanitize according to documentation
log.info(f"Researching: {self}")
try:
infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
@ -201,14 +200,8 @@ class RVElement:
# TODO Still raise in case of temporary network issue
log.warning(e)
infos = None
# Apparently that thing is transformed from a LazyList
# somewhere in the normal yt_dlp process
if (
infos
and "thumbnails" in infos
and isinstance(infos["thumbnails"], yt_dlp.utils.LazyList)
):
infos["thumbnails"] = infos["thumbnails"].exhaust()
if infos:
infos = self.parent.ytdl_dry.sanitize_info(infos)
# Save database once it's been computed
self.__dict__["ytdl_infos"] = infos
self.parent.save()
@ -228,6 +221,8 @@ class RVElement:
# Path of the video file for this element.
# Returns downloaded_filepath when it is set (truthy); otherwise asks the
# parent's ytdl_dry to build a filename from ytdl_infos.
@property
def filepath(self) -> str:
# Only valid for video elements; note that asserts are stripped under -O.
assert self.is_video
# Prefer the path recorded for this element when there is one.
if self.downloaded_filepath:
return self.downloaded_filepath
# Fallback: filename computed from the element's ytdl_infos.
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
@property
@ -242,9 +237,15 @@ class RVElement:
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
ydl.add_post_processor(SaveInfoPP(self))
ydl.process_ie_result(self.ytdl_infos, download=True)
self.was_downloaded = True
self.parent.save()
# Records the file path for this element from the given info dict.
# Called by SaveInfoPP.run() with the info dict it receives.
# The path is computed by the parent's ytdl_dry.prepare_filename(info) and
# stored in downloaded_filepath; nothing else from info is kept here.
def update_post_download(self, info: dict) -> None:
self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
# True once a download path has been recorded for this element.
# Derived from downloaded_filepath instead of being stored separately; the
# check is "is not None", so an empty string would still count as downloaded.
@property
def was_downloaded(self) -> bool:
return self.downloaded_filepath is not None
def preload(self) -> None:
assert self.is_video
if self.downloaded:
@ -628,7 +629,6 @@ def main() -> None:
if args.action == "clean":
database.clean()
else:
database.attempt_clean()
duration = 0
for element in database.filter(args):
if args.action == "download":