rssVideos: Better sanitization of ytdl info

This commit is contained in:
Geoffrey Frogeye 2021-12-20 18:57:13 +01:00
parent 5b7926df8f
commit 105bd9461c
Signed by: geoffrey
GPG key ID: C72403E7F82E6AD8

View file

@ -51,7 +51,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
yt_dlp.process_ie_result() doesn't return a completely updated info dict, yt_dlp.process_ie_result() doesn't return a completely updated info dict,
notably the extension is still the one before it realizes the files cannot notably the extension is still the one before it realizes the files cannot
be merged. So we use this PostProcessor to catch the info dict in its final be merged. So we use this PostProcessor to catch the info dict in its final
form and save it. form and save what we need from it (it's not serializable in this state).
""" """
def __init__(self, rvelement: "RVElement") -> None: def __init__(self, rvelement: "RVElement") -> None:
@ -59,7 +59,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
super().__init__() super().__init__()
def run(self, info: dict) -> tuple[list, dict]: def run(self, info: dict) -> tuple[list, dict]:
self.rvelement.ytdl_infos = info self.rvelement.update_post_download(info)
return [], info return [], info
def parse_duration(string: str) -> int: def parse_duration(string: str) -> int:
@ -109,13 +109,13 @@ def format_duration(duration: int) -> str:
class RVElement: class RVElement:
parent: "RVDatabase" parent: "RVDatabase"
item: minidom.Element item: minidom.Element
was_downloaded: bool downloaded_filepath: typing.Optional[str]
watched: bool watched: bool
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None: def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
self.parent = parent self.parent = parent
self.item = item self.item = item
self.was_downloaded = False self.downloaded_filepath = None
self.watched = False self.watched = False
def get_tag_data(self, tag_name: str) -> str: def get_tag_data(self, tag_name: str) -> str:
@ -166,8 +166,8 @@ class RVElement:
if not self.parent.args.research and cache.is_researched: if not self.parent.args.research and cache.is_researched:
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"] self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
log.debug(f"From cache: {self}") log.debug(f"From cache: {self}")
# if cache.was_downloaded: if cache.downloaded_filepath:
# self.was_downloaded = True self.downloaded_filepath = cache.downloaded_filepath
if cache.watched: if cache.watched:
self.watched = True self.watched = True
@ -191,7 +191,6 @@ class RVElement:
@functools.cached_property @functools.cached_property
def ytdl_infos(self) -> typing.Optional[dict]: def ytdl_infos(self) -> typing.Optional[dict]:
# TODO Sanitize according to documentation
log.info(f"Researching: {self}") log.info(f"Researching: {self}")
try: try:
infos = self.parent.ytdl_dry.extract_info(self.link, download=False) infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
@ -201,14 +200,8 @@ class RVElement:
# TODO Still raise in case of temporary network issue # TODO Still raise in case of temporary network issue
log.warning(e) log.warning(e)
infos = None infos = None
# Apparently that thing is transformed from a LazyList if infos:
# somewhere in the normal yt_dlp process infos = self.parent.ytdl_dry.sanitize_info(infos)
if (
infos
and "thumbnails" in infos
and isinstance(infos["thumbnails"], yt_dlp.utils.LazyList)
):
infos["thumbnails"] = infos["thumbnails"].exhaust()
# Save database once it's been computed # Save database once it's been computed
self.__dict__["ytdl_infos"] = infos self.__dict__["ytdl_infos"] = infos
self.parent.save() self.parent.save()
@ -228,6 +221,8 @@ class RVElement:
@property @property
def filepath(self) -> str: def filepath(self) -> str:
assert self.is_video assert self.is_video
if self.downloaded_filepath:
return self.downloaded_filepath
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos) return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
@property @property
@ -242,9 +237,15 @@ class RVElement:
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl: with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
ydl.add_post_processor(SaveInfoPP(self)) ydl.add_post_processor(SaveInfoPP(self))
ydl.process_ie_result(self.ytdl_infos, download=True) ydl.process_ie_result(self.ytdl_infos, download=True)
self.was_downloaded = True
self.parent.save() self.parent.save()
def update_post_download(self, info: dict) -> None:
    """
    Record the path of the file described by a post-download info dict.

    The path is computed by ``self.parent.ytdl_dry.prepare_filename`` from
    ``info`` and stored in ``self.downloaded_filepath``.
    """
    ytdl = self.parent.ytdl_dry
    final_path = ytdl.prepare_filename(info)
    self.downloaded_filepath = final_path
@property
def was_downloaded(self) -> bool:
    """Tell whether a downloaded file path has been recorded (is not None)."""
    recorded_path = self.downloaded_filepath
    return recorded_path is not None
def preload(self) -> None: def preload(self) -> None:
assert self.is_video assert self.is_video
if self.downloaded: if self.downloaded:
@ -628,7 +629,6 @@ def main() -> None:
if args.action == "clean": if args.action == "clean":
database.clean() database.clean()
else: else:
database.attempt_clean()
duration = 0 duration = 0
for element in database.filter(args): for element in database.filter(args):
if args.action == "download": if args.action == "download":