rssVideos: Better sanitization of ytdl info
This commit is contained in:
parent
5b7926df8f
commit
105bd9461c
|
@ -51,7 +51,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
|
||||||
yt_dlp.process_ie_result() doesn't return a completely updated info dict,
|
yt_dlp.process_ie_result() doesn't return a completely updated info dict,
|
||||||
notably the extension is still the one before it realizes the files cannot
|
notably the extension is still the one before it realizes the files cannot
|
||||||
be merged. So we use this PostProcessor to catch the info dict in its final
|
be merged. So we use this PostProcessor to catch the info dict in its final
|
||||||
form and save it.
|
form and save what we need from it (it's not serializable in this state).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, rvelement: "RVElement") -> None:
|
def __init__(self, rvelement: "RVElement") -> None:
|
||||||
|
@ -59,7 +59,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def run(self, info: dict) -> tuple[list, dict]:
|
def run(self, info: dict) -> tuple[list, dict]:
|
||||||
self.rvelement.ytdl_infos = info
|
self.rvelement.update_post_download(info)
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
def parse_duration(string: str) -> int:
|
def parse_duration(string: str) -> int:
|
||||||
|
@ -109,13 +109,13 @@ def format_duration(duration: int) -> str:
|
||||||
class RVElement:
|
class RVElement:
|
||||||
parent: "RVDatabase"
|
parent: "RVDatabase"
|
||||||
item: minidom.Element
|
item: minidom.Element
|
||||||
was_downloaded: bool
|
downloaded_filepath: typing.Optional[str]
|
||||||
watched: bool
|
watched: bool
|
||||||
|
|
||||||
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
|
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.item = item
|
self.item = item
|
||||||
self.was_downloaded = False
|
self.downloaded_filepath = None
|
||||||
self.watched = False
|
self.watched = False
|
||||||
|
|
||||||
def get_tag_data(self, tag_name: str) -> str:
|
def get_tag_data(self, tag_name: str) -> str:
|
||||||
|
@ -166,8 +166,8 @@ class RVElement:
|
||||||
if not self.parent.args.research and cache.is_researched:
|
if not self.parent.args.research and cache.is_researched:
|
||||||
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
|
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
|
||||||
log.debug(f"From cache: {self}")
|
log.debug(f"From cache: {self}")
|
||||||
# if cache.was_downloaded:
|
if cache.downloaded_filepath:
|
||||||
# self.was_downloaded = True
|
self.downloaded_filepath = cache.downloaded_filepath
|
||||||
if cache.watched:
|
if cache.watched:
|
||||||
self.watched = True
|
self.watched = True
|
||||||
|
|
||||||
|
@ -191,7 +191,6 @@ class RVElement:
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def ytdl_infos(self) -> typing.Optional[dict]:
|
def ytdl_infos(self) -> typing.Optional[dict]:
|
||||||
# TODO Sanitize according to documentation
|
|
||||||
log.info(f"Researching: {self}")
|
log.info(f"Researching: {self}")
|
||||||
try:
|
try:
|
||||||
infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
|
infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
|
||||||
|
@ -201,14 +200,8 @@ class RVElement:
|
||||||
# TODO Still raise in case of temporary network issue
|
# TODO Still raise in case of temporary network issue
|
||||||
log.warning(e)
|
log.warning(e)
|
||||||
infos = None
|
infos = None
|
||||||
# Apparently that thing is transformed from a LazyList
|
if infos:
|
||||||
# somewhere in the normal yt_dlp process
|
infos = self.parent.ytdl_dry.sanitize_info(infos)
|
||||||
if (
|
|
||||||
infos
|
|
||||||
and "thumbnails" in infos
|
|
||||||
and isinstance(infos["thumbnails"], yt_dlp.utils.LazyList)
|
|
||||||
):
|
|
||||||
infos["thumbnails"] = infos["thumbnails"].exhaust()
|
|
||||||
# Save database once it's been computed
|
# Save database once it's been computed
|
||||||
self.__dict__["ytdl_infos"] = infos
|
self.__dict__["ytdl_infos"] = infos
|
||||||
self.parent.save()
|
self.parent.save()
|
||||||
|
@ -228,6 +221,8 @@ class RVElement:
|
||||||
@property
|
@property
|
||||||
def filepath(self) -> str:
|
def filepath(self) -> str:
|
||||||
assert self.is_video
|
assert self.is_video
|
||||||
|
if self.downloaded_filepath:
|
||||||
|
return self.downloaded_filepath
|
||||||
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
|
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -242,9 +237,15 @@ class RVElement:
|
||||||
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
|
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
|
||||||
ydl.add_post_processor(SaveInfoPP(self))
|
ydl.add_post_processor(SaveInfoPP(self))
|
||||||
ydl.process_ie_result(self.ytdl_infos, download=True)
|
ydl.process_ie_result(self.ytdl_infos, download=True)
|
||||||
self.was_downloaded = True
|
|
||||||
self.parent.save()
|
self.parent.save()
|
||||||
|
|
||||||
|
def update_post_download(self, info: dict) -> None:
|
||||||
|
self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def was_downloaded(self) -> bool:
|
||||||
|
return self.downloaded_filepath is not None
|
||||||
|
|
||||||
def preload(self) -> None:
|
def preload(self) -> None:
|
||||||
assert self.is_video
|
assert self.is_video
|
||||||
if self.downloaded:
|
if self.downloaded:
|
||||||
|
@ -628,7 +629,6 @@ def main() -> None:
|
||||||
if args.action == "clean":
|
if args.action == "clean":
|
||||||
database.clean()
|
database.clean()
|
||||||
else:
|
else:
|
||||||
database.attempt_clean()
|
|
||||||
duration = 0
|
duration = 0
|
||||||
for element in database.filter(args):
|
for element in database.filter(args):
|
||||||
if args.action == "download":
|
if args.action == "download":
|
||||||
|
|
Loading…
Reference in a new issue