rssVideos: Better sanitization of ytdl info

2021-12-20 18:57:13 +01:00 · 2021-12-20 18:57:13 +01:00 · 105bd9461c
commit 105bd9461c
parent 5b7926df8f
1 changed file with 17 additions and 17 deletions
--- a/config/scripts/rssVideos
+++ b/config/scripts/rssVideos
@@ -51,7 +51,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
    yt_dlp.process_ie_result() doesn't return a completely updated info dict,
    notably the extension is still the one before it realizes the files cannot
    be merged. So we use this PostProcessor to catch the info dict in its final
-    form and save it.
+    form and save what we need from it (it's not serializable in this state).
    """
    def __init__(self, rvelement: "RVElement") -> None:
@@ -59,7 +59,7 @@ class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
        super().__init__()
    def run(self, info: dict) -> tuple[list, dict]:
-        self.rvelement.ytdl_infos = info
+        self.rvelement.update_post_download(info)
        return [], info
 def parse_duration(string: str) -> int:
@@ -109,13 +109,13 @@ def format_duration(duration: int) -> str:
 class RVElement:
    parent: "RVDatabase"
    item: minidom.Element
-    was_downloaded: bool
+    downloaded_filepath: typing.Optional[str]
    watched: bool
    def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
        self.parent = parent
        self.item = item
-        self.was_downloaded = False
+        self.downloaded_filepath = None
        self.watched = False
    def get_tag_data(self, tag_name: str) -> str:
@@ -166,8 +166,8 @@ class RVElement:
        if not self.parent.args.research and cache.is_researched:
            self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
            log.debug(f"From cache: {self}")
-        # if cache.was_downloaded:
+        if cache.downloaded_filepath:
-        #     self.was_downloaded = True
+            self.downloaded_filepath = cache.downloaded_filepath
        if cache.watched:
            self.watched = True
@@ -191,7 +191,6 @@ class RVElement:
    @functools.cached_property
    def ytdl_infos(self) -> typing.Optional[dict]:
-        # TODO Sanitize according to documentation
        log.info(f"Researching: {self}")
        try:
            infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
@@ -201,14 +200,8 @@ class RVElement:
            # TODO Still raise in case of temporary network issue
            log.warning(e)
            infos = None
-        # Apparently that thing is transformed from a LazyList
+        if infos:
-        # somewhere in the normal yt_dlp process
+            infos = self.parent.ytdl_dry.sanitize_info(infos)
-        if (
-            infos
-            and "thumbnails" in infos
-            and isinstance(infos["thumbnails"], yt_dlp.utils.LazyList)
-        ):
-            infos["thumbnails"] = infos["thumbnails"].exhaust()
        # Save database once it's been computed
        self.__dict__["ytdl_infos"] = infos
        self.parent.save()
@@ -228,6 +221,8 @@ class RVElement:
    @property
    def filepath(self) -> str:
        assert self.is_video
+        if self.downloaded_filepath:
+            return self.downloaded_filepath
        return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
    @property
@@ -242,9 +237,15 @@ class RVElement:
            with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
                ydl.add_post_processor(SaveInfoPP(self))
                ydl.process_ie_result(self.ytdl_infos, download=True)
-        self.was_downloaded = True
        self.parent.save()
+    def update_post_download(self, info: dict) -> None:
+        self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
+    @property
+    def was_downloaded(self) -> bool:
+        return self.downloaded_filepath is not None
    def preload(self) -> None:
        assert self.is_video
        if self.downloaded:
@@ -628,7 +629,6 @@ def main() -> None:
    if args.action == "clean":
        database.clean()
    else:
        database.attempt_clean()
        duration = 0
        for element in database.filter(args):
            if args.action == "download":