rssVideos: Use yt-dlp and download oldest first

2021-10-17 14:29:25 +02:00 · 2021-10-17 14:29:25 +02:00 · 8743082b21
commit 8743082b21
parent cc79262336
1 changed files with 27 additions and 20 deletions
--- a/config/scripts/rssVideos
+++ b/config/scripts/rssVideos
@ -8,17 +8,16 @@ with the unread items (non-video links are ignored).
 """

 # TODO Distribute this correctly, in the meanwhile please do
-# pip install --user youtube-dl ConfigArgParse progressbar2
+# pip install --user yt-dlp ConfigArgParse

 # TODO Better logging (youtube-dl allows passing loggers)

 import sys
-from typing import Dict, Set, Tuple
 import urllib.request
 import urllib.parse
 import os
 from xml.dom import minidom
-import youtube_dl
+import yt_dlp as youtube_dl
 import configargparse


@ -95,18 +94,19 @@ def get_args() -> configargparse.Namespace:
    return args


-def get_links(args: configargparse.Namespace) -> Set[str]:
+def get_links(args: configargparse.Namespace) -> list[str]:
    """
    Read the feed XML, get the links
    """
-    links = set()
+    links = list()
    with urllib.request.urlopen(args.feed) as request:
        with minidom.parse(request) as xmldoc:
            for item in xmldoc.getElementsByTagName("item"):
                try:
                    linkNode = item.getElementsByTagName("link")[0]
                    link: str = linkNode.childNodes[0].data
-                    links.add(link)
+                    if link not in links:
+                        links.append(link)
                except BaseException as e:
                    print("Error while getting link from item:", e)
                    continue
@ -114,8 +114,8 @@ def get_links(args: configargparse.Namespace) -> Set[str]:


 def get_video_infos(
-    args: configargparse.Namespace, ydl_opts: Dict, links: Set[str]
-) -> Dict[str, Dict]:
+    args: configargparse.Namespace, ydl_opts: dict, links: list[str]
+) -> dict[str, dict]:
    """
    Filter out non-video links and store video download info
    and associated filename
@ -148,8 +148,8 @@ def get_video_infos(


 def get_downloaded_videos(
-    args: configargparse.Namespace, videosInfos: Dict[str, Dict]
-) -> Tuple[Set[str], Set[str]]:
+    args: configargparse.Namespace, videosInfos: dict[str, dict]
+) -> tuple[set[str], set[str]]:
    videosDownloaded = set()
    videosPartiallyDownloaded = set()
    """
@ -187,7 +187,7 @@ def get_downloaded_videos(
    return videosDownloaded, videosPartiallyDownloaded


-def get_tracked_videos(args: configargparse.Namespace, known: Set[str]) -> Set[str]:
+def get_tracked_videos(args: configargparse.Namespace, known: set[str]) -> set[str]:
    """
    Return videos previously downloaded (=tracked) amongst the unread videos.
    This is stored in the tracking directory as empty extension-less files.
@ -220,6 +220,8 @@ def main() -> None:

    print("→ Retrieveing RSS feed")
    links = get_links(args)
+    # Oldest first
+    links = links[::-1]

    print(f"→ Getting infos on {len(links)} unread articles")
    videosInfos = get_video_infos(args, ydl_opts, links)
@ -236,8 +238,8 @@ def main() -> None:
        markerPath = os.path.join(args.track, onlineFilename)
        open(markerPath, "a").close()

-    videosToDownload: Set[str] = set()
-    videosReads: Set[str] = set()
+    videosToDownload: set[str] = set()
+    videosReads: set[str] = set()
    for onlineFilename in videosInfos.keys():
        # If the video was once downloaded but manually deleted,
        # the marker should be left
@ -263,14 +265,19 @@ def main() -> None:
    os.chdir(args.videos)

    exit_code = 0
-    if not args.dryrun:
-        # TODO Progressbar one day maybe?
-        # We have all the info we need to make a reliable one
-        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-            for onlineFilename in videosToDownload:
-                infos = videosInfos[onlineFilename]
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        for onlineFilename, infos in videosInfos.items():
+            if onlineFilename not in videosToDownload:
+                continue

-                # Really download
+            # Really download
+            if args.dryrun:
+                print(f"Would download {onlineFilename}")
+            else:
+                # Apparently that thing is transformed from a LazyList
+                # somewhere in the normal yt_dlp process
+                if isinstance(infos["thumbnails"], youtube_dl.utils.LazyList):
+                    infos["thumbnails"] = infos["thumbnails"].exhaust()
                try:
                    ydl.process_ie_result(infos, True, {})