diff --git a/config/scripts/rssVideos b/config/scripts/rssVideos index 1afeb70..fc5f80a 100755 --- a/config/scripts/rssVideos +++ b/config/scripts/rssVideos @@ -8,17 +8,16 @@ with the unread items (non-video links are ignored). """ # TODO Distribute this correclty, in the meanwhile please do -# pip install --user youtube-dl ConfigArgParse progressbar2 +# pip install --user yt-dlp ConfigArgParse # TODO Better logging (youtube-dl allow to pass loggers) import sys -from typing import Dict, Set, Tuple import urllib.request import urllib.parse import os from xml.dom import minidom -import youtube_dl +import yt_dlp as youtube_dl import configargparse @@ -95,18 +94,19 @@ def get_args() -> configargparse.Namespace: return args -def get_links(args: configargparse.Namespace) -> Set[str]: +def get_links(args: configargparse.Namespace) -> list[str]: """ Read the feed XML, get the links """ - links = set() + links = list() with urllib.request.urlopen(args.feed) as request: with minidom.parse(request) as xmldoc: for item in xmldoc.getElementsByTagName("item"): try: linkNode = item.getElementsByTagName("link")[0] link: str = linkNode.childNodes[0].data - links.add(link) + if link not in links: + links.append(link) except BaseException as e: print("Error while getting link from item:", e) continue @@ -114,8 +114,8 @@ def get_links(args: configargparse.Namespace) -> Set[str]: def get_video_infos( - args: configargparse.Namespace, ydl_opts: Dict, links: Set[str] -) -> Dict[str, Dict]: + args: configargparse.Namespace, ydl_opts: dict, links: list[str] +) -> dict[str, dict]: """ Filter out non-video links and store video download info and associated filename @@ -148,8 +148,8 @@ def get_video_infos( def get_downloaded_videos( - args: configargparse.Namespace, videosInfos: Dict[str, Dict] -) -> Tuple[Set[str], Set[str]]: + args: configargparse.Namespace, videosInfos: dict[str, dict] +) -> tuple[set[str], set[str]]: videosDownloaded = set() videosPartiallyDownloaded = set() """ @@ -187,7 +187,7 @@ def get_downloaded_videos( return videosDownloaded, videosPartiallyDownloaded -def get_tracked_videos(args: configargparse.Namespace, known: Set[str]) -> Set[str]: +def get_tracked_videos(args: configargparse.Namespace, known: set[str]) -> set[str]: """ Return videos previously downloaded (=tracked) amongst the unread videos. This is stored in the tracking directory as empty extension-less files. @@ -220,6 +220,8 @@ def main() -> None: print("→ Retrieveing RSS feed") links = get_links(args) + # Oldest first + links = links[::-1] print(f"→ Getting infos on {len(links)} unread articles") videosInfos = get_video_infos(args, ydl_opts, links) @@ -236,8 +238,8 @@ def main() -> None: markerPath = os.path.join(args.track, onlineFilename) open(markerPath, "a").close() - videosToDownload: Set[str] = set() - videosReads: Set[str] = set() + videosToDownload: set[str] = set() + videosReads: set[str] = set() for onlineFilename in videosInfos.keys(): # If the video was once downloaded but manually deleted, # the marker should be left @@ -263,14 +265,19 @@ def main() -> None: os.chdir(args.videos) exit_code = 0 - if not args.dryrun: - # TODO Progressbar one day maybe? - # We have all the info we need to make a reliable one - with youtube_dl.YoutubeDL(ydl_opts) as ydl: - for onlineFilename in videosToDownload: - infos = videosInfos[onlineFilename] + with youtube_dl.YoutubeDL(ydl_opts) as ydl: + for onlineFilename, infos in videosInfos.items(): + if onlineFilename not in videosToDownload: + continue - # Really download + # Really download + if args.dryrun: + print(f"Would download {onlineFilename}") + else: + # Apparently that thing is transformed from a LazyList + # somewhere in the normal yt_dlp process + if isinstance(infos["thumbnails"], youtube_dl.utils.LazyList): + infos["thumbnails"] = infos["thumbnails"].exhaust() try: ydl.process_ie_result(infos, True, {})