rssVideos: Use yt-dlp and download oldest first

This commit is contained in:
Geoffrey Frogeye 2021-10-17 14:29:25 +02:00
parent cc79262336
commit 8743082b21

View file

@ -8,17 +8,16 @@ with the unread items (non-video links are ignored).
""" """
# TODO Distribute this correctly, in the meanwhile please do # TODO Distribute this correctly, in the meanwhile please do
# pip install --user youtube-dl ConfigArgParse progressbar2 # pip install --user yt-dlp ConfigArgParse
# TODO Better logging (youtube-dl allows passing loggers) # TODO Better logging (youtube-dl allows passing loggers)
import sys import sys
from typing import Dict, Set, Tuple
import urllib.request import urllib.request
import urllib.parse import urllib.parse
import os import os
from xml.dom import minidom from xml.dom import minidom
import youtube_dl import yt_dlp as youtube_dl
import configargparse import configargparse
@ -95,18 +94,19 @@ def get_args() -> configargparse.Namespace:
return args return args
def get_links(args: configargparse.Namespace) -> Set[str]: def get_links(args: configargparse.Namespace) -> list[str]:
""" """
Read the feed XML, get the links Read the feed XML, get the links
""" """
links = set() links = list()
with urllib.request.urlopen(args.feed) as request: with urllib.request.urlopen(args.feed) as request:
with minidom.parse(request) as xmldoc: with minidom.parse(request) as xmldoc:
for item in xmldoc.getElementsByTagName("item"): for item in xmldoc.getElementsByTagName("item"):
try: try:
linkNode = item.getElementsByTagName("link")[0] linkNode = item.getElementsByTagName("link")[0]
link: str = linkNode.childNodes[0].data link: str = linkNode.childNodes[0].data
links.add(link) if link not in links:
links.append(link)
except BaseException as e: except BaseException as e:
print("Error while getting link from item:", e) print("Error while getting link from item:", e)
continue continue
@ -114,8 +114,8 @@ def get_links(args: configargparse.Namespace) -> Set[str]:
def get_video_infos( def get_video_infos(
args: configargparse.Namespace, ydl_opts: Dict, links: Set[str] args: configargparse.Namespace, ydl_opts: dict, links: list[str]
) -> Dict[str, Dict]: ) -> dict[str, dict]:
""" """
Filter out non-video links and store video download info Filter out non-video links and store video download info
and associated filename and associated filename
@ -148,8 +148,8 @@ def get_video_infos(
def get_downloaded_videos( def get_downloaded_videos(
args: configargparse.Namespace, videosInfos: Dict[str, Dict] args: configargparse.Namespace, videosInfos: dict[str, dict]
) -> Tuple[Set[str], Set[str]]: ) -> tuple[set[str], set[str]]:
videosDownloaded = set() videosDownloaded = set()
videosPartiallyDownloaded = set() videosPartiallyDownloaded = set()
""" """
@ -187,7 +187,7 @@ def get_downloaded_videos(
return videosDownloaded, videosPartiallyDownloaded return videosDownloaded, videosPartiallyDownloaded
def get_tracked_videos(args: configargparse.Namespace, known: Set[str]) -> Set[str]: def get_tracked_videos(args: configargparse.Namespace, known: set[str]) -> set[str]:
""" """
Return videos previously downloaded (=tracked) amongst the unread videos. Return videos previously downloaded (=tracked) amongst the unread videos.
This is stored in the tracking directory as empty extension-less files. This is stored in the tracking directory as empty extension-less files.
@ -220,6 +220,8 @@ def main() -> None:
print("→ Retrieveing RSS feed") print("→ Retrieveing RSS feed")
links = get_links(args) links = get_links(args)
# Oldest first
links = links[::-1]
print(f"→ Getting infos on {len(links)} unread articles") print(f"→ Getting infos on {len(links)} unread articles")
videosInfos = get_video_infos(args, ydl_opts, links) videosInfos = get_video_infos(args, ydl_opts, links)
@ -236,8 +238,8 @@ def main() -> None:
markerPath = os.path.join(args.track, onlineFilename) markerPath = os.path.join(args.track, onlineFilename)
open(markerPath, "a").close() open(markerPath, "a").close()
videosToDownload: Set[str] = set() videosToDownload: set[str] = set()
videosReads: Set[str] = set() videosReads: set[str] = set()
for onlineFilename in videosInfos.keys(): for onlineFilename in videosInfos.keys():
# If the video was once downloaded but manually deleted, # If the video was once downloaded but manually deleted,
# the marker should be left # the marker should be left
@ -263,14 +265,19 @@ def main() -> None:
os.chdir(args.videos) os.chdir(args.videos)
exit_code = 0 exit_code = 0
if not args.dryrun: with youtube_dl.YoutubeDL(ydl_opts) as ydl:
# TODO Progressbar one day maybe? for onlineFilename, infos in videosInfos.items():
# We have all the info we need to make a reliable one if onlineFilename not in videosToDownload:
with youtube_dl.YoutubeDL(ydl_opts) as ydl: continue
for onlineFilename in videosToDownload:
infos = videosInfos[onlineFilename]
# Really download # Really download
if args.dryrun:
print(f"Would download {onlineFilename}")
else:
# Apparently the "thumbnails" entry is converted from a LazyList
# somewhere in the normal yt_dlp process, so do the same here
if isinstance(infos["thumbnails"], youtube_dl.utils.LazyList):
infos["thumbnails"] = infos["thumbnails"].exhaust()
try: try:
ydl.process_ie_result(infos, True, {}) ydl.process_ie_result(infos, True, {})