rssVideos: Use yt-dlp and download oldest first
This commit is contained in:
parent
cc79262336
commit
8743082b21
|
@ -8,17 +8,16 @@ with the unread items (non-video links are ignored).
|
|||
"""
|
||||
|
||||
# TODO Distribute this correctly, in the meantime please do
|
||||
# pip install --user youtube-dl ConfigArgParse progressbar2
|
||||
# pip install --user yt-dlp ConfigArgParse
|
||||
|
||||
# TODO Better logging (youtube-dl allows passing loggers)
|
||||
|
||||
import sys
|
||||
from typing import Dict, Set, Tuple
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import os
|
||||
from xml.dom import minidom
|
||||
import youtube_dl
|
||||
import yt_dlp as youtube_dl
|
||||
import configargparse
|
||||
|
||||
|
||||
|
@ -95,18 +94,19 @@ def get_args() -> configargparse.Namespace:
|
|||
return args
|
||||
|
||||
|
||||
def get_links(args: configargparse.Namespace) -> Set[str]:
|
||||
def get_links(args: configargparse.Namespace) -> list[str]:
|
||||
"""
|
||||
Read the feed XML, get the links
|
||||
"""
|
||||
links = set()
|
||||
links = list()
|
||||
with urllib.request.urlopen(args.feed) as request:
|
||||
with minidom.parse(request) as xmldoc:
|
||||
for item in xmldoc.getElementsByTagName("item"):
|
||||
try:
|
||||
linkNode = item.getElementsByTagName("link")[0]
|
||||
link: str = linkNode.childNodes[0].data
|
||||
links.add(link)
|
||||
if link not in links:
|
||||
links.append(link)
|
||||
except BaseException as e:
|
||||
print("Error while getting link from item:", e)
|
||||
continue
|
||||
|
@ -114,8 +114,8 @@ def get_links(args: configargparse.Namespace) -> Set[str]:
|
|||
|
||||
|
||||
def get_video_infos(
|
||||
args: configargparse.Namespace, ydl_opts: Dict, links: Set[str]
|
||||
) -> Dict[str, Dict]:
|
||||
args: configargparse.Namespace, ydl_opts: dict, links: list[str]
|
||||
) -> dict[str, dict]:
|
||||
"""
|
||||
Filter out non-video links and store video download info
|
||||
and associated filename
|
||||
|
@ -148,8 +148,8 @@ def get_video_infos(
|
|||
|
||||
|
||||
def get_downloaded_videos(
|
||||
args: configargparse.Namespace, videosInfos: Dict[str, Dict]
|
||||
) -> Tuple[Set[str], Set[str]]:
|
||||
args: configargparse.Namespace, videosInfos: dict[str, dict]
|
||||
) -> tuple[set[str], set[str]]:
|
||||
videosDownloaded = set()
|
||||
videosPartiallyDownloaded = set()
|
||||
"""
|
||||
|
@ -187,7 +187,7 @@ def get_downloaded_videos(
|
|||
return videosDownloaded, videosPartiallyDownloaded
|
||||
|
||||
|
||||
def get_tracked_videos(args: configargparse.Namespace, known: Set[str]) -> Set[str]:
|
||||
def get_tracked_videos(args: configargparse.Namespace, known: set[str]) -> set[str]:
|
||||
"""
|
||||
Return videos previously downloaded (=tracked) amongst the unread videos.
|
||||
This is stored in the tracking directory as empty extension-less files.
|
||||
|
@ -220,6 +220,8 @@ def main() -> None:
|
|||
|
||||
print("→ Retrieveing RSS feed")
|
||||
links = get_links(args)
|
||||
# Oldest first
|
||||
links = links[::-1]
|
||||
|
||||
print(f"→ Getting infos on {len(links)} unread articles")
|
||||
videosInfos = get_video_infos(args, ydl_opts, links)
|
||||
|
@ -236,8 +238,8 @@ def main() -> None:
|
|||
markerPath = os.path.join(args.track, onlineFilename)
|
||||
open(markerPath, "a").close()
|
||||
|
||||
videosToDownload: Set[str] = set()
|
||||
videosReads: Set[str] = set()
|
||||
videosToDownload: set[str] = set()
|
||||
videosReads: set[str] = set()
|
||||
for onlineFilename in videosInfos.keys():
|
||||
# If the video was once downloaded but manually deleted,
|
||||
# the marker should be left
|
||||
|
@ -263,14 +265,19 @@ def main() -> None:
|
|||
os.chdir(args.videos)
|
||||
|
||||
exit_code = 0
|
||||
if not args.dryrun:
|
||||
# TODO Progressbar one day maybe?
|
||||
# We have all the info we need to make a reliable one
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
for onlineFilename in videosToDownload:
|
||||
infos = videosInfos[onlineFilename]
|
||||
for onlineFilename, infos in videosInfos.items():
|
||||
if onlineFilename not in videosToDownload:
|
||||
continue
|
||||
|
||||
# Really download
|
||||
if args.dryrun:
|
||||
print(f"Would download {onlineFilename}")
|
||||
else:
|
||||
# Apparently that thing is transformed from a LazyList
|
||||
# somewhere in the normal yt_dlp process
|
||||
if isinstance(infos["thumbnails"], youtube_dl.utils.LazyList):
|
||||
infos["thumbnails"] = infos["thumbnails"].exhaust()
|
||||
try:
|
||||
ydl.process_ie_result(infos, True, {})
|
||||
|
||||
|
|
Loading…
Reference in a new issue