rssVideos: Use yt-dlp and download oldest first
This commit is contained in:
parent
cc79262336
commit
8743082b21
|
@ -8,17 +8,16 @@ with the unread items (non-video links are ignored).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO Distribute this correctly, in the meanwhile please do
|
# TODO Distribute this correctly, in the meanwhile please do
|
||||||
# pip install --user youtube-dl ConfigArgParse progressbar2
|
# pip install --user yt-dlp ConfigArgParse
|
||||||
|
|
||||||
# TODO Better logging (youtube-dl allows passing loggers)
|
# TODO Better logging (youtube-dl allows passing loggers)
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from typing import Dict, Set, Tuple
|
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import os
|
import os
|
||||||
from xml.dom import minidom
|
from xml.dom import minidom
|
||||||
import youtube_dl
|
import yt_dlp as youtube_dl
|
||||||
import configargparse
|
import configargparse
|
||||||
|
|
||||||
|
|
||||||
|
@ -95,18 +94,19 @@ def get_args() -> configargparse.Namespace:
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
def get_links(args: configargparse.Namespace) -> Set[str]:
|
def get_links(args: configargparse.Namespace) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Read the feed XML, get the links
|
Read the feed XML, get the links
|
||||||
"""
|
"""
|
||||||
links = set()
|
links = list()
|
||||||
with urllib.request.urlopen(args.feed) as request:
|
with urllib.request.urlopen(args.feed) as request:
|
||||||
with minidom.parse(request) as xmldoc:
|
with minidom.parse(request) as xmldoc:
|
||||||
for item in xmldoc.getElementsByTagName("item"):
|
for item in xmldoc.getElementsByTagName("item"):
|
||||||
try:
|
try:
|
||||||
linkNode = item.getElementsByTagName("link")[0]
|
linkNode = item.getElementsByTagName("link")[0]
|
||||||
link: str = linkNode.childNodes[0].data
|
link: str = linkNode.childNodes[0].data
|
||||||
links.add(link)
|
if link not in links:
|
||||||
|
links.append(link)
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
print("Error while getting link from item:", e)
|
print("Error while getting link from item:", e)
|
||||||
continue
|
continue
|
||||||
|
@ -114,8 +114,8 @@ def get_links(args: configargparse.Namespace) -> Set[str]:
|
||||||
|
|
||||||
|
|
||||||
def get_video_infos(
|
def get_video_infos(
|
||||||
args: configargparse.Namespace, ydl_opts: Dict, links: Set[str]
|
args: configargparse.Namespace, ydl_opts: dict, links: list[str]
|
||||||
) -> Dict[str, Dict]:
|
) -> dict[str, dict]:
|
||||||
"""
|
"""
|
||||||
Filter out non-video links and store video download info
|
Filter out non-video links and store video download info
|
||||||
and associated filename
|
and associated filename
|
||||||
|
@ -148,8 +148,8 @@ def get_video_infos(
|
||||||
|
|
||||||
|
|
||||||
def get_downloaded_videos(
|
def get_downloaded_videos(
|
||||||
args: configargparse.Namespace, videosInfos: Dict[str, Dict]
|
args: configargparse.Namespace, videosInfos: dict[str, dict]
|
||||||
) -> Tuple[Set[str], Set[str]]:
|
) -> tuple[set[str], set[str]]:
|
||||||
videosDownloaded = set()
|
videosDownloaded = set()
|
||||||
videosPartiallyDownloaded = set()
|
videosPartiallyDownloaded = set()
|
||||||
"""
|
"""
|
||||||
|
@ -187,7 +187,7 @@ def get_downloaded_videos(
|
||||||
return videosDownloaded, videosPartiallyDownloaded
|
return videosDownloaded, videosPartiallyDownloaded
|
||||||
|
|
||||||
|
|
||||||
def get_tracked_videos(args: configargparse.Namespace, known: Set[str]) -> Set[str]:
|
def get_tracked_videos(args: configargparse.Namespace, known: set[str]) -> set[str]:
|
||||||
"""
|
"""
|
||||||
Return videos previously downloaded (=tracked) amongst the unread videos.
|
Return videos previously downloaded (=tracked) amongst the unread videos.
|
||||||
This is stored in the tracking directory as empty extension-less files.
|
This is stored in the tracking directory as empty extension-less files.
|
||||||
|
@ -220,6 +220,8 @@ def main() -> None:
|
||||||
|
|
||||||
print("→ Retrieveing RSS feed")
|
print("→ Retrieveing RSS feed")
|
||||||
links = get_links(args)
|
links = get_links(args)
|
||||||
|
# Oldest first
|
||||||
|
links = links[::-1]
|
||||||
|
|
||||||
print(f"→ Getting infos on {len(links)} unread articles")
|
print(f"→ Getting infos on {len(links)} unread articles")
|
||||||
videosInfos = get_video_infos(args, ydl_opts, links)
|
videosInfos = get_video_infos(args, ydl_opts, links)
|
||||||
|
@ -236,8 +238,8 @@ def main() -> None:
|
||||||
markerPath = os.path.join(args.track, onlineFilename)
|
markerPath = os.path.join(args.track, onlineFilename)
|
||||||
open(markerPath, "a").close()
|
open(markerPath, "a").close()
|
||||||
|
|
||||||
videosToDownload: Set[str] = set()
|
videosToDownload: set[str] = set()
|
||||||
videosReads: Set[str] = set()
|
videosReads: set[str] = set()
|
||||||
for onlineFilename in videosInfos.keys():
|
for onlineFilename in videosInfos.keys():
|
||||||
# If the video was once downloaded but manually deleted,
|
# If the video was once downloaded but manually deleted,
|
||||||
# the marker should be left
|
# the marker should be left
|
||||||
|
@ -263,14 +265,19 @@ def main() -> None:
|
||||||
os.chdir(args.videos)
|
os.chdir(args.videos)
|
||||||
|
|
||||||
exit_code = 0
|
exit_code = 0
|
||||||
if not args.dryrun:
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
# TODO Progressbar one day maybe?
|
for onlineFilename, infos in videosInfos.items():
|
||||||
# We have all the info we need to make a reliable one
|
if onlineFilename not in videosToDownload:
|
||||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
continue
|
||||||
for onlineFilename in videosToDownload:
|
|
||||||
infos = videosInfos[onlineFilename]
|
|
||||||
|
|
||||||
# Really download
|
# Really download
|
||||||
|
if args.dryrun:
|
||||||
|
print(f"Would download {onlineFilename}")
|
||||||
|
else:
|
||||||
|
# Apparently that thing is transformed from a LazyList
|
||||||
|
# somewhere in the normal yt_dlp process
|
||||||
|
if isinstance(infos["thumbnails"], youtube_dl.utils.LazyList):
|
||||||
|
infos["thumbnails"] = infos["thumbnails"].exhaust()
|
||||||
try:
|
try:
|
||||||
ydl.process_ie_result(infos, True, {})
|
ydl.process_ie_result(infos, True, {})
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue