#!/usr/bin/env python3

"""
Script that downloads videos that are linked as articles in an RSS feed.
The common use case would be a feed from an RSS aggregator with the unread
items (non-video links are ignored).
"""

# TODO Distribute this correctly; in the meanwhile please do
# pip install --user youtube-dl ConfigArgParse progressbar2

# TODO Allow to specify youtube_dl options (e.g. subtitles)
# TODO Restrict quality (it's not that I don't like 8GB 4K videos but...)

from typing import Dict, Set
import urllib.request
import urllib.parse
import os
from xml.dom import minidom

import youtube_dl
import configargparse


if __name__ == "__main__":

    defaultConfigPath = os.path.join(os.path.expanduser(
        os.getenv('XDG_CONFIG_PATH', '~/.config/')), 'rssVideos')

    parser = configargparse.ArgParser(
        description="Download videos linked in an RSS feed (e.g. an unread "
                    "feed from an RSS aggregator)",
        default_config_files=[defaultConfigPath])
    parser.add('-c', '--config', required=False, is_config_file=True,
               help='Configuration file')
    parser.add('--feed', help='URL of the RSS feed (must be public for now)',
               env_var='RSS_VIDEOS_FEED', required=True)
    parser.add('--videos', help='Directory to store videos',
               env_var='RSS_VIDEOS_VIDEO_DIR', required=True)
    # TODO This feature might require additional documentation and an
    # on/off switch
    parser.add('--track', required=False, default='.rssVideos',
               env_var='RSS_VIDEOS_TRACK',
               help='Directory where downloaded videos are marked '
                    '(so they are not downloaded twice)')

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))
    args.track = os.path.expanduser(args.track)
    if not os.path.isabs(args.track):
        args.track = os.path.realpath(os.path.join(args.videos, args.track))

    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(args.track, exist_ok=True)
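
    # For reference, a minimal configuration file sketch (the values below are
    # hypothetical placeholders, not defaults of this script). ConfigArgParse
    # reads "key = value" lines where keys match the long option names:
    #
    #   # ~/.config/rssVideos
    #   feed = https://rss.example.com/feed.xml
    #   videos = ~/Videos/rss
    #   track = .rssVideos
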
    # Read the feed XML, get the links
    print("→ Retrieving RSS feed")

    links: Set[str] = set()
    with urllib.request.urlopen(args.feed) as request:
        with minidom.parse(request) as xmldoc:
            for item in xmldoc.getElementsByTagName('item'):
                try:
                    linkNode = item.getElementsByTagName('link')[0]
                    link: str = linkNode.childNodes[0].data
                    links.add(link)
                except BaseException as e:
                    print("Error while getting link from item:", e)
                    continue

    # Filter out non-video links and store video download info
    # and associated filename
    print(f"→ Getting info on {len(links)} unread articles")

    videosInfos: Dict[str, str] = {}

    ydl_opts = {
        "simulate": True,
        "quiet": True
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        for link in links:
            print(f"Researching {link}...")
            try:
                infos = ydl.extract_info(link)
                filepath = ydl.prepare_filename(infos)
                filename, extension = os.path.splitext(filepath)
                videosInfos[filename] = infos
            except BaseException as e:
                print(e)
                continue

    # Read the directory content, delete everything that's neither a
    # video on the download list nor already downloaded
    print(f"→ Deciding what to do for {len(videosInfos)} videos")

    # Getting information on the video directory
    videosDownloaded: Set[str] = set()
    videosPartiallyDownloaded: Set[str] = set()
    for filepath in os.listdir(args.videos):
        fullpath = os.path.join(args.videos, filepath)
        if not os.path.isfile(fullpath):
            continue
        filename, extension = os.path.splitext(filepath)

        for onlineFilename in videosInfos.keys():
            # Full name already there: completely downloaded
            # → remove from the download list
            if filename == onlineFilename:
                videosDownloaded.add(onlineFilename)
                break
            # Partial name already there: not completely downloaded
            # → keep on the download list
            elif filename.startswith(onlineFilename):
                videosPartiallyDownloaded.add(onlineFilename)
                break
        # Unrelated filename: delete
        else:
            print(f"Deleting: {filename}")
            os.unlink(fullpath)

    # Getting information on the tracking directory
    # (videos that were once downloaded using this tool)
    videosTracked: Set[str] = set()
    for filepath in os.listdir(args.track):
        fullpath = os.path.join(args.track, filepath)
        if not os.path.isfile(fullpath):
            continue
        # Marker files have no extension, so their name matches
        # the keys of videosInfos directly
        if filepath in videosInfos:
            videosTracked.add(filepath)
        else:
            os.unlink(fullpath)

    # Deciding for the rest based on the gathered information

    def markTracked(filename):
        markerPath = os.path.join(args.track, filename)
        open(markerPath, 'a').close()

    videosToDownload: Set[str] = set()
    videosReads: Set[str] = set()
    for onlineFilename in videosInfos.keys():
        # If the video was once downloaded but manually deleted,
        # the marker should be left
        if onlineFilename in videosTracked:
            print(f"Should be marked as read: {onlineFilename}")
            # TODO Automatically do that one day maybe?
            # Need to login to the FreshRSS API and keep track of
            # the item id along the process
            videosReads.add(onlineFilename)
        elif onlineFilename in videosDownloaded:
            markTracked(onlineFilename)
            print(f"Already downloaded: {onlineFilename}")
        else:
            if onlineFilename in videosPartiallyDownloaded:
                print(f"Will be continued: {onlineFilename}")
            else:
                print(f"Will be downloaded: {onlineFilename}")
            videosToDownload.add(onlineFilename)

    # Download the missing videos
    print(f"→ Downloading {len(videosToDownload)} videos")

    os.chdir(args.videos)
    # TODO Progressbar one day maybe?
    # We have all the info we need to make a reliable one
    ydl_opts = {}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        for onlineFilename in videosToDownload:
            infos = videosInfos[onlineFilename]

            # Really download
            ydl.process_ie_result(infos, True, {})

            markTracked(onlineFilename)
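
# A possible starting point for the options/quality/progressbar TODOs above,
# kept as a commented sketch rather than wired in. The option names are
# regular youtube_dl options, but the exact format selector, language list and
# progress hook are assumptions, not part of the current behaviour:
#
#     ydl_opts = {
#         # Cap the resolution so 8GB 4K videos are not picked by default
#         "format": "bestvideo[height<=?1080]+bestaudio/best",
#         # Fetch subtitles alongside the video
#         "writesubtitles": True,
#         "subtitleslangs": ["en"],
#         # Minimal progress feedback; progressbar2 could consume these dicts
#         "progress_hooks": [lambda d: print(d.get("status"), d.get("filename"))],
#     }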