dotfiles/config/scripts/rssVideos

#!/usr/bin/env python3

"""
Script that downloads videos that are linked as articles
in an RSS feed.
The common use case would be a feed from an RSS aggregator
with the unread items (non-video links are ignored).
"""

# TODO Distribute this correctly, in the meanwhile please do
# pip install --user youtube-dl ConfigArgParse progressbar2
# TODO Allow to specify youtube_dl options (e.g. subtitles)
# TODO Restrict quality (it's not that I don't like 8GB 4K videos but...)
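
# Example invocation (the feed URL and paths are hypothetical, adjust to taste):
#   rssVideos --feed https://example.com/rss.xml --videos ~/Videos/rss
# The same options can also be set through the configuration file or the
# RSS_VIDEOS_* environment variables declared below.
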
from typing import Dict, Set
import urllib.request
import urllib.parse
import os
from xml.dom import minidom
import youtube_dl
import configargparse

if __name__ == "__main__":

    defaultConfigPath = os.path.join(os.path.expanduser(
        os.getenv('XDG_CONFIG_PATH', '~/.config/')), 'rssVideos')

    parser = configargparse.ArgParser(description="Download videos linked in " +
                                      "an RSS feed (e.g. an unread feed " +
                                      "from an RSS aggregator)",
                                      default_config_files=[defaultConfigPath])
    parser.add('-c', '--config', required=False, is_config_file=True,
               help='Configuration file')
    parser.add('--feed', help='URL of the RSS feed (must be public for now)',
               env_var='RSS_VIDEOS_FEED', required=True)
    parser.add('--videos', help='Directory to store videos',
               env_var='RSS_VIDEOS_VIDEO_DIR', required=True)
    parser.add('-n', '--dryrun', help='Do not download the videos',
               action='store_const', const=True, default=False)
    # TODO This feature might require additional documentation and an on/off switch
    parser.add('--track', help='Directory where downloaded videos are marked '
               '(so they are not downloaded twice)',
               env_var='RSS_VIDEOS_TRACK', required=False, default='.rssVideos')

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))
    args.track = os.path.expanduser(args.track)
    if not os.path.isabs(args.track):
        args.track = os.path.realpath(os.path.join(args.videos, args.track))

    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(args.track, exist_ok=True)
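    # The track directory holds one empty marker file per downloaded video;
    # with the default value it lives inside the video directory as ".rssVideos"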

    # Read the feed XML, get the links
    print("→ Retrieving RSS feed")
    links: Set[str] = set()
    with urllib.request.urlopen(args.feed) as request:
        with minidom.parse(request) as xmldoc:
            for item in xmldoc.getElementsByTagName('item'):
                try:
                    linkNode = item.getElementsByTagName('link')[0]
                    link: str = linkNode.childNodes[0].data
                    links.add(link)
                except BaseException as e:
                    print("Error while getting link from item:", e)
                    continue

    # Filter out non-video links and store video download info
    # and associated filename
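    # youtube-dl runs in simulation mode here: extract_info() only fetches
    # the metadata and computes the target filename, it downloads nothing yet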
    print(f"→ Getting info on {len(links)} unread articles")
    videosInfos: Dict[str, dict] = {}
    ydl_opts = {
        "simulate": True,
        "quiet": True
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        for link in links:
            print(f"Researching {link}...")
            try:
                infos = ydl.extract_info(link)
                filepath = ydl.prepare_filename(infos)
                filename, extension = os.path.splitext(filepath)
                videosInfos[filename] = infos
            except BaseException as e:
                print(e)
                continue
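
    # At this point videosInfos maps each video's target filename
    # (without extension) to the info dict needed to download it later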

    # Read the directory content, delete everything that's not a
    # video on the download list or already downloaded
    print(f"→ Deciding on what to do for {len(videosInfos)} videos")

    # Getting information on the video directory
    videosDownloaded: Set[str] = set()
    videosPartiallyDownloaded: Set[str] = set()
    for filepath in os.listdir(args.videos):
        fullpath = os.path.join(args.videos, filepath)
        if not os.path.isfile(fullpath):
            continue
        filename, extension = os.path.splitext(filepath)
        for onlineFilename in videosInfos.keys():
            # Full name already there: completely downloaded
            # → remove from the download list
            if filename == onlineFilename:
                videosDownloaded.add(onlineFilename)
                break
            # Partial name already there: not completely downloaded
            # → keep on the download list
            elif filename.startswith(onlineFilename):
                videosPartiallyDownloaded.add(onlineFilename)
                break
        # Unrelated filename: delete
        # (this is a for/else: it only runs when the loop did not break,
        # i.e. when the file matched no video in the feed)
        else:
            print(f"Deleting: {filename}")
            os.unlink(fullpath)

    # Getting information on the tracking directory
    # (videos that were once downloaded using this tool)
    videosTracked: Set[str] = set()
    for filepath in os.listdir(args.track):
        fullpath = os.path.join(args.track, filepath)
        if not os.path.isfile(fullpath):
            continue
        # Here the whole filepath is the filename, as markers have no extension
        if filepath in videosInfos:
            videosTracked.add(filepath)
        else:
            os.unlink(fullpath)
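
    # Decision matrix, given what the two directories tell us:
    #   marker present                    → downloaded before, report as read
    #   file fully downloaded, no marker  → create the marker, skip download
    #   file partially downloaded         → resume the download
    #   neither                           → download from scratch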

    # Deciding for the rest based on the information

    def markTracked(filename):
        # An empty file named after the video is enough as a marker
        markerPath = os.path.join(args.track, filename)
        open(markerPath, 'a').close()

    videosToDownload: Set[str] = set()
    videosReads: Set[str] = set()
    for onlineFilename in videosInfos.keys():
        # If the video was once downloaded but manually deleted,
        # the marker should be left
        if onlineFilename in videosTracked:
            print(f"Should be marked as read: {onlineFilename}")
            # TODO Automatically do that one day maybe?
            # Need to login to the FreshRSS API and keep track of
            # the item id along the process
            videosReads.add(onlineFilename)
        elif onlineFilename in videosDownloaded:
            markTracked(onlineFilename)
            print(f"Already downloaded: {onlineFilename}")
        else:
            if onlineFilename in videosPartiallyDownloaded:
                print(f"Will be continued: {onlineFilename}")
            else:
                print(f"Will be downloaded: {onlineFilename}")
            videosToDownload.add(onlineFilename)

    # Download the missing videos
    print(f"→ Downloading {len(videosToDownload)} videos")

    os.chdir(args.videos)
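    # youtube-dl saves to the current directory by default,
    # hence the chdir into the video directory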
    if not args.dryrun:
        # TODO Progressbar one day maybe?
        # We have all the info we need to make a reliable one
        ydl_opts = {}
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for onlineFilename in videosToDownload:
                infos = videosInfos[onlineFilename]

                # Really download, reusing the info dict extracted earlier
                try:
                    ydl.process_ie_result(infos, True, {})
                    # Remember the download so the video is not fetched
                    # again if it gets deleted after watching
                    markTracked(onlineFilename)
                except BaseException as e:
                    print(e)
                    continue