6 changed files with 189 additions and 4 deletions
-
1config/git/gitignore
-
3config/pythonstartup.py
-
182config/scripts/rssVideos
-
1config/shell/shenv
-
4config/tmux.conf
-
2config/vim/pluginconfig
@ -0,0 +1,182 @@ |
|||
#!/usr/bin/env python3 |
|||
|
|||
""" |
|||
Script that downloads videos that are linked as articles |
|||
in an RSS feed. |
|||
The common use case would be a feed from an RSS aggregator |
|||
with the unread items (non-video links are ignored). |
|||
""" |
|||
|
|||
# TODO Distribute this correctly; in the meantime, please do |
|||
# pip install --user youtube-dl ConfigArgParse progressbar2 |
|||
|
|||
# TODO Allow to specify youtube_dl options (e.g. subtitles) |
|||
# TODO Restrict quality (it's not that I don't like 8GB 4K videos but...) |
|||
|
|||
from typing import Dict, Set |
|||
import urllib.request |
|||
import urllib.parse |
|||
import os |
|||
from xml.dom import minidom |
|||
import youtube_dl |
|||
import configargparse |
|||
|
|||
|
|||
def getDefaultConfigPath(environ=None):
    """Return the default path of the rssVideos configuration file.

    The base directory is looked up, in order, in:

    1. ``XDG_CONFIG_PATH`` -- the non-standard variable historically read
       by this script, kept so existing setups keep working;
    2. ``XDG_CONFIG_HOME`` -- the variable defined by the XDG Base
       Directory specification;
    3. ``~/.config/`` as a fallback.

    Empty values are treated as unset.

    :param environ: mapping to read the variables from; defaults to
        ``os.environ``.
    :return: the base directory (with ``~`` expanded) joined with
        ``rssVideos``.
    """
    if environ is None:
        environ = os.environ
    baseDir = (environ.get('XDG_CONFIG_PATH')
               or environ.get('XDG_CONFIG_HOME')
               or '~/.config/')
    return os.path.join(os.path.expanduser(baseDir), 'rssVideos')


def classifyLocalFile(filename, onlineFilenames):
    """Match a local file name (without extension) against the wanted videos.

    The candidates are tried in iteration order and the first one that
    matches wins.

    :param filename: local file name stripped of its last extension.
    :param onlineFilenames: iterable of extension-less names of the videos
        found in the feed.
    :return: ``("downloaded", name)`` when ``filename`` equals a candidate,
        ``("partial", name)`` when it merely starts with one (some extra
        suffix is still attached to the name), ``(None, None)`` when
        nothing matches.
    """
    for onlineFilename in onlineFilenames:
        if filename == onlineFilename:
            return "downloaded", onlineFilename
        if filename.startswith(onlineFilename):
            return "partial", onlineFilename
    return None, None


if __name__ == "__main__":

    defaultConfigPath = getDefaultConfigPath()

    parser = configargparse.ArgParser(description="Download videos linked in " +
                                      "a RSS feed (e.g. an unread feed from " +
                                      "an RSS aggregator",
                                      default_config_files=[defaultConfigPath])
    parser.add('-c', '--config', required=False, is_config_file=True,
               help='Configuration file')
    parser.add('--feed', help='URL of the RSS feed (must be public for now)',
               env_var='RSS_VIDEOS_FEED', required=True)
    parser.add('--videos', help='Directory to store videos',
               env_var='RSS_VIDEOS_VIDEO_DIR', required=True)
    # TODO This feature might require additional documentation and an on/off switch
    parser.add('--track',
               help='Directory where download videos are maked '
                    '(so they are not downloaded twice)',
               env_var='RSS_VIDEOS_TRACK', required=False, default='.rssVideos')

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))
    args.track = os.path.expanduser(args.track)
    # A relative tracking directory is resolved inside the videos directory
    if not os.path.isabs(args.track):
        args.track = os.path.realpath(os.path.join(args.videos, args.track))

    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(args.track, exist_ok=True)

    # Read the feed XML, get the links
    print("→ Retrieveing RSS feed")

    links: Set[str] = set()
    with urllib.request.urlopen(args.feed) as request:
        with minidom.parse(request) as xmldoc:
            for item in xmldoc.getElementsByTagName('item'):
                try:
                    linkNode = item.getElementsByTagName('link')[0]
                    link: str = linkNode.childNodes[0].data
                except (IndexError, AttributeError) as e:
                    # Item without a <link>, or with an empty / non-text one:
                    # report it and move on. Anything else (Ctrl-C included)
                    # is deliberately left to propagate.
                    print("Error while getting link from item:", e)
                    continue
                links.add(link)

    # Filter out non-video links and store video download info
    # and associated filename
    print(f"→ Getting infos on {len(links)} unread articles")

    # Extension-less target filename → info object returned by youtube_dl
    videosInfos: Dict[str, dict] = {}

    ydl_opts = {
        "simulate": True,
        "quiet": True
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        for link in links:
            print(f"Researching {link}...")
            try:
                infos = ydl.extract_info(link)
                filepath = ydl.prepare_filename(infos)
                filename = os.path.splitext(filepath)[0]
                videosInfos[filename] = infos
            except Exception as e:
                # Links that cannot be processed are skipped.
                # Exception (not BaseException) so that Ctrl-C still
                # interrupts the script.
                print(e)
                continue

    # Read the directory content, delete everything that's not a
    # video on the download list or already downloaded
    print(f"→ Deciding on what to do for {len(videosInfos)} videos")

    # Getting information on the video directory

    videosDownloaded: Set[str] = set()
    videosPartiallyDownloaded: Set[str] = set()

    for filepath in os.listdir(args.videos):
        fullpath = os.path.join(args.videos, filepath)
        if not os.path.isfile(fullpath):
            continue
        filename = os.path.splitext(filepath)[0]

        state, onlineFilename = classifyLocalFile(filename, videosInfos)
        if state == "downloaded":
            # Full name already there: completely downloaded
            # → remove from the download list
            videosDownloaded.add(onlineFilename)
        elif state == "partial":
            # Partial name already there: not completely downloaded
            # → keep on the download list
            videosPartiallyDownloaded.add(onlineFilename)
        else:
            # Unrelated filename: delete
            print(f"Deleting: {filename}")
            os.unlink(fullpath)

    # Getting informations on the tracking directory

    # Videos that were once downloaded using this tool
    videosTracked: Set[str] = set()

    for filepath in os.listdir(args.track):
        fullpath = os.path.join(args.track, filepath)
        if not os.path.isfile(fullpath):
            continue
        # Markers have no extension, so the file name is compared as is

        if filepath in videosInfos:
            videosTracked.add(filepath)
        else:
            # Marker for a video that is no longer in the feed: drop it
            os.unlink(fullpath)

    # Deciding for the rest based on the informations

    def markTracked(filename):
        """Create (or leave untouched) the empty marker file for ``filename``.

        :param filename: extension-less name of the video; the marker is a
            file of that name inside ``args.track``.
        """
        markerPath = os.path.join(args.track, filename)
        # Append mode creates the file if needed without truncating it
        open(markerPath, 'a').close()

    videosToDownload: Set[str] = set()
    videosReads: Set[str] = set()
    for onlineFilename in videosInfos:
        # If the video was once downloaded but manually deleted,
        # the marker should be left
        if onlineFilename in videosTracked:
            print(f"Should be marked as read: {onlineFilename}")
            # TODO Automatically do that one day maybe?
            # Need to login to the FreshRSS API and keep track of
            # the item id along the process
            videosReads.add(onlineFilename)
        elif onlineFilename in videosDownloaded:
            markTracked(onlineFilename)
            print(f"Already downloaded: {onlineFilename}")
        else:
            if onlineFilename in videosPartiallyDownloaded:
                print(f"Will be continued: {onlineFilename}")
            else:
                print(f"Will be downloaded: {onlineFilename}")
            videosToDownload.add(onlineFilename)

    # Download the missing videos
    print(f"→ Downloading {len(videosToDownload)} videos")

    # Downloads happen from inside the videos directory
    os.chdir(args.videos)

    # TODO Progressbar one day maybe?
    # We have all the info we need to make a reliable one
    ydl_opts = {
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        for onlineFilename in videosToDownload:
            infos = videosInfos[onlineFilename]

            # Really download
            ydl.process_ie_result(infos, True, {})

            markTracked(onlineFilename)
|||
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue