|
|
@ -10,10 +10,10 @@ with the unread items (non-video links are ignored). |
|
|
|
# TODO Distribute this correctly, in the meanwhile please do
|
|
|
# pip install --user youtube-dl ConfigArgParse progressbar2 |
|
|
|
|
|
|
|
# TODO Allow to specify youtube_dl options (e.g. subtitles) |
|
|
|
# TODO Restrict quality (it's not that I don't like 8GB 4K videos but...) |
|
|
|
# TODO Better logging (youtube-dl allow to pass loggers) |
|
|
|
|
|
|
|
from typing import Dict, Set |
|
|
|
import sys |
|
|
|
from typing import Dict, Set, Tuple |
|
|
|
import urllib.request |
|
|
|
import urllib.parse |
|
|
|
import os |
|
|
@ -22,27 +22,69 @@ import youtube_dl |
|
|
|
import configargparse |
|
|
|
|
|
|
|
|
|
|
|
def get_args() -> "configargparse.Namespace":
    """
    Parse options from the command line, environment and configuration file.

    Resolves ``videos`` to an absolute path, resolves ``track`` relative to
    ``videos`` when it is not absolute, creates both directories if missing,
    and returns the parsed namespace.
    """
    # Configuration file defaults to $XDG_CONFIG_PATH/rssVideos
    # (usually ~/.config/rssVideos).
    defaultConfigPath = os.path.join(
        os.path.expanduser(os.getenv("XDG_CONFIG_PATH", "~/.config/")), "rssVideos"
    )

    parser = configargparse.ArgParser(
        description="Download videos linked in "
        + "a RSS feed (e.g. an unread feed from "
        + "an RSS aggregator)",  # closing parenthesis was missing
        default_config_files=[defaultConfigPath],
    )
    parser.add(
        "-c", "--config", required=False, is_config_file=True, help="Configuration file"
    )
    parser.add(
        "--feed",
        help="URL of the RSS feed (must be public for now)",
        env_var="RSS_VIDEOS_FEED",
        required=True,
    )
    parser.add(
        "--videos",
        help="Directory to store videos",
        env_var="RSS_VIDEOS_VIDEO_DIR",
        required=True,
    )
    parser.add(
        "-n",
        "--dryrun",
        help="Do not download the videos",
        action="store_const",
        const=True,
        default=False,
    )
    # TODO This feature might require additional documentation and an on/off switch
    parser.add(
        "--track",
        help="Directory where download videos are marked "
        + "to not download them after deletion.",
        env_var="RSS_VIDEOS_TRACK",
        required=False,
        default=".rssVideos",
    )
    parser.add(
        "--max-duration",
        help="Skip video longer than this amount of seconds",
        env_var="RSS_VIDEOS_MAX_DURATION",
        type=int,
        default=0,
    )
    parser.add(
        "--format",
        help="Use this format to download videos."
        + " See FORMAT SELECTION in youtube-dl(1)",
        env_var="RSS_VIDEOS_FORMAT",
        default="bestvideo+bestaudio/best",
    )
    parser.add(
        "--subtitles",
        help="Download all subtitles",
        env_var="RSS_VIDEOS_SUBTITLES",
        action="store_true",
    )

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))
    # A relative tracking directory lives inside the video directory.
    if not os.path.isabs(args.track):
        args.track = os.path.realpath(os.path.join(args.videos, args.track))

    # exist_ok makes this idempotent, safe even if done again elsewhere.
    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(args.track, exist_ok=True)

    return args
|
|
|
|
|
|
|
def get_links(args: "configargparse.Namespace") -> Set[str]:
    """
    Read the feed XML and return the set of <link> URLs of its <item>s.

    Items without a usable <link> child are reported on stdout and skipped.
    """
    links: Set[str] = set()
    with urllib.request.urlopen(args.feed) as request:
        with minidom.parse(request) as xmldoc:
            for item in xmldoc.getElementsByTagName("item"):
                try:
                    linkNode = item.getElementsByTagName("link")[0]
                    link: str = linkNode.childNodes[0].data
                    links.add(link)
                # Exception, not BaseException: a malformed item must not
                # swallow KeyboardInterrupt / SystemExit.
                except Exception as e:
                    print("Error while getting link from item:", e)
                    continue
    return links
|
|
|
|
|
|
|
def get_video_infos(
    args: "configargparse.Namespace", ydl_opts: Dict, links: Set[str]
) -> Dict[str, Dict]:
    """
    Filter out non-video links and return download info for each video.

    Returns a dict keyed by the extension-less filename youtube-dl would
    use, valued with the extracted info dict.  Videos longer than
    ``args.max_duration`` seconds (when > 0) are skipped, as is any link
    youtube-dl cannot extract.
    """
    videosInfos: Dict[str, Dict] = dict()

    # Probe with a simulating/quiet copy of the caller's options so this
    # information pass downloads nothing and stays terse.
    dry_ydl_opts = ydl_opts.copy()
    dry_ydl_opts.update({"simulate": True, "quiet": True})
    with youtube_dl.YoutubeDL(dry_ydl_opts) as ydl:
        for link in links:
            print(f"Researching {link}...")
            try:
                infos = ydl.extract_info(link)
                if args.max_duration > 0 and infos["duration"] > args.max_duration:
                    print(
                        f"{infos['title']}: Skipping as longer than max duration: "
                        f"{infos['duration']} > {args.max_duration}"
                    )
                    continue
                filepath = ydl.prepare_filename(infos)
                filename, extension = os.path.splitext(filepath)
                videosInfos[filename] = infos
                print(f"{infos['title']}: Added")
            # Exception, not BaseException, so Ctrl-C still aborts the run.
            except Exception as e:
                print(e)
                continue
    return videosInfos
|
|
|
|
|
|
|
def get_downloaded_videos(
    args: "configargparse.Namespace", videosInfos: Dict[str, Dict]
) -> Tuple[Set[str], Set[str]]:
    """
    Read the video directory content and classify it.

    Returns ``(videosDownloaded, videosPartiallyDownloaded)``: names
    (without extension) of wanted videos fully present, and of those only
    partially downloaded.  Files unrelated to any wanted video are deleted.
    """
    videosDownloaded: Set[str] = set()
    videosPartiallyDownloaded: Set[str] = set()

    for filepath in os.listdir(args.videos):
        fullpath = os.path.join(args.videos, filepath)
        # NOTE(review): two context lines were elided from the paste here;
        # skipping non-regular files is the most plausible reconstruction —
        # confirm against the original file.
        if not os.path.isfile(fullpath):
            continue
        filename, extension = os.path.splitext(filepath)

        for onlineFilename in videosInfos.keys():
            # Full name already there: completely downloaded
            # → remove from the download list
            if filename == onlineFilename:
                videosDownloaded.add(onlineFilename)
                break
            elif filename.startswith(onlineFilename):
                # Subtitle file → ignore.  splitext() already stripped the
                # extension, so test `extension`; the previous
                # `filename.endswith(".vtt")` check could never match.
                if extension == ".vtt":
                    break

                # Partial name already there: not completely downloaded
                # → keep on the download list
                videosPartiallyDownloaded.add(onlineFilename)
                break
        # Unrelated filename: delete
        else:
            # NOTE(review): the pasted source showed "(unknown)" here;
            # printing the deleted path is the evident intent.
            print(f"Deleting: {fullpath}")
            os.unlink(fullpath)

    return videosDownloaded, videosPartiallyDownloaded
|
|
|
|
|
|
|
|
|
|
|
def get_tracked_videos(args: "configargparse.Namespace", known: Set[str]) -> Set[str]:
    """
    Return videos previously downloaded (=tracked) amongst the unread videos.

    This is stored in the tracking directory as empty extension-less files.
    Other tracking markers (e.g. for now read videos) are deleted.
    """
    # Videos that were once downloaded using this tool
    videosTracked: Set[str] = set()

    for filepath in os.listdir(args.track):
        fullpath = os.path.join(args.track, filepath)
        # NOTE(review): one context line was elided from the paste here;
        # skipping non-regular files is the most plausible reconstruction —
        # confirm against the original file.
        if not os.path.isfile(fullpath):
            continue
        # Markers have no extension, so the file name is the video name.
        if filepath in known:
            videosTracked.add(filepath)
        else:
            os.unlink(fullpath)

    return videosTracked
|
|
|
|
|
|
|
|
|
|
|
def markTracked(filename): |
|
|
|
markerPath = os.path.join(args.track, onlineFilename) |
|
|
|
open(markerPath, 'a').close() |
|
|
|
def main() -> None: |
|
|
|
|
|
|
|
args = get_args() |
|
|
|
|
|
|
|
os.makedirs(args.videos, exist_ok=True) |
|
|
|
os.makedirs(args.track, exist_ok=True) |
|
|
|
ydl_opts = {"format": args.format, "allsubtitles": args.subtitles} |
|
|
|
|
|
|
|
print("→ Retrieveing RSS feed") |
|
|
|
links = get_links(args) |
|
|
|
|
|
|
|
print(f"→ Getting infos on {len(links)} unread articles") |
|
|
|
videosInfos = get_video_infos(args, ydl_opts, links) |
|
|
|
|
|
|
|
print(f"→ Deciding on what to do for {len(videosInfos)} videos") |
|
|
|
videosDownloaded, videosPartiallyDownloaded = get_downloaded_videos( |
|
|
|
args, videosInfos |
|
|
|
) |
|
|
|
videosTracked = get_tracked_videos(args, set(videosInfos.keys())) |
|
|
|
|
|
|
|
# Deciding for the rest based on the informations |
|
|
|
|
|
|
|
def markTracked(filename: str) -> None: |
|
|
|
markerPath = os.path.join(args.track, onlineFilename) |
|
|
|
open(markerPath, "a").close() |
|
|
|
|
|
|
|
videosToDownload: Set[str] = set() |
|
|
|
videosReads: Set[str] = set() |
|
|
@ -169,11 +262,10 @@ if __name__ == "__main__": |
|
|
|
|
|
|
|
os.chdir(args.videos) |
|
|
|
|
|
|
|
exit_code = 0 |
|
|
|
if not args.dryrun: |
|
|
|
# TODO Progressbar one day maybe? |
|
|
|
# We have all the info we need to make a reliable one |
|
|
|
ydl_opts = { |
|
|
|
} |
|
|
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl: |
|
|
|
for onlineFilename in videosToDownload: |
|
|
|
infos = videosInfos[onlineFilename] |
|
|
@ -183,6 +275,13 @@ if __name__ == "__main__": |
|
|
|
ydl.process_ie_result(infos, True, {}) |
|
|
|
|
|
|
|
markTracked(onlineFilename) |
|
|
|
except: |
|
|
|
except BaseException as e: |
|
|
|
print(e) |
|
|
|
exit_code = 1 |
|
|
|
continue |
|
|
|
|
|
|
|
sys.exit(exit_code) |
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: all work happens in main().
if __name__ == "__main__":
    main()