rssVideos mostly
This commit is contained in:
parent ceb1e40964
commit 709239dfca
17 changed files with 479 additions and 111 deletions
@@ -10,10 +10,10 @@ with the unread items (non-video links are ignored).

# TODO Distribute this correctly, in the meanwhile please do
# pip install --user youtube-dl ConfigArgParse progressbar2

# TODO Allow to specify youtube_dl options (e.g. subtitles)
# TODO Restrict quality (it's not that I don't like 8GB 4K videos but...)
# TODO Better logging (youtube-dl allows to pass loggers)

from typing import Dict, Set
import sys
from typing import Dict, Set, Tuple
import urllib.request
import urllib.parse
import os
@@ -22,27 +22,69 @@ import youtube_dl
import configargparse


if __name__ == "__main__":
def get_args() -> configargparse.Namespace:
    defaultConfigPath = os.path.join(
        os.path.expanduser(os.getenv("XDG_CONFIG_PATH", "~/.config/")), "rssVideos"
    )
    defaultConfigPath = os.path.join(os.path.expanduser(
        os.getenv('XDG_CONFIG_PATH', '~/.config/')), 'rssVideos')

    parser = configargparse.ArgParser(description="Download videos linked in " +
                                      "a RSS feed (e.g. an unread feed from " +
                                      "an RSS aggregator",
                                      default_config_files=[defaultConfigPath])
    parser.add('-c', '--config', required=False, is_config_file=True,
               help='Configuration file')
    parser.add('--feed', help='URL of the RSS feed (must be public for now)',
               env_var='RSS_VIDEOS_FEED', required=True)
    parser.add('--videos', help='Directory to store videos',
               env_var='RSS_VIDEOS_VIDEO_DIR', required=True)
    parser.add('-n', '--dryrun', help='Do not download the videos',
               action='store_const', const=True, default=False)
    # TODO This feature might require additional documentation and an on/off switc
    parser.add('--track', help='Directory where download videos are maked (so they are not downloaded twice)',
               env_var='RSS_VIDEOS_TRACK', required=False, default='.rssVideos')
    parser = configargparse.ArgParser(
        description="Download videos linked in "
        + "a RSS feed (e.g. an unread feed from "
        + "an RSS aggregator)",
        default_config_files=[defaultConfigPath],
    )
    parser.add(
        "-c", "--config", required=False, is_config_file=True, help="Configuration file"
    )
    parser.add(
        "--feed",
        help="URL of the RSS feed (must be public for now)",
        env_var="RSS_VIDEOS_FEED",
        required=True,
    )
    parser.add(
        "--videos",
        help="Directory to store videos",
        env_var="RSS_VIDEOS_VIDEO_DIR",
        required=True,
    )
    parser.add(
        "-n",
        "--dryrun",
        help="Do not download the videos",
        action="store_const",
        const=True,
        default=False,
    )
    # TODO This feature might require additional documentation and an on/off switch
    parser.add(
        "--track",
        help="Directory where downloaded videos are marked "
        + "so they are not downloaded again after deletion.",
        env_var="RSS_VIDEOS_TRACK",
        required=False,
        default=".rssVideos",
    )
    parser.add(
        "--max-duration",
        help="Skip videos longer than this amount of seconds",
        env_var="RSS_VIDEOS_MAX_DURATION",
        type=int,
        default=0,
    )
    parser.add(
        "--format",
        help="Use this format to download videos."
        + " See FORMAT SELECTION in youtube-dl(1)",
        env_var="RSS_VIDEOS_FORMAT",
        default="bestvideo+bestaudio/best",
    )
    parser.add(
        "--subtitles",
        help="Download all subtitles",
        env_var="RSS_VIDEOS_SUBTITLES",
        action="store_true",
    )

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))
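A note on the configargparse setup above (not part of the commit): every option can come from the config file under defaultConfigPath, from its declared env_var, or from the command line, with command-line flags overriding environment variables and those overriding the config file. A minimal sketch of that resolution, using made-up URLs and paths:

# Illustrative only -- values are placeholders, not from the commit.
import os

os.environ["RSS_VIDEOS_FEED"] = "https://example.org/unread.rss"   # env_var source
os.environ["RSS_VIDEOS_VIDEO_DIR"] = "/tmp/videos"

args = get_args()  # no CLI flags needed: the required options are satisfied by env vars
print(args.feed, args.videos, args.format, args.max_duration)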
@@ -50,54 +92,70 @@ if __name__ == "__main__":
    if not os.path.isabs(args.track):
        args.track = os.path.realpath(os.path.join(args.videos, args.track))

    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(args.track, exist_ok=True)
    return args

    # Read the feed XML, get the links
    print("→ Retrieving RSS feed")

    links: Set[str] = set()
def get_links(args: configargparse.Namespace) -> Set[str]:
    """
    Read the feed XML, get the links
    """
    links = set()
    with urllib.request.urlopen(args.feed) as request:
        with minidom.parse(request) as xmldoc:
            for item in xmldoc.getElementsByTagName('item'):
            for item in xmldoc.getElementsByTagName("item"):
                try:
                    linkNode = item.getElementsByTagName('link')[0]
                    linkNode = item.getElementsByTagName("link")[0]
                    link: str = linkNode.childNodes[0].data
                    links.add(link)
                except BaseException as e:
                    print("Error while getting link from item:", e)
                    continue
    return links
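For reference, get_links only relies on the minimal RSS shape sketched below: <item> elements whose first <link> child holds the article URL. A self-contained sketch with an invented feed (not part of the commit):

# Illustrative only -- feed content is made up.
from xml.dom import minidom

FEED = """<rss><channel>
  <item><link>https://example.org/watch?v=abc123</link></item>
  <item><link>https://example.org/article-without-video</link></item>
</channel></rss>"""

xmldoc = minidom.parseString(FEED)
for item in xmldoc.getElementsByTagName("item"):
    # Same access pattern as get_links: first <link>, first text node
    link = item.getElementsByTagName("link")[0].childNodes[0].data
    print(link)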
    # Filter out non-video links and store video download info
    # and associated filename
    print(f"→ Getting infos on {len(links)} unread articles")

    videosInfos: Dict[str, str] = {}
def get_video_infos(
    args: configargparse.Namespace, ydl_opts: Dict, links: Set[str]
) -> Dict[str, Dict]:
    """
    Filter out non-video links and store video download info
    and associated filename
    """
    videosInfos = dict()

    ydl_opts = {
        "simulate": True,
        "quiet": True
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    dry_ydl_opts = ydl_opts.copy()
    dry_ydl_opts.update({"simulate": True, "quiet": True})
    with youtube_dl.YoutubeDL(dry_ydl_opts) as ydl:
        for link in links:
            print(f"Researching {link}...")
            try:
                infos = ydl.extract_info(link)
                if args.max_duration > 0 and infos["duration"] > args.max_duration:
                    print(
                        f"{infos['title']}: Skipping as longer than max duration: "
                        f"{infos['duration']} > {args.max_duration}"
                    )
                    continue
                filepath = ydl.prepare_filename(infos)
                filename, extension = os.path.splitext(filepath)
                videosInfos[filename] = infos
                print(f"{infos['title']}: Added")

            except BaseException as e:
                print(e)
                continue

    # Read the directory content, delete everything that's not a
    # video on the download list or already downloaded
    print(f"→ Deciding on what to do for {len(videosInfos)} videos")
    return videosInfos
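The dry run above leans on youtube-dl's simulate mode: extract_info() probes the link and returns the metadata dict without writing anything, and prepare_filename() computes the file name a real download would produce, which is what the directory scan further down matches against. A standalone sketch with a placeholder URL (not part of the commit):

# Illustrative only.
import youtube_dl

link = "https://example.org/some-video"  # placeholder
with youtube_dl.YoutubeDL({"simulate": True, "quiet": True}) as ydl:
    infos = ydl.extract_info(link)           # metadata only, nothing downloaded
    print(infos["title"], infos.get("duration"))
    print(ydl.prepare_filename(infos))       # name the real download would use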
    # Getting information on the video directory

    videosDownloaded: Set[str] = set()
    videosPartiallyDownloaded: Set[str] = set()
def get_downloaded_videos(
    args: configargparse.Namespace, videosInfos: Dict[str, Dict]
) -> Tuple[Set[str], Set[str]]:
    videosDownloaded = set()
    videosPartiallyDownloaded = set()
    """
    Read the directory content, delete everything that's not a
    video on the download list or already downloaded
    """

    for filepath in os.listdir(args.videos):
        fullpath = os.path.join(args.videos, filepath)
@@ -106,12 +164,19 @@ if __name__ == "__main__":
        filename, extension = os.path.splitext(filepath)

        for onlineFilename in videosInfos.keys():
            # Full name already there: completly downloaded → remove from the download list
            # Full name already there: completely downloaded
            # → remove from the download list
            if filename == onlineFilename:
                videosDownloaded.add(onlineFilename)
                break
            # Partial name already there: not completly downloaded → keep on the download list
            elif filename.startswith(onlineFilename):
                # Subtitle file
                # → ignore
                if filename.endswith(".vtt"):
                    break

                # Partial name already there: not completely downloaded
                # → keep on the download list
                videosPartiallyDownloaded.add(onlineFilename)
                break
            # Unrelated filename: delete
@@ -119,10 +184,17 @@ if __name__ == "__main__":
            print(f"Deleting: {filename}")
            os.unlink(fullpath)

    # Getting informations on the tracking directory
    return videosDownloaded, videosPartiallyDownloaded
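The name matching in get_downloaded_videos works on prefixes because youtube-dl stores partial data next to the final file (e.g. per-format fragments or ".part" files): an exact stem match means the video is complete, a prefix match means it is still partial, and anything else gets deleted. A small sketch of that classification with invented file names (not part of the commit):

# Illustrative only.
import os

expected = "Some Video"                      # prepare_filename() minus its extension
for present in ("Some Video.mkv", "Some Video.f137.mp4.part", "Other.mkv"):
    name, _ext = os.path.splitext(present)
    if name == expected:
        print(present, "-> fully downloaded")
    elif name.startswith(expected):
        print(present, "-> partially downloaded, keep on the list")
    else:
        print(present, "-> unrelated, would be deleted")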
    # Videos that were once downloaded using this tool
    videosTracked: Set[str] = set()

def get_tracked_videos(args: configargparse.Namespace, known: Set[str]) -> Set[str]:
    """
    Return videos previously downloaded (=tracked) amongst the unread videos.
    This is stored in the tracking directory as empty extension-less files.
    Other tracking markers (e.g. for videos that are now read) are deleted.
    """

    videosTracked = set()

    for filepath in os.listdir(args.track):
        fullpath = os.path.join(args.track, filepath)
@@ -130,18 +202,39 @@ if __name__ == "__main__":
            continue
        # Here filepath is also the filename, as there is no extension

        if filepath in videosInfos:
        if filepath in known:
            videosTracked.add(filepath)
        else:
            os.unlink(fullpath)

    return videosTracked
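The tracking scheme is just empty, extension-less marker files named after each downloaded video, kept in args.track; get_tracked_videos() keeps the markers that still match the current feed and prunes the rest. A sketch with made-up names (not part of the commit):

# Illustrative only.
import os

track_dir = "/tmp/rssVideos-track"            # stands in for args.track
os.makedirs(track_dir, exist_ok=True)

open(os.path.join(track_dir, "Some Video"), "a").close()   # what markTracked() does

known = {"Some Video", "Another Video"}       # stems from the current feed
tracked = {f for f in os.listdir(track_dir) if f in known}
print(tracked)    # {'Some Video'} -> skipped instead of re-downloaded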
def main() -> None:

    args = get_args()

    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(args.track, exist_ok=True)
    ydl_opts = {"format": args.format, "allsubtitles": args.subtitles}

    print("→ Retrieving RSS feed")
    links = get_links(args)

    print(f"→ Getting infos on {len(links)} unread articles")
    videosInfos = get_video_infos(args, ydl_opts, links)

    print(f"→ Deciding on what to do for {len(videosInfos)} videos")
    videosDownloaded, videosPartiallyDownloaded = get_downloaded_videos(
        args, videosInfos
    )
    videosTracked = get_tracked_videos(args, set(videosInfos.keys()))

    # Deciding for the rest based on the information

    def markTracked(filename):
    def markTracked(filename: str) -> None:
        markerPath = os.path.join(args.track, onlineFilename)
        open(markerPath, 'a').close()
        open(markerPath, "a").close()

    videosToDownload: Set[str] = set()
    videosReads: Set[str] = set()
@@ -169,11 +262,10 @@ if __name__ == "__main__":

    os.chdir(args.videos)

    exit_code = 0
    if not args.dryrun:
        # TODO Progressbar one day maybe?
        # We have all the info we need to make a reliable one
        ydl_opts = {
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for onlineFilename in videosToDownload:
                infos = videosInfos[onlineFilename]
@@ -183,6 +275,13 @@ if __name__ == "__main__":
                    ydl.process_ie_result(infos, True, {})

                    markTracked(onlineFilename)
                except:
                except BaseException as e:
                    print(e)
                    exit_code = 1
                    continue

    sys.exit(exit_code)


if __name__ == "__main__":
    main()
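Overall, the commit settles on two youtube-dl passes: a simulating pass collects the info dicts up front, and the download pass hands those dicts straight to process_ie_result() so nothing is probed twice. A condensed sketch of that flow, with a placeholder URL (not part of the commit):

# Illustrative only.
import youtube_dl

link = "https://example.org/some-video"  # placeholder

with youtube_dl.YoutubeDL({"simulate": True, "quiet": True}) as dry:
    infos = dry.extract_info(link)                 # pass 1: metadata only

with youtube_dl.YoutubeDL({"format": "bestvideo+bestaudio/best"}) as ydl:
    ydl.process_ie_result(infos, True, {})         # pass 2: actual download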