2019-04-30 08:22:27 +02:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
2021-12-10 22:59:39 +01:00
|
|
|
|
|
2019-04-30 08:22:27 +02:00
|
|
|
|
"""
|
|
|
|
|
Script that downloads videos that are linked as articles
in an RSS feed.
The common use case would be a feed from an RSS aggregator
with the unread items (non-video links are ignored).
|
|
|
|
|
"""
|
|
|
|
|
|
2021-12-10 22:59:39 +01:00
|
|
|
|
import enum
import functools
import logging
import operator
import os
import pickle
import random
import re
import subprocess
import sys
import time
import typing
import urllib.error
import urllib.parse
import urllib.request
from xml.dom import minidom

import coloredlogs
import configargparse
import yt_dlp as youtube_dl
|
|
|
|
|
|
|
|
|
|
# Module-level logger used by every function and class in this script.
log = logging.getLogger(__name__)

# TODO Lockfile, or a way to parallel watch and download


def configure_logging(args: configargparse.Namespace) -> None:
    """Install the coloredlogs handler according to the parsed arguments.

    When ``args.verbosity`` is set, coloredlogs is installed at that level
    with its default settings. Otherwise it is installed on this module's
    logger only, with a format that prints just the message.
    """
    if not args.verbosity:
        coloredlogs.install(fmt="%(message)s", logger=log)
        return
    coloredlogs.install(level=args.verbosity)
|
|
|
|
|
|
2021-12-19 11:45:41 +01:00
|
|
|
|
def format_duration(duration: typing.Union[int, float]) -> str:
    """Format a duration given in seconds as ``HH:MM:SS``.

    The hour field is not capped at 23, so totals of a day or more (such as
    the sum printed by the ``duration`` action) are rendered correctly,
    e.g. ``25:00:00``, instead of wrapping around midnight.

    Args:
        duration: Number of seconds; fractional seconds are truncated.

    Returns:
        The formatted duration, with a leading ``-`` for negative values.
    """
    total_seconds = int(duration)
    sign = "-" if total_seconds < 0 else ""
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"
|
|
|
|
|
|
2021-12-12 13:40:24 +01:00
|
|
|
|
|
2021-12-10 22:59:39 +01:00
|
|
|
|
class RVElement:
    """One ``<item>`` of the RSS feed, together with its local state.

    The RSS fields (title, link, guid, ...) are read on demand from the
    wrapped XML element. The video information is fetched lazily through the
    parent database's dry-run downloader and cached on the instance.
    """

    # Database this element belongs to (gives access to args and downloaders)
    parent: "RVDatabase"
    # XML ``<item>`` element the RSS fields are read from
    item: minidom.Element
    # True once download() ran for this element, even if the file is gone now
    was_downloaded: bool
    # True once the element was watched or marked as seen
    watched: bool

    def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
        self.parent = parent
        self.item = item
        self.was_downloaded = False
        self.watched = False

    def get_tag_data(self, tag_name: str) -> str:
        """Return the data of the only child node of the only tag `tag_name`.

        Raises:
            KeyError: If the item does not contain exactly one such tag, or
                if that tag does not have exactly one child node.
        """
        nodes = self.item.getElementsByTagName(tag_name)
        if len(nodes) != 1:
            raise KeyError(f"Expected 1 tag `{tag_name}`, got {len(nodes)}.")
        children = nodes[0].childNodes
        if len(children) != 1:
            raise KeyError(
                f"Expected 1 child for tag `{tag_name}`, got {len(children)}."
            )
        return children[0].data

    @property
    def title(self) -> str:
        """Content of the ``title`` tag."""
        return self.get_tag_data("title")

    @property
    def link(self) -> str:
        """Content of the ``link`` tag."""
        return self.get_tag_data("link")

    @property
    def creator(self) -> typing.Optional[str]:
        """Content of the ``dc:creator`` tag, or None if it cannot be read."""
        try:
            return self.get_tag_data("dc:creator")
        except KeyError:
            return None

    @property
    def description(self) -> str:
        """Content of the ``description`` tag."""
        # TODO Testing
        return self.get_tag_data("description")

    @property
    def date(self) -> str:
        """Content of the ``pubDate`` tag, as the raw string."""
        # TODO datetime format
        return self.get_tag_data("pubDate")

    @property
    def guid(self) -> int:
        """Content of the ``guid`` tag, converted to an integer."""
        return int(self.get_tag_data("guid"))

    @property
    def is_researched(self) -> bool:
        """Whether ``ytdl_infos`` is already stored on this instance.

        Checking the instance dict avoids triggering the (expensive)
        ``ytdl_infos`` computation.
        """
        return "ytdl_infos" in self.__dict__

    def salvage_cache(self, cache: "RVElement") -> None:
        """Copy the state of the matching element of a previous run.

        The video information is only reused when ``--research`` was not
        requested; the downloaded / watched flags are always carried over.
        """
        if not self.parent.args.research and cache.is_researched:
            self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
            log.debug(f"From cache: {self}")
        if cache.was_downloaded:
            self.was_downloaded = True
        if cache.watched:
            self.watched = True

    def __str__(self) -> str:
        # Never triggers research: the duration is only shown when the
        # information is already available.
        text = f"{self.guid}: {self.creator if self.creator else '?'} – {self.title}"
        if self.is_researched:
            if self.is_video:
                text += f" ({format_duration(self.duration)})"
            else:
                text += " (N/A)"
        else:
            text += " (?)"
        text += f" – {self.link}"
        return text

    @property
    def downloaded(self) -> bool:
        """Whether the element is researched and its file currently exists."""
        if not self.is_researched:
            return False
        return os.path.isfile(self.filepath)

    @functools.cached_property
    def ytdl_infos(self) -> typing.Optional[dict]:
        """Information extracted by the dry-run downloader for the link.

        Returns None when the extraction fails with a DownloadError.
        The database is saved as soon as the value is known.
        """
        log.info(f"Researching: {self}")
        try:
            infos = self.parent.ytdl_dry.extract_info(self.link)
        except KeyboardInterrupt:
            raise
        except youtube_dl.utils.DownloadError as e:
            # TODO Still raise in case of temporary network issue
            log.warning(e)
            infos = None
        # Apparently that thing is transformed from a LazyList
        # somewhere in the normal yt_dlp process
        if (
            infos
            and "thumbnails" in infos
            and isinstance(infos["thumbnails"], youtube_dl.utils.LazyList)
        ):
            infos["thumbnails"] = infos["thumbnails"].exhaust()
        # Save database once it's been computed
        # (the value must be on the instance before save() pickles it)
        self.__dict__["ytdl_infos"] = infos
        self.parent.save()
        return infos

    @property
    def duration(self) -> int:
        """The ``duration`` entry of the video information."""
        assert self.is_video
        assert self.ytdl_infos
        return self.ytdl_infos["duration"]

    @property
    def is_video(self) -> bool:
        """Whether information was found and it contains a duration."""
        # Duration might be missing in playlists and stuff
        return self.ytdl_infos is not None and "duration" in self.ytdl_infos

    @property
    def filepath(self) -> str:
        """File name prepared by the dry-run downloader for this video."""
        assert self.is_video
        # TODO This doesn't change the extension to mkv when the formats are incompatible
        return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)

    @property
    def filename(self) -> str:
        """``filepath`` without its extension."""
        assert self.is_video
        return os.path.splitext(self.filepath)[0]

    def download(self) -> None:
        """Download the video and record it, unless running in dry-run mode."""
        assert self.is_video
        log.info(f"Downloading: {self}")
        if not self.parent.args.dryrun:
            self.parent.ytdl.process_ie_result(self.ytdl_infos, True, {})
            self.was_downloaded = True
            self.parent.save()

    def preload(self) -> None:
        """Download the video unless it is, or once was, already downloaded."""
        assert self.is_video
        if self.downloaded:
            log.debug(f"Currently downloaded: {self}")
            return
        if self.was_downloaded:
            log.debug(f"Downloaded previously: {self}")
            return
        self.download()

    # Optional one-letter suffix of a duration filter -> factor to seconds
    MATCHES_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, None: 1}

    # Optional one-character prefix of a duration filter -> comparison applied
    # as ``comparator(video_duration, threshold)``. The operator functions
    # (unlike ``int.__lt__`` & co.) also accept non-integer durations.
    MATCHES_DURATION_COMPARATORS = {
        "<": operator.lt,
        "-": operator.lt,
        ">": operator.gt,
        "+": operator.gt,
        "=": operator.eq,
        None: operator.le,
    }

    @classmethod
    def _parse_duration_filter(
        cls, spec: str
    ) -> tuple[typing.Callable[[typing.Any, typing.Any], bool], int]:
        """Parse a non-empty duration filter such as ``<10m`` or ``3600``.

        Returns:
            The comparator to call as ``comparator(video_duration, threshold)``
            and the threshold in seconds.

        Raises:
            ValueError: If the prefix, suffix or number is not understood.
        """
        value = spec

        suffix = value[-1].lower()
        if suffix.isdigit():
            mult_index = None
        else:
            mult_index = suffix
            value = value[:-1]
        try:
            multiplier = cls.MATCHES_DURATION_MULTIPLIERS[mult_index]
        except KeyError:
            raise ValueError(f"Unknown duration multiplier: {mult_index}") from None

        if not value:
            raise ValueError(f"Missing duration value: {spec}")

        prefix = value[0]
        if prefix.isdigit():
            comp_index = None
        else:
            comp_index = prefix
            value = value[1:]
        try:
            comparator = cls.MATCHES_DURATION_COMPARATORS[comp_index]
        except KeyError:
            raise ValueError(f"Unknown duration comparator: {comp_index}") from None

        return comparator, int(value) * multiplier

    def matches_filter(self, args: configargparse.Namespace) -> bool:
        """Tell whether this element passes the filters given in `args`.

        The filters that only need the RSS fields are evaluated first, so
        that elements rejected by them are not researched.

        Raises:
            ValueError: If ``args.duration`` cannot be parsed.
        """
        # Inexpensive filters
        if args.seen != "any" and (args.seen == "seen") != self.watched:
            log.debug(f"Not {args.seen}: {self}")
            return False
        if args.title and not re.search(args.title, self.title):
            log.debug(f"Title not matching {args.title}: {self}")
            return False
        if args.guid and not re.search(args.guid, str(self.guid)):
            log.debug(f"Guid not matching {args.guid}: {self}")
            return False
        if args.link and not re.search(args.link, self.link):
            log.debug(f"Link not matching {args.link}: {self}")
            return False
        if args.creator and (
            not self.creator or not re.search(args.creator, self.creator)
        ):
            log.debug(f"Creator not matching {args.creator}: {self}")
            return False

        # Expensive filters
        if not self.is_video:
            log.debug(f"Not a video: {self}")
            return False
        if args.duration:
            comparator, threshold = self._parse_duration_filter(args.duration)
            if not comparator(self.duration, threshold):
                log.debug(
                    f"Duration {self.duration} not matching {args.duration}: {self}"
                )
                return False

        return True

    def watch(self) -> None:
        """Play the video with mpv (downloading it first if needed).

        The element is then marked as watched and the database saved.

        Raises:
            subprocess.CalledProcessError: If mpv exits with a non-zero code.
        """
        if not self.downloaded:
            self.download()

        cmd = ["mpv", self.filepath]
        log.debug(f"Running {cmd}")
        if not self.parent.args.dryrun:
            proc = subprocess.run(cmd)
            proc.check_returncode()

        self.watched = True
        self.parent.save()

    def clean(self) -> None:
        """Remove the files of the working directory belonging to this video.

        Every entry whose name starts with ``filename`` is removed, unless
        running in dry-run mode.
        """
        assert self.is_video
        # Computed once: the property asks the downloader on every access
        prefix = self.filename
        log.info(f"Removing gone video: {prefix}*")
        for file in os.listdir():
            if file.startswith(prefix):
                log.debug(f"Removing file: {file}")
                if not self.parent.args.dryrun:
                    os.unlink(file)
|
|
|
|
|
|
2021-12-10 22:59:39 +01:00
|
|
|
|
|
|
|
|
|
class RVDatabase:
    """The elements of the RSS feed and the settings used to process them.

    The whole object is pickled into ``SAVE_FILE`` in the current working
    directory, which is how the state survives between runs.
    """

    # Name of the cache file, relative to the current working directory
    SAVE_FILE = ".cache.p"

    # Parsed command-line / configuration arguments
    args: configargparse.Namespace
    # Elements of the feed, in reverse order of their appearance in the XML
    elements: list[RVElement]

    def __init__(self, args: configargparse.Namespace) -> None:
        self.args = args

    def save(self) -> None:
        """Pickle the database into ``SAVE_FILE`` (no-op in dry-run mode).

        The data is written to a temporary file which then replaces the
        cache, so that an interrupted or failed dump leaves the previous
        cache intact.
        """
        log.debug("Saving cache")
        if self.args.dryrun:
            return
        tmp_path = self.SAVE_FILE + ".tmp"
        with open(tmp_path, "wb") as save_file:
            pickle.dump(self, save_file)
        os.replace(tmp_path, self.SAVE_FILE)

    @classmethod
    def load(cls) -> typing.Optional["RVDatabase"]:
        """Load the database saved by a previous run.

        Returns:
            The unpickled database, or None if there is no cache file or if
            it cannot be read back (a warning is logged in the latter case).
        """
        # NOTE: unpickling can execute arbitrary code, the cache file must
        # only ever come from this program.
        try:
            with open(cls.SAVE_FILE, "rb") as save_file:
                return pickle.load(save_file)
        except (TypeError, AttributeError, EOFError, pickle.UnpicklingError):
            log.warning("Corrupt / outdated cache, it will be rebuilt.")
        except FileNotFoundError:
            pass
        return None

    def salvage_cache(self, cache: "RVDatabase") -> None:
        """Let every element recover its state from `cache`, matched by guid."""
        log.debug("Salvaging cache")
        cache_els = {cache_el.guid: cache_el for cache_el in cache.elements}
        for el in self.elements:
            cache_el = cache_els.get(el.guid)
            if cache_el is not None:
                el.salvage_cache(cache_el)

    def clean_cache(self, cache: "RVDatabase") -> None:
        """Remove the files of the cached videos that left the feed.

        Only the cached elements that were already researched are
        considered, so that this never triggers a research.
        """
        log.debug("Cleaning cache")
        known_guids = {el.guid for el in self.elements}
        for el in cache.elements:
            if el.guid in known_guids:
                continue
            if el.is_researched and el.is_video:
                el.clean()

    def import_cache(self, cache: "RVDatabase") -> None:
        """Rebuild the elements from the feed stored in `cache`."""
        log.debug("Importing cache")
        # Assigning the cached property stores the value on the instance,
        # so read_feed() will not try to fetch the feed.
        self.feed_xml = cache.feed_xml
        self.read_feed()

    @functools.cached_property
    def feed_xml(self) -> minidom.Document:
        """The parsed RSS feed, fetched from ``args.feed`` on first access."""
        log.info("Fetching RSS feed")
        with urllib.request.urlopen(self.args.feed) as request:
            return minidom.parse(request)

    def read_feed(self) -> None:
        """(Re)build ``elements`` from the ``<item>`` tags of the feed.

        The elements are stored in the reverse order of the feed.
        """
        elements = []
        for item in self.feed_xml.getElementsByTagName("item"):
            element = RVElement(self, item)
            elements.append(element)
            log.debug(f"Known: {element}")
        # Reversing once is equivalent to inserting each element in front
        elements.reverse()
        self.elements = elements

    def clean(self) -> None:
        """Remove the files that do not belong to a video of the feed.

        Every regular file of the current directory, except the cache, whose
        name does not start with the file name of one of the videos is
        removed (only logged in dry-run mode). Checking whether an element is
        a video researches it if needed.
        """
        log.debug("Cleaning")
        prefixes = tuple(
            {element.filename for element in self.elements if element.is_video}
        )
        for file in os.listdir():
            if file == RVDatabase.SAVE_FILE:
                continue
            if not os.path.isfile(file):
                continue
            if file.startswith(prefixes):
                continue
            log.info(f"Removing unknown file: {file}")
            if not self.args.dryrun:
                os.unlink(file)

    @property
    def all_researched(self) -> bool:
        """Whether every element already has its video information."""
        return all(element.is_researched for element in self.elements)

    def attempt_clean(self) -> None:
        """Run clean() only if it would not have to research any element."""
        if self.all_researched:
            self.clean()

    @property
    def ytdl_opts(self) -> dict:
        """Downloader options derived from the arguments."""
        return {"format": self.args.format, "allsubtitles": self.args.subtitles}

    @property
    def ytdl_dry_opts(self) -> dict:
        """Same as ``ytdl_opts``, with ``simulate`` and ``quiet`` enabled."""
        opts = self.ytdl_opts.copy()
        opts.update({"simulate": True, "quiet": True})
        return opts

    @property
    def ytdl(self) -> youtube_dl.YoutubeDL:
        """A new downloader built from ``ytdl_opts``."""
        return youtube_dl.YoutubeDL(self.ytdl_opts)

    @property
    def ytdl_dry(self) -> youtube_dl.YoutubeDL:
        """A new downloader built from ``ytdl_dry_opts``."""
        return youtube_dl.YoutubeDL(self.ytdl_dry_opts)

    def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
        """Return the elements matching the filters, in the requested order.

        The orders that only need the RSS fields are applied before
        filtering; the duration-based ones (``short`` / ``long``) are applied
        afterwards, on the elements that passed the filters.
        """
        elements: typing.Iterable[RVElement]
        # Inexpensive sort
        if args.order == "new":
            elements = reversed(self.elements)
        elif args.order == "title":
            elements = sorted(self.elements, key=lambda el: el.title)
        elif args.order == "creator":
            elements = sorted(self.elements, key=lambda el: el.creator or "")
        elif args.order == "link":
            elements = sorted(self.elements, key=lambda el: el.link)
        elif args.order == "random":
            elements_random = self.elements.copy()
            random.shuffle(elements_random)
            elements = elements_random
        else:
            elements = self.elements

        # Possibly expensive filtering
        elements = filter(lambda el: el.matches_filter(args), elements)

        # Expensive sort
        if args.order in ("short", "long"):
            elements = sorted(
                elements,
                key=lambda el: el.duration if el.is_video else 0,
                reverse=args.order == "long",
            )

        return elements
|
2021-12-18 11:27:24 +01:00
|
|
|
|
|
2019-04-30 08:22:27 +02:00
|
|
|
|
|
2020-12-27 14:20:44 +01:00
|
|
|
|
def get_args() -> configargparse.Namespace:
    """Parse the command line, environment and configuration file.

    The default configuration file is ``rssVideos`` in the user configuration
    directory, which is taken from ``XDG_CONFIG_PATH`` (historical name used
    by this script), else ``XDG_CONFIG_HOME``, else ``~/.config/``.

    Returns:
        The parsed arguments, where ``videos`` is made absolute and the
        deprecated ``max_duration`` is converted to ``duration`` when the
        latter is not given.
    """
    config_home = (
        os.getenv("XDG_CONFIG_PATH")
        or os.getenv("XDG_CONFIG_HOME")
        or "~/.config/"
    )
    default_config_path = os.path.join(os.path.expanduser(config_home), "rssVideos")

    parser = configargparse.ArgParser(
        description="Download videos linked in "
        + "a RSS feed (e.g. an unread feed from "
        + "an RSS aggregator)",
        default_config_files=[default_config_path],
    )

    # Runtime settings
    parser.add_argument(
        "-v",
        "--verbosity",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default=None,
        help="Verbosity of log messages",
    )
    parser.add(
        "-c", "--config", required=False, is_config_file=True, help="Configuration file"
    )
    parser.add(
        "-n",
        "--dryrun",
        help="Only pretend to do actions",
        action="store_const",
        const=True,
        default=False,
    )

    # Input/Output
    parser.add(
        "--feed",
        help="URL of the RSS feed (must be public for now)",
        env_var="RSS_VIDEOS_FEED",
        required=True,
    )
    parser.add(
        "--research",
        help="Fetch video info again",
        action="store_true",
    )
    parser.add(
        "--videos",
        help="Directory to store videos",
        env_var="RSS_VIDEOS_VIDEO_DIR",
        required=True,
    )

    # Which videos
    parser.add(
        "--order",
        choices=("old", "new", "title", "creator", "link", "short", "long", "random"),
        default="old",
        help="Sorting mechanism",
    )
    parser.add("--guid", help="Regex to filter guid")
    parser.add("--creator", help="Regex to filter by creator")
    parser.add("--title", help="Regex to filter by title")
    parser.add("--link", help="Regex to filter by link")
    parser.add("--duration", help="Comparative to filter by duration")
    parser.add(
        "--seen",
        choices=("seen", "unseen", "any"),
        default="unseen",
        help="Only include seen/unseen/any videos",
    )
    # TODO Environment variables
    parser.add(
        "--max-duration",
        help="(Deprecated, use --duration instead)",
        env_var="RSS_VIDEOS_MAX_DURATION",
        type=int,
        default=0,
    )
    # TODO Allow to ask

    # How to download
    parser.add(
        "--format",
        help="Use this format to download videos."
        + " See FORMAT SELECTION in youtube-dl(1)",
        env_var="RSS_VIDEOS_FORMAT",
        default="bestvideo+bestaudio/best",
    )
    parser.add(
        "--subtitles",
        help="Download all subtitles",
        env_var="RSS_VIDEOS_SUBTITLES",
        action="store_true",
    )

    parser.add(
        "action",
        nargs="?",
        choices=(
            "download",
            "list",
            "watch",
            "binge",
            "clean",
            "seen",
            "unseen",
            "duration",
        ),
        default="download",
    )

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))
    # Backward compatibility: a bare number is a "at most" duration filter
    if not args.duration and args.max_duration:
        args.duration = str(args.max_duration)

    return args
|
2019-04-30 08:22:27 +02:00
|
|
|
|
|
|
|
|
|
|
2020-12-27 14:20:44 +01:00
|
|
|
|
def main() -> None:
    """Entry point: load the feed and the cache, then run the chosen action.

    The process changes its working directory to the videos directory
    (created if needed), since the cache and the videos are handled with
    relative paths.

    Raises:
        urllib.error.URLError: If the feed cannot be fetched and either the
            action is ``download`` or there is no cache to fall back on.
    """
    args = get_args()
    configure_logging(args)

    os.makedirs(args.videos, exist_ok=True)
    os.chdir(args.videos)

    database = RVDatabase(args)
    cache = RVDatabase.load()
    try:
        database.read_feed()
    except urllib.error.URLError as err:
        if args.action == "download" or not cache:
            raise
        # The reason is passed as a lazy logging argument, hence the
        # placeholder in the message.
        log.warning("Cannot fetch RSS feed, using cached feed: %s", err)
        database.import_cache(cache)
    if cache:
        database.salvage_cache(cache)
        database.clean_cache(cache)
    database.save()

    log.debug("Running action")
    if args.action == "clean":
        database.clean()
    else:
        database.attempt_clean()
        # Only used by the "duration" action
        duration = 0
        for element in database.filter(args):
            if args.action == "download":
                element.preload()
            elif args.action == "list":
                print(element)
            elif args.action in ("watch", "binge"):
                element.watch()
                # "watch" plays a single video, "binge" all the matching ones
                if args.action == "watch":
                    break
            elif args.action == "seen":
                element.watched = True
            elif args.action == "unseen":
                element.watched = False
            elif args.action == "duration":
                duration += element.duration
            else:
                raise NotImplementedError(f"Unimplemented action: {args.action}")
        if args.action == "duration":
            print(format_duration(duration))
        database.attempt_clean()
    database.save()


if __name__ == "__main__":
    main()
|