Compare commits
6 commits
f4c81e346a
...
105bd9461c
Author | SHA1 | Date | |
---|---|---|---|
Geoffrey Frogeye | 105bd9461c | ||
Geoffrey Frogeye | 5b7926df8f | ||
Geoffrey Frogeye | 00a9da6afc | ||
Geoffrey Frogeye | daff602a31 | ||
Geoffrey Frogeye | 9684586eec | ||
Geoffrey Frogeye | 4890555668 |
|
@ -17,6 +17,7 @@ import random
|
|||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import typing
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
@ -25,11 +26,12 @@ from xml.dom import minidom
|
|||
|
||||
import coloredlogs
|
||||
import configargparse
|
||||
import yt_dlp as youtube_dl
|
||||
import yt_dlp
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# TODO Lockfile, or a way to parallel watch and download
|
||||
# TODO Save ytdl infos and view info separately
|
||||
|
||||
def configure_logging(args: configargparse.Namespace) -> None:
|
||||
# Configure logging
|
||||
|
@ -44,16 +46,76 @@ def configure_logging(args: configargparse.Namespace) -> None:
|
|||
)
|
||||
|
||||
|
||||
class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
    """
    Post-processor that hands the final info dict back to an RVElement.

    The info dict coming out of yt_dlp.process_ie_result() is not completely
    up to date: notably, it still carries the extension picked before yt_dlp
    realizes the files cannot be merged. Hooking in as a PostProcessor lets
    us see the info dict in its final form and save what we need from it
    (it is not serializable in this state).
    """

    def __init__(self, rvelement: "RVElement") -> None:
        super().__init__()
        # Element to notify once yt_dlp reaches the post-processing stage.
        self.rvelement = rvelement

    def run(self, info: dict) -> tuple[list, dict]:
        """
        Pass ``info`` to the element and return it unchanged.

        The first item of the returned tuple is, per the yt_dlp
        post-processor contract, the list of files that may be deleted;
        this post-processor never produces any.
        """
        self.rvelement.update_post_download(info)
        files_to_delete: list = []
        return files_to_delete, info
|
||||
|
||||
def parse_duration(string: str) -> int:
    """
    Convert a human-written duration into a number of seconds.

    The string is an integer optionally followed by a single unit letter
    (case-insensitive): ``s`` for seconds, ``m`` for minutes, ``h`` for
    hours. Without a unit the value is taken as seconds.

    Raises:
        ValueError: if the string is empty, the unit letter is unknown, or
            the numeric part is not a valid integer.
    """
    DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}

    if not string:
        # Indexing an empty string would otherwise surface as an IndexError.
        raise ValueError("Empty duration")

    mult_index = string[-1].lower()
    if mult_index.isdigit():
        # No unit suffix: the whole string is the number.
        mult_index = ""
    else:
        string = string[:-1]

    try:
        multiplier = DURATION_MULTIPLIERS[mult_index]
    except KeyError:
        # A failed dict lookup raises KeyError (not IndexError).
        raise ValueError(f"Unknown duration multiplier: {mult_index}") from None

    return int(string) * multiplier
|
||||
|
||||
|
||||
def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
    """
    Build a predicate that tests a duration against a comparison expression.

    ``compstr`` is an optional comparator character followed by a duration
    as understood by ``parse_duration``. ``<`` and ``-`` mean strictly
    shorter, ``>`` and ``+`` strictly longer, ``=`` exactly equal. When the
    string starts with a digit there is no comparator and the test is
    "at most".

    Returns:
        A function taking a duration in seconds and telling whether it
        satisfies the comparison.

    Raises:
        ValueError: if ``compstr`` is empty, starts with an unknown
            comparator, or its duration part cannot be parsed.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import operator

    # operator.* behave like int.__lt__ & co. for ints, but do not reject
    # other numeric types (NOTE(review): durations may be floats — confirm).
    DURATION_COMPARATORS = {
        "<": operator.lt,
        "-": operator.lt,
        ">": operator.gt,
        "+": operator.gt,
        "=": operator.eq,
        "": operator.le,
    }

    if not compstr:
        # Indexing an empty string would otherwise surface as an IndexError.
        raise ValueError("Empty duration comparison")

    comp_index = compstr[0]
    if comp_index.isdigit():
        # No comparator prefix: the whole string is the duration.
        comp_index = ""
    else:
        compstr = compstr[1:]

    try:
        comparator = DURATION_COMPARATORS[comp_index]
    except KeyError:
        # A failed dict lookup raises KeyError (not IndexError).
        raise ValueError(f"Unknown duration comparator: {comp_index}") from None

    duration = parse_duration(compstr)

    return lambda d: comparator(d, duration)
|
||||
|
||||
def format_duration(duration: int) -> str:
    """
    Render a duration in seconds as ``HH:MM:SS``.

    Hours are not wrapped at 24, so durations of a day or more (for example
    a total summed over many videos) are shown in full, e.g. ``25:00:00``.
    Fractional seconds are truncated. A negative duration is rendered with
    a leading ``-``.
    """
    total = int(duration)
    sign = "-" if total < 0 else ""
    minutes, seconds = divmod(abs(total), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"
|
||||
|
||||
|
||||
class RVElement:
|
||||
parent: "RVDatabase"
|
||||
item: minidom.Element
|
||||
was_downloaded: bool
|
||||
downloaded_filepath: typing.Optional[str]
|
||||
watched: bool
|
||||
|
||||
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
|
||||
self.parent = parent
|
||||
self.item = item
|
||||
self.was_downloaded = False
|
||||
self.downloaded_filepath = None
|
||||
self.watched = False
|
||||
|
||||
def get_tag_data(self, tag_name: str) -> str:
|
||||
|
@ -101,16 +163,25 @@ class RVElement:
|
|||
return "ytdl_infos" in self.__dict__
|
||||
|
||||
def salvage_cache(self, cache: "RVElement") -> None:
|
||||
if cache.is_researched:
|
||||
if not self.parent.args.research and cache.is_researched:
|
||||
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
|
||||
log.debug(f"From cache: {self}")
|
||||
if cache.was_downloaded:
|
||||
self.was_downloaded = True
|
||||
if cache.downloaded_filepath:
|
||||
self.downloaded_filepath = cache.downloaded_filepath
|
||||
if cache.watched:
|
||||
self.watched = True
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.guid}: {self.creator} – {self.title} – {self.link}"
|
||||
str = f"{self.guid}: {self.creator if self.creator else '?'} – {self.title}"
|
||||
if self.is_researched:
|
||||
if self.is_video:
|
||||
str += f" ({format_duration(self.duration)})"
|
||||
else:
|
||||
str += " (N/A)"
|
||||
else:
|
||||
str += " (?)"
|
||||
str += f" – {self.link}"
|
||||
return str
|
||||
|
||||
@property
|
||||
def downloaded(self) -> bool:
|
||||
|
@ -122,21 +193,15 @@ class RVElement:
|
|||
def ytdl_infos(self) -> typing.Optional[dict]:
|
||||
log.info(f"Researching: {self}")
|
||||
try:
|
||||
infos = self.parent.ytdl_dry.extract_info(self.link)
|
||||
infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
|
||||
except KeyboardInterrupt as e:
|
||||
raise e
|
||||
except youtube_dl.utils.DownloadError as e:
|
||||
except yt_dlp.utils.DownloadError as e:
|
||||
# TODO Still raise in case of temporary network issue
|
||||
log.warning(e)
|
||||
infos = None
|
||||
# Apparently that thing is transformed from a LazyList
|
||||
# somewhere in the normal yt_dlp process
|
||||
if (
|
||||
infos
|
||||
and "thumbnails" in infos
|
||||
and isinstance(infos["thumbnails"], youtube_dl.utils.LazyList)
|
||||
):
|
||||
infos["thumbnails"] = infos["thumbnails"].exhaust()
|
||||
if infos:
|
||||
infos = self.parent.ytdl_dry.sanitize_info(infos)
|
||||
# Save database once it's been computed
|
||||
self.__dict__["ytdl_infos"] = infos
|
||||
self.parent.save()
|
||||
|
@ -156,7 +221,8 @@ class RVElement:
|
|||
@property
|
||||
def filepath(self) -> str:
|
||||
assert self.is_video
|
||||
# TODO This doesn't change the extension to mkv when the formats are incompatible
|
||||
if self.downloaded_filepath:
|
||||
return self.downloaded_filepath
|
||||
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
|
||||
|
||||
@property
|
||||
|
@ -168,10 +234,18 @@ class RVElement:
|
|||
assert self.is_video
|
||||
log.info(f"Downloading: {self}")
|
||||
if not self.parent.args.dryrun:
|
||||
self.parent.ytdl.process_ie_result(self.ytdl_infos, True, {})
|
||||
self.was_downloaded = True
|
||||
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
|
||||
ydl.add_post_processor(SaveInfoPP(self))
|
||||
ydl.process_ie_result(self.ytdl_infos, download=True)
|
||||
self.parent.save()
|
||||
|
||||
def update_post_download(self, info: dict) -> None:
|
||||
self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
|
||||
|
||||
@property
|
||||
def was_downloaded(self) -> bool:
|
||||
return self.downloaded_filepath is not None
|
||||
|
||||
def preload(self) -> None:
|
||||
assert self.is_video
|
||||
if self.downloaded:
|
||||
|
@ -182,18 +256,8 @@ class RVElement:
|
|||
return
|
||||
self.download()
|
||||
|
||||
MATCHES_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, None: 1}
|
||||
|
||||
MATCHES_DURATION_COMPARATORS = {
|
||||
"<": int.__lt__,
|
||||
"-": int.__lt__,
|
||||
">": int.__gt__,
|
||||
"+": int.__gt__,
|
||||
"=": int.__eq__,
|
||||
None: int.__le__,
|
||||
}
|
||||
|
||||
def matches_filter(self, args: configargparse.Namespace) -> bool:
|
||||
# Inexpensive filters
|
||||
if args.seen != "any" and (args.seen == "seen") != self.watched:
|
||||
log.debug(f"Not {args.seen}: {self}")
|
||||
return False
|
||||
|
@ -206,39 +270,22 @@ class RVElement:
|
|||
if args.link and not re.search(args.link, self.link):
|
||||
log.debug(f"Link not matching {args.link}: {self}")
|
||||
return False
|
||||
if args.creator and (not self.creator or not re.search(args.creator, self.creator)):
|
||||
if args.creator and (
|
||||
not self.creator or not re.search(args.creator, self.creator)
|
||||
):
|
||||
log.debug(f"Creator not matching {args.creator}: {self}")
|
||||
return False
|
||||
|
||||
# Expensive filters
|
||||
if not self.is_video:
|
||||
log.debug(f"Not a video: {self}")
|
||||
return False
|
||||
if args.duration:
|
||||
dur = args.duration
|
||||
|
||||
mult_index = dur[-1].lower()
|
||||
if mult_index.isdigit():
|
||||
mult_index = None
|
||||
else:
|
||||
dur = dur[:-1]
|
||||
try:
|
||||
multiplier = self.MATCHES_DURATION_MULTIPLIERS[mult_index]
|
||||
except IndexError:
|
||||
raise ValueError(f"Unknown duration multiplier: {mult_index}")
|
||||
|
||||
comp_index = dur[0]
|
||||
if comp_index.isdigit():
|
||||
comp_index = None
|
||||
else:
|
||||
dur = dur[1:]
|
||||
try:
|
||||
comparator = self.MATCHES_DURATION_COMPARATORS[comp_index]
|
||||
except IndexError:
|
||||
raise ValueError(f"Unknown duration comparator: {comp_index}")
|
||||
|
||||
duration = int(dur)
|
||||
if not comparator(self.duration, duration * multiplier):
|
||||
log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
|
||||
if args.duration and not compare_duration(args.duration)(self.duration):
|
||||
log.debug(
|
||||
f"Duration {self.duration} not matching {args.duration}: {self}"
|
||||
)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def watch(self) -> None:
|
||||
|
@ -365,28 +412,60 @@ class RVDatabase:
|
|||
@property
|
||||
def ytdl_dry_opts(self) -> dict:
|
||||
opts = self.ytdl_opts.copy()
|
||||
opts.update({"simulate": True, "quiet": True})
|
||||
opts.update({"quiet": True})
|
||||
return opts
|
||||
|
||||
@property
|
||||
def ytdl(self) -> youtube_dl.YoutubeDL:
|
||||
return youtube_dl.YoutubeDL(self.ytdl_opts)
|
||||
|
||||
@property
|
||||
def ytdl_dry(self) -> youtube_dl.YoutubeDL:
|
||||
return youtube_dl.YoutubeDL(self.ytdl_dry_opts)
|
||||
def ytdl_dry(self) -> yt_dlp.YoutubeDL:
|
||||
return yt_dlp.YoutubeDL(self.ytdl_dry_opts)
|
||||
|
||||
def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
|
||||
elements: typing.Iterable[RVElement]
|
||||
if args.order == "old":
|
||||
elements = self.elements
|
||||
elif args.order == "new":
|
||||
# Inexpensive sort
|
||||
if args.order == "new":
|
||||
elements = reversed(self.elements)
|
||||
elif args.order == "title":
|
||||
elements = sorted(self.elements, key=lambda el: el.title)
|
||||
elif args.order == "creator":
|
||||
elements = sorted(self.elements, key=lambda el: el.creator or "")
|
||||
elif args.order == "link":
|
||||
elements = sorted(self.elements, key=lambda el: el.link)
|
||||
elif args.order == "random":
|
||||
elements_random = self.elements.copy()
|
||||
random.shuffle(elements_random)
|
||||
elements = elements_random
|
||||
return filter(lambda el: el.matches_filter(args), elements)
|
||||
else:
|
||||
elements = self.elements
|
||||
|
||||
# Possibly expensive filtering
|
||||
elements = filter(lambda el: el.matches_filter(args), elements)
|
||||
|
||||
# Expensive sort
|
||||
if args.order == "short":
|
||||
elements = sorted(
|
||||
elements, key=lambda el: el.duration if el.is_video else 0
|
||||
)
|
||||
elif args.order == "long":
|
||||
elements = sorted(
|
||||
elements, key=lambda el: el.duration if el.is_video else 0, reverse=True
|
||||
)
|
||||
|
||||
# Post sorting filtering
|
||||
if args.total_duration:
|
||||
rem = parse_duration(args.total_duration)
|
||||
old_els = list(elements)
|
||||
elements = list()
|
||||
while rem > 0:
|
||||
for el in old_els:
|
||||
if el.duration < rem:
|
||||
elements.append(el)
|
||||
rem -= el.duration
|
||||
old_els.remove(el)
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
return elements
|
||||
|
||||
|
||||
def get_args() -> configargparse.Namespace:
|
||||
|
@ -428,6 +507,17 @@ def get_args() -> configargparse.Namespace:
|
|||
env_var="RSS_VIDEOS_FEED",
|
||||
required=True,
|
||||
)
|
||||
parser.add(
|
||||
"--research",
|
||||
help="Fetch video info again",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add(
|
||||
"--no-refresh",
|
||||
dest="refresh",
|
||||
help="Don't fetch feed",
|
||||
action="store_false",
|
||||
)
|
||||
parser.add(
|
||||
"--videos",
|
||||
help="Directory to store videos",
|
||||
|
@ -438,7 +528,7 @@ def get_args() -> configargparse.Namespace:
|
|||
# Which videos
|
||||
parser.add(
|
||||
"--order",
|
||||
choices=("old", "new", "random"),
|
||||
choices=("old", "new", "title", "creator", "link", "short", "long", "random"),
|
||||
default="old",
|
||||
help="Sorting mechanism",
|
||||
)
|
||||
|
@ -447,7 +537,16 @@ def get_args() -> configargparse.Namespace:
|
|||
parser.add("--title", help="Regex to filter by title")
|
||||
parser.add("--link", help="Regex to filter by link")
|
||||
parser.add("--duration", help="Comparative to filter by duration")
|
||||
parser.add("--seen", choices=("seen","unseen","any"), default="unseen", help="Only include seen/unseen/any videos")
|
||||
parser.add(
|
||||
"--seen",
|
||||
choices=("seen", "unseen", "any"),
|
||||
default="unseen",
|
||||
help="Only include seen/unseen/any videos",
|
||||
)
|
||||
parser.add(
|
||||
"--total-duration",
|
||||
help="Use videos that fit under the total given",
|
||||
)
|
||||
# TODO Environment variables
|
||||
parser.add(
|
||||
"--max-duration",
|
||||
|
@ -476,7 +575,15 @@ def get_args() -> configargparse.Namespace:
|
|||
parser.add(
|
||||
"action",
|
||||
nargs="?",
|
||||
choices=("download", "list", "watch", "binge", "clean"),
|
||||
choices=(
|
||||
"download",
|
||||
"list",
|
||||
"watch",
|
||||
"binge",
|
||||
"clean",
|
||||
"seen",
|
||||
"unseen",
|
||||
),
|
||||
default="download",
|
||||
)
|
||||
|
||||
|
@ -497,14 +604,22 @@ def main() -> None:
|
|||
|
||||
database = RVDatabase(args)
|
||||
cache = RVDatabase.load()
|
||||
feed_fetched = False
|
||||
if args.refresh:
|
||||
try:
|
||||
database.read_feed()
|
||||
feed_fetched = True
|
||||
except urllib.error.URLError as err:
|
||||
if args.action == "download" or not cache:
|
||||
raise err
|
||||
else:
|
||||
log.warning("Cannot fetch RSS feed, using cached feed.", err)
|
||||
if args.action == "download":
|
||||
raise RuntimeError("Couldn't fetch feed, refusing to download")
|
||||
# This is a quirky failsafe in case of no internet connection,
|
||||
# so the script doesn't go noting that no element is a video.
|
||||
if not feed_fetched:
|
||||
if cache:
|
||||
log.warning("Using cached feed.")
|
||||
database.import_cache(cache)
|
||||
else:
|
||||
raise FileNotFoundError("Feed not fetched and no cached feed.")
|
||||
if cache:
|
||||
database.salvage_cache(cache)
|
||||
database.clean_cache(cache)
|
||||
|
@ -514,7 +629,7 @@ def main() -> None:
|
|||
if args.action == "clean":
|
||||
database.clean()
|
||||
else:
|
||||
database.attempt_clean()
|
||||
duration = 0
|
||||
for element in database.filter(args):
|
||||
if args.action == "download":
|
||||
element.preload()
|
||||
|
@ -524,6 +639,18 @@ def main() -> None:
|
|||
element.watch()
|
||||
if args.action == "watch":
|
||||
break
|
||||
elif args.action == "seen":
|
||||
if not element.watched:
|
||||
log.info(f"Maked as seen: {element}")
|
||||
element.watched = True
|
||||
elif args.action == "unseen":
|
||||
if element.watched:
|
||||
log.info(f"Maked as unseen: {element}")
|
||||
element.watched = False
|
||||
else:
|
||||
raise NotImplementedError(f"Unimplemented action: {args.action}")
|
||||
duration += element.duration if element.is_video else 0
|
||||
log.info(f"Total duration: {format_duration(duration)}")
|
||||
database.attempt_clean()
|
||||
database.save()
|
||||
|
||||
|
|
Loading…
Reference in a new issue