Compare commits
6 commits
f4c81e346a
...
105bd9461c
Author | SHA1 | Date | |
---|---|---|---|
Geoffrey Frogeye | 105bd9461c | ||
Geoffrey Frogeye | 5b7926df8f | ||
Geoffrey Frogeye | 00a9da6afc | ||
Geoffrey Frogeye | daff602a31 | ||
Geoffrey Frogeye | 9684586eec | ||
Geoffrey Frogeye | 4890555668 |
|
@ -17,6 +17,7 @@ import random
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
import typing
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
@ -25,11 +26,12 @@ from xml.dom import minidom
|
||||||
|
|
||||||
import coloredlogs
|
import coloredlogs
|
||||||
import configargparse
|
import configargparse
|
||||||
import yt_dlp as youtube_dl
|
import yt_dlp
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# TODO Lockfile, or a way to parallel watch and download
|
# TODO Lockfile, or a way to parallel watch and download
|
||||||
|
# TODO Save ytdl infos and view info separately
|
||||||
|
|
||||||
def configure_logging(args: configargparse.Namespace) -> None:
|
def configure_logging(args: configargparse.Namespace) -> None:
|
||||||
# Configure logging
|
# Configure logging
|
||||||
|
@ -44,16 +46,76 @@ def configure_logging(args: configargparse.Namespace) -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
    """
    Post-processor handing the final info dict back to an RVElement.

    yt_dlp.process_ie_result() does not return a fully updated info dict:
    notably, the extension is still the one chosen before yt_dlp realizes
    the files cannot be merged. Running as a PostProcessor lets us see the
    info dict in its final form and keep what we need from it (the dict is
    not serializable in that state).
    """

    def __init__(self, rvelement: "RVElement") -> None:
        # Remember which element to notify, then let the base class
        # perform its own initialization.
        self.rvelement = rvelement
        super().__init__()

    def run(self, info: dict) -> tuple[list, dict]:
        """
        Pass the info dict to the element and return it untouched.

        The first returned item is always an empty list (in yt_dlp's
        PostProcessor contract, the files to delete).
        """
        self.rvelement.update_post_download(info)
        nothing_to_delete: list = []
        return nothing_to_delete, info
|
||||||
|
|
||||||
|
def parse_duration(string: str) -> int:
    """
    Convert a duration such as "90", "90s", "15m" or "2H" into seconds.

    The last character may be a unit: "s" (seconds), "m" (minutes) or
    "h" (hours), case-insensitive. When the string ends with a digit,
    the value is taken as seconds.

    Raises ValueError when the string is empty, when the unit is unknown,
    or when the numeric part is not a valid integer.
    """
    DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}

    # Indexing an empty string would raise a bare IndexError.
    if not string:
        raise ValueError("Empty duration")

    mult_index = string[-1].lower()
    if mult_index.isdigit():
        mult_index = ""
    else:
        string = string[:-1]

    try:
        multiplier = DURATION_MULTIPLIERS[mult_index]
    except KeyError:
        # A missing dict key raises KeyError (not IndexError).
        raise ValueError(f"Unknown duration multiplier: {mult_index}") from None

    return int(string) * multiplier
|
||||||
|
|
||||||
|
|
||||||
|
def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
    """
    Build a predicate from a comparative duration such as "<10m" or "+1h".

    An optional first character selects how a tested duration is compared
    against the given one: "<" or "-" (lower than), ">" or "+" (greater
    than), "=" (equal). When the string starts with a digit, the predicate
    accepts durations lower than or equal to the given one. The rest of
    the string is converted to seconds by parse_duration().

    Raises ValueError when the string is empty or starts with an unknown
    comparator; errors from parse_duration() propagate unchanged.
    """
    # Function-scope import: operator.* give the same results as the unbound
    # int.__lt__-style methods for ints, but do not raise TypeError when the
    # tested duration is another numeric type (e.g. float).
    import operator

    DURATION_COMPARATORS = {
        "<": operator.lt,
        "-": operator.lt,
        ">": operator.gt,
        "+": operator.gt,
        "=": operator.eq,
        "": operator.le,
    }

    # Indexing an empty string would raise a bare IndexError.
    if not compstr:
        raise ValueError("Empty duration comparison")

    comp_index = compstr[0]
    if comp_index.isdigit():
        comp_index = ""
    else:
        compstr = compstr[1:]

    try:
        comparator = DURATION_COMPARATORS[comp_index]
    except KeyError:
        # A missing dict key raises KeyError (not IndexError).
        raise ValueError(f"Unknown duration comparator: {comp_index}") from None

    duration = parse_duration(compstr)

    return lambda d: comparator(d, duration)
|
||||||
|
|
||||||
|
def format_duration(duration: int) -> str:
    """
    Format a number of seconds as "HH:MM:SS".

    The hours field is not capped at 23, so durations of a day or more
    (e.g. a summed total) read "25:00:00" instead of wrapping around to
    "01:00:00". Fractional seconds are truncated; a negative duration is
    rendered with a leading "-".
    """
    total = int(duration)
    sign = "-" if total < 0 else ""
    minutes, seconds = divmod(abs(total), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"
|
||||||
|
|
||||||
|
|
||||||
class RVElement:
|
class RVElement:
|
||||||
parent: "RVDatabase"
|
parent: "RVDatabase"
|
||||||
item: minidom.Element
|
item: minidom.Element
|
||||||
was_downloaded: bool
|
downloaded_filepath: typing.Optional[str]
|
||||||
watched: bool
|
watched: bool
|
||||||
|
|
||||||
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
|
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.item = item
|
self.item = item
|
||||||
self.was_downloaded = False
|
self.downloaded_filepath = None
|
||||||
self.watched = False
|
self.watched = False
|
||||||
|
|
||||||
def get_tag_data(self, tag_name: str) -> str:
|
def get_tag_data(self, tag_name: str) -> str:
|
||||||
|
@ -101,16 +163,25 @@ class RVElement:
|
||||||
return "ytdl_infos" in self.__dict__
|
return "ytdl_infos" in self.__dict__
|
||||||
|
|
||||||
def salvage_cache(self, cache: "RVElement") -> None:
|
def salvage_cache(self, cache: "RVElement") -> None:
|
||||||
if cache.is_researched:
|
if not self.parent.args.research and cache.is_researched:
|
||||||
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
|
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
|
||||||
log.debug(f"From cache: {self}")
|
log.debug(f"From cache: {self}")
|
||||||
if cache.was_downloaded:
|
if cache.downloaded_filepath:
|
||||||
self.was_downloaded = True
|
self.downloaded_filepath = cache.downloaded_filepath
|
||||||
if cache.watched:
|
if cache.watched:
|
||||||
self.watched = True
|
self.watched = True
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return f"{self.guid}: {self.creator} – {self.title} – {self.link}"
|
str = f"{self.guid}: {self.creator if self.creator else '?'} – {self.title}"
|
||||||
|
if self.is_researched:
|
||||||
|
if self.is_video:
|
||||||
|
str += f" ({format_duration(self.duration)})"
|
||||||
|
else:
|
||||||
|
str += " (N/A)"
|
||||||
|
else:
|
||||||
|
str += " (?)"
|
||||||
|
str += f" – {self.link}"
|
||||||
|
return str
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def downloaded(self) -> bool:
|
def downloaded(self) -> bool:
|
||||||
|
@ -122,21 +193,15 @@ class RVElement:
|
||||||
def ytdl_infos(self) -> typing.Optional[dict]:
|
def ytdl_infos(self) -> typing.Optional[dict]:
|
||||||
log.info(f"Researching: {self}")
|
log.info(f"Researching: {self}")
|
||||||
try:
|
try:
|
||||||
infos = self.parent.ytdl_dry.extract_info(self.link)
|
infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
|
||||||
except KeyboardInterrupt as e:
|
except KeyboardInterrupt as e:
|
||||||
raise e
|
raise e
|
||||||
except youtube_dl.utils.DownloadError as e:
|
except yt_dlp.utils.DownloadError as e:
|
||||||
# TODO Still raise in case of temporary network issue
|
# TODO Still raise in case of temporary network issue
|
||||||
log.warning(e)
|
log.warning(e)
|
||||||
infos = None
|
infos = None
|
||||||
# Apparently that thing is transformed from a LazyList
|
if infos:
|
||||||
# somewhere in the normal yt_dlp process
|
infos = self.parent.ytdl_dry.sanitize_info(infos)
|
||||||
if (
|
|
||||||
infos
|
|
||||||
and "thumbnails" in infos
|
|
||||||
and isinstance(infos["thumbnails"], youtube_dl.utils.LazyList)
|
|
||||||
):
|
|
||||||
infos["thumbnails"] = infos["thumbnails"].exhaust()
|
|
||||||
# Save database once it's been computed
|
# Save database once it's been computed
|
||||||
self.__dict__["ytdl_infos"] = infos
|
self.__dict__["ytdl_infos"] = infos
|
||||||
self.parent.save()
|
self.parent.save()
|
||||||
|
@ -156,7 +221,8 @@ class RVElement:
|
||||||
@property
|
@property
|
||||||
def filepath(self) -> str:
|
def filepath(self) -> str:
|
||||||
assert self.is_video
|
assert self.is_video
|
||||||
# TODO This doesn't change the extension to mkv when the formats are incomaptible
|
if self.downloaded_filepath:
|
||||||
|
return self.downloaded_filepath
|
||||||
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
|
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -168,10 +234,18 @@ class RVElement:
|
||||||
assert self.is_video
|
assert self.is_video
|
||||||
log.info(f"Downloading: {self}")
|
log.info(f"Downloading: {self}")
|
||||||
if not self.parent.args.dryrun:
|
if not self.parent.args.dryrun:
|
||||||
self.parent.ytdl.process_ie_result(self.ytdl_infos, True, {})
|
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
|
||||||
self.was_downloaded = True
|
ydl.add_post_processor(SaveInfoPP(self))
|
||||||
|
ydl.process_ie_result(self.ytdl_infos, download=True)
|
||||||
self.parent.save()
|
self.parent.save()
|
||||||
|
|
||||||
|
def update_post_download(self, info: dict) -> None:
    """
    Store the file path computed from a post-download info dict.

    The path is whatever the parent's dry YoutubeDL instance returns from
    prepare_filename() for this info dict.
    """
    dry_ytdl = self.parent.ytdl_dry
    final_path = dry_ytdl.prepare_filename(info)
    self.downloaded_filepath = final_path
|
||||||
|
|
||||||
|
@property
def was_downloaded(self) -> bool:
    """Whether a downloaded file path is recorded (i.e. it is not None)."""
    path_missing = self.downloaded_filepath is None
    return not path_missing
|
||||||
|
|
||||||
def preload(self) -> None:
|
def preload(self) -> None:
|
||||||
assert self.is_video
|
assert self.is_video
|
||||||
if self.downloaded:
|
if self.downloaded:
|
||||||
|
@ -182,18 +256,8 @@ class RVElement:
|
||||||
return
|
return
|
||||||
self.download()
|
self.download()
|
||||||
|
|
||||||
MATCHES_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, None: 1}
|
|
||||||
|
|
||||||
MATCHES_DURATION_COMPARATORS = {
|
|
||||||
"<": int.__lt__,
|
|
||||||
"-": int.__lt__,
|
|
||||||
">": int.__gt__,
|
|
||||||
"+": int.__gt__,
|
|
||||||
"=": int.__eq__,
|
|
||||||
None: int.__le__,
|
|
||||||
}
|
|
||||||
|
|
||||||
def matches_filter(self, args: configargparse.Namespace) -> bool:
|
def matches_filter(self, args: configargparse.Namespace) -> bool:
|
||||||
|
# Inexpensive filters
|
||||||
if args.seen != "any" and (args.seen == "seen") != self.watched:
|
if args.seen != "any" and (args.seen == "seen") != self.watched:
|
||||||
log.debug(f"Not {args.seen}: {self}")
|
log.debug(f"Not {args.seen}: {self}")
|
||||||
return False
|
return False
|
||||||
|
@ -206,39 +270,22 @@ class RVElement:
|
||||||
if args.link and not re.search(args.link, self.link):
|
if args.link and not re.search(args.link, self.link):
|
||||||
log.debug(f"Link not matching {args.link}: {self}")
|
log.debug(f"Link not matching {args.link}: {self}")
|
||||||
return False
|
return False
|
||||||
if args.creator and (not self.creator or not re.search(args.creator, self.creator)):
|
if args.creator and (
|
||||||
|
not self.creator or not re.search(args.creator, self.creator)
|
||||||
|
):
|
||||||
log.debug(f"Creator not matching {args.creator}: {self}")
|
log.debug(f"Creator not matching {args.creator}: {self}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Expensive filters
|
||||||
if not self.is_video:
|
if not self.is_video:
|
||||||
log.debug(f"Not a video: {self}")
|
log.debug(f"Not a video: {self}")
|
||||||
return False
|
return False
|
||||||
if args.duration:
|
if args.duration and not compare_duration(args.duration)(self.duration):
|
||||||
dur = args.duration
|
log.debug(
|
||||||
|
f"Duration {self.duration} not matching {args.duration}: {self}"
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
mult_index = dur[-1].lower()
|
|
||||||
if mult_index.isdigit():
|
|
||||||
mult_index = None
|
|
||||||
else:
|
|
||||||
dur = dur[:-1]
|
|
||||||
try:
|
|
||||||
multiplier = self.MATCHES_DURATION_MULTIPLIERS[mult_index]
|
|
||||||
except IndexError:
|
|
||||||
raise ValueError(f"Unknown duration multiplier: {mult_index}")
|
|
||||||
|
|
||||||
comp_index = dur[0]
|
|
||||||
if comp_index.isdigit():
|
|
||||||
comp_index = None
|
|
||||||
else:
|
|
||||||
dur = dur[1:]
|
|
||||||
try:
|
|
||||||
comparator = self.MATCHES_DURATION_COMPARATORS[comp_index]
|
|
||||||
except IndexError:
|
|
||||||
raise ValueError(f"Unknown duration comparator: {comp_index}")
|
|
||||||
|
|
||||||
duration = int(dur)
|
|
||||||
if not comparator(self.duration, duration * multiplier):
|
|
||||||
log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
|
|
||||||
return False
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def watch(self) -> None:
|
def watch(self) -> None:
|
||||||
|
@ -365,28 +412,60 @@ class RVDatabase:
|
||||||
@property
|
@property
|
||||||
def ytdl_dry_opts(self) -> dict:
|
def ytdl_dry_opts(self) -> dict:
|
||||||
opts = self.ytdl_opts.copy()
|
opts = self.ytdl_opts.copy()
|
||||||
opts.update({"simulate": True, "quiet": True})
|
opts.update({"quiet": True})
|
||||||
return opts
|
return opts
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ytdl(self) -> youtube_dl.YoutubeDL:
|
def ytdl_dry(self) -> yt_dlp.YoutubeDL:
|
||||||
return youtube_dl.YoutubeDL(self.ytdl_opts)
|
return yt_dlp.YoutubeDL(self.ytdl_dry_opts)
|
||||||
|
|
||||||
@property
|
|
||||||
def ytdl_dry(self) -> youtube_dl.YoutubeDL:
|
|
||||||
return youtube_dl.YoutubeDL(self.ytdl_dry_opts)
|
|
||||||
|
|
||||||
def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
|
def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
|
||||||
elements: typing.Iterable[RVElement]
|
elements: typing.Iterable[RVElement]
|
||||||
if args.order == "old":
|
# Inexpensive sort
|
||||||
elements = self.elements
|
if args.order == "new":
|
||||||
elif args.order == "new":
|
|
||||||
elements = reversed(self.elements)
|
elements = reversed(self.elements)
|
||||||
|
elif args.order == "title":
|
||||||
|
elements = sorted(self.elements, key=lambda el: el.title)
|
||||||
|
elif args.order == "creator":
|
||||||
|
elements = sorted(self.elements, key=lambda el: el.creator or "")
|
||||||
|
elif args.order == "link":
|
||||||
|
elements = sorted(self.elements, key=lambda el: el.link)
|
||||||
elif args.order == "random":
|
elif args.order == "random":
|
||||||
elements_random = self.elements.copy()
|
elements_random = self.elements.copy()
|
||||||
random.shuffle(elements_random)
|
random.shuffle(elements_random)
|
||||||
elements = elements_random
|
elements = elements_random
|
||||||
return filter(lambda el: el.matches_filter(args), elements)
|
else:
|
||||||
|
elements = self.elements
|
||||||
|
|
||||||
|
# Possibly expensive filtering
|
||||||
|
elements = filter(lambda el: el.matches_filter(args), elements)
|
||||||
|
|
||||||
|
# Expensive sort
|
||||||
|
if args.order == "short":
|
||||||
|
elements = sorted(
|
||||||
|
elements, key=lambda el: el.duration if el.is_video else 0
|
||||||
|
)
|
||||||
|
elif args.order == "long":
|
||||||
|
elements = sorted(
|
||||||
|
elements, key=lambda el: el.duration if el.is_video else 0, reverse=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Post sorting filtering
|
||||||
|
if args.total_duration:
|
||||||
|
rem = parse_duration(args.total_duration)
|
||||||
|
old_els = list(elements)
|
||||||
|
elements = list()
|
||||||
|
while rem > 0:
|
||||||
|
for el in old_els:
|
||||||
|
if el.duration < rem:
|
||||||
|
elements.append(el)
|
||||||
|
rem -= el.duration
|
||||||
|
old_els.remove(el)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
return elements
|
||||||
|
|
||||||
|
|
||||||
def get_args() -> configargparse.Namespace:
|
def get_args() -> configargparse.Namespace:
|
||||||
|
@ -428,6 +507,17 @@ def get_args() -> configargparse.Namespace:
|
||||||
env_var="RSS_VIDEOS_FEED",
|
env_var="RSS_VIDEOS_FEED",
|
||||||
required=True,
|
required=True,
|
||||||
)
|
)
|
||||||
|
parser.add(
|
||||||
|
"--research",
|
||||||
|
help="Fetch video info again",
|
||||||
|
action="store_true",
|
||||||
|
)
|
||||||
|
parser.add(
|
||||||
|
"--no-refresh",
|
||||||
|
dest="refresh",
|
||||||
|
help="Don't fetch feed",
|
||||||
|
action="store_false",
|
||||||
|
)
|
||||||
parser.add(
|
parser.add(
|
||||||
"--videos",
|
"--videos",
|
||||||
help="Directory to store videos",
|
help="Directory to store videos",
|
||||||
|
@ -438,7 +528,7 @@ def get_args() -> configargparse.Namespace:
|
||||||
# Which videos
|
# Which videos
|
||||||
parser.add(
|
parser.add(
|
||||||
"--order",
|
"--order",
|
||||||
choices=("old", "new", "random"),
|
choices=("old", "new", "title", "creator", "link", "short", "long", "random"),
|
||||||
default="old",
|
default="old",
|
||||||
help="Sorting mechanism",
|
help="Sorting mechanism",
|
||||||
)
|
)
|
||||||
|
@ -447,7 +537,16 @@ def get_args() -> configargparse.Namespace:
|
||||||
parser.add("--title", help="Regex to filter by title")
|
parser.add("--title", help="Regex to filter by title")
|
||||||
parser.add("--link", help="Regex to filter by link")
|
parser.add("--link", help="Regex to filter by link")
|
||||||
parser.add("--duration", help="Comparative to filter by duration")
|
parser.add("--duration", help="Comparative to filter by duration")
|
||||||
parser.add("--seen", choices=("seen","unseen","any"), default="unseen", help="Only include seen/unseen/any videos")
|
parser.add(
|
||||||
|
"--seen",
|
||||||
|
choices=("seen", "unseen", "any"),
|
||||||
|
default="unseen",
|
||||||
|
help="Only include seen/unseen/any videos",
|
||||||
|
)
|
||||||
|
parser.add(
|
||||||
|
"--total-duration",
|
||||||
|
help="Use videos that fit under the total given",
|
||||||
|
)
|
||||||
# TODO Environment variables
|
# TODO Environment variables
|
||||||
parser.add(
|
parser.add(
|
||||||
"--max-duration",
|
"--max-duration",
|
||||||
|
@ -476,7 +575,15 @@ def get_args() -> configargparse.Namespace:
|
||||||
parser.add(
|
parser.add(
|
||||||
"action",
|
"action",
|
||||||
nargs="?",
|
nargs="?",
|
||||||
choices=("download", "list", "watch", "binge", "clean"),
|
choices=(
|
||||||
|
"download",
|
||||||
|
"list",
|
||||||
|
"watch",
|
||||||
|
"binge",
|
||||||
|
"clean",
|
||||||
|
"seen",
|
||||||
|
"unseen",
|
||||||
|
),
|
||||||
default="download",
|
default="download",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -497,14 +604,22 @@ def main() -> None:
|
||||||
|
|
||||||
database = RVDatabase(args)
|
database = RVDatabase(args)
|
||||||
cache = RVDatabase.load()
|
cache = RVDatabase.load()
|
||||||
try:
|
feed_fetched = False
|
||||||
database.read_feed()
|
if args.refresh:
|
||||||
except urllib.error.URLError as err:
|
try:
|
||||||
if args.action == "download" or not cache:
|
database.read_feed()
|
||||||
raise err
|
feed_fetched = True
|
||||||
else:
|
except urllib.error.URLError as err:
|
||||||
log.warning("Cannot fetch RSS feed, using cached feed.", err)
|
if args.action == "download":
|
||||||
|
raise RuntimeError("Couldn't fetch feed, refusing to download")
|
||||||
|
# This is a quirky failsafe in case of no internet connection,
|
||||||
|
# so the script doesn't go noting that no element is a video.
|
||||||
|
if not feed_fetched:
|
||||||
|
if cache:
|
||||||
|
log.warning("Using cached feed.")
|
||||||
database.import_cache(cache)
|
database.import_cache(cache)
|
||||||
|
else:
|
||||||
|
raise FileNotFoundError("Feed not fetched and no cached feed.")
|
||||||
if cache:
|
if cache:
|
||||||
database.salvage_cache(cache)
|
database.salvage_cache(cache)
|
||||||
database.clean_cache(cache)
|
database.clean_cache(cache)
|
||||||
|
@ -514,7 +629,7 @@ def main() -> None:
|
||||||
if args.action == "clean":
|
if args.action == "clean":
|
||||||
database.clean()
|
database.clean()
|
||||||
else:
|
else:
|
||||||
database.attempt_clean()
|
duration = 0
|
||||||
for element in database.filter(args):
|
for element in database.filter(args):
|
||||||
if args.action == "download":
|
if args.action == "download":
|
||||||
element.preload()
|
element.preload()
|
||||||
|
@ -522,8 +637,20 @@ def main() -> None:
|
||||||
print(element)
|
print(element)
|
||||||
elif args.action in ("watch", "binge"):
|
elif args.action in ("watch", "binge"):
|
||||||
element.watch()
|
element.watch()
|
||||||
if args.action == "watch":
|
if args.action == "watch":
|
||||||
break
|
break
|
||||||
|
elif args.action == "seen":
|
||||||
|
if not element.watched:
|
||||||
|
log.info(f"Maked as seen: {element}")
|
||||||
|
element.watched = True
|
||||||
|
elif args.action == "unseen":
|
||||||
|
if element.watched:
|
||||||
|
log.info(f"Maked as unseen: {element}")
|
||||||
|
element.watched = False
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(f"Unimplemented action: {args.action}")
|
||||||
|
duration += element.duration if element.is_video else 0
|
||||||
|
log.info(f"Total duration: {format_duration(duration)}")
|
||||||
database.attempt_clean()
|
database.attempt_clean()
|
||||||
database.save()
|
database.save()
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue