rssVideos: Now thread-safe (kinda)
This commit is contained in:
parent
2e759f9fc6
commit
d88520552b
|
@ -9,6 +9,7 @@ with the unread items (non-video links are ignored).
|
|||
"""
|
||||
|
||||
import datetime
|
||||
import filelock
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
|
@ -26,8 +27,6 @@ import yt_dlp
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# TODO Lockfile, or a way to parallel watch and download
|
||||
|
||||
|
||||
def configure_logging(args: configargparse.Namespace) -> None:
|
||||
# Configure logging
|
||||
|
@ -107,17 +106,33 @@ def format_duration(duration: int) -> str:
|
|||
class RVElement:
|
||||
parent: "RVDatabase"
|
||||
item: dict
|
||||
downloaded_filepath: typing.Optional[str]
|
||||
|
||||
RERESEARCH_AFTER = datetime.timedelta(hours=1)
|
||||
|
||||
def __init__(self, parent: "RVDatabase", item: dict) -> None:
|
||||
self.parent = parent
|
||||
self.item = item
|
||||
self.downloaded_filepath = None
|
||||
|
||||
@property
|
||||
def id(self) -> str:
|
||||
return self.item["id"]
|
||||
|
||||
@property
|
||||
def sid(self) -> str:
|
||||
return self.id.split("/")[-1]
|
||||
|
||||
def metafile(self, extension: str) -> str:
|
||||
return os.path.join(self.parent.METADATA_FOLDER, f"{self.sid}.{extension}")
|
||||
|
||||
def metafile_read(self, extension: str) -> typing.Any:
|
||||
return self.parent.metafile_read(f"{self.sid}.{extension}")
|
||||
|
||||
def metafile_write(self, extension: str, data: typing.Any) -> None:
|
||||
return self.parent.metafile_write(f"{self.sid}.{extension}", data)
|
||||
|
||||
def save(self) -> None:
|
||||
self.metafile_write("item", self.item)
|
||||
|
||||
@property
|
||||
def title(self) -> str:
|
||||
return self.item["title"]
|
||||
|
@ -136,14 +151,8 @@ class RVElement:
|
|||
|
||||
@property
|
||||
def is_researched(self) -> bool:
|
||||
return "ytdl_infos" in self.__dict__
|
||||
|
||||
def salvage_cache(self, cache: "RVElement") -> None:
|
||||
if cache.is_researched:
|
||||
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
|
||||
log.debug(f"From cache: {self}")
|
||||
if cache.downloaded_filepath:
|
||||
self.downloaded_filepath = cache.downloaded_filepath
|
||||
metafile = self.metafile("ytdl")
|
||||
return os.path.isfile(metafile)
|
||||
|
||||
def __str__(self) -> str:
|
||||
str = f"{self.date.strftime('%y-%m-%d %H:%M')} ("
|
||||
|
@ -169,6 +178,14 @@ class RVElement:
|
|||
|
||||
@functools.cached_property
|
||||
def ytdl_infos(self) -> typing.Optional[dict]:
|
||||
try:
|
||||
return self.metafile_read("ytdl")
|
||||
except (FileNotFoundError, TypeError, AttributeError, EOFError):
|
||||
infos = self._ytdl_infos()
|
||||
self.metafile_write("ytdl", infos)
|
||||
return infos
|
||||
|
||||
def _ytdl_infos(self) -> typing.Optional[dict]:
|
||||
log.info(f"Researching: {self}")
|
||||
try:
|
||||
infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
|
||||
|
@ -180,9 +197,6 @@ class RVElement:
|
|||
infos = None
|
||||
if infos:
|
||||
infos = self.parent.ytdl_dry.sanitize_info(infos)
|
||||
# Save database once it's been computed
|
||||
self.__dict__["ytdl_infos"] = infos
|
||||
self.parent.save()
|
||||
return infos
|
||||
|
||||
@property
|
||||
|
@ -196,6 +210,18 @@ class RVElement:
|
|||
# Duration might be missing in playlists and stuff
|
||||
return self.ytdl_infos is not None and "duration" in self.ytdl_infos
|
||||
|
||||
@functools.cached_property
|
||||
def downloaded_filepath(self) -> typing.Optional[str]:
|
||||
try:
|
||||
return self.metafile_read("path")
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
@property
|
||||
def was_downloaded(self) -> bool:
|
||||
metafile = self.metafile("path")
|
||||
return os.path.exists(metafile)
|
||||
|
||||
@property
|
||||
def filepath(self) -> str:
|
||||
assert self.is_video
|
||||
|
@ -204,37 +230,36 @@ class RVElement:
|
|||
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
|
||||
|
||||
@property
|
||||
def filename(self) -> str:
|
||||
def basename(self) -> str:
|
||||
assert self.is_video
|
||||
return os.path.splitext(self.filepath)[0]
|
||||
|
||||
def expire_info(self) -> None:
|
||||
metafile = self.metafile("ytdl")
|
||||
if os.path.isfile(metafile):
|
||||
stat = os.stat(metafile)
|
||||
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
|
||||
diff = datetime.datetime.now() - mtime
|
||||
if diff > self.RERESEARCH_AFTER:
|
||||
os.unlink(metafile)
|
||||
del self.ytdl_infos
|
||||
|
||||
def download(self) -> None:
|
||||
assert self.is_video
|
||||
if self.downloaded:
|
||||
return
|
||||
self.expire_info()
|
||||
log.info(f"Downloading: {self}")
|
||||
if self.parent.args.research:
|
||||
del self.ytdl_infos
|
||||
lockfile = self.metafile("lock")
|
||||
with filelock.FileLock(lockfile):
|
||||
if not self.parent.args.dryrun:
|
||||
with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
|
||||
ydl.add_post_processor(SaveInfoPP(self))
|
||||
ydl.process_ie_result(self.ytdl_infos, download=True)
|
||||
self.parent.save()
|
||||
|
||||
def update_post_download(self, info: dict) -> None:
|
||||
self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
|
||||
|
||||
@property
|
||||
def was_downloaded(self) -> bool:
|
||||
return self.downloaded_filepath is not None
|
||||
|
||||
def preload(self) -> None:
|
||||
assert self.is_video
|
||||
if self.downloaded:
|
||||
log.debug(f"Currently downloaded: {self}")
|
||||
return
|
||||
if self.was_downloaded:
|
||||
log.debug(f"Downloaded previously: {self}")
|
||||
return
|
||||
self.download()
|
||||
self.metafile_write("path", self.downloaded_filepath)
|
||||
|
||||
@property
|
||||
def watched(self) -> bool:
|
||||
|
@ -270,7 +295,6 @@ class RVElement:
|
|||
return True
|
||||
|
||||
def watch(self) -> None:
|
||||
if not self.downloaded:
|
||||
self.download()
|
||||
|
||||
cmd = ["mpv", self.filepath]
|
||||
|
@ -279,17 +303,27 @@ class RVElement:
|
|||
proc = subprocess.run(cmd)
|
||||
proc.check_returncode()
|
||||
|
||||
self.clean()
|
||||
self.undownload()
|
||||
self.try_mark_read()
|
||||
|
||||
def clean(self) -> None:
|
||||
assert self.is_video
|
||||
log.info(f"Removing gone video: {self.filename}*")
|
||||
for file in os.listdir():
|
||||
if file.startswith(self.filename):
|
||||
log.debug(f"Removing file: {file}")
|
||||
def clean_file(self, folder: str, basename: str) -> None:
|
||||
for file in os.listdir(folder):
|
||||
if file.startswith(basename):
|
||||
path = os.path.join(folder, file)
|
||||
log.debug(f"Removing file: {path}")
|
||||
if not self.parent.args.dryrun:
|
||||
os.unlink(file)
|
||||
os.unlink(path)
|
||||
|
||||
def undownload(self) -> None:
|
||||
assert self.is_video
|
||||
log.info(f"Removing gone video: {self.basename}*")
|
||||
self.clean_file(".", self.basename)
|
||||
|
||||
def clean(self) -> None:
|
||||
if self.is_video:
|
||||
self.undownload()
|
||||
log.info(f"Removing gone metadata: {self.sid}*")
|
||||
self.clean_file(self.parent.METADATA_FOLDER, self.sid)
|
||||
|
||||
def mark_read(self) -> None:
|
||||
log.debug(f"Marking {self} read")
|
||||
|
@ -309,7 +343,7 @@ class RVElement:
|
|||
if r.text.strip() != "OK":
|
||||
raise RuntimeError(f"Couldn't mark {self} as read: {r.text}")
|
||||
log.info(f"Marked {self} as read")
|
||||
self.parent.elements.remove(self)
|
||||
self.clean()
|
||||
|
||||
def try_mark_read(self) -> None:
|
||||
try:
|
||||
|
@ -319,7 +353,7 @@ class RVElement:
|
|||
|
||||
|
||||
class RVDatabase:
|
||||
SAVE_FILE = ".cache.p"
|
||||
METADATA_FOLDER = ".metadata"
|
||||
|
||||
args: configargparse.Namespace
|
||||
elements: list[RVElement]
|
||||
|
@ -327,53 +361,27 @@ class RVDatabase:
|
|||
def __init__(self, args: configargparse.Namespace) -> None:
|
||||
self.args = args
|
||||
|
||||
def save(self) -> None:
|
||||
log.debug("Saving cache")
|
||||
if self.args.dryrun:
|
||||
return
|
||||
with open(self.SAVE_FILE, "wb") as save_file:
|
||||
pickle.dump(self, save_file)
|
||||
def metafile_read(self, name: str) -> typing.Any:
|
||||
path = os.path.join(self.METADATA_FOLDER, name)
|
||||
log.debug(f"Reading {path}")
|
||||
with open(path, "rb") as mf:
|
||||
return pickle.load(mf)
|
||||
|
||||
@classmethod
|
||||
def load(cls) -> typing.Optional["RVDatabase"]:
|
||||
try:
|
||||
with open(cls.SAVE_FILE, "rb") as save_file:
|
||||
return pickle.load(save_file)
|
||||
except (TypeError, AttributeError, EOFError):
|
||||
log.warning("Corrupt / outdated cache, it will be rebuilt.")
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return None
|
||||
|
||||
def salvage_cache_pre(self, cache: "RVDatabase") -> None:
|
||||
if "auth_headers" in cache.__dict__:
|
||||
self.auth_headers = cache.auth_headers
|
||||
|
||||
def salvage_cache(self, cache: "RVDatabase") -> None:
|
||||
log.debug("Salvaging cache")
|
||||
cache_els = dict()
|
||||
for cache_el in cache.elements:
|
||||
cache_els[cache_el.id] = cache_el
|
||||
for el in self.elements:
|
||||
if el.id in cache_els:
|
||||
el.salvage_cache(cache_els[el.id])
|
||||
def metafile_write(self, name: str, data: typing.Any) -> None:
|
||||
path = os.path.join(self.METADATA_FOLDER, name)
|
||||
log.debug(f"Writing {path}")
|
||||
if not self.args.dryrun:
|
||||
with open(path, "wb") as mf:
|
||||
pickle.dump(data, mf)
|
||||
|
||||
def clean_cache(self, cache: "RVDatabase") -> None:
|
||||
log.debug("Cleaning cache")
|
||||
self_els = dict()
|
||||
for self_el in self.elements:
|
||||
self_els[self_el.id] = self_el
|
||||
fresh_ids = set(el.id for el in self.elements)
|
||||
for el in cache.elements:
|
||||
if el.id not in self_els:
|
||||
if el.is_researched and el.is_video:
|
||||
if el.id not in fresh_ids:
|
||||
el.clean()
|
||||
|
||||
def import_cache(self, cache: "RVDatabase") -> None:
|
||||
log.debug("Importing cache")
|
||||
self.build_list([element.item for element in cache.elements])
|
||||
|
||||
@functools.cached_property
|
||||
def auth_headers(self) -> dict[str, str]:
|
||||
def _auth_headers(self) -> dict[str, str]:
|
||||
r = requests.get(
|
||||
f"{self.args.url}/accounts/ClientLogin",
|
||||
params={"Email": self.args.email, "Passwd": self.args.passwd},
|
||||
|
@ -385,6 +393,15 @@ class RVDatabase:
|
|||
return {"Authorization": f"GoogleLogin auth={val}"}
|
||||
raise RuntimeError("Couldn't find auth= key")
|
||||
|
||||
@functools.cached_property
|
||||
def auth_headers(self) -> dict[str, str]:
|
||||
try:
|
||||
return self.metafile_read(".auth_headers")
|
||||
except FileNotFoundError:
|
||||
headers = self._auth_headers()
|
||||
self.metafile_write(".auth_headers", headers)
|
||||
return headers
|
||||
|
||||
def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
|
||||
log.info("Fetching RSS feed")
|
||||
continuation: typing.Optional[str] = None
|
||||
|
@ -409,45 +426,47 @@ class RVDatabase:
|
|||
while continuation:
|
||||
yield from next_page()
|
||||
|
||||
def build_list(self, items: typing.Iterable[dict]) -> None:
|
||||
def fetch_cache_elements(self) -> typing.Generator[dict, None, None]:
|
||||
log.info("Fetching from cache")
|
||||
for file in os.listdir(self.METADATA_FOLDER):
|
||||
if not file.endswith(".item"):
|
||||
continue
|
||||
yield self.metafile_read(file)
|
||||
|
||||
def build_list(self, items: typing.Iterable[dict], save: bool = False) -> None:
|
||||
self.elements = []
|
||||
for item in items:
|
||||
element = RVElement(self, item)
|
||||
self.elements.insert(0, element)
|
||||
log.debug(f"Known: {element}")
|
||||
if save:
|
||||
element.save()
|
||||
|
||||
def read_feed(self) -> None:
|
||||
self.build_list(self.fetch_feed_elements())
|
||||
self.build_list(self.fetch_feed_elements(), save=True)
|
||||
|
||||
def read_cache(self) -> None:
|
||||
self.build_list(self.fetch_cache_elements())
|
||||
|
||||
def clean_folder(self, folder: str, basenames: set[str]) -> None:
|
||||
for file in os.listdir(folder):
|
||||
path = os.path.join(folder, file)
|
||||
if not os.path.isfile(path) or file[0] == ".":
|
||||
continue
|
||||
for basename in basenames:
|
||||
if file.startswith(basename):
|
||||
break
|
||||
else:
|
||||
log.info(f"Removing unknown file: {path}")
|
||||
if not self.args.dryrun:
|
||||
os.unlink(path)
|
||||
|
||||
def clean(self) -> None:
|
||||
log.debug("Cleaning")
|
||||
filenames = set()
|
||||
for element in self.elements:
|
||||
if element.is_video:
|
||||
filenames.add(element.filename)
|
||||
for file in os.listdir():
|
||||
if file == RVDatabase.SAVE_FILE:
|
||||
continue
|
||||
if not os.path.isfile(file):
|
||||
continue
|
||||
for filename in filenames:
|
||||
if file.startswith(filename):
|
||||
break
|
||||
else:
|
||||
log.info(f"Removing unknown file: {file}")
|
||||
if not self.args.dryrun:
|
||||
os.unlink(file)
|
||||
|
||||
@property
|
||||
def all_researched(self) -> bool:
|
||||
for element in self.elements:
|
||||
if not element.is_researched:
|
||||
return False
|
||||
return True
|
||||
|
||||
def attempt_clean(self) -> None:
|
||||
if self.all_researched:
|
||||
self.clean()
|
||||
filenames = set(el.basename for el in self.elements if el.is_video)
|
||||
self.clean_folder(".", filenames)
|
||||
ids = set(el.sid for el in self.elements)
|
||||
self.clean_folder(self.METADATA_FOLDER, ids)
|
||||
|
||||
@property
|
||||
def ytdl_opts(self) -> dict:
|
||||
|
@ -468,7 +487,9 @@ class RVDatabase:
|
|||
elements: typing.Iterable[RVElement]
|
||||
# Inexpensive sort
|
||||
if args.order == "new":
|
||||
elements = reversed(elements_src)
|
||||
elements = sorted(elements_src, key=lambda el: el.date, reverse=True)
|
||||
elif args.order == "old":
|
||||
elements = sorted(elements_src, key=lambda el: el.date)
|
||||
elif args.order == "title":
|
||||
elements = sorted(elements_src, key=lambda el: el.title)
|
||||
elif args.order == "creator":
|
||||
|
@ -478,8 +499,6 @@ class RVDatabase:
|
|||
elif args.order == "random":
|
||||
elements = elements_src
|
||||
random.shuffle(elements)
|
||||
else:
|
||||
elements = elements_src
|
||||
|
||||
# Possibly expensive filtering
|
||||
elements = filter(lambda el: el.matches_filter(args), elements)
|
||||
|
@ -575,11 +594,6 @@ def get_args() -> configargparse.Namespace:
|
|||
env_var="RSS_VIDEOS_PASSWD",
|
||||
required=True,
|
||||
)
|
||||
parser.add(
|
||||
"--research",
|
||||
help="Fetch video info again",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add(
|
||||
"--no-refresh",
|
||||
dest="refresh",
|
||||
|
@ -641,67 +655,46 @@ def get_args() -> configargparse.Namespace:
|
|||
"list",
|
||||
"watch",
|
||||
"binge",
|
||||
"clean",
|
||||
),
|
||||
default="download",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
args.videos = os.path.realpath(os.path.expanduser(args.videos))
|
||||
if not args.duration and args.max_duration:
|
||||
args.duration = str(args.max_duration)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def get_database(args: configargparse.Namespace) -> RVDatabase:
|
||||
database = RVDatabase(args)
|
||||
cache = RVDatabase.load()
|
||||
feed_fetched = False
|
||||
if cache:
|
||||
database.salvage_cache_pre(cache)
|
||||
if args.refresh:
|
||||
try:
|
||||
database.read_feed()
|
||||
feed_fetched = True
|
||||
except requests.ConnectionError as err:
|
||||
if args.action == "download":
|
||||
raise RuntimeError("Couldn't fetch feed, refusing to download")
|
||||
# This is a quirky failsafe in case of no internet connection,
|
||||
# so the script doesn't go noting that no element is a video.
|
||||
log.warning(f"Couldn't fetch feed: {err}")
|
||||
if not feed_fetched:
|
||||
if cache:
|
||||
log.warning("Using cached feed.")
|
||||
database.import_cache(cache)
|
||||
else:
|
||||
raise FileNotFoundError("Feed not fetched and no cached feed.")
|
||||
if cache:
|
||||
database.salvage_cache(cache)
|
||||
database.clean_cache(cache)
|
||||
database.save()
|
||||
cache = RVDatabase(args)
|
||||
cache.read_cache()
|
||||
if not args.refresh:
|
||||
return cache
|
||||
|
||||
return database
|
||||
fresh = RVDatabase(args)
|
||||
fresh.read_feed()
|
||||
fresh.clean_cache(cache)
|
||||
return fresh
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = get_args()
|
||||
configure_logging(args)
|
||||
|
||||
os.makedirs(args.videos, exist_ok=True)
|
||||
metadata_dir = os.path.join(args.videos, RVDatabase.METADATA_FOLDER)
|
||||
for dir in (args.videos, metadata_dir):
|
||||
os.makedirs(dir, exist_ok=True)
|
||||
os.chdir(args.videos)
|
||||
|
||||
database = get_database(args)
|
||||
database.clean()
|
||||
|
||||
log.debug("Running action")
|
||||
if args.action == "clean":
|
||||
database.clean()
|
||||
else:
|
||||
duration = 0
|
||||
for element in database.filter(args):
|
||||
duration += element.duration if element.is_video else 0
|
||||
if args.action == "download":
|
||||
element.preload()
|
||||
element.download()
|
||||
elif args.action == "list":
|
||||
print(element)
|
||||
elif args.action in ("watch", "binge"):
|
||||
|
@ -711,9 +704,7 @@ def main() -> None:
|
|||
else:
|
||||
raise NotImplementedError(f"Unimplemented action: {args.action}")
|
||||
log.info(f"Total duration: {format_duration(duration)}")
|
||||
database.attempt_clean()
|
||||
database.try_mark_watched_read()
|
||||
database.save()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in a new issue