From 1948fc0af2e41b7732686693886e37bc2e756dfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geoffrey=20=E2=80=9CFrogeye=E2=80=9D=20Preud=27homme?= Date: Sat, 18 Dec 2021 12:44:43 +0100 Subject: [PATCH] rssVideos: Cleverer cleaning --- config/scripts/rssVideos | 88 +++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/config/scripts/rssVideos b/config/scripts/rssVideos index 45eef2b..71ba5a6 100755 --- a/config/scripts/rssVideos +++ b/config/scripts/rssVideos @@ -29,6 +29,7 @@ import yt_dlp as youtube_dl log = logging.getLogger(__name__) +# TODO Lockfile, or a way to parallel watch and download def configure_logging(args: configargparse.Namespace) -> None: # Configure logging @@ -95,8 +96,12 @@ class RVElement: def guid(self) -> int: return int(self.get_tag_data("guid")) + @property + def is_researched(self) -> bool: + return "ytdl_infos" in self.__dict__ + def salvage_cache(self, cache: "RVElement") -> None: - if "ytdl_infos" in cache.__dict__: + if cache.is_researched: self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"] log.debug(f"From cache: {self}") if cache.was_downloaded: @@ -109,7 +114,7 @@ class RVElement: @property def downloaded(self) -> bool: - if "ytdl_infos" not in self.__dict__: + if not self.is_researched: return False return os.path.isfile(self.filepath) @@ -167,10 +172,8 @@ class RVElement: self.was_downloaded = True self.parent.save() - def act(self) -> None: - if not self.is_video: - log.debug(f"Not a video: {self}") - return + def preload(self) -> None: + assert self.is_video if self.downloaded: log.debug(f"Currently downloaded: {self}") return @@ -192,16 +195,22 @@ class RVElement: def matches_filter(self, args: configargparse.Namespace) -> bool: if self.watched: + log.debug(f"Already watched: {self}") return False if args.title and not re.search(args.title, self.title): + log.debug(f"Title not matching {args.title}: {self}") return False if args.guid and not re.search(args.guid, str(self.guid)): + log.debug(f"Guid not matching {args.guid}: {self}") return False if args.link and not re.search(args.link, self.link): + log.debug(f"Link not matching {args.link}: {self}") return False - if args.creator and self.creator and not re.search(args.creator, self.creator): + if args.creator and (not self.creator or not re.search(args.creator, self.creator)): + log.debug(f"Creator not matching {args.creator}: {self}") return False if not self.is_video: + log.debug(f"Not a video: {self}") return False if args.duration: dur = args.duration @@ -228,6 +237,7 @@ class RVElement: duration = int(dur) if not comparator(self.duration, duration * multiplier): + log.debug(f"Duration {self.duration} not matching {args.duration}: {self}") return False return True @@ -244,6 +254,15 @@ class RVElement: self.watched = True self.parent.save() + def clean(self) -> None: + assert self.is_video + log.info(f"Removing gone video: {self.filename}*") + for file in os.listdir(): + if file.startswith(self.filename): + log.debug(f"Removing file: {file}") + if not self.parent.args.dryrun: + os.unlink(file) + class RVDatabase: SAVE_FILE = ".cache.p" @@ -281,6 +300,16 @@ class RVDatabase: if el.guid in cache_els: el.salvage_cache(cache_els[el.guid]) + def clean_cache(self, cache: "RVDatabase") -> None: + log.debug(f"Cleaning cache") + self_els = dict() + for self_el in self.elements: + self_els[self_el.guid] = self_el + for el in cache.elements: + if el.guid not in self_els: + if el.is_researched and el.is_video: + el.clean() + def import_cache(self, cache: "RVDatabase") -> None: log.debug(f"Importing cache") self.feed_xml = cache.feed_xml @@ -314,10 +343,21 @@ class RVDatabase: if file.startswith(filename): break else: - log.info(f"Removing: {file}") + log.info(f"Removing unknown file: {file}") if not self.args.dryrun: os.unlink(file) + @property + def all_researched(self) -> bool: + for element in self.elements: + if not element.is_researched: + return False + return True + + def attempt_clean(self) -> None: + if self.all_researched: + self.clean() + @property def ytdl_opts(self) -> dict: return {"format": self.args.format, "allsubtitles": self.args.subtitles} @@ -435,7 +475,7 @@ def get_args() -> configargparse.Namespace: parser.add( "action", nargs="?", - choices=("download", "list", "watch", "binge"), + choices=("download", "list", "watch", "binge", "clean"), default="download", ) @@ -466,22 +506,24 @@ def main() -> None: database.import_cache(cache) if cache: database.salvage_cache(cache) - - if args.action == "download": - # TODO Clean on watch? / cache import with missing video / all researched - database.clean() + database.clean_cache(cache) + database.save() log.debug(f"Running action") - for element in database.filter(args): - if args.action == "download": - element.act() - elif args.action == "list": - print(element) - elif args.action in ("watch", "binge"): - element.watch() - if args.action == "watch": - break - + if args.action == "clean": + database.clean() + else: + database.attempt_clean() + for element in database.filter(args): + if args.action == "download": + element.preload() + elif args.action == "list": + print(element) + elif args.action in ("watch", "binge"): + element.watch() + if args.action == "watch": + break + database.attempt_clean() database.save()