rssVideos: Cleverer cleaning

This commit is contained in:
Geoffrey Frogeye 2021-12-18 12:44:43 +01:00
parent 2dce725ee5
commit 1948fc0af2
Signed by: geoffrey
GPG key ID: C72403E7F82E6AD8

View file

@ -29,6 +29,7 @@ import yt_dlp as youtube_dl
log = logging.getLogger(__name__)
# TODO Lockfile, or a way to watch and download in parallel
def configure_logging(args: configargparse.Namespace) -> None:
# Configure logging
@ -95,8 +96,12 @@ class RVElement:
def guid(self) -> int:
return int(self.get_tag_data("guid"))
@property
def is_researched(self) -> bool:
    """Tell whether ``ytdl_infos`` is already stored on this element.

    The instance dictionary is inspected directly instead of reading the
    attribute.
    NOTE(review): presumably ``ytdl_infos`` is computed lazily on first
    access, and this check avoids triggering that computation -- confirm.
    """
    return "ytdl_infos" in vars(self)
def salvage_cache(self, cache: "RVElement") -> None:
if "ytdl_infos" in cache.__dict__:
if cache.is_researched:
self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
log.debug(f"From cache: {self}")
if cache.was_downloaded:
@ -109,7 +114,7 @@ class RVElement:
@property
def downloaded(self) -> bool:
if "ytdl_infos" not in self.__dict__:
if not self.is_researched:
return False
return os.path.isfile(self.filepath)
@ -167,10 +172,8 @@ class RVElement:
self.was_downloaded = True
self.parent.save()
def act(self) -> None:
if not self.is_video:
log.debug(f"Not a video: {self}")
return
def preload(self) -> None:
assert self.is_video
if self.downloaded:
log.debug(f"Currently downloaded: {self}")
return
@ -192,16 +195,22 @@ class RVElement:
def matches_filter(self, args: configargparse.Namespace) -> bool:
if self.watched:
log.debug(f"Already watched: {self}")
return False
if args.title and not re.search(args.title, self.title):
log.debug(f"Title not matching {args.title}: {self}")
return False
if args.guid and not re.search(args.guid, str(self.guid)):
log.debug(f"Guid not matching {args.guid}: {self}")
return False
if args.link and not re.search(args.link, self.link):
log.debug(f"Link not matching {args.link}: {self}")
return False
if args.creator and self.creator and not re.search(args.creator, self.creator):
if args.creator and (not self.creator or not re.search(args.creator, self.creator)):
log.debug(f"Creator not matching {args.creator}: {self}")
return False
if not self.is_video:
log.debug(f"Not a video: {self}")
return False
if args.duration:
dur = args.duration
@ -228,6 +237,7 @@ class RVElement:
duration = int(dur)
if not comparator(self.duration, duration * multiplier):
log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
return False
return True
@ -244,6 +254,15 @@ class RVElement:
self.watched = True
self.parent.save()
def clean(self) -> None:
    """Delete the files belonging to this video from the working directory.

    Every entry of the current directory whose name begins with
    ``self.filename`` is treated as part of the video and removed.
    When ``self.parent.args.dryrun`` is set, removals are only logged.
    """
    assert self.is_video
    log.info(f"Removing gone video: {self.filename}*")
    for entry in os.listdir():
        if not entry.startswith(self.filename):
            continue
        log.debug(f"Removing file: {entry}")
        if self.parent.args.dryrun:
            # Dry run: report what would be deleted, but leave it on disk.
            continue
        os.unlink(entry)
class RVDatabase:
SAVE_FILE = ".cache.p"
@ -281,6 +300,16 @@ class RVDatabase:
if el.guid in cache_els:
el.salvage_cache(cache_els[el.guid])
def clean_cache(self, cache: "RVDatabase") -> None:
    """Remove the files of cached videos that are gone from this database.

    An element of ``cache`` is cleaned when its guid is absent from
    ``self.elements`` and it is both researched and a video.

    Args:
        cache: Database whose elements are compared against the current
            ones (``main`` passes the loaded cache).
    """
    log.debug("Cleaning cache")
    # Only membership is tested, so a set of guids is sufficient.
    current_guids = {element.guid for element in self.elements}
    for el in cache.elements:
        # NOTE(review): ``is_researched`` is checked before ``clean()``,
        # presumably because the filename to remove depends on the
        # researched metadata -- confirm.
        if el.guid not in current_guids and el.is_researched and el.is_video:
            el.clean()
def import_cache(self, cache: "RVDatabase") -> None:
log.debug(f"Importing cache")
self.feed_xml = cache.feed_xml
@ -314,10 +343,21 @@ class RVDatabase:
if file.startswith(filename):
break
else:
log.info(f"Removing: {file}")
log.info(f"Removing unknown file: {file}")
if not self.args.dryrun:
os.unlink(file)
@property
def all_researched(self) -> bool:
    """Tell whether every element of this database has been researched.

    Returns:
        ``True`` when ``is_researched`` holds for each entry of
        ``self.elements`` (including when there are no elements),
        ``False`` as soon as one element is not researched.
    """
    return all(element.is_researched for element in self.elements)
def attempt_clean(self) -> None:
    """Run ``clean()`` only once every element has been researched.

    NOTE(review): presumably cleaning with unresearched elements could
    remove files whose owner is not known yet -- confirm.
    """
    if not self.all_researched:
        return
    self.clean()
@property
def ytdl_opts(self) -> dict:
    """Build an options dictionary from the parsed arguments.

    Returns:
        A dict with ``format`` taken from ``self.args.format`` and
        ``allsubtitles`` taken from ``self.args.subtitles``.
    """
    args = self.args
    options = {}
    options["format"] = args.format
    options["allsubtitles"] = args.subtitles
    return options
@ -435,7 +475,7 @@ def get_args() -> configargparse.Namespace:
parser.add(
"action",
nargs="?",
choices=("download", "list", "watch", "binge"),
choices=("download", "list", "watch", "binge", "clean"),
default="download",
)
@ -466,22 +506,24 @@ def main() -> None:
database.import_cache(cache)
if cache:
database.salvage_cache(cache)
if args.action == "download":
# TODO Clean on watch? / cache import with missing video / all researched
database.clean()
database.clean_cache(cache)
database.save()
log.debug(f"Running action")
if args.action == "clean":
database.clean()
else:
database.attempt_clean()
for element in database.filter(args):
if args.action == "download":
element.act()
element.preload()
elif args.action == "list":
print(element)
elif args.action in ("watch", "binge"):
element.watch()
if args.action == "watch":
break
database.attempt_clean()
database.save()