Compare commits

...

13 commits

Author SHA1 Message Date
Geoffrey Frogeye f4c81e346a
rssVideos: Add --seen flag 2021-12-18 22:23:48 +01:00
Geoffrey Frogeye 1948fc0af2
rssVideos: Cleverer cleaning 2021-12-18 12:44:43 +01:00
Geoffrey Frogeye 2dce725ee5
rssVideos: Abstract with download process as well 2021-12-18 11:56:28 +01:00
Geoffrey Frogeye 07af9360fa
rssVideos: Abstract a bit, add binge 2021-12-18 11:27:24 +01:00
Geoffrey Frogeye 5b195bd141
rssVideos: Add watch 2021-12-17 23:16:32 +01:00
Geoffrey Frogeye 7423a93203
rssVideos: Filter by duration 2021-12-17 22:42:35 +01:00
Geoffrey Frogeye 7aeecb1bff
videoQuota: 2021-12-17 22:41:47 +01:00
Geoffrey Frogeye f11338a04a
rssVideos: Support list filters 2021-12-17 22:13:46 +01:00
Geoffrey Frogeye 9100edac1e
videoQuota: Support filters 2021-12-17 22:13:27 +01:00
Geoffrey Frogeye 76df5d4d80
Upgrade rofi config 2021-12-15 21:59:45 +01:00
Geoffrey Frogeye 7f0e24a29d
rssVideos: Slightly better error handling
Makes it actually quit on Ctrl+C
2021-12-12 14:52:21 +01:00
Geoffrey Frogeye 6a6f5401e6
rssVideos: Show creator
Even if it's not always  present for all RSS feeds
2021-12-12 14:27:08 +01:00
Geoffrey Frogeye 9493edc1fd
rssVideos: Don't download already downloaded videos
Because the good extension is not the one expected :/
2021-12-12 13:40:24 +01:00
7 changed files with 318 additions and 119 deletions

View file

@ -6,6 +6,7 @@
with_items:
- ".config/Xresources"
- ".config/rofi"
- ".local/share/rofi/themes"
- ".local/bin"
- ".local/share/fonts"
- ".config/qutebrowser"
@ -108,17 +109,24 @@
- color
when: display_server == 'x11'
- name: Set base16 theme for rofi
- name: Set base16 theme for rofi < 1.4
copy:
content: "{{ base16_schemes['schemes'][base16_scheme]['rofi']['themes']['base16-' + base16_scheme + '.' + item] }}"
dest: "{{ ansible_env.HOME }}/.config/rofi/theme.{{ item }}"
content: "{{ base16_schemes['schemes'][base16_scheme]['rofi']['themes']['base16-' + base16_scheme + '.config'] }}"
dest: "{{ ansible_env.HOME }}/.config/rofi/theme.config"
mode: "u=rw,g=r,o=r"
with_items:
- rasi
- config
tags:
- color
- name: Set base16 theme for rofi >= 1.4
copy:
content: "{{ base16_schemes['schemes'][base16_scheme]['rofi']['themes']['base16-' + base16_scheme + '.rasi'] }}"
dest: "{{ ansible_env.HOME }}/.local/share/rofi/themes/current.rasi"
mode: "u=rw,g=r,o=r"
tags:
- color
- g
when: no
- name: Configure Dunst
template:
src: "{{ ansible_env.HOME }}/.config/dunst/dunstrc.j2"

View file

@ -1,3 +1 @@
theme.config
theme.rasi

View file

@ -1,8 +1,4 @@
#include "theme.config"
rofi.theme: theme
rofi.cycle: true
rofi.case-sensitive: false
rofi.scroll-method: 0
rofi.show-match: true
rofi.lazy-grab: false
rofi.matching: regex

6
config/rofi/config.rasi Normal file
View file

@ -0,0 +1,6 @@
configuration {
theme: "current";
lazy-grab: false;
matching: "regex";
}

View file

@ -1,3 +1,4 @@
coloredlogs>=10.0<11
progressbar2>=3.47.0<4
youtube-dl>=2021.6.6
yt-dlp>=2021.10.22
ConfigArgParse>=1.5<2

View file

@ -8,18 +8,19 @@ The common use case would be a feed from an RSS aggregator
with the unread items (non-video links are ignored).
"""
# TODO Distribute this correclty, in the meanwhile please do
# pip install --user coloredlogs ConfigArgParse yt-dlp
import enum
import functools
import logging
import os
import pickle
import random
import re
import subprocess
import sys
import typing
import urllib.parse
import urllib.request
import urllib.error
from xml.dom import minidom
import coloredlogs
@ -28,6 +29,7 @@ import yt_dlp as youtube_dl
log = logging.getLogger(__name__)
# TODO Lockfile, or a way to parallel watch and download
def configure_logging(args: configargparse.Namespace) -> None:
# Configure logging
@ -41,52 +43,78 @@ def configure_logging(args: configargparse.Namespace) -> None:
logger=log,
)
class RVCommand(enum.Enum):
    """Subcommands selectable on the command line."""

    download = "download"
    list = "list"
class RVElement:
title: str
link: str
# creator: str
# description: str
# date: datetime.datetime
guid: int
parent: "RVDatabase"
item: minidom.Element
was_downloaded: bool
watched: bool
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
def get_data(tag_name: str) -> str:
nodes = item.getElementsByTagName(tag_name)
self.parent = parent
self.item = item
self.was_downloaded = False
self.watched = False
def get_tag_data(self, tag_name: str) -> str:
nodes = self.item.getElementsByTagName(tag_name)
if len(nodes) != 1:
raise RuntimeError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
children = nodes[0].childNodes
if len(children) != 1:
raise RuntimeError(
raise KeyError(
f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
)
return children[0].data
self.title = get_data("title")
self.link = get_data("link")
# self.creator = get_data("dc:creator")
# self.description = get_data("description")
# self.date = get_data("pubDate")
self.guid = int(get_data("guid"))
@property
def title(self) -> str:
return self.get_tag_data("title")
self.parent = parent
@property
def link(self) -> str:
return self.get_tag_data("link")
def read_cache(self, cache: "RVElement") -> None:
if "ytdl_infos" in cache.__dict__:
@property
def creator(self) -> typing.Optional[str]:
    """Feed-provided author, or None when the feed has no dc:creator tag."""
    try:
        creator = self.get_tag_data("dc:creator")
    except KeyError:
        creator = None
    return creator
@property
def description(self) -> str:
    """Feed-provided <description> text of the item."""
    # TODO Testing
    return self.get_tag_data("description")
@property
def date(self) -> str:
    """Raw <pubDate> string from the feed (not parsed yet)."""
    # TODO datetime format
    return self.get_tag_data("pubDate")
@property
def guid(self) -> int:
    """Numeric <guid> identifying this item; used as the cache key."""
    return int(self.get_tag_data("guid"))
@property
def is_researched(self) -> bool:
    """True once ytdl_infos has been computed or restored from cache."""
    return "ytdl_infos" in self.__dict__
def salvage_cache(self, cache: "RVElement") -> None:
    """Carry over expensive or user state from a cached copy of this element."""
    # ytdl research is costly (network), so reuse it whenever available.
    if cache.is_researched:
        self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
        log.debug(f"From cache: {self}")
    # Flags only ever propagate from False to True.
    self.was_downloaded = self.was_downloaded or cache.was_downloaded
    self.watched = self.watched or cache.watched
def __str__(self) -> str:
return f"{self.title} {self.link}"
return f"{self.guid}: {self.creator} {self.title} {self.link}"
@property
def downloaded(self) -> bool:
if "ytdl_infos" not in self.__dict__:
if not self.is_researched:
return False
return os.path.isfile(self.filepath)
@ -95,9 +123,11 @@ class RVElement:
log.info(f"Researching: {self}")
try:
infos = self.parent.ytdl_dry.extract_info(self.link)
except BaseException as e:
except KeyboardInterrupt as e:
raise e
except youtube_dl.utils.DownloadError as e:
# TODO Still raise in case of temporary network issue
log.warn(e)
log.warning(e)
infos = None
# Apparently that thing is transformed from a LazyList
# somewhere in the normal yt_dlp process
@ -113,15 +143,10 @@ class RVElement:
return infos
@property
def skip(self) -> bool:
def duration(self) -> int:
assert self.is_video
assert self.ytdl_infos
if (
self.parent.args.max_duration > 0
and self.ytdl_infos["duration"] > self.parent.args.max_duration
):
return True
return False
return self.ytdl_infos["duration"]
@property
def is_video(self) -> bool:
@ -131,6 +156,7 @@ class RVElement:
@property
def filepath(self) -> str:
assert self.is_video
# TODO This doesn't change the extension to mkv when the formats are incomaptible
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
@property
@ -141,22 +167,102 @@ class RVElement:
def download(self) -> None:
assert self.is_video
log.info(f"Downloading: {self}")
if self.parent.args.dryrun:
return
if not self.parent.args.dryrun:
self.parent.ytdl.process_ie_result(self.ytdl_infos, True, {})
self.was_downloaded = True
self.parent.save()
def act(self) -> None:
if not self.is_video:
log.debug(f"Not a video: {self}")
return
def preload(self) -> None:
assert self.is_video
if self.downloaded:
log.debug(f"Already downloaded: {self}")
log.debug(f"Currently downloaded: {self}")
return
if self.skip:
log.debug(f"Skipped: {self}")
if self.was_downloaded:
log.debug(f"Downloaded previously: {self}")
return
self.download()
# Duration-suffix letter -> seconds multiplier; the None key covers a bare
# number (interpreted as seconds).
MATCHES_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, None: 1}
# Leading comparator character -> int comparison; the None key (no
# comparator given) defaults to "at most".
MATCHES_DURATION_COMPARATORS = {
    "<": int.__lt__,
    "-": int.__lt__,
    ">": int.__gt__,
    "+": int.__gt__,
    "=": int.__eq__,
    None: int.__le__,
}

def matches_filter(self, args: configargparse.Namespace) -> bool:
    """Return True if this element passes every command-line filter.

    Non-video elements never match.  --duration accepts an optional
    leading comparator (one of < - > + =) and an optional trailing
    unit (s/m/h), e.g. "<30m".

    Raises:
        ValueError: on a malformed --duration specification.
    """
    if args.seen != "any" and (args.seen == "seen") != self.watched:
        log.debug(f"Not {args.seen}: {self}")
        return False
    if args.title and not re.search(args.title, self.title):
        log.debug(f"Title not matching {args.title}: {self}")
        return False
    if args.guid and not re.search(args.guid, str(self.guid)):
        log.debug(f"Guid not matching {args.guid}: {self}")
        return False
    if args.link and not re.search(args.link, self.link):
        log.debug(f"Link not matching {args.link}: {self}")
        return False
    if args.creator and (not self.creator or not re.search(args.creator, self.creator)):
        log.debug(f"Creator not matching {args.creator}: {self}")
        return False
    if not self.is_video:
        log.debug(f"Not a video: {self}")
        return False
    if args.duration:
        dur = args.duration
        mult_index = dur[-1].lower()
        if mult_index.isdigit():
            mult_index = None
        else:
            dur = dur[:-1]
        try:
            multiplier = self.MATCHES_DURATION_MULTIPLIERS[mult_index]
        # Dict subscripting raises KeyError, not IndexError; catching the
        # wrong type let e.g. "10x" escape as a raw KeyError instead of
        # the intended friendly ValueError.
        except KeyError:
            raise ValueError(f"Unknown duration multiplier: {mult_index}")
        comp_index = dur[0]
        if comp_index.isdigit():
            comp_index = None
        else:
            dur = dur[1:]
        try:
            comparator = self.MATCHES_DURATION_COMPARATORS[comp_index]
        except KeyError:
            raise ValueError(f"Unknown duration comparator: {comp_index}")
        duration = int(dur)
        if not comparator(self.duration, duration * multiplier):
            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
            return False
    return True
def watch(self) -> None:
    """Play this video in mpv (downloading it first if needed), then mark it seen."""
    if not self.downloaded:
        self.download()

    command = ["mpv", self.filepath]
    log.debug(f"Running {command}")
    if not self.parent.args.dryrun:
        subprocess.run(command).check_returncode()

    self.watched = True
    self.parent.save()
def clean(self) -> None:
    """Delete every file of this video from the current directory."""
    assert self.is_video
    log.info(f"Removing gone video: {self.filename}*")
    prefix = self.filename
    for entry in os.listdir():
        if not entry.startswith(prefix):
            continue
        log.debug(f"Removing file: {entry}")
        if not self.parent.args.dryrun:
            os.unlink(entry)
class RVDatabase:
SAVE_FILE = ".cache.p"
@ -168,6 +274,7 @@ class RVDatabase:
self.args = args
def save(self) -> None:
log.debug("Saving cache")
if self.args.dryrun:
return
with open(self.SAVE_FILE, "wb") as save_file:
@ -179,30 +286,50 @@ class RVDatabase:
with open(cls.SAVE_FILE, "rb") as save_file:
return pickle.load(save_file)
except (TypeError, AttributeError, EOFError):
log.warn("Corrupt / outdated cache, it will be rebuilt.")
log.warning("Corrupt / outdated cache, it will be rebuilt.")
except FileNotFoundError:
pass
return None
def read_cache(self, cache: "RVDatabase") -> None:
def salvage_cache(self, cache: "RVDatabase") -> None:
    """Carry per-element cached state over from a previously saved database."""
    log.debug(f"Salvaging cache")
    # Index the cached elements by guid for O(1) matching.
    by_guid = {cached.guid: cached for cached in cache.elements}
    for element in self.elements:
        cached = by_guid.get(element.guid)
        if cached is not None:
            element.salvage_cache(cached)
def clean_cache(self, cache: "RVDatabase") -> None:
    """Remove files of cached videos that no longer appear in the feed."""
    log.debug(f"Cleaning cache")
    current_guids = {element.guid for element in self.elements}
    for old in cache.elements:
        if old.guid in current_guids:
            continue
        # Only researched videos know their filenames on disk.
        if old.is_researched and old.is_video:
            old.clean()
def import_cache(self, cache: "RVDatabase") -> None:
    """Rebuild this database from the cached feed instead of fetching it."""
    log.debug(f"Importing cache")
    # Seeds the feed_xml cached_property, so read_feed() parses the
    # cached document rather than hitting the network.
    self.feed_xml = cache.feed_xml
    self.read_feed()
@functools.cached_property
def feed_xml(self) -> minidom.Document:
    """Fetch and parse the RSS feed; the network hit happens once, then cached."""
    log.info("Fetching RSS feed")
    with urllib.request.urlopen(self.args.feed) as response:
        document = minidom.parse(response)
    return document
def read_feed(self) -> None:
log.info("Fetching RSS feed")
self.elements = list()
with urllib.request.urlopen(self.args.feed) as request:
with minidom.parse(request) as xmldoc:
for item in xmldoc.getElementsByTagName("item"):
self.elements = []
for item in self.feed_xml.getElementsByTagName("item"):
element = RVElement(self, item)
self.elements.insert(0, element)
log.debug(f"Known: {element}")
def clean(self) -> None:
log.debug("Cleaning")
filenames = set()
for element in self.elements:
if element.is_video:
@ -216,13 +343,20 @@ class RVDatabase:
if file.startswith(filename):
break
else:
log.info(f"Removing: {file}")
log.info(f"Removing unknown file: {file}")
if not self.args.dryrun:
os.unlink(file)
def act_all(self) -> None:
@property
def all_researched(self) -> bool:
for element in self.elements:
element.act()
if not element.is_researched:
return False
return True
def attempt_clean(self) -> None:
if self.all_researched:
self.clean()
@property
def ytdl_opts(self) -> dict:
@ -242,6 +376,18 @@ class RVDatabase:
def ytdl_dry(self) -> youtube_dl.YoutubeDL:
return youtube_dl.YoutubeDL(self.ytdl_dry_opts)
def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
    """Yield elements in the requested --order that pass matches_filter."""
    ordered: typing.Iterable[RVElement]
    if args.order == "old":
        ordered = self.elements
    elif args.order == "new":
        ordered = reversed(self.elements)
    elif args.order == "random":
        # Shuffle a copy so the stored element order is untouched.
        shuffled = self.elements.copy()
        random.shuffle(shuffled)
        ordered = shuffled
    return (element for element in ordered if element.matches_filter(args))
def get_args() -> configargparse.Namespace:
defaultConfigPath = os.path.join(
@ -254,6 +400,8 @@ def get_args() -> configargparse.Namespace:
+ "an RSS aggregator",
default_config_files=[defaultConfigPath],
)
# Runtime settings
parser.add_argument(
"-v",
"--verbosity",
@ -264,6 +412,16 @@ def get_args() -> configargparse.Namespace:
parser.add(
"-c", "--config", required=False, is_config_file=True, help="Configuration file"
)
parser.add(
"-n",
"--dryrun",
help="Only pretend to do actions",
action="store_const",
const=True,
default=False,
)
# Input/Output
parser.add(
"--feed",
help="URL of the RSS feed (must be public for now)",
@ -276,21 +434,31 @@ def get_args() -> configargparse.Namespace:
env_var="RSS_VIDEOS_VIDEO_DIR",
required=True,
)
# Which videos
parser.add(
"-n",
"--dryrun",
help="Do not download the videos",
action="store_const",
const=True,
default=False,
"--order",
choices=("old", "new", "random"),
default="old",
help="Sorting mechanism",
)
parser.add("--guid", help="Regex to filter guid")
parser.add("--creator", help="Regex to filter by creator")
parser.add("--title", help="Regex to filter by title")
parser.add("--link", help="Regex to filter by link")
parser.add("--duration", help="Comparative to filter by duration")
parser.add("--seen", choices=("seen","unseen","any"), default="unseen", help="Only include seen/unseen/any videos")
# TODO Envrionment variables
parser.add(
"--max-duration",
help="Skip video longer than this amount of seconds",
help="(Deprecated, use --duration instead)",
env_var="RSS_VIDEOS_MAX_DURATION",
type=int,
default=0,
)
# TODO Allow to ask
# How to download
parser.add(
"--format",
help="Use this format to download videos."
@ -305,17 +473,17 @@ def get_args() -> configargparse.Namespace:
action="store_true",
)
parser.set_defaults(subcommand=RVCommand.download)
subparsers = parser.add_subparsers(title="subcommand")
sc_download = subparsers.add_parser("download")
sc_download.set_defaults(subcommand=RVCommand.download)
sc_list = subparsers.add_parser("list")
sc_list.set_defaults(subcommand=RVCommand.list)
parser.add(
"action",
nargs="?",
choices=("download", "list", "watch", "binge", "clean"),
default="download",
)
args = parser.parse_args()
args.videos = os.path.realpath(os.path.expanduser(args.videos))
if not args.duration and args.max_duration:
args.duration = str(args.max_duration)
return args
@ -327,22 +495,37 @@ def main() -> None:
os.makedirs(args.videos, exist_ok=True)
os.chdir(args.videos)
if args.subcommand == RVCommand.download:
database = RVDatabase(args)
database.read_feed()
cache = RVDatabase.load()
try:
database.read_feed()
except urllib.error.URLError as err:
if args.action == "download" or not cache:
raise err
else:
log.warning("Cannot fetch RSS feed, using cached feed.", err)
database.import_cache(cache)
if cache:
database.read_cache(cache)
database.clean()
database.act_all()
database.salvage_cache(cache)
database.clean_cache(cache)
database.save()
elif args.subcommand == RVCommand.list:
cache = RVDatabase.load()
if not cache:
raise FileNotFoundError("This command doesn't work without a cache yet.")
for element in cache.elements:
log.debug(f"Running action")
if args.action == "clean":
database.clean()
else:
database.attempt_clean()
for element in database.filter(args):
if args.action == "download":
element.preload()
elif args.action == "list":
print(element)
elif args.action in ("watch", "binge"):
element.watch()
if args.action == "watch":
break
database.attempt_clean()
database.save()
if __name__ == "__main__":

View file

@ -33,6 +33,7 @@ audio_br_bi = 128000
quota_by = int(sys.argv[1])
in_file = sys.argv[2]
out_file = sys.argv[3]
filters = sys.argv[4:]
quota_bi = quota_by * 8
duration = duration_file(in_file)
@ -40,15 +41,21 @@ tot_br_bi = quota_bi / duration
video_br_bi = int(tot_br_bi - audio_br_bi)
assert video_br_bi > 0, "Not even enough space for audio"
cmd = [
cmd = (
[
"ffmpeg",
"-i",
in_file,
]
+ filters
+ [
"-b:v",
str(video_br_bi),
"-b:a",
str(audio_br_bi),
out_file,
]
]
)
print(" ".join(cmd))
subprocess.run(cmd, check=True)