dotfiles/config/scripts/rssVideos

#!/usr/bin/env python3


"""
Script that downloads videos that are linked as an article
in an RSS feed.
The common use case would be a feed from an RSS aggregator
with the unread items (non-video links are ignored).
"""

import datetime
import functools
import logging
import operator
import os
import pickle
import random
import re
import subprocess
import sys
import time
import typing

import coloredlogs
import configargparse
import filelock
import requests
import yt_dlp

# Module-level logger used by everything in this script; its output is set up
# by configure_logging().
log = logging.getLogger(__name__)


def configure_logging(args: configargparse.Namespace) -> None:
    """
    Install the coloredlogs handler according to the parsed arguments.

    When a verbosity was given, coloredlogs is installed at that level.
    Otherwise it is installed on this script's logger only, with a format
    reduced to the bare message.
    """
    if not args.verbosity:
        # No explicit verbosity: plain messages, from this script's logger
        coloredlogs.install(fmt="%(message)s", logger=log)
        return

    coloredlogs.install(level=args.verbosity)


class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
    """
    Post-processor handing the final info dict back to an RVElement.

    yt_dlp.process_ie_result() doesn't return a completely updated info dict,
    notably the extension is still the one before it realizes the files cannot
    be merged. Running as a post-processor lets us see the info dict in its
    final form and save what we need from it (it's not serializable in this
    state).
    """

    def __init__(self, rvelement: "RVElement") -> None:
        # Element to notify once the download has been post-processed
        self.rvelement = rvelement
        super().__init__()

    def run(self, info: dict) -> tuple[list, dict]:
        """
        Let the element record what it needs, and pass the info dict through
        untouched together with an empty list.
        """
        element = self.rvelement
        element.update_post_download(info)
        nothing: list = []
        return nothing, info


def parse_duration(string: str) -> int:
    """
    Convert a duration written by the user into a number of seconds.

    The string is an integer optionally followed by a one-letter unit
    (case-insensitive): "s" for seconds, "m" for minutes, "h" for hours.
    Without a unit the number is taken as seconds.

    Raises ValueError if the string is empty, if the unit is unknown or if
    the numeric part is not an integer.
    """
    DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}

    if not string:
        raise ValueError("Empty duration")

    mult_index = string[-1].lower()
    if mult_index.isdigit():
        mult_index = ""
    else:
        string = string[:-1]
    try:
        multiplier = DURATION_MULTIPLIERS[mult_index]
    except KeyError:
        # A failed dict lookup raises KeyError (not IndexError)
        raise ValueError(f"Unknown duration multiplier: {mult_index}") from None

    return int(string) * multiplier


def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
    """
    Build a predicate on durations from a comparison written by the user.

    The string is a duration understood by parse_duration(), optionally
    preceded by a one-character comparator: "<" or "-" (strictly shorter),
    ">" or "+" (strictly longer), "=" (equal). Without a comparator the
    predicate is "shorter or equal".

    The returned callable takes a duration in seconds and tells whether it
    satisfies the comparison.

    Raises ValueError if the string is empty, if the comparator is unknown or
    if the duration part cannot be parsed.
    """
    # operator functions rather than int.__lt__ & co: the unbound int methods
    # raise TypeError when handed a float duration.
    DURATION_COMPARATORS = {
        "<": operator.lt,
        "-": operator.lt,
        ">": operator.gt,
        "+": operator.gt,
        "=": operator.eq,
        "": operator.le,
    }

    if not compstr:
        raise ValueError("Empty duration comparison")

    comp_index = compstr[0]
    if comp_index.isdigit():
        comp_index = ""
    else:
        compstr = compstr[1:]
    try:
        comparator = DURATION_COMPARATORS[comp_index]
    except KeyError:
        # A failed dict lookup raises KeyError (not IndexError)
        raise ValueError(f"Unknown duration comparator: {comp_index}") from None

    duration = parse_duration(compstr)

    return lambda d: comparator(d, duration)


def format_duration(duration: int) -> str:
    """
    Format a number of seconds as "HH:MM:SS".

    Fractions of a second are dropped. The hours are not wrapped at 24 (as
    going through a time-of-day would), so totals of a day or more, such as
    the sum over many videos, are shown in full.
    """
    hours, remainder = divmod(int(duration), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


class RVElement:
    """
    An article of the feed, with its cached metadata and (possibly) its video.

    What is persisted about an element lives in the parent's metadata folder,
    in files named "<sid>.<extension>": "item" (the feed item), "ytdl" (the
    result of the yt-dlp research), "path" (where the video was downloaded)
    and "lock" (lock file held while downloading).
    """

    parent: "RVDatabase"
    item: dict

    # Cached yt-dlp information older than this is dropped (and thus
    # researched again) before a download.
    RERESEARCH_AFTER = datetime.timedelta(hours=1)

    def __init__(self, parent: "RVDatabase", item: dict) -> None:
        self.parent = parent
        self.item = item

    @property
    def id(self) -> str:
        """Identifier of the item, as found in the feed item."""
        return self.item["id"]

    @property
    def sid(self) -> str:
        """Short identifier: the last "/"-separated component of the id."""
        return self.id.split("/")[-1]

    def metafile(self, extension: str) -> str:
        """Path of this element's metadata file with the given extension."""
        return os.path.join(self.parent.METADATA_FOLDER, f"{self.sid}.{extension}")

    def metafile_read(self, extension: str) -> typing.Any:
        """Load this element's metadata file with the given extension."""
        return self.parent.metafile_read(f"{self.sid}.{extension}")

    def metafile_write(self, extension: str, data: typing.Any) -> None:
        """Store data in this element's metadata file with the given extension."""
        return self.parent.metafile_write(f"{self.sid}.{extension}", data)

    def save(self) -> None:
        """Persist the feed item so the element can be rebuilt from the cache."""
        self.metafile_write("item", self.item)

    @property
    def title(self) -> str:
        return self.item["title"]

    @property
    def link(self) -> str:
        return self.item["canonical"][0]["href"]

    @property
    def creator(self) -> str:
        return self.item["origin"]["title"]

    @property
    def date(self) -> datetime.datetime:
        """
        Date of the item, from the first of "timestampUsec", "crawlTimeMsec"
        and "published" that is non-zero.
        """
        timestamp = (
            int(self.item.get("timestampUsec", "0")) / 1000000
            or int(self.item.get("crawlTimeMsec", "0")) / 1000
            or self.item["published"]
        )
        return datetime.datetime.fromtimestamp(timestamp)

    @property
    def is_researched(self) -> bool:
        """Whether a yt-dlp research result is stored for this element."""
        metafile = self.metafile("ytdl")
        return os.path.isfile(metafile)

    def __str__(self) -> str:
        # Only show the duration when it is already known: printing an
        # element must not trigger a research.
        if not self.is_researched:
            duration = "??:??:??"
        elif self.is_video:
            duration = format_duration(self.duration)
        else:
            duration = "--:--:--"
        return (
            f"{self.date.strftime('%y-%m-%d %H:%M')} ({duration}"
            f") {self.creator if self.creator else '?'} "
            f"– {self.title} "
            f"– {self.link}"
        )

    @property
    def downloaded(self) -> bool:
        """Whether the video file is currently present on disk."""
        if not self.is_researched:
            return False
        return os.path.isfile(self.filepath)

    @functools.cached_property
    def ytdl_infos(self) -> typing.Optional[dict]:
        """
        yt-dlp information about the link, or None if yt-dlp couldn't get any.

        Read from the "ytdl" metadata file when possible; otherwise the link
        is researched and the result (even None) is stored there.
        """
        try:
            return self.metafile_read("ytdl")
        except (FileNotFoundError, TypeError, AttributeError, EOFError):
            infos = self._ytdl_infos()
            self.metafile_write("ytdl", infos)
        return infos

    def _ytdl_infos(self) -> typing.Optional[dict]:
        """Ask yt-dlp about the link, without downloading anything."""
        log.info(f"Researching: {self}")
        try:
            infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
        except yt_dlp.utils.DownloadError as e:
            # TODO Still raise in case of temporary network issue
            log.warning(e)
            infos = None
        if infos:
            infos = self.parent.ytdl_dry.sanitize_info(infos)
        return infos

    @property
    def duration(self) -> int:
        assert self.is_video
        assert self.ytdl_infos
        return self.ytdl_infos["duration"]

    @property
    def is_video(self) -> bool:
        # Duration might be missing in playlists and stuff
        return self.ytdl_infos is not None and "duration" in self.ytdl_infos

    @functools.cached_property
    def downloaded_filepath(self) -> typing.Optional[str]:
        """Path stored in the "path" metadata file, None if there is none."""
        try:
            return self.metafile_read("path")
        except FileNotFoundError:
            return None

    @property
    def was_downloaded(self) -> bool:
        """Whether a "path" metadata file exists for this element."""
        metafile = self.metafile("path")
        return os.path.exists(metafile)

    @property
    def filepath(self) -> str:
        """
        Path of the video file: the one recorded after a download if any,
        else the one yt-dlp prepares from the researched information.
        """
        assert self.is_video
        if self.downloaded_filepath:
            return self.downloaded_filepath
        return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)

    @property
    def basename(self) -> str:
        """File path of the video without its extension."""
        assert self.is_video
        return os.path.splitext(self.filepath)[0]

    def expire_info(self) -> None:
        """
        Forget the yt-dlp information if its metadata file is older than
        RERESEARCH_AFTER, so that the next access researches it again.
        """
        metafile = self.metafile("ytdl")
        if os.path.isfile(metafile):
            stat = os.stat(metafile)
            mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
            diff = datetime.datetime.now() - mtime
            if diff > self.RERESEARCH_AFTER:
                os.unlink(metafile)
                # Drop the in-memory copy too. `del self.ytdl_infos` would
                # raise AttributeError if the property was never computed.
                self.__dict__.pop("ytdl_infos", None)

    def download(self) -> None:
        """
        Download the video unless it is already there, holding the
        element's lock file meanwhile. Nothing is downloaded in dry-run mode.
        """
        assert self.is_video
        if self.downloaded:
            return
        self.expire_info()
        log.info(f"Downloading: {self}")
        lockfile = self.metafile("lock")
        with filelock.FileLock(lockfile):
            if not self.parent.args.dryrun:
                with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
                    # SaveInfoPP calls update_post_download() back
                    ydl.add_post_processor(SaveInfoPP(self))
                    ydl.process_ie_result(self.ytdl_infos, download=True)

    def update_post_download(self, info: dict) -> None:
        """Record the file path computed from the post-download info dict."""
        self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
        assert self.downloaded_filepath
        assert self.downloaded_filepath.startswith(self.basename)
        self.metafile_write("path", self.downloaded_filepath)

    @property
    def watched(self) -> bool:
        """Whether the video was downloaded once but its file is now gone."""
        if not self.is_researched:
            return False
        return self.was_downloaded and not self.downloaded

    def matches_filter(self, args: configargparse.Namespace) -> bool:
        """
        Tell whether the element passes the seen / title / link / creator /
        duration filters of the arguments, and is a video.
        """
        # Inexpensive filters
        if args.seen != "any" and (args.seen == "seen") != self.watched:
            log.debug(f"Not {args.seen}: {self}")
            return False
        if args.title and not re.search(args.title, self.title):
            log.debug(f"Title not matching {args.title}: {self}")
            return False
        if args.link and not re.search(args.link, self.link):
            log.debug(f"Link not matching {args.link}: {self}")
            return False
        if args.creator and (
            not self.creator or not re.search(args.creator, self.creator)
        ):
            log.debug(f"Creator not matching {args.creator}: {self}")
            return False

        # Expensive filters
        if not self.is_video:
            log.debug(f"Not a video: {self}")
            return False
        if args.duration and not compare_duration(args.duration)(self.duration):
            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
            return False

        return True

    def watch(self) -> None:
        """
        Download the video, play it with mpv, then remove it and try to mark
        the element as read. mpv is not run in dry-run mode.

        Raises subprocess.CalledProcessError if mpv exits with an error.
        """
        self.download()

        cmd = ["mpv", self.filepath]
        log.debug(f"Running {cmd}")
        if not self.parent.args.dryrun:
            subprocess.run(cmd, check=True)

        self.undownload()
        self.try_mark_read()

    def clean_file(self, folder: str, basename: str) -> None:
        """
        Remove the files of the folder whose name starts with basename
        (only logged in dry-run mode).
        """
        for file in os.listdir(folder):
            if file.startswith(basename):
                path = os.path.join(folder, file)
                log.debug(f"Removing file: {path}")
                if not self.parent.args.dryrun:
                    os.unlink(path)

    def undownload(self) -> None:
        """Remove the video file(s) of this element from the current folder."""
        assert self.is_video
        log.info(f"Removing gone video: {self.basename}*")
        self.clean_file(".", self.basename)

    def clean(self) -> None:
        """Remove the video (if any) and all the metadata of this element."""
        if self.is_researched and self.is_video:
            self.undownload()
        log.info(f"Removing gone metadata: {self.sid}*")
        # Metadata files are all named "<sid>.<extension>": match up to the
        # dot so that an identifier which happens to be the prefix of another
        # one doesn't take the other element's metadata along.
        self.clean_file(self.parent.METADATA_FOLDER, f"{self.sid}.")

    def mark_read(self) -> None:
        """
        Mark the item as read on the aggregator, then clean the element up.
        Does nothing but logging in dry-run mode.

        Raises requests exceptions on HTTP failure, RuntimeError if the
        aggregator doesn't answer "OK".
        """
        log.debug(f"Marking {self} read")
        if self.parent.args.dryrun:
            return
        r = requests.post(
            f"{self.parent.args.url}/reader/api/0/edit-tag",
            data={
                "i": self.id,
                "a": "user/-/state/com.google/read",
                "ac": "edit",
                "token": self.parent.feed_token,
            },
            headers=self.parent.auth_headers,
        )
        r.raise_for_status()
        if r.text.strip() != "OK":
            raise RuntimeError(f"Couldn't mark {self} as read: {r.text}")
        log.info(f"Marked {self} as read")
        self.clean()

    def try_mark_read(self) -> None:
        """Like mark_read(), but a connection error only produces a warning."""
        try:
            self.mark_read()
        except requests.ConnectionError:
            log.warning(f"Couldn't mark {self} as read")


class RVDatabase:
    """
    The list of elements, either fetched from the aggregator or rebuilt
    from the metadata folder, plus what the elements share: the command line
    arguments, access to the metadata files, the aggregator credentials and
    the yt-dlp configuration.

    Paths are relative: METADATA_FOLDER is looked up in the current directory.
    """

    METADATA_FOLDER = ".metadata"

    args: configargparse.Namespace
    elements: list[RVElement]

    def __init__(self, args: configargparse.Namespace) -> None:
        self.args = args

    def metafile_read(self, name: str) -> typing.Any:
        """
        Load the object pickled in the given file of the metadata folder.

        Raises FileNotFoundError if the file doesn't exist.
        """
        path = os.path.join(self.METADATA_FOLDER, name)
        log.debug(f"Reading {path}")
        # NOTE(security): unpickling executes arbitrary code, so this is only
        # acceptable as long as the metadata folder holds files written by
        # this script and nothing else.
        with open(path, "rb") as mf:
            return pickle.load(mf)

    def metafile_write(self, name: str, data: typing.Any) -> None:
        """
        Pickle data in the given file of the metadata folder (only logged in
        dry-run mode).
        """
        path = os.path.join(self.METADATA_FOLDER, name)
        log.debug(f"Writing {path}")
        if not self.args.dryrun:
            with open(path, "wb") as mf:
                pickle.dump(data, mf)

    def clean_cache(self, cache: "RVDatabase") -> None:
        """Clean the elements of cache that are not in this database anymore."""
        log.debug("Cleaning cache")
        fresh_ids = {el.id for el in self.elements}
        for el in cache.elements:
            if el.id not in fresh_ids:
                el.clean()

    def _auth_headers(self) -> dict[str, str]:
        """
        Log into the aggregator and build the authorization header from the
        "auth=" line of its answer.

        Raises RuntimeError if the answer has no such line.
        """
        r = requests.get(
            f"{self.args.url}/accounts/ClientLogin",
            params={"Email": self.args.email, "Passwd": self.args.passwd},
        )
        r.raise_for_status()
        for line in r.text.split("\n"):
            if line.lower().startswith("auth="):
                # Split once only: the value itself may contain "="
                val = line.split("=", 1)[1]
                return {"Authorization": f"GoogleLogin auth={val}"}
        raise RuntimeError("Couldn't find auth= key")

    @functools.cached_property
    def auth_headers(self) -> dict[str, str]:
        """
        Authorization headers for the aggregator, taken from the
        ".auth_headers" metadata file if it exists, else obtained by logging
        in (and then stored there).
        """
        try:
            return self.metafile_read(".auth_headers")
        except FileNotFoundError:
            headers = self._auth_headers()
            self.metafile_write(".auth_headers", headers)
            return headers

    def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
        """
        Yield the items of the aggregator's stream, excluding the ones tagged
        as read, following continuations until there is none left.
        """
        log.info("Fetching RSS feed")
        continuation: typing.Optional[str] = None
        with requests.Session() as s:
            while True:
                r = s.get(
                    f"{self.args.url}/reader/api/0/stream/contents",
                    params={
                        "xt": "user/-/state/com.google/read",
                        "c": continuation,
                    },
                    headers=self.auth_headers,
                )
                r.raise_for_status()
                page = r.json()
                yield from page["items"]
                continuation = page.get("continuation")
                if not continuation:
                    break

    def fetch_cache_elements(self) -> typing.Generator[dict, None, None]:
        """Yield the items stored in the ".item" files of the metadata folder."""
        log.info("Fetching from cache")
        for file in os.listdir(self.METADATA_FOLDER):
            if not file.endswith(".item"):
                continue
            yield self.metafile_read(file)

    def build_list(self, items: typing.Iterable[dict], save: bool = False) -> None:
        """
        Fill self.elements from the items, in reverse order of arrival,
        saving each element's item if save is set.
        """
        self.elements = []
        for item in items:
            element = RVElement(self, item)
            self.elements.append(element)
            log.debug(f"Known: {element}")
            if save:
                element.save()
        # Same result as inserting every element at the head of the list,
        # without shifting the whole list each time.
        self.elements.reverse()

    def read_feed(self) -> None:
        """Build the list from the aggregator, saving the items in the cache."""
        self.build_list(self.fetch_feed_elements(), save=True)

    def read_cache(self) -> None:
        """Build the list from the items stored in the metadata folder."""
        self.build_list(self.fetch_cache_elements())

    def clean_folder(self, folder: str, basenames: set[str]) -> None:
        """
        Remove the regular, non-hidden files of the folder whose name starts
        with none of the basenames (only logged in dry-run mode).
        """
        prefixes = tuple(basenames)
        for file in os.listdir(folder):
            path = os.path.join(folder, file)
            if not os.path.isfile(path) or file[0] == ".":
                continue
            if file.startswith(prefixes):
                continue
            log.info(f"Removing unknown file: {path}")
            if not self.args.dryrun:
                os.unlink(path)

    def clean(self) -> None:
        """
        Remove the videos and metadata files that don't belong to any element
        of the list.
        """
        log.debug("Cleaning")
        filenames = {el.basename for el in self.elements if el.is_video}
        self.clean_folder(".", filenames)
        ids = {el.sid for el in self.elements}
        self.clean_folder(self.METADATA_FOLDER, ids)

    @property
    def ytdl_opts(self) -> dict:
        """
        yt-dlp options as yt-dlp itself would parse them without any command
        line argument.
        """
        # Get user/system options
        # parse_options() reads sys.argv: hide our own arguments from it, and
        # put them back whatever happens.
        prev_argv = sys.argv
        sys.argv = ["yt-dlp"]
        try:
            _, _, _, ydl_opts = yt_dlp.parse_options()
        finally:
            sys.argv = prev_argv
        return ydl_opts

    @property
    def ytdl_dry_opts(self) -> dict:
        """Copy of the yt-dlp options with "quiet" enabled."""
        opts = self.ytdl_opts.copy()
        opts.update({"quiet": True})
        return opts

    @property
    def ytdl_dry(self) -> yt_dlp.YoutubeDL:
        """A new YoutubeDL object built with the quiet options."""
        return yt_dlp.YoutubeDL(self.ytdl_dry_opts)

    def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
        """
        Return the elements matching the filters of the arguments, in the
        requested order, possibly limited to what fits in --total-duration.

        Sorting on cheap attributes is done before filtering, so that the
        (possibly expensive) filtering stays lazy; sorting on the duration
        has to be done on the filtered elements.
        """
        elements_src = self.elements.copy()
        elements: typing.Iterable[RVElement]
        # Inexpensive sort
        if args.order == "new":
            elements = sorted(elements_src, key=lambda el: el.date, reverse=True)
        elif args.order == "old":
            elements = sorted(elements_src, key=lambda el: el.date)
        elif args.order == "title":
            elements = sorted(elements_src, key=lambda el: el.title)
        elif args.order == "creator":
            elements = sorted(elements_src, key=lambda el: el.creator or "")
        elif args.order == "link":
            elements = sorted(elements_src, key=lambda el: el.link)
        elif args.order == "random":
            random.shuffle(elements_src)
            elements = elements_src
        else:
            # "short" and "long" are sorted after the filtering; without this
            # branch `elements` was left unbound for those orders.
            elements = elements_src

        # Possibly expensive filtering
        elements = (el for el in elements if el.matches_filter(args))

        # Expensive sort
        if args.order == "short":
            elements = sorted(
                elements, key=lambda el: el.duration if el.is_video else 0
            )
        elif args.order == "long":
            elements = sorted(
                elements, key=lambda el: el.duration if el.is_video else 0, reverse=True
            )

        # Post sorting filtering
        if args.total_duration:
            rem = parse_duration(args.total_duration)
            selected = []
            # Take, in order, every element strictly shorter than what is
            # left. One pass is enough: the remaining time only shrinks, so
            # an element that didn't fit will not fit later either.
            for el in elements:
                if el.duration < rem:
                    selected.append(el)
                    rem -= el.duration
            elements = selected

        return elements

    @functools.cached_property
    def feed_token(self) -> str:
        """Token requested from the aggregator, sent along when marking as read."""
        r = requests.get(
            f"{self.args.url}/reader/api/0/token",
            headers=self.auth_headers,
        )
        r.raise_for_status()
        return r.text.strip()

    def try_mark_watched_read(self) -> None:
        """Try to mark as read every element that has been watched."""
        for element in self.elements:
            if element.watched:
                element.try_mark_read()


def get_args() -> configargparse.Namespace:
    """
    Parse the settings from the command line, the environment and the
    configuration file.

    The default configuration file is "rssVideos" in the user's
    configuration directory: $XDG_CONFIG_HOME, falling back to ~/.config/.
    $XDG_CONFIG_PATH, the (non-standard) variable this script used to read,
    still takes precedence when set.

    The returned namespace has `videos` turned into an absolute real path.
    """
    config_dir = (
        os.getenv("XDG_CONFIG_PATH")  # legacy name, kept for compatibility
        or os.getenv("XDG_CONFIG_HOME")
        or "~/.config/"
    )
    default_config_path = os.path.join(os.path.expanduser(config_dir), "rssVideos")

    parser = configargparse.ArgParser(
        description="Download videos in unread articles from a feed aggregator",
        default_config_files=[default_config_path],
    )

    # Runtime settings
    parser.add_argument(
        "-v",
        "--verbosity",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default=None,
        help="Verbosity of log messages",
    )
    parser.add(
        "-c", "--config", required=False, is_config_file=True, help="Configuration file"
    )
    parser.add(
        "-n",
        "--dryrun",
        help="Only pretend to do actions",
        action="store_true",
    )

    # Input/Output
    parser.add(
        "--url",
        help="URL of the Google Reader API of the aggregator",
        env_var="RSS_VIDEOS_URL",
        required=True,
    )
    parser.add(
        "--email",
        help="E-mail / user to connect to the aggregator",
        env_var="RSS_VIDEOS_EMAIL",
        required=True,
    )
    parser.add(
        "--passwd",
        help="Password to connect to the aggregator",
        env_var="RSS_VIDEOS_PASSWD",
        required=True,
    )
    parser.add(
        "--no-refresh",
        dest="refresh",
        help="Don't fetch feed",
        action="store_false",
    )
    parser.add(
        "--videos",
        help="Directory to store videos",
        env_var="RSS_VIDEOS_VIDEO_DIR",
        required=True,
    )

    # Which videos
    parser.add(
        "--order",
        choices=("old", "new", "title", "creator", "link", "short", "long", "random"),
        default="old",
        help="Sorting mechanism",
    )
    parser.add("--creator", help="Regex to filter by creator")
    parser.add("--title", help="Regex to filter by title")
    parser.add("--link", help="Regex to filter by link")
    parser.add("--duration", help="Comparative to filter by duration")
    # TODO Date selector
    parser.add(
        "--seen",
        choices=("seen", "unseen", "any"),
        default="unseen",
        help="Only include seen/unseen/any videos",
    )
    parser.add(
        "--total-duration",
        help="Use videos that fit under the total given",
    )
    # TODO Environment variables
    # TODO Allow to ask

    parser.add(
        "action",
        nargs="?",
        choices=(
            "download",
            "list",
            "watch",
            "binge",
        ),
        default="download",
    )

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))

    return args


def get_database(args: configargparse.Namespace) -> RVDatabase:
    """
    Build the database the actions will work on.

    The cache is always read. Unless refreshing was disabled, the feed is
    then fetched, what the cache holds but the feed doesn't list anymore is
    cleaned, and the fresh database is returned instead of the cached one.
    """
    cached = RVDatabase(args)
    cached.read_cache()

    if args.refresh:
        database = RVDatabase(args)
        database.read_feed()
        database.clean_cache(cached)
        return database

    return cached


def main() -> None:
    """
    Entry point: run the requested action on every element selected by the
    arguments, then sync the read state and clean up.
    """
    args = get_args()
    configure_logging(args)

    # Everything afterwards uses paths relative to the videos directory
    os.makedirs(args.videos, exist_ok=True)
    os.makedirs(
        os.path.join(args.videos, RVDatabase.METADATA_FOLDER), exist_ok=True
    )
    os.chdir(args.videos)

    database = get_database(args)

    log.debug("Running action")
    action = args.action
    total = 0
    for element in database.filter(args):
        if element.is_video:
            total += element.duration

        if action == "list":
            print(element)
        elif action == "download":
            element.download()
        elif action in ("watch", "binge"):
            element.watch()
            # "watch" plays a single video, "binge" goes on
            if action == "watch":
                break
        else:
            raise NotImplementedError(f"Unimplemented action: {args.action}")

    log.info(f"Total duration: {format_duration(total)}")
    database.try_mark_watched_read()
    database.clean()


# Only run when executed as a script, not when imported
if __name__ == "__main__":
    main()
-												rssVideos

											
										
										
											2019-04-30 08:22:27 +02:00
+								#!/usr/bin/env python3
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos

											
										
										
											2019-04-30 08:22:27 +02:00
+								"""
 								Script that download videos that are linked as an article
 								in a RSS feed.
 								The common use case would be a feed from an RSS aggregator
 								with the unread items (non-video links are ignored).
 								"""
-												rssVideos: Replace guid by date and id

											
										
										
											2021-12-29 14:40:00 +01:00
+								import datetime
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								import filelock
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								import functools
 								import logging
 								import os
 								import pickle
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
+								import random
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								import requests
-												rssVideos: Support list filters

											
										
										
											2021-12-17 22:13:46 +01:00
+								import re
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
+								import subprocess
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								import time
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								import typing
-												rssVideos: Fix the options issue

											
										
										
											2022-04-10 09:58:06 +02:00
+								import sys
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								import coloredlogs
-												rssVideos

											
										
										
											2019-04-30 08:22:27 +02:00
+								import configargparse
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								import yt_dlp
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								log = logging.getLogger(__name__)
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								def configure_logging(args: configargparse.Namespace) -> None:
 								    # Configure logging
 								    if args.verbosity:
 								        coloredlogs.install(
 								            level=args.verbosity,
 								        )
 								    else:
 								        coloredlogs.install(
 								            fmt="%(message)s",
 								            logger=log,
 								        )
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
 								class SaveInfoPP(yt_dlp.postprocessor.common.PostProcessor):
 								    """
 								    yt_dlp.process_ie_result() doesn't return a completely updated info dict,
 								    notably the extension is still the one before it realizes the files cannot
 								    be merged. So we use this PostProcessor to catch the info dict in its final
-												rssVideos: Better sanitization of ytdl info

											
										
										
											2021-12-20 18:57:13 +01:00
+								    form and save what we need from it (it's not serializable in this state).
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								    """
 								    def __init__(self, rvelement: "RVElement") -> None:
 								        self.rvelement = rvelement
 								        super().__init__()
 								    def run(self, info: dict) -> tuple[list, dict]:
-												rssVideos: Better sanitization of ytdl info

											
										
										
											2021-12-20 18:57:13 +01:00
+								        self.rvelement.update_post_download(info)
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								        return [], info
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
-												rssVideos: --total-duration

Controleld binging

											
										
										
											2021-12-19 23:13:41 +01:00
+								def parse_duration(string: str) -> int:
 								    DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "": 1}
 								    mult_index = string[-1].lower()
 								    if mult_index.isdigit():
 								        mult_index = ""
 								    else:
 								        string = string[:-1]
 								    try:
 								        multiplier = DURATION_MULTIPLIERS[mult_index]
 								    except IndexError:
 								        raise ValueError(f"Unknown duration multiplier: {mult_index}")
 								    return int(string) * multiplier
 								def compare_duration(compstr: str) -> typing.Callable[[int], bool]:
 								    DURATION_COMPARATORS = {
 								        "<": int.__lt__,
 								        "-": int.__lt__,
 								        ">": int.__gt__,
 								        "+": int.__gt__,
 								        "=": int.__eq__,
 								        "": int.__le__,
 								    }
 								    comp_index = compstr[0]
 								    if comp_index.isdigit():
 								        comp_index = ""
 								    else:
 								        compstr = compstr[1:]
 								    try:
 								        comparator = DURATION_COMPARATORS[comp_index]
 								    except IndexError:
 								        raise ValueError(f"Unknown duration comparator: {comp_index}")
 								    duration = parse_duration(compstr)
 								    return lambda d: comparator(d, duration)
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								def format_duration(duration: int) -> str:
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								    return time.strftime("%H:%M:%S", time.gmtime(duration))
-												rssVideos: Don't download already downloaded videos

Because the good extension is not the one expected :/

											
										
										
											2021-12-12 13:40:24 +01:00
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								class RVElement:
 								    parent: "RVDatabase"
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								    item: dict
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
 								    RERESEARCH_AFTER = datetime.timedelta(hours=1)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								    def __init__(self, parent: "RVDatabase", item: dict) -> None:
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        self.parent = parent
-												rssVideos: Show creator

Even if it's not always  present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
+								        self.item = item
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
 								    @property
 								    def id(self) -> str:
 								        return self.item["id"]
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    @property
 								    def sid(self) -> str:
 								        return self.id.split("/")[-1]
 								    def metafile(self, extension: str) -> str:
 								        return os.path.join(self.parent.METADATA_FOLDER, f"{self.sid}.{extension}")
 								    def metafile_read(self, extension: str) -> typing.Any:
 								        return self.parent.metafile_read(f"{self.sid}.{extension}")
 								    def metafile_write(self, extension: str, data: typing.Any) -> None:
 								        return self.parent.metafile_write(f"{self.sid}.{extension}", data)
 								    def save(self) -> None:
 								        self.metafile_write("item", self.item)
-												rssVideos: Show creator

Even if it's not always present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
+								    @property
 								    def title(self) -> str:
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								        return self.item["title"]
-												rssVideos: Show creator

Even if it's not always present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
 								    @property
 								    def link(self) -> str:
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								        return self.item["canonical"][0]["href"]
-												rssVideos: Show creator

Even if it's not always present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
 								    @property
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								    def creator(self) -> str:
 								        return self.item["origin"]["title"]
-												rssVideos: Show creator

Even if it's not always present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
 								    @property
-												rssVideos: Replace guid by date and id

											
										
										
											2021-12-29 14:40:00 +01:00
+								    def date(self) -> datetime.datetime:
-												Make rssVideos use release date

											
										
										
											2022-11-04 14:07:37 +01:00
+								        timestamp = (
 								            int(self.item.get("timestampUsec", "0")) / 1000000
 								            or int(self.item.get("crawlTimeMsec", "0")) / 1000
 								            or self.item["published"]
 								        )
 								        return datetime.datetime.fromtimestamp(timestamp)
-												rssVideos: Show creator

Even if it's not always present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								    @property
 								    def is_researched(self) -> bool:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        metafile = self.metafile("ytdl")
 								        return os.path.isfile(metafile)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								    def __str__(self) -> str:
-												rssVideos: Replace guid by date and id

											
										
										
											2021-12-29 14:40:00 +01:00
+								        str = f"{self.date.strftime('%y-%m-%d %H:%M')} ("
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        if self.is_researched:
 								            if self.is_video:
-												rssVideos: Replace guid by date and id

											
										
										
											2021-12-29 14:40:00 +01:00
+								                str += format_duration(self.duration)
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								            else:
-												rssVideos: Replace guid by date and id

											
										
										
											2021-12-29 14:40:00 +01:00
+								                str += "--:--:--"
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        else:
-												rssVideos: Replace guid by date and id

											
										
										
											2021-12-29 14:40:00 +01:00
+								            str += "??:??:??"
 								        str += (
 								            f") {self.creator if self.creator else '?'} "
 								            f"– {self.title} "
 								            f"– {self.link}"
 								        )
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        return str
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								    @property
 								    def downloaded(self) -> bool:
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								        if not self.is_researched:
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								            return False
 								        return os.path.isfile(self.filepath)
 								    @functools.cached_property
 								    def ytdl_infos(self) -> typing.Optional[dict]:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        try:
 								            return self.metafile_read("ytdl")
 								        except (FileNotFoundError, TypeError, AttributeError, EOFError):
 								            infos = self._ytdl_infos()
 								            self.metafile_write("ytdl", infos)
 								        return infos
 								    def _ytdl_infos(self) -> typing.Optional[dict]:
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        log.info(f"Researching: {self}")
 								        try:
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								            infos = self.parent.ytdl_dry.extract_info(self.link, download=False)
-												rssVideos: Slightly better error handling

Makes it actually quit on Ctrl+C

											
										
										
											2021-12-12 14:52:21 +01:00
+								        except KeyboardInterrupt as e:
 								            raise e
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								        except yt_dlp.utils.DownloadError as e:
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								            # TODO Still raise in case of temporary network issue
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								            log.warning(e)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								            infos = None
-												rssVideos: Better sanitization of ytdl info

											
										
										
											2021-12-20 18:57:13 +01:00
+								        if infos:
 								            infos = self.parent.ytdl_dry.sanitize_info(infos)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        return infos
 								    @property
-												rssVideos: Filter by duration

											
										
										
											2021-12-17 22:42:35 +01:00
+								    def duration(self) -> int:
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        assert self.is_video
 								        assert self.ytdl_infos
-												rssVideos: Filter by duration

											
										
										
											2021-12-17 22:42:35 +01:00
+								        return self.ytdl_infos["duration"]
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								    @property
 								    def is_video(self) -> bool:
 								        # Duration might be missing in playlists and stuff
 								        return self.ytdl_infos is not None and "duration" in self.ytdl_infos
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    @functools.cached_property
 								    def downloaded_filepath(self) -> typing.Optional[str]:
 								        try:
 								            return self.metafile_read("path")
 								        except FileNotFoundError:
 								            return None
 								    @property
 								    def was_downloaded(self) -> bool:
 								        metafile = self.metafile("path")
 								        return os.path.exists(metafile)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								    @property
 								    def filepath(self) -> str:
 								        assert self.is_video
-												rssVideos: Better sanitization of ytdl info

											
										
										
											2021-12-20 18:57:13 +01:00
+								        if self.downloaded_filepath:
 								            return self.downloaded_filepath
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
 								    @property
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def basename(self) -> str:
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        assert self.is_video
 								        return os.path.splitext(self.filepath)[0]
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def expire_info(self) -> None:
 								        metafile = self.metafile("ytdl")
 								        if os.path.isfile(metafile):
 								            stat = os.stat(metafile)
 								            mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
 								            diff = datetime.datetime.now() - mtime
 								            if diff > self.RERESEARCH_AFTER:
 								                os.unlink(metafile)
 								                del self.ytdl_infos
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								    def download(self) -> None:
 								        assert self.is_video
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        if self.downloaded:
 								            return
 								        self.expire_info()
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        log.info(f"Downloading: {self}")
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        lockfile = self.metafile("lock")
 								        with filelock.FileLock(lockfile):
 								            if not self.parent.args.dryrun:
 								                with yt_dlp.YoutubeDL(self.parent.ytdl_opts) as ydl:
 								                    ydl.add_post_processor(SaveInfoPP(self))
 								                    ydl.process_ie_result(self.ytdl_infos, download=True)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Better sanitization of ytdl info

											
										
										
											2021-12-20 18:57:13 +01:00
+								    def update_post_download(self, info: dict) -> None:
 								        self.downloaded_filepath = self.parent.ytdl_dry.prepare_filename(info)
-												Smol fixes rssVideos

											
										
										
											2022-03-27 13:02:55 +02:00
+								        assert self.downloaded_filepath
 								        assert self.downloaded_filepath.startswith(self.basename)
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        self.metafile_write("path", self.downloaded_filepath)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
+								    @property
 								    def watched(self) -> bool:
 								        if not self.is_researched:
 								            return False
 								        return self.was_downloaded and not self.downloaded
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								    def matches_filter(self, args: configargparse.Namespace) -> bool:
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        # Inexpensive filters
-												rssVideos: Add --seen flag

											
										
										
											2021-12-18 22:23:48 +01:00
+								        if args.seen != "any" and (args.seen == "seen") != self.watched:
 								            log.debug(f"Not {args.seen}: {self}")
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
+								            return False
-												rssVideos: Support list filters

											
										
										
											2021-12-17 22:13:46 +01:00
+								        if args.title and not re.search(args.title, self.title):
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								            log.debug(f"Title not matching {args.title}: {self}")
-												rssVideos: Support list filters

											
										
										
											2021-12-17 22:13:46 +01:00
+								            return False
 								        if args.link and not re.search(args.link, self.link):
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								            log.debug(f"Link not matching {args.link}: {self}")
-												rssVideos: Support list filters

											
										
										
											2021-12-17 22:13:46 +01:00
+								            return False
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								        if args.creator and (
 								            not self.creator or not re.search(args.creator, self.creator)
 								        ):
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								            log.debug(f"Creator not matching {args.creator}: {self}")
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								            return False
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
 								        # Expensive filters
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								        if not self.is_video:
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								            log.debug(f"Not a video: {self}")
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								            return False
-												rssVideos: --total-duration

Controlled binging

											
										
										
											2021-12-19 23:13:41 +01:00
+								        if args.duration and not compare_duration(args.duration)(self.duration):
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
-												rssVideos: --total-duration

Controlled binging

											
										
										
											2021-12-19 23:13:41 +01:00
+								            return False
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
-												rssVideos: Support list filters

											
										
										
											2021-12-17 22:13:46 +01:00
+								        return True
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
+								    def watch(self) -> None:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        self.download()
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								        cmd = ["mpv", self.filepath]
 								        log.debug(f"Running {cmd}")
 								        if not self.parent.args.dryrun:
 								            proc = subprocess.run(cmd)
 								            proc.check_returncode()
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        self.undownload()
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
+								        self.try_mark_read()
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def clean_file(self, folder: str, basename: str) -> None:
 								        for file in os.listdir(folder):
 								            if file.startswith(basename):
 								                path = os.path.join(folder, file)
 								                log.debug(f"Removing file: {path}")
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								                if not self.parent.args.dryrun:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								                    os.unlink(path)
 								    def undownload(self) -> None:
 								        """Remove files in the current directory whose name starts with :attr:`basename`.
 								        The element must be a video (asserted).
 								        """
 								        assert self.is_video
 								        message = f"Removing gone video: {self.basename}*"
 								        log.info(message)
 								        self.clean_file(".", self.basename)
 								    def clean(self) -> None:
-												rssVideos: Smol fixes

											
										
										
											2022-04-02 20:53:06 +02:00
+								        if self.is_researched and self.is_video:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								            self.undownload()
 								        log.info(f"Removing gone metadata: {self.sid}*")
 								        self.clean_file(self.parent.METADATA_FOLDER, self.sid)
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
+								    def mark_read(self) -> None:
 								        log.debug(f"Marking {self} read")
 								        if self.parent.args.dryrun:
 								            return
 								        r = requests.post(
 								            f"{self.parent.args.url}/reader/api/0/edit-tag",
 								            data={
 								                "i": self.id,
 								                "a": "user/-/state/com.google/read",
 								                "ac": "edit",
 								                "token": self.parent.feed_token,
 								            },
 								            headers=self.parent.auth_headers,
 								        )
 								        r.raise_for_status()
 								        if r.text.strip() != "OK":
 								            raise RuntimeError(f"Couldn't mark {self} as read: {r.text}")
 								        log.info(f"Marked {self} as read")
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        self.clean()
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
 								    def try_mark_read(self) -> None:
 								        """Call :meth:`mark_read`, logging a warning instead of raising on a connection error."""
 								        try:
 								            self.mark_read()
 								        except requests.ConnectionError:
 								            # Only connection failures are tolerated; anything else propagates.
 								            message = f"Couldn't mark {self} as read"
 								            log.warning(message)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								class RVDatabase:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    METADATA_FOLDER = ".metadata"
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								    args: configargparse.Namespace
 								    elements: list[RVElement]
 								    def __init__(self, args: configargparse.Namespace) -> None:
 								        """Keep the parsed command-line/config arguments on the instance."""
 								        self.args = args
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def metafile_read(self, name: str) -> typing.Any:
 								        path = os.path.join(self.METADATA_FOLDER, name)
 								        log.debug(f"Reading {path}")
 								        with open(path, "rb") as mf:
 								            return pickle.load(mf)
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def metafile_write(self, name: str, data: typing.Any) -> None:
 								        path = os.path.join(self.METADATA_FOLDER, name)
 								        log.debug(f"Writing {path}")
 								        if not self.args.dryrun:
 								            with open(path, "wb") as mf:
 								                pickle.dump(data, mf)
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								    def clean_cache(self, cache: "RVDatabase") -> None:
-												rssVideos: Clean up

											
										
										
											2021-12-28 21:39:10 +01:00
+								        log.debug("Cleaning cache")
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        fresh_ids = set(el.id for el in self.elements)
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
+								        for el in cache.elements:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								            if el.id not in fresh_ids:
 								                el.clean()
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def _auth_headers(self) -> dict[str, str]:
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								        r = requests.get(
 								            f"{self.args.url}/accounts/ClientLogin",
 								            params={"Email": self.args.email, "Passwd": self.args.passwd},
 								        )
 								        r.raise_for_status()
 								        for line in r.text.split("\n"):
 								            if line.lower().startswith("auth="):
 								                val = "=".join(line.split("=")[1:])
 								                return {"Authorization": f"GoogleLogin auth={val}"}
 								        raise RuntimeError("Couldn't find auth= key")
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    @functools.cached_property
 								    def auth_headers(self) -> dict[str, str]:
 								        """Authorization headers, read from the ``.auth_headers`` metadata file.
 								        If that file does not exist, the headers are obtained from
 								        :meth:`_auth_headers` and written to the file before being returned.
 								        """
 								        cache_name = ".auth_headers"
 								        try:
 								            return self.metafile_read(cache_name)
 								        except FileNotFoundError:
 								            fresh = self._auth_headers()
 								            self.metafile_write(cache_name, fresh)
 								            return fresh
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								    def fetch_feed_elements(self) -> typing.Generator[dict, None, None]:
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								        log.info("Fetching RSS feed")
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								        continuation: typing.Optional[str] = None
 								        with requests.Session() as s:
 								            def next_page() -> typing.Generator[dict, None, None]:
 								                nonlocal continuation
 								                r = s.get(
 								                    f"{self.args.url}/reader/api/0/stream/contents",
 								                    params={
 								                        "xt": "user/-/state/com.google/read",
 								                        "c": continuation,
 								                    },
 								                    headers=self.auth_headers,
 								                )
 								                r.raise_for_status()
 								                json = r.json()
 								                yield from json["items"]
 								                continuation = json.get("continuation")
 								            yield from next_page()
 								            while continuation:
 								                yield from next_page()
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def fetch_cache_elements(self) -> typing.Generator[dict, None, None]:
 								        log.info("Fetching from cache")
 								        for file in os.listdir(self.METADATA_FOLDER):
 								            if not file.endswith(".item"):
 								                continue
 								            yield self.metafile_read(file)
 								    def build_list(self, items: typing.Iterable[dict], save: bool = False) -> None:
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								        self.elements = []
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								        for item in items:
-												rssVideos: Show creator

Even if it's not always present for all RSS feeds

											
										
										
											2021-12-12 14:27:08 +01:00
+								            element = RVElement(self, item)
 								            self.elements.insert(0, element)
 								            log.debug(f"Known: {element}")
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								            if save:
 								                element.save()
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
+								    def read_feed(self) -> None:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								        self.build_list(self.fetch_feed_elements(), save=True)
-												rssVideos: Use GReader API

											
										
										
											2021-12-28 12:35:08 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def read_cache(self) -> None:
 								        self.build_list(self.fetch_cache_elements())
 								    def clean_folder(self, folder: str, basenames: set[str]) -> None:
 								        for file in os.listdir(folder):
 								            path = os.path.join(folder, file)
 								            if not os.path.isfile(path) or file[0] == ".":
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								                continue
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								            for basename in basenames:
 								                if file.startswith(basename):
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								                    break
 								            else:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								                log.info(f"Removing unknown file: {path}")
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								                if not self.args.dryrun:
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								                    os.unlink(path)
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    def clean(self) -> None:
 								        log.debug("Cleaning")
 								        filenames = set(el.basename for el in self.elements if el.is_video)
 								        self.clean_folder(".", filenames)
 								        ids = set(el.sid for el in self.elements)
 								        self.clean_folder(self.METADATA_FOLDER, ids)
-												rssVideos: Cleverer cleaning

											
										
										
											2021-12-18 12:44:43 +01:00
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								    @property
 								    def ytdl_opts(self) -> dict:
-												rssVideos: Use yt_dlp config when possible

											
										
										
											2022-03-27 15:18:41 +02:00
+								        # Get user/system options
-												rssVideos: Fix the options issue

											
										
										
											2022-04-10 09:58:06 +02:00
+								        prev_argv = sys.argv
-												Make rssVideos use release date

											
										
										
											2022-11-04 14:07:37 +01:00
+								        sys.argv = ["yt-dlp"]
-												rssVideos: Fix the options issue

											
										
										
											2022-04-10 09:58:06 +02:00
+								        _, _, _, ydl_opts = yt_dlp.parse_options()
 								        sys.argv = prev_argv
-												rssVideos: Use yt_dlp config when possible

											
										
										
											2022-03-27 15:18:41 +02:00
+								        return ydl_opts
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
 								    @property
 								    def ytdl_dry_opts(self) -> dict:
 								        opts = self.ytdl_opts.copy()
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								        opts.update({"quiet": True})
-												Refactored rssVideos

- Has a cache for yt-dlp research (save time on reruns)
- Simplified logic for cleanup / continue downloading
- Using OOP / functional programming (?)
- Removed tracking logic (unused)

											
										
										
											2021-12-10 22:59:39 +01:00
+								        return opts
 								    @property
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								    def ytdl_dry(self) -> yt_dlp.YoutubeDL:
 								        return yt_dlp.YoutubeDL(self.ytdl_dry_opts)
-												rssVideos

											
										
										
											2019-04-30 08:22:27 +02:00
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								    def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
-												rssVideos: Fix binge mode

											
										
										
											2022-01-08 12:36:04 +01:00
+								        elements_src = self.elements.copy()
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								        elements: typing.Iterable[RVElement]
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        # Inexpensive sort
 								        if args.order == "new":
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								            elements = sorted(elements_src, key=lambda el: el.date, reverse=True)
 								        elif args.order == "old":
 								            elements = sorted(elements_src, key=lambda el: el.date)
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        elif args.order == "title":
-												rssVideos: Fix binge mode

											
										
										
											2022-01-08 12:36:04 +01:00
+								            elements = sorted(elements_src, key=lambda el: el.title)
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        elif args.order == "creator":
-												rssVideos: Fix binge mode

											
										
										
											2022-01-08 12:36:04 +01:00
+								            elements = sorted(elements_src, key=lambda el: el.creator or "")
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        elif args.order == "link":
-												rssVideos: Fix binge mode

											
										
										
											2022-01-08 12:36:04 +01:00
+								            elements = sorted(elements_src, key=lambda el: el.link)
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
+								        elif args.order == "random":
-												rssVideos: Fix binge mode

											
										
										
											2022-01-08 12:36:04 +01:00
+								            elements = elements_src
 								            random.shuffle(elements)
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
 								        # Possibly expensive filtering
 								        elements = filter(lambda el: el.matches_filter(args), elements)
 								        # Expensive sort
 								        if args.order == "short":
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								            elements = sorted(
 								                elements, key=lambda el: el.duration if el.is_video else 0
 								            )
-												rssVideos: --total-duration

Controleld binging

											
										
										
											2021-12-19 23:13:41 +01:00
+								        elif args.order == "long":
-												rssVideos: Work correctly with merged files

											
										
										
											2021-12-19 15:10:16 +01:00
+								            elements = sorted(
 								                elements, key=lambda el: el.duration if el.is_video else 0, reverse=True
 								            )
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
-												rssVideos: --total-duration

Controleld binging

											
										
										
											2021-12-19 23:13:41 +01:00
+								        # Post sorting filtering
 								        if args.total_duration:
 								            rem = parse_duration(args.total_duration)
 								            old_els = list(elements)
 								            elements = list()
 								            while rem > 0:
 								                for el in old_els:
 								                    if el.duration < rem:
 								                        elements.append(el)
 								                        rem -= el.duration
 								                        old_els.remove(el)
 								                        break
 								                else:
 								                    break
-												rssVideos: More sort orders and duration command

											
										
										
											2021-12-19 11:45:41 +01:00
+								        return elements
-												rssVideos: Abstract a bit, add binge

											
										
										
											2021-12-18 11:27:24 +01:00
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
+								    @functools.cached_property
 								    def feed_token(self) -> str:
 								        r = requests.get(
 								            f"{self.args.url}/reader/api/0/token",
 								            headers=self.auth_headers,
 								        )
 								        r.raise_for_status()
 								        return r.text.strip()
 								    def try_mark_watched_read(self) -> None:
 								        for element in self.elements:
 								            if element.watched:
 								                element.try_mark_read()
-												rssVideos

											
										
										
											2019-04-30 08:22:27 +02:00
-												rssViedos mostly

											
										
										
											2020-12-27 14:20:44 +01:00
def get_args() -> configargparse.Namespace:
    """
    Parse the command line, environment variables and configuration file.

    The default configuration file is ``rssVideos`` inside the user
    configuration directory. That directory is taken from the first non-empty
    value among ``$XDG_CONFIG_PATH`` (the name historically read by this
    script), ``$XDG_CONFIG_HOME`` (the name defined by the XDG Base Directory
    specification) and ``~/.config/``.

    Returns the parsed namespace, in which ``videos`` has been expanded
    (``~``) and resolved to a real path.
    """
    config_home = (
        os.getenv("XDG_CONFIG_PATH")
        or os.getenv("XDG_CONFIG_HOME")
        or "~/.config/"
    )
    default_config_path = os.path.join(os.path.expanduser(config_home), "rssVideos")

    parser = configargparse.ArgParser(
        description="Download videos in unread articles from a feed aggregator",
        default_config_files=[default_config_path],
    )

    # Runtime settings
    parser.add_argument(
        "-v",
        "--verbosity",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default=None,
        help="Verbosity of log messages",
    )
    parser.add(
        "-c", "--config", required=False, is_config_file=True, help="Configuration file"
    )
    parser.add(
        "-n",
        "--dryrun",
        help="Only pretend to do actions",
        action="store_const",
        const=True,
        default=False,
    )

    # Input/Output
    parser.add(
        "--url",
        help="URL of the Google Reader API of the aggregator",
        env_var="RSS_VIDEOS_URL",
        required=True,
    )
    parser.add(
        "--email",
        help="E-mail / user to connect to the aggregator",
        env_var="RSS_VIDEOS_EMAIL",
        required=True,
    )
    parser.add(
        "--passwd",
        help="Password to connect to the aggregator",
        env_var="RSS_VIDEOS_PASSWD",
        required=True,
    )
    parser.add(
        "--no-refresh",
        dest="refresh",
        help="Don't fetch feed",
        action="store_false",
    )
    parser.add(
        "--videos",
        help="Directory to store videos",
        env_var="RSS_VIDEOS_VIDEO_DIR",
        required=True,
    )

    # Which videos
    parser.add(
        "--order",
        choices=("old", "new", "title", "creator", "link", "short", "long", "random"),
        default="old",
        help="Sorting mechanism",
    )
    parser.add("--creator", help="Regex to filter by creator")
    parser.add("--title", help="Regex to filter by title")
    parser.add("--link", help="Regex to filter by link")
    parser.add("--duration", help="Comparative to filter by duration")
    # TODO Date selector
    parser.add(
        "--seen",
        choices=("seen", "unseen", "any"),
        default="unseen",
        help="Only include seen/unseen/any videos",
    )
    parser.add(
        "--total-duration",
        help="Use videos that fit under the total given",
    )
    # TODO Environment variables
    # TODO Allow to ask

    parser.add(
        "action",
        nargs="?",
        choices=(
            "download",
            "list",
            "watch",
            "binge",
        ),
        default="download",
    )

    args = parser.parse_args()
    args.videos = os.path.realpath(os.path.expanduser(args.videos))

    return args
-												rssVideos

											
										
										
											2019-04-30 08:22:27 +02:00
-												rssVideos: Clean up

											
										
										
											2021-12-28 21:39:10 +01:00
def get_database(args: configargparse.Namespace) -> RVDatabase:
    """
    Build the database to work on.

    A database is always loaded from the cache first. When ``args.refresh``
    is false it is returned as is. Otherwise a second database is read from
    the feed, its ``clean_cache`` is called with the cached one, and the
    fresh database is returned.
    """
    cached = RVDatabase(args)
    cached.read_cache()
    if args.refresh:
        updated = RVDatabase(args)
        updated.read_feed()
        updated.clean_cache(cached)
        return updated
    return cached
-												rssVideos: Clean up

											
										
										
											2021-12-28 21:39:10 +01:00
 								def main() -> None:
 								    args = get_args()
 								    configure_logging(args)
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    metadata_dir = os.path.join(args.videos, RVDatabase.METADATA_FOLDER)
 								    for dir in (args.videos, metadata_dir):
 								        os.makedirs(dir, exist_ok=True)
-												rssVideos: Clean up

											
										
										
											2021-12-28 21:39:10 +01:00
+								    os.chdir(args.videos)
 								    database = get_database(args)
 								    log.debug("Running action")
-												rssVideos: Now thread-safe (kinda)

											
										
										
											2022-03-23 18:54:05 +01:00
+								    duration = 0
 								    for element in database.filter(args):
 								        duration += element.duration if element.is_video else 0
 								        if args.action == "download":
 								            element.download()
 								        elif args.action == "list":
 								            print(element)
 								        elif args.action in ("watch", "binge"):
 								            element.watch()
 								            if args.action == "watch":
 								                break
 								        else:
 								            raise NotImplementedError(f"Unimplemented action: {args.action}")
 								    log.info(f"Total duration: {format_duration(duration)}")
-												rssVideos: Sync read state

Deleted but previously downloaded = read

											
										
										
											2021-12-29 12:56:07 +01:00
+								    database.try_mark_watched_read()
-												Smol fixes rssVideos

											
										
										
											2022-03-27 13:02:55 +02:00
+								    database.clean()
-												rssVideos: Add watch

											
										
										
											2021-12-17 23:16:32 +01:00
-												rssViedos mostly

											
										
										
											2020-12-27 14:20:44 +01:00
 								# Only run the program when this file is executed as a script
 								if __name__ == "__main__":
 								    main()