Compare commits

...

13 commits

Author SHA1 Message Date
Geoffrey Frogeye f4c81e346a
rssVideos: Add --seen flag 2021-12-18 22:23:48 +01:00
Geoffrey Frogeye 1948fc0af2
rssVideos: Cleverer cleaning 2021-12-18 12:44:43 +01:00
Geoffrey Frogeye 2dce725ee5
rssVideos: Abstract with download process as well 2021-12-18 11:56:28 +01:00
Geoffrey Frogeye 07af9360fa
rssVideos: Abstract a bit, add binge 2021-12-18 11:27:24 +01:00
Geoffrey Frogeye 5b195bd141
rssVideos: Add watch 2021-12-17 23:16:32 +01:00
Geoffrey Frogeye 7423a93203
rssVideos: Filter by duration 2021-12-17 22:42:35 +01:00
Geoffrey Frogeye 7aeecb1bff
videoQuota: 2021-12-17 22:41:47 +01:00
Geoffrey Frogeye f11338a04a
rssVideos: Support list filters 2021-12-17 22:13:46 +01:00
Geoffrey Frogeye 9100edac1e
videoQuota: Support filters 2021-12-17 22:13:27 +01:00
Geoffrey Frogeye 76df5d4d80
Upgrade rofi config 2021-12-15 21:59:45 +01:00
Geoffrey Frogeye 7f0e24a29d
rssVideos: Slightly better error handling
Makes it actually quit on Ctrl+C
2021-12-12 14:52:21 +01:00
Geoffrey Frogeye 6a6f5401e6
rssVideos: Show creator
Even if it's not always  present for all RSS feeds
2021-12-12 14:27:08 +01:00
Geoffrey Frogeye 9493edc1fd
rssVideos: Don't download already downloaded videos
Because the good extension is not the one expected :/
2021-12-12 13:40:24 +01:00
7 changed files with 318 additions and 119 deletions

View file

@ -6,6 +6,7 @@
with_items:
- ".config/Xresources"
- ".config/rofi"
- ".local/share/rofi/themes"
- ".local/bin"
- ".local/share/fonts"
- ".config/qutebrowser"
@ -108,17 +109,24 @@
- color
when: display_server == 'x11'
- name: Set base16 theme for rofi
- name: Set base16 theme for rofi < 1.4
copy:
content: "{{ base16_schemes['schemes'][base16_scheme]['rofi']['themes']['base16-' + base16_scheme + '.' + item] }}"
dest: "{{ ansible_env.HOME }}/.config/rofi/theme.{{ item }}"
content: "{{ base16_schemes['schemes'][base16_scheme]['rofi']['themes']['base16-' + base16_scheme + '.config'] }}"
dest: "{{ ansible_env.HOME }}/.config/rofi/theme.config"
mode: "u=rw,g=r,o=r"
with_items:
- rasi
- config
tags:
- color
- name: Set base16 theme for rofi >= 1.4
copy:
content: "{{ base16_schemes['schemes'][base16_scheme]['rofi']['themes']['base16-' + base16_scheme + '.rasi'] }}"
dest: "{{ ansible_env.HOME }}/.local/share/rofi/themes/current.rasi"
mode: "u=rw,g=r,o=r"
tags:
- color
- g
when: no
- name: Configure Dunst
template:
src: "{{ ansible_env.HOME }}/.config/dunst/dunstrc.j2"

View file

@ -1,3 +1 @@
theme.config
theme.rasi

View file

@ -1,8 +1,4 @@
#include "theme.config"
rofi.theme: theme
rofi.cycle: true
rofi.case-sensitive: false
rofi.scroll-method: 0
rofi.show-match: true
rofi.lazy-grab: false
rofi.matching: regex

6
config/rofi/config.rasi Normal file
View file

@ -0,0 +1,6 @@
configuration {
theme: "current";
lazy-grab: false;
matching: "regex";
}

View file

@ -1,3 +1,4 @@
coloredlogs>=10.0<11
progressbar2>=3.47.0<4
youtube-dl>=2021.6.6
yt-dlp>=2021.10.22
ConfigArgParse>=1.5<2

View file

@ -8,18 +8,19 @@ The common use case would be a feed from an RSS aggregator
with the unread items (non-video links are ignored).
"""
# TODO Distribute this correclty, in the meanwhile please do
# pip install --user coloredlogs ConfigArgParse yt-dlp
import enum
import functools
import logging
import os
import pickle
import random
import re
import subprocess
import sys
import typing
import urllib.parse
import urllib.request
import urllib.error
from xml.dom import minidom
import coloredlogs
@ -28,6 +29,7 @@ import yt_dlp as youtube_dl
log = logging.getLogger(__name__)
# TODO Lockfile, or a way to parallel watch and download
def configure_logging(args: configargparse.Namespace) -> None:
# Configure logging
@ -41,52 +43,78 @@ def configure_logging(args: configargparse.Namespace) -> None:
logger=log,
)
class RVCommand(enum.Enum):
    """Subcommands selectable on the command line."""

    download = "download"
    list = "list"
class RVElement:
title: str
link: str
# creator: str
# description: str
# date: datetime.datetime
guid: int
parent: "RVDatabase"
item: minidom.Element
was_downloaded: bool
watched: bool
def __init__(self, parent: "RVDatabase", item: minidom.Element) -> None:
def get_data(tag_name: str) -> str:
nodes = item.getElementsByTagName(tag_name)
self.parent = parent
self.item = item
self.was_downloaded = False
self.watched = False
def get_tag_data(self, tag_name: str) -> str:
nodes = self.item.getElementsByTagName(tag_name)
if len(nodes) != 1:
raise RuntimeError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
raise KeyError(f"Exepected 1 tag `{tag_name}`, got {len(nodes)}.")
children = nodes[0].childNodes
if len(children) != 1:
raise RuntimeError(
raise KeyError(
f"Exepected 1 children for tag `{tag_name}`, got {len(children)}."
)
return children[0].data
self.title = get_data("title")
self.link = get_data("link")
# self.creator = get_data("dc:creator")
# self.description = get_data("description")
# self.date = get_data("pubDate")
self.guid = int(get_data("guid"))
@property
def title(self) -> str:
return self.get_tag_data("title")
self.parent = parent
@property
def link(self) -> str:
return self.get_tag_data("link")
def read_cache(self, cache: "RVElement") -> None:
if "ytdl_infos" in cache.__dict__:
@property
def creator(self) -> typing.Optional[str]:
    """Feed-provided author, or None when the feed has no dc:creator tag."""
    try:
        creator = self.get_tag_data("dc:creator")
    except KeyError:
        creator = None
    return creator
@property
def description(self) -> str:
    """Feed-provided <description> text of the item."""
    # TODO Testing
    return self.get_tag_data("description")
@property
def date(self) -> str:
    """Raw <pubDate> string from the feed (not parsed yet)."""
    # TODO datetime format
    return self.get_tag_data("pubDate")
@property
def guid(self) -> int:
    """Numeric <guid> identifying this item; used as the cache key."""
    return int(self.get_tag_data("guid"))
@property
def is_researched(self) -> bool:
    """True once ytdl_infos has been computed or restored from cache."""
    return "ytdl_infos" in self.__dict__
def salvage_cache(self, cache: "RVElement") -> None:
    """Carry over expensive or user state from a cached copy of this element."""
    # ytdl research is costly (network), so reuse it whenever available.
    if cache.is_researched:
        self.__dict__["ytdl_infos"] = cache.__dict__["ytdl_infos"]
        log.debug(f"From cache: {self}")
    # Flags only ever propagate from False to True.
    self.was_downloaded = self.was_downloaded or cache.was_downloaded
    self.watched = self.watched or cache.watched
def __str__(self) -> str:
return f"{self.title} {self.link}"
return f"{self.guid}: {self.creator} {self.title} {self.link}"
@property
def downloaded(self) -> bool:
if "ytdl_infos" not in self.__dict__:
if not self.is_researched:
return False
return os.path.isfile(self.filepath)
@ -95,9 +123,11 @@ class RVElement:
log.info(f"Researching: {self}")
try:
infos = self.parent.ytdl_dry.extract_info(self.link)
except BaseException as e:
except KeyboardInterrupt as e:
raise e
except youtube_dl.utils.DownloadError as e:
# TODO Still raise in case of temporary network issue
log.warn(e)
log.warning(e)
infos = None
# Apparently that thing is transformed from a LazyList
# somewhere in the normal yt_dlp process
@ -113,15 +143,10 @@ class RVElement:
return infos
@property
def skip(self) -> bool:
def duration(self) -> int:
assert self.is_video
assert self.ytdl_infos
if (
self.parent.args.max_duration > 0
and self.ytdl_infos["duration"] > self.parent.args.max_duration
):
return True
return False
return self.ytdl_infos["duration"]
@property
def is_video(self) -> bool:
@ -131,6 +156,7 @@ class RVElement:
@property
def filepath(self) -> str:
assert self.is_video
# TODO This doesn't change the extension to mkv when the formats are incomaptible
return self.parent.ytdl_dry.prepare_filename(self.ytdl_infos)
@property
@ -141,22 +167,102 @@ class RVElement:
def download(self) -> None:
assert self.is_video
log.info(f"Downloading: {self}")
if self.parent.args.dryrun:
return
if not self.parent.args.dryrun:
self.parent.ytdl.process_ie_result(self.ytdl_infos, True, {})
self.was_downloaded = True
self.parent.save()
def act(self) -> None:
if not self.is_video:
log.debug(f"Not a video: {self}")
return
def preload(self) -> None:
assert self.is_video
if self.downloaded:
log.debug(f"Already downloaded: {self}")
log.debug(f"Currently downloaded: {self}")
return
if self.skip:
log.debug(f"Skipped: {self}")
if self.was_downloaded:
log.debug(f"Downloaded previously: {self}")
return
self.download()
# Duration-suffix letter -> seconds multiplier; the None key covers a bare
# number (interpreted as seconds).
MATCHES_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, None: 1}
# Leading comparator character -> int comparison; the None key (no
# comparator given) defaults to "at most".
MATCHES_DURATION_COMPARATORS = {
    "<": int.__lt__,
    "-": int.__lt__,
    ">": int.__gt__,
    "+": int.__gt__,
    "=": int.__eq__,
    None: int.__le__,
}

def matches_filter(self, args: configargparse.Namespace) -> bool:
    """Return True if this element passes every command-line filter.

    Non-video elements never match.  --duration accepts an optional
    leading comparator (one of < - > + =) and an optional trailing
    unit (s/m/h), e.g. "<30m".

    Raises:
        ValueError: on a malformed --duration specification.
    """
    if args.seen != "any" and (args.seen == "seen") != self.watched:
        log.debug(f"Not {args.seen}: {self}")
        return False
    if args.title and not re.search(args.title, self.title):
        log.debug(f"Title not matching {args.title}: {self}")
        return False
    if args.guid and not re.search(args.guid, str(self.guid)):
        log.debug(f"Guid not matching {args.guid}: {self}")
        return False
    if args.link and not re.search(args.link, self.link):
        log.debug(f"Link not matching {args.link}: {self}")
        return False
    if args.creator and (not self.creator or not re.search(args.creator, self.creator)):
        log.debug(f"Creator not matching {args.creator}: {self}")
        return False
    if not self.is_video:
        log.debug(f"Not a video: {self}")
        return False
    if args.duration:
        dur = args.duration
        mult_index = dur[-1].lower()
        if mult_index.isdigit():
            mult_index = None
        else:
            dur = dur[:-1]
        try:
            multiplier = self.MATCHES_DURATION_MULTIPLIERS[mult_index]
        # Dict subscripting raises KeyError, not IndexError; catching the
        # wrong type let e.g. "10x" escape as a raw KeyError instead of
        # the intended friendly ValueError.
        except KeyError:
            raise ValueError(f"Unknown duration multiplier: {mult_index}")
        comp_index = dur[0]
        if comp_index.isdigit():
            comp_index = None
        else:
            dur = dur[1:]
        try:
            comparator = self.MATCHES_DURATION_COMPARATORS[comp_index]
        except KeyError:
            raise ValueError(f"Unknown duration comparator: {comp_index}")
        duration = int(dur)
        if not comparator(self.duration, duration * multiplier):
            log.debug(f"Duration {self.duration} not matching {args.duration}: {self}")
            return False
    return True
def watch(self) -> None:
    """Play this video in mpv (downloading it first if needed), then mark it seen."""
    if not self.downloaded:
        self.download()

    command = ["mpv", self.filepath]
    log.debug(f"Running {command}")
    if not self.parent.args.dryrun:
        subprocess.run(command).check_returncode()

    self.watched = True
    self.parent.save()
def clean(self) -> None:
    """Delete every file of this video from the current directory."""
    assert self.is_video
    log.info(f"Removing gone video: {self.filename}*")
    prefix = self.filename
    for entry in os.listdir():
        if not entry.startswith(prefix):
            continue
        log.debug(f"Removing file: {entry}")
        if not self.parent.args.dryrun:
            os.unlink(entry)
class RVDatabase:
SAVE_FILE = ".cache.p"
@ -168,6 +274,7 @@ class RVDatabase:
self.args = args
def save(self) -> None:
log.debug("Saving cache")
if self.args.dryrun:
return
with open(self.SAVE_FILE, "wb") as save_file:
@ -179,30 +286,50 @@ class RVDatabase:
with open(cls.SAVE_FILE, "rb") as save_file:
return pickle.load(save_file)
except (TypeError, AttributeError, EOFError):
log.warn("Corrupt / outdated cache, it will be rebuilt.")
log.warning("Corrupt / outdated cache, it will be rebuilt.")
except FileNotFoundError:
pass
return None
def read_cache(self, cache: "RVDatabase") -> None:
def salvage_cache(self, cache: "RVDatabase") -> None:
    """Carry per-element cached state over from a previously saved database."""
    log.debug(f"Salvaging cache")
    # Index the cached elements by guid for O(1) matching.
    by_guid = {cached.guid: cached for cached in cache.elements}
    for element in self.elements:
        cached = by_guid.get(element.guid)
        if cached is not None:
            element.salvage_cache(cached)
def clean_cache(self, cache: "RVDatabase") -> None:
    """Remove files of cached videos that no longer appear in the feed."""
    log.debug(f"Cleaning cache")
    current_guids = {element.guid for element in self.elements}
    for old in cache.elements:
        if old.guid in current_guids:
            continue
        # Only researched videos know their filenames on disk.
        if old.is_researched and old.is_video:
            old.clean()
def import_cache(self, cache: "RVDatabase") -> None:
    """Rebuild this database from the cached feed instead of fetching it."""
    log.debug(f"Importing cache")
    # Seeds the feed_xml cached_property, so read_feed() parses the
    # cached document rather than hitting the network.
    self.feed_xml = cache.feed_xml
    self.read_feed()
@functools.cached_property
def feed_xml(self) -> minidom.Document:
    """Fetch and parse the RSS feed; the network hit happens once, then cached."""
    log.info("Fetching RSS feed")
    with urllib.request.urlopen(self.args.feed) as response:
        document = minidom.parse(response)
    return document
def read_feed(self) -> None:
log.info("Fetching RSS feed")
self.elements = list()
with urllib.request.urlopen(self.args.feed) as request:
with minidom.parse(request) as xmldoc:
for item in xmldoc.getElementsByTagName("item"):
self.elements = []
for item in self.feed_xml.getElementsByTagName("item"):
element = RVElement(self, item)
self.elements.insert(0, element)
log.debug(f"Known: {element}")
def clean(self) -> None:
log.debug("Cleaning")
filenames = set()
for element in self.elements:
if element.is_video:
@ -216,13 +343,20 @@ class RVDatabase:
if file.startswith(filename):
break
else:
log.info(f"Removing: {file}")
log.info(f"Removing unknown file: {file}")
if not self.args.dryrun:
os.unlink(file)
def act_all(self) -> None:
@property
def all_researched(self) -> bool:
for element in self.elements:
element.act()
if not element.is_researched:
return False
return True
def attempt_clean(self) -> None:
if self.all_researched:
self.clean()
@property
def ytdl_opts(self) -> dict:
@ -242,6 +376,18 @@ class RVDatabase:
def ytdl_dry(self) -> youtube_dl.YoutubeDL:
return youtube_dl.YoutubeDL(self.ytdl_dry_opts)
def filter(self, args: configargparse.Namespace) -> typing.Iterable[RVElement]:
    """Yield elements in the requested --order that pass matches_filter."""
    ordered: typing.Iterable[RVElement]
    if args.order == "old":
        ordered = self.elements
    elif args.order == "new":
        ordered = reversed(self.elements)
    elif args.order == "random":
        # Shuffle a copy so the stored element order is untouched.
        shuffled = self.elements.copy()
        random.shuffle(shuffled)
        ordered = shuffled
    return (element for element in ordered if element.matches_filter(args))
def get_args() -> configargparse.Namespace:
defaultConfigPath = os.path.join(
@ -254,6 +400,8 @@ def get_args() -> configargparse.Namespace:
+ "an RSS aggregator",
default_config_files=[defaultConfigPath],
)
# Runtime settings
parser.add_argument(
"-v",
"--verbosity",
@ -264,6 +412,16 @@ def get_args() -> configargparse.Namespace:
parser.add(
"-c", "--config", required=False, is_config_file=True, help="Configuration file"
)
parser.add(
"-n",
"--dryrun",
help="Only pretend to do actions",
action="store_const",
const=True,
default=False,
)
# Input/Output
parser.add(
"--feed",
help="URL of the RSS feed (must be public for now)",
@ -276,21 +434,31 @@ def get_args() -> configargparse.Namespace:
env_var="RSS_VIDEOS_VIDEO_DIR",
required=True,
)
# Which videos
parser.add(
"-n",
"--dryrun",
help="Do not download the videos",
action="store_const",
const=True,
default=False,
"--order",
choices=("old", "new", "random"),
default="old",
help="Sorting mechanism",
)
parser.add("--guid", help="Regex to filter guid")
parser.add("--creator", help="Regex to filter by creator")
parser.add("--title", help="Regex to filter by title")
parser.add("--link", help="Regex to filter by link")
parser.add("--duration", help="Comparative to filter by duration")
parser.add("--seen", choices=("seen","unseen","any"), default="unseen", help="Only include seen/unseen/any videos")
# TODO Envrionment variables
parser.add(
"--max-duration",
help="Skip video longer than this amount of seconds",
help="(Deprecated, use --duration instead)",
env_var="RSS_VIDEOS_MAX_DURATION",
type=int,
default=0,
)
# TODO Allow to ask
# How to download
parser.add(
"--format",
help="Use this format to download videos."
@ -305,17 +473,17 @@ def get_args() -> configargparse.Namespace:
action="store_true",
)
parser.set_defaults(subcommand=RVCommand.download)
subparsers = parser.add_subparsers(title="subcommand")
sc_download = subparsers.add_parser("download")
sc_download.set_defaults(subcommand=RVCommand.download)
sc_list = subparsers.add_parser("list")
sc_list.set_defaults(subcommand=RVCommand.list)
parser.add(
"action",
nargs="?",
choices=("download", "list", "watch", "binge", "clean"),
default="download",
)
args = parser.parse_args()
args.videos = os.path.realpath(os.path.expanduser(args.videos))
if not args.duration and args.max_duration:
args.duration = str(args.max_duration)
return args
@ -327,22 +495,37 @@ def main() -> None:
os.makedirs(args.videos, exist_ok=True)
os.chdir(args.videos)
if args.subcommand == RVCommand.download:
database = RVDatabase(args)
database.read_feed()
cache = RVDatabase.load()
try:
database.read_feed()
except urllib.error.URLError as err:
if args.action == "download" or not cache:
raise err
else:
log.warning("Cannot fetch RSS feed, using cached feed.", err)
database.import_cache(cache)
if cache:
database.read_cache(cache)
database.clean()
database.act_all()
database.salvage_cache(cache)
database.clean_cache(cache)
database.save()
elif args.subcommand == RVCommand.list:
cache = RVDatabase.load()
if not cache:
raise FileNotFoundError("This command doesn't work without a cache yet.")
for element in cache.elements:
log.debug(f"Running action")
if args.action == "clean":
database.clean()
else:
database.attempt_clean()
for element in database.filter(args):
if args.action == "download":
element.preload()
elif args.action == "list":
print(element)
elif args.action in ("watch", "binge"):
element.watch()
if args.action == "watch":
break
database.attempt_clean()
database.save()
if __name__ == "__main__":

View file

@ -33,6 +33,7 @@ audio_br_bi = 128000
quota_by = int(sys.argv[1])
in_file = sys.argv[2]
out_file = sys.argv[3]
filters = sys.argv[4:]
quota_bi = quota_by * 8
duration = duration_file(in_file)
@ -40,15 +41,21 @@ tot_br_bi = quota_bi / duration
video_br_bi = int(tot_br_bi - audio_br_bi)
assert video_br_bi > 0, "Not even enough space for audio"
cmd = [
cmd = (
[
"ffmpeg",
"-i",
in_file,
]
+ filters
+ [
"-b:v",
str(video_br_bi),
"-b:a",
str(audio_br_bi),
out_file,
]
]
)
print(" ".join(cmd))
subprocess.run(cmd, check=True)