dotfiles/hm/scripts/compressPictureMovies

244 lines
7.4 KiB
Plaintext
Raw Permalink Normal View History

#!/usr/bin/env nix-shell
#! nix-shell -i python3 --pure
#! nix-shell -p python3 python3Packages.coloredlogs python3Packages.progressbar2 ffmpeg
2018-06-24 18:28:37 +02:00
2023-11-23 22:59:09 +01:00
import datetime
import hashlib
import json
import logging
2018-06-24 18:28:37 +02:00
import os
import shutil
2023-11-23 22:59:09 +01:00
import statistics
2018-06-24 18:28:37 +02:00
import subprocess
import sys
2023-11-23 22:59:09 +01:00
import tempfile
import time
2018-08-07 16:09:41 +02:00
import coloredlogs
import progressbar
2021-06-13 11:49:21 +02:00
# Colored log output, showing everything from the DEBUG level up
coloredlogs.install(level="DEBUG", fmt="%(levelname)s %(message)s")
log = logging.getLogger()

# Constants
_HOME = os.path.expanduser("~")
# Folder that is scanned for videos to compress
PICTURES_FOLDER = os.path.join(_HOME, "Images")
# Folder where the original (uncompressed) videos are kept
ORIGINAL_FOLDER = os.path.join(_HOME, ".ImagesOriginaux")
# Lowercase extensions (without the dot) of the files considered as videos
MOVIE_EXTENSIONS = ["mov", "avi", "mp4", "3gp", "webm", "mkv"]
# Extension of the compressed videos
OUTPUT_EXTENSION = "webm"
# ffmpeg arguments used to encode the compressed videos
OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0"]
# OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libaom-av1", "-crf", "30", "-strict", "experimental", "-c:a", "libopus"]
# Maximum accepted standard deviation (in seconds) between durations
DURATION_MAX_DEV = 1
def videoMetadata(filename):
    """
    Read the metadata of a media file using ffmpeg.

    Runs `ffmpeg -i <filename> -f ffmetadata -` and parses the `key=value`
    lines it prints on its standard output.

    :param filename: path of an existing media file
    :return: dict mapping lowercased keys to their value (both str)
    :raises AssertionError: if `filename` is not an existing file
    :raises subprocess.CalledProcessError: if ffmpeg exits with an error
    """
    assert os.path.isfile(filename)
    cmd = ["ffmpeg", "-i", filename, "-f", "ffmetadata", "-"]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    p.check_returncode()

    data = dict()
    for metadataLine in p.stdout.split(b"\n"):
        # Skip empty lines
        if not metadataLine:
            continue
        # Skip comments
        if metadataLine.startswith(b";"):
            continue
        # Parse key-value. Only the first `=` separates the key from the
        # value, as the value itself may contain more of them.
        key, separator, val = metadataLine.partition(b"=")
        if not separator:
            log.warning("Unparsed metadata line: `{}`".format(metadataLine))
            continue
        # NOTE(review): values are stored as printed by ffmpeg, backslash
        # escapes (if any) are not decoded.
        data[key.decode().lower()] = val.decode()
    return data
2021-06-13 11:49:21 +02:00
2018-08-07 16:09:41 +02:00
def videoInfos(filename):
    """
    Describe the streams of a media file using ffprobe.

    :param filename: path of an existing media file
    :return: the JSON document printed by
        `ffprobe <filename> -print_format json -show_streams`, decoded
    :raises AssertionError: if `filename` is not an existing file
    :raises subprocess.CalledProcessError: if ffprobe exits with an error
    """
    assert os.path.isfile(filename)
    probe = subprocess.run(
        ["ffprobe", filename, "-print_format", "json", "-show_streams"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    probe.check_returncode()
    return json.loads(probe.stdout)
2021-06-13 11:49:21 +02:00
2018-08-07 16:09:41 +02:00
from pprint import pprint
2021-06-13 11:49:21 +02:00
2018-08-07 16:09:41 +02:00
def streamDuration(stream):
    """
    Return the duration, in seconds, of a stream description.

    The following sources are tried in order:
    - the `duration` field
    - the `nb_frames` field divided by the `sample_rate` field
    - the `DURATION` tag, formatted as `hours:minutes:seconds`

    :param stream: dict describing a stream (an item of the `streams` list
        returned by videoInfos)
    :return: the duration in seconds, as a float
    :raises KeyError: if none of the sources above is present
    :raises AssertionError: if the `DURATION` tag is not made of 3 parts
    """
    if "duration" in stream:
        return float(stream["duration"])

    if "sample_rate" in stream and "nb_frames" in stream:
        frameCount = int(stream["nb_frames"])
        sampleRate = int(stream["sample_rate"])
        return frameCount / sampleRate

    if "DURATION" not in stream.get("tags", ()):
        raise KeyError("Can't find duration information in stream")

    parts = stream["tags"]["DURATION"].split(":")
    assert len(parts) == 3
    hours, minutes, seconds = map(float, parts)
    return (hours * 60 + minutes) * 60 + seconds
2021-06-13 11:49:21 +02:00
2018-08-07 16:09:41 +02:00
def videoDuration(filename):
    """
    Return the duration, in seconds, of a media file.

    The duration is the mean of the durations of every stream of the file.
    When the file has several streams, their durations must not deviate
    from each other by more than DURATION_MAX_DEV.

    :param filename: path of an existing media file
    :return: the mean duration of the streams in seconds, as a float
    :raises ValueError: if the file has no stream
    :raises AssertionError: if the durations of the streams deviate too much
    :raises KeyError: if the duration of a stream can't be determined
    """
    # TODO Doesn't work with VP8 / webm
    infos = videoInfos(filename)
    durations = [streamDuration(stream) for stream in infos["streams"]]
    if not durations:
        raise ValueError("No stream found in {}".format(filename))
    # The standard deviation needs at least two values: a file with a single
    # stream has nothing to be compared with, and is accepted as is.
    if len(durations) > 1:
        dev = statistics.stdev(durations)
        assert dev <= DURATION_MAX_DEV, "Too much deviation ({} s)".format(dev)
    return sum(durations) / len(durations)
2018-08-07 16:09:41 +02:00
2018-06-24 18:28:37 +02:00
2018-08-07 16:09:41 +02:00
# Build the list of the videos to convert.
# `todos` receives one (inputFull, originalFull, outputFull, size, duration)
# tuple per video, `totalSize` (bytes) and `totalDuration` (seconds) their sums.
todos = set()
totalSize = 0
totalDuration = 0

# Walk folders
log.info("Listing files in {}".format(PICTURES_FOLDER))
allVideos = list()
for root, dirs, files in os.walk(PICTURES_FOLDER):
    # If folder is in ORIGINAL_FOLDER, skip it
    if root.startswith(ORIGINAL_FOLDER):
        continue
    # Iterate over files
    for inputName in files:
        # If the file is not a video, skip it
        inputNameBase, inputExt = os.path.splitext(inputName)
        inputExt = inputExt[1:].lower()
        if inputExt not in MOVIE_EXTENSIONS:
            continue
        allVideos.append((root, inputName))

log.info("Analyzing videos")
for root, inputName in progressbar.progressbar(allVideos):
    inputNameBase, inputExt = os.path.splitext(inputName)
    inputExt = inputExt[1:].lower()

    # Generates all needed filepaths
    ## Found file
    inputFull = os.path.join(root, inputName)
    inputRel = os.path.relpath(inputFull, PICTURES_FOLDER)
    ## Original file
    originalFull = os.path.join(ORIGINAL_FOLDER, inputRel)
    assert not os.path.isfile(originalFull), originalFull + " exists"
    ## Compressed file
    outputFull = os.path.join(root, inputNameBase + "." + OUTPUT_EXTENSION)

    # If the extension is the same as the output one
    if inputExt == OUTPUT_EXTENSION:
        # Read the metadata of the video
        meta = videoMetadata(inputFull)
        # If it has the field with the original file, it is the result of
        # a previous conversion
        if "original" in meta:
            # Skip file
            continue
    else:
        # The converted file must not overwrite an unrelated existing file
        assert not os.path.isfile(outputFull), outputFull + " exists"

    size = os.stat(inputFull).st_size
    try:
        duration = videoDuration(inputFull)
    except Exception as e:
        log.warning("Can't determine duration of {}, skipping".format(inputFull))
        log.debug(e, exc_info=True)
        continue

    todo = (inputFull, originalFull, outputFull, size, duration)
    totalDuration += duration
    totalSize += size
    todos.add(todo)

log.info(
    "Converting {} videos ({})".format(
        len(todos), datetime.timedelta(seconds=totalDuration)
    )
)
2018-08-07 16:09:41 +02:00
2023-11-23 22:59:09 +01:00
2018-08-07 16:09:41 +02:00
# From https://stackoverflow.com/a/3431838
def sha256(fname):
    """
    Return the SHA-256 checksum of the content of a file.

    :param fname: path of the file to read
    :return: the checksum as a string of hexadecimal digits
    """
    digest = hashlib.sha256()
    with open(fname, "rb") as stream:
        # Read the file piece by piece (128 KiB at a time) rather than
        # loading it all at once
        while True:
            block = stream.read(131072)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
2021-06-13 11:49:21 +02:00
2018-08-07 16:09:41 +02:00
# Convert every video listed in `todos`, keeping the original aside.
# A video that fails to be converted is logged and left where it is.

# Progress bar things: progression is counted in bytes of input processed
totalDataSize = progressbar.widgets.DataSize()
totalDataSize.variable = "max_value"
barWidgets = [
    progressbar.widgets.DataSize(),
    " of ",
    totalDataSize,
    " ",
    progressbar.widgets.Bar(),
    " ",
    progressbar.widgets.FileTransferSpeed(),
    " ",
    progressbar.widgets.AdaptiveETA(),
]
bar = progressbar.DataTransferBar(max_value=totalSize, widgets=barWidgets)
bar.start()
processedSize = 0

for inputFull, originalFull, outputFull, size, duration in todos:
    # mkstemp returns an open file descriptor along with the path. Only the
    # path is used (ffmpeg is given the file by name), so the descriptor is
    # closed right away instead of leaking one per video.
    tmpfd, tmpfile = tempfile.mkstemp(
        prefix="compressPictureMovies", suffix="." + OUTPUT_EXTENSION
    )
    os.close(tmpfd)
    try:
        # Calculate the sum of the original file
        checksum = sha256(inputFull)

        # Initiate a conversion in a temporary file
        originalRel = os.path.relpath(originalFull, ORIGINAL_FOLDER)
        originalContent = "{} {}".format(originalRel, checksum)
        # No shell is involved: the double quotes are part of the value
        metadataCmd = ["-metadata", 'original="{}"'.format(originalContent)]
        cmd = (
            ["ffmpeg", "-hide_banner", "-y", "-i", inputFull]
            + OUTPUT_FFMPEG_PARAMETERS
            + metadataCmd
            + [tmpfile]
        )
        p = subprocess.run(cmd)
        p.check_returncode()

        # Verify the duration of the new file
        newDuration = videoDuration(tmpfile)
        dev = statistics.stdev((duration, newDuration))
        assert dev < DURATION_MAX_DEV, "Too much deviation in duration"

        # Move the original to the corresponding original folder
        originalDir = os.path.dirname(originalFull)
        os.makedirs(originalDir, exist_ok=True)
        shutil.move(inputFull, originalFull)

        # Move the converted file in place of the original
        shutil.move(tmpfile, outputFull)
    except Exception as e:
        log.error("Couldn't process file {}".format(inputFull))
        log.error(e, exc_info=True)
        # Best-effort cleanup: the temporary file might already be gone
        try:
            os.unlink(tmpfile)
        except OSError:
            log.debug(
                "Couldn't remove temporary file {}".format(tmpfile), exc_info=True
            )

    # Progress bar things
    processedSize += size
    bar.update(processedSize)
bar.finish()
# TODO Iterate over the already compressed videos to assert the originals are
# in their correct place, else move them