dotfiles/config/nix/scripts/compressPictureMovies

#!/usr/bin/env nix-shell
#! nix-shell -i python3 --pure
#! nix-shell -p python3 python3Packages.coloredlogs python3Packages.progressbar2 ffmpeg

import datetime
import hashlib
import json
import logging
import os
import shutil
import statistics
import subprocess
import sys
import tempfile
import time

import coloredlogs
import progressbar

# Colored console logging at DEBUG level; messages are emitted through the
# root logger obtained below.
coloredlogs.install(level="DEBUG", fmt="%(levelname)s %(message)s")
log = logging.getLogger()

# Constants
# Folder that is walked recursively to find videos to compress.
PICTURES_FOLDER = os.path.join(os.path.expanduser("~"), "Images")
# Folder the untouched originals are moved to, keeping the same path relative
# to PICTURES_FOLDER.
ORIGINAL_FOLDER = os.path.join(os.path.expanduser("~"), ".ImagesOriginaux")
# File extensions (lower case, without the dot) that are treated as videos.
MOVIE_EXTENSIONS = ["mov", "avi", "mp4", "3gp", "webm", "mkv"]
# Extension of the compressed files. Input files that already have this
# extension are only skipped when they carry an "original" metadata tag.
OUTPUT_EXTENSION = "webm"
# Arguments inserted between the input and the output file on the ffmpeg
# command line.
OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0"]
# Alternative parameter set (AV1 video, Opus audio), currently disabled:
# OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libaom-av1", "-crf", "30", "-strict", "experimental", "-c:a", "libopus"]
# Maximum accepted standard deviation, in seconds, between the durations of
# the streams of one file and between a file and its compressed version.
DURATION_MAX_DEV = 1


def videoMetadata(filename):
    """Return the metadata tags of a media file as a flat dict.

    Runs ``ffmpeg -i <filename> -f ffmetadata -`` and parses the
    ``key=value`` lines printed on stdout. Empty lines and lines starting
    with ``;`` are ignored; any other line without a ``=`` is logged as
    unparsed and skipped.

    Keys are lower-cased. Values are returned as printed by ffmpeg, i.e.
    backslash escapes of the ffmetadata format are NOT undone. Bytes that are
    not valid UTF-8 are replaced rather than aborting the whole run.

    Raises:
        AssertionError: if ``filename`` is not an existing file.
        subprocess.CalledProcessError: if ffmpeg exits with an error.
    """
    assert os.path.isfile(filename)
    cmd = ["ffmpeg", "-i", filename, "-f", "ffmetadata", "-"]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    p.check_returncode()
    data = dict()
    for metadataLine in p.stdout.split(b"\n"):
        # Skip empty lines
        if not metadataLine:
            continue
        # Skip comments
        if metadataLine.startswith(b";"):
            continue
        # Parse key-value. Only the first "=" separates the key from the
        # value: the value itself may contain "=" (ffmpeg writes it as "\="),
        # and such lines must not be dropped.
        key, separator, val = metadataLine.partition(b"=")
        if not separator:
            log.warning("Unparsed metadata line: `{}`".format(metadataLine))
            continue
        key = key.decode(errors="replace").lower()
        data[key] = val.decode(errors="replace")
    return data


def videoInfos(filename):
    """Return what ``ffprobe -show_streams`` reports for ``filename``.

    The JSON document printed by ffprobe is parsed and returned as-is.

    Raises:
        AssertionError: if ``filename`` is not an existing file.
        subprocess.CalledProcessError: if ffprobe exits with an error.
    """
    assert os.path.isfile(filename)
    probe = subprocess.run(
        ["ffprobe", filename, "-print_format", "json", "-show_streams"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    return json.loads(probe.stdout)


from pprint import pprint


def streamDuration(stream):
    """Return the duration, in seconds, of one ffprobe stream description.

    ``stream`` is a dict as found in the "streams" list of ffprobe's JSON
    output. The sources are tried in this order:

    1. the "duration" field;
    2. "nb_frames" divided by "sample_rate";
    3. the "DURATION" tag, formatted as ``hours:minutes:seconds``.

    Raises:
        KeyError: if none of these sources is present.
        AssertionError: if the "DURATION" tag does not have three
            colon-separated parts.
    """
    # 1. Explicit duration
    if "duration" in stream:
        return float(stream["duration"])

    # 2. Frame count over sample rate
    if "sample_rate" in stream and "nb_frames" in stream:
        frameCount = int(stream["nb_frames"])
        sampleRate = int(stream["sample_rate"])
        return frameCount / sampleRate

    # 3. DURATION tag, or give up
    if "tags" not in stream or "DURATION" not in stream["tags"]:
        raise KeyError("Can't find duration information in stream")

    parts = stream["tags"]["DURATION"].split(":")
    assert len(parts) == 3
    hours, minutes, seconds = map(float, parts)
    return (hours * 60 + minutes) * 60 + seconds


def videoDuration(filename):
    """Return the duration of ``filename`` in seconds.

    The duration of every stream reported by ffprobe is computed and the
    mean is returned. When the file has several streams, their durations
    must agree: the standard deviation may not exceed DURATION_MAX_DEV.

    Raises:
        ValueError: if ffprobe reports no stream at all.
        AssertionError: if the stream durations deviate too much.
        KeyError: if a stream carries no usable duration information.
    """
    # TODO Doesn't work with VP8 / webm
    infos = videoInfos(filename)
    durations = [streamDuration(stream) for stream in infos["streams"]]
    if not durations:
        raise ValueError("No stream found in {}".format(filename))
    # statistics.stdev() requires at least two data points. A file with a
    # single stream (e.g. a video without audio) has nothing to deviate from
    # and must not be rejected for that reason.
    dev = statistics.stdev(durations) if len(durations) > 1 else 0
    assert dev <= DURATION_MAX_DEV, "Too much deviation ({} s)".format(dev)
    return sum(durations) / len(durations)


# Work list filled by the analysis below: one
# (inputFull, originalFull, outputFull, size, duration) tuple per video to
# convert, plus running totals used for the summary and the progress bar.
todos = set()
totalSize = 0
totalDuration = 0

# Walk folders
log.info("Listing files in {}".format(PICTURES_FOLDER))
allVideos = list()
for root, dirs, files in os.walk(PICTURES_FOLDER):
    # If folder is in ORIGINAL_FOLDER, skip it
    if root.startswith(ORIGINAL_FOLDER):
        continue
    # Iterate over files
    for inputName in files:
        # If the file is not a video, skip it
        inputNameBase, inputExt = os.path.splitext(inputName)
        inputExt = inputExt[1:].lower()
        if inputExt not in MOVIE_EXTENSIONS:
            continue

        allVideos.append((root, inputName))

log.info("Analyzing videos")
for root, inputName in progressbar.progressbar(allVideos):
    inputNameBase, inputExt = os.path.splitext(inputName)
    inputExt = inputExt[1:].lower()

    # Generates all needed filepaths
    ## Found file
    inputFull = os.path.join(root, inputName)
    inputRel = os.path.relpath(inputFull, PICTURES_FOLDER)
    ## Original file (same relative path, under ORIGINAL_FOLDER)
    originalFull = os.path.join(ORIGINAL_FOLDER, inputRel)
    # Abort rather than overwrite a previously archived original
    assert not os.path.isfile(originalFull), originalFull + " exists"

    ## Compressed file
    outputFull = os.path.join(root, inputNameBase + "." + OUTPUT_EXTENSION)

    # If the extension is the same as the output one
    if inputExt == OUTPUT_EXTENSION:
        # Read the metadata of the video
        meta = videoMetadata(inputFull)

        # If it has the field with the original file, it was already
        # compressed by this script
        if "original" in meta:
            # Skip file
            continue
    else:
        # Abort rather than overwrite an unrelated file with the same name
        assert not os.path.isfile(outputFull), outputFull + " exists"

    size = os.stat(inputFull).st_size
    try:
        duration = videoDuration(inputFull)
    except Exception as e:
        log.warning("Can't determine duration of {}, skipping".format(inputFull))
        log.debug(e, exc_info=True)
        continue

    todo = (inputFull, originalFull, outputFull, size, duration)

    totalDuration += duration
    totalSize += size
    todos.add(todo)

log.info(
    "Converting {} videos ({})".format(
        len(todos), datetime.timedelta(seconds=totalDuration)
    )
)


# From https://stackoverflow.com/a/3431838
def sha256(fname):
    """Return the hexadecimal SHA-256 digest of the file at ``fname``.

    The file is read in 128 KiB pieces so that large videos are never loaded
    into memory at once.
    """
    digest = hashlib.sha256()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(131072)
            if not block:
                # End of file reached
                break
            digest.update(block)
    return digest.hexdigest()


# Progress bar things
# The bar advances by the size of the processed input files, up to totalSize.
totalDataSize = progressbar.widgets.DataSize()
# NOTE(review): presumably makes this widget show the bar's maximum instead of
# its current value — confirm against the progressbar2 documentation.
totalDataSize.variable = "max_value"
barWidgets = [
    progressbar.widgets.DataSize(),
    " of ",
    totalDataSize,
    " ",
    progressbar.widgets.Bar(),
    " ",
    progressbar.widgets.FileTransferSpeed(),
    " ",
    progressbar.widgets.AdaptiveETA(),
]
bar = progressbar.DataTransferBar(max_value=totalSize, widgets=barWidgets)
bar.start()
processedSize = 0


# Convert every video of the work list. A failure on one file is logged and
# does not stop the others from being processed.
for inputFull, originalFull, outputFull, size, duration in todos:
    # mkstemp() returns an open file descriptor together with the path. Only
    # the path is needed (ffmpeg writes the file itself), so the descriptor is
    # closed right away instead of leaking one per video.
    tmpfd, tmpfile = tempfile.mkstemp(
        prefix="compressPictureMovies", suffix="." + OUTPUT_EXTENSION
    )
    os.close(tmpfd)
    try:
        # Calculate the sum of the original file
        checksum = sha256(inputFull)

        # Initiate a conversion in a temporary file
        originalRel = os.path.relpath(originalFull, ORIGINAL_FOLDER)
        originalContent = "{} {}".format(originalRel, checksum)
        # NOTE(review): no shell is involved, so the double quotes below end
        # up inside the metadata value. Kept as-is to stay consistent with
        # files that were already converted.
        metadataCmd = ["-metadata", 'original="{}"'.format(originalContent)]
        cmd = (
            ["ffmpeg", "-hide_banner", "-y", "-i", inputFull]
            + OUTPUT_FFMPEG_PARAMETERS
            + metadataCmd
            + [tmpfile]
        )
        p = subprocess.run(cmd)
        p.check_returncode()

        # Verify the duration of the new file
        newDuration = videoDuration(tmpfile)
        dev = statistics.stdev((duration, newDuration))
        assert dev < DURATION_MAX_DEV, "Too much deviation in duration"

        # Move the original to the corresponding original folder
        originalDir = os.path.dirname(originalFull)
        os.makedirs(originalDir, exist_ok=True)
        shutil.move(inputFull, originalFull)

        # Move the converted file in place of the original
        shutil.move(tmpfile, outputFull)
    except Exception as e:
        log.error("Couldn't process file {}".format(inputFull))
        log.error(e, exc_info=True)
        # Best-effort cleanup of the temporary file
        try:
            os.unlink(tmpfile)
        except FileNotFoundError:
            # Nothing left to clean up
            pass
        except OSError as unlinkError:
            log.warning(
                "Couldn't remove temporary file {}: {}".format(tmpfile, unlinkError)
            )
    # Progress bar things
    processedSize += size
    bar.update(processedSize)
bar.finish()


# TODO Iterate over the already compressed videos to assert the originals are
# in their correct place, else move them