#!/usr/bin/env python3
"""Re-encode every video found under PICTURES_FOLDER with ffmpeg.

For each video that is not already a converted file, the script encodes it
into a temporary file, checks that the duration of the result matches the
source, moves the source into the mirror tree under ORIGINAL_FOLDER and puts
the re-encoded file next to where the source was. Converted files carry an
``original`` metadata tag holding the relative path and SHA-256 of the source.
"""

import os
import shutil
import subprocess
import sys
import logging
import coloredlogs
import progressbar
import time
import hashlib
import tempfile
import json
import statistics
import datetime
from pprint import pprint

coloredlogs.install(level='DEBUG', fmt='%(levelname)s %(message)s')
log = logging.getLogger()

# Constants
PICTURES_FOLDER = os.path.join(os.path.expanduser("~"), "Images")
ORIGINAL_FOLDER = os.path.join(os.path.expanduser("~"), ".ImagesOriginaux")
MOVIE_EXTENSIONS = ["mov", "avi", "mp4", "3gp", "webm", "mkv"]
OUTPUT_EXTENSION = "webm"
OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0"]
# OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libaom-av1", "-crf", "30", "-strict", "experimental", "-c:a", "libopus"]
# Maximum tolerated standard deviation between durations, in seconds
DURATION_MAX_DEV = 1


def videoMetadata(filename):
    """Return the metadata of a video as a dict, as dumped by ffmpeg.

    Runs ``ffmpeg -i <filename> -f ffmetadata -`` and parses its standard
    output. Keys are lower-cased; values are kept verbatim (no unescaping
    is performed).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero code.
    """
    assert os.path.isfile(filename)
    cmd = ["ffmpeg", "-i", filename, "-f", "ffmetadata", "-"]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    p.check_returncode()

    data = dict()
    for metadataLine in p.stdout.split(b'\n'):
        # Skip empty lines
        if not metadataLine:
            continue
        # Skip comments
        if metadataLine.startswith(b';'):
            continue
        # Parse key-value. Only the first `=` separates the key from the
        # value, so that values containing `=` are not thrown away.
        key, separator, val = metadataLine.partition(b'=')
        if not separator:
            log.warning("Unparsed metadata line: `{}`".format(metadataLine))
            continue
        data[key.decode().lower()] = val.decode()
    return data


def videoInfos(filename):
    """Return the parsed JSON output of ``ffprobe -show_streams`` for a file.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero code.
    """
    assert os.path.isfile(filename)
    cmd = ["ffprobe", filename, "-print_format", "json", "-show_streams"]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    p.check_returncode()
    return json.loads(p.stdout)


def streamDuration(stream):
    """Return the duration in seconds of one ffprobe stream description.

    Tries, in order: the ``duration`` field, ``nb_frames / sample_rate``,
    then the ``DURATION`` tag formatted as ``hours:minutes:seconds``.

    Raises:
        KeyError: if none of these sources is present in the stream.
        ValueError: if the ``DURATION`` tag doesn't have three fields.
    """
    if "duration" in stream:
        return float(stream["duration"])
    if "sample_rate" in stream and "nb_frames" in stream:
        return int(stream["nb_frames"]) / int(stream["sample_rate"])
    if "tags" in stream and "DURATION" in stream["tags"]:
        durRaw = stream["tags"]["DURATION"]
        durSplit = durRaw.split(":")
        if len(durSplit) != 3:
            raise ValueError("Unexpected DURATION tag: {}".format(durRaw))
        hours, minutes, seconds = [float(a) for a in durSplit]
        return (hours * 60 + minutes) * 60 + seconds
    raise KeyError("Can't find duration information in stream")


def videoDuration(filename):
    """Return the mean duration in seconds of the streams of a video.

    Raises:
        ValueError: if the file has no stream, or if the stream durations
            deviate from each other by more than DURATION_MAX_DEV.
    """
    # TODO Doesn't work with VP8 / webm
    infos = videoInfos(filename)
    durations = [streamDuration(stream) for stream in infos["streams"]]
    if not durations:
        raise ValueError("No stream found in {}".format(filename))
    # statistics.stdev needs at least two data points: a file with a single
    # stream has nothing to deviate from.
    dev = statistics.stdev(durations) if len(durations) > 1 else 0.0
    # Written as `not <=` so that a NaN deviation is rejected too
    if not dev <= DURATION_MAX_DEV:
        raise ValueError("Too much deviation ({} s)".format(dev))
    return sum(durations) / len(durations)


todos = set()
totalSize = 0
totalDuration = 0

# Walk folders
log.info("Listing files in {}".format(PICTURES_FOLDER))
allVideos = list()
for root, _dirs, files in os.walk(PICTURES_FOLDER):
    # If folder is in ORIGINAL_FOLDER, skip it
    if root.startswith(ORIGINAL_FOLDER):
        continue
    # Iterate over files
    for inputName in files:
        # If the file is not a video, skip it
        inputNameBase, inputExt = os.path.splitext(inputName)
        inputExt = inputExt[1:].lower()
        if inputExt not in MOVIE_EXTENSIONS:
            continue
        allVideos.append((root, inputName))

log.info("Analyzing videos")
for root, inputName in progressbar.progressbar(allVideos):
    inputNameBase, inputExt = os.path.splitext(inputName)
    inputExt = inputExt[1:].lower()

    # Generates all needed filepaths
    ## Found file
    inputFull = os.path.join(root, inputName)
    inputRel = os.path.relpath(inputFull, PICTURES_FOLDER)
    ## Original file
    originalFull = os.path.join(ORIGINAL_FOLDER, inputRel)
    # Explicit raise rather than assert: this guards against overwriting a
    # saved original and must not disappear under `python -O`.
    if os.path.isfile(originalFull):
        raise FileExistsError(originalFull + " exists")
    ## Compressed file
    outputFull = os.path.join(root, inputNameBase + "." + OUTPUT_EXTENSION)

    # If the extension is the same of the output one
    if inputExt == OUTPUT_EXTENSION:
        # Read the metadata of the video
        meta = videoMetadata(inputFull)
        # If it has the field with the original file
        if 'original' in meta:
            # Skip file
            continue
    elif os.path.isfile(outputFull):
        raise FileExistsError(outputFull + " exists")

    size = os.stat(inputFull).st_size
    try:
        duration = videoDuration(inputFull)
    except Exception as e:
        log.warning("Can't determine duration of {}, skipping".format(inputFull))
        log.debug(e, exc_info=True)
        continue

    todo = (inputFull, originalFull, outputFull, size, duration)
    totalDuration += duration
    totalSize += size
    todos.add(todo)

log.info("Converting {} videos ({})".format(len(todos), datetime.timedelta(seconds=totalDuration)))


# From https://stackoverflow.com/a/3431838
def sha256(fname):
    """Return the hexadecimal SHA-256 digest of a file, read by chunks."""
    hash_sha256 = hashlib.sha256()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(131072), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()


# Progress bar things
totalDataSize = progressbar.widgets.DataSize()
totalDataSize.variable = 'max_value'
barWidgets = [progressbar.widgets.DataSize(), ' of ', totalDataSize, ' ', progressbar.widgets.Bar(), ' ', progressbar.widgets.FileTransferSpeed(), ' ', progressbar.widgets.AdaptiveETA()]
bar = progressbar.DataTransferBar(max_value=totalSize, widgets=barWidgets)
bar.start()
processedSize = 0

for inputFull, originalFull, outputFull, size, duration in todos:
    # mkstemp returns an already opened descriptor along with the path; only
    # the path is needed (ffmpeg writes to it), so close the descriptor right
    # away instead of leaking one per video.
    tmpFd, tmpfile = tempfile.mkstemp(prefix="compressPictureMovies", suffix="."+OUTPUT_EXTENSION)
    os.close(tmpFd)
    try:
        # Calculate the sum of the original file
        checksum = sha256(inputFull)

        # Initiate a conversion in a temporary file
        originalRel = os.path.relpath(originalFull, ORIGINAL_FOLDER)
        originalContent = "{} {}".format(originalRel, checksum)
        # NOTE(review): no shell is involved, so the double quotes are passed
        # to ffmpeg as-is and presumably end up in the stored tag value.
        # Confirm before relying on the exact format of this tag.
        metadataCmd = ["-metadata", 'original="{}"'.format(originalContent)]
        cmd = ["ffmpeg", "-hide_banner", "-y", "-i", inputFull] + OUTPUT_FFMPEG_PARAMETERS + metadataCmd + [tmpfile]
        p = subprocess.run(cmd)
        p.check_returncode()

        # Verify the duration of the new file
        newDuration = videoDuration(tmpfile)
        dev = statistics.stdev((duration, newDuration))
        # Explicit raise rather than assert: this is the last check before
        # the source file gets moved away, it must survive `python -O`.
        if not dev < DURATION_MAX_DEV:
            raise ValueError("Too much deviation in duration")

        # Move the original to the corresponding original folder
        originalDir = os.path.dirname(originalFull)
        os.makedirs(originalDir, exist_ok=True)
        shutil.move(inputFull, originalFull)

        # Move the converted file in place of the original
        shutil.move(tmpfile, outputFull)
    except Exception as e:
        log.error("Couldn't process file {}".format(inputFull))
        log.error(e, exc_info=True)
    finally:
        # On success the temporary file has been moved away; whatever is
        # still there (failure, or interruption such as Ctrl-C) is garbage.
        try:
            os.unlink(tmpfile)
        except FileNotFoundError:
            pass
        except OSError as e:
            log.warning("Couldn't remove temporary file {}".format(tmpfile))
            log.debug(e, exc_info=True)

    # Progress bar things
    processedSize += size
    bar.update(processedSize)
bar.finish()

# TODO Iterate over the already compressed videos to assert the originals are
# in their correct place, else move them