2018-06-24 18:28:37 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
import subprocess
|
|
|
|
import sys
|
2018-08-07 16:09:41 +02:00
|
|
|
import logging
|
|
|
|
import coloredlogs
|
|
|
|
import progressbar
|
|
|
|
import time
|
|
|
|
import hashlib
|
|
|
|
import tempfile
|
|
|
|
import json
|
|
|
|
import statistics
|
|
|
|
import datetime
|
|
|
|
|
2021-06-13 11:49:21 +02:00
|
|
|
# Configure coloredlogs at DEBUG level with a compact "LEVEL message" line
# format (NOTE(review): presumably this attaches its handler to the root
# logger -- confirm against the coloredlogs documentation).
coloredlogs.install(level="DEBUG", fmt="%(levelname)s %(message)s")
# Root logger (no name is given), used for every message of this script.
log = logging.getLogger()
|
2018-06-24 18:28:37 +02:00
|
|
|
|
|
|
|
# Constants

# Folder that is walked to find the videos to compress.
PICTURES_FOLDER = os.path.join(os.path.expanduser("~"), "Images")

# Folder receiving the untouched source files once they have been converted.
ORIGINAL_FOLDER = os.path.join(os.path.expanduser("~"), ".ImagesOriginaux")

# File extensions (lower case, without the dot) that are considered videos.
MOVIE_EXTENSIONS = [
    "mov",
    "avi",
    "mp4",
    "3gp",
    "webm",
    "mkv",
]

# Extension given to the converted files.
OUTPUT_EXTENSION = "webm"

# Encoding options inserted in the ffmpeg command line, after the input file.
OUTPUT_FFMPEG_PARAMETERS = [
    "-c:v",
    "libvpx-vp9",
    "-crf",
    "30",
    "-b:v",
    "0",
]
# Alternative AV1 settings, kept for reference:
# OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libaom-av1", "-crf", "30", "-strict", "experimental", "-c:a", "libopus"]

# Largest accepted standard deviation, in seconds, between compared durations.
DURATION_MAX_DEV = 1
|
|
|
|
|
|
|
|
|
|
|
|
def videoMetadata(filename):
    """
    Read the metadata tags of a media file.

    Runs ``ffmpeg -i <filename> -f ffmetadata -`` and parses the
    ``key=value`` lines printed on its standard output. Tags of every
    section end up in the same dictionary; lines that are not
    ``key=value`` pairs (section headers for instance) are reported with a
    warning and ignored.

    :param filename: path of an existing file.
    :return: dict mapping lower-cased tag names to their value, both str.
        Backslash escapes present in the ffmpeg output are kept verbatim.
    :raises AssertionError: if ``filename`` is not an existing file.
    :raises subprocess.CalledProcessError: if ffmpeg exits with an error.
    """
    assert os.path.isfile(filename)
    cmd = ["ffmpeg", "-i", filename, "-f", "ffmetadata", "-"]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    p.check_returncode()

    data = {}
    for metadataLine in p.stdout.split(b"\n"):
        # Skip empty lines
        if not metadataLine:
            continue
        # Skip comments (the ffmetadata format allows both markers)
        if metadataLine.startswith((b";", b"#")):
            continue
        # Parse key-value. Only the first "=" separates the key from the
        # value, so values that themselves contain "=" are kept whole
        # instead of being rejected.
        key, separator, val = metadataLine.partition(b"=")
        if not separator:
            log.warning("Unparsed metadata line: `{}`".format(metadataLine))
            continue
        data[key.decode().lower()] = val.decode()
    return data
|
|
|
|
|
2021-06-13 11:49:21 +02:00
|
|
|
|
2018-08-07 16:09:41 +02:00
|
|
|
def videoInfos(filename):
    """
    Return the stream description of a media file, as given by ffprobe.

    :param filename: path of an existing file.
    :return: the decoded JSON document printed by
        ``ffprobe <filename> -print_format json -show_streams``.
    :raises AssertionError: if ``filename`` is not an existing file.
    :raises subprocess.CalledProcessError: if ffprobe exits with an error.
    """
    assert os.path.isfile(filename)
    probe = subprocess.run(
        ["ffprobe", filename, "-print_format", "json", "-show_streams"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    probe.check_returncode()
    return json.loads(probe.stdout)
|
|
|
|
|
2021-06-13 11:49:21 +02:00
|
|
|
|
2018-08-07 16:09:41 +02:00
|
|
|
from pprint import pprint
|
2021-06-13 11:49:21 +02:00
|
|
|
|
|
|
|
|
2018-08-07 16:09:41 +02:00
|
|
|
def streamDuration(stream):
    """
    Return the duration, in seconds, of one stream description.

    Three sources are tried, in this order:

    1. the ``duration`` field;
    2. ``nb_frames`` divided by ``sample_rate``;
    3. the ``DURATION`` tag, formatted as ``hours:minutes:seconds``.

    :param stream: mapping describing a stream (an item of the ``streams``
        list returned by videoInfos).
    :return: the duration as a number of seconds.
    :raises KeyError: if none of the three sources is available.
    :raises AssertionError: if the ``DURATION`` tag does not have exactly
        three ``:``-separated fields.
    """
    if "duration" in stream:
        return float(stream["duration"])

    if "sample_rate" in stream and "nb_frames" in stream:
        frameCount = int(stream["nb_frames"])
        sampleRate = int(stream["sample_rate"])
        return frameCount / sampleRate

    if "tags" in stream and "DURATION" in stream["tags"]:
        fields = stream["tags"]["DURATION"].split(":")
        assert len(fields) == 3
        hours, minutes, seconds = [float(field) for field in fields]
        return (hours * 60 + minutes) * 60 + seconds

    raise KeyError("Can't find duration information in stream")
|
|
|
|
|
2021-06-13 11:49:21 +02:00
|
|
|
|
2018-08-07 16:09:41 +02:00
|
|
|
def videoDuration(filename):
    """
    Return the duration of a media file, in seconds.

    The duration of every stream is computed with streamDuration; the
    result is their mean, provided they agree with each other.

    :param filename: path of an existing file.
    :return: mean of the stream durations, in seconds.
    :raises AssertionError: if the standard deviation of the stream
        durations exceeds DURATION_MAX_DEV seconds.
    :raises statistics.StatisticsError: if the file has no stream at all.
    :raises KeyError: if a stream carries no duration information.
    """
    # TODO Doesn't work with VP8 / webm
    infos = videoInfos(filename)
    durations = [streamDuration(stream) for stream in infos["streams"]]
    if len(durations) == 1:
        # statistics.stdev() needs at least two samples. A file with a
        # single stream (e.g. a video without audio) has nothing to
        # disagree with, so it must not be rejected.
        dev = 0
    else:
        dev = statistics.stdev(durations)
    assert dev <= DURATION_MAX_DEV, "Too much deviation ({} s)".format(dev)
    return sum(durations) / len(durations)
|
2018-08-07 16:09:41 +02:00
|
|
|
|
2018-06-24 18:28:37 +02:00
|
|
|
|
2018-08-07 16:09:41 +02:00
|
|
|
# Conversion queue and running totals, filled while analyzing the videos
todos = set()
totalSize = 0
totalDuration = 0

# Walk folders
log.info("Listing files in {}".format(PICTURES_FOLDER))
allVideos = []
for root, dirs, files in os.walk(PICTURES_FOLDER):
    # Nothing located under ORIGINAL_FOLDER is considered
    if root.startswith(ORIGINAL_FOLDER):
        continue

    # Keep the files whose extension is one of the known video extensions
    for inputName in files:
        inputNameBase, inputExt = os.path.splitext(inputName)
        inputExt = inputExt[1:].lower()
        if inputExt in MOVIE_EXTENSIONS:
            allVideos.append((root, inputName))
|
|
|
|
|
|
|
|
# Analysis pass: decide, for every video found, whether it has to be
# converted, and gather what the conversion pass needs in `todos`.
log.info("Analyzing videos")
for root, inputName in progressbar.progressbar(allVideos):
    inputNameBase, inputExt = os.path.splitext(inputName)
    inputExt = inputExt[1:].lower()

    # Generates all needed filepaths
    ## Found file
    inputFull = os.path.join(root, inputName)
    inputRel = os.path.relpath(inputFull, PICTURES_FOLDER)
    ## Original file
    originalFull = os.path.join(ORIGINAL_FOLDER, inputRel)
    originalRel = inputRel
    # Never overwrite a source file that was already archived
    assert not os.path.isfile(originalFull), originalFull + " exists"

    ## Compressed file
    outputFull = os.path.join(root, inputNameBase + "." + OUTPUT_EXTENSION)

    # If the extension is the same as the output one
    if inputExt == OUTPUT_EXTENSION:
        # Read the metadata of the video
        meta = videoMetadata(inputFull)

        # If it has the field with the original file, it was produced by a
        # previous run of this script
        if "original" in meta:
            # Skip file
            continue
    else:
        # The converted file would take the place of another existing file
        assert not os.path.isfile(outputFull), outputFull + " exists"

    size = os.stat(inputFull).st_size
    try:
        duration = videoDuration(inputFull)
    except Exception as e:
        log.warning("Can't determine duration of {}, skipping".format(inputFull))
        log.debug(e, exc_info=True)
        continue

    todo = (inputFull, originalFull, outputFull, size, duration)

    totalDuration += duration
    totalSize += size
    todos.add(todo)

log.info(
    "Converting {} videos ({})".format(
        len(todos), datetime.timedelta(seconds=totalDuration)
    )
)
|
2018-08-07 16:09:41 +02:00
|
|
|
|
|
|
|
# From https://stackoverflow.com/a/3431838
def sha256(fname):
    """
    Return the SHA-256 digest of a file, as a hexadecimal string.

    The file is read by blocks of 128 KiB so that it never has to be held
    entirely in memory.

    :param fname: path of the file to hash.
    """
    digest = hashlib.sha256()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(131072)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
2021-06-13 11:49:21 +02:00
|
|
|
|
2018-08-07 16:09:41 +02:00
|
|
|
# Progress bar things
# The bar advances with the size of the source files already processed.
totalDataSize = progressbar.widgets.DataSize()
# Make this second DataSize widget read the bar's "max_value" variable
totalDataSize.variable = "max_value"
barWidgets = [
    progressbar.widgets.DataSize(),
    " of ",
    totalDataSize,
    " ",
    progressbar.widgets.Bar(),
    " ",
    progressbar.widgets.FileTransferSpeed(),
    " ",
    progressbar.widgets.AdaptiveETA(),
]
bar = progressbar.DataTransferBar(max_value=totalSize, widgets=barWidgets)
bar.start()
processedSize = 0

# Conversion pass: a failure on one video is logged and does not prevent the
# following ones from being processed.
for inputFull, originalFull, outputFull, size, duration in todos:
    # mkstemp() hands back an already opened descriptor along with the path.
    # ffmpeg opens the file by name, so the descriptor is closed right away
    # instead of leaking one per converted video.
    tmpfd, tmpfile = tempfile.mkstemp(
        prefix="compressPictureMovies", suffix="." + OUTPUT_EXTENSION
    )
    os.close(tmpfd)
    try:
        # Calculate the sum of the original file
        checksum = sha256(inputFull)

        # Initiate a conversion in a temporary file
        originalRel = os.path.relpath(originalFull, ORIGINAL_FOLDER)
        originalContent = "{} {}".format(originalRel, checksum)
        # NOTE(review): no shell is involved, so the double quotes are passed
        # to ffmpeg as part of the tag value -- possibly unintended, left
        # untouched to stay consistent with already converted files.
        metadataCmd = ["-metadata", 'original="{}"'.format(originalContent)]
        cmd = (
            ["ffmpeg", "-hide_banner", "-y", "-i", inputFull]
            + OUTPUT_FFMPEG_PARAMETERS
            + metadataCmd
            + [tmpfile]
        )
        p = subprocess.run(cmd)
        p.check_returncode()

        # Verify the duration of the new file
        newDuration = videoDuration(tmpfile)
        dev = statistics.stdev((duration, newDuration))
        assert dev < DURATION_MAX_DEV, "Too much deviation in duration"

        # Move the original to the corresponding original folder
        originalDir = os.path.dirname(originalFull)
        os.makedirs(originalDir, exist_ok=True)
        shutil.move(inputFull, originalFull)

        # Move the converted file in place of the original
        shutil.move(tmpfile, outputFull)
    except Exception as e:
        log.error("Couldn't process file {}".format(inputFull))
        log.error(e, exc_info=True)
        # Best-effort removal of the temporary file: it may not exist anymore
        try:
            os.unlink(tmpfile)
        except OSError:
            pass
    # Progress bar things
    processedSize += size
    bar.update(processedSize)
bar.finish()
|
|
|
|
|
|
|
|
|
|
|
|
# TODO Iterate over the already compressed videos to assert the originals are
|
|
|
|
# in their correct place, else move them
|