|
|
# NOTE(review): a unified-diff hunk header ("@ -4,20 +4,98 @@") was fused into
# this line, swallowing its context line; restoring the import it contained.
import os
|
|
|
import shutil |
|
|
|
import subprocess |
|
|
|
import sys |
|
|
|
import logging |
|
|
|
import coloredlogs |
|
|
|
import progressbar |
|
|
|
import time |
|
|
|
import hashlib |
|
|
|
import tempfile |
|
|
|
import json |
|
|
|
import statistics |
|
|
|
import datetime |
|
|
|
|
|
|
|
# Set up the root logger with colored console output at DEBUG level.
log = logging.getLogger()
coloredlogs.install(level='DEBUG', fmt='%(levelname)s %(message)s')
|
|
|
|
|
|
|
# Constants

# Root folder scanned for videos to compress.
PICTURES_FOLDER = os.path.join(os.path.expanduser("~"), "Images")

# Legacy originals location (note the historical typo in the name); still
# referenced by older code paths further down, so it must stay defined.
ORIGNAL_FOLDER = os.path.join(PICTURES_FOLDER, ".Originaux")

# NOTE(review): residue code below calls .encode() on this as if it were a
# str, which would fail on a list — confirm the intended type before reuse.
OUTPUT_METADATA_FIELD = ["episode_id"]

# Current location where original (uncompressed) files are moved.
ORIGINAL_FOLDER = os.path.join(os.path.expanduser("~"), ".ImagesOriginaux")

# File extensions recognized as videos to process.
MOVIE_EXTENSIONS = ["mov", "avi", "mp4", "3gp", "webm", "mkv"]

# Target container/extension for compressed output.
OUTPUT_EXTENSION = "webm"

# VP9 constant-quality encoding parameters.
# (The superseded mp4/x265 values that a bad merge left above these have been
# removed: they were dead bindings, immediately overwritten.)
OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0"]
# Alternative AV1 parameters, kept for reference:
# OUTPUT_FFMPEG_PARAMETERS = ["-c:v", "libaom-av1", "-crf", "30", "-strict", "experimental", "-c:a", "libopus"]

# Maximum allowed standard deviation (seconds) between stream durations.
DURATION_MAX_DEV = 1
|
|
|
|
|
|
|
|
|
|
|
def videoMetadata(filename):
    """Return the ffmetadata key/value pairs of *filename* as a dict.

    Runs `ffmpeg -f ffmetadata` on the file and parses its stdout.
    Keys are lowercased strings, values are strings.

    Raises subprocess.CalledProcessError if ffmpeg fails.
    """
    assert os.path.isfile(filename)
    cmd = ["ffmpeg", "-i", filename, "-f", "ffmetadata", "-"]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    p.check_returncode()
    metadataRaw = p.stdout
    data = dict()
    for metadataLine in metadataRaw.split(b'\n'):
        # Skip empty lines
        if not len(metadataLine):
            continue
        # Skip comments
        if metadataLine.startswith(b';'):
            continue
        # Parse key-value. Split only on the FIRST '=': values containing an
        # (escaped) '=' produced 3+ parts before and were wrongly dropped.
        metadataLineSplit = metadataLine.split(b'=', 1)
        if len(metadataLineSplit) != 2:
            log.warning("Unparsed metadata line: `{}`".format(metadataLine))
            continue
        key, val = metadataLineSplit
        key = key.decode().lower()
        val = val.decode()
        data[key] = val
    return data
|
|
|
|
|
|
|
def videoInfos(filename):
    """Return ffprobe's stream information for *filename* as parsed JSON.

    Raises subprocess.CalledProcessError if ffprobe fails.
    """
    assert os.path.isfile(filename)
    probeCmd = ["ffprobe", filename, "-print_format", "json", "-show_streams"]
    probeProc = subprocess.run(probeCmd, stdout=subprocess.PIPE,
                               stderr=subprocess.DEVNULL)
    probeProc.check_returncode()
    return json.loads(probeProc.stdout)
|
|
|
|
|
|
|
from pprint import pprint |
|
|
|
def streamDuration(stream):
    """Return the duration in seconds of an ffprobe stream dict.

    Tries, in order: the explicit "duration" field, the frame count divided
    by the sample rate, then a "DURATION" tag formatted as H:MM:SS[.fff].

    Raises KeyError when none of these sources is available.
    """
    if "duration" in stream:
        return float(stream["duration"])
    if "sample_rate" in stream and "nb_frames" in stream:
        return int(stream["nb_frames"]) / int(stream["sample_rate"])
    if "tags" in stream and "DURATION" in stream["tags"]:
        parts = stream["tags"]["DURATION"].split(":")
        assert len(parts) == 3
        hours, minutes, seconds = (float(part) for part in parts)
        return (hours * 60 + minutes) * 60 + seconds
    raise KeyError("Can't find duration information in stream")
|
|
|
|
|
|
|
def videoDuration(filename):
    """Return the duration of the video *filename*, in seconds.

    Averages the per-stream durations reported by ffprobe, asserting that
    the streams agree (standard deviation <= DURATION_MAX_DEV seconds).

    Raises ValueError if the file has no stream with a duration.
    """
    # TODO Doesn't work with VP8 / webm
    infos = videoInfos(filename)
    durations = [streamDuration(stream) for stream in infos["streams"]]
    if not durations:
        raise ValueError("No stream with a duration in {}".format(filename))
    # statistics.stdev needs at least two data points; a single-stream file
    # previously raised StatisticsError here even though there is trivially
    # no deviation to check.
    if len(durations) > 1:
        dev = statistics.stdev(durations)
        assert dev <= DURATION_MAX_DEV, "Too much deviation ({} s)".format(dev)
    return sum(durations) / len(durations)
|
|
|
|
|
|
|
|
|
|
|
# Work list of pending conversions plus overall size/duration statistics.
todos = set()
totalSize, totalDuration = 0, 0
|
|
|
|
|
|
|
# NOTE(review): from here on the file is corrupted merge/diff residue.
# Indentation has been stripped, a unified-diff hunk header survives below,
# and old- and new-version lines are interleaved. Kept verbatim for review;
# this must NOT be run as-is.

# Walk folders
log.info("Listing files in {}".format(PICTURES_FOLDER))
allVideos = list()
for root, dirs, files in os.walk(PICTURES_FOLDER):
# If folder is in ORIGINAL_FOLDER, skip it
# NOTE(review): the next two lines are the old and new versions of the same
# check (ORIGNAL_FOLDER vs ORIGINAL_FOLDER) left stacked by the bad merge.
if root.startswith(ORIGNAL_FOLDER):
if root.startswith(ORIGINAL_FOLDER):
continue
# Iterate over files
for inputName in files:
# NOTE(review): stray unified-diff hunk header left in the file.
@ -27,137 +105,109 @@ for root, dirs, files in os.walk(PICTURES_FOLDER):
# NOTE(review): inputExt / inputNameBase are used below but are only assigned
# in the later "Analyzing videos" loop — residue of the pre-merge version.
if inputExt not in MOVIE_EXTENSIONS:
continue

# Generates all needed filepaths
## Found file
inputFull = os.path.join(root, inputName)
inputRel = os.path.relpath(inputFull, PICTURES_FOLDER)
## Original file
originalFull = os.path.join(ORIGNAL_FOLDER, inputRel)
originalRel = inputRel
## Compressed file
outputFull = os.path.join(root, inputNameBase + "." + OUTPUT_EXTENSION)

# If the extension is the same of the output one
if inputExt == OUTPUT_EXTENSION:
# Read the metadata of the video
metadataRaw = subprocess.run(["ffmpeg", "-i", inputFull, "-f", "ffmetadata", "-"], stdout=subprocess.PIPE).stdout
# If it has the field with the original file
originalRel = None
# NOTE(review): OUTPUT_METADATA_FIELD is a list above, so .encode() would
# raise AttributeError; and metadataRaw is bytes, so split('\n') with a str
# separator would raise TypeError. Both superseded by videoMetadata().
wantedPattern = OUTPUT_METADATA_FIELD.encode() + b"="
for metadataLine in metadataRaw.split('\n'):
if metadataLine.startswith(wantedPattern):
originalRel = metadataLine[len(wantedPattern)+1:]
break
if originalRel:
# If the original file does not exists, warn about it
originalFull = os.path.join(ORIGNAL_FOLDER, originalRel)
if not os.path.isfile(originalFull):
print("WARN {inputRel} states to have {originalRel} as original but this file doesn't exist".format(inputRel=inputRel, originalRel=originalRel))
# If the original is not aligned with the compressed, warn about it (TODO move it automatically)
if inputRel != originalRel:
print("WARN {inputRel} is not aligned with original {originalRel}".format(inputRel=inputRel, originalRel=originalRel))
# Skip file
continue
# Initiate a conversion in a temporary file
# If the temporary file does not have the same caracteristics as the original
# Warn about it
# Delete it
# Skip file
# Move the original to the corresponding original folder
# Move the converted file in place of the original

# TODO Iterate over the orignal folder to find non-matching compressed videos not found in the above pass

# NOTE(review): everything after this exit is unreachable in the old version;
# the lines that follow mix in an unrelated music-conversion script.
sys.exit(0)
|
|
|
|
|
|
|
# NOTE(review): this whole section (constants and file listing) belongs to an
# unrelated music-conversion script and is dead code after the sys.exit(0)
# above — residue of the bad merge.

# Constants
SOURCE_FOLDER = os.path.join(os.path.expanduser("~"), "Musique")
OUTPUT_FOLDER = os.path.join(os.path.expanduser("~"), ".MusiqueCompressed")
CONVERSIONS = {"flac": "m4a"}
FORBIDDEN_EXTENSIONS = ["jpg", "pdf", "ffs_db"]
FORGIVEN_FILENAMES = ["cover.jpg"]
IGNORED_EMPTY_FOLDER = [".stfolder"]

# Listing files
# Maps relative path -> ctime for every file under the source tree.
sourceFiles = dict()
for root, dirs, files in os.walk(SOURCE_FOLDER):
for f in files:
fullPath = os.path.join(root, f)
path = os.path.relpath(fullPath, SOURCE_FOLDER)
sourceFiles[path] = os.path.getctime(fullPath)

# Same map for the already-converted output tree.
outputFiles = dict()
for root, dirs, files in os.walk(OUTPUT_FOLDER):
for f in files:
fullPath = os.path.join(root, f)
path = os.path.relpath(fullPath, OUTPUT_FOLDER)
outputFiles[path] = os.path.getctime(fullPath)

# Sorting files
remainingConversions = dict()
# Output files with no matching source are candidates for deletion below.
extraFiles = list(outputFiles.keys())
|
|
|
|
|
|
|
def convertPath(path):
    """Decide the output path for a music source file.

    Returns False when the file must be skipped entirely, otherwise the
    relative output path (with the extension swapped when a conversion is
    configured, unchanged for a plain copy).
    """
    stem, ext = os.path.splitext(path)
    ext = ext[1:].lower()
    # Disallowed extension: keep the file only if its basename is forgiven.
    if ext in FORBIDDEN_EXTENSIONS:
        if os.path.basename(path) not in FORGIVEN_FILENAMES:
            # This file shouldn't be copied nor converted
            return False
    # A configured conversion: swap in the target extension.
    elif ext in CONVERSIONS:
        return stem + "." + CONVERSIONS[ext]
    # In all other cases, this is a simple copy.
    return path
|
|
|
|
|
|
|
# NOTE(review): music-script residue (dead after the earlier sys.exit(0)).
for sourceFile in sourceFiles:
outputFile = convertPath(sourceFile)
# If the file should not be converted, do nothing
if outputFile == False:
continue
# If the file already has something as an output
elif outputFile in outputFiles:
extraFiles.remove(outputFile)
# If the output file is newer than the source file, do not initiate a conversion
if outputFiles[outputFile] >= sourceFiles[sourceFile]:
# NOTE(review): this line belongs to the video script's walk loop — root and
# inputName are not defined in this (music) context. Merge residue.
allVideos.append((root, inputName))
|
|
|
|
|
|
|
# NOTE(review): start of the intended "new" analysis pass of the video
# compressor, but its body is interleaved with music-script lines below.
log.info("Analyzing videos")
for root, inputName in progressbar.progressbar(allVideos):
inputNameBase, inputExt = os.path.splitext(inputName)
inputExt = inputExt[1:].lower()

# Generates all needed filepaths
## Found file
inputFull = os.path.join(root, inputName)
inputRel = os.path.relpath(inputFull, PICTURES_FOLDER)
## Original file
originalFull = os.path.join(ORIGINAL_FOLDER, inputRel)
originalRel = inputRel
# NOTE(review): `originalFile` is undefined — NameError if this assert fires;
# presumably `originalFull` was meant.
assert not os.path.isfile(originalFull), originalFile + " exists"

## Compressed file
outputFull = os.path.join(root, inputNameBase + "." + OUTPUT_EXTENSION)

# If the extension is the same of the output one
if inputExt == OUTPUT_EXTENSION:
# Read the metadata of the video
meta = videoMetadata(inputFull)

# If it has the field with the original file
if 'original' in meta:
# Skip file
continue
# If the file needs to be converted, do it
# NOTE(review): music-script residue — sourceFile/outputFile are not defined
# in this loop.
remainingConversions[sourceFile] = outputFile
|
|
|
|
|
|
|
# NOTE(review): music-script residue (dead code), with one stray video-script
# assert fused at the end.
# Converting files
for sourceFile in remainingConversions:
outputFile = remainingConversions[sourceFile]

# Creating folder if it doesn't exists
fullOutputFile = os.path.join(OUTPUT_FOLDER, outputFile)
fullOutputDir = os.path.dirname(fullOutputFile)
os.makedirs(fullOutputDir, exist_ok=True)

# Converting
fullSourceFile = os.path.join(SOURCE_FOLDER, sourceFile)
print(fullSourceFile, "→", fullOutputFile)
if sourceFile == outputFile:
# shutil.copy(fullSourceFile, fullOutputFile)
os.link(fullSourceFile, fullOutputFile)
else:
subprocess.run(["ffmpeg", "-y", "-i", fullSourceFile, "-codec:a", "libfdk_aac", "-cutoff", "18000", "-movflags", "+faststart", "-vbr", "5", fullOutputFile])
# NOTE(review): video-script residue — outputFull is not defined in this
# (music) context.
assert not os.path.isfile(outputFull), outputFull + " exists"
|
|
|
|
|
|
|
# NOTE(review): music-script residue (dead code).
# Removing extra files
for extraFile in extraFiles:
fullExtraFile = os.path.join(OUTPUT_FOLDER, extraFile)
os.remove(fullExtraFile)

# Removing empty dirs
# NOTE(review): os.walk is top-down by default here, so directories that only
# become empty after their children are removed are presumably missed —
# verify against the original script's intent.
for root, dirs, files in os.walk(OUTPUT_FOLDER):
if not dirs and not files:
dirBasename = os.path.basename(root)
if dirBasename not in IGNORED_EMPTY_FOLDER:
os.rmdir(root)
|
|
|
|
|
|
|
# NOTE(review): orphaned tail of the per-video analysis loop body (it belongs
# inside the "Analyzing videos" loop above — the `continue` below has no
# enclosing loop at this position). Merge residue; kept verbatim.
size = os.stat(inputFull).st_size
try:
duration = videoDuration(inputFull)
except Exception as e:
log.warning("Can't determine duration of {}, skipping".format(inputFull))
log.debug(e, exc_info=True)
continue

# Queue this video for conversion and accumulate the totals.
todo = (inputFull, originalFull, outputFull, size, duration)
totalDuration += duration
totalSize += size
todos.add(todo)

log.info("Converting {} videos ({})".format(len(todos), datetime.timedelta(seconds=totalDuration)))
|
|
|
|
|
|
|
# From https://stackoverflow.com/a/3431838
def sha256(fname):
    """Return the hex SHA-256 digest of the file *fname*, read in chunks."""
    digest = hashlib.sha256()
    with open(fname, "rb") as fh:
        for chunk in iter(lambda: fh.read(131072), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
|
# Progress bar things
# A DataSize widget bound to max_value displays the total byte count.
totalDataSize = progressbar.widgets.DataSize()
totalDataSize.variable = 'max_value'
barWidgets = [
    progressbar.widgets.DataSize(), ' of ', totalDataSize, ' ',
    progressbar.widgets.Bar(), ' ',
    progressbar.widgets.FileTransferSpeed(), ' ',
    progressbar.widgets.AdaptiveETA(),
]
bar = progressbar.DataTransferBar(max_value=totalSize, widgets=barWidgets)
bar.start()
# Bytes of input handled so far, advanced after each video.
processedSize = 0
|
|
|
|
|
|
|
|
|
|
|
# Convert each queued video into a temporary file, verify it, then swap the
# original into ORIGINAL_FOLDER and the compressed file into its place.
for inputFull, originalFull, outputFull, size, duration in todos:
    # mkstemp returns (fd, path); close the descriptor immediately so we do
    # not leak one open fd per processed video (the original discarded it
    # with [1] and left it open).
    tmpfd, tmpfile = tempfile.mkstemp(prefix="compressPictureMovies",
                                      suffix="." + OUTPUT_EXTENSION)
    os.close(tmpfd)
    try:
        # Calculate the sum of the original file
        checksum = sha256(inputFull)

        # Initiate a conversion in a temporary file, tagging it with a
        # reference to its original (relative path + checksum).
        originalRel = os.path.relpath(originalFull, ORIGINAL_FOLDER)
        originalContent = "{} {}".format(originalRel, checksum)
        # NOTE(review): the embedded double quotes become part of the stored
        # metadata value — confirm this is intended.
        metadataCmd = ["-metadata", 'original="{}"'.format(originalContent)]
        cmd = ["ffmpeg", "-hide_banner", "-y", "-i", inputFull] + OUTPUT_FFMPEG_PARAMETERS + metadataCmd + [tmpfile]
        p = subprocess.run(cmd)
        p.check_returncode()

        # Verify the duration of the new file against the source.
        newDuration = videoDuration(tmpfile)
        dev = statistics.stdev((duration, newDuration))
        assert dev < DURATION_MAX_DEV, "Too much deviation in duration"

        # Move the original to the corresponding original folder
        originalDir = os.path.dirname(originalFull)
        os.makedirs(originalDir, exist_ok=True)
        shutil.move(inputFull, originalFull)

        # Move the converted file in place of the original
        shutil.move(tmpfile, outputFull)
    except Exception as e:
        # Best-effort: log the failure, drop the temp file, move on.
        log.error("Couldn't process file {}".format(inputFull))
        log.error(e, exc_info=True)
        try:
            os.unlink(tmpfile)
        except Exception:
            pass
    # Progress bar things
    processedSize += size
    bar.update(processedSize)
bar.finish()
|
|
|
|
|
|
|
|
|
|
|
# TODO Iterate over the already compressed videos to assert the originals are |
|
|
|
# in their correct place, else move them |