#!/usr/bin/env python3
# Handles sync-conflict files

import argparse
import logging
import os
import pickle
import re
import sys
import zlib

import coloredlogs
import progressbar

progressbar.streams.wrap_stderr()
coloredlogs.install(level='INFO', fmt='%(levelname)s %(message)s')
log = logging.getLogger()

# 1) Create file list with conflict files
# 2) Gather file informations (date, owner, size, checksum)
# 3) Propose what to do


def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    Stolen from https://stackoverflow.com/a/1094933
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)


class Table():
    """Fixed-size width x height grid of strings, filled cell by cell."""

    def __init__(self, width, height):
        self.width = width
        self.height = height
        # BUGFIX: original was `[['' ** self.height] ** self.width]`, which
        # raises TypeError (`**` is exponentiation, not replication). Build
        # one independent column per x so columns don't alias each other.
        self.data = [[''] * self.height for _ in range(self.width)]

    def set(self, x, y, data):
        """Store `data` (stringified) at cell (x, y)."""
        # BUGFIX: original signature was missing `self`.
        self.data[x][y] = str(data)


class Database():
    """All known conflicts under one directory, keyed by (root, filename)."""

    VERSION = 1
    # Syncthing conflict marker, e.g. ".sync-conflict-20200101-123456-ABCDEFG"
    # BUGFIX: raw string — `\.`, `\d`, `\w` are invalid string escapes otherwise.
    CONFLICT_PATTERN = re.compile(r'\.sync-conflict-\d{8}-\d{6}-\w{7}')

    def __init__(self, directory):
        self.version = Database.VERSION
        self.directory = directory
        self.data = dict()

    def prune(self):
        """Drop entries whose files disappeared or that have nothing to compare."""
        toPrune = list()
        for filepath, databaseFile in self.data.items():
            databaseFile.migrate()  # TODO Temp dev stuff
            databaseFile.prune()
            if not databaseFile.isRelevant():
                toPrune.append(filepath)
        for filepath in toPrune:
            del self.data[filepath]

    def nbFiles(self):
        """Total number of conflict files across all entries."""
        return sum(databaseFile.nbFiles()
                   for databaseFile in self.data.values())

    def totalSize(self):
        """Total size in bytes of every known conflict file (stats required)."""
        return sum(databaseFile.totalSize()
                   for databaseFile in self.data.values())

    def maxSize(self):
        """Sum of the largest version of each file, i.e. the minimum to keep."""
        return sum(databaseFile.maxSize()
                   for databaseFile in self.data.values())

    def totalChecksumSize(self):
        """Bytes that still need to be read to checksum everything."""
        return sum(databaseFile.totalChecksumSize()
                   for databaseFile in self.data.values())

    def getList(self):
        """Walk the directory and register every sync-conflict file found."""
        self.prune()
        log.info("Finding conflict files")
        widgets = [
            progressbar.AnimatedMarker(), ' ',
            progressbar.BouncingBar(), ' ',
            progressbar.DynamicMessage('conflicts'), ' ',
            progressbar.DynamicMessage('files'), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.Timer(),
        ]
        bar = progressbar.ProgressBar(widgets=widgets).start()
        f = 0
        for root, dirs, files in os.walk(self.directory):
            for conflictFilename in files:
                f += 1
                if not Database.CONFLICT_PATTERN.search(conflictFilename):
                    continue
                # Name the file would have without the conflict marker.
                filename = Database.CONFLICT_PATTERN.sub('', conflictFilename)
                key = (root, filename)
                if key in self.data:
                    dataFile = self.data[key]
                else:
                    dataFile = DatabaseFile(root, filename)
                    self.data[key] = dataFile
                # Track the original file too when it still exists on disk.
                if filename in files:
                    dataFile.addConflict(filename)
                dataFile.addConflict(conflictFilename)
                bar.update(conflicts=len(self.data), files=f,
                           dir=root[(len(self.directory)+1):])
        bar.finish()
        log.info(
            f"Found {len(self.data)} conflicts, totalling {self.nbFiles()} conflict files.")

    def getStats(self):
        """Collect os.stat() information for every known conflict file."""
        log.info("Getting stats from conflict files")
        bar = progressbar.ProgressBar(max_value=self.nbFiles()).start()
        f = 0
        for databaseFile in self.data.values():
            databaseFile.getStats()
            f += databaseFile.nbFiles()
            bar.update(f)
        bar.finish()
        log.info(
            f"Total file size: {sizeof_fmt(self.totalSize())}, possible save: {sizeof_fmt(self.totalSize() - self.maxSize())}")

    def getChecksums(self):
        """Checksum conflict files whose sums are missing or stale."""
        log.info("Checksumming conflict files")
        widgets = [
            progressbar.DataSize(), ' of ', progressbar.DataSize('max_value'),
            ' (', progressbar.AdaptiveTransferSpeed(), ') ',
            progressbar.Bar(), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.DynamicMessage('file', width=20, precision=20), ' ',
            progressbar.Timer(), ' ',
            progressbar.AdaptiveETA(),
        ]
        bar = progressbar.DataTransferBar(
            max_value=self.totalChecksumSize(), widgets=widgets).start()
        f = 0
        for databaseFile in self.data.values():
            bar.update(f, dir=databaseFile.root[(len(self.directory)+1):],
                       file=databaseFile.filename)
            # Count this entry's remaining work BEFORE summing it: afterwards
            # its totalChecksumSize() drops to zero.
            f += databaseFile.totalChecksumSize()
            try:
                databaseFile.getChecksums()
            except KeyboardInterrupt:
                return
            except Exception as e:
                # BUGFIX: was `except BaseException`, which would also swallow
                # SystemExit. Best-effort: log and continue with the next entry.
                log.error(e, exc_info=True)
        bar.finish()

    def act(self):
        # Step 3 ("Propose what to do") is not implemented yet.
        pass
class DatabaseFile():
    """All versions (original + conflicts) of one file, with stats and sums.

    `conflicts`, `stats` and `checksums` are parallel lists indexed together:
    conflicts[i] <-> stats[i] <-> checksums[i]. A checksum entry is None
    (unknown), True (known identical), False (known different), or an
    adler32 int.
    """

    BLOCK_SIZE = 4096
    # Stat fields worth comparing between versions of a file.
    RELEVANT_STATS = ('st_mode', 'st_uid', 'st_gid',
                      'st_size', 'st_mtime', 'st_ctime')

    def __init__(self, root, filename):
        self.root = root
        self.filename = filename
        self.stats = []
        self.conflicts = []
        self.checksums = []
        log.debug(f"{self.root}/{self.filename} - new")

    def addConflict(self, conflict):
        """Register a version of this file (no-op if already known)."""
        if conflict in self.conflicts:
            return
        self.conflicts.append(conflict)
        self.stats.append(None)
        self.checksums.append(None)
        log.debug(f"{self.root}/{self.filename} - add: {conflict}")

    def migrate(self):
        # Temp dev stuff since I don't want to resum that whole 400 GiB dir:
        # backfill the parallel lists on entries unpickled from older versions.
        if self.stats is None:
            self.stats = [None] * len(self.conflicts)
        try:
            if self.checksums is None:
                self.checksums = [None] * len(self.conflicts)
        except AttributeError:
            # Attribute predates the checksums feature entirely.
            self.checksums = [None] * len(self.conflicts)

    def removeConflict(self, conflict):
        """Forget one version, keeping the parallel lists in sync."""
        f = self.conflicts.index(conflict)
        del self.conflicts[f]
        del self.stats[f]
        del self.checksums[f]
        log.debug(f"{self.root}/{self.filename} - del: {conflict}")

    def getPathFile(self, conflict):
        """Absolute-ish path of one version on disk."""
        return os.path.join(self.root, conflict)

    def getPathFiles(self):
        """Paths of every known version."""
        return [self.getPathFile(conflict) for conflict in self.conflicts]

    def prune(self):
        """Forget versions whose file no longer exists on disk."""
        toPrune = [conflict for conflict in self.conflicts
                   if not os.path.isfile(self.getPathFile(conflict))]
        for conflict in toPrune:
            self.removeConflict(conflict)

    def isRelevant(self):
        """True when there are at least two versions to compare.

        BUGFIX: the original fell off the end and returned None (falsy) for a
        lone conflict whose original file is gone; the result is now an
        explicit bool with the same truthiness in every case.
        """
        return len(self.conflicts) >= 2

    def nbFiles(self):
        """Number of known versions."""
        return len(self.conflicts)

    def totalSize(self):
        """Combined size of all versions (unstatted versions count as 0)."""
        return sum((stat.st_size if stat is not None else 0)
                   for stat in self.stats)

    def maxSize(self):
        """Size of the largest version (0 when nothing is statted).

        BUGFIX: `default=0` — the original raised ValueError on empty stats.
        """
        return max((stat.st_size if stat is not None else 0)
                   for stat in self.stats) if self.stats else 0

    def totalChecksumSize(self):
        """Bytes still to read: sizes of statted versions lacking a checksum."""
        size = 0
        for f, checksum in enumerate(self.checksums):
            if checksum is None:
                stat = self.stats[f]
                if stat is not None:
                    size += stat.st_size
        return size

    def getStats(self):
        """Refresh os.stat() info, invalidating checksums of changed files."""
        for f, conflict in enumerate(self.conflicts):
            oldStat = self.stats[f]
            newStat = os.stat(self.getPathFile(conflict))
            oldChecksum = self.checksums[f]
            # If it's been already summed, and we have the same inode and same
            # ctime, don't resum. (BUGFIX: the original tested st_dev twice.)
            if (oldStat is None
                    or not isinstance(oldChecksum, int)
                    or oldStat.st_size != newStat.st_size
                    or oldStat.st_dev != newStat.st_dev
                    or oldStat.st_ino != newStat.st_ino
                    or oldStat.st_ctime != newStat.st_ctime):
                self.checksums[f] = None
            self.stats[f] = newStat
        # If all the files are of different size, set as different files
        if len(self.stats) == len(set(s.st_size for s in self.stats)):
            self.checksums = [False] * len(self.conflicts)
        # If all the files are the same inode, set as same files
        if (len(set(s.st_ino for s in self.stats)) == 1
                and len(set(s.st_dev for s in self.stats)) == 1):
            self.checksums = [True] * len(self.conflicts)

    def getChecksums(self):
        """Checksum versions in lockstep, stopping early once sums diverge."""
        # TODO It's not even required to have a sum, this thing is not
        # collision resistant now
        # TODO We might use BTRFS feature to know if conflict files are
        # deduplicated between them
        filedescs = dict()
        for f, conflict in enumerate(self.conflicts):
            if self.checksums[f] is not None:
                continue
            self.checksums[f] = 1  # adler32 seed value
            filedescs[f] = open(self.getPathFile(conflict), 'rb')
        while len(filedescs):
            toClose = set()
            # Compute checksums for next block for all files
            for f, filedesc in filedescs.items():
                data = filedesc.read(DatabaseFile.BLOCK_SIZE)
                self.checksums[f] = zlib.adler32(data, self.checksums[f])
                if len(data) < DatabaseFile.BLOCK_SIZE:
                    toClose.add(f)  # reached EOF
            # Stop summing as soon as checksum diverge
            for f in filedescs.keys():
                if self.checksums.count(self.checksums[f]) < 2:
                    toClose.add(f)
            for f in toClose:
                filedescs[f].close()
                del filedescs[f]

    def getFeatures(self):
        """Map each feature name to its per-version values."""
        features = dict()
        features['sum'] = self.checksums
        # BUGFIX: original did `for f in enumerate(self.stats)` and indexed
        # `self.stats[f]` with the (index, value) tuple -> TypeError.
        for statName in DatabaseFile.RELEVANT_STATS:
            features[statName] = [getattr(stat, statName)
                                  for stat in self.stats]
        return features

    def getDiffFeatures(self):
        """Only the features whose values differ between versions."""
        features = self.getFeatures()
        diffFeatures = dict()
        for key, vals in features.items():
            if len(set(vals)) > 1:
                diffFeatures[key] = vals
        return diffFeatures

    def printInfos(self):
        # BUGFIX: original printed self.name, an attribute that doesn't exist.
        print(os.path.join(self.root, self.filename))


# Early prototype of the rename step, kept for reference:
# nf = re.sub( '', f)
# F = os.path.join(root, f)
# NF = os.path.join(root, nf)
# if os.path.exists(NF):
#     print(f"'{F}' → '{NF}': file already exists")
# else:
#     print(f"'{F}' → '{NF}': done")
#     # os.rename(F, NF)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Handle Syncthing's .sync-conflict files ")
    # Execution flow
    parser.add_argument(
        '--database', help='Database path for file informations')
    parser.add_argument('directory', metavar='DIRECTORY', nargs='?',
                        help='Directory to analyse')
    args = parser.parse_args()

    # Argument default values attribution
    if args.directory is None:
        args.directory = os.curdir
    args.directory = os.path.realpath(args.directory)

    # Create / load the database.
    # SECURITY NOTE: pickle.load executes arbitrary code from the file —
    # only load database files you created yourself.
    database = None
    if args.database:
        if os.path.isfile(args.database):
            try:
                with open(args.database, 'rb') as databaseFile:
                    database = pickle.load(databaseFile)
                assert isinstance(database, Database)
            except Exception as e:
                # BUGFIX: was `except BaseException` (swallowed Ctrl-C) and
                # dropped the original cause; chain it for diagnosis.
                raise ValueError("Not a database file") from e
            assert database.version <= Database.VERSION, "Version of the loaded database is too recent"
            assert database.directory == args.directory, "Directory of the loaded database doesn't match"
    if database is None:
        database = Database(args.directory)

    def saveDatabase():
        # Persist progress between the (long) phases below.
        if args.database:
            with open(args.database, 'wb') as databaseFile:
                pickle.dump(database, databaseFile)

    database.getList()
    saveDatabase()
    database.getStats()
    saveDatabase()
    database.getChecksums()
    saveDatabase()

    database.act()