rmf
This commit is contained in:
		
							parent
							
								
									c427ac3370
								
							
						
					
					
						commit
						21615a1f9c
					
				
					 3 changed files with 360 additions and 17 deletions
				
			
		|  | @ -1,3 +1,3 @@ | |||
| set runtimepath^=~/.vim runtimepath+=~/.vim/after | ||||
| set runtimepath+=~/.config/vim,~/.cache/vim | ||||
| let &packpath = &runtimepath | ||||
| source ~/.vimrc | ||||
| source ~/.config/vimrc | ||||
|  |  | |||
|  | @ -1,23 +1,365 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| # Rename sync-conflict files to normal files | ||||
| 
 | ||||
| # WARNING Does not check for conflicts | ||||
| # Handles sync-conflict files | ||||
| 
 | ||||
| import argparse | ||||
| import os | ||||
| import pickle | ||||
| import re | ||||
| import sys | ||||
| import zlib | ||||
| import coloredlogs | ||||
| import progressbar | ||||
| import logging | ||||
| 
 | ||||
# NOTE(review): this loop looks like the previous revision of the script that
# the diff view interleaved with the new code -- confirm whether it should be
# dropped. As written it only reports conflict files whose stripped name
# already exists in the same directory; it renames nothing.
for root, dirs, files in os.walk('.'):
    for f in files:
        if '.sync-conflict' not in f:
            continue
        # Raw string so \d and \w reach the regex engine untouched; the
        # leading dot is escaped so it only matches a literal '.'.
        nf = re.sub(r'\.sync-conflict-\d{8}-\d{6}-\w{7}', '', f)
        F = os.path.join(root, f)
        NF = os.path.join(root, nf)
        if os.path.exists(NF):
            print(f"'{F}' → '{NF}': file already exists")

# Let progress bars and log lines share stderr.
progressbar.streams.wrap_stderr()
coloredlogs.install(level='INFO', fmt='%(levelname)s %(message)s')
# Root logger, used as `log` by the classes in this file.
log = logging.getLogger()
| 
 | ||||
| # 1) Create file list with conflict files | ||||
| # 2) Gather file information (date, owner, size, checksum) | ||||
| # 3) Propose what to do | ||||
| 
 | ||||
| 
 | ||||
def sizeof_fmt(num, suffix='B'):
    """Format a byte count with binary (1024-based) unit prefixes.

    Args:
        num: Size to format; negative values keep their sign.
        suffix: Unit appended after the prefix.

    Returns:
        A string such as ``"1.5 MiB"``, with one decimal place.
    """
    # Stolen from https://stackoverflow.com/a/1094933
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        # Compare the value as it will be displayed: 1023.96 would print as
        # "1024.0", so it must move on to the next unit instead.
        if abs(round(num, 1)) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)
| 
 | ||||
| 
 | ||||
class Table():
    """Fixed-size grid of strings, addressed as ``data[x][y]``."""

    def __init__(self, width, height):
        """Create a ``width`` x ``height`` grid filled with empty strings."""
        self.width = width
        self.height = height
        # One independent column list per x; multiplying a nested list would
        # make every column alias the same object.
        self.data = [[''] * self.height for _ in range(self.width)]

    def set(self, x, y, data):
        """Store ``str(data)`` in the cell at column ``x``, row ``y``."""
        self.data[x][y] = str(data)
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
class Database():
    """Index of conflict groups found under one directory.

    ``data`` maps ``(root, filename)`` to the ``DatabaseFile`` that tracks the
    file called ``filename`` in ``root`` together with its conflict copies.
    Instances are pickled by the script's entry point, so attribute names are
    part of the on-disk format.
    """
    VERSION = 1
    # ".sync-conflict-" followed by 8 digits, 6 digits and 7 word characters.
    # Raw string so the backslash escapes are passed to the regex engine as-is.
    CONFLICT_PATTERN = re.compile(r'\.sync-conflict-\d{8}-\d{6}-\w{7}')

    def __init__(self, directory):
        """Create an empty database for ``directory``."""
        self.version = Database.VERSION
        self.directory = directory
        self.data = dict()

    def prune(self):
        """Refresh every entry and drop the ones that are no longer relevant."""
        toPrune = []
        for key, databaseFile in self.data.items():
            databaseFile.migrate()  # TODO Temp dev stuff
            databaseFile.prune()
            if not databaseFile.isRelevant():
                toPrune.append(key)
        # Delete after the scan: the dict cannot shrink while being iterated.
        for key in toPrune:
            del self.data[key]

    def nbFiles(self):
        """Return the number of tracked files across all entries."""
        return sum(databaseFile.nbFiles() for databaseFile in self.data.values())

    def totalSize(self):
        """Return the summed size of all tracked files."""
        return sum(databaseFile.totalSize() for databaseFile in self.data.values())

    def maxSize(self):
        """Return the sum, over all entries, of each entry's largest file."""
        return sum(databaseFile.maxSize() for databaseFile in self.data.values())

    def totalChecksumSize(self):
        """Return the number of bytes that still have to be checksummed."""
        return sum(databaseFile.totalChecksumSize() for databaseFile in self.data.values())

    def getList(self):
        """Prune stale entries, then walk the directory to register conflicts."""
        self.prune()

        log.info("Finding conflict files")
        widgets = [
            progressbar.AnimatedMarker(), ' ',
            progressbar.BouncingBar(), ' ',
            progressbar.DynamicMessage('conflicts'), ' ',
            progressbar.DynamicMessage('files'), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.Timer(),
        ]
        bar = progressbar.ProgressBar(widgets=widgets).start()
        f = 0
        for root, dirs, files in os.walk(self.directory):
            # Built once per directory so the "does the original exist" check
            # below is not a linear scan for every conflict file.
            present = set(files)
            for conflictFilename in files:
                f += 1
                if not Database.CONFLICT_PATTERN.search(conflictFilename):
                    continue
                filename = Database.CONFLICT_PATTERN.sub('', conflictFilename)
                key = (root, filename)
                dataFile = self.data.get(key)
                if dataFile is None:
                    dataFile = DatabaseFile(root, filename)
                    self.data[key] = dataFile

                # Track the un-suffixed file too when it exists alongside.
                if filename in present:
                    dataFile.addConflict(filename)
                dataFile.addConflict(conflictFilename)

            bar.update(conflicts=len(self.data), files=f,
                       dir=root[(len(self.directory)+1):])
        bar.finish()
        log.info(
            f"Found {len(self.data)} conflicts, totalling {self.nbFiles()} conflict files.")

    def getStats(self):
        """Refresh the stat information of every tracked file."""
        log.info("Getting stats from conflict files")
        bar = progressbar.ProgressBar(max_value=self.nbFiles()).start()
        f = 0
        for databaseFile in self.data.values():
            databaseFile.getStats()
            f += databaseFile.nbFiles()
            bar.update(f)
        bar.finish()
        log.info(
            f"Total file size: {sizeof_fmt(self.totalSize())}, possible save: {sizeof_fmt(self.totalSize() - self.maxSize())}")

    def getChecksums(self):
        """Checksum every file that does not have a checksum yet.

        Ctrl-C stops the pass early and returns normally; errors on a single
        entry are logged and the pass moves on to the next entry.
        """
        log.info("Checksumming conflict files")
        widgets = [
            progressbar.DataSize(), ' of ', progressbar.DataSize('max_value'),
            ' (', progressbar.AdaptiveTransferSpeed(), ') ',
            progressbar.Bar(), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.DynamicMessage('file', width=20, precision=20), ' ',
            progressbar.Timer(), ' ',
            progressbar.AdaptiveETA(),
        ]
        bar = progressbar.DataTransferBar(
            max_value=self.totalChecksumSize(), widgets=widgets).start()
        f = 0
        for databaseFile in self.data.values():
            bar.update(f, dir=databaseFile.root[(len(self.directory)+1):], file=databaseFile.filename)
            # Read the pending size before summing: it drops once sums exist.
            f += databaseFile.totalChecksumSize()
            try:
                databaseFile.getChecksums()
            except KeyboardInterrupt:
                return
            except Exception as e:
                # Exception rather than BaseException, so SystemExit and
                # friends are not swallowed here.
                log.error(e, exc_info=True)
        bar.finish()

    def act(self):
        """Placeholder; does nothing yet."""
        pass
| 
 | ||||
| 
 | ||||
class DatabaseFile():
    """One file name in one directory, plus every conflict copy found for it.

    ``conflicts``, ``stats`` and ``checksums`` are parallel lists: index ``f``
    in each describes the same file on disk. A checksum is ``None`` until
    computed; ``getStats`` may also store ``True``/``False`` to record that no
    content comparison is needed.
    """
    BLOCK_SIZE = 4096
    RELEVANT_STATS = ('st_mode', 'st_uid', 'st_gid',
                      'st_size', 'st_mtime', 'st_ctime')

    def __init__(self, root, filename):
        """Create an empty entry for ``filename`` inside directory ``root``."""
        self.root = root
        self.filename = filename
        self.stats = []
        self.conflicts = []
        self.checksums = []
        log.debug(f"{self.root}/{self.filename} - new")

    def addConflict(self, conflict):
        """Start tracking the file name ``conflict`` unless already tracked."""
        if conflict in self.conflicts:
            return
        self.conflicts.append(conflict)
        self.stats.append(None)
        self.checksums.append(None)
        log.debug(f"{self.root}/{self.filename} - add: {conflict}")

    def migrate(self):
        """Fill in attributes that instances from older pickles may lack."""
        # Temp dev stuff since I don't want to resum that whole 400 GiB dir
        if getattr(self, 'stats', None) is None:
            self.stats = [None] * len(self.conflicts)
        if getattr(self, 'checksums', None) is None:
            self.checksums = [None] * len(self.conflicts)

    def removeConflict(self, conflict):
        """Stop tracking ``conflict``; raises ValueError if it is unknown."""
        f = self.conflicts.index(conflict)
        del self.conflicts[f]
        del self.stats[f]
        del self.checksums[f]
        log.debug(f"{self.root}/{self.filename} - del: {conflict}")

    def getPathFile(self, conflict):
        """Return the path of ``conflict`` inside this entry's directory."""
        return os.path.join(self.root, conflict)

    def getPathFiles(self):
        """Return the paths of all tracked files."""
        return [self.getPathFile(conflict) for conflict in self.conflicts]

    def prune(self):
        """Forget tracked files that no longer exist as regular files."""
        # Collect first: removeConflict mutates the list being scanned.
        toPrune = [conflict for conflict in self.conflicts
                   if not os.path.isfile(self.getPathFile(conflict))]
        for conflict in toPrune:
            self.removeConflict(conflict)

    def isRelevant(self):
        """Return whether this entry still describes a conflict.

        An entry is irrelevant when it tracks nothing, or only the
        un-suffixed file itself.
        """
        if not self.conflicts:
            return False
        return self.conflicts != [self.filename]

    def nbFiles(self):
        """Return how many files this entry tracks."""
        return len(self.conflicts)

    def totalSize(self):
        """Return the summed size of tracked files with known stats."""
        return sum((stat.st_size if stat is not None else 0) for stat in self.stats)

    def maxSize(self):
        """Return the size of the largest tracked file (0 when none)."""
        return max(((stat.st_size if stat is not None else 0) for stat in self.stats),
                   default=0)

    def totalChecksumSize(self):
        """Return the bytes belonging to files that still lack a checksum."""
        size = 0
        for stat, checksum in zip(self.stats, self.checksums):
            if checksum is None and stat is not None:
                size += stat.st_size
        return size

    def getStats(self):
        """Re-stat every tracked file and invalidate outdated checksums."""
        for f, conflict in enumerate(self.conflicts):
            oldStat = self.stats[f]
            newStat = os.stat(self.getPathFile(conflict))
            oldChecksum = self.checksums[f]

            # If it's been already summed, and we have the same inode and same ctime, don't resum
            unchanged = (
                oldStat is not None
                and isinstance(oldChecksum, int)
                and oldStat.st_size == newStat.st_size
                and oldStat.st_dev == newStat.st_dev
                and oldStat.st_ino == newStat.st_ino
                and oldStat.st_ctime == newStat.st_ctime
            )
            if not unchanged:
                self.checksums[f] = None

            self.stats[f] = newStat

        # If all the file are of different size, set as different files
        if len(self.stats) == len({s.st_size for s in self.stats}):
            self.checksums = [False] * len(self.conflicts)

        # If all the files are the same inode, set as same files
        if len({s.st_ino for s in self.stats}) == 1 and len({s.st_dev for s in self.stats}) == 1:
            self.checksums = [True] * len(self.conflicts)

    def getChecksums(self):
        """Checksum the files that lack one, reading them in lock-step.

        All pending files are read one block at a time; a file is dropped
        from the pass once it hits end of file or its running checksum no
        longer matches any other checksum in the entry. If the pass is
        aborted by an exception, the files still being read get their
        checksum reset to ``None`` so they are retried later.
        """
        # TODO It's not even required to have a sum, this thing is not collision resistant now
        # TODO We might use BTRFS feature to know if conflict files are deduplicated between them

        filedescs = dict()
        try:
            for f, conflict in enumerate(self.conflicts):
                if self.checksums[f] is not None:
                    continue
                # Open before seeding the checksum, so a failed open does not
                # leave a value that looks like a finished sum.
                filedescs[f] = open(self.getPathFile(conflict), 'rb')
                self.checksums[f] = 1

            while filedescs:
                toClose = set()

                # Compute checksums for next block for all files
                for f, filedesc in filedescs.items():
                    data = filedesc.read(DatabaseFile.BLOCK_SIZE)
                    self.checksums[f] = zlib.adler32(data, self.checksums[f])
                    if len(data) < DatabaseFile.BLOCK_SIZE:
                        toClose.add(f)

                # Stop summing as soon as checksum diverge
                for f in filedescs:
                    if self.checksums.count(self.checksums[f]) < 2:
                        toClose.add(f)

                for f in toClose:
                    filedescs.pop(f).close()
        finally:
            # Only non-empty when the pass was interrupted: release the
            # handles and discard the partial sums.
            for f, filedesc in filedescs.items():
                filedesc.close()
                self.checksums[f] = None

    def getFeatures(self):
        """Return ``{feature: [value per tracked file]}``.

        Features are ``'sum'`` (the checksums) and each name listed in
        ``RELEVANT_STATS``; every file must already have stats.
        """
        features = dict()
        features['sum'] = self.checksums
        for name in DatabaseFile.RELEVANT_STATS:
            features[name] = [getattr(stat, name) for stat in self.stats]
        return features

    def getDiffFeatures(self):
        """Return only the features whose value differs between files."""
        return {key: vals for key, vals in self.getFeatures().items()
                if len(set(vals)) > 1}

    def printInfos(self):
        """Print the path of the un-suffixed file this entry is about."""
        print(os.path.join(self.root, self.filename))

        # TODO: the previous implementation, kept for reference:
        # nf = re.sub( '', f)
        # F = os.path.join(root, f)
        # NF = os.path.join(root, nf)
        # if os.path.exists(NF):
        #     print(f"'{F}' → '{NF}': file already exists")
        # else:
        #     print(f"'{F}' → '{NF}': done")
        #     # os.rename(F, NF)
| 
 | ||||
| 
 | ||||
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description="Handle Syncthing's .sync-conflict files ")

    # Execution flow
    parser.add_argument(
        '--database', help='Database path for file informations')

    parser.add_argument('directory', metavar='DIRECTORY',
                        nargs='?', help='Directory to analyse')

    args = parser.parse_args()

    # Argument default values attribution
    if args.directory is None:
        args.directory = os.curdir
    args.directory = os.path.realpath(args.directory)

    # Create / load the database
    database = None
    if args.database and os.path.isfile(args.database):
        # NOTE: unpickling can execute arbitrary code; only point --database
        # at files written by this script.
        try:
            with open(args.database, 'rb') as databaseFile:
                database = pickle.load(databaseFile)
        except Exception as e:
            # Keep the original error as the cause instead of hiding it.
            raise ValueError("Not a database file") from e
        # Explicit raises rather than assert, which `python -O` strips.
        if not isinstance(database, Database):
            raise ValueError("Not a database file")
        if database.version > Database.VERSION:
            raise ValueError("Version of the loaded database is too recent")
        if database.directory != args.directory:
            raise ValueError("Directory of the loaded database doesn't match")

    if database is None:
        database = Database(args.directory)

    def saveDatabase():
        """Write the database to the --database path, if one was given."""
        if args.database:
            with open(args.database, 'wb') as databaseFile:
                pickle.dump(database, databaseFile)

    # Save after each phase so completed work is kept if a later phase fails.
    database.getList()
    saveDatabase()

    database.getStats()
    saveDatabase()

    database.getChecksums()
    saveDatabase()

    database.act()
|  |  | |||
|  | @ -45,11 +45,12 @@ export JAVA_FONTS=/usr/share/fonts/TTF # 2019-04-25 Attempt to remove .java/font | |||
| # export ARDMK_VENDOR=archlinux-arduino | ||||
| 
 | ||||
| # Get out of my $HOME! | ||||
| direnv GOPATH "$HOME/.cache/go" | ||||
| export BOOT9_PATH="$HOME/.local/share/citra-emu/sysdata/boot9.bin" | ||||
| direnv CARGOHOME "$HOME/.cache/cargo" # There are config in there that we can version if one want | ||||
| direnv CCACHE_BASEDIR "$HOME/.cache/ccache" | ||||
| export CCACHE_CONFIGPATH="$HOME/.config/ccache.conf" | ||||
| direnv GNUPGHOME "$HOME/.config/gnupg" | ||||
| direnv GOPATH "$HOME/.cache/go" | ||||
| direnv GRADLE_USER_HOME "$HOME/.cache/gradle" | ||||
| export INPUTRC="$HOME/.config/inputrc" | ||||
| export LESSHISTFILE="$HOME/.cache/lesshst" | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue