This commit is contained in:
Geoffrey Frogeye 2019-06-16 17:14:00 +02:00
parent c427ac3370
commit 21615a1f9c
3 changed files with 360 additions and 17 deletions

View file

@ -1,3 +1,3 @@
" Post-commit content of this hunk (the removed and added columns were fused
" on each line): add the relocated Vim directories to runtimepath, mirror
" runtimepath into packpath, then load the relocated vimrc.
set runtimepath+=~/.config/vim,~/.cache/vim
let &packpath = &runtimepath
source ~/.config/vimrc

View file

@ -1,23 +1,365 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Rename sync-conflict files to normal files # Handles sync-conflict files
# WARNING Does not check for conflicts
import argparse
import os import os
import pickle
import re import re
import sys
import zlib
import coloredlogs
import progressbar
import logging
# Module-level setup. The lines of the removed rename loop that were fused
# onto these statements are dropped: that logic now only survives as the
# commented-out snippet at the end of DatabaseFile.

# Wrap stderr through progressbar (presumably so that log output does not
# garble the progress bars -- confirm against the progressbar documentation).
progressbar.streams.wrap_stderr()
coloredlogs.install(level='INFO', fmt='%(levelname)s %(message)s')
# Root logger, used by every class below.
log = logging.getLogger()

# 1) Create file list with conflict files
# 2) Gather file information (date, owner, size, checksum)
# 3) Propose what to do
def sizeof_fmt(num, suffix='B'):
    """Render ``num`` as a human-readable size with binary (1024-based) prefixes.

    Adapted from https://stackoverflow.com/a/1094933

    :param num: quantity to format; may be negative.
    :param suffix: unit symbol appended right after the prefix.
    :return: a string such as ``"1.5 KiB"``; anything that is still 1024 or
        more after the ``Zi`` step is expressed in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    for prefix in prefixes:
        if abs(scaled) < 1024.0:
            return "%3.1f %s%s" % (scaled, prefix, suffix)
        # Too large for this prefix: move on to the next one.
        scaled = scaled / 1024.0
    return "%.1f %s%s" % (scaled, 'Yi', suffix)
class Table():
    """Fixed-size grid of strings, addressed as ``data[x][y]``."""

    def __init__(self, width, height):
        """Create a ``width`` x ``height`` grid filled with empty strings.

        :param width: number of valid ``x`` indexes.
        :param height: number of valid ``y`` indexes.
        """
        self.width = width
        self.height = height
        # The original used ``**`` (a TypeError on str/list) where ``*`` was
        # meant. One fresh inner list is built per x so that writing a cell
        # does not alias into every other column.
        self.data = [[''] * self.height for _ in range(self.width)]

    def set(self, x, y, data):
        """Store ``str(data)`` in cell ``(x, y)``.

        :raises IndexError: if ``x`` or ``y`` is outside the grid.
        """
        self.data[x][y] = str(data)
class Database():
    """Collection of the conflicts found under one directory.

    ``data`` maps ``(root, filename)`` to the ``DatabaseFile`` gathering every
    copy of that file (the plain file, if present, plus its
    ``.sync-conflict-*`` siblings). The main script pickles instances of this
    class so that stats and checksums survive between runs.
    """

    VERSION = 1
    # Raw string: '\.', '\d' and '\w' are invalid escapes in a plain literal.
    CONFLICT_PATTERN = re.compile(r'\.sync-conflict-\d{8}-\d{6}-\w{7}')

    def __init__(self, directory):
        """Create an empty database for ``directory``."""
        self.version = Database.VERSION
        self.directory = directory
        self.data = dict()

    def prune(self):
        """Drop vanished copies, then drop entries that are no longer relevant."""
        toPrune = list()
        for filepath, databaseFile in self.data.items():
            databaseFile.migrate()  # TODO Temp dev stuff
            databaseFile.prune()
            if not databaseFile.isRelevant():
                toPrune.append(filepath)
        # Deleting is deferred: a dict cannot change size while iterated.
        for filepath in toPrune:
            del self.data[filepath]

    def nbFiles(self):
        """Return the number of file copies across all entries."""
        return sum(databaseFile.nbFiles()
                   for databaseFile in self.data.values())

    def totalSize(self):
        """Return the summed size of every file copy with known stats."""
        return sum(databaseFile.totalSize()
                   for databaseFile in self.data.values())

    def maxSize(self):
        """Return the sum, over entries, of each entry's largest copy."""
        return sum(databaseFile.maxSize()
                   for databaseFile in self.data.values())

    def totalChecksumSize(self):
        """Return how many bytes still have to be read for checksumming."""
        return sum(databaseFile.totalChecksumSize()
                   for databaseFile in self.data.values())

    def getList(self):
        """Walk ``self.directory`` and register every conflict file found.

        Existing entries are pruned first. For each conflict file, the
        matching entry is created if needed, and both the plain file (when it
        exists in the same directory) and the conflict file are added to it.
        """
        self.prune()

        log.info("Finding conflict files")
        widgets = [
            progressbar.AnimatedMarker(), ' ',
            progressbar.BouncingBar(), ' ',
            progressbar.DynamicMessage('conflicts'), ' ',
            progressbar.DynamicMessage('files'), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.Timer(),
        ]
        bar = progressbar.ProgressBar(widgets=widgets).start()
        f = 0
        for root, dirs, files in os.walk(self.directory):
            # Set built once per directory: membership is tested for every
            # conflict file found in it.
            present = set(files)
            for conflictFilename in files:
                f += 1
                if not Database.CONFLICT_PATTERN.search(conflictFilename):
                    continue
                filename = Database.CONFLICT_PATTERN.sub('', conflictFilename)
                key = (root, filename)
                if key in self.data:
                    dataFile = self.data[key]
                else:
                    dataFile = DatabaseFile(root, filename)
                    self.data[key] = dataFile

                if filename in present:
                    dataFile.addConflict(filename)
                dataFile.addConflict(conflictFilename)

            # NOTE(review): the source lost its indentation; refreshing the
            # bar once per directory is a reconstruction that only affects
            # the progress display.
            bar.update(conflicts=len(self.data), files=f,
                       dir=root[(len(self.directory)+1):])
        bar.finish()
        log.info(
            f"Found {len(self.data)} conflicts, totalling {self.nbFiles()} conflict files.")

    def getStats(self):
        """Refresh the stats of every file copy and log the size summary."""
        log.info("Getting stats from conflict files")
        bar = progressbar.ProgressBar(max_value=self.nbFiles()).start()
        f = 0
        for databaseFile in self.data.values():
            databaseFile.getStats()
            f += databaseFile.nbFiles()
            bar.update(f)
        bar.finish()
        total = self.totalSize()
        log.info(
            f"Total file size: {sizeof_fmt(total)}, possible save: {sizeof_fmt(total - self.maxSize())}")

    def getChecksums(self):
        """Checksum every file copy that does not have a checksum yet.

        Ctrl-C stops the loop without raising, so that the caller can still
        save what was computed. Any other error is logged and the loop moves
        on to the next entry.
        """
        log.info("Checksumming conflict files")
        widgets = [
            progressbar.DataSize(), ' of ', progressbar.DataSize('max_value'),
            ' (', progressbar.AdaptiveTransferSpeed(), ') ',
            progressbar.Bar(), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.DynamicMessage('file', width=20, precision=20), ' ',
            progressbar.Timer(), ' ',
            progressbar.AdaptiveETA(),
        ]
        bar = progressbar.DataTransferBar(
            max_value=self.totalChecksumSize(), widgets=widgets).start()
        f = 0
        interrupted = False
        for databaseFile in self.data.values():
            bar.update(f, dir=databaseFile.root[(len(self.directory)+1):],
                       file=databaseFile.filename)
            # Must be read before summing: once the checksums are set, the
            # entry reports 0 bytes left.
            f += databaseFile.totalChecksumSize()
            try:
                databaseFile.getChecksums()
            except KeyboardInterrupt:
                interrupted = True
                break
            except Exception as e:
                # Exception rather than BaseException, so that SystemExit and
                # friends are not swallowed.
                log.error(e, exc_info=True)
        # Always close the bar; when interrupted, keep its current state
        # instead of jumping to 100 %.
        bar.finish(dirty=interrupted)

    def act(self):
        """Placeholder for the resolution step; does nothing yet."""
        pass
class DatabaseFile():
    """Every copy of one logical file: the plain file and its conflict copies.

    ``conflicts``, ``stats`` and ``checksums`` are parallel lists: index ``f``
    of each describes the same file copy. ``stats[f]`` is an ``os.stat``
    result or None. ``checksums[f]`` is None when not computed yet, an
    adler32 value, or a bool marker set by ``getStats``.
    """

    BLOCK_SIZE = 4096
    RELEVANT_STATS = ('st_mode', 'st_uid', 'st_gid',
                      'st_size', 'st_mtime', 'st_ctime')

    def __init__(self, root, filename):
        """Create an entry for ``filename`` (conflict marker removed) in ``root``."""
        self.root = root
        self.filename = filename
        self.stats = []
        self.conflicts = []
        self.checksums = []
        log.debug(f"{self.root}/{self.filename} - new")

    def addConflict(self, conflict):
        """Register the file name ``conflict`` unless it is already known."""
        if conflict in self.conflicts:
            return
        self.conflicts.append(conflict)
        self.stats.append(None)
        self.checksums.append(None)
        log.debug(f"{self.root}/{self.filename} - add: {conflict}")

    def migrate(self):
        """Fill in ``stats`` / ``checksums`` on instances that lack them."""
        # Temp dev stuff since I don't want to resum that whole 400 GiB dir
        # getattr covers both "attribute is None" and "attribute missing";
        # the original only guarded the missing case for checksums.
        if getattr(self, 'stats', None) is None:
            self.stats = [None] * len(self.conflicts)
        if getattr(self, 'checksums', None) is None:
            self.checksums = [None] * len(self.conflicts)

    def removeConflict(self, conflict):
        """Forget the file name ``conflict`` along with its stat and checksum.

        :raises ValueError: if ``conflict`` is not registered.
        """
        f = self.conflicts.index(conflict)
        del self.conflicts[f]
        del self.stats[f]
        del self.checksums[f]
        log.debug(f"{self.root}/{self.filename} - del: {conflict}")

    def getPathFile(self, conflict):
        """Return the path of ``conflict`` inside ``self.root``."""
        return os.path.join(self.root, conflict)

    def getPathFiles(self):
        """Return the path of every registered file copy."""
        return [self.getPathFile(conflict) for conflict in self.conflicts]

    def prune(self):
        """Forget every registered copy that is no longer a regular file."""
        # Collected first: removeConflict mutates the list being scanned.
        toPrune = [conflict for conflict in self.conflicts
                   if not os.path.isfile(self.getPathFile(conflict))]
        for conflict in toPrune:
            self.removeConflict(conflict)

    def isRelevant(self):
        """Tell whether this entry still describes an unresolved conflict.

        False when nothing is registered or when only the plain file is left.
        A lone conflict copy counts as relevant; the original fell through and
        returned None there, which made ``Database.prune`` discard it.
        NOTE(review): assumed to be an oversight -- confirm.
        """
        if not self.conflicts:
            return False
        return self.conflicts != [self.filename]

    def nbFiles(self):
        """Return the number of registered file copies."""
        return len(self.conflicts)

    def totalSize(self):
        """Return the summed size of the copies whose stats are known."""
        return sum((stat.st_size if stat is not None else 0)
                   for stat in self.stats)

    def maxSize(self):
        """Return the size of the largest copy, 0 when there is none."""
        return max(((stat.st_size if stat is not None else 0)
                    for stat in self.stats), default=0)

    def totalChecksumSize(self):
        """Return the summed size of the copies that have no checksum yet."""
        size = 0
        for stat, checksum in zip(self.stats, self.checksums):
            if checksum is None and stat is not None:
                size += stat.st_size
        return size

    def getStats(self):
        """Re-stat every copy and invalidate the checksums that went stale.

        :raises OSError: if a registered copy cannot be stat'ed.
        """
        for f, conflict in enumerate(self.conflicts):
            oldStat = self.stats[f]
            newStat = os.stat(self.getPathFile(conflict))
            oldChecksum = self.checksums[f]

            # If it's been already summed, and we have the same inode and
            # same ctime, don't resum
            unchanged = (
                oldStat is not None
                and isinstance(oldChecksum, int)
                and oldStat.st_size == newStat.st_size
                and oldStat.st_dev == newStat.st_dev
                and oldStat.st_ino == newStat.st_ino
                and oldStat.st_ctime == newStat.st_ctime
            )
            if not unchanged:
                self.checksums[f] = None

            self.stats[f] = newStat

        # NOTE(review): the bool markers below are all equal to each other,
        # so comparing checksums cannot tell "all different" from "all same".

        # If all the file are of different size, set as different files
        if len(self.stats) == len({s.st_size for s in self.stats}):
            self.checksums = [False] * len(self.conflicts)

        # If all the files are the same inode, set as same files
        if len({(s.st_dev, s.st_ino) for s in self.stats}) == 1:
            self.checksums = [True] * len(self.conflicts)

    def getChecksums(self):
        """Compute an adler32 checksum for every copy that lacks one.

        All pending files are read in lockstep, one block at a time. A file
        stops being read once it is exhausted, or once its running checksum
        is unique among ``self.checksums``. On failure, the sums of the files
        still open are reset to None and every handle is closed.

        :raises OSError: if a file cannot be opened or read.
        """
        # TODO It's not even required to have a sum, this thing is not collision resistant now
        # TODO We might use BTRFS feature to know if conflict files are deduplicated between them

        filedescs = dict()
        try:
            for f, conflict in enumerate(self.conflicts):
                if self.checksums[f] is not None:
                    continue
                filedescs[f] = open(self.getPathFile(conflict), 'rb')
                # Seeded only once the file is open, so that a failed open
                # does not leave a bogus "already summed" value behind.
                self.checksums[f] = 1

            while filedescs:
                toClose = set()

                # Compute checksums for next block for all files
                for f, filedesc in filedescs.items():
                    data = filedesc.read(DatabaseFile.BLOCK_SIZE)
                    self.checksums[f] = zlib.adler32(data, self.checksums[f])
                    if len(data) < DatabaseFile.BLOCK_SIZE:
                        toClose.add(f)

                # Stop summing as soon as checksum diverge
                for f in filedescs:
                    if self.checksums.count(self.checksums[f]) < 2:
                        toClose.add(f)

                for f in toClose:
                    filedescs.pop(f).close()
        except BaseException:
            # Files still open only have a partial sum: forget it so that a
            # later run starts over instead of trusting it.
            for f in filedescs:
                self.checksums[f] = None
            raise
        finally:
            for filedesc in filedescs.values():
                filedesc.close()

    def getFeatures(self):
        """Return, per feature name, the list of its values for each copy.

        Features are ``'sum'`` (the checksums) plus every ``RELEVANT_STATS``
        attribute. Requires every stat to be known.
        """
        features = dict()
        features['sum'] = self.checksums
        for stat in DatabaseFile.RELEVANT_STATS:
            # The original indexed self.stats with enumerate() tuples, which
            # raises TypeError.
            features[stat] = [getattr(fileStat, stat)
                              for fileStat in self.stats]
        return features

    def getDiffFeatures(self):
        """Return only the features whose values differ between copies."""
        return {key: vals for key, vals in self.getFeatures().items()
                if len(set(vals)) > 1}

    def printInfos(self):
        """Print the path of the plain (non-conflict) file."""
        # The attribute is ``filename``; ``self.name`` never existed.
        print(os.path.join(self.root, self.filename))
# nf = re.sub( '', f)
# F = os.path.join(root, f)
# NF = os.path.join(root, nf)
# if os.path.exists(NF):
# print(f"'{F}' → '{NF}': file already exists")
# else:
# print(f"'{F}' → '{NF}': done")
# # os.rename(F, NF)
if __name__ == "__main__":
    # Script entry point: parse the arguments, load or create the database,
    # then run the phases in order, saving the database after each phase.

    parser = argparse.ArgumentParser(
        description="Handle Syncthing's .sync-conflict files ")

    # Execution flow
    parser.add_argument(
        '--database', help='Database path for file informations')
    parser.add_argument('directory', metavar='DIRECTORY',
                        nargs='?', help='Directory to analyse')

    args = parser.parse_args()

    # Argument default values attribution
    if args.directory is None:
        args.directory = os.curdir
    args.directory = os.path.realpath(args.directory)

    # Create / load the database
    database = None
    if args.database:
        if os.path.isfile(args.database):
            # SECURITY: unpickling runs arbitrary code contained in the file;
            # only point --database at files this script wrote itself.
            try:
                with open(args.database, 'rb') as databaseFile:
                    database = pickle.load(databaseFile)
            except Exception as e:
                # Exception, not BaseException: Ctrl-C must not be reported
                # as a corrupt database. The cause is chained for debugging.
                raise ValueError("Not a database file") from e
            # Explicit checks instead of assert, which is stripped under -O.
            if not isinstance(database, Database):
                raise ValueError("Not a database file")
            if database.version > Database.VERSION:
                raise ValueError(
                    "Version of the loaded database is too recent")
            if database.directory != args.directory:
                raise ValueError(
                    "Directory of the loaded database doesn't match")

    if database is None:
        database = Database(args.directory)

    def saveDatabase():
        """Pickle the database to --database; no-op when the option is unset."""
        if args.database:
            with open(args.database, 'wb') as databaseFile:
                pickle.dump(database, databaseFile)

    database.getList()
    saveDatabase()

    database.getStats()
    saveDatabase()

    database.getChecksums()
    saveDatabase()

    database.act()

View file

@ -45,11 +45,12 @@ export JAVA_FONTS=/usr/share/fonts/TTF # 2019-04-25 Attempt to remove .java/font
# export ARDMK_VENDOR=archlinux-arduino # export ARDMK_VENDOR=archlinux-arduino
# Get out of my $HOME! # Get out of my $HOME!
direnv GOPATH "$HOME/.cache/go" export BOOT9_PATH="$HOME/.local/share/citra-emu/sysdata/boot9.bin"
direnv CARGOHOME "$HOME/.cache/cargo" # There are config in there that we can version if one want direnv CARGOHOME "$HOME/.cache/cargo" # There are config in there that we can version if one want
direnv CCACHE_BASEDIR "$HOME/.cache/ccache" direnv CCACHE_BASEDIR "$HOME/.cache/ccache"
export CCACHE_CONFIGPATH="$HOME/.config/ccache.conf" export CCACHE_CONFIGPATH="$HOME/.config/ccache.conf"
direnv GNUPGHOME "$HOME/.config/gnupg" direnv GNUPGHOME "$HOME/.config/gnupg"
direnv GOPATH "$HOME/.cache/go"
direnv GRADLE_USER_HOME "$HOME/.cache/gradle" direnv GRADLE_USER_HOME "$HOME/.cache/gradle"
export INPUTRC="$HOME/.config/inputrc" export INPUTRC="$HOME/.config/inputrc"
export LESSHISTFILE="$HOME/.cache/lesshst" export LESSHISTFILE="$HOME/.cache/lesshst"