rmf

parent c427ac3370
commit 21615a1f9c

@ -1,3 +1,3 @@
-set runtimepath^=~/.vim runtimepath+=~/.vim/after
+set runtimepath+=~/.config/vim,~/.cache/vim
 let &packpath = &runtimepath
-source ~/.vimrc
+source ~/.config/vimrc

@ -1,23 +1,365 @@
#!/usr/bin/env python3

# Rename sync-conflict files to normal files
# WARNING Does not check for conflicts
# Handles sync-conflict files

import argparse
import os
import pickle
import re
import sys
import zlib
import coloredlogs
import progressbar
import logging

progressbar.streams.wrap_stderr()
coloredlogs.install(level='INFO', fmt='%(levelname)s %(message)s')
log = logging.getLogger()

# 1) Create file list with conflict files
# 2) Gather file information (date, owner, size, checksum)
# 3) Propose what to do


def sizeof_fmt(num, suffix='B'):
    # Stolen from https://stackoverflow.com/a/1094933
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)
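
# Illustrative examples (added, not in the original script):
#   sizeof_fmt(4096) -> "4.0 KiB"
#   sizeof_fmt(5 * 1024**3) -> "5.0 GiB"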


class Table():
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.data = [[''] * self.height for _ in range(self.width)]

    def set(self, x, y, data):
        self.data[x][y] = str(data)


class Database():
    VERSION = 1
    CONFLICT_PATTERN = re.compile(r'\.sync-conflict-\d{8}-\d{6}-\w{7}')

    def __init__(self, directory):
        self.version = Database.VERSION
        self.directory = directory
        self.data = dict()

    def prune(self):
        toPrune = list()
        for filepath, databaseFile in self.data.items():
            databaseFile.migrate()  # TODO Temp dev stuff
            databaseFile.prune()
            if not databaseFile.isRelevant():
                toPrune.append(filepath)
        for filepath in toPrune:
            del self.data[filepath]

    def nbFiles(self):
        return sum(databaseFile.nbFiles() for databaseFile in self.data.values())

    def totalSize(self):
        return sum(databaseFile.totalSize() for databaseFile in self.data.values())

    def maxSize(self):
        return sum(databaseFile.maxSize() for databaseFile in self.data.values())

    def totalChecksumSize(self):
        return sum(databaseFile.totalChecksumSize() for databaseFile in self.data.values())

    def getList(self):
        self.prune()

        log.info("Finding conflict files")
        widgets = [
            progressbar.AnimatedMarker(), ' ',
            progressbar.BouncingBar(), ' ',
            progressbar.DynamicMessage('conflicts'), ' ',
            progressbar.DynamicMessage('files'), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.Timer(),
        ]
        bar = progressbar.ProgressBar(widgets=widgets).start()
        f = 0
        for root, dirs, files in os.walk(self.directory):
            for conflictFilename in files:
                f += 1
                if not Database.CONFLICT_PATTERN.search(conflictFilename):
                    continue
                filename = Database.CONFLICT_PATTERN.sub('', conflictFilename)
                key = (root, filename)
                if key in self.data:
                    dataFile = self.data[key]
                else:
                    dataFile = DatabaseFile(root, filename)
                    self.data[key] = dataFile

                if filename in files:
                    dataFile.addConflict(filename)
                dataFile.addConflict(conflictFilename)

                bar.update(conflicts=len(self.data), files=f,
                           dir=root[(len(self.directory)+1):])
        bar.finish()
        log.info(
            f"Found {len(self.data)} conflicts, totalling {self.nbFiles()} conflict files.")

    def getStats(self):
        log.info("Getting stats from conflict files")
        bar = progressbar.ProgressBar(max_value=self.nbFiles()).start()
        f = 0
        for databaseFile in self.data.values():
            databaseFile.getStats()
            f += databaseFile.nbFiles()
            bar.update(f)
        bar.finish()
        log.info(
            f"Total file size: {sizeof_fmt(self.totalSize())}, possible save: {sizeof_fmt(self.totalSize() - self.maxSize())}")

    def getChecksums(self):
        log.info("Checksumming conflict files")
        widgets = [
            progressbar.DataSize(), ' of ', progressbar.DataSize('max_value'),
            ' (', progressbar.AdaptiveTransferSpeed(), ') ',
            progressbar.Bar(), ' ',
            progressbar.DynamicMessage('dir', width=20, precision=20), ' ',
            progressbar.DynamicMessage('file', width=20, precision=20), ' ',
            progressbar.Timer(), ' ',
            progressbar.AdaptiveETA(),
        ]
        bar = progressbar.DataTransferBar(
            max_value=self.totalChecksumSize(), widgets=widgets).start()
        f = 0
        for databaseFile in self.data.values():
            bar.update(f, dir=databaseFile.root[(len(self.directory)+1):],
                       file=databaseFile.filename)
            f += databaseFile.totalChecksumSize()
            try:
                databaseFile.getChecksums()
            except KeyboardInterrupt:
                return
            except BaseException as e:
                log.error(e, exc_info=True)
        bar.finish()

    def act(self):
        pass


class DatabaseFile():
    BLOCK_SIZE = 4096
    RELEVANT_STATS = ('st_mode', 'st_uid', 'st_gid',
                      'st_size', 'st_mtime', 'st_ctime')

    def __init__(self, root, filename):
        self.root = root
        self.filename = filename
        self.stats = []
        self.conflicts = []
        self.checksums = []
        log.debug(f"{self.root}/{self.filename} - new")

    def addConflict(self, conflict):
        if conflict in self.conflicts:
            return
        self.conflicts.append(conflict)
        self.stats.append(None)
        self.checksums.append(None)
        log.debug(f"{self.root}/{self.filename} - add: {conflict}")

    def migrate(self):
        # Temp dev stuff since I don't want to re-sum that whole 400 GiB dir
        if self.stats is None:
            self.stats = [None] * len(self.conflicts)
        try:
            if self.checksums is None:
                self.checksums = [None] * len(self.conflicts)
        except AttributeError:
            self.checksums = [None] * len(self.conflicts)

    def removeConflict(self, conflict):
        f = self.conflicts.index(conflict)
        del self.conflicts[f]
        del self.stats[f]
        del self.checksums[f]
        log.debug(f"{self.root}/{self.filename} - del: {conflict}")

    def getPathFile(self, conflict):
        return os.path.join(self.root, conflict)

    def getPathFiles(self):
        return [self.getPathFile(conflict) for conflict in self.conflicts]

    def prune(self):
        toPrune = list()
        for conflict in self.conflicts:
            if not os.path.isfile(self.getPathFile(conflict)):
                toPrune.append(conflict)

        if len(toPrune):
            for conflict in toPrune:
                self.removeConflict(conflict)

    def isRelevant(self):
        if len(self.conflicts) == 1:
            if self.conflicts[0] == self.filename:
                return False
        elif len(self.conflicts) < 1:
            return False
        return True

    def nbFiles(self):
        return len(self.conflicts)

    def totalSize(self):
        return sum((stat.st_size if stat is not None else 0) for stat in self.stats)

    def maxSize(self):
        return max((stat.st_size if stat is not None else 0) for stat in self.stats)

    def totalChecksumSize(self):
        size = 0
        for f, checksum in enumerate(self.checksums):
            if checksum is None:
                stat = self.stats[f]
                if stat is not None:
                    size += stat.st_size
        return size

    def getStats(self):
        for f, conflict in enumerate(self.conflicts):
            oldStat = self.stats[f]
            newStat = os.stat(self.getPathFile(conflict))
            oldChecksum = self.checksums[f]

            # If it's already been summed and we still have the same size, inode,
            # device and ctime, don't re-sum; otherwise invalidate the checksum
            if oldStat is None or not isinstance(oldChecksum, int) \
                    or oldStat.st_size != newStat.st_size \
                    or oldStat.st_dev != newStat.st_dev \
                    or oldStat.st_ino != newStat.st_ino \
                    or oldStat.st_ctime != newStat.st_ctime:
                self.checksums[f] = None

            self.stats[f] = newStat

        # If all the files have different sizes, mark them as different files
        if len(self.stats) == len(set([s.st_size for s in self.stats])):
            self.checksums = [False] * len(self.conflicts)

        # If all the files are the same inode, mark them as identical files
        if len(set([s.st_ino for s in self.stats])) == 1 and len(set([s.st_dev for s in self.stats])) == 1:
            self.checksums = [True] * len(self.conflicts)

    def getChecksums(self):
        # TODO It's not even required to have a sum, this thing is not collision resistant now
        # TODO We might use BTRFS features to know if conflict files are deduplicated between them

        filedescs = dict()
        for f, conflict in enumerate(self.conflicts):
            if self.checksums[f] is not None:
                continue
            self.checksums[f] = 1
            filedescs[f] = open(self.getPathFile(conflict), 'rb')

        while len(filedescs):
            toClose = set()

            # Compute checksums for the next block of every open file
            for f, filedesc in filedescs.items():
                data = filedesc.read(DatabaseFile.BLOCK_SIZE)
                self.checksums[f] = zlib.adler32(data, self.checksums[f])
                if len(data) < DatabaseFile.BLOCK_SIZE:
                    toClose.add(f)

            # Stop summing as soon as the checksums diverge
            for f in filedescs.keys():
                if self.checksums.count(self.checksums[f]) < 2:
                    toClose.add(f)

            for f in toClose:
                filedescs[f].close()
                del filedescs[f]
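
    # Added note (not in the original commit): zlib.adler32 takes the previous
    # checksum as its second argument, so the loop above computes each file's
    # checksum incrementally, one BLOCK_SIZE read at a time, and can drop a file
    # early once its running value no longer matches any other conflict's.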

    def getFeatures(self):
        features = dict()
        features['sum'] = self.checksums
        for stat in DatabaseFile.RELEVANT_STATS:
            features[stat] = [getattr(self.stats[f], stat)
                              for f in range(len(self.stats))]
        return features

    def getDiffFeatures(self):
        features = self.getFeatures()
        diffFeatures = dict()
        for key, vals in features.items():
            if len(set(vals)) > 1:
                diffFeatures[key] = vals
        return diffFeatures

    def printInfos(self):
        print(os.path.join(self.root, self.filename))

        # nf = re.sub( '', f)
        # F = os.path.join(root, f)
        # NF = os.path.join(root, nf)
        # if os.path.exists(NF):
        #     print(f"'{F}' → '{NF}': file already exists")
        # else:
        #     print(f"'{F}' → '{NF}': done")
        #     # os.rename(F, NF)


if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description="Handle Syncthing's .sync-conflict files")

    # Execution flow
    parser.add_argument(
        '--database', help='Database path for file information')

    parser.add_argument('directory', metavar='DIRECTORY',
                        nargs='?', help='Directory to analyse')

    args = parser.parse_args()

    # Argument default values attribution
    if args.directory is None:
        args.directory = os.curdir
    args.directory = os.path.realpath(args.directory)

    # Create / load the database
    database = None
    if args.database:
        if os.path.isfile(args.database):
            try:
                with open(args.database, 'rb') as databaseFile:
                    database = pickle.load(databaseFile)
                assert isinstance(database, Database)
            except BaseException as e:
                raise ValueError("Not a database file") from e
            assert database.version <= Database.VERSION, "Version of the loaded database is too recent"
            assert database.directory == args.directory, "Directory of the loaded database doesn't match"

    if database is None:
        database = Database(args.directory)

    def saveDatabase():
        if args.database:
            global database
            with open(args.database, 'wb') as databaseFile:
                pickle.dump(database, databaseFile)

    database.getList()
    saveDatabase()

    database.getStats()
    saveDatabase()

    database.getChecksums()
    saveDatabase()

    database.act()
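
    # Illustration (added, not part of the original commit) of how the conflict
    # pattern maps a hypothetical Syncthing conflict name back to its base name:
    #   Database.CONFLICT_PATTERN.sub('', 'notes.sync-conflict-20190825-123456-ABCDEFG.txt')
    #   -> 'notes.txt'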

@ -45,11 +45,12 @@ export JAVA_FONTS=/usr/share/fonts/TTF # 2019-04-25 Attempt to remove .java/font
 # export ARDMK_VENDOR=archlinux-arduino

 # Get out of my $HOME!
-direnv GOPATH "$HOME/.cache/go"
+export BOOT9_PATH="$HOME/.local/share/citra-emu/sysdata/boot9.bin"
 direnv CARGOHOME "$HOME/.cache/cargo" # There are config in there that we can version if one want
 direnv CCACHE_BASEDIR "$HOME/.cache/ccache"
 export CCACHE_CONFIGPATH="$HOME/.config/ccache.conf"
 direnv GNUPGHOME "$HOME/.config/gnupg"
+direnv GOPATH "$HOME/.cache/go"
 direnv GRADLE_USER_HOME "$HOME/.cache/gradle"
 export INPUTRC="$HOME/.config/inputrc"
 export LESSHISTFILE="$HOME/.cache/lesshst"