def DegradeOriginalRows(self, ExcludeFolder=[]):
    """Move the 'original' marker away from rows in unwanted locations.

    For every distinct non-zero ``original`` value in ``self.Table`` the
    group (the row whose idx equals that value plus every row pointing at
    it) is read ordered by ``modified_date`` ascending.  The first row that
    is neither in an excluded folder nor carries a numeric inner suffix is
    selected; when that row is not the first of the group it is promoted
    through ``ChangeOriginal``.

    ExcludeFolder: folder strings; a row is skipped when its folder is in
        this list, contains one of these strings, or resolves to the same
        absolute path as ``self.Folder``.
    """
    Trace(self.Folder)
    if not sql.tables(self.Table):
        return

    def is_excluded(folder):
        # Substring match against every excluded entry, then the scan root itself.
        if any(cur in folder for cur in ExcludeFolder):
            return True
        return os.path.abspath(folder) == os.path.abspath(self.Folder)

    originals = sql.execute('select distinct original from %s where original <> 0' % (self.Table), Flatten=True)
    for count, original in enumerate(originals):
        dbg_print(original, count)
        group = sql.execute('select idx, modified_date, path from %s where idx=%d or original=%d order by modified_date ASC' % (self.Table, original, original))
        for position, row in enumerate(group):
            filePath = row[2]
            folder = os.path.dirname(filePath)
            if folder in ExcludeFolder or is_excluded(folder):
                continue

            # "name.2.jpg" -> stem "name.2" -> inner suffix "2"
            stem = os.path.splitext(os.path.basename(filePath))[0]
            suffix = os.path.splitext(stem)[1].lstrip('.')
            if is_number(suffix):
                continue

            # First acceptable row found; only act when it is not already
            # at the head of the group.
            if position != 0:
                print()
                self.ChangeOriginal(original, row[0])
            break
def DeleteDups(self, DeleteFiles=False, Types='[AllMediaTypes]'):
    """Remove every duplicate row (``original <> 0``) and its file.

    DeleteFiles: when False (default) each duplicate file is moved into a
        "dups" folder derived from its current folder; when True the file
        is deleted.
    Types: accepted for signature parity with sibling methods; not used
        by this method.

    Rows whose file was handled without an exception are deleted from the
    table in a single batch at the end.
    """
    Trace(self.Folder)
    if not sql.tables(self.Table):
        return

    Globals.IgnoreExpandErrors = True
    query = 'SELECT idx, path from [Table] where original <> 0'
    dups = sql.execute(Expand(query), Verbose=self.Verbose)

    idxList = []
    for dup in dups:
        idx, path = dup
        try:
            destPath = os.path.dirname(path)
            # Every backslash is escaped explicitly; the previous literal
            # relied on the invalid escape sequence "\d".
            destPath = destPath.replace(':\\', ':\\ServerName\\dups\\')
            if not DeleteFiles:
                # Update=False: the row is about to be deleted anyway.
                # NOTE(review): the MoveFile visible in this file logs and
                # swallows its own errors, so a failed move would still
                # queue the row for deletion - confirm that is intended.
                self.MoveFile(idx, path, destPath, Update=False)
            else:
                Log(r'Delete %s' % path)
                DeleteFile(path)
            idxList.append([str(idx)])
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            LogPlainError('Delete failed for %s' % (path))

    query = r'DELETE FROM [Table] WHERE idx=?'
    sql.execute_many(Expand(query), idxList)
def Register(self):
    """Insert or refresh this folder's entry in FolderComp.RegistryTable.

    A new entry is ``[self.Folder, now, now]``; an existing entry keeps its
    first two fields and gets its third field set to the current time.
    """
    existing = None
    if sql.tables(FolderComp.RegistryTable):
        existing = sql.select(FolderComp.RegistryTable, WhereClause=Expand(r"WHERE folder=?"), Verbose=self.Verbose, Data=[self.Folder])

    timestamp = time.time()
    if existing:
        entry = existing[0]
        entry[2] = timestamp
    else:
        entry = [self.Folder, timestamp, timestamp]

    sql.write_to_table(FolderComp.RegistryTable, [entry], FolderComp.RegistryColumns, UseExistingTable=True, IdentityIndex=True, Verbose=self.Verbose)
def MoveDups(self, Types='[AllMediaTypes]'):
    """Move every duplicate file into the folder of its original.

    Types: media-type filter forwarded to ``self.select_rows``.

    Duplicates already located in their original's folder are left alone,
    as are duplicates whose original row cannot be found.
    """
    Trace(self.Folder)
    if not sql.tables(self.Table):
        return

    Globals.IgnoreExpandErrors = True
    dups = self.select_rows('original!=0', Types, Columns=['idx', 'path', 'folder', 'original'])
    for dup in dups:
        # The local name `original` is referenced by the [original]
        # placeholder in the query below.
        idx, path, folder, original = dup
        query = 'SELECT idx, folder from [Table] where idx=[original]'
        orig = sql.execute(Expand(query), Verbose=self.Verbose)
        if not orig:
            # Dangling reference: no row with that idx, nothing to move to.
            continue
        # sql.execute is used as a list of rows elsewhere in this file, so
        # the single matching row is taken before unpacking its columns.
        orig_idx, orig_folder = orig[0]
        if folder != orig_folder:
            self.MoveFile(idx, path, orig_folder)
def AddNewFile(self, FilePath, DestFolder, KeepOriginals=False):
    """Bring FilePath into DestFolder and record it in ``self.Table``.

    The file is handed to ``self.MoveFile`` (with Idx=-1 and Update=False);
    KeepOriginals is forwarded unchanged.

    Returns the destination path reported by ``MoveFile``, or False when
    the table does not exist or ``MoveFile`` returned a falsy value.
    """
    if not sql.tables(self.Table):
        return False

    destFile = self.MoveFile(-1, FilePath, DestFolder, False, KeepOriginals=KeepOriginals)
    if not destFile:
        return False

    # add to Database
    info = os.stat(destFile)
    folder, filename = os.path.split(destFile)
    ext = os.path.splitext(filename)[1]
    record = [
        filename,
        destFile,
        folder,
        info.st_size,
        info.st_mtime,
        info.st_ctime,
        0,  # new rows start out as non-duplicates (original = 0)
        Home.MediaTypeFromExtension(ext),
    ]
    sql.write_to_table(self.Table, [record], FindDups.Columns, UseExistingTable=True, SkipInsert=['idx'], Verbose=self.Verbose)
    return destFile
def DegradeOriginalShowFolders(self, ExcludeFolder=[]):
    """Print the sorted set of folders that survive the exclusion rules.

    For each duplicate group (same selection as the per-original query
    below) the folders of rows that have no numeric inner suffix and are
    not excluded are collected.  A group with no surviving folder
    contributes the folder of its first row instead.  The merged list is
    sorted, pretty-printed, and then ``Exit()`` is called.

    ExcludeFolder: folder strings; a folder is excluded when it is in this
        list, contains one of these strings, or resolves to the same
        absolute path as ``self.Folder``.
    """
    Trace(self.Folder)
    if not sql.tables(self.Table):
        return

    def is_excluded(folder):
        if any(cur in folder for cur in ExcludeFolder):
            return True
        return os.path.abspath(folder) == os.path.abspath(self.Folder)

    data = []
    originals = sql.execute('select distinct original from %s where original <> 0' % (self.Table), Flatten=True)
    for count, original in enumerate(originals, 1):
        dbg_print(original, count)
        group = sql.execute('select idx, modified_date, path from %s where idx=%d or original=%d order by modified_date ASC' % (self.Table, original, original))

        included = []
        for row in group:
            filePath = row[2]
            folder = os.path.dirname(filePath)
            # "name.2.jpg" -> stem "name.2" -> inner suffix "2"
            stem = os.path.splitext(os.path.basename(filePath))[0]
            suffix = os.path.splitext(stem)[1].lstrip('.')
            if is_number(suffix) or folder in ExcludeFolder or is_excluded(folder):
                continue
            if folder not in included:
                included.append(folder)

        if not included:
            # Nothing survived: fall back to the first row's folder.
            included.append(os.path.dirname(group[0][2]))

        for folder in included:
            if folder in data:
                continue
            data.append(folder)

    data.sort()
    PrettyPrint(data, 'Included Folders')
    Exit()
def PrintDups(self, Limit=None, Types='[AllMediaTypes]'):
    """Report (duplicate path, original path) pairs whose files both exist.

    More than 100 pairs are written to the Dups.log file under [Temp];
    otherwise they are printed directly.  Limit and Types are accepted but
    not used by this method.
    """
    Trace(self.Folder)
    if not sql.tables(self.Table):
        return

    table = self.Table
    pairs = sql.execute('select A.path, B.path from %s as A, %s as B where A.original <> 0 and B.idx = A.original' % (table, table))

    # Keep only pairs where both files are still on disk.
    existing = [pair for pair in pairs if os.path.exists(pair[0]) and os.path.exists(pair[1])]
    Log(r'Found %d dups' % (len(existing)))

    if len(existing) <= 100:
        PrettyPrintList(existing, UseExpand=False)
        return

    PrettyPrintList(existing, FilePath=ExpandPath(r'[Temp]\Dups.log'))
    Log(r'Generated [Temp]\Dups.log')
def SetOldestOriginal(self):
    """Make the oldest file that still exists the original of each group.

    Each group is read ordered by ``modified_date`` ascending; the first
    row whose path exists on disk becomes the ``original`` value of the
    whole group.  A final statement resets ``original`` to 0 on rows that
    point at themselves.
    """
    Trace(self.Folder)
    if not sql.tables(self.Table):
        return

    originals = sql.execute('select distinct original from %s where original <> 0' % (self.Table), Flatten=True)
    for count, original in enumerate(originals, 1):
        dbg_print(original, count)
        group = sql.execute('select idx, modified_date, path from %s where idx=%d or original=%d order by modified_date ASC' % (self.Table, original, original))

        # Rows are oldest-first, so the first hit is the oldest survivor.
        oldest = next((row for row in group if os.path.exists(row[2])), None)
        if oldest is None:
            continue
        sql.execute('update %s set original=%s where idx=%s or original=%s' % (self.Table, oldest[0], original, original))

    sql.execute('update %s set original=0 where idx=original' % (self.Table))
def MoveFile(self, Idx, SourceFile, DestFolder, Update=True, KeepOriginals=False):
    """Move (or copy) SourceFile into DestFolder under a unique name.

    Idx: idx of the table row describing the file; only used when Update
        is true and in the failure log message.
    SourceFile: path of the file to transfer.
    DestFolder: target folder.
    Update: when true, the row's ``path`` and ``folder`` columns are
        rewritten to the new location.
    KeepOriginals: when true the file is copied instead of moved.

    Returns the destination path on success.  Returns False when the table
    does not exist or the transfer failed (the failure is logged and
    reported, not raised).  Previously the success path fell through and
    returned None, so callers such as AddNewFile could never receive the
    destination path.
    """
    if not sql.tables(self.Table):
        return False

    try:
        if not KeepOriginals:
            uniqFile = MoveToUniqueFile(SourceFile, DestFolder, '')
        else:
            uniqFile = CopyToUniqueFile(SourceFile, DestFolder, '')
        ImportLog.log(SourceFile, uniqFile)

        if Update:
            query = r'UPDATE [Table] set path=?, folder=? WHERE idx=?'
            sql.execute(Expand(query), Verbose=self.Verbose, Data=[uniqFile, DestFolder, Idx])
        return uniqFile
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception:
        # Best effort: log and report, then signal failure to the caller.
        # The parameter name `Idx` is referenced by the [Idx] placeholder.
        LogPlainError('move failed for [Idx]')
        ReportException()
    return False
def IsDupFile(self, FilePath):
    """Return True when a file recorded in the table has identical content.

    Only rows with the same size as FilePath are considered; each existing
    candidate is compared with ``filecmp.cmp(..., False)``, i.e. by
    content.  Candidates missing from disk are logged and skipped.
    Returns False when the table does not exist or nothing matches.
    """
    if not sql.tables(self.Table):
        return False

    # The local name `size` is referenced by the [size] placeholder below.
    size = os.stat(FilePath).st_size
    query = 'SELECT path from [Table] WHERE size=[size]'
    candidates = flatten(sql.execute(Expand(query), Verbose=self.Verbose))

    for path in candidates:
        if os.path.exists(path):
            if filecmp.cmp(FilePath, path, False):
                return True
        else:
            LogPlainError(r'Error missing file: %s' % (path))
    return False
def GetFileDups(self, FilePath):
    """Return the paths recorded in the table whose content equals FilePath.

    Only rows with the same size as FilePath are considered; each existing
    candidate is compared with ``filecmp.cmp(..., False)``, i.e. by
    content.  Candidates missing from disk are logged and skipped.
    Returns an empty list when the table does not exist.
    """
    if not sql.tables(self.Table):
        return []

    stats = os.stat(FilePath)
    # The local name `size` is referenced by the [size] placeholder below.
    size = stats.st_size
    query = 'SELECT path from [Table] WHERE size=[size]'
    # Flatten the single-column result so each item is a path string, as
    # IsDupFile does for the same query; unflattened rows would be passed
    # to os.path.exists / filecmp.cmp as whole row objects.
    rows = sql.execute(Expand(query), Verbose=self.Verbose, Flatten=True)

    dups = []
    # The loop variable must stay named `path`: the log template below
    # refers to it as [path].
    for path in rows:
        if not os.path.exists(path):
            Log(r'Error missing file: [path]')
            continue
        if filecmp.cmp(FilePath, path, False):
            dups.append(path)
    return dups
def UnRegister(self):
    """Drop this folder's table and remove its FindDups registry entry.

    The folder is passed as a bound parameter instead of being spliced
    into the SQL text, so folder names containing a quote character no
    longer break (or alter) the DELETE statement.
    """
    sql.drop_table(self.Table)
    if sql.tables(FindDups.RegistryTable):
        # NOTE(review): assumes the former '[Folder]' placeholder expanded
        # to self.Folder - confirm against Expand.
        sql.execute(Expand("delete from {0} where folder=?".format(FindDups.RegistryTable)), Verbose=self.Verbose, Data=[self.Folder])
def GetDuplicates(self, Original):
    """Return the (idx, path) rows whose ``original`` column equals Original.

    Returns an empty list when the table does not exist.
    """
    if sql.tables(self.Table):
        statement = 'select idx, path from %s where original = %d' % (self.Table, Original)
        return sql.execute(statement)
    return []
def GetOriginals(self, Limit=None, Types='[AllMediaTypes]'):
    """Return distinct (idx, path) rows that at least one duplicate points at.

    Limit and Types are accepted but not used by this method.  Returns an
    empty list when the table does not exist.
    """
    Trace(self.Folder)
    if sql.tables(self.Table):
        table = self.Table
        statement = 'select DISTINCT B.idx, B.path from %s as A, %s as B where A.original <> 0 and B.idx = A.original' % (table, table)
        return sql.execute(statement)
    return []
def ChangeOriginal(self, old, new):
    """Re-point a duplicate group from original `old` to original `new`.

    First every row of the group (idx == old or original == old) gets
    ``original = new``; then the `new` row itself is reset to
    ``original = 0``.  Does nothing when the table does not exist.
    """
    if not sql.tables(self.Table):
        return

    table = self.Table
    statements = (
        'update %s set original=%s where idx=%s or original=%s' % (table, new, old, old),
        'update %s set original=0 where idx=%s' % (table, new),
    )
    # Order matters: the group update also touches the `new` row.
    for statement in statements:
        sql.execute(statement, Verbose=True)
def UnRegister(self):
    """Drop this folder's table and remove its FolderComp registry entry."""
    sql.drop_table(self.Table)

    registry = FolderComp.RegistryTable
    if not sql.tables(registry):
        return

    statement = "delete from {0} where folder=?".format(registry)
    sql.execute(Expand(statement), Verbose=self.Verbose, Data=[self.Folder])
def Tables(Table=''):
    """Pretty-print whatever ``sql.tables`` reports for Table."""
    found = sql.tables(Table)
    PrettyPrint(found)
def FindDups(self, Types='[AllMediaTypes]'):
    """Detect files with identical content and mark them in the table.

    Rows are read ordered by size (then modification date), split into
    runs of equal size, and every run is compared pairwise by content.
    For each duplicate found, its row's ``original`` column is set to the
    idx of the earlier row it matches.

    Types: media-type filter forwarded to ``self.select_rows``.

    Returns the list of paths identified as duplicates, or an empty list
    when the table does not exist.  Sets ``self.Verbose`` to True as a
    side effect.

    Fix: previously the first row of every size run after the first was
    never added to any run, so duplicates involving that file were missed.
    """
    Trace(self.Folder, Types)
    if not sql.tables(self.Table):
        return []

    self.Verbose = True

    def FindDupsInSet(rowSet):
        # Compare every pair in a same-size run; returns ([original idx,
        # duplicate idx] pairs, duplicate paths).
        dups = []
        foundIdx = set()
        foundPathNames = []
        for idx, left in enumerate(rowSet):
            idxLeft = left[0]
            if idxLeft in foundIdx:
                # Already a duplicate of an earlier row; anything equal to
                # it was matched against that earlier row as well.
                continue
            pathLeft = left[2]
            for right in rowSet[idx + 1:]:
                idxRight = right[0]
                if idxRight in foundIdx:
                    continue
                pathRight = right[2]
                if filecmp.cmp(pathLeft, pathRight, False):
                    dups.append([idxLeft, idxRight])
                    foundIdx.add(idxRight)
                    foundPathNames.append(pathRight)
        return dups, foundPathNames

    rows = self.select_rows('', Types, SortColumns=['size', 'modified_date ASC'])

    results = []
    dups = []
    found = 0
    Log('Total rows: %d' % (len(rows)))
    print(' Idx Dups Size')

    rowSet = []
    allSets = [rowSet]
    prev_size = None
    for idx, row in enumerate(rows):
        print('\r%5d %5d' % (idx, found), end=' ')
        # The local name `filepath` is referenced by the [filepath]
        # placeholder in the log message below.
        filepath = row[2]
        if not os.path.exists(filepath):
            Log('Error missing file: [filepath]')
            continue

        size = row[4]
        if rowSet and size != prev_size:
            # Size changed: start a new run.
            rowSet = []
            allSets.append(rowSet)
        # Always keep the row, including the one that opens a new run.
        rowSet.append(row)
        prev_size = size
    print('')

    for rowSet in allSets:
        if len(rowSet) < 2:
            # A run of zero or one file cannot contain duplicates.
            continue
        dupsRowSet, pathsRowSet = FindDupsInSet(rowSet)
        dups.extend(dupsRowSet)
        results.extend(pathsRowSet)
        found += len(dupsRowSet)

    Log(r'Found %d duplicates' % (len(dups)))
    updated = sql.update(self.Table, dups, ['original=?'], "WHERE idx=?", Verbose=self.Verbose)
    Log(r'Updated %d duplicate rows' % (updated))
    return results