def __call__(self, fobj):
    """Return True if *fobj* hashes to exactly ``self.filehash``.

    The file at ``fobj.path`` is hashed chunk by chunk and every chunk
    is compared against the precomputed reference list.  ``fillvalue=''``
    makes any length mismatch between the two sequences compare unequal,
    so files with a different number of chunks are never reported equal.
    """
    actual_chunks = hash_chunks(fobj.path)
    return all(
        expected == actual
        for expected, actual in zip_longest(self.filehash, actual_chunks, fillvalue='')
    )
def group_by_hash(fsobjects):
    """Group *fsobjects* into lists of files whose contents hash identically.

    Files are hashed incrementally: each file's chunk iterator is advanced
    only as far as needed to tell it apart from the files already seen, so
    unique files are usually distinguished after the first chunk.  The
    ``hashes`` dict maps a chunk value to a list of ``(fobj, chunk_iterator)``
    pairs still parked at that chunk.
    """
    hashes = {}
    for fobj in fsobjects:
        chunks = hash_chunks(fobj.path)
        chunk = next(chunks)
        # Walk deeper chunk by chunk while the current chunk collides with
        # an existing bucket.
        while chunk in hashes:
            # Push every collider one chunk deeper so the buckets stay
            # keyed by the furthest chunk reached.
            # NOTE(review): this removes from hashes[chunk] while iterating
            # it, and `hashes[next(dup_chunks)] = [dup]` would clobber an
            # existing bucket at that key — presumably the invariants of the
            # incremental scheme prevent both cases; confirm.
            for dup in hashes[chunk]:
                _, dup_chunks = dup
                try:
                    hashes[next(dup_chunks)] = [dup]
                    hashes[chunk].remove(dup)
                except StopIteration:
                    # dup's file is fully hashed; it stays in this bucket.
                    pass
            try:
                chunk = next(chunks)
            except StopIteration:
                # Current file is fully hashed and still collides: it is a
                # duplicate of everything left in this bucket.
                hashes[chunk].append((fobj, chunks))
                break
        else:
            # No collision at this chunk: start a fresh bucket (while-else
            # runs only when the loop exits without `break`).
            hashes[chunk] = [(fobj, chunks)]
    # Collect the file objects out of each non-empty bucket.
    groups = []
    for dups in hashes.values():
        group = []
        for (dup, _) in dups:
            group.append(dup)
        if group:
            groups.append(group)
    return groups
def __init__(self, filepath=None):
    """Prepare the hash filter for *filepath*.

    Falls back to the currently selected file (``self.fm.thisfile``) when
    *filepath* is not given.  Precomputes the full chunk-hash list for the
    file so later comparisons do not re-hash it.
    """
    if filepath is None:
        self.filepath = self.fm.thisfile.path
    else:
        self.filepath = filepath
    if self.filepath is None:
        self.fm.notify("Error: No file selected for hashing!", bad=True)
        # Bug fix: previously execution fell through and called
        # abspath(None), raising TypeError right after notifying the user.
        return
    # TODO: Lazily generated list would be more efficient, a generator
    # isn't enough because this object is reused for every fsobject
    # in the current directory.
    self.filehash = list(hash_chunks(abspath(self.filepath)))