def main():
    # FIXME: this is a hack, see find_duplicates_fd() above
    sys.setrecursionlimit(10000)

    args = parse_args()

    # Collect regular files from all given paths; non-files are ignored.
    fileinfos = []
    for path in args.FILES:
        for fileinfo in generate_fileinfos(path, relative=False, prefix=None, checksums=False):
            if fileinfo.kind == "file":
                fileinfos.append(fileinfo)

    # Bucket files by size: only files of equal size can be duplicates.
    fileinfos_by_size = {}
    for fileinfo in fileinfos:
        fileinfos_by_size.setdefault(fileinfo.size, []).append(fileinfo)

    for size, candidates in fileinfos_by_size.items():
        if len(candidates) > 1 and size >= args.limit:
            if args.verbose:
                print("potential duplicates: {} bytes".format(size))
                for fileinfo in candidates:
                    print(" {}".format(fileinfo.path))
                print()

            groups = find_duplicates([fileinfo.path for fileinfo in candidates])
            for group in groups:
                if len(group) > 1:
                    print("duplicates:")
                    for path in group:
                        print(" {}".format(path))
                    print()
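
# For illustration only: a minimal sketch of how a checksum-based
# find_duplicates() could work. The real implementation lives elsewhere in
# this module (see the find_duplicates_fd() hack referenced above); the name
# _find_duplicates_sketch and its hashing strategy are assumptions, not the
# actual algorithm used here.
def _find_duplicates_sketch(paths):
    """Group paths whose contents hash identically (sketch, not the real API)."""
    import hashlib

    by_digest = {}
    for path in paths:
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            # Read in 64 KiB chunks so large files are not loaded whole.
            for chunk in iter(lambda: f.read(1 << 16), b""):
                digest.update(chunk)
        by_digest.setdefault(digest.hexdigest(), []).append(path)
    return list(by_digest.values())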

def fileinfos_from_path(path):
    """Read FileInfo objects from path, which can be a .sbtr, .sbtr.gz or directory."""
    if os.path.isdir(path):
        return {fileinfo.path: fileinfo
                for fileinfo in generate_fileinfos(path, checksums=True)}
    else:
        return fileinfos_from_sbtr(path)
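
# Example usage: because fileinfos_from_path() returns a dict keyed by path
# for both directories and snapshot files, two sources can be diffed with
# plain set operations on the key views. This helper and its name are
# illustrative only, not part of the tool's actual interface.
def _diff_snapshots_example(old_source, new_source):
    old = fileinfos_from_path(old_source)
    new = fileinfos_from_path(new_source)
    for path in sorted(new.keys() - old.keys()):
        print("added: {}".format(path))
    for path in sorted(old.keys() - new.keys()):
        print("removed: {}".format(path))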

def process_directory(directory, checksums, relative, prefix, on_report_cb):
    # A prefix only makes sense for relative paths, so it implies relative mode.
    if prefix is not None:
        relative = True

    for fileinfo in generate_fileinfos(directory, relative=relative, prefix=prefix,
                                       checksums=checksums, onerror=on_error):
        on_report_cb(fileinfo)
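
# Example usage: process_directory() pushes each FileInfo to a callback, so a
# caller can stream results instead of accumulating a list. A sketch, assuming
# fileinfo.json() returns one JSON record per file (as the test below relies
# on); the helper name is hypothetical.
def _print_json_lines_example(directory):
    def on_report(fileinfo):
        print(fileinfo.json())

    process_directory(directory, checksums=True, relative=True,
                      prefix=None, on_report_cb=on_report)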

def test_generator(self):
    output = ""
    for fileinfo in generate_fileinfos("tests/"):
        output += fileinfo.json()
    # FIXME: insert some proper check for validity
    # At minimum, assert that the generator produced some output instead of
    # passing unconditionally.
    self.assertTrue(output)