def update_database(repo, fs_dir, do_checksum):
    """Rewrite the repository database so that it reflects ``fs_dir``.

    The filesystem is compared with the database through
    ``status.compare_fs_db(..., updating=True)``.  Every reported entry
    except the missing ones is written, sorted by file name, to
    ``<repo.db_file>.tmp``, which then takes the place of ``repo.db_file``.
    A summary is logged and ``status.do_clean(repo)`` is called last.

    How each category is written:

    * good      -- saved as reported.
    * different -- file info re-read with ``do_checksum=True``.
    * new       -- saved as reported when ``do_checksum`` is true, otherwise
      re-read with ``do_checksum=True`` so the entry carries a checksum.
    * moved     -- saved under the new name, without the ``"moved_from"`` key.
    * missing   -- not written, i.e. removed from the database.

    Args:
        repo: Object exposing ``db_file`` (path of the database file); also
            handed to ``status.compare_fs_db`` and ``status.do_clean``.
        fs_dir: Directory the relative file names are joined onto.
        do_checksum: Forwarded to ``status.compare_fs_db``.
    """
    lists_of_files = status.compare_fs_db(
        repo, fs_dir, do_checksum, updating=True
    )

    new = lists_of_files[config.NEW_FILES]
    missing = lists_of_files[config.MISSING_FILES]
    good = lists_of_files[config.GOOD_FILES]
    different = lists_of_files[config.DIFFERENT_FILES]
    moved = lists_of_files[config.MOVED_FILES]

    # Only MISSING entries are skipped; everything else is saved.
    to_save = list(good)
    to_refresh = list(different)
    if do_checksum:
        to_save += new
    else:
        # Force a checksum for entries that are about to enter the database.
        to_refresh += new

    for fname, _ in to_refresh:
        fs_fullpath = os.path.join(fs_dir, fname)
        info = common.get_file_info(fs_fullpath, do_checksum=True)
        to_save.append((fname, info))

    for fname, info in moved:
        # "moved_from" is bookkeeping from the comparison, not database data.
        del info["moved_from"]
        to_save.append((fname, info))

    # Sort to match the database order.
    to_save.sort(key=lambda entry: entry[0])

    tmp_db_file = "{}.tmp".format(repo.db_file)
    with open(tmp_db_file, "w+") as tmp_db_f:
        for fname, info in to_save:
            common.print_a_file(fname, info, tmp_db_f)

    # Remove the old database first (it may legitimately not exist yet), then
    # move the freshly written file into place.
    try:
        os.remove(repo.db_file)
    except OSError:
        pass
    os.rename(tmp_db_file, repo.db_file)

    log.warning("Database updated.")
    log.info("{} entries untouched".format(len(good)))
    # New files are the ones written to the database; missing files are the
    # ones left out of it.
    log.info("{} entries added".format(len(new)))
    log.info("{} entries updated".format(len(different)))
    log.info("{} entries removed".format(len(missing)))
    log.info("{} entries moved".format(len(moved)))

    status.do_clean(repo)
def _db_mentions(db_file, text):
    """Return True when grep finds ``text`` as a literal string in ``db_file``.

    grep is started without a shell and with ``-F`` so that quotes, ``$``,
    backticks or regex metacharacters in a file name are taken literally.
    Any grep failure (non-zero exit status, or the binary not being
    startable) counts as "not found".
    """
    import subprocess  # local import: only this sanity check needs it

    try:
        exit_status = subprocess.call(
            ["/usr/bin/grep", "-q", "-F", "--", text, db_file]
        )
    except OSError:
        return False
    return exit_status == 0


def _pair_moved_files(new, missing, fs_dir, do_checksum):
    """Extract moved files: new entries whose checksum equals a missing one.

    ``new`` and ``missing`` are modified in place: every paired entry is
    removed from both.  When several missing entries share the checksum of a
    new file, the earliest one still unpaired is used.

    Returns:
        A list of ``(new_relpath, info)`` tuples where ``info`` is a copy of
        the new file's info with an added ``"moved_from"`` key holding the
        relative path of the missing entry.
    """
    if not new or not missing:
        # Nothing can be paired; skip the (possibly costly) checksum reads.
        return []

    # checksum -> indexes into ``missing``, in their original order
    candidates = {}
    for idx, (_, db_info) in enumerate(missing):
        candidates.setdefault(db_info["md5sum"], []).append(idx)

    moved = []
    still_new = []
    paired = set()
    for new_entry in new:
        new_file, new_info = new_entry
        if not do_checksum:
            # The checksum was not computed during the comparison.
            fs_fullpath = os.path.join(fs_dir, new_file)
            new_info = common.get_file_info(fs_fullpath, do_checksum=True)

        matches = candidates.get(new_info["md5sum"])
        if not matches:
            still_new.append(new_entry)
            continue

        idx = matches.pop(0)
        paired.add(idx)
        missing_file = missing[idx][0]

        info = new_info.copy()
        info["moved_from"] = missing_file
        moved.append((new_file, info))
        log.warning(
            "{} # actually moved from {}".format(new_file, missing_file)
        )

    new[:] = still_new
    missing[:] = [
        entry for idx, entry in enumerate(missing) if idx not in paired
    ]
    return moved


def compare_fs_db(repo, fs_dir, do_checksum, updating=False):
    """Classify every file reported by ``progress_on_fs_and_db``.

    Each item yielded by ``progress_on_fs_and_db(repo, fs_dir, do_checksum)``
    is a ``(state, diff, db_entry, fs_entry)`` tuple.  ``db_entry`` unpacks
    to ``(relpath, info)`` and ``fs_entry`` to ``(fullpath, relpath, info)``;
    a side that does not unpack is treated as absent.

    After classification, new files whose ``"md5sum"`` equals that of a
    missing entry are reclassified as moved.

    Args:
        repo: Object exposing ``db_file``; also passed to
            ``progress_on_fs_and_db``.
        fs_dir: Directory the relative paths are joined onto.
        do_checksum: Forwarded to ``progress_on_fs_and_db``.  When false,
            file info of new files is re-read with ``do_checksum=True`` for
            move detection.
        updating: When true, the extra consistency assertions on missing and
            new entries are skipped.

    Returns:
        An ``OrderedDict`` keyed by ``config.GOOD_FILES``,
        ``config.NEW_FILES``, ``config.MISSING_FILES``,
        ``config.DIFFERENT_FILES`` and ``config.MOVED_FILES`` (in that
        order).  Values are lists of ``(relpath, info)`` tuples, except for
        the different files, which are ``(relpath, diff)`` tuples.

    Raises:
        AssertionError: On an unexpected state, or when one of the
            consistency checks fails.
    """
    good, missing, new, different = [], [], [], []

    progress = progress_on_fs_and_db(repo, fs_dir, do_checksum)
    for state, diff, db_entry, fs_entry in progress:
        # Reset both sides on every iteration so that a value from a previous
        # item can never leak into the current one.
        try:
            _, fs_relpath, fs_info = fs_entry
        except (TypeError, ValueError):
            fs_relpath = fs_info = None
        try:
            db_relpath, db_info = db_entry
        except (TypeError, ValueError):
            db_relpath = db_info = None

        if state is FileState.OK:
            good.append((db_relpath, db_info))
        elif state is FileState.DIFFERENT:
            assert diff
            different.append((db_relpath, diff))
        elif state is FileState.MISSING_IN_FS:
            if not updating:
                assert not os.path.exists(os.path.join(fs_dir, db_relpath))
            missing.append((db_relpath, db_info))
        elif state is FileState.MISSING_ON_DB:
            if not updating:
                # The database file must not mention this path.
                assert not _db_mentions(repo.db_file, fs_relpath)
            new.append((fs_relpath, fs_info))
        else:
            # MOVED should not happen here.
            message = "Incorrect state: {}".format(state)
            log.critical(message)
            # Explicit raise: unlike ``assert False`` it survives ``-O``.
            raise AssertionError(message)

    moved = _pair_moved_files(new, missing, fs_dir, do_checksum)

    return OrderedDict((
        (config.GOOD_FILES, good),
        (config.NEW_FILES, new),
        (config.MISSING_FILES, missing),
        (config.DIFFERENT_FILES, different),
        (config.MOVED_FILES, moved),
    ))