def search_file(filepath):
    """Look up *filepath* in the database by its SHA-1 (base32) hash.

    Prints the stored location (relative to settings.base_directory) when a
    matching row is found, or a not-found message otherwise.  Unreadable
    files are reported via fatal_error().
    """
    print('Searching database for {0}'.format(filepath), end='\n\n')
    file_info = None
    try:
        file_info = get_fileinfo(filepath)
    except FileNotFoundError:
        # BUG FIX: original called .format() with no argument, which raised
        # IndexError instead of showing the intended error message.
        fatal_error('Check file path; Unable to find file at {0}'.format(filepath))
    except PermissionError:
        fatal_error('Insufficient permissions to access {0}'.format(filepath))
    except BlockingIOError:
        fatal_error('Unable to access, another process has opened {0}'.format(filepath))
    # NOTE(review): the meaning of the first argument (4) is not visible from
    # here — presumably a lookup-mode/column selector; confirm against the
    # check_file_exists_in_database definition.
    row = check_file_exists_in_database(4, file_info['hashes']['sha1b32'])
    # BUG FIX: original used `row[0] is not ''` — identity comparison with a
    # string literal is implementation-dependent; use equality instead.
    if row[0] != '':
        print('\033[94mFile found! See {0}\033[0m'.format(
            os.path.join(settings.base_directory, row[0])))
    else:
        print('\033[94mFile not found in database.\033[0m')
def search_file(filepath):
    """Look up *filepath* in the database by its SHA-1 (base32) hash.

    Prints the stored location (relative to settings.base_directory) when a
    matching row is found, or a not-found message otherwise.  Unreadable
    files are reported via fatal_error().
    """
    print('Searching database for {0}'.format(filepath), end='\n\n')
    file_info = None
    try:
        file_info = get_fileinfo(filepath)
    except FileNotFoundError:
        # BUG FIX: original called .format() with no argument, which raised
        # IndexError instead of showing the intended error message.
        fatal_error('Check file path; Unable to find file at {0}'.format(filepath))
    except PermissionError:
        fatal_error('Insufficient permissions to access {0}'.format(filepath))
    except BlockingIOError:
        fatal_error('Unable to access, another process has opened {0}'.format(filepath))
    # NOTE(review): the meaning of the first argument (4) is not visible from
    # here — presumably a lookup-mode/column selector; confirm against the
    # check_file_exists_in_database definition.
    row = check_file_exists_in_database(4, file_info['hashes']['sha1b32'])
    # BUG FIX: original used `row[0] is not ''` — identity comparison with a
    # string literal is implementation-dependent; use equality instead.
    if row[0] != '':
        print('\033[94mFile found! See {0}\033[0m'.format(
            os.path.join(settings.base_directory, row[0])))
    else:
        print('\033[94mFile not found in database.\033[0m')
def verify():
    """Run a two-stage consistency check between the database and file store,
    then interactively offer to repair any inconsistencies found.

    Stage 1: database rows with no backing file (check_db_to_fs).
    Stage 2: stored files with no database row (check_fs_to_db).
    Repair deletes the orphaned DB rows, then re-imports the orphaned files
    through a temporary "verify" staging directory via import_files_work().
    """
    print("*** File manager verification ***\n")
    print("Beginning stage 1 (comparing database against file store)...")
    db_to_fs_bad = check_db_to_fs()
    if len(db_to_fs_bad) == 0:
        print("Stage 1 complete. No inconsistencies detected between database and file system.")
    print("\nBeginning stage 2 (comparing file store against database)...")
    fs_to_db_bad = check_fs_to_db()
    if len(fs_to_db_bad) == 0:
        print("Stage 2 complete. No inconsistencies detected between file system and database.")
    if len(fs_to_db_bad) == 0 and len(db_to_fs_bad) == 0:
        print("\n\nNo inconsistencies detected!")
    else:
        # we have to fix things
        print("\n\nFound {:,d} database and {:,d} file system inconsistencies.".format(
            len(db_to_fs_bad), len(fs_to_db_bad)))
        fix_it = input("\nDo you want to fix these issues? [Y|n]: ")
        # Anything except an explicit 'n' means yes (default Y).
        if fix_it.lower() != 'n':
            print("\nDeleting bad records from database...", end='')
            delete_files_from_db(db_to_fs_bad)
            print("Deleted {:,d} records from database!".format(len(db_to_fs_bad)))
            # set up a clean staging area for files to be imported from
            verify_directory = os.path.join(settings.base_directory, "verify")
            if os.path.isdir(verify_directory):
                shutil.rmtree(verify_directory)
            os.mkdir(verify_directory)
            print("Adding files to database...")
            for bad_file in fs_to_db_bad:
                fileinfo = get_fileinfo(bad_file)
                if file_exists_in_database(fileinfo):
                    # nuke it to be clean
                    delete_file_from_db(fileinfo)
                # move each file to a staging directory, then call import work on it
                tail = os.path.basename(bad_file)
                to_file = os.path.join(verify_directory, tail)
                unique_prefix = 0
                while os.path.isfile(to_file):
                    # file exists, so get a unique name
                    to_file = os.path.join(verify_directory,
                                           str(unique_prefix) + "_" + tail)
                    unique_prefix += 1
                shutil.move(bad_file, to_file)
            (files_added_to_database, total_files, files_deleted, files_copied,
             files_with_duplicate_hashes,
             files_with_invalid_extensions) = import_files_work(verify_directory)
            shutil.rmtree(verify_directory)
            print("\nAdded {:,d} files to database!".format(files_added_to_database))
            print("\n\n*** Repair complete! ***")
def import_files_work(dirname):
    """Walk *dirname* bottom-up and import every eligible file into the
    database and file store.

    Per file: zero-byte files and files with an auto-delete extension are
    removed; files with a recognized extension are hashed, added to the DB if
    the hash is new, copied into the file store if absent there, optionally
    mirrored to settings.copy_new_destination (with a copy log), and
    optionally deleted from the source.  Empty directories may be pruned.

    Returns a 6-tuple:
        (files_added_to_database, total_files, files_deleted, files_copied,
         files_with_duplicate_hashes, files_with_invalid_extensions)
    """
    files_with_invalid_extensions = []  # list of files we didn't import.
    total_files = 0
    files_added_to_database = 0
    files_deleted = 0
    files_with_duplicate_hashes = []
    files_copied = 0

    # Looking up each hash is sllllllow, so pull em all in as a set and just
    # look there!
    print("Getting existing hashes from database...", end='')
    existing_hashes = get_sha1b32_from_database()
    print("Got {:,d} hashes from database. Looking for files.\n".format(
        len(existing_hashes)))

    # topdown=False: children are visited before parents, so empty-directory
    # pruning below can cascade upward.
    for dirpath, dirnames, files in os.walk(dirname, topdown=False):
        total_files += len(files)
        file_counter = 0
        if len(files) > 0:
            safeprint("\n\tFound {:,d} files in {}. Processing...".format(
                len(files), dirpath))
        for name in files:
            full_path_name = os.path.join(dirpath, name)
            file_counter += 1
            if os.path.isfile(full_path_name):
                if os.path.getsize(full_path_name) == 0:
                    safeprint("\t\tDeleting 0 byte file '{}'.".format(
                        full_path_name))
                    os.remove(full_path_name)
                    continue
                # FIX: os.path.splitext always returns a 2-tuple, so the
                # original `if len(parts) == 2` guard was dead code.
                ext = os.path.splitext(name.lower())[1]
                # some files are always bad, so just make em go away.
                if ext in auto_delete_extensions:
                    safeprint(
                        '\t\t({} [{:,d}/{:,d}]): File {} has an autonuke extension. Deleting...'
                        .format(datetime.datetime.now().strftime('%x %X'),
                                file_counter, len(files), full_path_name))
                    os.remove(full_path_name)
                    continue
                if ext in extensions:
                    fileinfo = get_fileinfo(full_path_name)
                    if fileinfo['hashes']['sha1b32'] not in existing_hashes:
                        files_added_to_database += 1
                        safeprint(
                            "\t\t({} [{:,d}/{:,d}]): '{}' does not exist in database! Adding..."
                            .format(datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files), full_path_name))
                        # since this is a new file, we add it to our set for
                        # future import operations
                        existing_hashes.add(fileinfo['hashes']['sha1b32'])
                        add_file_to_db(fileinfo)
                    # Sync the file store regardless of whether the DB row
                    # already existed; copy_file_to_store returns False when
                    # the store already holds this hash.
                    copied = copy_file_to_store(fileinfo)
                    if copied:
                        safeprint(
                            '\t\t({} [{:,d}/{:,d}]): File with SHA-1 Base32 hash {} does not exist in file store! Copying {:,d} bytes...'
                            .format(datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files),
                                    fileinfo['hashes']['sha1b32'],
                                    fileinfo['filesize']))
                        files_copied += 1
                    else:
                        files_with_duplicate_hashes.append(full_path_name)
                    if len(settings.copy_new_destination) > 0 and copied:
                        if not os.path.exists(settings.copy_new_destination):
                            os.mkdir(settings.copy_new_destination)
                        # TODO should this create the 2 char structure too?
                        # for now, just copy it
                        copy_name = os.path.join(settings.copy_new_destination,
                                                 name)
                        unique_prefix = 0
                        while os.path.isfile(copy_name):
                            # file exists, so get a unique name
                            copy_name = os.path.join(
                                settings.copy_new_destination,
                                str(unique_prefix) + "_" + name)
                            unique_prefix += 1
                        shutil.copyfile(full_path_name, copy_name)
                        outfile = os.path.join(
                            settings.copy_new_destination,
                            "!!" + datetime.datetime.now().strftime("%Y-%m-%d")
                            + " File copy log " + '.txt')
                        with open(outfile, 'a', encoding="utf-16") as logfile:
                            logfile.write("{}: Copied {} to {}.\n".format(
                                datetime.datetime.now(), full_path_name,
                                copy_name))
                    if settings.delete_existing:
                        # NOTE(review): the message prints for any truthy
                        # setting, but removal only happens on exactly 'yes'
                        # (dry-run-ish behavior preserved from the original).
                        safeprint(
                            "\t\t({} [{:,d}/{:,d}]): Deleting '{}'...".format(
                                datetime.datetime.now().strftime('%x %X'),
                                file_counter, len(files), full_path_name))
                        if settings.delete_existing == 'yes':
                            os.remove(full_path_name)
                            files_deleted += 1
                else:
                    files_with_invalid_extensions.append(
                        os.path.join(dirpath, name))
        if settings.delete_empty_directories:
            if not os.listdir(dirpath):
                safeprint(
                    "\t\t({} [{:,d}/{:,d}]): Deleting empty directory '{}'...".format(
                        datetime.datetime.now().strftime('%x %X'),
                        file_counter, len(files), dirpath))
                if settings.delete_empty_directories == 'yes':
                    os.rmdir(dirpath)
    return (files_added_to_database, total_files, files_deleted, files_copied,
            files_with_duplicate_hashes, files_with_invalid_extensions)
def import_files_work(dirname):
    """Walk *dirname* bottom-up and import every eligible file into the
    database and file store.

    Per file: zero-byte files and files with an auto-delete extension are
    removed; files with a recognized extension are hashed, added to the DB if
    the hash is new, copied into the file store if absent there, optionally
    mirrored to settings.copy_new_destination (with a copy log), and
    optionally deleted from the source.  Empty directories may be pruned.

    Returns a 6-tuple:
        (files_added_to_database, total_files, files_deleted, files_copied,
         files_with_duplicate_hashes, files_with_invalid_extensions)
    """
    files_with_invalid_extensions = []  # list of files we didn't import.
    total_files = 0
    files_added_to_database = 0
    files_deleted = 0
    files_with_duplicate_hashes = []
    files_copied = 0

    # Looking up each hash is sllllllow, so pull em all in as a set and just
    # look there!
    print("Getting existing hashes from database...", end='')
    existing_hashes = get_sha1b32_from_database()
    print("Got {:,d} hashes from database. Looking for files.\n".format(
        len(existing_hashes)))

    # topdown=False: children are visited before parents, so empty-directory
    # pruning below can cascade upward.
    for dirpath, dirnames, files in os.walk(dirname, topdown=False):
        total_files += len(files)
        file_counter = 0
        if len(files) > 0:
            safeprint("\n\tFound {:,d} files in {}. Processing...".format(
                len(files), dirpath))
        for name in files:
            full_path_name = os.path.join(dirpath, name)
            file_counter += 1
            if os.path.isfile(full_path_name):
                if os.path.getsize(full_path_name) == 0:
                    safeprint("\t\tDeleting 0 byte file '{}'.".format(
                        full_path_name))
                    os.remove(full_path_name)
                    continue
                # FIX: os.path.splitext always returns a 2-tuple, so the
                # original `if len(parts) == 2` guard was dead code.
                ext = os.path.splitext(name.lower())[1]
                # some files are always bad, so just make em go away.
                if ext in auto_delete_extensions:
                    safeprint(
                        '\t\t({} [{:,d}/{:,d}]): File {} has an autonuke extension. Deleting...'
                        .format(datetime.datetime.now().strftime('%x %X'),
                                file_counter, len(files), full_path_name))
                    os.remove(full_path_name)
                    continue
                if ext in extensions:
                    fileinfo = get_fileinfo(full_path_name)
                    if fileinfo['hashes']['sha1b32'] not in existing_hashes:
                        files_added_to_database += 1
                        safeprint(
                            "\t\t({} [{:,d}/{:,d}]): '{}' does not exist in database! Adding..."
                            .format(datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files), full_path_name))
                        # since this is a new file, we add it to our set for
                        # future import operations
                        existing_hashes.add(fileinfo['hashes']['sha1b32'])
                        add_file_to_db(fileinfo)
                    # Sync the file store regardless of whether the DB row
                    # already existed; copy_file_to_store returns False when
                    # the store already holds this hash.
                    copied = copy_file_to_store(fileinfo)
                    if copied:
                        safeprint(
                            '\t\t({} [{:,d}/{:,d}]): File with SHA-1 Base32 hash {} does not exist in file store! Copying {:,d} bytes...'
                            .format(datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files),
                                    fileinfo['hashes']['sha1b32'],
                                    fileinfo['filesize']))
                        files_copied += 1
                    else:
                        files_with_duplicate_hashes.append(full_path_name)
                    if len(settings.copy_new_destination) > 0 and copied:
                        if not os.path.exists(settings.copy_new_destination):
                            os.mkdir(settings.copy_new_destination)
                        # TODO should this create the 2 char structure too?
                        # for now, just copy it
                        copy_name = os.path.join(settings.copy_new_destination,
                                                 name)
                        unique_prefix = 0
                        while os.path.isfile(copy_name):
                            # file exists, so get a unique name
                            copy_name = os.path.join(
                                settings.copy_new_destination,
                                str(unique_prefix) + "_" + name)
                            unique_prefix += 1
                        shutil.copyfile(full_path_name, copy_name)
                        outfile = os.path.join(
                            settings.copy_new_destination,
                            "!!" + datetime.datetime.now().strftime("%Y-%m-%d")
                            + " File copy log " + '.txt')
                        with open(outfile, 'a', encoding="utf-16") as logfile:
                            logfile.write("{}: Copied {} to {}.\n".format(
                                datetime.datetime.now(), full_path_name,
                                copy_name))
                    if settings.delete_existing:
                        # NOTE(review): the message prints for any truthy
                        # setting, but removal only happens on exactly 'yes'
                        # (dry-run-ish behavior preserved from the original).
                        safeprint(
                            "\t\t({} [{:,d}/{:,d}]): Deleting '{}'...".format(
                                datetime.datetime.now().strftime('%x %X'),
                                file_counter, len(files), full_path_name))
                        if settings.delete_existing == 'yes':
                            os.remove(full_path_name)
                            files_deleted += 1
                else:
                    files_with_invalid_extensions.append(
                        os.path.join(dirpath, name))
        if settings.delete_empty_directories:
            if not os.listdir(dirpath):
                safeprint(
                    "\t\t({} [{:,d}/{:,d}]): Deleting empty directory '{}'...".format(
                        datetime.datetime.now().strftime('%x %X'),
                        file_counter, len(files), dirpath))
                if settings.delete_empty_directories == 'yes':
                    os.rmdir(dirpath)
    return (files_added_to_database, total_files, files_deleted, files_copied,
            files_with_duplicate_hashes, files_with_invalid_extensions)