def get_all_files(self, path, extensions=None, exclude_regex_list=()):
    """Recursively get all files which match a path and extension.

    :param str path: Path to start recursive file listing
    :param tuple(str) extensions: File extensions to include (whitelist).
        Entries are compared against the lowercased file suffix without its
        leading dot. When empty or None, the union of ``extensions`` of every
        class returned by ``get_all_subclasses(Base)`` is used instead.
    :param exclude_regex_list: Iterable of regular expression patterns. They
        are compiled and handed to ``self.should_exclude`` together with each
        candidate path; a file is skipped when that call returns a true
        value. ``None`` is treated like an empty collection.
    :returns: generator of file paths (``dirname`` joined with ``filename``)
    """
    # If no whitelist was supplied then we accept all supported extensions.
    if not extensions:
        extensions = set()
        for cls in get_all_subclasses(Base):
            extensions.update(cls.extensions)

    # Compile the exclusion patterns once, before walking the tree.
    compiled_regex_list = [
        re.compile(regex) for regex in (exclude_regex_list or ())
    ]

    for dirname, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            # The cheap extension test runs first, so should_exclude is only
            # consulted for files that already passed the whitelist.
            if os.path.splitext(filename)[1][1:].lower() not in extensions:
                continue

            filename_path = os.path.join(dirname, filename)
            # NOTE(review): the meaning of the third positional argument
            # (False) is defined by should_exclude, which is not visible here.
            if not self.should_exclude(filename_path, compiled_regex_list, False):
                yield filename_path
def get_all_files(self, path, extensions=None, check_extensions=False):
    """Recursively get all files under a path, optionally filtered by extension.

    :param str path: Path to start recursive file listing
    :param tuple(str) extensions: File extensions to include (whitelist).
        Only consulted when ``check_extensions`` is true. When empty or None,
        the union of ``extensions`` of every class returned by
        ``get_all_subclasses(Base)`` is used.
    :param bool check_extensions: Whether to check extensions or to just get
        files regardless. When true, files whose lowercased suffix (without
        the dot) is not whitelisted are reported through ``log.warn`` and
        not yielded.
    :returns: generator of file paths (``dirname`` joined with ``filename``)
    """
    # The whitelist is only read in filtering mode, so skip resolving the
    # supported extensions when every file is yielded anyway.
    if check_extensions and not extensions:
        extensions = set()
        for cls in get_all_subclasses(Base):
            extensions.update(cls.extensions)

    for dirname, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            file_path = os.path.join(dirname, filename)
            if not check_extensions:
                yield file_path
            elif os.path.splitext(filename)[1][1:].lower() in extensions:
                yield file_path
            else:
                log.warn("Ignored extension found at {}".format(file_path))
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1
    signatures of media files.

    The existing hash database is backed up and reset, then every file
    returned by ``FILESYSTEM.get_all_files(source, None)`` is examined: files
    with a supported extension are checksummed and added, the others are
    logged and recorded as failures in the result report.

    :param str source: Directory to scan; ``~`` is expanded and the path is
        made absolute. The process exits with status 1 if it is not a
        directory.
    """
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Union of the extensions declared by every media class.
    supported = set()
    for media_cls in get_all_subclasses(Base):
        supported.update(media_cls.extensions)

    candidates = set(FILESYSTEM.get_all_files(source, None))

    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for candidate in candidates:
        suffix = os.path.splitext(candidate)[1][1:].lower()
        if suffix in supported:
            result.append((candidate, True))
            db.add_hash(db.checksum(candidate), candidate)
        else:
            log.info('Skipping invalid file %s' % candidate)
            result.append((candidate, False))

    db.update_hash_db()
    result.write()
def import_file(file_path, config, manifest, metadata_dict, move=False, allow_duplicates=False, dryrun=False):
    """Set file metadata and move it to destination.

    A manifest entry is generated for ``file_path`` against the first
    configured target and merged into ``manifest`` under the file's checksum.
    Unless this is a dry run or a skipped duplicate, the entry is then
    executed.

    :param str file_path: File to import.
    :param dict config: Configuration; ``config["targets"][0]`` is the target
        used, and its ``"base_path"`` is passed to the execution step.
    :param manifest: Object providing ``checksum``, ``entries`` and ``merge``.
    :param metadata_dict: Passed through to ``FILESYSTEM.generate_manifest``.
    :param bool move: Forwarded as ``move_not_copy`` when executing.
    :param bool allow_duplicates: When false, a file whose checksum is
        already in ``manifest.entries`` is merged but not executed.
    :param bool dryrun: When true, only the manifest is generated.
    :returns: None if the file is missing or unsupported; True for a skipped
        duplicate; ``manifest_entry is not None`` on a dry run; otherwise the
        value returned by ``FILESYSTEM.execute_manifest``.
    """
    if not os.path.exists(file_path):
        log.warn('Import_file: Could not find %s' % file_path)
        return

    target = config["targets"][0]
    target_base_path = target["base_path"]

    # NOTE: the "source cannot be inside destination" check that an earlier
    # revision performed here was deliberately disabled (judged to be the
    # wrong place for it), as were album_from_folder and trash handling.

    # Creates an object of the right type, using the file extension
    # ie .jpg -> photo
    media = Media.get_class_by_file(file_path, get_all_subclasses())
    if not media:
        log.warn('Not a supported file (%s)' % file_path)
        return

    checksum = manifest.checksum(file_path)
    # Must be evaluated before the merge below adds this checksum.
    is_duplicate = checksum in manifest.entries

    # Merge regardless of duplicates, to record all sources for a given file.
    manifest_entry = FILESYSTEM.generate_manifest(
        file_path, target, metadata_dict, media)
    manifest.merge({checksum: manifest_entry})

    if is_duplicate and not allow_duplicates:
        skip_message = "[ ] File {} already present in manifest; allow_duplicates is false; skipping"
        log.debug(skip_message.format(file_path))
        return True

    if dryrun:
        log.info("Generated manifest: {}".format(file_path))
        return manifest_entry is not None

    return FILESYSTEM.execute_manifest(
        file_path, manifest_entry, target_base_path, move_not_copy=move)
def main(argv):
    """Run ``add_original_name`` over every path given on the command line.

    :param list argv: Argument vector; ``argv[0]`` is ignored. Each remaining
        entry has ``~`` expanded. Directories are expanded through
        ``FileSystem.get_all_files(path, None)``; anything else is processed
        as a single file. One ``(decoded path, status)`` row is recorded per
        processed file and the report is written at the end.
    """
    filesystem = FileSystem()
    result = Result()
    subclasses = get_all_subclasses()

    for raw_path in argv[1:]:
        expanded = os.path.expanduser(raw_path)

        # A directory fans out to its files; a plain path stands for itself.
        if os.path.isdir(expanded):
            sources = filesystem.get_all_files(expanded, None)
        else:
            sources = [expanded]

        for source in sources:
            status = add_original_name(source, subclasses)
            result.append((_decode(source), status))

    result.write()
def get_all_files(self, path, extensions=None):
    """Recursively get all files which match a path and extension.

    :param str path: Path to start recursive file listing
    :param tuple(str) extensions: File extensions to include (whitelist).
        When empty or None, the union of ``extensions`` of every class
        returned by ``get_all_subclasses(Base)`` is used.
    :returns: generator of file paths (``dirname`` joined with ``filename``)
    """
    # Fall back to every supported extension when no whitelist was given.
    whitelist = extensions or {
        ext
        for media_cls in get_all_subclasses(Base)
        for ext in media_cls.extensions
    }

    for dirname, _subdirs, filenames in os.walk(path):
        for name in filenames:
            # Compare the lowercased suffix, without its leading dot.
            suffix = os.path.splitext(name)[1][1:].lower()
            if suffix in whitelist:
                yield os.path.join(dirname, name)
def import_file(_file, destination, album_from_folder, trash, allow_duplicates):
    """Set file metadata and move it to destination.

    :param _file: Path of the file to import (passed through ``_decode``).
    :param destination: Destination root (passed through ``_decode``).
    :param bool album_from_folder: When true, ``media.set_album_from_folder()``
        is called before processing.
    :param bool trash: When true, the source file is sent to the trash after
        processing.
    :param bool allow_duplicates: Forwarded to ``FILESYSTEM.process_file`` as
        ``allowDuplicate``.
    :returns: The destination path reported by ``FILESYSTEM.process_file``,
        or None when the file is missing, the destination lies inside the
        source file's directory, the file type is unsupported, or
        processing returned a falsy value.
    """
    _file = _decode(_file)
    destination = _decode(destination)

    if not os.path.exists(_file):
        log.warn('Could not find %s' % _file)
        log.all('{"source":"%s", "error_msg":"Could not find %s"}' %
                (_file, _file))
        return

    # Reject a destination that lives inside the source file's own directory.
    if destination.startswith(
            os.path.abspath(os.path.dirname(_file)) + os.sep):
        log.all(
            '{"source": "%s", "destination": "%s", "error_msg": "Source cannot be in destination"}'
            % (_file, destination))
        return

    media = Media.get_class_by_file(_file, get_all_subclasses())
    if not media:
        log.warn('Not a supported file (%s)' % _file)
        log.all('{"source":"%s", "error_msg":"Not a supported file"}' % _file)
        return

    if album_from_folder:
        media.set_album_from_folder()

    dest_path = FILESYSTEM.process_file(_file, destination, media,
                                        allowDuplicate=allow_duplicates,
                                        move=False)
    if dest_path:
        log.all('%s -> %s' % (_file, dest_path))
    # NOTE(review): the source is trashed even when dest_path is falsy --
    # confirm this is intended and not meant to depend on a successful copy.
    if trash:
        send2trash(_file)

    return dest_path or None
def test_get_all_subclasses():
    """get_all_subclasses(Base) returns Base plus every media class."""
    found = get_all_subclasses(Base)

    assert found == {Base, Media, Audio, Photo, Text, Video}, found
def _update(album, location, time, title, paths, debug):
    """Update a file's EXIF. Automatically modifies the file's location and
    file name accordingly.

    :param album: New album, applied through ``media.set_album`` when truthy.
    :param location: New location, applied through ``update_location`` when
        truthy.
    :param time: New time, applied through ``update_time`` when truthy.
    :param title: New title, applied through ``media.set_title`` when truthy.
    :param paths: Files and/or directories to update; ``~`` is expanded and
        directories are expanded via ``FILESYSTEM.get_all_files(path, None)``.
    :param debug: Stored in ``constants.debug``.

    Writes a result report and exits with status 1 if any file was missing
    or any processed file produced no destination path.
    """
    constants.debug = debug
    has_errors = False
    result = Result()

    files = set()
    for path in paths:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            files.update(FILESYSTEM.get_all_files(path, None))
        else:
            files.add(path)

    for current_file in files:
        if not os.path.exists(current_file):
            has_errors = True
            result.append((current_file, False))
            log.warn('Could not find %s' % current_file)
            log.all('{"source":"%s", "error_msg":"Could not find %s"}' %
                    (current_file, current_file))
            continue

        current_file = os.path.expanduser(current_file)

        # The destination folder structure could contain any number of levels
        #  So we calculate that and traverse up the tree.
        # '/path/to/file/photo.jpg' -> '/path/to/file' ->
        #  ['path','to','file'] -> ['path','to'] -> '/path/to'
        current_directory = os.path.dirname(current_file)
        destination_depth = -1 * len(FILESYSTEM.get_folder_path_definition())
        destination = os.sep.join(
            os.path.normpath(
                current_directory
            ).split(os.sep)[:destination_depth]
        )

        media = Media.get_class_by_file(current_file, get_all_subclasses())
        if not media:
            continue

        updated = False
        if location:
            update_location(media, current_file, location)
            updated = True
        if time:
            update_time(media, current_file, time)
            updated = True
        if album:
            media.set_album(album)
            updated = True

        # Updating a title can be problematic when doing it 2+ times on a file.
        # You would end up with img_001.jpg -> img_001-first-title.jpg ->
        # img_001-first-title-second-title.jpg.
        # To resolve that we have to track the prior title (if there was one).
        # Then we massage the updated_media's metadata['base_name'] to remove
        # the old title.
        # Since FileSystem.get_file_name() relies on base_name it will properly
        # rename the file by updating the title instead of appending it.
        remove_old_title_from_name = False
        if title:
            # We call get_metadata() to cache it before making any changes
            metadata = media.get_metadata()
            title_update_status = media.set_title(title)
            original_title = metadata['title']
            if title_update_status and original_title:
                # @TODO: We should move this to a shared method since
                # FileSystem.get_file_name() does it too.
                original_title = re.sub(r'\W+', '-', original_title.lower())
                original_base_name = metadata['base_name']
                remove_old_title_from_name = True
            updated = True

        if updated:
            updated_media = Media.get_class_by_file(current_file,
                                                    get_all_subclasses())
            # See comments above on why we have to do this when titles
            # get updated.
            if remove_old_title_from_name and len(original_title) > 0:
                updated_media.get_metadata()
                updated_media.set_metadata_basename(
                    original_base_name.replace('-%s' % original_title, ''))

            dest_path = FILESYSTEM.process_file(current_file, destination,
                                                updated_media, move=True,
                                                allowDuplicate=True)
            log.info(u'%s -> %s' % (current_file, dest_path))
            log.all('{"source":"%s", "destination":"%s"}' % (current_file,
                                                              dest_path))
            # If the folder we moved the file out of or its parent are empty
            # we delete it.
            FILESYSTEM.delete_directory_if_empty(os.path.dirname(current_file))
            FILESYSTEM.delete_directory_if_empty(
                os.path.dirname(os.path.dirname(current_file)))
            result.append((current_file, dest_path))
            # Errors are sticky: once tripped, has_errors stays True; a
            # falsy dest_path trips it.
            has_errors = has_errors or not dest_path
        else:
            # Nothing was requested for this file. Record it as not updated,
            # but do not clear has_errors: `files` is a set, so resetting it
            # here made the exit status depend on iteration order.
            result.append((current_file, False))

    result.write()

    if has_errors:
        sys.exit(1)