def run_all_after(self, file_path, destination_folder, final_file_path, metadata):
    """Run the `after()` method of each loaded plugin.

    :param str file_path: Path of the source file that was imported.
    :param str destination_folder: Folder the file was imported into.
    :param str final_file_path: Full path of the file at its destination.
    :param dict metadata: Metadata of the imported file.
    :returns: bool -- False if any plugin raised ElodiePluginError.
    """
    self.load()
    pass_status = True
    for cls in self.classes:
        this_method = getattr(self.classes[cls], 'after')
        # We try to call the plugin's `after()` method.
        # If the method explicitly raises an ElodiePluginError we'll fail the
        # import by setting pass_status to False.
        # If any other error occurs we log the message and proceed as usual.
        # By default, plugins don't change behavior.
        try:
            this_method(file_path, destination_folder, final_file_path, metadata)
            log.info('Called after() for {}'.format(cls))
        except ElodiePluginError as err:
            # Bug fix: this message previously said "run_all_before"; also
            # removed a stray debug statement that logged the literal 'false'.
            log.warn('Plugin {} raised an exception in run_all_after: {}'.
                     format(cls, err))
            log.error(format_exc())
            pass_status = False
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; the traceback is logged and the plugin skipped.
            log.error(format_exc())
    return pass_status
def import_file(_file, destination, album_from_folder, trash, allow_duplicates):
    """Set file metadata and move it to destination.

    :param str _file: Path of the source file.
    :param str destination: Root folder to import into.
    :param bool album_from_folder: Use the parent folder name as the album.
    :param bool trash: Send the source file to the trash after importing.
    :param bool allow_duplicates: Import even if the file is a known duplicate.
    :returns: destination path on success, None otherwise.
    """
    if not os.path.exists(_file):
        log.warn('Could not find %s' % _file)
        print('{"source":"%s", "error_msg":"Could not find %s"}' % \
            (_file, _file))
        return
    # Check if the source, _file, is a child folder within destination.
    # Bug fix: compare against the absolute dirname plus a trailing separator
    # so that e.g. '/a/bc' is not mistaken for a child of '/a/b'.
    elif destination.startswith(
            os.path.abspath(os.path.dirname(_file)) + os.sep):
        print('{"source": "%s", "destination": "%s", "error_msg": "Source cannot be in destination"}' % (_file, destination))
        return

    media = Media.get_class_by_file(_file, [Text, Audio, Photo, Video])
    if not media:
        log.warn('Not a supported file (%s)' % _file)
        print('{"source":"%s", "error_msg":"Not a supported file"}' % _file)
        return

    if album_from_folder:
        media.set_album_from_folder()

    dest_path = FILESYSTEM.process_file(_file, destination, media,
        allowDuplicate=allow_duplicates, move=False)
    if dest_path:
        print('%s -> %s' % (_file, dest_path))
    if trash:
        send2trash(_file)

    return dest_path or None
def get_all_files(self, path, extensions=None, check_extensions=False):
    """Recursively yield every file beneath a path, optionally filtered.

    :param str path: Root directory for the recursive walk.
    :param extensions: Whitelist of extensions (without the dot). When
        falsy, the union of every Base subclass's extensions is used.
    :param bool check_extensions: When True only whitelisted files are
        yielded and the rest are logged; when False every file is yielded.
    :returns: generator of file paths
    """
    if not extensions:
        # Build the default whitelist from every registered media subclass.
        extensions = set()
        for subclass in get_all_subclasses(Base):
            extensions.update(subclass.extensions)

    for root, _, names in os.walk(path):
        for entry in names:
            full_path = os.path.join(root, entry)
            if not check_extensions:
                yield full_path
                continue
            suffix = os.path.splitext(entry)[1][1:].lower()
            if suffix in extensions:
                yield full_path
            else:
                log.warn("Ignored extension found at {}".format(full_path))
def import_file(file_path, config, manifest, metadata_dict, move=False, allow_duplicates=False, dryrun=False):
    """Set file metadata and move it to destination.

    :param str file_path: Path of the source file to import.
    :param dict config: Loaded configuration; only config["targets"][0]
        is used and its "base_path" is the import root.
    :param manifest: Manifest of already-imported files keyed by checksum;
        updated in place with an entry for this file.
    :param dict metadata_dict: ExifTool metadata, presumably keyed by
        absolute source path — TODO confirm against the caller's batch scrape.
    :param bool move: Move instead of copy when executing the manifest.
    :param bool allow_duplicates: Import even when the checksum is already
        present in the manifest.
    :param bool dryrun: Only generate the manifest entry; do not touch files.
    :returns: True on success or intentional skip, a falsy value on failure.
    """
    if not os.path.exists(file_path):
        log.warn('Import_file: Could not find %s' % file_path)
        return

    # Only the first configured target is supported for now.
    target = config["targets"][0]
    target_base_path = target["base_path"]

    # Check if the source, _file, is a child folder within destination
    # .... this is not the right time to be checking for that. Lots of unnecessary checks
    # elif destination.startswith(os.path.abspath(os.path.dirname(_file))+os.sep):
    #     print('{"source": "%s", "destination": "%s", "error_msg": "Source cannot be in destination"}' % (_file, destination))
    #     return

    # Creates an object of the right type, using the file extension ie .jpg -> photo
    media = Media.get_class_by_file(file_path, get_all_subclasses())
    if not media:
        log.warn('Not a supported file (%s)' % file_path)
        return

    # if album_from_folder:
    #     media.set_album_from_folder()

    checksum = manifest.checksum(file_path)
    # Duplicate status must be captured BEFORE the merge below adds this file.
    is_duplicate = (checksum in manifest.entries)

    # Merge it into the manifest regardless of duplicate entries, to record all sources for a given file
    manifest_entry = FILESYSTEM.generate_manifest(file_path, target, metadata_dict, media)
    manifest.merge({checksum: manifest_entry})

    if (not allow_duplicates) and is_duplicate:
        # Returning True here counts a skipped duplicate as a success.
        log.debug(
            "[ ] File {} already present in manifest; allow_duplicates is false; skipping"
            .format(file_path))
        return True

    if dryrun:
        # Dry run: the manifest entry was generated but no file is touched.
        log.info("Generated manifest: {}".format(file_path))
        return manifest_entry is not None
    else:
        result = FILESYSTEM.execute_manifest(file_path, manifest_entry, target_base_path, move_not_copy=move)

    # if dest_path:
    #     print('%s -> %s' % (_file, dest_path))
    # if trash:
    #     send2trash(_file)

    return result
def execute_manifest(self, source_path, manifest_entry, base_path, move_not_copy=False):
    """Copy or move source_path to the location described by its manifest entry.

    If an identical file is already at the destination it is skipped (or
    re-copied when its mtime differs, to refresh metadata). If a different
    file occupies the destination name, the source is written alongside it
    with its checksum embedded in the file name.

    :param str source_path: Path of the file to place.
    :param dict manifest_entry: Entry with a "target" dict holding the
        relative "path" and file "name" at the destination.
    :param str base_path: Root that target paths are relative to.
    :param bool move_not_copy: Move instead of copy when True.
    :returns: bool -- True on success or intentional skip, False on error.
    """
    if move_not_copy:
        manipulate_file = shutil.move
        manipulation = "moved"
    else:
        manipulate_file = shutil.copy2
        manipulation = "copied"

    target_manifest = manifest_entry["target"]
    destination = os.path.join(base_path, target_manifest["path"], target_manifest["name"])

    # If there's already a file there...
    if os.path.isfile(destination):
        # Hoisted: the source checksum is needed on both branches below.
        source_sum = checksum(source_path)
        # Check that it's the same file. situations: a) edited but kept same name, b) corrupted
        if checksum(destination) == source_sum:
            if os.path.getmtime(destination) == os.path.getmtime(source_path):
                log.debug("[ ] File {} already exists at {} and is intact, with metadata; skipping".format(source_path, destination))
            else:
                log.debug(
                    "[ ] File {} already exists at {} and is intact but is missing metadata; overwriting".format(
                        source_path, destination
                    ))
                self.create_directory(os.path.join(base_path, target_manifest["path"]))
                manipulate_file(source_path, destination)
        else:
            # Contents differ: keep both files by appending the source
            # checksum to the target name.
            target_name, target_ext = os.path.splitext(target_manifest["name"])
            target_name_with_hash = ''.join([target_name, '.', source_sum, target_ext])
            destination_name_with_hash = os.path.join(base_path, target_manifest["path"], target_name_with_hash)
            manipulate_file(source_path, destination_name_with_hash)
            log.debug("[ ] File {} already exists at {} but is corrupt or edited; copying with hash: {}".format(
                source_path, destination, target_name_with_hash
            ))
        return True
    else:
        try:
            if os.path.isfile(source_path):
                self.create_directory(os.path.join(base_path, target_manifest["path"]))
                manipulate_file(source_path, destination)
                log.debug("[*] File {} {} to {}".format(source_path, manipulation, destination))
                return True
            else:
                log.debug("[*] File not found at source, could not move/copy: {} ".format(source_path))
        except Exception as e:
            # Bug fix: this format string previously contained a literal line
            # break after "[!] ", which split the string across lines and
            # broke the source.
            log.warn("[!] Exception moving/copying {} to {}: {}".format(source_path, destination, e))
        return False
def run_batch(self):
    """Run the `batch()` method of each loaded plugin.

    :returns: bool -- False if any plugin raised ElodiePluginError.
    """
    self.load()
    pass_status = True
    for cls in self.classes:
        this_method = getattr(self.classes[cls], 'batch')
        # We try to call the plugin's `batch()` method.
        # If the method explicitly raises an ElodiePluginError we'll fail the import
        # by setting pass_status to False.
        # If any other error occurs we log the message and proceed as usual.
        # By default, plugins don't change behavior.
        try:
            this_method()
            log.info('Called batch() for {}'.format(cls))
        except ElodiePluginError as err:
            log.warn('Plugin {} raised an exception in run_batch: {}'.format(cls, err))
            log.error(format_exc())
            pass_status = False
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit here; the traceback is still logged.
            log.error(format_exc())
    return pass_status
def import_file(_file, destination, album_from_folder, trash, allow_duplicates):
    """Set file metadata and move it to destination.

    :param str _file: Path of the source file.
    :param str destination: Root folder to import into.
    :param bool album_from_folder: Use the parent folder name as the album.
    :param bool trash: Send the source file to the trash after importing.
    :param bool allow_duplicates: Import even if the file is a known duplicate.
    :returns: destination path on success, None otherwise.
    """
    # Bug fix: this docstring previously appeared after the _decode() calls,
    # so Python never recognized it as the function's docstring.
    _file = _decode(_file)
    destination = _decode(destination)

    if not os.path.exists(_file):
        log.warn('Could not find %s' % _file)
        log.all('{"source":"%s", "error_msg":"Could not find %s"}' %
                  (_file, _file))
        return

    # Check if the source, _file, is a child folder within destination
    elif destination.startswith(
            os.path.abspath(os.path.dirname(_file)) + os.sep):
        log.all(
            '{"source": "%s", "destination": "%s", "error_msg": "Source cannot be in destination"}'
            % (_file, destination))
        return

    media = Media.get_class_by_file(_file, get_all_subclasses())
    if not media:
        log.warn('Not a supported file (%s)' % _file)
        log.all('{"source":"%s", "error_msg":"Not a supported file"}' % _file)
        return

    if album_from_folder:
        media.set_album_from_folder()

    dest_path = FILESYSTEM.process_file(_file, destination, media,
        allowDuplicate=allow_duplicates, move=False)
    if dest_path:
        log.all('%s -> %s' % (_file, dest_path))
    if trash:
        send2trash(_file)

    return dest_path or None
def write(self, write_path=None, indent=False, overwrite=True):
    """Serialize the manifest entries to disk as JSON.

    :param str write_path: Explicit output path. When None, a timestamped
        file under a '.manifest_history' folder next to self.file_path is
        generated and used as the fallback output.
    :param bool indent: Pretty-print the JSON when True.
    :param bool overwrite: When True and self.file_path already exists,
        write to self.file_path (write_path is ignored in that case);
        otherwise write to write_path.
    """
    file_path, file_name = os.path.split(self.file_path)
    name, ext = os.path.splitext(file_name)

    def _dump(handle):
        # Single serialization point so both output branches stay in sync
        # (previously this if/else was duplicated verbatim in each branch).
        if indent:
            json.dump(self.entries, handle, indent=2, separators=(',', ': '))
        else:
            json.dump(self.entries, handle, separators=(',', ':'))

    if write_path is None:
        filesystem.FileSystem().create_directory(
            os.path.join(file_path, '.manifest_history'))
        write_name = "{}{}".format(
            '_'.join(
                [name, datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')]),
            ext)
        # TODO: check to see if you're already in a manifest_history directory, so as to not nest another one
        write_path = os.path.join(file_path, '.manifest_history', write_name)

    if overwrite is True and os.path.exists(self.file_path):
        log.info("Writing manifest to {}".format(self.file_path))
        with open(self.file_path, 'w') as f:
            _dump(f)
    else:
        # NOTE(review): when overwrite is True but self.file_path does not
        # exist yet, we still fall through to the history path — confirm
        # that a brand-new manifest should not go to self.file_path instead.
        log.warn("Not overwriting manifest at {}".format(
            self.file_path))
        log.info("Writing manifest to {}".format(write_path))
        with open(write_path, 'w') as f:
            _dump(f)
    log.info("Manifest written.")
def process_file(self, _file, destination, media, **kwargs):
    """Process a single media file: run plugins, derive its destination
    from metadata, move/copy it there, and record it in the hash db.

    :param str _file: Path of the source media file.
    :param str destination: Import root folder.
    :param media: Media subclass instance wrapping _file.
    :param kwargs: 'move' (bool, default False) moves instead of copying;
        'allowDuplicate' (bool, default False) bypasses the checksum check.
    :returns: destination path on success, None on any failure or skip.
    """
    move = kwargs.get('move', False)
    allow_duplicate = kwargs.get('allowDuplicate', False)

    stat_info_original = os.stat(_file)
    metadata = media.get_metadata()

    if(not media.is_valid()):
        print('%s is not a valid media file. Skipping...' % _file)
        return

    checksum = self.process_checksum(_file, allow_duplicate)
    if(checksum is None):
        log.info('Original checksum returned None for %s. Skipping...' %
                 _file)
        return

    # Run `before()` for every loaded plugin and if any of them raise an exception
    # then we skip importing the file and log a message.
    plugins_run_before_status = self.plugins.run_all_before(_file, destination)
    if(plugins_run_before_status == False):
        log.warn('At least one plugin pre-run failed for %s' % _file)
        return

    directory_name = self.get_folder_path(metadata)
    dest_directory = os.path.join(destination, directory_name)
    file_name = self.get_file_name(metadata)
    dest_path = os.path.join(dest_directory, file_name)

    media.set_original_name()

    # If source and destination are identical then
    # we should not write the file. gh-210
    if(_file == dest_path):
        print('Final source and destination path should not be identical')
        return

    self.create_directory(dest_directory)

    # exiftool renames the original file by appending '_original' to the
    # file name. A new file is written with new tags with the initial file
    # name. See exiftool man page for more details.
    exif_original_file = _file + '_original'

    # Check if the source file was processed by exiftool and an _original
    # file was created.
    exif_original_file_exists = os.path.exists(exif_original_file)

    if(move is True):
        stat = os.stat(_file)
        # Move the processed file into the destination directory
        shutil.move(_file, dest_path)

        if(exif_original_file_exists is True):
            # We can remove it as we don't need the initial file.
            os.remove(exif_original_file)

        os.utime(dest_path, (stat.st_atime, stat.st_mtime))
    else:
        if(exif_original_file_exists is True):
            # Move the newly processed file with any updated tags to the
            # destination directory
            shutil.move(_file, dest_path)
            # Move the exif _original back to the initial source file
            shutil.move(exif_original_file, _file)
        else:
            compatability._copyfile(_file, dest_path)

        # Set the utime based on what the original file contained
        # before we made any changes.
        # Then set the utime on the destination file based on metadata.
        os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime))
        self.set_utime_from_metadata(metadata, dest_path)

    db = Db()
    db.add_hash(checksum, dest_path)
    db.update_hash_db()

    # Run `after()` for every loaded plugin and if any of them raise an exception
    # then we fail the import and log a message.
    plugins_run_after_status = self.plugins.run_all_after(_file, destination,
                                                          dest_path, metadata)
    if(plugins_run_after_status == False):
        # Bug fix: this message previously said "pre-run" even though it
        # reports a post-run (after) plugin failure.
        log.warn('At least one plugin post-run failed for %s' % _file)
        return

    return dest_path
def _update(album, location, time, title, paths, debug):
    """Update a file's EXIF. Automatically modifies the file's location
    and file name accordingly.

    :param str album: New album name, if any.
    :param str location: New location, if any.
    :param str time: New time, if any.
    :param str title: New title, if any.
    :param paths: Files or directories to update.
    :param bool debug: Enable debug output via constants.debug.
    Exits with status 1 if any file failed to update.
    """
    constants.debug = debug
    has_errors = False
    result = Result()

    # Expand any directories into the individual files they contain.
    files = set()
    for path in paths:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            files.update(FILESYSTEM.get_all_files(path, None))
        else:
            files.add(path)

    for current_file in files:
        if not os.path.exists(current_file):
            has_errors = True
            result.append((current_file, False))
            log.warn('Could not find %s' % current_file)
            log.all('{"source":"%s", "error_msg":"Could not find %s"}' %
                    (current_file, current_file))
            continue

        current_file = os.path.expanduser(current_file)

        # The destination folder structure could contain any number of levels
        # So we calculate that and traverse up the tree.
        # '/path/to/file/photo.jpg' -> '/path/to/file' ->
        #  ['path','to','file'] -> ['path','to'] -> '/path/to'
        current_directory = os.path.dirname(current_file)
        destination_depth = -1 * len(FILESYSTEM.get_folder_path_definition())
        destination = os.sep.join(
            os.path.normpath(
                current_directory
            ).split(os.sep)[:destination_depth]
        )

        media = Media.get_class_by_file(current_file, get_all_subclasses())
        if not media:
            continue

        updated = False
        if location:
            update_location(media, current_file, location)
            updated = True
        if time:
            update_time(media, current_file, time)
            updated = True
        if album:
            media.set_album(album)
            updated = True

        # Updating a title can be problematic when doing it 2+ times on a file.
        # You would end up with img_001.jpg -> img_001-first-title.jpg ->
        # img_001-first-title-second-title.jpg.
        # To resolve that we have to track the prior title (if there was one.
        # Then we massage the updated_media's metadata['base_name'] to remove
        # the old title.
        # Since FileSystem.get_file_name() relies on base_name it will properly
        # rename the file by updating the title instead of appending it.
        remove_old_title_from_name = False
        if title:
            # We call get_metadata() to cache it before making any changes
            metadata = media.get_metadata()
            title_update_status = media.set_title(title)
            original_title = metadata['title']
            if title_update_status and original_title:
                # @TODO: We should move this to a shared method since
                # FileSystem.get_file_name() does it too.
                original_title = re.sub(r'\W+', '-', original_title.lower())
                original_base_name = metadata['base_name']
                remove_old_title_from_name = True
            updated = True

        if updated:
            updated_media = Media.get_class_by_file(current_file,
                                                    get_all_subclasses())
            # See comments above on why we have to do this when titles
            # get updated.
            if remove_old_title_from_name and len(original_title) > 0:
                updated_media.get_metadata()
                updated_media.set_metadata_basename(
                    original_base_name.replace('-%s' % original_title, ''))

            dest_path = FILESYSTEM.process_file(current_file, destination,
                updated_media, move=True, allowDuplicate=True)
            log.info(u'%s -> %s' % (current_file, dest_path))
            log.all('{"source":"%s", "destination":"%s"}' %
                    (current_file, dest_path))
            # If the folder we moved the file out of or its parent are empty
            # we delete it.
            FILESYSTEM.delete_directory_if_empty(os.path.dirname(current_file))
            FILESYSTEM.delete_directory_if_empty(
                os.path.dirname(os.path.dirname(current_file)))
            result.append((current_file, dest_path))
            # Flag an error if process_file returned a falsy destination.
            has_errors = has_errors or not dest_path
        else:
            # Bug fix: this branch previously reset has_errors to False,
            # silently discarding errors recorded for earlier files.
            result.append((current_file, False))

    result.write()

    if has_errors:
        sys.exit(1)
def signal_handler(signum, stack):
    """Handle SIGINT during an import: note the cancellation, flush the
    accumulated log to log_path, then exit with status 0."""
    log.warn('[ ] Import cancelled')
    log.write(log_path)
    sys.exit(0)
def _import(source, config_path, manifest_path, allow_duplicates, dryrun,
            debug, move=False, indent_manifest=False,
            no_overwrite_manifest=False):
    """Import files or directories by reading their EXIF and organizing them
    accordingly.

    Walks the configured source tree, scrapes metadata in batches via
    ExifTool, imports each file through import_file(), then writes the
    manifest and a per-run log. Exits with status 1 if any file failed.
    """
    start_time = round(time.time())

    constants.debug = debug
    has_errors = False
    # NOTE(review): `result` is re-bound to import_file()'s return value in
    # the loop below, so this Result() object is discarded without being
    # written — confirm whether per-file result reporting was intended.
    result = Result()

    # Load the configuration from the json file.
    config = Config().load_from_file(config_path)

    source = config["sources"][0]  # For now, only one.
    target = config["targets"][
        0]  # For now, only one target allowed...but data structure allows more

    source_file_path = source["file_path"]

    manifest = Manifest()

    if manifest_path is not None:
        manifest.load_from_file(manifest_path)

    # The run log lives in a '.elodie' folder next to the manifest file.
    log_base_path, _ = os.path.split(manifest.file_path)
    FILESYSTEM.create_directory(os.path.join(log_base_path, '.elodie'))
    log_path = os.path.join(log_base_path, '.elodie',
                            'import_{}.log'.format(utility.timestamp_string()))

    def signal_handler(sig, frame):
        # Flush the in-memory log to disk before dying on Ctrl-C.
        log.warn('[ ] Import cancelled')
        log.write(log_path)
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    original_manifest_key_count = len(manifest)

    # destination = _decode(destination)
    # destination = os.path.abspath(os.path.expanduser(destination))

    exiftool_addedargs = [
        # '-overwrite_original',
        u'-config',
        u'"{}"'.format(constants.exiftool_config)
    ]

    file_generator = FILESYSTEM.get_all_files(source_file_path, None)
    source_file_count = 0

    with ExifTool(addedargs=exiftool_addedargs) as et:
        while True:
            # Pull files lazily in fixed-size batches so huge trees are not
            # listed up front.
            file_batch = list(
                itertools.islice(file_generator,
                                 constants.exiftool_batch_size))
            if len(file_batch) == 0:
                break

            # This will cause slight discrepancies in file counts: since elodie.json is counted but not imported,
            # each one will set the count off by one.
            source_file_count += len(file_batch)

            metadata_list = et.get_metadata_batch(file_batch)
            if not metadata_list:
                raise Exception("Metadata scrape failed.")
            # Key on the filename to make for easy access,
            metadata_dict = dict((os.path.abspath(el["SourceFile"]), el)
                                 for el in metadata_list)
            for current_file in file_batch:
                # Don't import localized config files.
                if current_file.endswith(
                        "elodie.json"):  # Faster than a os.path.split
                    continue
                try:
                    result = import_file(current_file,
                                         config,
                                         manifest,
                                         metadata_dict,
                                         move=move,
                                         dryrun=dryrun,
                                         allow_duplicates=allow_duplicates)
                except Exception as e:
                    log.warn("[!] Error importing {}: {}".format(
                        current_file, e))
                    result = False
                has_errors = has_errors or not result
        exiftool_waiting_time = et.waiting_time

    manifest.write(indent=indent_manifest,
                   overwrite=(not no_overwrite_manifest))

    manifest_key_count = len(manifest)

    # Statistics are best-effort: a sub-second run rounds total_time to 0
    # and raises ZeroDivisionError below, which is caught and logged.
    try:
        total_time = round(time.time() - start_time)
        log.info("Statistics:")
        log.info("Source: File Count {}".format(source_file_count))
        log.info("Manifest: New Hashes {}".format(manifest_key_count -
                                                  original_manifest_key_count))
        log.info("Manifest: Total Hashes {}".format(manifest_key_count))
        log.info("Time: Total {}s".format(total_time))
        log.info("Time: Files/sec {}".format(
            round(source_file_count / total_time)))
        log.info("Time: Waiting on ExifTool {}s".format(
            round(exiftool_waiting_time)))
    except Exception as e:
        log.error("[!] Error generating statistics: {}".format(e))

    log.write(log_path)

    if has_errors:
        sys.exit(1)