def deserialize(
    filepath: str,
    source_directory: str = "filestore",
    destination_directory: str = "system_temp",
    **kwargs,
) -> Dict[str, str]:
    """Copy a file between two registered directories and report its new home.

    Looks up both directory roots in ``FILEPATH_REGISTRY``, copies
    ``filepath`` from the source root to the destination root, and returns
    the (unchanged) relative filepath keyed to the directory it now lives in.

    :param filepath: path relative to both registered directory roots
    :param source_directory: registry key for the directory to copy from
    :param destination_directory: registry key for the directory to copy to
    :return: dict with ``filepath`` and ``source_directory`` (set to the
        destination registry key, since that is where the file now resides)
    """
    copy_source = join(FILEPATH_REGISTRY.get(source_directory), filepath)
    copy_target = join(FILEPATH_REGISTRY.get(destination_directory), filepath)
    DiskIOMethods.copy_file(copy_source, copy_target)
    # downstream steps should read the file from where it was just copied
    return {"filepath": filepath, "source_directory": destination_directory}
def serialize(
    obj: ddDataFrame,
    filepath: str,
    format_directory: str = JSON_DIRECTORY,
    format_extension: str = ".jsonl",
    destination_directory: str = "system_temp",
    **kwargs,
) -> Dict[str, str]:
    """Write a Dask DataFrame as partitioned JSON-lines files.

    Dask writes one file per partition, so the output path is templated
    with a ``*`` wildcard (producing ``<filepath>-0.jsonl``,
    ``<filepath>-1.jsonl``, ...). The matching ``read_json`` expects the
    same ``*`` pattern on the way back in.

    :param obj: the Dask DataFrame to persist
    :param filepath: base filename (no extension) for the partition files
    :param format_directory: subdirectory for this serialization format
    :param format_extension: file extension appended to each partition
    :param destination_directory: registry key for the output root
    :return: dict with ``filepaths`` (the written partition files, relative
        to the destination root) and ``source_directory``
    """
    # Append the filepath to the storage directory
    # read_json method expects a * format
    destination_folder = FILEPATH_REGISTRY.get(destination_directory)
    filename_format = join(format_directory, filepath + "-*" + format_extension)
    full_path = join(destination_folder, filename_format)
    DaskPersistenceMethods.to_json(obj, full_path)

    # Strip the destination root so returned paths stay relative to the
    # registered directory. Slicing off the known prefix length is safer
    # than str.split(destination_folder), which breaks whenever the folder
    # path happens to appear a second time inside a written path.
    filepaths = [
        written[len(destination_folder):].lstrip("/")
        for written in glob.glob(full_path)
    ]
    return {"filepaths": filepaths, "source_directory": destination_directory}
def deserialize(
    filepaths: List[str], source_directory: str = "system_temp", **kwargs
) -> Dict[str, Any]:
    """Load a Dask DataFrame from a list of JSON partition files.

    :param filepaths: partition file paths, relative to the registered
        source directory root
    :param source_directory: registry key for the directory to read from
    :return: dict with ``obj`` holding the loaded DataFrame
    """
    source_root = FILEPATH_REGISTRY.get(source_directory)
    # resolve each relative partition path against the registered root
    absolute_paths = []
    for relative_path in filepaths:
        absolute_paths.append(join(source_root, relative_path))
    return {"obj": DaskPersistenceMethods.read_json(absolute_paths)}
def serialize(
    obj: Any,
    filepath: str,
    format_directory: str = HDF5_DIRECTORY,
    format_extension: str = ".h5",
    destination_directory: str = "system_temp",
    **kwargs,
) -> Dict[str, str]:
    """Persist a Keras model in HDF5 format under the registered directory.

    :param obj: the Keras model to save
    :param filepath: base filename (no extension) for the saved model
    :param format_directory: subdirectory for this serialization format
    :param format_extension: extension appended to the filename
    :param destination_directory: registry key for the output root
    :return: dict with the relative ``filepath`` and ``source_directory``
    """
    # build the path relative to the format's storage subdirectory
    relative_path = join(format_directory, filepath + format_extension)
    absolute_path = join(FILEPATH_REGISTRY.get(destination_directory), relative_path)
    KerasPersistenceMethods.save_model(obj, absolute_path, save_format="h5")
    return {"filepath": relative_path, "source_directory": destination_directory}
def serialize(
    obj: ddDataFrame,
    filepath: str,
    format_directory: str = ORC_DIRECTORY,
    format_extension: str = ".orc",
    destination_directory: str = "system_temp",
    **kwargs,
) -> Dict[str, str]:
    """Persist a Dask DataFrame in ORC format under the registered directory.

    :param obj: the Dask DataFrame to persist
    :param filepath: base filename (no extension) for the output
    :param format_directory: subdirectory for this serialization format
    :param format_extension: extension appended to the filename
    :param destination_directory: registry key for the output root
    :return: dict with the relative ``filepath`` and ``source_directory``
    """
    # build the path relative to the format's storage subdirectory
    relative_path = join(format_directory, filepath + format_extension)
    absolute_path = join(FILEPATH_REGISTRY.get(destination_directory), relative_path)
    DaskPersistenceMethods.to_orc(obj, absolute_path)
    return {"filepath": relative_path, "source_directory": destination_directory}
def serialize(
    obj: Any,
    filepath: str,
    format_directory: str = PICKLE_DIRECTORY,
    format_extension: str = ".pkl",
    destination_directory: str = "system_temp",
    **kwargs,
) -> Dict[str, str]:
    """Pickle an object into the registered pickle storage directory.

    :param obj: any picklable object
    :param filepath: base filename (no extension) for the pickle file
    :param format_directory: subdirectory for this serialization format
    :param format_extension: extension appended to the filename
    :param destination_directory: registry key for the output root
    :return: dict with the relative ``filepath`` and ``source_directory``
    """
    # build the path relative to the pickle storage subdirectory
    relative_path = join(format_directory, filepath + format_extension)
    absolute_path = join(FILEPATH_REGISTRY.get(destination_directory), relative_path)
    # the parent folders may not exist yet on a fresh filesystem
    makedirs(dirname(absolute_path), exist_ok=True)
    PicklePersistenceMethods.dump_object(obj, absolute_path)
    return {"filepath": relative_path, "source_directory": destination_directory}
def deserialize(
    filepath: str, source_directory: str = "system_temp", **kwargs
) -> Dict[str, Any]:
    """Load a saved Keras model from the registered source directory.

    :param filepath: model path relative to the registered directory root
    :param source_directory: registry key for the directory to read from
    :return: dict with ``obj`` holding the loaded model
    """
    # resolve the relative path against the registered root before loading
    model_path = join(FILEPATH_REGISTRY.get(source_directory), filepath)
    return {"obj": KerasPersistenceMethods.load_model(model_path)}
def deserialize(
    filepath: str, source_directory: str = "system_temp", **kwargs
) -> Dict[str, pd.DataFrame]:
    """Load a pandas DataFrame from a JSON file in the registered directory.

    :param filepath: JSON file path relative to the registered directory root
    :param source_directory: registry key for the directory to read from
    :return: dict with ``obj`` holding the loaded DataFrame
    """
    # resolve the relative path against the registered root before reading
    json_path = join(FILEPATH_REGISTRY.get(source_directory), filepath)
    return {"obj": PandasPersistenceMethods.read_json(json_path)}