def test_new_registry():
    """
    A key written to a registry whose directory does not exist yet can be
    read back unchanged.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        # The sub-directory is deliberately not created up front: the registry
        # is handed a path that does not exist on disk yet.
        fresh_registry = pathlib.Path(scratch_dir) / "newregistry"
        disk_registry.write_key(fresh_registry, "akey", "aval")
        stored = disk_registry.get_value(fresh_registry, "akey")
        assert stored == "aval"
def test_complicated_happy_path():
    """
    A 'complicated' value (leading whitespace, non-ASCII characters) survives
    a write/read round trip through the registry.
    """
    payload = """ A long value with many weird character lie åøæ and some linebreaks"""
    with tempfile.TemporaryDirectory() as registry_dir:
        disk_registry.write_key(registry_dir, "akey", payload)
        round_tripped = disk_registry.get_value(registry_dir, "akey")
        assert round_tripped == payload
def provide_saved_model(
    name: str,
    model_config: dict,
    data_config: dict,
    metadata: dict,
    output_dir: Union[os.PathLike, str],
    model_register_dir: Union[os.PathLike, str] = None,
    replace_cache=False,
) -> Union[os.PathLike, str]:
    """
    Ensures that the desired model exists on disk, and returns the path to it.

    Builds the model if needed, or finds it among already existing models if
    ``model_register_dir`` is non-None, and we find the model there. If
    `model_register_dir` is set we will also store the model-location of the
    generated model there for future use. Think about it as a cache that is
    never emptied.

    Parameters
    ----------
    name: str
        Name of the model to be built
    model_config: dict
        Config for the model. See
        :func:`gordo_components.builder.build_model.build_model`.
    data_config: dict
        Config for the data-configuration. See
        :func:`gordo_components.builder.build_model.build_model`.
    metadata: dict
        Extra metadata to be added to the built models if it is built. See
        :func:`gordo_components.builder.build_model.build_model`.
    output_dir: Union[os.PathLike, str]
        A path to where the model will be deposited if it is built.
    model_register_dir:
        A path to a register, see `gordo_components.util.disk_registry`. If
        this is None then always build the model, otherwise try to resolve the
        model from the registry.
    replace_cache: bool
        Forces a rebuild of the model, and replaces the entry in the cache
        with the new model. The registry is not consulted for an existing
        location in this case.

    Returns
    -------
    Union[os.PathLike, str]:
        Path to the model: either the location found in the registry (when it
        exists on disk) or the location the freshly built model was saved to.
    """
    # Computed once; the same key is used for deletion, lookup and registration.
    cache_key = calculate_model_key(name, model_config, data_config, metadata=metadata)

    if model_register_dir:
        logger.info(
            f"Model caching activated, attempting to read model-location with key "
            f"{cache_key} from register {model_register_dir}"
        )
        if replace_cache:
            logger.info("replace_cache activated, deleting any existing cache entry")
            disk_registry.delete_value(model_register_dir, cache_key)
            # No lookup after the delete: a rebuild is forced regardless of
            # what the registry still holds.
        else:
            existing_model_location = disk_registry.get_value(
                model_register_dir, cache_key
            )

            # Check that the model is actually there
            if existing_model_location and Path(existing_model_location).exists():
                logger.debug(
                    f"Found existing model at path {existing_model_location}, returning it"
                )
                return existing_model_location
            elif existing_model_location:
                logger.warning(
                    f"Found that the model-path {existing_model_location} stored in the "
                    f"registry did not exist."
                )
            else:
                logger.info(
                    f"Did not find the model with key {cache_key} in the register at "
                    f"{model_register_dir}."
                )

    # Either caching is off, a rebuild was forced, or the registry had no
    # usable entry: build and save a new model.
    model, metadata = build_model(
        name=name,
        model_config=model_config,
        data_config=data_config,
        metadata=metadata,
    )
    model_location = _save_model_for_workflow(
        model=model, metadata=metadata, output_dir=output_dir
    )
    logger.info(f"Successfully built model, and deposited at {model_location}")

    if model_register_dir:
        logger.info("Writing model-location to model registry")
        disk_registry.write_key(model_register_dir, cache_key, model_location)
    return model_location
def provide_saved_model(
    name: str,
    model_config: dict,
    data_config: dict,
    metadata: dict,
    output_dir: Union[os.PathLike, str],
    model_register_dir: Union[os.PathLike, str] = None,
    replace_cache=False,
    evaluation_config: dict = None,
) -> Union[os.PathLike, str]:
    """
    Ensures that the desired model exists on disk in `output_dir`, and returns
    the path to it.

    Builds the model if needed, or finds it among already existing models if
    ``model_register_dir`` is non-None, and we find the model there. A model
    found in the register is copied to `output_dir`; if that copy raises
    ``FileExistsError`` we assume the model is already located in `output_dir`
    (no validation) and return that path. If `model_register_dir` is set we
    will also store the model-location of the generated model there for future
    use. Think about it as a cache that is never emptied.

    Parameters
    ----------
    name: str
        Name of the model to be built
    model_config: dict
        Config for the model. See
        :func:`gordo_components.builder.build_model.build_model`.
    data_config: dict
        Config for the data-configuration. See
        :func:`gordo_components.builder.build_model.build_model`.
    metadata: dict
        Extra metadata to be added to the built models if it is built. See
        :func:`gordo_components.builder.build_model.build_model`.
    output_dir: Union[os.PathLike, str]
        A path to where the model will be deposited if it is built.
    model_register_dir:
        A path to a register, see `gordo_components.util.disk_registry`. If
        this is None then always build the model, otherwise try to resolve the
        model from the registry.
    replace_cache: bool
        Forces a rebuild of the model, and replaces the entry in the cache
        with the new model.
    evaluation_config: dict
        Config for the evaluation. See
        :func:`gordo_components.builder.build_model.build_model`. When omitted
        (or None), ``{"cv_mode": "full_build"}`` is used.

    Returns
    -------
    Union[os.PathLike, str]:
        Path to the model
    """
    # A fresh dict per call: a dict literal in the signature would be shared
    # between calls and could be mutated by whoever receives it.
    if evaluation_config is None:
        evaluation_config = {"cv_mode": "full_build"}

    cache_key = calculate_model_key(
        name, model_config, data_config, evaluation_config, metadata=metadata
    )

    if model_register_dir:
        logger.info(
            f"Model caching activated, attempting to read model-location with key "
            f"{cache_key} from register {model_register_dir}"
        )
        if replace_cache:
            logger.info("replace_cache activated, deleting any existing cache entry")
            disk_registry.delete_value(model_register_dir, cache_key)
        else:
            cached_model_location = check_cache(model_register_dir, cache_key)
            if cached_model_location:
                logger.info(
                    f"Found model in cache, copying from {cached_model_location} to "
                    f"new location {output_dir} "
                )
                # Compare textual forms so that a str and a PathLike naming the
                # same location are recognised as equal and nothing is copied
                # onto itself.
                # NOTE(review): presumably check_cache returns the location as
                # stored in the registry (a str) - confirm.
                if str(cached_model_location) == str(output_dir):
                    return output_dir
                else:
                    try:
                        # Why not shutil.copytree? Because in python <3.7 it causes
                        # errors on Azure NFS, see:
                        # - https://bugs.python.org/issue24564
                        # - https://stackoverflow.com/questions/51616058/shutil-copystat-fails-inside-docker-on-azure/51635427#51635427
                        copy_tree(
                            str(cached_model_location),
                            str(output_dir),
                            preserve_mode=0,
                            preserve_times=0,
                        )
                    except FileExistsError:
                        logger.warning(
                            f"Found that output directory {output_dir} "
                            f"already exists, assuming model is "
                            f"already located there"
                        )
                    return output_dir

    # Either caching is off, a rebuild was forced, or the cache had no entry:
    # build and save a new model.
    model, metadata = build_model(
        name=name,
        model_config=model_config,
        data_config=data_config,
        metadata=metadata,
        evaluation_config=evaluation_config,
    )
    model_location = _save_model_for_workflow(
        model=model, metadata=metadata, output_dir=output_dir
    )
    logger.info(f"Successfully built model, and deposited at {model_location}")

    if model_register_dir:
        logger.info("Writing model-location to model registry")
        disk_registry.write_key(model_register_dir, cache_key, model_location)
    return model_location