def test_complicated_happy_path(tmpdir):
    """Tests that it works to write and read a 'complicated' value.

    The value mixes non-ASCII characters with embedded linebreaks. The
    linebreaks are written as explicit ``\\n`` escapes so the test keeps
    exercising a multi-line value however the source file is reformatted.
    """
    value = (
        "\nA long\n"
        "value with many weird character lie åøæ\n"
        "and some linebreaks"
    )
    disk_registry.write_key(tmpdir, "akey", value)
    # The value read back must match the written one exactly.
    assert disk_registry.get_value(tmpdir, "akey") == value
def test_new_registry(tmpdir):
    """
    A value written to a registry whose directory does not exist yet can be
    read back unchanged.
    """
    # Sub-path of the temporary directory that has not been created.
    fresh_registry = pathlib.Path(tmpdir) / "newregistry"
    key, expected = "akey", "aval"

    disk_registry.write_key(fresh_registry, key, expected)

    stored = disk_registry.get_value(fresh_registry, key)
    assert stored == expected
def test_overwrites_existing(tmpdir):
    """Double writes to the same registry overwrites the first value"""
    the_key = "akey"

    first_value = "Some value"
    disk_registry.write_key(tmpdir, the_key, first_value)
    assert disk_registry.get_value(tmpdir, the_key) == first_value

    # The second value must differ from the first; with identical values the
    # final assertion would pass even if the second write were ignored.
    second_value = "Some other value"
    disk_registry.write_key(tmpdir, the_key, second_value)
    assert disk_registry.get_value(tmpdir, the_key) == second_value
def test_delete(tmpdir):
    """Deleting a stored key makes later lookups return None."""
    key, stored_value = "akey", "Some value"

    # Store the key and confirm it is readable before deleting it.
    disk_registry.write_key(tmpdir, key, stored_value)
    assert disk_registry.get_value(tmpdir, key) == stored_value

    was_present = disk_registry.delete_value(tmpdir, key)

    lookup_after_delete = disk_registry.get_value(tmpdir, key)
    assert lookup_after_delete is None
    # The key existed, so the delete call must report a truthy result.
    assert was_present
def test_simple_happy_path(tmpdir):
    """A simple value written to a fresh registry can be read back unchanged."""
    key, expected = "akey", "aval"

    disk_registry.write_key(tmpdir, key, expected)

    stored = disk_registry.get_value(tmpdir, key)
    assert stored == expected
def build(
    self,
    output_dir: Optional[Union[os.PathLike, str]] = None,
    model_register_dir: Optional[Union[os.PathLike, str]] = None,
    replace_cache=False,
) -> Tuple[sklearn.base.BaseEstimator, Machine]:
    """
    Return a model together with its ``Machine`` metadata, building the
    model or loading it from the cache as needed.

    Without ``model_register_dir`` the model is always built. With it, the
    register is consulted first: a cached model is loaded if one is found,
    otherwise the model is built, saved, and its location written to the
    register. If ``output_dir`` is given, the resulting model is also saved
    there unless the machine's ``cv_mode`` is ``"cross_val_only"``.

    Parameters
    ----------
    output_dir: Optional[Union[os.PathLike, str]]
        Where the model should be deposited.
    model_register_dir: Optional[Union[os.PathLike, str]]
        Path to a register, see ``gordo.util.disk_registry``. When falsy,
        the cache is not consulted and the model is always built.
    replace_cache: bool
        Only relevant with ``model_register_dir``: delete any existing
        register entry for this model's cache key and rebuild, so the
        register ends up pointing at the new model.

    Returns
    -------
    Tuple[sklearn.base.BaseEstimator, Machine]
        The built or loaded model, and an updated ``Machine``.
    """
    if model_register_dir:
        logger.debug(
            f"Model caching activated, attempting to read model-location with key "
            f"{self.cache_key} from register {model_register_dir}"
        )
        self.cached_model_path = self.check_cache(model_register_dir)

        if replace_cache:
            # Drop the register entry and forget the cached path, which
            # forces the rebuild branch below.
            logger.info("replace_cache=True, deleting any existing cache entry")
            disk_registry.delete_value(model_register_dir, self.cache_key)
            self.cached_model_path = None

        if not self.cached_model_path:
            # Cache miss: build, save, and record the location in the register.
            # NOTE(review): when output_dir is set, the model appears to be
            # saved here and again by the final save step below; confirm
            # that the double save is intended.
            model, machine = self._build()
            self.cached_model_path = self._save_model(
                model=model, machine=machine, output_dir=output_dir  # type: ignore
            )
            logger.info(f"Built model, and deposited at {self.cached_model_path}")
            logger.info("Writing model-location to model registry")
            disk_registry.write_key(  # type: ignore
                model_register_dir, self.cache_key, self.cached_model_path
            )
        else:
            # Cache hit: load the stored model and metadata, then overwrite
            # the user-defined metadata and runtime with this machine's values.
            model = serializer.load(self.cached_model_path)
            cached_metadata = serializer.load_metadata(self.cached_model_path)
            cached_metadata["metadata"][
                "user_defined"
            ] = self.machine.metadata.user_defined
            cached_metadata["runtime"] = self.machine.runtime
            machine = Machine(**cached_metadata)
    else:
        # No register supplied: always build.
        model, machine = self._build()

    # Save model to disk, if we're not building for cv only purposes.
    if output_dir:
        if self.machine.evaluation.get("cv_mode") != "cross_val_only":
            self.cached_model_path = self._save_model(
                model=model, machine=machine, output_dir=output_dir
            )

    return model, machine