Exemplo n.º 1
0
def test_complicated_happy_path(tmpdir):
    """A multi-line value with non-ASCII characters survives a write/read round trip."""
    key = "akey"
    # Deliberately awkward payload: leading newline, indentation, line breaks
    # and non-ASCII letters.
    multiline_text = """
    A long
    value with many weird character lie åøæ
    and some linebreaks"""
    disk_registry.write_key(tmpdir, key, multiline_text)
    read_back = disk_registry.get_value(tmpdir, key)
    assert read_back == multiline_text
Exemplo n.º 2
0
def test_new_registry(tmpdir):
    """
    Writing to a registry path that has not been created yet works, and the
    value can be read back from that same path.
    """
    # The sub-directory is only named here; the test itself never creates it.
    fresh_registry = pathlib.Path(tmpdir) / "newregistry"
    key, expected = "akey", "aval"
    disk_registry.write_key(fresh_registry, key, expected)
    read_back = disk_registry.get_value(fresh_registry, key)
    assert read_back == expected
Exemplo n.º 3
0
def test_overwrites_existing(tmpdir):
    """A second write to the same key replaces the value stored by the first."""
    the_key = "akey"
    first_value = "Some value"
    disk_registry.write_key(tmpdir, the_key, first_value)
    assert disk_registry.get_value(tmpdir, the_key) == first_value

    # The second value must differ from the first; with identical values the
    # final assertion would pass even if the second write were ignored.
    second_value = "Some other value"
    assert second_value != first_value
    disk_registry.write_key(tmpdir, the_key, second_value)
    assert disk_registry.get_value(tmpdir, the_key) == second_value
Exemplo n.º 4
0
def test_delete(tmpdir):
    """Deleting a key makes later lookups return None and reports that it existed."""
    key, stored_value = "akey", "Some value"
    disk_registry.write_key(tmpdir, key, stored_value)
    assert disk_registry.get_value(tmpdir, key) == stored_value

    was_present = disk_registry.delete_value(tmpdir, key)
    # After the delete, the key no longer resolves to a value.
    assert disk_registry.get_value(tmpdir, key) is None
    # The key was written above, so the delete must report it as existing.
    assert was_present
Exemplo n.º 5
0
def test_simple_happy_path(tmpdir):
    """A plain string written under a key can be read back unchanged."""
    key, expected = "akey", "aval"
    disk_registry.write_key(tmpdir, key, expected)
    read_back = disk_registry.get_value(tmpdir, key)
    assert read_back == expected
Exemplo n.º 6
0
    def build(
        self,
        output_dir: Optional[Union[os.PathLike, str]] = None,
        model_register_dir: Optional[Union[os.PathLike, str]] = None,
        replace_cache=False,
    ) -> Tuple[sklearn.base.BaseEstimator, Machine]:
        """
        Produce a model together with its ``Machine``, building it or loading
        it from the cache.

        Without ``model_register_dir`` the model is always built. With it, the
        register is consulted first: a cached model is loaded when one is
        found, otherwise the model is built, saved and its location recorded
        in the register. When ``output_dir`` is given (and the machine is not
        configured for cross-validation only) the resulting model is also
        saved there, so supplying both arguments reads from the cache and
        stores that model in the new output directory.

        Parameters
        ----------
        output_dir: Optional[Union[os.PathLike, str]]
            Directory in which the model should be saved.
        model_register_dir: Optional[Union[os.PathLike, str]]
            Path to a register, see `:func:gordo.util.disk_registry`.
            A falsy value disables caching and forces a build.
        replace_cache: bool
            When true, any existing register entry for this model is deleted
            and the model is rebuilt, replacing the cached entry.

        Returns
        -------
        Tuple[sklearn.base.BaseEstimator, Machine]
            The model and the ``Machine`` describing it.
        """
        if model_register_dir:
            logger.debug(
                f"Model caching activated, attempting to read model-location with key "
                f"{self.cache_key} from register {model_register_dir}"
            )
            self.cached_model_path = self.check_cache(model_register_dir)

            if replace_cache:
                # Drop the register entry and forget the path so that the
                # cache-miss branch below runs.
                logger.info("replace_cache=True, deleting any existing cache entry")
                disk_registry.delete_value(model_register_dir, self.cache_key)
                self.cached_model_path = None

            if not self.cached_model_path:
                # Cache miss: build, save, then record the location in the register.
                model, machine = self._build()
                self.cached_model_path = self._save_model(
                    model=model, machine=machine, output_dir=output_dir  # type: ignore
                )
                logger.info(f"Built model, and deposited at {self.cached_model_path}")
                logger.info(f"Writing model-location to model registry")
                disk_registry.write_key(  # type: ignore
                    model_register_dir, self.cache_key, self.cached_model_path
                )
            else:
                # Cache hit: load the stored model and metadata, overriding the
                # user-defined metadata and runtime with this machine's values.
                cached_location = self.cached_model_path
                model = serializer.load(cached_location)
                stored_meta = serializer.load_metadata(cached_location)
                stored_meta["metadata"]["user_defined"] = self.machine.metadata.user_defined
                stored_meta["runtime"] = self.machine.runtime
                machine = Machine(**stored_meta)
        else:
            # No register supplied: caching is off, always build.
            model, machine = self._build()

        # Save to the requested directory unless building for cv purposes only.
        if output_dir:
            cv_mode = self.machine.evaluation.get("cv_mode")
            if cv_mode != "cross_val_only":
                self.cached_model_path = self._save_model(
                    model=model, machine=machine, output_dir=output_dir
                )
        return model, machine