Example #1
0
    def __init__(
        self,
        name: str,
        model: dict,
        dataset: Union[GordoBaseDataset, dict],
        project_name: str,
        evaluation: Optional[dict] = None,
        metadata: Optional[Union[dict, Metadata]] = None,
        runtime=None,
    ):
        """
        Initialize a Machine.

        Parameters
        ----------
        name: str
            Name of this machine.
        model: dict
            Model configuration block.
        dataset: Union[GordoBaseDataset, dict]
            Dataset instance, or a dict to be converted via
            ``GordoBaseDataset.from_dict``.
        project_name: str
            Name of the project this machine belongs to.
        evaluation: Optional[dict]
            Evaluation configuration; defaults to ``{"cv_mode": "full_build"}``.
        metadata: Optional[Union[dict, Metadata]]
            Metadata instance, or a dict to be converted via
            ``Metadata.from_dict``; defaults to empty.
        runtime
            Runtime configuration dict; defaults to empty.
        """
        # Defaults are created per-call to avoid the shared mutable-default trap.
        self.name = name
        self.model = model
        self.project_name = project_name
        self.runtime = {} if runtime is None else runtime
        self.evaluation = (
            dict(cv_mode="full_build") if evaluation is None else evaluation
        )

        # Accept either an already-built dataset or its dict representation.
        if isinstance(dataset, GordoBaseDataset):
            self.dataset = dataset
        else:
            self.dataset = GordoBaseDataset.from_dict(dataset)

        # Same duality for metadata: pass-through or construct from dict.
        raw_metadata = {} if metadata is None else metadata
        if isinstance(raw_metadata, Metadata):
            self.metadata = raw_metadata
        else:
            self.metadata = Metadata.from_dict(raw_metadata)  # type: ignore

        # Derived hostname used to address the server for this machine.
        self.host = f"gordoserver-{self.project_name}-{self.name}"
Example #2
0
    def from_config(  # type: ignore
            cls,
            config: Dict[str, Any],
            project_name: str,
            config_globals=None):
        """
        Construct an instance from a block of YAML config file which represents
        a single Machine; loaded as a ``dict``.

        Parameters
        ----------
        config: dict
            The loaded block of config which represents a 'Machine' in YAML
        project_name: str
            Name of the project this Machine belongs to.
        config_globals:
            The block of config within the YAML file within `globals`

        Returns
        -------
        :class:`~Machine`
        """
        cfg_globals = dict() if config_globals is None else config_globals

        name = config["name"]
        # Machine-level model wins; fall back to the globally configured one.
        model = config.get("model") or cfg_globals.get("model")

        runtime = patch_dict(
            cfg_globals.get("runtime", dict()),
            config.get("runtime", dict()),
        )

        # NOTE(review): argument order here is the reverse of runtime/evaluation
        # (machine config first, globals second) — preserved as-is; confirm
        # against patch_dict's precedence semantics.
        dataset_config = patch_dict(
            config.get("dataset", dict()),
            cfg_globals.get("dataset", dict()),
        )
        dataset = GordoBaseDataset.from_dict(dataset_config)

        evaluation = patch_dict(
            cfg_globals.get("evaluation", dict()),
            config.get("evaluation", dict()),
        )

        # Keep global and per-machine user metadata under separate keys.
        metadata = Metadata(
            user_defined={
                "global-metadata": cfg_globals.get("metadata", dict()),
                "machine-metadata": config.get("metadata", dict()),
            }
        )

        return cls(
            name,
            model,
            dataset,
            metadata=metadata,
            runtime=runtime,
            project_name=project_name,
            evaluation=evaluation,
        )
Example #3
0
def test_provide_saved_model_caching(
    should_be_equal: bool,
    metadata: Optional[Metadata],
    tag_list: Optional[List[SensorTag]],
    replace_cache,
    tmpdir,
):
    """
    Verify ModelBuilder caching, and that the cache is busted when either
    ``tag_list`` changes the dataset or ``replace_cache`` is set.

    Two models are built against the same registry; their creation dates are
    equal if and only if the second build was served from the cache.

    Parameters
    ----------
    should_be_equal : bool
        Whether the two builds are expected to hit the same cached model.
    metadata: Metadata
        Optional metadata applied to the second model.
    tag_list
        Optional tag list injected into the second model's dataset config.
    replace_cache: bool
        Whether to force a cache replacement on the second build.
    """
    tag_list = [] if tag_list is None else tag_list
    metadata = Metadata() if metadata is None else metadata

    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "model")
    registry_dir = os.path.join(tmpdir, "registry")

    # First build: populates the registry cache.
    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        project_name="test",
    )
    _, first_machine = ModelBuilder(machine).build(
        output_dir=output_dir, model_register_dir=registry_dir
    )

    # Optionally perturb the dataset config to bust the cache.
    if tag_list:
        data_config["tag_list"] = tag_list

    # Second build: same registry, possibly differing config / forced replace.
    second_definition = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        metadata=metadata,
        project_name="test",
        runtime={"something": True},
    )
    _, second_machine = ModelBuilder(machine=second_definition).build(
        output_dir=os.path.join(tmpdir, "model2"),
        model_register_dir=registry_dir,
        replace_cache=replace_cache,
    )

    first_created = first_machine.metadata.build_metadata.model.model_creation_date
    second_created = second_machine.metadata.build_metadata.model.model_creation_date
    assert "something" in second_machine.runtime

    if should_be_equal:
        assert first_created == second_created
    else:
        assert first_created != second_created

    # metadata is always non-None here (defaulted above); guard kept from the
    # original flow.
    if metadata is not None:
        assert metadata.user_defined == second_machine.metadata.user_defined
Example #4
0
                      project_name="test")
    builder = ModelBuilder(machine)
    builder.build(output_dir=output_dir1, model_register_dir=registry_dir)

    builder.build(output_dir=output_dir2, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir2

    builder.build(output_dir=output_dir2, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir2


@pytest.mark.parametrize(
    "should_be_equal,metadata,tag_list,replace_cache",
    [
        (True, None, None, False),
        (True, Metadata(user_defined={"metadata": "something"}), None, False),
        (False, Metadata(user_defined={"metadata": "something"}), None, True),
        (False, None, [SensorTag("extra_tag", None)], False),
        (False, None, None, True),  # replace_cache gives a new model location
    ],
)
def test_provide_saved_model_caching(
    should_be_equal: bool,
    metadata: Optional[Metadata],
    tag_list: Optional[List[SensorTag]],
    replace_cache,
    tmpdir,
):
    """
    Test provide_saved_model with caching and possible cache busting if tag_list, or replace_cache is set.