def __init__(
    self,
    name: str,
    model: dict,
    dataset: Union[GordoBaseDataset, dict],
    project_name: str,
    evaluation: Optional[dict] = None,
    metadata: Optional[Union[dict, Metadata]] = None,
    runtime=None,
):
    """
    Create a machine definition.

    Parameters
    ----------
    name: str
        Name of this machine.
    model: dict
        Model configuration block.
    dataset: Union[GordoBaseDataset, dict]
        Either a ready dataset instance or a dict to deserialize one from.
    project_name: str
        Project this machine belongs to; also used to derive ``self.host``.
    evaluation: Optional[dict]
        Evaluation config; defaults to ``{"cv_mode": "full_build"}``.
    metadata: Optional[Union[dict, Metadata]]
        Either a ``Metadata`` instance or a dict to deserialize one from.
    runtime:
        Runtime config mapping; defaults to an empty dict.
    """
    self.name = name
    self.model = model
    self.project_name = project_name

    # None-sentinel defaults keep mutable defaults out of the signature.
    self.runtime = {} if runtime is None else runtime
    self.evaluation = (
        {"cv_mode": "full_build"} if evaluation is None else evaluation
    )

    # Accept either an instance or its dict representation.
    if isinstance(dataset, GordoBaseDataset):
        self.dataset = dataset
    else:
        self.dataset = GordoBaseDataset.from_dict(dataset)

    raw_metadata = {} if metadata is None else metadata
    if isinstance(raw_metadata, Metadata):
        self.metadata = raw_metadata
    else:
        self.metadata = Metadata.from_dict(raw_metadata)  # type: ignore

    self.host = f"gordoserver-{self.project_name}-{self.name}"
def from_config(  # type: ignore
    cls, config: Dict[str, Any], project_name: str, config_globals=None
):
    """
    Construct an instance from a block of YAML config file which
    represents a single Machine; loaded as a ``dict``.

    Parameters
    ----------
    config: dict
        The loaded block of config which represents a 'Machine' in YAML
    project_name: str
        Name of the project this Machine belongs to.
    config_globals:
        The block of config within the YAML file within `globals`

    Returns
    -------
    :class:`~Machine`
    """
    if config_globals is None:
        config_globals = dict()

    name = config["name"]
    # Machine-level model wins; fall back to the global one.
    model = config.get("model") or config_globals.get("model")

    runtime = patch_dict(
        config_globals.get("runtime", dict()), config.get("runtime", dict())
    )

    # NOTE(review): the patch_dict argument order here is the reverse of
    # runtime/evaluation — preserved as-is; confirm which side should win.
    dataset_config = patch_dict(
        config.get("dataset", dict()), config_globals.get("dataset", dict())
    )
    dataset = GordoBaseDataset.from_dict(dataset_config)

    evaluation = patch_dict(
        config_globals.get("evaluation", dict()),
        config.get("evaluation", dict()),
    )

    # Keep global and machine metadata separate under fixed keys.
    metadata = Metadata(
        user_defined={
            "global-metadata": config_globals.get("metadata", dict()),
            "machine-metadata": config.get("metadata", dict()),
        }
    )

    return cls(
        name,
        model,
        dataset,
        metadata=metadata,
        runtime=runtime,
        project_name=project_name,
        evaluation=evaluation,
    )
def test_provide_saved_model_caching(
    should_be_equal: bool,
    metadata: Optional[Metadata],
    tag_list: Optional[List[SensorTag]],
    replace_cache,
    tmpdir,
):
    """
    Test provide_saved_model with caching and possible cache busting if
    tag_list, or replace_cache is set.

    Two models are built back to back against the same registry; caching is
    detected by comparing their model-creation-dates, which match if and
    only if the second build was served from the cache.

    Parameters
    ----------
    should_be_equal : bool
        Do we expect the two generated models to be at the same location or
        not? I.e. do we expect caching.
    metadata: Metadata
        Optional metadata which will be used as metadata for the second
        model.
    tag_list
        Optional list of strings which be used as the taglist in the dataset
        for the second model.
    replace_cache: bool
        Should we force a model cache replacement?
    """
    if tag_list is None:
        tag_list = []
    if metadata is None:
        metadata = Metadata()

    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    registry_dir = os.path.join(tmpdir, "registry")

    # First build: populates the registry cache.
    first = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        project_name="test",
    )
    _, machine_one = ModelBuilder(first).build(
        output_dir=os.path.join(tmpdir, "model"),
        model_register_dir=registry_dir,
    )

    # Mutate the dataset config only AFTER the first build, so an extra
    # tag busts the cache for the second build.
    if tag_list:
        data_config["tag_list"] = tag_list

    second = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        metadata=metadata,
        project_name="test",
        runtime={"something": True},
    )
    _, machine_two = ModelBuilder(machine=second).build(
        output_dir=os.path.join(tmpdir, "model2"),
        model_register_dir=registry_dir,
        replace_cache=replace_cache,
    )

    created_one = machine_one.metadata.build_metadata.model.model_creation_date
    created_two = machine_two.metadata.build_metadata.model.model_creation_date

    # Runtime passed to the second machine must survive the build.
    assert "something" in machine_two.runtime

    if should_be_equal:
        assert created_one == created_two
    else:
        assert created_one != created_two

    # Always true after the None-guard above; kept to mirror the original.
    if metadata is not None:
        assert metadata.user_defined == machine_two.metadata.user_defined
project_name="test") builder = ModelBuilder(machine) builder.build(output_dir=output_dir1, model_register_dir=registry_dir) builder.build(output_dir=output_dir2, model_register_dir=registry_dir) assert builder.cached_model_path == output_dir2 builder.build(output_dir=output_dir2, model_register_dir=registry_dir) assert builder.cached_model_path == output_dir2 @pytest.mark.parametrize( "should_be_equal,metadata,tag_list,replace_cache", [ (True, None, None, False), (True, Metadata(user_defined={"metadata": "something"}), None, False), (False, Metadata(user_defined={"metadata": "something"}), None, True), (False, None, [SensorTag("extra_tag", None)], False), (False, None, None, True), # replace_cache gives a new model location ], ) def test_provide_saved_model_caching( should_be_equal: bool, metadata: Optional[Metadata], tag_list: Optional[List[SensorTag]], replace_cache, tmpdir, ): """ Test provide_saved_model with caching and possible cache busting if tag_list, or replace_cache is set.