def test_build_cv_mode_cross_val_cache(
    tmpdir,
    should_save_model: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    runner: CliRunner,
    machine: Machine,
):
    """
    Run ``gordo build`` twice against the same output directory.

    The first build uses ``cv_mode_1`` as the machine's evaluation setting and
    the second uses ``cv_mode_2``. Both invocations must exit with code 0.
    Afterwards the output directory must be non-empty when
    ``should_save_model`` is true and empty otherwise.

    NOTE(review): this is meant to cover a second build reusing the result of
    the first (presumably via a build cache); the assertions below only
    inspect the exit codes and the output directory contents - confirm that
    this is sufficient coverage for the caching behaviour.

    ``tmpdir`` is passed to the build as ``OUTPUT_DIR``. ``cv_mode_1`` and
    ``cv_mode_2`` are assigned to ``machine.evaluation`` unchanged.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    for cv_mode in (cv_mode_1, cv_mode_2):
        machine.evaluation = cv_mode  # type: ignore
        with temp_env_vars(
            MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)
        ):
            result = runner.invoke(cli.gordo, ["build"])
        # Without this check a crashed build would look like "nothing saved"
        # and the empty-directory branch below would pass by accident.
        assert result.exit_code == 0, result.output

    output_files = os.listdir(tmpdir)
    if should_save_model:
        assert len(output_files) > 0
    else:
        assert len(output_files) == 0
def test_build_cv_mode(
    tmpdir,
    runner: CliRunner,
    should_save_model: bool,
    cv_mode: str,
    machine: Machine,
):
    """
    Build with ``--print-cv-scores`` using the given ``cv_mode``.

    The machine is configured with ``MODEL_CONFIG_WITH_PREDICT`` and the build
    writes to a ``tmp`` sub-directory of ``tmpdir``. The test asserts that the
    command exits with code 0, that the output directory is non-empty exactly
    when ``should_save_model`` is true, and that the printed output mentions
    each of the expected score names.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = cv_mode  # type: ignore
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    output_dir = os.path.join(tmpdir, "tmp")
    os.makedirs(output_dir, exist_ok=True)

    env_vars = {
        "MACHINE": json.dumps(machine.to_dict()),
        "OUTPUT_DIR": output_dir,
    }
    with temp_env_vars(**env_vars):
        result = runner.invoke(cli.gordo, ["build", "--print-cv-scores"])

    assert result.exit_code == 0

    # The output directory has content only when a model is expected to be
    # saved.
    produced = os.listdir(output_dir)
    if should_save_model:
        assert len(produced) != 0
    else:
        assert len(produced) == 0

    # Every score name must appear in the printed output.
    expected_scores = (
        "r2-score",
        "mean-squared-error",
        "mean-absolute-error",
        "explained-variance-score",
    )
    for score_name in expected_scores:
        assert score_name in result.output
def test_build_cv_mode_build_only(tmpdir, runner: CliRunner, machine: Machine):
    """
    Build with ``cv_mode`` set to ``build_only``.

    Asserts that the build exits with code 0, that ``tmpdir`` (passed to the
    build as ``OUTPUT_DIR``) is non-empty afterwards, and that the
    ``metadata.json`` written there has, under
    ``metadata.build_metadata.model.cross_validation``,
    ``cv_duration_sec`` equal to ``None`` and ``scores`` equal to ``{}``.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")
    machine.evaluation = {"cv_mode": "build_only"}
    metadata_file = os.path.join(str(tmpdir), "metadata.json")

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        result = runner.invoke(cli.gordo, ["build"])

    # Fail here with the CLI output rather than later with an opaque
    # FileNotFoundError/KeyError if the build itself went wrong.
    assert result.exit_code == 0, result.output

    # A model has been saved
    assert len(os.listdir(tmpdir)) != 0

    with open(metadata_file, encoding="utf-8") as f:
        metadata_json = json.load(f)

    cross_validation = metadata_json["metadata"]["build_metadata"]["model"][
        "cross_validation"
    ]
    assert cross_validation["cv_duration_sec"] is None
    assert cross_validation["scores"] == {}