def test_random_search_without_n_runs(self):
    # A random_search section without `n_runs` must fail validation; the
    # same section with `n_runs` set must load and round-trip via to_dict().
    parallel = {
        "kind": "random_search",
        "concurrency": 1,
        "matrix": {"lr": {"kind": "choice", "value": [1, 2, 3]}},
        "seed": 1,
        "early_stopping": [],
    }
    config_dict = {
        "parallel": parallel,
        "run": {"kind": "container", "image": "foo/bar"},
    }

    with self.assertRaises(ValidationError):
        ComponentConfig.from_dict(config_dict)

    # `parallel` is the very object stored under config_dict["parallel"],
    # so this mutation is visible through config_dict as well.
    parallel["n_runs"] = 10
    component = ComponentConfig.from_dict(config_dict)
    assert component.to_dict() == config_dict
def test_passing_params_declarations_raises(self):
    # A dict holding only `params` and `declarations` is rejected by
    # ComponentConfig.from_dict with a ValidationError.
    config_dict = dict(
        params={"foo": "bar"},
        declarations={"foo": "bar"},
    )
    self.assertRaises(ValidationError, ComponentConfig.from_dict, config_dict)
def test_incomplete_params(self):
    # Part 1: two inputs are declared but only one value is passed;
    # validate_params is expected to raise.
    inputs_config = {
        "inputs": [
            {"name": "param1", "type": types.INT},
            {"name": "param2", "type": types.INT},
        ],
        "run": {"kind": "container", "image": "test"},
    }
    component = ComponentConfig.from_dict(inputs_config)
    with self.assertRaises(ValidationError):
        ops_params.validate_params(
            params={"param1": 1},
            inputs=component.inputs,
            outputs=component.outputs,
            is_template=False,
        )

    # Part 2: the same situation with outputs (one of them optional with a
    # value) must validate without raising.
    outputs_config = {
        "outputs": [
            {
                "name": "param1",
                "type": types.INT,
                "value": 12,
                "is_optional": True,
            },
            {"name": "param2", "type": types.INT},
        ],
        "run": {"kind": "container", "image": "test"},
    }
    component = ComponentConfig.from_dict(outputs_config)
    ops_params.validate_params(
        params={"param1": 1},
        inputs=component.inputs,
        outputs=component.outputs,
        is_template=False,
    )
def test_executable(self):
    # Each of these dicts must be rejected by ComponentConfig.from_dict.
    rejected_configs = (
        {"start_at": "foo", "container": {"image": "test"}},
        {"schedule": {"execute_at": "foo"}, "container": {"image": "test"}},
        {"timeout": 2},
    )
    for rejected in rejected_configs:
        with self.assertRaises(ValidationError):
            ComponentConfig.from_dict(rejected)

    # A termination timeout plus an exact_time schedule must load cleanly;
    # the test only checks that no exception is raised.
    accepted = {
        "termination": {"timeout": 2},
        "schedule": {
            "kind": "exact_time",
            "execute_at": local_now().isoformat(),
        },
        "container": {"image": "test"},
    }
    ComponentConfig.from_dict(accepted)
def test_job_refs_params(self):
    # A `{{ job.A.outputs.foo }}` reference passed as a param value is
    # validated here outside the context of a pipeline, and must raise.
    component = ComponentConfig.from_dict(
        {
            "inputs": [
                {"name": "param1", "type": types.INT},
                {"name": "param9", "type": types.FLOAT},
            ],
            "run": {"kind": "container", "image": "test"},
        }
    )
    params = {
        "param1": "{{ job.A.outputs.foo }}",
        "param9": 13.1,
    }

    with self.assertRaises(ValidationError):
        ops_params.validate_params(
            params=params,
            inputs=component.inputs,
            outputs=None,
            is_template=False,
        )
def test_extra_params(self):
    # Passing a param ("param2") that is not declared must raise, whether
    # the single declared entry lives under `inputs` or under `outputs`.
    for section in ("inputs", "outputs"):
        config_dict = {
            section: [{"name": "param1", "type": types.INT}],
            "run": {"kind": "container", "image": "test"},
        }
        component = ComponentConfig.from_dict(config_dict)
        with self.assertRaises(ValidationError):
            ops_params.validate_params(
                params={"param1": 1, "param2": 2},
                inputs=component.inputs,
                outputs=component.outputs,
                is_template=False,
            )
def test_experiment_and_job_refs_params(self):
    # `{{ runs.<uuid>.outputs.<name> }}` references are accepted as param
    # values; the validated value is the reference without the surrounding
    # braces and whitespace (see the expected dict below).
    component = ComponentConfig.from_dict(
        {
            "inputs": [
                {"name": "param1", "type": types.INT},
                {"name": "param2", "type": types.FLOAT},
                {"name": "param9", "type": types.WASB},
                {"name": "param11", "type": types.METRIC},
            ],
            "run": {"kind": "container", "image": "test"},
        }
    )

    # NOTE(review): "[email protected]" in the wasbs value looks like an
    # email-obfuscation artifact rather than a real <container>@<account>
    # host - confirm against upstream. Kept verbatim here.
    params = {
        "param1": "{{ runs.64332180bfce46eba80a65caf73c5396.outputs.foo }}",
        "param2": "{{ runs.0de53b5bf8b04a219d12a39c6b92bcce.outputs.foo }}",
        "param9": "wasbs://[email protected]/",
        "param11": "{{ runs.fcc462d764104eb698d3cca509f34154.outputs.accuracy }}",
    }
    validated = ops_params.validate_params(
        params=params,
        inputs=component.inputs,
        outputs=None,
        is_template=False,
    )

    resolved = {param.name: param.value for param in validated}
    assert resolved == {
        "param1": "runs.64332180bfce46eba80a65caf73c5396.outputs.foo",
        "param2": "runs.0de53b5bf8b04a219d12a39c6b92bcce.outputs.foo",
        "param9": "wasbs://[email protected]/",
        "param11": "runs.fcc462d764104eb698d3cca509f34154.outputs.accuracy",
    }
def test_iterative_without_n_iterations(self):
    # An iterative section without `n_iterations` must fail validation; the
    # same section with `n_iterations` set must load and round-trip.
    parallel = {
        "kind": "iterative",
        "matrix": {"lr": {"kind": "choice", "value": [1, 2, 3]}},
        "seed": 1,
    }
    config_dict = {
        "parallel": parallel,
        "run": {"kind": "container", "image": "foo/bar"},
    }

    with self.assertRaises(ValidationError):
        ComponentConfig.from_dict(config_dict)

    # `parallel` is the object stored under config_dict["parallel"], so the
    # update below is seen through config_dict too.
    parallel["n_iterations"] = 10
    component = ComponentConfig.from_dict(config_dict)
    assert component.to_dict() == config_dict
def test_pipelines_base_attrs(self):
    # A top-level `concurrency` key is rejected, whatever its value type.
    for concurrency in ("foo", 2):
        rejected = {"concurrency": concurrency, "container": {"image": "test"}}
        with self.assertRaises(ValidationError):
            ComponentConfig.from_dict(rejected)

    # Concurrency nested in a `workflow` section with a mapping strategy
    # loads, and the workflow section round-trips through to_dict().
    config_dict = {
        "workflow": {
            "concurrency": 2,
            "strategy": {"kind": "mapping", "values": [{"a": 1}, {"a": 1}]},
        },
        "container": {"image": "test"},
    }
    component = ComponentConfig.from_dict(config_dict)
    assert component.to_dict()["workflow"] == config_dict["workflow"]

    # With schedule and termination added, the light dict must equal the
    # input dict in full.
    config_dict = {
        "workflow": {
            "concurrency": 2,
            "strategy": {"kind": "mapping", "values": [{"a": 1}, {"a": 1}]},
        },
        "schedule": {
            "kind": "exact_time",
            "execute_at": local_now().isoformat(),
        },
        "termination": {"timeout": 1000},
        "container": {"image": "test"},
    }
    component = ComponentConfig.from_dict(config_dict)
    light_dict = component.to_light_dict()
    assert light_dict == config_dict
def test_grid_search_without_n_runs(self):
    # Unlike random search, a grid_search section loads without `n_runs`
    # and round-trips unchanged through to_dict().
    grid_search = {
        "kind": "grid_search",
        "concurrency": 1,
        "matrix": {"lr": {"kind": "choice", "value": [1, 2, 3]}},
        "early_stopping": [],
    }
    config_dict = {
        "parallel": grid_search,
        "run": {"kind": "container", "image": "foo/bar"},
    }

    component = ComponentConfig.from_dict(config_dict)
    assert component.to_dict() == config_dict
def test_param_validation_with_inputs(self):
    # Declares one input per IO type, validates a full set of matching
    # values, then checks that dropping required inputs raises.
    declared_inputs = [
        ("param1", IOTypes.STR),
        ("param2", IOTypes.INT),
        ("param3", IOTypes.FLOAT),
        ("param4", IOTypes.BOOL),
        ("param5", IOTypes.DICT),
        ("param6", IOTypes.LIST),
        ("param7", IOTypes.GCS_PATH),
        ("param8", IOTypes.S3_PATH),
        ("param9", IOTypes.AZURE_PATH),
        ("param10", IOTypes.PATH),
    ]
    config_dict = {
        "inputs": [
            {"name": name, "type": io_type} for name, io_type in declared_inputs
        ],
        "container": {"image": "test"},
    }
    component = ComponentConfig.from_dict(config_dict)

    # NOTE(review): "[email protected]" in the wasbs value looks like an
    # email-obfuscation artifact rather than a real <container>@<account>
    # host - confirm against upstream. Kept verbatim here.
    params = {
        "param1": "text",
        "param2": 12,
        "param3": 13.3,
        "param4": False,
        "param5": {"foo": "bar"},
        "param6": [1, 3, 45, 5],
        "param7": "gs://bucket/path/to/blob/",
        "param8": "s3://test/this/is/bad/key.txt",
        "param9": "wasbs://[email protected]/",
        "param10": "/foo/bar",
    }
    validated = ops_params.validate_params(
        params=params,
        inputs=component.inputs,
        outputs=None,
        is_template=False,
    )
    assert params == {param.name: param.value for param in validated}

    # Passing missing params
    for missing in ("param1", "param2"):
        params.pop(missing)
    with self.assertRaises(ValidationError):
        ops_params.validate_params(
            params=params,
            inputs=component.inputs,
            outputs=None,
            is_template=False,
        )
def test_param_validation_with_mismatched_outputs(self):
    # For each declared output type: one correctly typed value must
    # validate, and every listed mismatching value must raise.
    #
    # NOTE(review): "[email protected]" in the wasbs value looks like an
    # email-obfuscation artifact rather than a real <container>@<account>
    # host - confirm against upstream. Kept verbatim here.
    cases = (
        # (output name, declared type, accepted value, rejected values)
        (
            "param1",
            IOTypes.INT,
            1,
            ("text", 12.1, {"foo": "bar"}, "gs://bucket/path/to/blob/"),
        ),
        (
            "param2",
            IOTypes.STR,
            "text",
            (1, False, {"foo": "bar"}, ["gs://bucket/path/to/blob/"]),
        ),
        (
            "param7",
            IOTypes.AZURE_PATH,
            "wasbs://[email protected]/",
            ("gs://bucket/path/to/blob/", "s3://test/this/is/bad/key.txt", 1),
        ),
    )

    for name, io_type, accepted, rejected_values in cases:
        config_dict = {
            "outputs": [{"name": name, "type": io_type}],
            "container": {"image": "test"},
        }
        component = ComponentConfig.from_dict(config_dict)

        # Passing correct param
        ops_params.validate_params(
            params={name: accepted},
            inputs=component.inputs,
            outputs=component.outputs,
            is_template=False,
        )

        # Passing wrong type / wrong value
        for rejected in rejected_values:
            with self.assertRaises(ValidationError):
                ops_params.validate_params(
                    params={name: rejected},
                    inputs=component.inputs,
                    outputs=component.outputs,
                    is_template=False,
                )
def test_required_input_no_param_only_validated_on_run(self):
    # Inputs: two declared, one value passed -> validation raises.
    inputs_config = {
        "inputs": [
            {"name": "param1", "type": IOTypes.STR},
            {"name": "param10", "type": IOTypes.PATH},
        ],
        "container": {"image": "test"},
    }
    component = ComponentConfig.from_dict(inputs_config)
    with self.assertRaises(ValidationError):
        ops_params.validate_params(
            params={"param1": "text"},
            inputs=component.inputs,
            outputs=component.outputs,
            is_template=False,
        )

    # Outputs: two declared, one value passed -> validation does not raise.
    outputs_config = {
        "outputs": [
            {"name": "param1", "type": IOTypes.STR},
            {"name": "param10", "type": IOTypes.PATH},
        ],
        "container": {"image": "test"},
    }
    component = ComponentConfig.from_dict(outputs_config)
    ops_params.validate_params(
        params={"param1": "text"},
        inputs=component.inputs,
        outputs=component.outputs,
        is_template=False,
    )

    # IO: the input gets its value and the output is left unset -> validation
    # does not raise.
    io_config = {
        "inputs": [{"name": "param1", "type": IOTypes.STR}],
        "outputs": [{"name": "param10", "type": IOTypes.PATH}],
        "container": {"image": "test"},
    }
    component = ComponentConfig.from_dict(io_config)
    ops_params.validate_params(
        params={"param1": "text"},
        inputs=component.inputs,
        outputs=component.outputs,
        is_template=False,
    )
def test_pipelines_base_attrs(self):
    # A top-level `concurrency` key is rejected, whatever its value type.
    for concurrency in ("foo", 2):
        rejected = {
            "concurrency": concurrency,
            "run": {"kind": "container", "image": "test"},
        }
        with self.assertRaises(ValidationError):
            ComponentConfig.from_dict(rejected)

    # Concurrency inside a mapping `parallel` section loads; both the run
    # and parallel sections round-trip through to_dict().
    config_dict = {
        "parallel": {
            "concurrency": 2,
            "kind": "mapping",
            "values": [{"a": 1}, {"a": 1}],
        },
        "run": {"kind": "container", "image": "test"},
    }
    component = ComponentConfig.from_dict(config_dict)
    for section in ("run", "parallel"):
        assert component.to_dict()[section] == config_dict[section]

    # With schedule and termination added, compare the light dict against
    # the input.
    config_dict = {
        "parallel": {
            "concurrency": 2,
            "kind": "mapping",
            "values": [{"a": 1}, {"a": 1}],
        },
        "schedule": {"kind": "exact_time", "start_at": local_now().isoformat()},
        "termination": {"timeout": 1000},
        "run": {"kind": "container", "image": "test"},
    }
    component = ComponentConfig.from_dict(config_dict)
    light_dict = component.to_light_dict()

    # `start_at` is removed from both sides before comparing, so the
    # timestamp itself is not part of the equality check.
    for payload in (light_dict, config_dict):
        payload["schedule"].pop("start_at")
    assert light_dict == config_dict
def test_param_validation_with_outputs(self):
    # Declares one output per type, validates a full set of matching
    # values, then checks that omitted outputs come back with value None.
    declared_outputs = [
        ("param1", types.STR),
        ("param2", types.INT),
        ("param3", types.FLOAT),
        ("param4", types.BOOL),
        ("param5", types.DICT),
        ("param6", types.LIST),
        ("param7", types.GCS),
        ("param8", types.S3),
        ("param9", types.WASB),
        ("param10", types.PATH),
        ("param11", types.METRIC),
        ("param12", types.METADATA),
        ("param13", types.METADATA),
        ("param14", types.METADATA),
    ]
    config_dict = {
        "outputs": [
            {"name": name, "type": io_type} for name, io_type in declared_outputs
        ],
        "run": {"kind": "container", "image": "test"},
    }
    component = ComponentConfig.from_dict(config_dict)

    # NOTE(review): "[email protected]" in the wasbs value looks like an
    # email-obfuscation artifact rather than a real <container>@<account>
    # host - confirm against upstream. Kept verbatim here.
    params = {
        "param1": "text",
        "param2": 12,
        "param3": 13.3,
        "param4": False,
        "param5": {"foo": "bar"},
        "param6": [1, 3, 45, 5],
        "param7": "gs://bucket/path/to/blob/",
        "param8": "s3://test/this/is/bad/key.txt",
        "param9": "wasbs://[email protected]/",
        "param10": "/foo/bar",
        "param11": 124.4,
        "param12": {"foo": 124.4},
        "param13": {"foo": "bar"},
        "param14": {"foo": ["foo", 124.4]},
    }
    validated = ops_params.validate_params(
        params=params,
        inputs=None,
        outputs=component.outputs,
        is_template=False,
    )
    assert params == {param.name: param.value for param in validated}

    # Passing missing params: outputs may be omitted without raising.
    omitted = ("param1", "param2")
    for name in omitted:
        params.pop(name)
    validated = ops_params.validate_params(
        params=params,
        inputs=None,
        outputs=component.outputs,
        is_template=False,
    )

    # The omitted outputs are still reported, with None as their value.
    for name in omitted:
        params[name] = None
    assert params == {param.name: param.value for param in validated}
def test_workflow_config_raise_conditions(self):
    # Walks one config dict through a series of `parallel` sections and
    # matrix definitions, checking at each step whether from_dict accepts
    # the combination (and round-trips it) or raises ValidationError.
    config_dict = {
        "parallel": {
            "kind": "mapping",
            "concurrency": 2,
            "values": [{"foo": 1}, {"foo": 2}, {"foo": 3}],
        },
        "run": {"kind": "container", "image": "foo/bar"},
    }

    # config_dict is only ever mutated below, never rebound, so these
    # closures always see the current state.
    def expect_invalid():
        with self.assertRaises(ValidationError):
            ComponentConfig.from_dict(config_dict)

    def expect_roundtrip():
        component = ComponentConfig.from_dict(config_dict)
        assert component.to_dict() == config_dict

    def use_matrix(matrix):
        config_dict["parallel"]["matrix"] = matrix

    # Fresh dicts on every call so no matrix object is shared between steps.
    def choice_matrix():
        return {"lr": {"kind": "choice", "value": [1, 2, 3]}}

    def pchoice_matrix():
        return {
            "lr": {"kind": "pchoice", "value": [(1, 0.3), (2, 0.3), (3, 0.3)]}
        }

    # Mapping: accepted.
    expect_roundtrip()

    # Random search without a matrix: rejected.
    config_dict["parallel"] = {"kind": "random_search", "n_runs": 10}
    expect_invalid()

    # A matrix entry carrying both `value` and `pvalues`: rejected.
    use_matrix(
        {
            "lr": {
                "kind": "choice",
                "value": [1, 2, 3],
                "pvalues": [(1, 0.3), (2, 0.3), (3, 0.3)],
            }
        }
    )
    expect_invalid()

    # Random search with a pchoice distribution: accepted.
    use_matrix(pchoice_matrix())
    expect_roundtrip()

    # Random search with a plain choice matrix: accepted.
    use_matrix(choice_matrix())
    expect_roundtrip()

    # Grid search with a choice matrix: accepted.
    config_dict["parallel"] = {"kind": "grid_search", "n_runs": 10}
    use_matrix(choice_matrix())
    expect_roundtrip()

    # Grid search with a pchoice distribution: rejected.
    use_matrix(pchoice_matrix())
    expect_invalid()

    # Back to a choice matrix: accepted again.
    use_matrix(choice_matrix())
    expect_roundtrip()

    # Hyperband with a pchoice matrix: accepted.
    config_dict["parallel"] = {
        "kind": "hyperband",
        "max_iter": 10,
        "eta": 3,
        "resource": {"name": "steps", "type": "int"},
        "resume": False,
        "metric": OptimizationMetricConfig(
            name="loss", optimization=Optimization.MINIMIZE
        ).to_dict(),
        "matrix": pchoice_matrix(),
        "seed": 1,
    }
    expect_roundtrip()

    # Hyperband with early stopping policies added: accepted.
    config_dict["parallel"]["early_stopping"] = [
        {
            "kind": "metric_early_stopping",
            "metric": "loss",
            "value": 0.1,
            "optimization": Optimization.MINIMIZE,
            "policy": {"kind": "median", "evaluation_interval": 1},
        },
        {
            "kind": "metric_early_stopping",
            "metric": "accuracy",
            "value": 0.9,
            "optimization": Optimization.MAXIMIZE,
            "policy": {
                "kind": "truncation",
                "percent": 50,
                "evaluation_interval": 1,
            },
        },
    ]
    expect_roundtrip()

    # Bayesian optimization with a pchoice matrix: rejected.
    config_dict["parallel"] = {
        "kind": "bo",
        "metric": OptimizationMetricConfig(
            name="loss", optimization=Optimization.MINIMIZE
        ).to_dict(),
        "n_initial_trials": 2,
        "n_iterations": 10,
        "utility_function": {
            "acquisition_function": AcquisitionFunctions.UCB,
            "kappa": 1.2,
            "gaussian_process": {
                "kernel": GaussianProcessesKernels.MATERN,
                "length_scale": 1.0,
                "nu": 1.9,
                "n_restarts_optimizer": 2,
            },
        },
        "matrix": pchoice_matrix(),
        "seed": 1,
    }
    expect_invalid()

    # Bo with other non-uniform distributions: still rejected.
    use_matrix({"lr": {"kind": "pchoice", "value": [[0.1, 0.1], [0.2, 0.9]]}})
    expect_invalid()

    use_matrix({"lr": {"kind": "normal", "value": [0.1, 0.2]}})
    expect_invalid()

    # Bo with a uniform distribution: accepted.
    use_matrix({"lr": {"kind": "uniform", "value": {"low": 0.1, "high": 0.2}}})
    component = ComponentConfig.from_dict(config_dict)
    assert_equal_dict(component.to_dict(), config_dict)