def testBuildImporter(self):
    """Checks the component, task and executor specs built for an Importer."""
    importer_node = importer.Importer(
        instance_name='my_importer',
        source_uri='m/y/u/r/i',
        properties={'split_names': '["train", "eval"]'},
        custom_properties={
            'str_custom_property': 'abc',
            'int_custom_property': 123,
        },
        artifact_type=standard_artifacts.Examples)
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    builder = step_builder.StepBuilder(
        node=importer_node,
        deployment_config=deployment_config,
        component_defs=component_defs)
    step_spec = self._sole(builder.build())
    component_def = self._sole(component_defs)

    # Each entry: (expected-proto test-data file, proto class, built value).
    expectations = (
        ('expected_importer_component.pbtxt',
         pipeline_pb2.ComponentSpec, component_def),
        ('expected_importer_task.pbtxt',
         pipeline_pb2.PipelineTaskSpec, step_spec),
        ('expected_importer_executor.pbtxt',
         pipeline_pb2.PipelineDeploymentConfig, deployment_config),
    )
    for data_file, proto_cls, actual in expectations:
        expected = test_utils.get_proto_from_test_data(data_file, proto_cls())
        self.assertProtoEquals(expected, actual)
def testBuildImporterWithRuntimeParam(self):
    """Checks the specs built for an Importer whose URI is a RuntimeParameter."""
    runtime_uri = data_types.RuntimeParameter(name='runtime_flag', ptype=str)
    importer_node = importer.Importer(
        source_uri=runtime_uri,
        artifact_type=standard_artifacts.Examples).with_id('my_importer')
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}

    # NOTE(review): the builder is constructed and run inside the parameter
    # context; the context's recorded parameters are asserted at the end.
    with parameter_utils.ParameterContext() as param_context:
        builder = step_builder.StepBuilder(
            node=importer_node,
            deployment_config=deployment_config,
            component_defs=component_defs)
        step_spec = self._sole(builder.build())
    component_def = self._sole(component_defs)

    # Each entry: (expected-proto test-data file, proto class, built value).
    expectations = (
        ('expected_importer_component_with_runtime_param.pbtxt',
         pipeline_pb2.ComponentSpec, component_def),
        ('expected_importer_task_with_runtime_param.pbtxt',
         pipeline_pb2.PipelineTaskSpec, step_spec),
        ('expected_importer_executor_with_runtime_param.pbtxt',
         pipeline_pb2.PipelineDeploymentConfig, deployment_config),
    )
    for data_file, proto_cls, actual in expectations:
        expected = test_utils.get_proto_from_test_data(data_file, proto_cls())
        self.assertProtoEquals(expected, actual)

    self.assertListEqual([runtime_uri], param_context.parameters)
def testImporterDefinitionWithSingleUri(self):
    """Checks exec properties, inputs and output type for a single-URI Importer."""
    importer_node = importer.Importer(
        instance_name='my_importer',
        source_uri='m/y/u/r/i',
        properties={'split_names': '["train", "eval"]'},
        custom_properties={
            'str_custom_property': 'abc',
            'int_custom_property': 123,
        },
        artifact_type=standard_artifacts.Examples)

    # Built from fresh literals so the comparison is not against the very
    # dict objects that were handed to the Importer.
    expected_exec_properties = {
        importer.SOURCE_URI_KEY: 'm/y/u/r/i',
        importer.REIMPORT_OPTION_KEY: 0,
        importer.PROPERTIES_KEY: {
            'split_names': '["train", "eval"]',
        },
        importer.CUSTOM_PROPERTIES_KEY: {
            'str_custom_property': 'abc',
            'int_custom_property': 123,
        },
    }
    self.assertDictEqual(importer_node.exec_properties,
                         expected_exec_properties)
    self.assertEmpty(importer_node.inputs.get_all())

    result_channel = importer_node.outputs[importer.IMPORT_RESULT_KEY]
    self.assertEqual(result_channel.type, standard_artifacts.Examples)
def testIsImporter(self):
    """is_importer is true for an Importer and false for a CsvExampleGen."""
    schema_importer = importer.Importer(
        source_uri="uri/to/schema",
        artifact_type=standard_artifacts.Schema)
    self.assertTrue(compiler_utils.is_importer(schema_importer))

    csv_example_gen = CsvExampleGen(input_base="data_path")
    self.assertFalse(compiler_utils.is_importer(csv_example_gen))
def testSuccessfulExecution(self):
    """Builds an importer + AI Platform training pipeline and runs it."""
    dataset_uri = f'gs://{self._TEST_DATA_BUCKET}/ai-platform-training/mnist'
    example_importer = importer.Importer(
        artifact_type=simple_artifacts.File,
        reimport=False,
        source_uri=dataset_uri).with_id('examples')

    # Command-line flags for the training image; placeholders refer to the
    # component's declared input, output and parameter names below.
    training_args = [
        '--dataset',
        placeholders.InputUriPlaceholder('examples'),
        '--model-dir',
        placeholders.OutputUriPlaceholder('model'),
        '--lr',
        placeholders.InputValuePlaceholder('learning_rate'),
    ]
    train = ai_platform_training_component.create_ai_platform_training(
        name='simple_aip_training',
        project_id=self._GCP_PROJECT_ID,
        region=self._GCP_REGION,
        image_uri=self._TRAINING_IMAGE,
        args=training_args,
        scale_tier='BASIC',
        inputs={'examples': example_importer.outputs['result']},
        outputs={'model': standard_artifacts.Model},
        parameters={'learning_rate': '0.001'})

    pipeline_name = _PIPELINE_NAME_PREFIX.format(test_utils.random_id())
    pipeline_root = self._pipeline_root(pipeline_name)
    aip_training_pipeline = pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[example_importer, train],
    )

    self._run_pipeline(aip_training_pipeline)
def testIsImporter(self):
    """is_importer accepts Importer and legacy ImporterNode, not CsvExampleGen."""
    # Both importer flavours are constructed with the same arguments.
    schema_kwargs = dict(
        instance_name="import_schema",
        source_uri="uri/to/schema",
        artifact_type=standard_artifacts.Schema)

    current_importer = importer.Importer(**schema_kwargs)
    self.assertTrue(compiler_utils.is_importer(current_importer))

    legacy_importer = legacy_importer_node.ImporterNode(**schema_kwargs)
    self.assertTrue(compiler_utils.is_importer(legacy_importer))

    csv_example_gen = CsvExampleGen(input=external_input("data_path"))
    self.assertFalse(compiler_utils.is_importer(csv_example_gen))
def testImporterDumpsJsonRoundtrip(self):
    """Serializes an Importer with json_utils and checks the restored fields."""
    expected_name = 'my_importer'
    expected_uris = ['m/y/u/r/i']
    original = importer.Importer(
        instance_name=expected_name,
        source_uri=expected_uris,
        artifact_type=standard_artifacts.Examples)

    # dumps() raises an assertion if the object is not JSONable.
    serialized = json_utils.dumps(original)
    restored = json_utils.loads(serialized)

    self.assertEqual(restored._instance_name, expected_name)
    self.assertEqual(restored._source_uri, expected_uris)
def setUp(self):
    """Creates the Examples, Schema and Model importer nodes used by tests."""
    super().setUp()

    def reimport(node_id, artifact_type, *path_parts, **extra):
        # Every fixture re-imports from a directory under the test-data root.
        return importer.Importer(
            source_uri=os.path.join(self._testdata_root, *path_parts),
            artifact_type=artifact_type,
            reimport=True,
            **extra).with_id(node_id)

    # Example artifacts for testing.
    self.raw_examples_importer = reimport(
        'raw_examples',
        standard_artifacts.Examples,
        'csv_example_gen',
        properties={'split_names': '["train", "eval"]'})

    # Schema artifact for testing.
    self.schema_importer = reimport(
        'schema', standard_artifacts.Schema, 'schema_gen')

    # Model artifact for testing.
    self.model_1_importer = reimport(
        'model_1', standard_artifacts.Model, 'trainer', 'previous')
def create_pipeline() -> pipeline_pb2.Pipeline:
    """Compiles a one-node (Importer) SYNC pipeline for testing.

    Returns:
      The result of compiling the pipeline with `compiler.Compiler`.
    """
    custom_properties = {
        'int_custom_property': 123,
        'str_custom_property': 'abc',
    }
    importer_node = importer.Importer(
        source_uri='my_url',
        reimport=True,
        custom_properties=custom_properties,
        artifact_type=standard_artifacts.Schema).with_id('my_importer')

    test_pipeline = pipeline_lib.Pipeline(
        pipeline_name='my_pipeline',
        pipeline_root='/path/to/root',
        components=[importer_node],
        execution_mode=pipeline_lib.ExecutionMode.SYNC)

    return compiler.Compiler().compile(test_pipeline)
def pipeline_with_one_container_spec_component() -> tfx_pipeline.Pipeline:
    """Returns a pipeline of a Model importer feeding one container component."""
    model_importer = importer.Importer(
        source_uri='some-uri',
        artifact_type=standard_artifacts.Model,
    ).with_id('my_importer')

    output_channel = channel_utils.as_channel([standard_artifacts.Model()])
    container_component = DummyContainerSpecComponent(
        input1=model_importer.outputs['result'],
        output1=output_channel,
        param1='value1',
    )

    return tfx_pipeline.Pipeline(
        pipeline_name='pipeline-with-container',
        pipeline_root=_TEST_PIPELINE_ROOT,
        components=[model_importer, container_component],
    )
def testImporterDefinitionWithSingleUri(self):
    """Checks exec properties and output channel/artifact properties."""
    importer_node = importer.Importer(
        source_uri='m/y/u/r/i',
        properties={'split_names': '["train", "eval"]'},
        custom_properties={
            'str_custom_property': 'abc',
            'int_custom_property': 123,
        },
        artifact_type=standard_artifacts.Examples).with_id('my_importer')

    expected_exec_properties = {
        importer.SOURCE_URI_KEY: 'm/y/u/r/i',
        importer.REIMPORT_OPTION_KEY: 0,
    }
    self.assertDictEqual(importer_node.exec_properties,
                         expected_exec_properties)
    self.assertEmpty(importer_node.inputs)

    result_channel = importer_node.outputs[importer.IMPORT_RESULT_KEY]
    self.assertEqual(result_channel.type, standard_artifacts.Examples)

    # Tests properties in channel.
    expected_properties = {'split_names': '["train", "eval"]'}
    expected_custom_properties = {
        'str_custom_property': 'abc',
        'int_custom_property': 123,
    }
    self.assertEqual(result_channel.additional_properties,
                     expected_properties)
    self.assertEqual(result_channel.additional_custom_properties,
                     expected_custom_properties)

    # Tests properties in artifact.
    artifacts = list(result_channel.get())
    first_artifact = artifacts[0]
    self.assertEqual(first_artifact.split_names, '["train", "eval"]')
    str_value = first_artifact.get_string_custom_property(
        'str_custom_property')
    self.assertEqual(str_value, 'abc')
    int_value = first_artifact.get_int_custom_property('int_custom_property')
    self.assertEqual(int_value, 123)
def create_test_pipeline():
    """Builds an Iris example pipeline with slight changes.

    Besides the usual example-gen to pusher chain, the pipeline contains an
    Importer feeding a second StatisticsGen, a RuntimeParameter for the
    trainer's module file, and platform configs on the trainer and pipeline.

    Returns:
      The assembled `pipeline.Pipeline`, in SYNC execution mode.
    """
    pipeline_name = "iris"
    iris_root = "iris_root"
    tfx_root = "tfx_root"
    serving_model_dir = os.path.join(iris_root, "serving_model", pipeline_name)
    data_path = os.path.join(tfx_root, "data_path")
    pipeline_root = os.path.join(tfx_root, "pipelines", pipeline_name)

    example_gen = CsvExampleGen(input_base=data_path)
    examples = example_gen.outputs["examples"]
    statistics_gen = StatisticsGen(examples=examples)

    my_importer = importer.Importer(
        source_uri="m/y/u/r/i",
        properties={"split_names": "['train', 'eval']"},
        custom_properties={
            "int_custom_property": 42,
            "str_custom_property": "42",
        },
        artifact_type=standard_artifacts.Examples).with_id("my_importer")
    another_statistics_gen = StatisticsGen(
        examples=my_importer.outputs["result"]).with_id(
            "another_statistics_gen")

    schema_gen = SchemaGen(statistics=statistics_gen.outputs["statistics"])
    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"])

    # Use RuntimeParameter as module_file to test out RuntimeParameter in
    # compiler.
    module_file = data_types.RuntimeParameter(
        name="module_file",
        default=os.path.join(iris_root, "iris_utils.py"),
        ptype=str)
    trainer = Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(num_steps=2000),
        eval_args=trainer_pb2.EvalArgs(num_steps=5))
    # Attaching `TrainArgs` as platform config is not sensible practice,
    # but is only for testing purpose.
    trainer = trainer.with_platform_config(
        config=trainer_pb2.TrainArgs(num_steps=2000))

    model_resolver = resolver.Resolver(
        strategy_class=(
            latest_blessed_model_strategy.LatestBlessedModelStrategy),
        model=Channel(
            type=standard_artifacts.Model,
            producer_component_id=trainer.id),
        model_blessing=Channel(type=standard_artifacts.ModelBlessing),
    ).with_id("latest_blessed_model_resolver")

    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={"value": 0.6}),
        change_threshold=tfma.GenericChangeThreshold(
            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
            absolute={"value": -1e-10}))
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(signature_name="eval")],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    "sparse_categorical_accuracy": accuracy_threshold,
                }),
        ])
    evaluator = Evaluator(
        examples=example_gen.outputs["examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config)

    push_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    pusher = Pusher(
        model=trainer.outputs["model"],
        model_blessing=evaluator.outputs["blessing"],
        push_destination=push_destination)

    components = [
        example_gen,
        statistics_gen,
        another_statistics_gen,
        my_importer,
        schema_gen,
        example_validator,
        trainer,
        model_resolver,
        evaluator,
        pusher,
    ]
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        beam_pipeline_args=["--my_testing_beam_pipeline_args=foo"],
        # Attaching `TrainArgs` as platform config is not sensible practice,
        # but is only for testing purpose.
        platform_config=trainer_pb2.TrainArgs(num_steps=2000),
        execution_mode=pipeline.ExecutionMode.SYNC)
def setUp(self):
    """Creates the importer nodes and module path shared by the tests."""
    super().setUp()

    def reimport(node_id, artifact_type, *path_parts, **extra):
        # Every fixture re-imports from a directory under the test-data root.
        return importer.Importer(
            source_uri=os.path.join(self._testdata_root, *path_parts),
            artifact_type=artifact_type,
            reimport=True,
            **extra).with_id(node_id)

    # Transformed Example artifacts for testing.
    self.transformed_examples_importer = reimport(
        'transformed_examples',
        standard_artifacts.Examples,
        'transform', 'transformed_examples',
        properties={'split_names': '["train", "eval"]'})

    # Schema artifact for testing.
    self.schema_importer = reimport(
        'schema', standard_artifacts.Schema, 'schema_gen')

    # TransformGraph artifact for testing.
    self.transform_graph_importer = reimport(
        'transform_graph',
        standard_artifacts.TransformGraph,
        'transform', 'transform_graph')

    # Model artifact for testing.
    self.model_1_importer = reimport(
        'model_1', standard_artifacts.Model, 'trainer', 'previous')
    self.model_2_importer = reimport(
        'model_2', standard_artifacts.Model, 'trainer', 'current')

    # ModelBlessing artifact for testing.
    self.model_blessing_1_importer = reimport(
        'model_blessing_1',
        standard_artifacts.ModelBlessing,
        'model_validator', 'blessed',
        custom_properties={'blessed': 1})
    self.model_blessing_2_importer = reimport(
        'model_blessing_2',
        standard_artifacts.ModelBlessing,
        'model_validator', 'blessed',
        custom_properties={'blessed': 1})

    ### Test data and modules for native Keras trainer and tuner.
    self._penguin_tuner_module = os.path.join(self._MODULE_ROOT,
                                              'tuner_module.py')
    self.penguin_examples_importer = reimport(
        'penguin_examples',
        standard_artifacts.Examples,
        'penguin', 'data',
        properties={'split_names': '["train", "eval"]'})
    self.penguin_schema_importer = reimport(
        'penguin_schema',
        standard_artifacts.Schema,
        'penguin', 'schema')