Example #1
  def testStrategy_IrMode(self):
    # Model with id 1, will be blessed.
    model_one = standard_artifacts.Model()
    model_one.uri = 'model_one'
    model_one.id = 1
    # Model with id 2, will be blessed.
    model_two = standard_artifacts.Model()
    model_two.uri = 'model_two'
    model_two.id = 2
    # Model with id 3, will not be blessed.
    model_three = standard_artifacts.Model()
    model_three.uri = 'model_three'
    model_three.id = 3

    model_blessing_one = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
    model_blessing_two = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)

    strategy = latest_blessed_model_strategy.LatestBlessedModelStrategy()
    result = strategy.resolve_artifacts(
        self._store, {
            'model': [model_one, model_two, model_three],
            'model_blessing': [model_blessing_one, model_blessing_two]
        })
    self.assertIsNotNone(result)
    self.assertEqual([a.uri for a in result['model']], ['model_two'])
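This and several later examples (#2, #6, #18) call a _set_model_blessing_bit test helper that is not shown. A minimal sketch of what it plausibly does, assuming the latest-blessed-model logic matches blessings to models through the 'blessed' and 'current_model_id' custom properties on the ModelBlessing artifact:

  def _set_model_blessing_bit(self, blessing, model_id, is_blessed):
    # Hypothetical helper: record the blessing verdict and the model it refers to.
    blessing.set_int_custom_property('blessed', is_blessed)
    blessing.set_int_custom_property('current_model_id', model_id)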
Example #2
  def testGetLatestBlessedModelArtifact_IrMode(self):
    with metadata.Metadata(connection_config=self._connection_config) as m:
      # Model with id 1, will be blessed.
      model_one = standard_artifacts.Model()
      model_one.uri = 'model_one'
      model_one.id = 1
      # Model with id 2, will be blessed.
      model_two = standard_artifacts.Model()
      model_two.uri = 'model_two'
      model_two.id = 2
      # Model with id 3, will not be blessed.
      model_three = standard_artifacts.Model()
      model_three.uri = 'model_three'
      model_three.id = 3

      model_blessing_one = standard_artifacts.ModelBlessing()
      self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
      model_blessing_two = standard_artifacts.ModelBlessing()
      self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)

      resolver = latest_blessed_model_resolver.LatestBlessedModelResolver()
      result = resolver.resolve_artifacts(
          m, {
              'model': [model_one, model_two, model_three],
              'model_blessing': [model_blessing_one, model_blessing_two]
          })
      self.assertIsNotNone(result)
      self.assertEqual([a.uri for a in result['model']], ['model_two'])
Example #3
  def setUp(self):
    super(ExecutorTest, self).setUp()
    self._testdata_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    self._module_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'example')
    self._output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    self._context = executor.Executor.Context(
        tmp_dir=self._output_data_dir, unique_id='1')

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(self._testdata_dir, 'data')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    schema = standard_artifacts.Schema()
    schema.uri = os.path.join(self._testdata_dir, 'schema')

    self._input_dict = {
        'examples': [examples],
        'schema': [schema],
    }

    # Create output dict.
    model = standard_artifacts.Model()
    model.uri = os.path.join(self._output_data_dir, 'model')
    self._best_hparams = standard_artifacts.Model()
    self._best_hparams.uri = os.path.join(self._output_data_dir, 'best_hparams')

    self._output_dict = {
        'model': [model],
        'best_hyperparameters': [self._best_hparams],
    }
Example #4
    def testDoValidation(self, exec_properties, blessed, has_baseline):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        model = standard_artifacts.Model()
        baseline_model = standard_artifacts.Model()
        model.uri = os.path.join(source_data_dir, 'trainer/current')
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')
        input_dict = {
            EXAMPLES_KEY: [examples],
            MODEL_KEY: [model],
            SCHEMA_KEY: [schema],
        }
        if has_baseline:
            input_dict[BASELINE_MODEL_KEY] = [baseline_model]

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            EVALUATION_KEY: [eval_output],
            BLESSING_KEY: [blessing_output],
        }

        # The example_splits list needs to be serialized before being passed to the Do function.
        exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri,
                                                   'metrics')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'validations')))
        if blessed:
            self.assertTrue(
                fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
        else:
            self.assertTrue(
                fileio.exists(os.path.join(blessing_output.uri,
                                           'NOT_BLESSED')))
Example #5
    def setUp(self):
        super(KubeflowGCPIntegrationTest, self).setUp()

        # Example artifacts for testing.
        raw_train_examples = standard_artifacts.Examples(split='train')
        raw_train_examples.uri = os.path.join(
            self._intermediate_data_root,
            'csv_example_gen/examples/test-pipeline/train/')
        raw_eval_examples = standard_artifacts.Examples(split='eval')
        raw_eval_examples.uri = os.path.join(
            self._intermediate_data_root,
            'csv_example_gen/examples/test-pipeline/eval/')
        self._test_raw_examples = [raw_train_examples, raw_eval_examples]

        # Transformed Example artifacts for testing.
        transformed_train_examples = standard_artifacts.Examples(split='train')
        transformed_train_examples.uri = os.path.join(
            self._intermediate_data_root,
            'transform/transformed_examples/test-pipeline/train/')
        transformed_eval_examples = standard_artifacts.Examples(split='eval')
        transformed_eval_examples.uri = os.path.join(
            self._intermediate_data_root,
            'transform/transformed_examples/test-pipeline/eval/')
        self._test_transformed_examples = [
            transformed_train_examples, transformed_eval_examples
        ]

        # Schema artifact for testing.
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(self._intermediate_data_root,
                                  'schema_gen/output/test-pipeline/')
        self._test_schema = [schema]

        # TransformGraph artifact for testing.
        transform_graph = standard_artifacts.TransformGraph()
        transform_graph.uri = os.path.join(
            self._intermediate_data_root,
            'transform/transform_output/test-pipeline/')
        self._test_transform_graph = [transform_graph]

        # Model artifact for testing.
        model_1 = standard_artifacts.Model()
        model_1.uri = os.path.join(self._intermediate_data_root,
                                   'trainer/output/test-pipeline/1/')
        self._test_model_1 = [model_1]

        model_2 = standard_artifacts.Model()
        model_2.uri = os.path.join(self._intermediate_data_root,
                                   'trainer/output/test-pipeline/2/')
        self._test_model_2 = [model_2]

        # ModelBlessing artifact for testing.
        model_blessing = standard_artifacts.ModelBlessing()
        model_blessing.uri = os.path.join(
            self._intermediate_data_root,
            'model_validator/blessing/test-pipeline/')
        self._test_model_blessing = [model_blessing]
Example #6
  def testStrategy(self):
    contexts = self._metadata.register_pipeline_contexts_if_not_exists(
        self._pipeline_info)
    # Model with id 1, will be blessed.
    model_one = standard_artifacts.Model()
    model_one.uri = 'model_one'
    self._metadata.publish_artifacts([model_one])
    # Model with id 2, will be blessed.
    model_two = standard_artifacts.Model()
    model_two.uri = 'model_two'
    self._metadata.publish_artifacts([model_two])
    # Model with id 3, will not be blessed.
    model_three = standard_artifacts.Model()
    model_three.uri = 'model_three'
    self._metadata.publish_artifacts([model_three])

    model_blessing_one = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
    model_blessing_two = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)
    self._metadata.publish_artifacts([model_blessing_one, model_blessing_two])

    self._metadata.register_execution(
        exec_properties={},
        pipeline_info=self._pipeline_info,
        component_info=self._component_info,
        contexts=contexts)
    self._metadata.publish_execution(
        component_info=self._component_info,
        output_artifacts={
            'a': [model_one, model_two, model_three],
            'b': [model_blessing_one, model_blessing_two]
        })

    strategy = latest_blessed_model_strategy.LatestBlessedModelStrategy()
    resolve_result = strategy.resolve(
        pipeline_info=self._pipeline_info,
        metadata_handler=self._metadata,
        source_channels={
            'model':
                types.Channel(
                    type=standard_artifacts.Model,
                    producer_component_id=self._component_info.component_id,
                    output_key='a'),
            'model_blessing':
                types.Channel(
                    type=standard_artifacts.ModelBlessing,
                    producer_component_id=self._component_info.component_id,
                    output_key='b')
        })
    self.assertTrue(resolve_result.has_complete_result)
    self.assertEqual([
        a.uri
        for a in resolve_result.per_key_resolve_result['model']
    ], ['model_two'])
    self.assertTrue(resolve_result.per_key_resolve_state['model'])
Example #7
  def setUp(self):
    super(ExecutorTest, self).setUp()
    self._source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    self._output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(self._source_data_dir,
                                'transform/transformed_examples')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    transform_output = standard_artifacts.TransformGraph()
    transform_output.uri = os.path.join(self._source_data_dir,
                                        'transform/transform_output')
    schema = standard_artifacts.Schema()
    schema.uri = os.path.join(self._source_data_dir, 'schema_gen')
    previous_model = standard_artifacts.Model()
    previous_model.uri = os.path.join(self._source_data_dir, 'trainer/previous')

    self._input_dict = {
        executor.EXAMPLES_KEY: [examples],
        executor.TRANSFORM_GRAPH_KEY: [transform_output],
        executor.SCHEMA_KEY: [schema],
        executor.BASE_MODEL_KEY: [previous_model]
    }

    # Create output dict.
    self._model_exports = standard_artifacts.Model()
    self._model_exports.uri = os.path.join(self._output_data_dir,
                                           'model_export_path')
    self._output_dict = {executor.OUTPUT_MODEL_KEY: [self._model_exports]}

    # Create exec properties skeleton.
    self._exec_properties = {
        'train_args':
            json_format.MessageToJson(
                trainer_pb2.TrainArgs(num_steps=1000),
                preserving_proto_field_name=True),
        'eval_args':
            json_format.MessageToJson(
                trainer_pb2.EvalArgs(num_steps=500),
                preserving_proto_field_name=True),
        'warm_starting':
            False,
    }

    self._module_file = os.path.join(self._source_data_dir, 'module_file',
                                     'trainer_module.py')
    self._trainer_fn = '%s.%s' % (trainer_module.trainer_fn.__module__,
                                  trainer_module.trainer_fn.__name__)

    # Executors for test.
    self._trainer_executor = executor.Executor()
    self._generic_trainer_executor = executor.GenericExecutor()
Example #8
    def setUp(self):
        super(ExecutorTest, self).setUp()
        self._source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        train_examples = standard_artifacts.Examples(split='train')
        train_examples.uri = os.path.join(
            self._source_data_dir, 'transform/transformed_examples/train/')
        eval_examples = standard_artifacts.Examples(split='eval')
        eval_examples.uri = os.path.join(
            self._source_data_dir, 'transform/transformed_examples/eval/')
        transform_output = standard_artifacts.TransformGraph()
        transform_output.uri = os.path.join(self._source_data_dir,
                                            'transform/transform_output/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(self._source_data_dir, 'schema_gen/')
        previous_model = standard_artifacts.Model()
        previous_model.uri = os.path.join(self._source_data_dir,
                                          'trainer/previous/')

        self._input_dict = {
            'examples': [train_examples, eval_examples],
            'transform_output': [transform_output],
            'schema': [schema],
            'base_model': [previous_model]
        }

        # Create output dict.
        self._model_exports = standard_artifacts.Model()
        self._model_exports.uri = os.path.join(self._output_data_dir,
                                               'model_export_path')
        self._output_dict = {'output': [self._model_exports]}

        # Create exec properties skeleton.
        self._exec_properties = {
            'train_args':
            json_format.MessageToJson(trainer_pb2.TrainArgs(num_steps=1000),
                                      preserving_proto_field_name=True),
            'eval_args':
            json_format.MessageToJson(trainer_pb2.EvalArgs(num_steps=500),
                                      preserving_proto_field_name=True),
            'warm_starting':
            False,
        }

        self._module_file = os.path.join(self._source_data_dir, 'module_file',
                                         'trainer_module.py')
        self._trainer_fn = '%s.%s' % (trainer_module.trainer_fn.__module__,
                                      trainer_module.trainer_fn.__name__)

        # Executor for test.
        self._trainer_executor = executor.Executor()
Example #9
 def testConstructWithBaselineModel(self):
     examples = standard_artifacts.Examples()
     model_exports = standard_artifacts.Model()
     baseline_model = standard_artifacts.Model()
     evaluator = component.Evaluator(
         examples=channel_utils.as_channel([examples]),
         model=channel_utils.as_channel([model_exports]),
         baseline_model=channel_utils.as_channel([baseline_model]))
     self.assertEqual(standard_artifacts.ModelEvaluation.TYPE_NAME,
                      evaluator.outputs['evaluation'].type_name)
Example #10
    def testEvaluation(self, exec_properties, model_agnostic=False):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        baseline_model = standard_artifacts.Model()
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')
        input_dict = {
            EXAMPLES_KEY: [examples],
            SCHEMA_KEY: [schema],
        }
        if not model_agnostic:
            model = standard_artifacts.Model()
            model.uri = os.path.join(source_data_dir, 'trainer/current')
            input_dict[MODEL_KEY] = [model]

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            EVALUATION_KEY: [eval_output],
            BLESSING_KEY: [blessing_output],
        }

        # Test multiple splits.
        exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(
            ['train', 'eval'])

        if MODULE_FILE_KEY in exec_properties:
            exec_properties[MODULE_FILE_KEY] = os.path.join(
                source_data_dir, 'module_file', 'evaluator_module.py')

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri,
                                                   'metrics')))
        self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertFalse(
            fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
Example #11
  def testGetQualifiedArtifacts(self):
    with self.metadata() as m:
      contexts_one = m.register_pipeline_contexts_if_not_exists(
          self._pipeline_info)
      contexts_two = m.register_pipeline_contexts_if_not_exists(
          self._pipeline_info3)
      # The first execution, with matched:
      #   - pipeline context
      #   - producer component id
      m.register_execution(
          exec_properties={},
          pipeline_info=self._pipeline_info,
          component_info=self._component_info,
          contexts=list(contexts_one))
      # artifact_one will be output with matched artifact type and output key
      artifact_one = standard_artifacts.Model()
      # artifact_two will be output with matched artifact type only
      artifact_two = standard_artifacts.Model()
      m.publish_execution(
          component_info=self._component_info,
          output_artifacts={
              'k1': [artifact_one],
              'k2': [artifact_two]
          })
      # The second execution, with matched pipeline context only
      m.register_execution(
          exec_properties={},
          pipeline_info=self._pipeline_info,
          component_info=self._component_info2,
          contexts=list(contexts_one))
      # artifact_three will be output with matched artifact type and output key
      artifact_three = standard_artifacts.Model()
      m.publish_execution(
          component_info=self._component_info2,
          output_artifacts={'k1': [artifact_three]})
      # The third execution, with matched producer component id only
      m.register_execution(
          exec_properties={},
          pipeline_info=self._pipeline_info3,
          component_info=self._component_info3,
          contexts=list(contexts_two))
      # artifact_four will be output with matched artifact type and output key
      artifact_four = standard_artifacts.Model()
      m.publish_execution(
          component_info=self._component_info3,
          output_artifacts={'k1': [artifact_four]})

      result = m.get_qualified_artifacts(
          contexts=contexts_one,
          type_name=standard_artifacts.Model().type_name,
          producer_component_id=self._component_info.component_id,
          output_key='k1')
      self.assertEqual(len(result), 1)
      self.assertEqual(result[0].artifact.id, artifact_one.id)
Example #12
 def testConstruct(self):
     input_model = standard_artifacts.Model()
     output_model = standard_artifacts.Model()
     this_component = component.Transform(
         function_name='component_test.pouet',
         input_model=channel_utils.as_channel([input_model]),
         output_model=channel_utils.as_channel([output_model
                                                ])).with_id(u'Testing123')
     self.assertEqual(standard_artifacts.Model.TYPE_NAME,
                      this_component.outputs[OUTPUT_MODEL_KEY].type_name)
     artifact_collection = this_component.outputs[OUTPUT_MODEL_KEY].get()
     self.assertIsNotNone(artifact_collection)
Example #13
    def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        model = standard_artifacts.Model()
        baseline_model = standard_artifacts.Model()
        model.uri = os.path.join(source_data_dir, 'trainer/current')
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        input_dict = {
            executor.EXAMPLES_KEY: [examples],
            executor.MODEL_KEY: [model],
        }

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        output_dict = {executor.EVALUATION_KEY: [eval_output]}

        try:
            # Need to import the following module so that the fairness indicator
            # post-export metric is registered.  This may raise an ImportError if the
            # currently-installed version of TFMA does not support fairness
            # indicators.
            import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
            exec_properties['fairness_indicator_thresholds'] = [
                0.1, 0.3, 0.5, 0.7, 0.9
            ]
        except ImportError:
            absl.logging.warning(
                'Not testing fairness indicators because a compatible TFMA version '
                'is not installed.')

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            tf.io.gfile.exists(
                os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
Example #14
    def testBuildDummyConsumerWithCondition(self):
        producer_task_1 = test_utils.dummy_producer_component(
            output1=channel_utils.as_channel([standard_artifacts.Model()]),
            param1='value1',
        ).with_id('producer_task_1')
        producer_task_2 = test_utils.dummy_producer_component_2(
            output1=channel_utils.as_channel([standard_artifacts.Model()]),
            param1='value2',
        ).with_id('producer_task_2')
        # This test verifies two things:
        # 1. Nested conditions. The condition string of consumer_task should contain
        #    both predicates.
        # 2. Implicit channels. consumer_task only takes producer_task_1's output.
        #    But producer_task_2 is used in condition, hence producer_task_2 should
        #    be added to the dependency of consumer_task.
        # See testdata for detail.
        with conditional.Cond(
                producer_task_1.outputs['output1'].future()[0].uri != 'uri'):
            with conditional.Cond(producer_task_2.outputs['output1'].future()
                                  [0].property('property') == 'value1'):
                consumer_task = test_utils.dummy_consumer_component(
                    input1=producer_task_1.outputs['output1'],
                    param1=1,
                )
        # Need to construct a pipeline to set producer_component_id.
        unused_pipeline = tfx.dsl.Pipeline(
            pipeline_name='pipeline-with-condition',
            pipeline_root='',
            components=[producer_task_1, producer_task_2, consumer_task],
        )
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        my_builder = step_builder.StepBuilder(
            node=consumer_task,
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs)
        actual_step_spec = self._sole(my_builder.build())
        actual_component_def = self._sole(component_defs)

        self.assertProtoEquals(
            test_utils.get_proto_from_test_data(
                'expected_dummy_consumer_with_condition_component.pbtxt',
                pipeline_pb2.ComponentSpec()), actual_component_def)
        self.assertProtoEquals(
            test_utils.get_proto_from_test_data(
                'expected_dummy_consumer_with_condition_task.pbtxt',
                pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
        self.assertProtoEquals(
            test_utils.get_proto_from_test_data(
                'expected_dummy_consumer_with_condition_executor.pbtxt',
                pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
Example #15
  def setUp(self):
    super(ExecutorTest, self).setUp()

    self.component_id = 'test_component'
    self._model_data_dir = tempfile.mkdtemp()

    num_classes = 10
    input_shape = (28, 28, 1)
    model = keras.Sequential(
      [
        keras.Input(shape=input_shape),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax"),
      ]
    )
    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"])

    model.save(os.path.join(self._model_data_dir, 'Format-Serving'))
    del model

    # Create input dict.
    self._model = standard_artifacts.Model()
    self._model.uri = self._model_data_dir

    self._input_dict = {
      INPUT_MODEL_KEY: [self._model],
    }

    # Create output dict.
    self._output_model = standard_artifacts.Model()
    self._output_model_dir = os.path.join(tempfile.mkdtemp())
    self._output_model.uri = self._output_model_dir

    self._output_dict_sr = {
      OUTPUT_MODEL_KEY: [self._output_model],
    }

    # Create exec properties.
    self._exec_properties = {
      FUNCTION_NAME_KEY: 'tfx_x.components.model.transform.executor.identity',
    }

    # Create context
    self._tmp_dir = os.path.join(self._output_model_dir, '.temp')
    self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')
Example #16
    def setUp(self):
        super().setUp()
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        self._context = executor.Executor.Context(
            tmp_dir=self._output_data_dir, unique_id='1')

        # Create input dict.
        e1 = standard_artifacts.Examples()
        e1.uri = os.path.join(self._testdata_dir, 'penguin', 'data')
        e1.split_names = artifact_utils.encode_split_names(['train', 'eval'])

        e2 = copy.deepcopy(e1)

        self._single_artifact = [e1]
        self._multiple_artifacts = [e1, e2]

        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(self._testdata_dir, 'penguin', 'schema')

        base_model = standard_artifacts.Model()
        base_model.uri = os.path.join(self._testdata_dir, 'trainer/previous')

        self._input_dict = {
            standard_component_specs.EXAMPLES_KEY: self._single_artifact,
            standard_component_specs.SCHEMA_KEY: [schema],
            standard_component_specs.BASE_MODEL_KEY: [base_model]
        }

        # Create output dict.
        self._best_hparams = standard_artifacts.Model()
        self._best_hparams.uri = os.path.join(self._output_data_dir,
                                              'best_hparams')

        self._output_dict = {
            standard_component_specs.BEST_HYPERPARAMETERS_KEY:
            [self._best_hparams],
        }

        # Create exec properties.
        self._exec_properties = {
            standard_component_specs.TRAIN_ARGS_KEY:
            proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=100)),
            standard_component_specs.EVAL_ARGS_KEY:
            proto_utils.proto_to_json(trainer_pb2.EvalArgs(num_steps=50)),
        }
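Examples #7, #8, #16 and #29 pass proto-valued exec properties as JSON strings (via json_format.MessageToJson or proto_utils.proto_to_json). A hedged sketch of the round trip, using google.protobuf.json_format directly, which the TFX helper presumably wraps:

from google.protobuf import json_format
from tfx.proto import trainer_pb2
from tfx.utils import proto_utils

# Serialize the way the setUp above does, then decode it back into a proto.
train_args_json = proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=100))
decoded = json_format.Parse(train_args_json, trainer_pb2.TrainArgs())
assert decoded.num_steps == 100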
Example #17
    def testEvaluation(self, exec_properties):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])
        model = standard_artifacts.Model()
        baseline_model = standard_artifacts.Model()
        model.uri = os.path.join(source_data_dir, 'trainer/current')
        baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')
        input_dict = {
            constants.EXAMPLES_KEY: [examples],
            constants.MODEL_KEY: [model],
            constants.SCHEMA_KEY: [schema],
        }

        # Create output dict.
        eval_output = standard_artifacts.ModelEvaluation()
        eval_output.uri = os.path.join(output_data_dir, 'eval_output')
        blessing_output = standard_artifacts.ModelBlessing()
        blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
        output_dict = {
            constants.EVALUATION_KEY: [eval_output],
            constants.BLESSING_KEY: [blessing_output],
        }

        # Run executor.
        evaluator = executor.Executor()
        evaluator.Do(input_dict, output_dict, exec_properties)

        # Check evaluator outputs.
        self.assertTrue(
            tf.io.gfile.exists(
                os.path.join(eval_output.uri, 'eval_config.json')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
        self.assertTrue(
            tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
        self.assertFalse(
            tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))
Example #18
  def testGetLatestBlessedModelArtifact(self):
    with metadata.Metadata(connection_config=self._connection_config) as m:
      contexts = m.register_pipeline_contexts_if_not_exists(self._pipeline_info)
      # Model with id 1, will be blessed.
      model_one = standard_artifacts.Model()
      model_one.uri = 'model_one'
      m.publish_artifacts([model_one])
      # Model with id 2, will be blessed.
      model_two = standard_artifacts.Model()
      model_two.uri = 'model_two'
      m.publish_artifacts([model_two])
      # Model with id 3, will not be blessed.
      model_three = standard_artifacts.Model()
      model_three.uri = 'model_three'
      m.publish_artifacts([model_three])

      model_blessing_one = standard_artifacts.ModelBlessing()
      self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
      model_blessing_two = standard_artifacts.ModelBlessing()
      self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)
      m.publish_artifacts([model_blessing_one, model_blessing_two])

      m.register_execution(
          input_artifacts={
              'a': [model_one, model_two, model_three],
              'b': [model_blessing_one, model_blessing_two]
          },
          exec_properties={},
          pipeline_info=self._pipeline_info,
          component_info=self._component_info,
          contexts=contexts)

      resolver = latest_blessed_model_resolver.LatestBlessedModelResolver()
      resolve_result = resolver.resolve(
          pipeline_info=self._pipeline_info,
          metadata_handler=m,
          source_channels={
              'model':
                  types.Channel(type=standard_artifacts.Model),
              'model_blessing':
                  types.Channel(type=standard_artifacts.ModelBlessing)
          })
      self.assertTrue(resolve_result.has_complete_result)
      self.assertEqual([
          artifact.uri
          for artifact in resolve_result.per_key_resolve_result['model']
      ], ['model_two'])
      self.assertTrue(resolve_result.per_key_resolve_state['model'])
Example #19
    def setUp(self):
        super(ExecutorTest, self).setUp()
        self._source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        self.component_id = 'test_component'

        # Create input dict.
        self._examples = standard_artifacts.Examples(split='unlabelled')
        self._examples.uri = os.path.join(self._source_data_dir,
                                          'csv_example_gen/unlabelled/')
        self._model_export = standard_artifacts.Model()
        self._model_export.uri = os.path.join(self._source_data_dir,
                                              'trainer/current/')

        self._model_blessing = standard_artifacts.ModelBlessing()
        self._model_blessing.uri = os.path.join(self._source_data_dir,
                                                'model_validator/blessed')
        self._model_blessing.set_int_custom_property('blessed', 1)

        self._inference_result = standard_artifacts.InferenceResult()
        self._prediction_log_dir = os.path.join(self._output_data_dir,
                                                'prediction_logs')
        self._inference_result.uri = self._prediction_log_dir

        # Create context
        self._tmp_dir = os.path.join(self._output_data_dir, '.temp')
        self._context = executor.Executor.Context(tmp_dir=self._tmp_dir,
                                                  unique_id='2')
Example #20
    def setUp(self):
        super(LocalDockerRunnerTest, self).setUp()

        base_dir = os.path.join(
            os.path.dirname(  # components/
                os.path.dirname(  # infra_validator/
                    os.path.dirname(__file__))),  # model_server_runners/
            'testdata')
        self.model = standard_artifacts.Model()
        self.model.uri = os.path.join(base_dir, 'trainer', 'current')

        # Mock LocalDockerModelServerRunner._FindAvailablePort
        self.find_available_port_patcher = mock.patch.object(
            LocalDockerModelServerRunner, '_FindAvailablePort')
        self.find_available_port = self.find_available_port_patcher.start()
        self.find_available_port.return_value = 1234

        # Mock docker.DockerClient
        self.docker_client_patcher = mock.patch('docker.DockerClient')
        self.docker_client_cls = self.docker_client_patcher.start()
        self.docker_client = self.docker_client_cls.return_value

        # Mock client factory
        self.client_factory = mock.Mock()
        self.client = self.client_factory.return_value
Example #21
 def testRunExecutor_with_InplaceUpdateExecutor(self):
     executor_spec = text_format.Parse(
         """
   class_path: "tfx.orchestration.portable.python_executor_operator_test.InplaceUpdateExecutor"
 """, executable_spec_pb2.PythonClassExecutableSpec())
     operator = python_executor_operator.PythonExecutorOperator(
         executor_spec)
     input_dict = {'input_key': [standard_artifacts.Examples()]}
     output_dict = {'output_key': [standard_artifacts.Model()]}
     exec_properties = {
         'string': 'value',
         'int': 1,
         'float': 0.0,
         # This should not happen in production and will be
         # dropped.
         'proto': execution_result_pb2.ExecutorOutput()
     }
     executor_output = operator.run_executor(
         self._get_execution_info(input_dict, output_dict, exec_properties))
     self.assertProtoPartiallyEquals(
         """
       output_artifacts {
         key: "output_key"
         value {
           artifacts {
             custom_properties {
               key: "name"
               value {
                 string_value: "MyPipeline.MyPythonNode.my_model"
               }
             }
           }
         }
       }""", executor_output)
Example #22
    def setUp(self):
        super(LocalDockerRunnerTest, self).setUp()

        base_dir = os.path.join(
            os.path.dirname(  # components/
                os.path.dirname(  # infra_validator/
                    os.path.dirname(__file__))),  # model_server_runners/
            'testdata')
        self._model = standard_artifacts.Model()
        self._model.uri = os.path.join(base_dir, 'trainer', 'current')
        self._model_name = 'chicago-taxi'
        self._model_path = path_utils.serving_model_path(self._model.uri)

        # Mock docker.DockerClient
        patcher = mock.patch('docker.DockerClient')
        self._docker_client = patcher.start().return_value
        self.addCleanup(patcher.stop)

        self._serving_spec = _create_serving_spec({
            'tensorflow_serving': {
                'tags': ['1.15.0']
            },
            'local_docker': {},
            'model_name':
            self._model_name,
        })
        self._serving_binary = serving_bins.parse_serving_binaries(
            self._serving_spec)[0]
        patcher = mock.patch.object(self._serving_binary, 'MakeClient')
        self._model_server_client = patcher.start().return_value
        self.addCleanup(patcher.stop)
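The _create_serving_spec helper used above is not shown; a plausible sketch, assuming it only parses the plain dict into the InfraValidator ServingSpec proto:

from google.protobuf import json_format
from tfx.proto import infra_validator_pb2


def _create_serving_spec(payload):
  # Hypothetical helper: build a ServingSpec proto from a plain dict.
  result = infra_validator_pb2.ServingSpec()
  json_format.ParseDict(payload, result)
  return result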
Example #23
 def testRunExecutor_with_InprocessExecutor(self):
   executor_spec = text_format.Parse(
       """
     class_path: "tfx.orchestration.portable.python_executor_operator_test.InprocessExecutor"
   """, executable_spec_pb2.PythonClassExecutableSpec())
   operator = python_executor_operator.PythonExecutorOperator(executor_spec)
   input_dict = {'input_key': [standard_artifacts.Examples()]}
   output_dict = {'output_key': [standard_artifacts.Model()]}
   exec_properties = {'key': 'value'}
   stateful_working_dir = os.path.join(self.tmp_dir, 'stateful_working_dir')
   executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
   executor_output = operator.run_executor(
       base_executor_operator.ExecutionInfo(
           input_dict=input_dict,
           output_dict=output_dict,
           exec_properties=exec_properties,
           stateful_working_dir=stateful_working_dir,
           executor_output_uri=executor_output_uri))
   self.assertProtoPartiallyEquals("""
         execution_properties {
           key: "key"
           value {
             string_value: "value"
           }
         }
         output_artifacts {
           key: "output_key"
           value {
             artifacts {
             }
           }
         }""", executor_output)
Example #24
    def testBuildContainerTask2(self):
        task = test_utils.dummy_producer_component(
            output1=channel_utils.as_channel([standard_artifacts.Model()]),
            param1='value1',
        )
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        my_builder = step_builder.StepBuilder(
            node=task,
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs)
        actual_step_spec = self._sole(my_builder.build())
        actual_component_def = self._sole(component_defs)

        # Same as in testBuildContainerTask
        self.assertProtoEquals(
            test_utils.get_proto_from_test_data(
                'expected_dummy_container_spec_component.pbtxt',
                pipeline_pb2.ComponentSpec()), actual_component_def)
        self.assertProtoEquals(
            test_utils.get_proto_from_test_data(
                'expected_dummy_container_spec_task.pbtxt',
                pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
        self.assertProtoEquals(
            test_utils.get_proto_from_test_data(
                'expected_dummy_container_spec_executor.pbtxt',
                pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
Example #25
 def testGetCacheContextTwiceDifferentOutputs(self):
     with metadata.Metadata(connection_config=self._connection_config) as m:
         self._get_cache_context(m)
         self._get_cache_context(
             m, custom_output_artifacts={'k': [standard_artifacts.Model()]})
         # Different output skeleton will result in a new cache context.
         self.assertLen(m.store.get_contexts(), 2)
Example #26
 def testRunExecutor_with_InprocessExecutor(self):
     executor_spec = text_format.Parse(
         """
   class_path: "tfx.orchestration.portable.python_executor_operator_test.InprocessExecutor"
 """, executable_spec_pb2.PythonClassExecutableSpec())
     operator = python_executor_operator.PythonExecutorOperator(
         executor_spec)
     input_dict = {'input_key': [standard_artifacts.Examples()]}
     output_dict = {'output_key': [standard_artifacts.Model()]}
     exec_properties = {'key': 'value'}
     executor_output = operator.run_executor(
         self._get_execution_info(input_dict, output_dict, exec_properties))
     self.assertProtoPartiallyEquals(
         """
       execution_properties {
         key: "key"
         value {
           string_value: "value"
         }
       }
       output_artifacts {
         key: "output_key"
         value {
           artifacts {
           }
         }
       }""", executor_output)
Example #27
    def setUp(self):
        self._source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        self.component_id = 'test_component'

        # Create input dict.
        eval_examples = standard_artifacts.Examples(split='eval')
        eval_examples.uri = os.path.join(self._source_data_dir,
                                         'csv_example_gen/eval/')
        model = standard_artifacts.Model()
        model.uri = os.path.join(self._source_data_dir, 'trainer/current/')
        self._input_dict = {
            'examples': [eval_examples],
            'model': [model],
        }

        # Create output dict.
        self._blessing = standard_artifacts.ModelBlessing()
        self._blessing.uri = os.path.join(output_data_dir, 'blessing')
        self._output_dict = {'blessing': [self._blessing]}

        # Create context
        self._tmp_dir = os.path.join(output_data_dir, '.temp')
        self._context = executor.Executor.Context(tmp_dir=self._tmp_dir,
                                                  unique_id='2')
Example #28
 def setUp(self):
     super(ComponentTest, self).setUp()
     self._examples = channel_utils.as_channel(
         [standard_artifacts.Examples()])
     self._model = channel_utils.as_channel([standard_artifacts.Model()])
     self._model_blessing = channel_utils.as_channel(
         [standard_artifacts.ModelBlessing()])
Example #29
  def setUp(self):
    super(ExecutorTest, self).setUp()
    self._source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    self._output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)
    tf.io.gfile.makedirs(self._output_data_dir)
    self._model_export = standard_artifacts.Model()
    self._model_export.uri = os.path.join(self._source_data_dir,
                                          'trainer/current')
    self._model_blessing = standard_artifacts.ModelBlessing()
    self._input_dict = {
        executor.MODEL_KEY: [self._model_export],
        executor.MODEL_BLESSING_KEY: [self._model_blessing],
    }

    self._model_push = standard_artifacts.PushedModel()
    self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
    tf.io.gfile.makedirs(self._model_push.uri)
    self._output_dict = {
        executor.PUSHED_MODEL_KEY: [self._model_push],
    }
    self._serving_model_dir = os.path.join(self._output_data_dir,
                                           'serving_model_dir')
    tf.io.gfile.makedirs(self._serving_model_dir)
    self._exec_properties = {
        'push_destination':
            json_format.MessageToJson(
                pusher_pb2.PushDestination(
                    filesystem=pusher_pb2.PushDestination.Filesystem(
                        base_directory=self._serving_model_dir)),
                preserving_proto_field_name=True),
    }
    self._executor = executor.Executor()
Example #30
    def setUp(self):
        super(ExecutorTest, self).setUp()
        self._source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        fileio.makedirs(self._output_data_dir)
        self._model_export = standard_artifacts.Model()
        self._model_export.uri = os.path.join(self._source_data_dir,
                                              'trainer/current')
        self._model_blessing = standard_artifacts.ModelBlessing()
        self._input_dict = {
            MODEL_KEY: [self._model_export],
            MODEL_BLESSING_KEY: [self._model_blessing],
        }

        self._model_push = standard_artifacts.PushedModel()
        self._model_push.uri = os.path.join(self._output_data_dir,
                                            'model_push')
        fileio.makedirs(self._model_push.uri)
        self._output_dict = {
            PUSHED_MODEL_KEY: [self._model_push],
        }
        self._serving_model_dir = os.path.join(self._output_data_dir,
                                               'serving_model_dir')
        fileio.makedirs(self._serving_model_dir)
        self._exec_properties = self._MakeExecProperties()
        self._executor = executor.Executor()
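Example #30 factors its exec properties out into a _MakeExecProperties helper that is not shown; a plausible sketch, mirroring the push_destination payload built inline in Example #29:

    def _MakeExecProperties(self):
        # Hypothetical helper mirroring Example #29's exec_properties.
        return {
            'push_destination':
                json_format.MessageToJson(
                    pusher_pb2.PushDestination(
                        filesystem=pusher_pb2.PushDestination.Filesystem(
                            base_directory=self._serving_model_dir)),
                    preserving_proto_field_name=True),
        }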