Example #1
    def setUp(self):
        super(BaseExampleGenExecutorTest, self).setUp()
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create output dict.
        self._examples = standard_artifacts.Examples()
        self._examples.uri = output_data_dir
        self._output_dict = {utils.EXAMPLES_KEY: [self._examples]}

        self._train_output_file = os.path.join(
            self._examples.uri, 'train', 'data_tfrecord-00000-of-00001.gz')
        self._eval_output_file = os.path.join(
            self._examples.uri, 'eval', 'data_tfrecord-00000-of-00001.gz')

        # Create exec properties for output splits.
        self._exec_properties = {
            utils.INPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='single',
                                                pattern='single/*'),
                ])),
            utils.OUTPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Output(
                    split_config=example_gen_pb2.SplitConfig(splits=[
                        example_gen_pb2.SplitConfig.Split(name='train',
                                                          hash_buckets=2),
                        example_gen_pb2.SplitConfig.Split(name='eval',
                                                          hash_buckets=1)
                    ])))
        }
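The pattern above repeats throughout these examples: build a config proto, then serialize it with proto_utils.proto_to_json so it can be stored in exec_properties. Below is a minimal round-trip sketch, not taken from the original tests; it assumes the tfx.utils.proto_utils helpers and the example_gen_pb2 protos imported by the tests above.

# Hedged sketch: assumes tfx.proto.example_gen_pb2 and tfx.utils.proto_utils.
from tfx.proto import example_gen_pb2
from tfx.utils import proto_utils

input_config = example_gen_pb2.Input(splits=[
    example_gen_pb2.Input.Split(name='single', pattern='single/*'),
])

# proto_to_json yields a JSON string suitable for exec_properties values.
json_str = proto_utils.proto_to_json(input_config)

# json_to_proto parses the string back into a fresh message in place.
restored = example_gen_pb2.Input()
proto_utils.json_to_proto(json_str, restored)
assert restored == input_config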
Example #2
    def testDoWithOutputExamplesSpecifiedSplits(self):
        self._exec_properties['data_spec'] = proto_utils.proto_to_json(
            text_format.Parse(
                """
                example_splits: 'unlabelled'
            """, bulk_inferrer_pb2.DataSpec()))
        self._exec_properties[
            'output_example_spec'] = proto_utils.proto_to_json(
                text_format.Parse(
                    """
                output_columns_spec {
                  classify_output {
                    label_column: 'classify_label'
                    score_column: 'classify_score'
                  }
                }
            """, bulk_inferrer_pb2.OutputExampleSpec()))

        # Run executor.
        bulk_inferrer = executor.Executor(self._context)
        bulk_inferrer.Do(self._input_dict, self._output_dict_oe,
                         self._exec_properties)

        # Check outputs.
        self.assertTrue(fileio.exists(self._output_examples_dir))
        self._verify_example_split('unlabelled')
        self.assertFalse(
            fileio.exists(
                os.path.join(self._output_examples_dir, 'unlabelled2')))
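Example #2 chains text_format.Parse with proto_to_json: a text-format proto literal is parsed into a message, which is then serialized to JSON for exec_properties. A minimal sketch of that chain, reusing the bulk_inferrer_pb2.DataSpec message from above (import paths assumed):

# Hedged sketch of the parse-then-serialize chain used in Example #2.
from google.protobuf import text_format
from tfx.proto import bulk_inferrer_pb2
from tfx.utils import proto_utils

data_spec = text_format.Parse("example_splits: 'unlabelled'",
                              bulk_inferrer_pb2.DataSpec())
json_str = proto_utils.proto_to_json(data_spec)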
Example #3
  def setUp(self):
    super(ExecutorTest, self).setUp()

    # Setup Mocks

    patcher = mock.patch.object(request_builder, 'build_requests')
    self.build_requests_mock = patcher.start()
    self.addCleanup(patcher.stop)

    # Setup directories

    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    base_output_dir = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                     self.get_temp_dir())
    output_data_dir = os.path.join(base_output_dir, self._testMethodName)

    # Setup input_dict.

    self._model = standard_artifacts.Model()
    self._model.uri = os.path.join(source_data_dir, 'trainer', 'current')
    self._model_path = path_utils.serving_model_path(self._model.uri)
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'transform',
                                'transformed_examples', 'eval')
    examples.split_names = artifact_utils.encode_split_names(['eval'])

    self._input_dict = {
        MODEL_KEY: [self._model],
        EXAMPLES_KEY: [examples],
    }
    self._blessing = standard_artifacts.InfraBlessing()
    self._blessing.uri = os.path.join(output_data_dir, 'blessing')
    self._output_dict = {BLESSING_KEY: [self._blessing]}
    temp_dir = os.path.join(output_data_dir, '.temp')
    self._context = executor.Executor.Context(tmp_dir=temp_dir, unique_id='1')
    self._serving_spec = _make_serving_spec({
        'tensorflow_serving': {
            'tags': ['1.15.0']
        },
        'local_docker': {},
        'model_name': 'chicago-taxi',
    })
    self._serving_binary = serving_bins.parse_serving_binaries(
        self._serving_spec)[0]
    self._validation_spec = _make_validation_spec({
        'max_loading_time_seconds': 10,
        'num_tries': 3
    })
    self._request_spec = _make_request_spec({
        'tensorflow_serving': {
            'signature_names': ['serving_default'],
        },
        'num_examples': 1
    })
    self._exec_properties = {
        SERVING_SPEC_KEY: proto_utils.proto_to_json(self._serving_spec),
        VALIDATION_SPEC_KEY: proto_utils.proto_to_json(self._validation_spec),
        REQUEST_SPEC_KEY: proto_utils.proto_to_json(self._request_spec),
    }
Example #4
    def testDoWithCustomSplits(self):
        # Update input dict.
        io_utils.copy_dir(
            os.path.join(self._testdata_dir, 'penguin/data/train'),
            os.path.join(self._output_data_dir, 'data/training'))
        io_utils.copy_dir(
            os.path.join(self._testdata_dir, 'penguin/data/eval'),
            os.path.join(self._output_data_dir, 'data/evaluating'))
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(self._output_data_dir, 'data')
        examples.split_names = artifact_utils.encode_split_names(
            ['training', 'evaluating'])
        self._input_dict[standard_component_specs.EXAMPLES_KEY] = [examples]

        # Update exec properties skeleton with custom splits.
        self._exec_properties[standard_component_specs.
                              TRAIN_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.TrainArgs(splits=['training'],
                                                        num_steps=1000))
        self._exec_properties[standard_component_specs.
                              EVAL_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.EvalArgs(splits=['evaluating'],
                                                       num_steps=500))
        self._exec_properties[
            standard_component_specs.MODULE_FILE_KEY] = os.path.join(
                self._testdata_dir, 'module_file', 'tuner_module.py')

        tuner = executor.Executor(self._context)
        tuner.Do(input_dict=self._input_dict,
                 output_dict=self._output_dict,
                 exec_properties=self._exec_properties)

        self._verify_output()
Example #5
    def testDoWithCustomSplits(self):
        # Update input dict.
        io_utils.copy_dir(
            os.path.join(self._source_data_dir,
                         'transform/transformed_examples/data/train'),
            os.path.join(self._output_data_dir, 'data/training'))
        io_utils.copy_dir(
            os.path.join(self._source_data_dir,
                         'transform/transformed_examples/data/eval'),
            os.path.join(self._output_data_dir, 'data/evaluating'))
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(self._output_data_dir, 'data')
        examples.split_names = artifact_utils.encode_split_names(
            ['training', 'evaluating'])
        self._input_dict[standard_component_specs.EXAMPLES_KEY] = [examples]

        # Update exec properties skeleton with custom splits.
        self._exec_properties[standard_component_specs.
                              TRAIN_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.TrainArgs(splits=['training'],
                                                        num_steps=1000))
        self._exec_properties[standard_component_specs.
                              EVAL_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.EvalArgs(splits=['evaluating'],
                                                       num_steps=500))

        self._exec_properties[
            standard_component_specs.MODULE_FILE_KEY] = self._module_file
        self._do(self._trainer_executor)
        self._verify_model_exports()
        self._verify_model_run_exports()
Example #6
    def testGetCommonFnArgs(self):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir,
                                    'transform/transformed_examples')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])

        transform_output = standard_artifacts.TransformGraph()
        transform_output.uri = os.path.join(source_data_dir,
                                            'transform/transform_graph')

        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')

        base_model = standard_artifacts.Model()
        base_model.uri = os.path.join(source_data_dir, 'trainer/previous')

        input_dict = {
            standard_component_specs.EXAMPLES_KEY: [examples],
            standard_component_specs.TRANSFORM_GRAPH_KEY: [transform_output],
            standard_component_specs.SCHEMA_KEY: [schema],
            standard_component_specs.BASE_MODEL_KEY: [base_model],
        }

        # Create exec properties skeleton.
        exec_properties = {
            'train_args':
            proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=1000)),
            'eval_args':
            proto_utils.proto_to_json(trainer_pb2.EvalArgs(num_steps=500)),
        }

        fn_args = fn_args_utils.get_common_fn_args(input_dict, exec_properties,
                                                   'tempdir')
        self.assertEqual(fn_args.working_dir, 'tempdir')
        self.assertEqual(fn_args.train_steps, 1000)
        self.assertEqual(fn_args.eval_steps, 500)
        self.assertLen(fn_args.train_files, 1)
        self.assertEqual(fn_args.train_files[0],
                         os.path.join(examples.uri, 'Split-train', '*'))
        self.assertLen(fn_args.eval_files, 1)
        self.assertEqual(fn_args.eval_files[0],
                         os.path.join(examples.uri, 'Split-eval', '*'))
        self.assertEqual(fn_args.schema_path,
                         os.path.join(schema.uri, 'schema.pbtxt'))
        # Depending on execution environment, the base model may have been stored
        # at .../Format-Servo/... or .../Format-Serving/... directory patterns.
        self.assertRegex(
            fn_args.base_model,
            os.path.join(base_model.uri,
                         r'Format-(Servo|Serving)/export/chicago-taxi/\d+'))
        self.assertEqual(fn_args.transform_graph_path, transform_output.uri)
        self.assertIsInstance(fn_args.data_accessor,
                              fn_args_utils.DataAccessor)
Example #7
    def testQueryBasedDriver(self):
        # Create exec properties.
        exec_properties = {
            standard_component_specs.INPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(
                        name='s1',
                        pattern=
                        "select * from table where span={SPAN} and split='s1'"
                    ),
                    example_gen_pb2.Input.Split(
                        name='s2',
                        pattern=
                        "select * from table where span={SPAN} and split='s2'")
                ])),
            standard_component_specs.RANGE_CONFIG_KEY:
            proto_utils.proto_to_json(
                range_config_pb2.RangeConfig(
                    static_range=range_config_pb2.StaticRange(
                        start_span_number=2, end_span_number=2))),
        }
        # Prepare output_dict
        example = standard_artifacts.Examples()
        example.uri = 'my_uri'
        output_dict = {standard_component_specs.EXAMPLES_KEY: [example]}

        query_based_driver = driver.QueryBasedDriver(self._mock_metadata)
        result = query_based_driver.run(
            portable_data_types.ExecutionInfo(output_dict=output_dict,
                                              exec_properties=exec_properties))

        self.assertEqual(exec_properties[utils.SPAN_PROPERTY_NAME], 2)
        self.assertIsNone(exec_properties[utils.VERSION_PROPERTY_NAME])
        self.assertIsNone(exec_properties[utils.FINGERPRINT_PROPERTY_NAME])
        updated_input_config = example_gen_pb2.Input()
        proto_utils.json_to_proto(
            exec_properties[standard_component_specs.INPUT_CONFIG_KEY],
            updated_input_config)
        self.assertProtoEquals(
            """
        splits {
          name: "s1"
          pattern: "select * from table where span=2 and split='s1'"
        }
        splits {
          name: "s2"
          pattern: "select * from table where span=2 and split='s2'"
        }""", updated_input_config)
        self.assertLen(
            result.output_artifacts[
                standard_component_specs.EXAMPLES_KEY].artifacts, 1)
        output_example = result.output_artifacts[
            standard_component_specs.EXAMPLES_KEY].artifacts[0]
        self.assertEqual(output_example.uri, example.uri)
        self.assertEqual(
            output_example.custom_properties[
                utils.SPAN_PROPERTY_NAME].string_value, '2')
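The driver in Example #7 resolves the {SPAN} placeholder in each query pattern from the RangeConfig before writing the updated input config back into exec_properties. A simplified standalone sketch of that substitution; the real resolution logic lives in the TFX example_gen utilities, so this is illustrative only:

# Hypothetical simplification: substitute a known span number directly.
pattern = "select * from table where span={SPAN} and split='s1'"
resolved = pattern.replace('{SPAN}', str(2))
assert resolved == "select * from table where span=2 and split='s1'"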
Example #8
  def testDo(self, mock_client):
    # Mock query result schema for _BigQueryConverter.
    mock_client.return_value.query.return_value.result.return_value.schema = self._schema

    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create output dict.
    examples = standard_artifacts.Examples()
    examples.uri = output_data_dir
    output_dict = {'examples': [examples]}

    # Create exec properties.
    exec_properties = {
        'input_config':
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(
                        name='bq', pattern='SELECT i, b, f, s FROM `fake`'),
                ])),
        'output_config':
            proto_utils.proto_to_json(
                example_gen_pb2.Output(
                    split_config=example_gen_pb2.SplitConfig(splits=[
                        example_gen_pb2.SplitConfig.Split(
                            name='train', hash_buckets=2),
                        example_gen_pb2.SplitConfig.Split(
                            name='eval', hash_buckets=1)
                    ])))
    }

    # Run executor.
    big_query_example_gen = executor.Executor(
        base_beam_executor.BaseBeamExecutor.Context(
            beam_pipeline_args=['--project=test-project']))
    big_query_example_gen.Do({}, output_dict, exec_properties)

    mock_client.assert_called_with(project='test-project')

    self.assertEqual(
        artifact_utils.encode_split_names(['train', 'eval']),
        examples.split_names)

    # Check BigQuery example gen outputs.
    train_output_file = os.path.join(examples.uri, 'Split-train',
                                     'data_tfrecord-00000-of-00001.gz')
    eval_output_file = os.path.join(examples.uri, 'Split-eval',
                                    'data_tfrecord-00000-of-00001.gz')
    self.assertTrue(fileio.exists(train_output_file))
    self.assertTrue(fileio.exists(eval_output_file))
    self.assertGreater(
        fileio.open(train_output_file).size(),
        fileio.open(eval_output_file).size())
Example #9
    def setUp(self):
        super().setUp()
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        self._context = executor.Executor.Context(
            tmp_dir=self._output_data_dir, unique_id='1')

        # Create input dict.
        e1 = standard_artifacts.Examples()
        e1.uri = os.path.join(self._testdata_dir, 'penguin', 'data')
        e1.split_names = artifact_utils.encode_split_names(['train', 'eval'])

        e2 = copy.deepcopy(e1)

        self._single_artifact = [e1]
        self._multiple_artifacts = [e1, e2]

        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(self._testdata_dir, 'penguin', 'schema')

        base_model = standard_artifacts.Model()
        base_model.uri = os.path.join(self._testdata_dir, 'trainer/previous')

        self._input_dict = {
            standard_component_specs.EXAMPLES_KEY: self._single_artifact,
            standard_component_specs.SCHEMA_KEY: [schema],
            standard_component_specs.BASE_MODEL_KEY: [base_model]
        }

        # Create output dict.
        self._best_hparams = standard_artifacts.Model()
        self._best_hparams.uri = os.path.join(self._output_data_dir,
                                              'best_hparams')

        self._output_dict = {
            standard_component_specs.BEST_HYPERPARAMETERS_KEY:
            [self._best_hparams],
        }

        # Create exec properties.
        self._exec_properties = {
            standard_component_specs.TRAIN_ARGS_KEY:
            proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=100)),
            standard_component_specs.EVAL_ARGS_KEY:
            proto_utils.proto_to_json(trainer_pb2.EvalArgs(num_steps=50)),
        }
Example #10
  def testDo(self):
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create output dict.
    examples = standard_artifacts.Examples()
    examples.uri = output_data_dir
    output_dict = {utils.EXAMPLES_KEY: [examples]}

    # Create exec properties.
    exec_properties = {
        utils.INPUT_BASE_KEY:
            self._input_data_dir,
        utils.INPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(
                        name='parquet', pattern='parquet/*'),
                ])),
        utils.OUTPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Output(
                    split_config=example_gen_pb2.SplitConfig(splits=[
                        example_gen_pb2.SplitConfig.Split(
                            name='train', hash_buckets=2),
                        example_gen_pb2.SplitConfig.Split(
                            name='eval', hash_buckets=1)
                    ])))
    }

    # Run executor.
    parquet_example_gen = parquet_executor.Executor()
    parquet_example_gen.Do({}, output_dict, exec_properties)

    self.assertEqual(
        artifact_utils.encode_split_names(['train', 'eval']),
        examples.split_names)

    # Check Parquet example gen outputs.
    train_output_file = os.path.join(examples.uri, 'train',
                                     'data_tfrecord-00000-of-00001.gz')
    eval_output_file = os.path.join(examples.uri, 'eval',
                                    'data_tfrecord-00000-of-00001.gz')
    self.assertTrue(fileio.exists(train_output_file))
    self.assertTrue(fileio.exists(eval_output_file))
    self.assertGreater(
        fileio.open(train_output_file).size(),
        fileio.open(eval_output_file).size())
Example #11
    def testDoInputSplit(self):
        # Create exec properties for input split.
        self._exec_properties = {
            standard_component_specs.INPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='train',
                                                pattern='train/*'),
                    example_gen_pb2.Input.Split(name='eval', pattern='eval/*')
                ])),
            standard_component_specs.OUTPUT_CONFIG_KEY:
            proto_utils.proto_to_json(example_gen_pb2.Output())
        }

        self._testDo()
Example #12
    def testGetCommonFnArgs(self):
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Create input dict.
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(source_data_dir,
                                    'transform/transformed_examples')
        examples.split_names = artifact_utils.encode_split_names(
            ['train', 'eval'])

        transform_output = standard_artifacts.TransformGraph()
        transform_output.uri = os.path.join(source_data_dir,
                                            'transform/transform_graph')

        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(source_data_dir, 'schema_gen')

        input_dict = {
            standard_component_specs.EXAMPLES_KEY: [examples],
            standard_component_specs.TRANSFORM_GRAPH_KEY: [transform_output],
            standard_component_specs.SCHEMA_KEY: [schema],
        }

        # Create exec properties skeleton.
        exec_properties = {
            'train_args':
            proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=1000)),
            'eval_args':
            proto_utils.proto_to_json(trainer_pb2.EvalArgs(num_steps=500)),
        }

        fn_args = fn_args_utils.get_common_fn_args(input_dict, exec_properties,
                                                   'tempdir')
        self.assertEqual(fn_args.working_dir, 'tempdir')
        self.assertEqual(fn_args.train_steps, 1000)
        self.assertEqual(fn_args.eval_steps, 500)
        self.assertLen(fn_args.train_files, 1)
        self.assertEqual(fn_args.train_files[0],
                         os.path.join(examples.uri, 'train', '*'))
        self.assertLen(fn_args.eval_files, 1)
        self.assertEqual(fn_args.eval_files[0],
                         os.path.join(examples.uri, 'eval', '*'))
        self.assertEqual(fn_args.schema_path,
                         os.path.join(schema.uri, 'schema.pbtxt'))
        self.assertEqual(fn_args.transform_graph_path, transform_output.uri)
        self.assertIsInstance(fn_args.data_accessor,
                              fn_args_utils.DataAccessor)
Example #13
    def testDo(self):
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create output dict.
        examples = standard_artifacts.Examples()
        examples.uri = output_data_dir
        output_dict = {'examples': [examples]}

        # Create exec properties.
        exec_properties = {
            'input_config':
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(
                        name='bq', pattern='SELECT i, f, s FROM `fake`'),
                ])),
            'custom_config':
            proto_utils.proto_to_json(example_gen_pb2.CustomConfig()),
            'output_config':
            proto_utils.proto_to_json(
                example_gen_pb2.Output(
                    split_config=example_gen_pb2.SplitConfig(splits=[
                        example_gen_pb2.SplitConfig.Split(name='train',
                                                          hash_buckets=2),
                        example_gen_pb2.SplitConfig.Split(name='eval',
                                                          hash_buckets=1)
                    ]))),
        }

        # Run executor.
        presto_example_gen = executor.Executor()
        presto_example_gen.Do({}, output_dict, exec_properties)

        self.assertEqual(artifact_utils.encode_split_names(['train', 'eval']),
                         examples.split_names)

        # Check Presto example gen outputs.
        train_output_file = os.path.join(examples.uri, 'Split-train',
                                         'data_tfrecord-00000-of-00001.gz')
        eval_output_file = os.path.join(examples.uri, 'Split-eval',
                                        'data_tfrecord-00000-of-00001.gz')
        self.assertTrue(fileio.exists(train_output_file))
        self.assertTrue(fileio.exists(eval_output_file))
        self.assertGreater(
            fileio.open(train_output_file).size(),
            fileio.open(eval_output_file).size())
Example #14
  def testDoWithTuneArgsAndTrainingInputOverride(self):
    executor = ai_platform_tuner_executor.Executor()
    self._exec_properties['tune_args'] = proto_utils.proto_to_json(
        tuner_pb2.TuneArgs(num_parallel_trials=6))

    self._exec_properties['custom_config'][
        ai_platform_trainer_executor.TRAINING_ARGS_KEY].update({
            'scaleTier': 'CUSTOM',
            'masterType': 'n1-highmem-16',
            'workerType': 'n1-highmem-16',
            'workerCount': 2,
        })

    executor.Do(self._inputs, self._outputs,
                self._serialize_custom_config_under_test())

    self.mock_runner.start_aip_training.assert_called_with(
        self._inputs,
        self._outputs,
        self._serialize_custom_config_under_test(),
        self._executor_class_path,
        {
            'project': self._project_id,
            'jobDir': self._job_dir,
            # Confirm scale tier and machine types are not overwritten.
            'scaleTier': 'CUSTOM',
            'masterType': 'n1-highmem-16',
            'workerType': 'n1-highmem-16',
            # Confirm workerCount has been adjusted to num_parallel_trials.
            'workerCount': 5,
        },
        mock.ANY)
Example #15
  def testDoSkippedModelCreation(self, mock_runner, mock_run_model_inference,
                                 _):
    input_dict = {
        'examples': [self._examples],
        'model': [self._model],
        'model_blessing': [self._model_blessing],
    }
    output_dict = {
        'inference_result': [self._inference_result],
    }
    ai_platform_serving_args = {
        'model_name': 'model_name',
        'project_id': 'project_id'
    }
    # Create exec properties.
    exec_properties = {
        'data_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
        'custom_config':
            json_utils.dumps(
                {executor.SERVING_ARGS_KEY: ai_platform_serving_args}),
    }
    mock_runner.get_service_name_and_api_version.return_value = ('ml', 'v1')
    mock_runner.create_model_for_aip_prediction_if_not_exist.return_value = False

    # Run executor.
    bulk_inferrer = executor.Executor(self._context)
    bulk_inferrer.Do(input_dict, output_dict, exec_properties)

    ai_platform_prediction_model_spec = (
        model_spec_pb2.AIPlatformPredictionModelSpec(
            project_id='project_id',
            model_name='model_name',
            version_name=self._model_version))
    ai_platform_prediction_model_spec.use_serialization_config = True
    inference_endpoint = model_spec_pb2.InferenceSpecType()
    inference_endpoint.ai_platform_prediction_model_spec.CopyFrom(
        ai_platform_prediction_model_spec)
    mock_run_model_inference.assert_called_once_with(mock.ANY, mock.ANY,
                                                     mock.ANY, mock.ANY,
                                                     mock.ANY,
                                                     inference_endpoint)
    executor_class_path = '%s.%s' % (bulk_inferrer.__class__.__module__,
                                     bulk_inferrer.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.make_labels_dict()
    mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
        serving_path=path_utils.serving_model_path(self._model.uri),
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        labels=job_labels,
        api=mock.ANY,
        skip_model_endpoint_creation=True,
        set_default=False)
    mock_runner.delete_model_from_aip_if_exists.assert_called_once_with(
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        api=mock.ANY,
        delete_model_endpoint=False)
Example #16
 def get_value_and_set_type(
     value: types.ExecPropertyTypes,
     value_type: pipeline_pb2.Value.Schema.ValueType) -> types.Property:
   """Returns serialized value and sets value_type."""
   if isinstance(value, bool):
     if set_schema:
       value_type.boolean_type.SetInParent()
     return value
   elif isinstance(value, message.Message):
      # TODO(b/171794016): Investigate if a file descriptor set is needed for
      # tfx-owned protos already built into the launcher binary.
     if set_schema:
       proto_type = value_type.proto_type
       proto_type.message_type = type(value).DESCRIPTOR.full_name
       proto_utils.build_file_descriptor_set(value,
                                             proto_type.file_descriptors)
     return proto_utils.proto_to_json(value)
   elif isinstance(value, list) and len(value):
     if set_schema:
       value_type.list_type.SetInParent()
     value = [
         get_value_and_set_type(val, value_type.list_type) for val in value
     ]
     return json_utils.dumps(value)
   elif isinstance(value, (int, float, str)):
     return value
   else:
     raise ValueError('Unexpected type %s' % type(value))
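The type dispatch in get_value_and_set_type is easier to see without the schema bookkeeping. Below is a simplified standalone analogue, a sketch using only google.protobuf and the standard library; it drops the set_schema side effects and is not the TFX implementation:

import json

from google.protobuf import json_format
from google.protobuf import message


def serialize_exec_property(value):
  """Simplified analogue of get_value_and_set_type: dispatch on runtime type."""
  if isinstance(value, bool):
    # bool must be checked before int, since bool subclasses int.
    return value
  if isinstance(value, message.Message):
    # Protos are serialized to JSON strings, as proto_utils.proto_to_json does.
    return json_format.MessageToJson(value)
  if isinstance(value, list) and value:
    # Serialize elements recursively, then dump the list itself as JSON.
    return json.dumps([serialize_exec_property(v) for v in value])
  if isinstance(value, (int, float, str)):
    return value
  raise ValueError('Unexpected type %s' % type(value))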
Example #17
  def _parse_parameters(self, raw_args: Mapping[str, Any]):
    """Parse arguments to ComponentSpec."""
    unparsed_args = set(raw_args.keys())
    inputs = {}
    outputs = {}
    self.exec_properties = {}

    # First, check that the arguments are set.
    for arg_name, arg in itertools.chain(self.PARAMETERS.items(),
                                         self.INPUTS.items(),
                                         self.OUTPUTS.items()):
      if arg_name not in unparsed_args:
        if arg.optional:
          continue
        else:
          raise ValueError('Missing argument %r to %s.' %
                           (arg_name, self.__class__))
      unparsed_args.remove(arg_name)

      # Type check the argument.
      value = raw_args[arg_name]
      if arg.optional and value is None:
        continue
      arg.type_check(arg_name, value)

    # Populate the appropriate dictionary for each parameter type.
    for arg_name, arg in self.PARAMETERS.items():
      if arg.optional and arg_name not in raw_args:
        continue
      value = raw_args[arg_name]

      if (inspect.isclass(arg.type) and
          issubclass(arg.type, message.Message) and value and
          not _is_runtime_param(value)):
        if arg.use_proto:
          if isinstance(value, dict):
            value = proto_utils.dict_to_proto(value, arg.type())
          elif isinstance(value, str):
            value = proto_utils.json_to_proto(value, arg.type())
        else:
          # Create a deterministic JSON string, as it will be stored in
          # metadata for cache checks.
          if isinstance(value, dict):
            value = json_utils.dumps(value)
          elif not isinstance(value, str):
            value = proto_utils.proto_to_json(value)

      self.exec_properties[arg_name] = value

    for arg_dict, param_dict in ((self.INPUTS, inputs), (self.OUTPUTS,
                                                         outputs)):
      for arg_name, arg in arg_dict.items():
        if arg.optional and not raw_args.get(arg_name):
          continue
        value = raw_args[arg_name]
        param_dict[arg_name] = value

    self.inputs = inputs
    self.outputs = outputs
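Note how _parse_parameters accepts proto-valued parameters either as a dict or as a JSON string. A small sketch of that normalization, assuming the same proto_utils helpers and the trainer_pb2.TrainArgs proto used elsewhere on this page:

# Hedged sketch, not part of _parse_parameters itself.
from tfx.proto import trainer_pb2
from tfx.utils import proto_utils

# A dict value is converted with dict_to_proto ...
from_dict = proto_utils.dict_to_proto({'num_steps': 1000},
                                      trainer_pb2.TrainArgs())
# ... and a JSON string value with json_to_proto.
from_json = proto_utils.json_to_proto('{"num_steps": 1000}',
                                      trainer_pb2.TrainArgs())
assert from_dict == from_json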
Example #18
  def setUp(self):
    super(ExecutorTest, self).setUp()
    self._input_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'testdata',
        'external')

    # Create values in exec_properties
    self._input_config = proto_utils.proto_to_json(
        example_gen_pb2.Input(splits=[
            example_gen_pb2.Input.Split(name='tfrecord', pattern='tfrecord/*'),
        ]))
    self._output_config = proto_utils.proto_to_json(
        example_gen_pb2.Output(
            split_config=example_gen_pb2.SplitConfig(splits=[
                example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=2),
                example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
            ])))
Example #19
 def test_do_with_empty_analyze_splits(self):
     self._exec_properties['splits_config'] = proto_utils.proto_to_json(
         transform_pb2.SplitsConfig(analyze=[], transform=['train',
                                                           'eval']))
     self._exec_properties['module_file'] = self._module_file
     with self.assertRaises(ValueError):
         self._transform_executor.Do(self._input_dict, self._output_dict,
                                     self._exec_properties)
Example #20
 def test_do_with_custom_splits(self):
     self._exec_properties['splits_config'] = proto_utils.proto_to_json(
         transform_pb2.SplitsConfig(analyze=['train'],
                                    transform=['train', 'eval']))
     self._exec_properties['module_file'] = self._module_file
     self._transform_executor.Do(self._input_dict, self._output_dict,
                                 self._exec_properties)
     self._verify_transform_outputs()
Example #21
    def setUp(self):
        super(ExecutorTest, self).setUp()
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        self._context = executor.Executor.Context(
            tmp_dir=self._output_data_dir, unique_id='1')

        # Create input dict.
        e1 = standard_artifacts.Examples()
        e1.uri = os.path.join(self._testdata_dir, 'penguin', 'data')
        e1.split_names = artifact_utils.encode_split_names(['train', 'eval'])

        e2 = copy.deepcopy(e1)

        self._single_artifact = [e1]
        self._multiple_artifacts = [e1, e2]

        schema = standard_artifacts.Schema()
        schema.uri = os.path.join(self._testdata_dir, 'penguin', 'schema')

        self._input_dict = {
            'examples': self._single_artifact,
            'schema': [schema],
        }

        # Create output dict.
        self._best_hparams = standard_artifacts.Model()
        self._best_hparams.uri = os.path.join(self._output_data_dir,
                                              'best_hparams')

        self._output_dict = {
            'best_hyperparameters': [self._best_hparams],
        }

        # Create exec properties.
        self._exec_properties = {
            'train_args':
            proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=100)),
            'eval_args':
            proto_utils.proto_to_json(trainer_pb2.EvalArgs(num_steps=50)),
        }
Example #22
    def testTuneArgs(self):
        with self.assertRaises(ValueError):
            self._exec_properties['tune_args'] = proto_utils.proto_to_json(
                tuner_pb2.TuneArgs(num_parallel_trials=3))

            tuner = executor.Executor(self._context)
            tuner.Do(input_dict=self._input_dict,
                     output_dict=self._output_dict,
                     exec_properties=self._exec_properties)
Example #23
 def test_do_with_empty_analyze_splits(self):
     self._exec_properties[standard_component_specs.
                           SPLITS_CONFIG_KEY] = proto_utils.proto_to_json(
                               transform_pb2.SplitsConfig(
                                   analyze=[], transform=['train', 'eval']))
     self._exec_properties[
         standard_component_specs.MODULE_FILE_KEY] = self._module_file
     with self.assertRaises(ValueError):
         self._transform_executor.Do(self._input_dict, self._output_dict,
                                     self._exec_properties)
Example #24
  def _testFeatureBasedPartition(self, partition_feature_name):
   self._exec_properties[utils.OUTPUT_CONFIG_KEY] = proto_utils.proto_to_json(
       example_gen_pb2.Output(
           split_config=example_gen_pb2.SplitConfig(
               splits=[
                   example_gen_pb2.SplitConfig.Split(
                       name='train', hash_buckets=2),
                   example_gen_pb2.SplitConfig.Split(
                       name='eval', hash_buckets=1)
               ],
               partition_feature_name=partition_feature_name)))
Example #25
 def test_do_with_custom_splits(self):
     self._exec_properties[standard_component_specs.
                           SPLITS_CONFIG_KEY] = proto_utils.proto_to_json(
                               transform_pb2.SplitsConfig(
                                   analyze=['train'],
                                   transform=['train', 'eval']))
     self._exec_properties[
         standard_component_specs.MODULE_FILE_KEY] = self._module_file
     self._transform_executor.Do(self._input_dict, self._output_dict,
                                 self._exec_properties)
     self._verify_transform_outputs()
Example #26
    def testTuneArgs(self):
        with self.assertRaises(ValueError):
            self._exec_properties[standard_component_specs.
                                  TUNE_ARGS_KEY] = proto_utils.proto_to_json(
                                      tuner_pb2.TuneArgs(
                                          num_parallel_trials=3))

            tuner = executor.Executor(self._context)
            tuner.Do(input_dict=self._input_dict,
                     output_dict=self._output_dict,
                     exec_properties=self._exec_properties)
Example #27
    def testPrestoToExample(self):
        with beam.Pipeline() as pipeline:
            examples = (pipeline | 'ToTFExample' >> executor._PrestoToExample(
                exec_properties={
                    'input_config':
                    proto_utils.proto_to_json(example_gen_pb2.Input()),
                    'custom_config':
                    proto_utils.proto_to_json(example_gen_pb2.CustomConfig())
                },
                split_pattern='SELECT i, f, s FROM `fake`'))

            feature = {}
            feature['i'] = tf.train.Feature(int64_list=tf.train.Int64List(
                value=[1]))
            feature['f'] = tf.train.Feature(float_list=tf.train.FloatList(
                value=[2.0]))
            feature['s'] = tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[tf.compat.as_bytes('abc')]))
            example_proto = tf.train.Example(features=tf.train.Features(
                feature=feature))
            util.assert_that(examples, util.equal_to([example_proto]))
Example #28
    def run_executor(
        self, execution_info: data_types.ExecutionInfo
    ) -> execution_result_pb2.ExecutorOutput:
        """Execute underlying component implementation."""

        context = placeholder_utils.ResolutionContext(
            exec_info=execution_info,
            executor_spec=self._executor_spec,
            platform_config=self._platform_config)

        component_executor_spec = (
            executor_specs.TemplatedExecutorContainerSpec(
                image=self._container_executor_spec.image,
                command=[
                    placeholder_utils.resolve_placeholder_expression(
                        cmd, context)
                    for cmd in self._container_executor_spec.commands
                ]))

        logging.info('Container spec: %s', vars(component_executor_spec))
        logging.info('Docker platform config: %s',
                     proto_utils.proto_to_json(self._docker_platform_config))

        # Call client.containers.run and wait for completion.
        # ExecutorContainerSpec follows the k8s container spec, which uses
        # different names than Docker's container spec: k8s `command` maps to
        # Docker's entrypoint and k8s `args` maps to Docker's command.
        if self._docker_platform_config.docker_server_url:
            client = docker.DockerClient(
                base_url=self._docker_platform_config.docker_server_url)
        else:
            client = docker.from_env()

        run_args = self._build_run_args(self._docker_platform_config)
        container = client.containers.run(
            image=component_executor_spec.image,
            command=component_executor_spec.command,
            detach=True,
            **run_args)

        # Streaming logs
        for log in container.logs(stream=True):
            logging.info('Docker: %s', log.decode('utf-8'))
        exit_code = container.wait()['StatusCode']
        if exit_code != 0:
            raise RuntimeError(
                'Container exited with error code "{}"'.format(exit_code))
        # TODO(b/141192583): Report data to publisher
        # - report container digest
        # - report replaced command line entrypoints
        # - report docker run args
        return execution_result_pb2.ExecutorOutput()
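The container lifecycle in run_executor reduces to a handful of docker-py calls: create a client, start a detached container, stream its logs, and check the exit status. A minimal standalone sketch, assuming a local Docker daemon; the hello-world image name is illustrative:

import logging

import docker

client = docker.from_env()
container = client.containers.run('hello-world', detach=True)

# Stream logs until the container finishes, then check its exit status.
for log in container.logs(stream=True):
  logging.info('Docker: %s', log.decode('utf-8'))
exit_code = container.wait()['StatusCode']
if exit_code != 0:
  raise RuntimeError('Container exited with error code "{}"'.format(exit_code))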
Example #29
  def testDoWithSequenceExamples(self):
    self._input_config = proto_utils.proto_to_json(
        example_gen_pb2.Input(splits=[
            example_gen_pb2.Input.Split(
                name='tfrecord_sequence', pattern='tfrecord_sequence/*'),
        ]))

    self._testDo(example_gen_pb2.PayloadFormat.FORMAT_TF_SEQUENCE_EXAMPLE)
    self.assertEqual(
        example_gen_pb2.PayloadFormat.Name(
            example_gen_pb2.PayloadFormat.FORMAT_TF_SEQUENCE_EXAMPLE),
        self.examples.get_string_custom_property(
            utils.PAYLOAD_FORMAT_PROPERTY_NAME))
Example #30
  def testDo_MakeSavedModelWarmup(self):
    infra_validator = executor.Executor(self._context)
    self._request_spec.make_warmup = True
    self._exec_properties[REQUEST_SPEC_KEY] = (
        proto_utils.proto_to_json(self._request_spec))

    with mock.patch.object(infra_validator, '_ValidateOnce'):
      infra_validator.Do(self._input_dict, self._output_dict,
                         self._exec_properties)

    warmup_file = path_utils.warmup_file_path(
        path_utils.stamped_model_path(self._blessing.uri))
    self.assertFileExists(warmup_file)
    self.assertEqual(self._blessing.get_int_custom_property('has_model'), 1)