Example #1
    def testTrainerFn(self):
        temp_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        schema_file = os.path.join(self._testdata_path,
                                   'schema_gen/schema.pbtxt')
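        # Assemble TrainerFnArgs pointing at the test data and a temporary
        # output directory.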
        trainer_fn_args = trainer_executor.TrainerFnArgs(
            train_files=os.path.join(
                self._testdata_path,
                'transform/transformed_examples/train/*.gz'),
            transform_output=os.path.join(self._testdata_path,
                                          'transform/transform_output/'),
            serving_model_dir=os.path.join(temp_dir, 'serving_model_dir'),
            eval_files=os.path.join(
                self._testdata_path,
                'transform/transformed_examples/eval/*.gz'),
            schema_file=schema_file,
            train_steps=1,
            eval_steps=1,
            base_model=os.path.join(self._testdata_path,
                                    'trainer/current/serving_model_dir'),
            data_accessor=DataAccessor(
                tf_dataset_factory=(
                    tfxio_utils.get_tf_dataset_factory_from_artifact(
                        [standard_artifacts.Examples()], [])),
                record_batch_factory=None,
                data_view_decode_fn=None))
        schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())
        training_spec = taxi_utils_bqml.trainer_fn(trainer_fn_args, schema)

        estimator = training_spec['estimator']
        train_spec = training_spec['train_spec']
        eval_spec = training_spec['eval_spec']
        eval_input_receiver_fn = training_spec['eval_input_receiver_fn']

        self.assertIsInstance(estimator, tf.estimator.Estimator)
        self.assertIsInstance(train_spec, tf.estimator.TrainSpec)
        self.assertIsInstance(eval_spec, tf.estimator.EvalSpec)
        self.assertIsInstance(eval_input_receiver_fn, types.FunctionType)

        # Train for one step, then eval for one step.
        eval_result, exports = tf.estimator.train_and_evaluate(
            estimator, train_spec, eval_spec)
        self.assertGreater(eval_result['loss'], 0.0)
        self.assertEqual(len(exports), 1)
        self.assertGreaterEqual(len(fileio.listdir(exports[0])), 1)

        # Export the eval saved model.
        eval_savedmodel_path = tfma.export.export_eval_savedmodel(
            estimator=estimator,
            export_dir_base=path_utils.eval_model_dir(temp_dir),
            eval_input_receiver_fn=eval_input_receiver_fn)
        self.assertGreaterEqual(len(fileio.listdir(eval_savedmodel_path)), 1)

        # Test exported serving graph.
        with tf.compat.v1.Session() as sess:
            metagraph_def = tf.compat.v1.saved_model.loader.load(
                sess, [tf.saved_model.SERVING], exports[0])
            self.assertIsInstance(metagraph_def, tf.compat.v1.MetaGraphDef)
Example #2
 def _verify_model_exports(self):
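     # Both the eval and serving model directories should exist under the
     # model exports artifact URI.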
     self.assertTrue(
         tf.io.gfile.exists(
             path_utils.eval_model_dir(self._model_exports.uri)))
     self.assertTrue(
         tf.io.gfile.exists(
             path_utils.serving_model_dir(self._model_exports.uri)))
Example #3
    def _GetFnArgs(self, input_dict: Dict[str, List[types.Artifact]],
                   output_dict: Dict[str, List[types.Artifact]],
                   exec_properties: Dict[str, Any]) -> fn_args_utils.FnArgs:
        if input_dict.get(standard_component_specs.HYPERPARAMETERS_KEY):
            hyperparameters_file = io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(
                    input_dict[standard_component_specs.HYPERPARAMETERS_KEY]))
            hyperparameters_config = json.loads(
                file_io.read_file_to_string(hyperparameters_file))
        else:
            hyperparameters_config = None

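        # Derive the serving and eval model directories from the output Model
        # artifact URI.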
        output_path = artifact_utils.get_single_uri(
            output_dict[standard_component_specs.MODEL_KEY])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        model_run_dir = artifact_utils.get_single_uri(
            output_dict[standard_component_specs.MODEL_RUN_KEY])

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        result = fn_args_utils.get_common_fn_args(input_dict, exec_properties)
        if result.custom_config and not isinstance(result.custom_config, dict):
            raise ValueError(
                'custom_config in execution properties needs to be a '
                'dict. Got %s instead.' % type(result.custom_config))
        result.transform_output = result.transform_graph_path
        result.serving_model_dir = serving_model_dir
        result.eval_model_dir = eval_model_dir
        result.model_run_dir = model_run_dir
        result.schema_file = result.schema_path
        result.hyperparameters = hyperparameters_config
        return result
Example #4
    def test_trainer_fn(self):
        temp_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        schema_file = os.path.join(self._testdata_path,
                                   'schema_gen/schema.pbtxt')
        output_dir = os.path.join(temp_dir, 'output_dir')
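        # Assemble HParams pointing at the test data and temporary output
        # directories.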
        hparams = tf.contrib.training.HParams(
            train_files=os.path.join(
                self._testdata_path,
                'transform/transformed_examples/train/*.gz'),
            transform_output=os.path.join(self._testdata_path,
                                          'transform/transform_output/'),
            output_dir=output_dir,
            serving_model_dir=os.path.join(temp_dir, 'serving_model_dir'),
            eval_files=os.path.join(
                self._testdata_path,
                'transform/transformed_examples/eval/*.gz'),
            schema_file=schema_file,
            train_steps=1,
            eval_steps=1,
            verbosity='INFO',
            warm_start_from=os.path.join(self._testdata_path,
                                         'trainer/current/serving_model_dir'))
        schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())
        training_spec = taxi_utils.trainer_fn(hparams, schema)

        estimator = training_spec['estimator']
        train_spec = training_spec['train_spec']
        eval_spec = training_spec['eval_spec']
        eval_input_receiver_fn = training_spec['eval_input_receiver_fn']

        self.assertIsInstance(estimator,
                              tf.estimator.DNNLinearCombinedClassifier)
        self.assertIsInstance(train_spec, tf.estimator.TrainSpec)
        self.assertIsInstance(eval_spec, tf.estimator.EvalSpec)
        self.assertIsInstance(eval_input_receiver_fn, types.FunctionType)

        # Train for one step, then eval for one step.
        eval_result, exports = tf.estimator.train_and_evaluate(
            estimator, train_spec, eval_spec)
        self.assertGreater(eval_result['loss'], 0.0)
        self.assertEqual(len(exports), 1)
        self.assertGreaterEqual(len(tf.gfile.ListDirectory(exports[0])), 1)

        # Export the eval saved model.
        eval_savedmodel_path = tfma.export.export_eval_savedmodel(
            estimator=estimator,
            export_dir_base=path_utils.eval_model_dir(output_dir),
            eval_input_receiver_fn=eval_input_receiver_fn)
        self.assertGreaterEqual(
            len(tf.gfile.ListDirectory(eval_savedmodel_path)), 1)

        # Test exported serving graph.
        with tf.Session() as sess:
            metagraph_def = tf.compat.v1.saved_model.loader.load(
                sess, [tf.saved_model.tag_constants.SERVING], exports[0])
            self.assertIsInstance(metagraph_def, tf.MetaGraphDef)
Example #5
  def test_estimator_lifecycle(self, estimator_constructor):
    """Checks that a full estimator lifecycle completes without crashing."""

    # Generate data that the adapter can consume.
    task = tfds_task.TFDSTask(tfds.builder('titanic'))

    autodata = subpipeline.AutoData(
        task.problem_statement,
        examples=task.train_and_eval_examples,
        preprocessor=basic_preprocessor.BasicPreprocessor())

    self.run_pipeline(components=task.components + autodata.components)

    # Create the trainer adapter.
    adapter = estimator_adapter.EstimatorAdapter(
        problem_statement=task.problem_statement,
        transform_graph_dir=self.artifact_dir(
            'Transform.AutoData/transform_graph'))

    config = tf.estimator.RunConfig(
        save_checkpoints_steps=999, keep_checkpoint_max=3)

    # Create the estimator.
    estimator = estimator_constructor(adapter, config)

    # Train.
    estimator.train(
        input_fn=adapter.get_input_fn(
            file_pattern=self.artifact_dir(
                'Transform.AutoData/transformed_examples', 'train/*'),
            batch_size=3),
        max_steps=3)

    # Eval.
    results = estimator.evaluate(
        input_fn=adapter.get_input_fn(
            file_pattern=self.artifact_dir(
                'Transform.AutoData/transformed_examples', 'eval/*'),
            batch_size=3),
        steps=1)
    self.assertNotEmpty(results)

    # Export for TFMA.
    tfma.export.export_eval_savedmodel(
        estimator=estimator,
        export_dir_base=path_utils.eval_model_dir(estimator.model_dir),
        eval_input_receiver_fn=adapter.get_eval_input_receiver_fn())

    # Export for Serving.
    estimator.export_saved_model(
        export_dir_base=os.path.join(estimator.model_dir, 'export'),
        serving_input_receiver_fn=adapter.get_serving_input_receiver_fn())
Example #6
    def testEstimatorModelPath(self, is_old_artifact):
        # Create folders based on Estimator based Trainer output model directory,
        # after Executor performs cleaning.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        eval_model_path = path_utils.eval_model_dir(output_uri,
                                                    is_old_artifact)
        eval_model = os.path.join(eval_model_path, 'saved_model.pb')
        io_utils.write_string_file(eval_model, 'testing')
        serving_model_path = path_utils.serving_model_dir(
            output_uri, is_old_artifact)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(
            eval_model_path,
            path_utils.eval_model_path(output_uri, is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.serving_model_path(output_uri, is_old_artifact))

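        # TFMA_EVAL resolves to the eval model directory; the remaining model
        # types resolve to the serving model directory.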
        self.assertEqual(
            eval_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TFMA_EVAL,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_KERAS,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_GENERIC,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_ESTIMATOR,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri, path_constants.TF_JS,
                                             is_old_artifact))
        self.assertEqual(
            serving_model_path,
            path_utils.get_model_dir_by_type(output_uri,
                                             path_constants.TF_LITE,
                                             is_old_artifact))
Example #7
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> fn_args_utils.FnArgs:
    # Load and deserialize custom config from execution properties.
    # Note that in the component interface the default serialization of custom
    # config is 'null' instead of '{}'. Therefore we need to default the
    # json_utils.loads to 'null' then populate it with an empty dict when
    # needed.
    custom_config = json_utils.loads(
        exec_properties.get(constants.CUSTOM_CONFIG_KEY, 'null')) or {}
    if not isinstance(custom_config, dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict. Got %s instead.' % type(custom_config))

    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    if input_dict.get(constants.BASE_MODEL_KEY):
      base_model = path_utils.serving_model_path(
          artifact_utils.get_single_uri(input_dict[constants.BASE_MODEL_KEY]))
    else:
      base_model = None

    if input_dict.get(constants.HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(
              input_dict[constants.HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    output_path = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    model_run_dir = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_RUN_KEY])

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    result = fn_args_utils.get_common_fn_args(input_dict, exec_properties)
    result.transform_output = result.transform_graph_path
    result.serving_model_dir = serving_model_dir
    result.eval_model_dir = eval_model_dir
    result.model_run_dir = model_run_dir
    result.schema_file = result.schema_path
    result.base_model = base_model
    result.hyperparameters = hyperparameters_config
    result.custom_config = custom_config
    return result
Example #8
  def _assertNumberOfTrainerOutputIsOne(self, pipeline_name):
    """Make sure the number of trainer executions and output models."""
    # There must be only one execution of Trainer.
    trainer_output_base_dir = os.path.join(
        self._pipeline_root(pipeline_name), 'Trainer', 'model')
    trainer_outputs = fileio.listdir(trainer_output_base_dir)
    self.assertEqual(1, len(trainer_outputs))

    # There must be only one saved model each for serving and eval.
    model_uri = os.path.join(trainer_output_base_dir, trainer_outputs[0])
    eval_model_dir = path_utils.eval_model_dir(model_uri)
    serving_model_dir = path_utils.serving_model_dir(model_uri)
    self.assertEqual(1, fileio.listdir(eval_model_dir).count('saved_model.pb'))
    self.assertEqual(1,
                     fileio.listdir(serving_model_dir).count('saved_model.pb'))
Example #9
    def testEstimatorModelPath(self):
        # Create folders based on Estimator based Trainer output model directory,
        # after Executor performs cleaning.
        output_uri = os.path.join(self.get_temp_dir(), 'model_dir')
        eval_model_path = path_utils.eval_model_dir(output_uri)
        eval_model = os.path.join(eval_model_path, 'saved_model.pb')
        io_utils.write_string_file(eval_model, 'testing')
        serving_model_path = path_utils.serving_model_dir(output_uri)
        serving_model = os.path.join(serving_model_path, 'saved_model.pb')
        io_utils.write_string_file(serving_model, 'testing')

        # Test retrieving model folder.
        self.assertEqual(eval_model_path,
                         path_utils.eval_model_path(output_uri))
        self.assertEqual(serving_model_path,
                         path_utils.serving_model_path(output_uri))
Example #10
    def _assertNumberOfTrainerOutputIsOne(self, pipeline_name):
        """Make sure the number of trainer executions and output models."""
        # There must be only one execution of Trainer.
        trainer_output_base_dir = os.path.join(
            self._pipeline_root(pipeline_name), 'Trainer', 'model')
        trainer_outputs = tf.io.gfile.listdir(trainer_output_base_dir)
        self.assertEqual(1, len(trainer_outputs))

        # There must be only one saved model each for serving and eval.
        model_uri = os.path.join(trainer_output_base_dir, trainer_outputs[0])
        self.assertEqual(
            1, len(tf.io.gfile.listdir(path_utils.eval_model_dir(model_uri))))
        self.assertEqual(
            1,
            len(
                tf.io.gfile.listdir(
                    os.path.join(path_utils.serving_model_dir(model_uri),
                                 'export', 'chicago-taxi'))))
Example #11
def run_fn(fn_args: executor.TrainerFnArgs):
    """Train the model based on given args.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    schema = io_utils.parse_pbtxt_file(fn_args.schema_file,
                                       schema_pb2.Schema())

    training_spec = _trainer_fn(fn_args, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])
    absl.logging.info('Training complete.  Model written to %s',
                      fn_args.serving_model_dir)

    # Export an eval savedmodel for TFMA
    # NOTE: When training in a distributed cluster, the eval_savedmodel must be
    # exported only by the chief worker (check TF_CONFIG).
    absl.logging.info('Exporting eval_savedmodel for TFMA.')
    eval_export_dir = path_utils.eval_model_dir(fn_args.model_run_dir)
    tfma.export.export_eval_savedmodel(
        estimator=training_spec['estimator'],
        export_dir_base=eval_export_dir,
        eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

    absl.logging.info('Exported eval_savedmodel to %s.', eval_export_dir)

    # TODO(b/160795287): Deprecate estimator based executor.
    # Copy serving and eval model from model_run to model artifact directory.
    serving_source = path_utils.serving_model_path(fn_args.model_run_dir)
    io_utils.copy_dir(serving_source, fn_args.serving_model_dir)
    absl.logging.info('Serving model copied to: %s.',
                      fn_args.serving_model_dir)

    eval_source = path_utils.eval_model_path(fn_args.model_run_dir)
    io_utils.copy_dir(eval_source, fn_args.eval_model_dir)
    absl.logging.info('Eval model copied to: %s.', fn_args.eval_model_dir)
Example #12
 def _verify_no_eval_model_exports(self):
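     # The eval model directory should not exist when no eval model was
     # exported.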
     self.assertFalse(
         fileio.exists(path_utils.eval_model_dir(self._model_exports.uri)))
Example #13
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

    The Trainer Executor invokes a training_fn callback function provided by
    the user via the module_file parameter. The Trainer Executor then trains a
    TensorFlow model using the tf.estimator returned by this function.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          splits.
        - transform_output: Optional input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: Exported model.
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'trainer_fn'
        are present in 'exec_properties'.
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    # TODO(zhitaoli): Deprecate this in a future version.
    if exec_properties.get('custom_config', None):
      cmle_args = exec_properties.get('custom_config',
                                      {}).get('cmle_training_args')
      if cmle_args:
        executor_class_path = '.'.join([Executor.__module__, Executor.__name__])
        absl.logging.warn(
            'Passing \'cmle_training_args\' to trainer directly is deprecated, '
            'please use extension executor at '
            'tfx.extensions.google_cloud_ai_platform.trainer.executor instead')

        return runner.start_cmle_training(input_dict, output_dict,
                                          exec_properties, executor_class_path,
                                          cmle_args)

    trainer_fn = self._GetTrainerFn(exec_properties)

    # Set up training parameters
    train_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict['examples'], 'train'))
    ]
    transform_output = artifact_utils.get_single_uri(
        input_dict['transform_output']) if input_dict.get(
            'transform_output', None) else None
    eval_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict['examples'], 'eval'))
    ]
    schema_file = io_utils.get_only_uri_in_dir(
        artifact_utils.get_single_uri(input_dict['schema']))

    train_args = trainer_pb2.TrainArgs()
    eval_args = trainer_pb2.EvalArgs()
    json_format.Parse(exec_properties['train_args'], train_args)
    json_format.Parse(exec_properties['eval_args'], eval_args)

    # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
    # num_steps=None.  Conversion of the proto to python will set the default
    # value of an int as 0 so modify the value here.  Tensorflow will raise an
    # error if num_steps <= 0.
    train_steps = train_args.num_steps or None
    eval_steps = eval_args.num_steps or None

    output_path = artifact_utils.get_single_uri(output_dict['output'])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    # Assemble warm start path if needed.
    warm_start_from = None
    if exec_properties.get('warm_starting') and exec_properties.get(
        'warm_start_from'):
      previous_model_dir = os.path.join(exec_properties['warm_start_from'],
                                        path_utils.SERVING_MODEL_DIR)
      if previous_model_dir and tf.io.gfile.exists(
          os.path.join(previous_model_dir, self._CHECKPOINT_FILE_NAME)):
        warm_start_from = previous_model_dir

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    hparams = _HParamWrapper(
        # A list of uris for train files.
        train_files=train_files,
        # An optional single uri for transform graph produced by TFT. Will be
        # None if not specified.
        transform_output=transform_output,
        # A single uri for the output directory of the serving model.
        serving_model_dir=serving_model_dir,
        # A list of uris for eval files.
        eval_files=eval_files,
        # A single uri for schema file.
        schema_file=schema_file,
        # Number of train steps.
        train_steps=train_steps,
        # Number of eval steps.
        eval_steps=eval_steps,
        # A single uri for the model directory to warm start from.
        warm_start_from=warm_start_from)

    schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())

    training_spec = trainer_fn(hparams, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])
    absl.logging.info('Training complete.  Model written to %s',
                      serving_model_dir)

    # Export an eval savedmodel for TFMA
    absl.logging.info('Exporting eval_savedmodel for TFMA.')
    tfma.export.export_eval_savedmodel(
        estimator=training_spec['estimator'],
        export_dir_base=eval_model_dir,
        eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

    absl.logging.info('Exported eval_savedmodel to %s.', eval_model_dir)
Example #14
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> TrainerFnArgs:
    custom_config = exec_properties.get('custom_config') or {}
    if not isinstance(custom_config, dict):
      raise ValueError('Expect custom_config to be a dict but got %s instead' %
                       type(custom_config))

    # Set up training parameters
    train_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY], 'train'))
    ]
    transform_output = artifact_utils.get_single_uri(
        input_dict[TRANSFORM_GRAPH_KEY]) if input_dict.get(
            TRANSFORM_GRAPH_KEY, None) else None
    eval_files = [
        _all_files_pattern(
            artifact_utils.get_split_uri(input_dict[EXAMPLES_KEY], 'eval'))
    ]
    schema_file = io_utils.get_only_uri_in_dir(
        artifact_utils.get_single_uri(input_dict[SCHEMA_KEY]))
    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    base_model = path_utils.serving_model_path(
        artifact_utils.get_single_uri(input_dict[BASE_MODEL_KEY])
    ) if input_dict.get(BASE_MODEL_KEY) else None
    if input_dict.get(HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(input_dict[HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    train_args = trainer_pb2.TrainArgs()
    eval_args = trainer_pb2.EvalArgs()
    json_format.Parse(exec_properties['train_args'], train_args)
    json_format.Parse(exec_properties['eval_args'], eval_args)

    # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
    # num_steps=None.  Conversion of the proto to python will set the default
    # value of an int as 0 so modify the value here.  Tensorflow will raise an
    # error if num_steps <= 0.
    train_steps = train_args.num_steps or None
    eval_steps = eval_args.num_steps or None

    output_path = artifact_utils.get_single_uri(output_dict[OUTPUT_MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    return TrainerFnArgs(
        # A list of uris for train files.
        train_files=train_files,
        # An optional single uri for transform graph produced by TFT. Will be
        # None if not specified.
        transform_output=transform_output,
        # A single uri for the output directory of the serving model.
        serving_model_dir=serving_model_dir,
        # A single uri for the output directory of the eval model.
        # Note that this is estimator only, Keras doesn't require it for TFMA.
        eval_model_dir=eval_model_dir,
        # A list of uris for eval files.
        eval_files=eval_files,
        # A single uri for schema file.
        schema_file=schema_file,
        # Number of train steps.
        train_steps=train_steps,
        # Number of eval steps.
        eval_steps=eval_steps,
        # Base model that will be used for this training job.
        base_model=base_model,
        # An optional kerastuner.HyperParameters config.
        hyperparameters=hyperparameters_config,
        # Additional parameters to pass to trainer function.
        **custom_config)
Example #15
  def testAIPlatformTrainerPipeline(self):
    """Trainer-only test pipeline on AI Platform Training."""
    pipeline_name = 'kubeflow-aip-trainer-test-{}'.format(self._random_id())
    pipeline = self._create_pipeline(
        pipeline_name,
        [
            self.schema_importer,
            self.transformed_examples_importer,
            self.transform_graph_importer,
            Trainer(
                custom_executor_spec=executor_spec.ExecutorClassSpec(
                    ai_platform_trainer_executor.Executor),
                module_file=self._trainer_module,
                transformed_examples=self.transformed_examples_importer
                .outputs['result'],
                schema=self.schema_importer.outputs['result'],
                transform_graph=self.transform_graph_importer.outputs['result'],
                train_args=trainer_pb2.TrainArgs(num_steps=10),
                eval_args=trainer_pb2.EvalArgs(num_steps=5),
                custom_config={
                    # Test that distributed training behaves properly.
                    ai_platform_trainer_executor.TRAINING_ARGS_KEY: {
                        'project':
                            self._gcp_project_id,
                        'region':
                            self._gcp_region,
                        'jobDir':
                            os.path.join(
                                self._pipeline_root(pipeline_name), 'tmp'),
                        'masterConfig': {
                            'imageUri': self._container_image,
                        },
                        'scaleTier':
                            'CUSTOM',
                        'masterType':
                            'large_model',
                        'parameterServerType':
                            'standard',
                        'parameterServerCount':
                            1,
                        'workerType':
                            'standard',
                        'workerCount':
                            2,
                    }
                })
        ])
    self._compile_and_run_pipeline(pipeline)

    # There must be only one execution of Trainer.
    trainer_output_base_dir = os.path.join(
        self._pipeline_root(pipeline_name), 'Trainer', 'model')
    trainer_outputs = tf.io.gfile.listdir(trainer_output_base_dir)
    self.assertEqual(1, len(trainer_outputs))

    # There must be only one saved model each for serving and eval.
    model_uri = os.path.join(trainer_output_base_dir, trainer_outputs[0])
    self.assertEqual(
        1, len(tf.io.gfile.listdir(path_utils.eval_model_dir(model_uri))))
    self.assertEqual(
        1,
        len(
            tf.io.gfile.listdir(
                os.path.join(
                    path_utils.serving_model_dir(model_uri), 'export',
                    'chicago-taxi'))))
Example #16
  def _GetFnArgs(self, input_dict: Dict[Text, List[types.Artifact]],
                 output_dict: Dict[Text, List[types.Artifact]],
                 exec_properties: Dict[Text, Any]) -> TrainerFnArgs:
    fn_args = fn_args_utils.get_common_fn_args(input_dict, exec_properties)

    # Load and deserialize custom config from execution properties.
    # Note that in the component interface the default serialization of custom
    # config is 'null' instead of '{}'. Therefore we need to default the
    # json_utils.loads to 'null' then populate it with an empty dict when
    # needed.
    custom_config = json_utils.loads(
        exec_properties.get(constants.CUSTOM_CONFIG_KEY, 'null')) or {}
    if not isinstance(custom_config, dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict. Got %s instead.' % type(custom_config))

    # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
    if input_dict.get(constants.BASE_MODEL_KEY):
      base_model = path_utils.serving_model_path(
          artifact_utils.get_single_uri(input_dict[constants.BASE_MODEL_KEY]))
    else:
      base_model = None

    if input_dict.get(constants.HYPERPARAMETERS_KEY):
      hyperparameters_file = io_utils.get_only_uri_in_dir(
          artifact_utils.get_single_uri(
              input_dict[constants.HYPERPARAMETERS_KEY]))
      hyperparameters_config = json.loads(
          file_io.read_file_to_string(hyperparameters_file))
    else:
      hyperparameters_config = None

    output_path = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_KEY])
    serving_model_dir = path_utils.serving_model_dir(output_path)
    eval_model_dir = path_utils.eval_model_dir(output_path)

    model_run_dir = artifact_utils.get_single_uri(
        output_dict[constants.MODEL_RUN_KEY])

    # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
    return TrainerFnArgs(
        # A list of uris for train files.
        train_files=fn_args.train_files,
        # An optional single uri for transform graph produced by TFT. Will be
        # None if not specified.
        transform_output=fn_args.transform_graph_path,
        # A single uri for the output directory of the serving model.
        serving_model_dir=serving_model_dir,
        # A single uri for the output directory of the eval model.
        # Note that this is estimator only, Keras doesn't require it for TFMA.
        eval_model_dir=eval_model_dir,
        # A list of uris for eval files.
        eval_files=fn_args.eval_files,
        # A single uri for the output directory of model training related files.
        model_run_dir=model_run_dir,
        # A single uri for schema file.
        schema_file=fn_args.schema_path,
        # Number of train steps.
        train_steps=fn_args.train_steps,
        # Number of eval steps.
        eval_steps=fn_args.eval_steps,
        # Base model that will be used for this training job.
        base_model=base_model,
        # An optional kerastuner.HyperParameters config.
        hyperparameters=hyperparameters_config,
        # Additional parameters to pass to trainer function.
        **custom_config)
Example #17
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

    The Trainer Executor invokes a training_fn callback function provided by
    the user via the module_file parameter. The Trainer Executor then trains a
    TensorFlow model using the tf.estimator returned by this function.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          splits.
        - transform_output: Optional input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - model: Exported model.
        - model_run: Model training related outputs (e.g., Tensorboard logs)
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.
        - custom_config: Optional. JSON-serialized dict of additional parameters
          to pass to trainer function.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'trainer_fn'
        are present in 'exec_properties'.
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    fn_args = self._GetFnArgs(input_dict, output_dict, exec_properties)
    trainer_fn = udf_utils.get_fn(exec_properties, 'trainer_fn')

    schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

    # TODO(b/160795287): Deprecate estimator based executor.
    # Provide user with a modified fn_args, with model_run given as
    # the working directory. Executor will then copy user models to
    # model artifact directory.
    serving_dest = fn_args.serving_model_dir
    eval_dest = fn_args.eval_model_dir

    working_dir = fn_args.model_run_dir
    fn_args.serving_model_dir = path_utils.serving_model_dir(working_dir)
    fn_args.eval_model_dir = path_utils.eval_model_dir(working_dir)

    training_spec = trainer_fn(fn_args, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])

    absl.logging.info(
        'Training complete. Model written to %s. ModelRun written to %s',
        fn_args.serving_model_dir, fn_args.model_run_dir)

    # Export an eval savedmodel for TFMA. In distributed training, it must be
    # written only by the chief worker, as is done for the serving savedmodel.
    if _is_chief():
      absl.logging.info('Exporting eval_savedmodel for TFMA.')
      tfma.export.export_eval_savedmodel(
          estimator=training_spec['estimator'],
          export_dir_base=fn_args.eval_model_dir,
          eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

      absl.logging.info('Exported eval_savedmodel to %s.',
                        fn_args.eval_model_dir)

      # TODO(b/160795287): Deprecate estimator based executor.
      # Copy serving and eval model from model_run to model artifact directory.
      serving_source = path_utils.serving_model_path(fn_args.model_run_dir)
      io_utils.copy_dir(serving_source, serving_dest)
      absl.logging.info('Serving model copied to: %s.', serving_dest)

      eval_source = path_utils.eval_model_path(fn_args.model_run_dir)
      io_utils.copy_dir(eval_source, eval_dest)
      absl.logging.info('Eval model copied to: %s.', eval_dest)

    else:
      absl.logging.info(
          'Model export is skipped because this is not the chief worker.')
Example #18
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Uses a user-supplied tf.estimator to train a TensorFlow model locally.

    The Trainer Executor invokes a training_fn callback function provided by
    the user via the module_file parameter.  With the tf.estimator returned by
    this function, the Trainer Executor then builds a TensorFlow model using the
    user-provided tf.estimator.

    Args:
      input_dict: Input dict from input key to a list of ML-Metadata Artifacts.
        - examples: Examples used for training, must include 'train' and 'eval'
          splits.
        - transform_output: Optional input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: Exported model.
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.

    Returns:
      None

    Raises:
      ValueError: When neither or both of 'module_file' and 'trainer_fn'
        are present in 'exec_properties'.
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        custom_config = exec_properties.get('custom_config') or {}
        if not isinstance(custom_config, dict):
            raise ValueError(
                'Expect custom_config to be a dict but got %s instead' %
                type(custom_config))

        trainer_fn = self._GetTrainerFn(exec_properties)

        # Set up training parameters
        train_files = [
            _all_files_pattern(
                artifact_utils.get_split_uri(input_dict['examples'], 'train'))
        ]
        transform_output = artifact_utils.get_single_uri(
            input_dict['transform_output']) if input_dict.get(
                'transform_output', None) else None
        eval_files = [
            _all_files_pattern(
                artifact_utils.get_split_uri(input_dict['examples'], 'eval'))
        ]
        schema_file = io_utils.get_only_uri_in_dir(
            artifact_utils.get_single_uri(input_dict['schema']))
        # TODO(ruoyu): Make this a dict of tag -> uri instead of list.
        base_model = path_utils.serving_model_path(
            artifact_utils.get_single_uri(input_dict['base_model'])
        ) if input_dict.get('base_model') else None
        if input_dict.get('hyperparameters'):
            hyperparameters_file = io_utils.get_only_uri_in_dir(
                artifact_utils.get_single_uri(input_dict['hyperparameters']))
            hyperparameters_config = json.loads(
                file_io.read_file_to_string(hyperparameters_file))
        else:
            hyperparameters_config = None

        train_args = trainer_pb2.TrainArgs()
        eval_args = trainer_pb2.EvalArgs()
        json_format.Parse(exec_properties['train_args'], train_args)
        json_format.Parse(exec_properties['eval_args'], eval_args)

        # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
        # num_steps=None.  Conversion of the proto to python will set the default
        # value of an int as 0 so modify the value here.  Tensorflow will raise an
        # error if num_steps <= 0.
        train_steps = train_args.num_steps or None
        eval_steps = eval_args.num_steps or None

        output_path = artifact_utils.get_single_uri(output_dict['output'])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        train_fn_args = TrainerFnArgs(
            # A list of uris for train files.
            train_files=train_files,
            # An optional single uri for transform graph produced by TFT. Will be
            # None if not specified.
            transform_output=transform_output,
            # A single uri for the output directory of the serving model.
            serving_model_dir=serving_model_dir,
            # A list of uris for eval files.
            eval_files=eval_files,
            # A single uri for schema file.
            schema_file=schema_file,
            # Number of train steps.
            train_steps=train_steps,
            # Number of eval steps.
            eval_steps=eval_steps,
            # Base model that will be used for this training job.
            base_model=base_model,
            # An optional kerastuner.HyperParameters config.
            hyperparameters=hyperparameters_config,
            # Additional parameters to pass to trainer function.
            **custom_config)

        schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())

        training_spec = trainer_fn(train_fn_args, schema)

        # Train the model
        absl.logging.info('Training model.')
        tf.estimator.train_and_evaluate(training_spec['estimator'],
                                        training_spec['train_spec'],
                                        training_spec['eval_spec'])
        absl.logging.info('Training complete.  Model written to %s',
                          serving_model_dir)

        # Export an eval savedmodel for TFMA
        absl.logging.info('Exporting eval_savedmodel for TFMA.')
        tfma.export.export_eval_savedmodel(
            estimator=training_spec['estimator'],
            export_dir_base=eval_model_dir,
            eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

        absl.logging.info('Exported eval_savedmodel to %s.', eval_model_dir)
Example #19
    def Do(self, input_dict, output_dict, exec_properties):
        """Runs trainer job the given input.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - transformed_examples: Transformed example.
        - transform_output: Input transform graph.
        - schema: Schema of the data.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: Exported model.
      exec_properties: A dict of execution properties.
        - train_args: JSON string of trainer_pb2.TrainArgs instance, providing
          args for training.
        - eval_args: JSON string of trainer_pb2.EvalArgs instance, providing
          args for eval.
        - module_file: Python module file containing UDF model definition.
        - warm_starting: Whether or not we need to do warm starting.
        - warm_start_from: Optional. If warm_starting is True, this is the
          directory to find previous model to warm start on.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        # TODO(khaas): Move this to tfx/extensions.
        if exec_properties.get('custom_config', None):
            cmle_args = exec_properties.get('custom_config',
                                            {}).get('cmle_training_args')
            if cmle_args:
                return cmle_runner.start_cmle_training(input_dict, output_dict,
                                                       exec_properties,
                                                       cmle_args)

        trainer_fn = io_utils.import_func(exec_properties['module_file'],
                                          'trainer_fn')

        # Set up training parameters
        train_files = [
            _all_files_pattern(
                types.get_split_uri(input_dict['transformed_examples'],
                                    'train'))
        ]
        transform_output = types.get_single_uri(input_dict['transform_output'])
        eval_files = _all_files_pattern(
            types.get_split_uri(input_dict['transformed_examples'], 'eval'))
        schema_file = io_utils.get_only_uri_in_dir(
            types.get_single_uri(input_dict['schema']))

        train_args = trainer_pb2.TrainArgs()
        eval_args = trainer_pb2.EvalArgs()
        json_format.Parse(exec_properties['train_args'], train_args)
        json_format.Parse(exec_properties['eval_args'], eval_args)

        # https://github.com/tensorflow/tfx/issues/45: Replace num_steps=0 with
        # num_steps=None.  Conversion of the proto to python will set the default
        # value of an int as 0 so modify the value here.  Tensorflow will raise an
        # error if num_steps <= 0.
        train_steps = train_args.num_steps or None
        eval_steps = eval_args.num_steps or None

        output_path = types.get_single_uri(output_dict['output'])
        serving_model_dir = path_utils.serving_model_dir(output_path)
        eval_model_dir = path_utils.eval_model_dir(output_path)

        # Assemble warm start path if needed.
        warm_start_from = None
        if exec_properties.get('warm_starting') and exec_properties.get(
                'warm_start_from'):
            previous_model_dir = os.path.join(
                exec_properties['warm_start_from'],
                path_utils.SERVING_MODEL_DIR)
            if previous_model_dir and tf.gfile.Exists(
                    os.path.join(previous_model_dir,
                                 self._CHECKPOINT_FILE_NAME)):
                warm_start_from = previous_model_dir

        # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
        hparams = tf.contrib.training.HParams(
            train_files=train_files,
            transform_output=transform_output,
            output_dir=output_path,
            serving_model_dir=serving_model_dir,
            eval_files=eval_files,
            schema_file=schema_file,
            train_steps=train_steps,
            eval_steps=eval_steps,
            warm_start_from=warm_start_from)

        schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())

        training_spec = trainer_fn(hparams, schema)

        # Train the model
        tf.logging.info('Training model.')
        tf.estimator.train_and_evaluate(training_spec['estimator'],
                                        training_spec['train_spec'],
                                        training_spec['eval_spec'])
        tf.logging.info('Training complete.  Model written to %s',
                        serving_model_dir)

        # Export an eval savedmodel for TFMA
        tf.logging.info('Exporting eval_savedmodel for TFMA.')
        tfma.export.export_eval_savedmodel(
            estimator=training_spec['estimator'],
            export_dir_base=eval_model_dir,
            eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

        tf.logging.info('Exported eval_savedmodel to %s.', eval_model_dir)