Пример #1
0
def run_fn(fn_args: TrainerFnArgs):
    """Train the model on the transformed data and export it for serving.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    BATCH_SIZE = 65536

    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(fn_args.train_files, tf_transform_output, BATCH_SIZE)
    eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, BATCH_SIZE)

    # Build the model inside the strategy scope so its variables are mirrored
    # across all available devices.
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = _build_keras()

    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, update_freq='batch')

    # Fix: the TensorBoard callback was constructed but never passed to fit()
    # (the real callbacks list was commented out in favor of []), so no
    # training logs were ever written.
    model.fit(train_dataset,
              steps_per_epoch=fn_args.train_steps,
              validation_data=eval_dataset,
              validation_steps=fn_args.eval_steps,
              callbacks=[tensorboard_callback])

    signatures = {
        'serving_default':
            _get_serve_tf_examples_fn(model,
                                      tf_transform_output).get_concrete_function(
                                          tf.TensorSpec(
                                              shape=[None],
                                              dtype=tf.string,
                                              name='examples')),
    }
    model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
Пример #2
0
def trainer_fn(hparams, schema):
    """Assemble the estimator plus train/eval specs for tf.estimator training.

    Args:
      hparams: Holds args used to train the model as name/value pairs.
      schema: Holds the schema of the training examples.

    Returns:
      Dict with 'estimator', 'train_spec', 'eval_spec' and
      'eval_input_receiver_fn' (the TFMA eval receiver) entries.
    """
    batch_size_train = 32
    batch_size_eval = 32

    tf_transform_output = tft.TFTransformOutput(hparams.transform_output)

    def train_input_fn():
        return _input_fn(hparams.train_files, tf_transform_output,
                         batch_size=batch_size_train)

    def eval_input_fn():
        return _input_fn(hparams.eval_files, tf_transform_output,
                         batch_size=batch_size_eval)

    def serving_receiver_fn():
        return _example_serving_receiver_fn(tf_transform_output, schema)

    def receiver_fn():
        return _eval_input_receiver_fn(tf_transform_output, schema)

    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=hparams.train_steps)
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=hparams.eval_steps,
        exporters=[tf.estimator.FinalExporter('cifar-10', serving_receiver_fn)],
        name='cifar-10')

    return {
        'estimator': _build_estimator(),
        'train_spec': train_spec,
        'eval_spec': eval_spec,
        'eval_input_receiver_fn': receiver_fn,
    }
Пример #3
0
def export_serving_model(classifier, serving_model_dir, raw_schema_location,
                         tft_output_dir):
    """Export the classifier with feature-dict and tf.Example serving signatures.

    Args:
      classifier: Trained model to export (must support `.save`).
      serving_model_dir: Destination directory for the SavedModel.
      raw_schema_location: Path to the raw-data schema text proto.
      tft_output_dir: Directory holding the tf.Transform output.
    """
    raw_schema = tfdv.load_schema_text(raw_schema_location)
    raw_feature_spec = schema_utils.schema_as_feature_spec(
        raw_schema).feature_spec

    tft_output = tft.TFTransformOutput(tft_output_dir)

    # One TensorSpec per raw feature the model actually consumes.
    features_input_signature = {}
    for feature_name, spec in raw_feature_spec.items():
        if feature_name not in features.FEATURE_NAMES:
            continue
        features_input_signature[feature_name] = tf.TensorSpec(
            shape=(None, 1), dtype=spec.dtype, name=feature_name)

    serve_features = _get_serve_features_fn(classifier, tft_output)
    serve_examples = _get_serve_tf_examples_fn(classifier, tft_output,
                                               raw_feature_spec)
    signatures = {
        "serving_default":
            serve_features.get_concrete_function(features_input_signature),
        "serving_tf_example":
            serve_examples.get_concrete_function(
                tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")),
    }

    logging.info("Model export started...")
    classifier.save(serving_model_dir, signatures=signatures)
    logging.info("Model export completed.")
Пример #4
0
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Create a Hyperband tuner and the fit kwargs for the TFX Tuner component.

    Args:
      fn_args: Holds args (train/eval files, transform graph, steps, working
        dir) as name/value pairs.

    Returns:
      A TunerFnResult holding the tuner and the kwargs to pass to run_trial.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    hparams = _get_hyperparameters()

    tuner = kerastuner.Hyperband(
        hypermodel=_build_keras_model,
        hyperparameters=hparams,
        objective=kerastuner.Objective('binary_accuracy', 'max'),
        factor=3,
        max_epochs=2,
        directory=fn_args.working_dir,
        project_name='ftfx:simple_e2e')

    fit_kwargs = {
        'x': _input_fn(fn_args.train_files, tf_transform_output),
        'validation_data': _input_fn(fn_args.eval_files, tf_transform_output),
        'steps_per_epoch': fn_args.train_steps,
        'validation_steps': fn_args.eval_steps,
    }
    return TunerFnResult(tuner=tuner, fit_kwargs=fit_kwargs)
Пример #5
0
def run_fn(fn_args):
    """Debug/placeholder trainer entry point: loads the TFT output, prints it,
    then always raises.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    # Fix: the variable was bound as `tft_transform_output` but printed as
    # `tf_transform_output`, so this line raised NameError before the intended
    # placeholder Exception below could fire.
    print(tf_transform_output)

    raise Exception("aslçdjfaksldjf")
Пример #6
0
def trainer_fn(hparams, schema):
    """Build the estimator using the high level API.

    Args:
      hparams: Holds hyperparameters used to train the model as name/value
        pairs.
      schema: Holds the schema of the training examples.

    Returns:
      A dict of the following:
        - estimator: The estimator that will be used for training and eval.
        - train_spec: Spec for training.
        - eval_spec: Spec for eval.
        - eval_input_receiver_fn: Input function for eval.
    """
    batch_size_train = 32
    batch_size_eval = 32

    tf_transform_output = tft.TFTransformOutput(hparams.transform_output)

    def train_input_fn():
        return _input_fn(hparams.train_files, tf_transform_output,
                         batch_size=batch_size_train)

    def eval_input_fn():
        return _input_fn(hparams.eval_files, tf_transform_output,
                         batch_size=batch_size_eval)

    def serving_receiver_fn():
        return _example_serving_receiver_fn(tf_transform_output, schema)

    def receiver_fn():
        # Input receiver for TFMA processing.
        return _eval_input_receiver_fn(tf_transform_output, schema)

    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter('cifar-10', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='cifar-10')

    # Checkpoint often, but keep only the latest checkpoint on disk.
    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=999,
        keep_checkpoint_max=1).replace(model_dir=hparams.serving_model_dir)

    estimator = tf.keras.estimator.model_to_estimator(
        keras_model=_keras_model_builder(), config=run_config)

    return {
        'estimator': estimator,
        'train_spec': train_spec,
        'eval_spec': eval_spec,
        'eval_input_receiver_fn': receiver_fn,
    }
Пример #7
0
    def testWriteTransformFn(self):
        """WriteTransformFn materializes metadata (with futures resolved) and
        the transform SavedModel directory on disk."""
        transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

        with beam.Pipeline() as pipeline:
            # Create an empty directory for the source saved model dir.
            saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
            file_io.recursive_create_dir(saved_model_dir)
            saved_model_dir_pcoll = (
                pipeline
                | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
            # Metadata still containing futures, paired with a PCollection
            # that supplies the value for future 'a'.
            metadata = beam_metadata_io.BeamDatasetMetadata(
                _TEST_METADATA_WITH_FUTURES, {
                    'a': pipeline | 'CreateA' >> beam.Create([3]),
                })

            _ = ((saved_model_dir_pcoll, metadata)
                 | transform_fn_io.WriteTransformFn(transform_output_dir))

        # Test reading with TFTransformOutput
        tf_transform_output = tft.TFTransformOutput(transform_output_dir)
        metadata = tf_transform_output.transformed_metadata
        self.assertEqual(metadata, _TEST_METADATA)

        transform_fn_dir = tf_transform_output.transform_savedmodel_dir
        self.assertTrue(file_io.file_exists(transform_fn_dir))
        self.assertTrue(file_io.is_directory(transform_fn_dir))
Пример #8
0
def run_fn(fn_args):
    """Train the model based on given args.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(fn_args.train_files, tf_transform_output,
                              constants.TRAIN_BATCH_SIZE)
    eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output,
                             constants.EVAL_BATCH_SIZE)

    # Mirror model variables across all available devices.
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        model = _build_keras_model(hidden_units=constants.HIDDEN_UNITS,
                                   learning_rate=constants.LEARNING_RATE)

    model.fit(train_dataset,
              steps_per_epoch=fn_args.train_steps,
              validation_data=eval_dataset,
              validation_steps=fn_args.eval_steps)

    serve_fn = _get_serve_tf_examples_fn(model, tf_transform_output)
    signatures = {
        'serving_default':
            serve_fn.get_concrete_function(
                tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
    }
    model.save(fn_args.serving_model_dir,
               save_format='tf',
               signatures=signatures)
Пример #9
0
def example_serving_receiver_fn(tf_transform_dir, raw_feature_spec,
                                target_feature, feature_id):
    """Creates serving function that is used during inference.

    Args:
      tf_transform_dir: A directory in which the tf.Transform model was written
        during the preprocessing step.
      raw_feature_spec: A dictionary of raw feature spec for input data.
      target_feature: Key for target feature.
      feature_id: Key for id field in input data.

    Returns:
      An instance of tf.estimator.export.ServingInputReceiver that parses input
      data by applying transformation from saved tf.Transform graph.
    """
    # Fix: work on a copy so the caller's feature spec dict is not mutated —
    # the original popped the target key directly off the argument, silently
    # altering it for subsequent callers.
    raw_feature_spec = dict(raw_feature_spec)
    raw_feature_spec.pop(target_feature, None)

    raw_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        raw_feature_spec, default_batch_size=None)
    serving_input_receiver = raw_input_fn()
    features = serving_input_receiver.features
    transform_output = tft.TFTransformOutput(tf_transform_dir)
    transformed_features = transform_output.transform_raw_features(features)
    # The id feature is densified before being handed to the receiver.
    transformed_features[feature_id] = convert_sparse_to_dense(
        transformed_features[feature_id])
    return tf.estimator.export.ServingInputReceiver(
        transformed_features, serving_input_receiver.receiver_tensors)
Пример #10
0
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = input_fn(fn_args.train_files, fn_args.data_accessor,
                           tf_transform_output, base.TRAIN_BATCH_SIZE)
  eval_dataset = input_fn(fn_args.eval_files, fn_args.data_accessor,
                          tf_transform_output, base.EVAL_BATCH_SIZE)

  # Prefer tuner-provided hyperparameters when the pipeline supplies them.
  if fn_args.hyperparameters:
    hparams = kt.HyperParameters.from_config(fn_args.hyperparameters)
  else:
    # Fall back to fixed defaults once tuning has settled and the Tuner
    # component has been removed from the pipeline; the values could also be
    # inlined in _make_keras_model.
    hparams = _get_hyperparameters()

  model = _make_keras_model(hparams)

  # Write logs to path
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=fn_args.model_run_dir, update_freq='batch')

  model.fit(train_dataset,
            validation_data=eval_dataset,
            callbacks=[tensorboard_callback])

  signatures = base.make_serving_signatures(model, tf_transform_output)
  model.save(fn_args.serving_model_dir, save_format='tf',
             signatures=signatures)
Пример #11
0
    def testWriteTransformFn(self):
        """WriteTransformFn with a deferred-metadata PCollection should round-trip
        the complete metadata and write the SavedModel directory."""
        transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

        with beam.Pipeline() as pipeline:
            # Create an empty directory for the source saved model dir.
            saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
            file_io.recursive_create_dir(saved_model_dir)
            saved_model_dir_pcoll = (
                pipeline
                | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
            # Combine test metadata with a dict of PCollections resolving futures.
            deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
                [_TEST_METADATA_COMPLETE])
            metadata = beam_metadata_io.BeamDatasetMetadata(
                _TEST_METADATA, deferred_metadata)

            _ = ((saved_model_dir_pcoll, metadata)
                 | transform_fn_io.WriteTransformFn(transform_output_dir))

        # Test reading with TFTransformOutput
        tf_transform_output = tft.TFTransformOutput(transform_output_dir)
        metadata = tf_transform_output.transformed_metadata
        self.assertEqual(metadata, _TEST_METADATA_COMPLETE)

        transform_fn_dir = tf_transform_output.transform_savedmodel_dir
        self.assertTrue(file_io.file_exists(transform_fn_dir))
        self.assertTrue(file_io.is_directory(transform_fn_dir))
Пример #12
0
def run_fn(fn_args: TrainerFnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  batch_size = 40
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(fn_args.train_files, tf_transform_output,
                            batch_size)
  eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, batch_size)

  model = _build_keras_model()

  model.fit(train_dataset,
            steps_per_epoch=fn_args.train_steps,
            validation_data=eval_dataset,
            validation_steps=fn_args.eval_steps)

  serve_fn = _get_serve_tf_examples_fn(model, tf_transform_output)
  signatures = {
      'serving_default':
          serve_fn.get_concrete_function(
              tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
Пример #13
0
    def train(self):
        """Train a linear classifier on TFT-preprocessed data and export it.

        Reads all paths and sizes from the module-level `cfg` object.
        """
        # the graph preprocessed by TFT preprocessing
        tf_transform_output = tft.TFTransformOutput(cfg.TARGET_DIR)

        # Generate all `input_fn`s for the tf estimator
        train_input_fn = self.model.make_training_input_fn(
            tf_transform_output, cfg.exp_log_data_file_train_tfrecord + '*',
            cfg.TRAIN_BATCH_SIZE)
        eval_input_fn = self.model.make_training_input_fn(
            tf_transform_output, cfg.exp_log_data_file_eval_tfrecord + '*',
            cfg.EVAL_BATCH_SIZE)

        make_serving_input_fn = self.model.make_serving_input_fn(
            tf_transform_output)

        estimator = tf.estimator.LinearClassifier(
            feature_columns=self.model.create_feature_columns(
                tf_transform_output))
        estimator.train(train_input_fn, steps=cfg.TRAIN_MAX_STEPS)
        eval_evalset_result = estimator.evaluate(eval_input_fn,
                                                 steps=cfg.EVAL_STEPS,
                                                 name='eval')
        # Fix: `print eval_evalset_result` was Python 2 statement syntax and a
        # SyntaxError under Python 3.
        print(eval_evalset_result)

        estimator.export_savedmodel(cfg.TARGET_DIR,
                                    make_serving_input_fn,
                                    strip_default_attrs=True)
Пример #14
0
def trainer_fn(trainer_fn_args, schema):
    """Build the estimator using the high level API.

  Args:
    trainer_fn_args: Holds args used to train the model as name/value pairs.
    schema: Holds the schema of the training examples.

  Returns:
    A dict of the following:
      - estimator: The estimator that will be used for training and eval.
      - train_spec: Spec for training.
      - eval_spec: Spec for eval.
      - eval_input_receiver_fn: Input function for eval.
  """

    # NOTE(review): batch sizes and hidden units are read from a module-level
    # `hparams` object that is not a parameter of this function — confirm it
    # is imported/defined at module scope.
    tf_transform_output = tft.TFTransformOutput(
        trainer_fn_args.transform_output)

    # Input fns read the TFT-transformed training and eval examples.
    train_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
        trainer_fn_args.train_files,
        tf_transform_output,
        batch_size=hparams.TRAIN_BATCH_SIZE)

    eval_input_fn = lambda: _input_fn(  # pylint: disable=g-long-lambda
        trainer_fn_args.eval_files,
        tf_transform_output,
        batch_size=hparams.EVAL_BATCH_SIZE)

    train_spec = tf.estimator.TrainSpec(  # pylint: disable=g-long-lambda
        train_input_fn,
        max_steps=trainer_fn_args.train_steps)

    serving_receiver_fn = lambda: _example_serving_receiver_fn(  # pylint: disable=g-long-lambda
        tf_transform_output, schema)

    exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=trainer_fn_args.eval_steps,
                                      exporters=[exporter],
                                      name='chicago-taxi-eval')

    # Checkpoint frequently but retain only the latest checkpoint to bound
    # disk usage.
    run_config = tf.estimator.RunConfig(save_checkpoints_steps=999,
                                        keep_checkpoint_max=1)

    run_config = run_config.replace(
        model_dir=trainer_fn_args.serving_model_dir)

    estimator = _build_estimator(hidden_units=hparams.HIDDEN_UNITS,
                                 config=run_config)

    # Create an input receiver for TFMA processing
    receiver_fn = lambda: _eval_input_receiver_fn(  # pylint: disable=g-long-lambda
        tf_transform_output, schema)

    return {
        'estimator': estimator,
        'train_spec': train_spec,
        'eval_spec': eval_spec,
        'eval_input_receiver_fn': receiver_fn
    }
Пример #15
0
def write_projector_metadata(metadata_dir, tft_dir):
    """Write a metadata file to use in tensorboard to visualize embeddings.

    Tensorboard expects a .tsv (tab-separated values) file encoding information
    about each sample. A header is required if there is more than one column.

    Args:
      metadata_dir: the directory where the projector config protobuf is
        written.
      tft_dir: the directory where tft outputs are written.

    Returns:
      A tuple of user and item indices:
        user_indices: indices of users that were sampled.
        item_indices: indices of items that were sampled.
    """
    tft_output = tft.TFTransformOutput(tft_dir)

    # Sample both vocabularies and concatenate their metadata rows.
    user_indices, user_rows = _sample_vocab(tft_output,
                                            constants.USER_VOCAB_NAME,
                                            "user",
                                            constants.NUM_PROJECTOR_USERS)
    item_indices, item_rows = _sample_vocab(tft_output,
                                            constants.ITEM_VOCAB_NAME,
                                            "item",
                                            constants.NUM_PROJECTOR_ITEMS)
    rows = user_rows + item_rows

    metadata_path = os.path.join(metadata_dir, constants.PROJECTOR_PATH)
    tf.gfile.MakeDirs(metadata_dir)
    with tf.gfile.GFile(metadata_path, "w+") as handle:
        handle.write("label\tname\n")
        handle.write("\n".join("\t".join(sample) for sample in rows))
    return user_indices, item_indices
Пример #16
0
def run_fn(fn_args: tfx.components.FnArgs):
    """Train the model based on given args.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = base.input_fn(fn_args.train_files, fn_args.data_accessor,
                                  tf_transform_output, base.TRAIN_BATCH_SIZE)
    eval_dataset = base.input_fn(fn_args.eval_files, fn_args.data_accessor,
                                 tf_transform_output, base.EVAL_BATCH_SIZE)

    model = _make_trained_model(train_dataset,
                                eval_dataset,
                                num_epochs=1,
                                steps_per_epoch=fn_args.train_steps,
                                eval_steps_per_epoch=fn_args.eval_steps,
                                tensorboard_log_dir=fn_args.model_run_dir)

    # TODO(b/180721874): batch polymorphic model not yet supported.
    signatures = base.make_serving_signatures(model,
                                              tf_transform_output,
                                              serving_batch_size=1)
    tf.saved_model.save(model, fn_args.serving_model_dir,
                        signatures=signatures)
def evaluate(classifier, data_accessor, eval_data_dir, tft_output_dir,
             hyperparameters):
    """Evaluate the classifier on the eval split and return its metrics.

    Args:
      classifier: Trained model to evaluate (must support `.evaluate`).
      data_accessor: Accessor used to read the eval examples.
      eval_data_dir: File pattern for the eval data.
      tft_output_dir: Directory holding the tf.Transform output.
      hyperparameters: Dict of hyperparameters; only "batch_size" is read here.

    Returns:
      evaluation_metrics: The value returned by `classifier.evaluate`.
    """
    logging.info("Loading tft output from %s", tft_output_dir)
    tft_output = tft.TFTransformOutput(tft_output_dir)
    schema = tft_output.transformed_metadata.schema

    logging.info("Model evaluation started...")
    eval_dataset = model_input.get_dataset(
        file_pattern=eval_data_dir,
        data_accessor=data_accessor,
        schema=schema,
        batch_size=hyperparameters["batch_size"],
    )
    evaluation_metrics = classifier.evaluate(eval_dataset)
    logging.info("Model evaluation completed.")

    return evaluation_metrics
Пример #18
0
def run_fn(fn_args):
    """Train the model based on given args.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    batch_size = 40
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(fn_args.train_files, tf_transform_output,
                              batch_size)
    eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output,
                             batch_size)

    model = get_model()

    # TensorBoard logs go into a 'logs' dir next to the serving model dir.
    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          update_freq='batch')

    model.fit(train_dataset,
              steps_per_epoch=fn_args.train_steps,
              validation_data=eval_dataset,
              validation_steps=fn_args.eval_steps,
              callbacks=[tensorboard_callback])

    serve_fn = _get_serve_tf_examples_fn(model, tf_transform_output)
    signatures = {
        'serving_default':
            serve_fn.get_concrete_function(
                tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
    }
    model.save(fn_args.serving_model_dir, save_format='tf',
               signatures=signatures)
Пример #19
0
    def run_fn(self):
        """Train the model and export it with serving and zen-eval signatures."""
        tf_transform_output = tft.TFTransformOutput(self.transform_output)

        train_dataset = self.input_fn(self.train_files, tf_transform_output)
        eval_dataset = self.input_fn(self.eval_files, tf_transform_output)

        model = self.model_fn(train_dataset=train_dataset,
                              eval_dataset=eval_dataset)

        # Both signatures accept a batch of serialized tf.Example strings.
        example_spec = tf.TensorSpec(shape=[None], dtype=tf.string,
                                     name='examples')
        signatures = {
            'serving_default':
                self._get_serve_tf_examples_fn(
                    model, tf_transform_output).get_concrete_function(
                        example_spec),
            'zen_eval':
                self._get_zen_eval_tf_examples_fn(
                    model, tf_transform_output).get_concrete_function(
                        example_spec),
        }

        model.save(self.serving_model_dir,
                   save_format='tf',
                   signatures=signatures)
Пример #20
0
def run_fn(fn_args: tfx.components.FnArgs):
    """Train the model based on given args.

    Works both with and without an upstream Transform component.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    # Resolve schema and feature/label names depending on whether Transform ran.
    if fn_args.transform_output is None:  # Transform is not used.
        tf_transform_output = None
        schema = tfx.utils.parse_pbtxt_file(fn_args.schema_file,
                                            schema_pb2.Schema())
        feature_list = features.FEATURE_KEYS
        label_key = features.LABEL_KEY
    else:
        tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
        schema = tf_transform_output.transformed_metadata.schema
        feature_list = [features.transformed_name(f)
                        for f in features.FEATURE_KEYS]
        label_key = features.transformed_name(features.LABEL_KEY)

    # Scale the per-replica batch sizes by the number of replicas in sync.
    mirrored_strategy = tf.distribute.MirroredStrategy()
    replicas = mirrored_strategy.num_replicas_in_sync
    train_batch_size = constants.TRAIN_BATCH_SIZE * replicas
    eval_batch_size = constants.EVAL_BATCH_SIZE * replicas

    train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor,
                              schema, label_key, batch_size=train_batch_size)
    eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor,
                             schema, label_key, batch_size=eval_batch_size)

    with mirrored_strategy.scope():
        model = _build_keras_model(feature_list)

    # Write logs to path
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=fn_args.model_run_dir, update_freq='batch')

    model.fit(train_dataset,
              steps_per_epoch=fn_args.train_steps,
              validation_data=eval_dataset,
              validation_steps=fn_args.eval_steps,
              callbacks=[tensorboard_callback])

    signatures = {
        'serving_default':
            _get_tf_examples_serving_signature(model, schema,
                                               tf_transform_output),
        'transform_features':
            _get_transform_features_signature(model, schema,
                                              tf_transform_output),
    }
    model.save(fn_args.serving_model_dir,
               save_format='tf',
               signatures=signatures)
Пример #21
0
def run_fn(fn_args: TrainerFnArgs):
    """Train the model based on given args.

    Args:
      fn_args: Holds args used to train and tune the model as name/value
        pairs. See
        https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor,
                              tf_transform_output, TRAIN_BATCH_SIZE)
    eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor,
                             tf_transform_output, EVAL_BATCH_SIZE)

    # Use tuner-provided hyperparameters when present; otherwise fall back to
    # fixed defaults (covers pipelines where the Tuner component was removed).
    if fn_args.hyperparameters:
        hparams = kerastuner.HyperParameters.from_config(
            fn_args.hyperparameters)
    else:
        hparams = _get_hyperparameters()
    absl.logging.info('HyperParameters for training: %s' %
                      hparams.get_config())

    # Distribute training over multiple replicas on the same machine.
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = _build_keras_model(hparams=hparams,
                                   tf_transform_output=tf_transform_output)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=LOCAL_LOG_DIR, update_freq='batch')

    model.fit(train_dataset,
              epochs=EPOCHS,
              steps_per_epoch=fn_args.train_steps,
              validation_data=eval_dataset,
              validation_steps=fn_args.eval_steps,
              verbose=2,
              callbacks=[tensorboard_callback])

    serve_fn = _get_serve_tf_examples_fn(model, tf_transform_output)
    signatures = {
        'serving_default':
            serve_fn.get_concrete_function(
                tf.TensorSpec(shape=[None], dtype=tf.string,
                              name='examples')),
    }

    model.save(fn_args.serving_model_dir,
               save_format='tf',
               signatures=signatures)

    # Mirror the local TensorBoard logs next to the exported model on GCS.
    if fn_args.serving_model_dir.startswith('gs://'):
        _copy_tensorboard_logs(LOCAL_LOG_DIR,
                               fn_args.serving_model_dir + '/logs')
def run_fn(fn_args: tfx.components.FnArgs):
    """Train the model based on given args.

  Trains a Keras model, exports it as a SavedModel, rewrites that SavedModel
  into a TFJS model in the final serving dir, and copies the transform
  vocabulary alongside it.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              tf_transform_output,
                              batch_size=_TRAIN_BATCH_SIZE)

    eval_dataset = _input_fn(fn_args.eval_files,
                             fn_args.data_accessor,
                             tf_transform_output,
                             batch_size=_EVAL_BATCH_SIZE)

    # Build the model inside the strategy scope so variables are mirrored
    # across devices.
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = _build_keras_model()

    model.fit(train_dataset,
              steps_per_epoch=fn_args.train_steps,
              validation_data=eval_dataset,
              validation_steps=fn_args.eval_steps,
              verbose=2)

    # The serving signature takes the two int64 session features directly
    # (not serialized tf.Examples).
    signatures = {
        'serving_default':
        _get_inference_fn(model, tf_transform_output).get_concrete_function(
            tf.TensorSpec(shape=[None],
                          dtype=tf.int64,
                          name=_CUR_PAGE_FEATURE_KEY),
            tf.TensorSpec(shape=[None],
                          dtype=tf.int64,
                          name=_SESSION_INDEX_FEATURE_KEY)),
    }

    # Create the saved_model in a temporary directory.
    temp_saving_model_dir = os.path.join(fn_args.serving_model_dir, 'temp')
    model.save(temp_saving_model_dir, save_format='tf', signatures=signatures)

    # Convert the saved_model to a tfjs model and store it in the final directory.
    tfrw = rewriter_factory.create_rewriter(rewriter_factory.TFJS_REWRITER,
                                            name='tfjs_rewriter')
    converters.rewrite_saved_model(temp_saving_model_dir,
                                   fn_args.serving_model_dir, tfrw,
                                   rewriter.ModelType.TFJS_MODEL)

    # Copy the vocabulary computed by transform to the final directory.
    # The vocabulary is not included in the original savedmodel because vocab
    # lookups are currently not supported in TFJS and are expected to be done
    # independently by client code.
    fileio.copy(tf_transform_output.vocabulary_file_by_name(_VOCAB_FILENAME),
                os.path.join(fn_args.serving_model_dir, _VOCAB_FILENAME))

    fileio.rmtree(temp_saving_model_dir)
Пример #23
0
def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Builds a CloudTuner-based tuner for distributed hyperparameter search.

    Args:
      fn_args: Holds args as name/value pairs. See
        https://www.tensorflow.org/tfx/api_docs/python/tfx/components/trainer/fn_args_utils/FnArgs.
        - transform_graph_path: optional transform graph produced by TFT.
        - custom_config: An optional dictionary passed to the component. In
          this example, it contains the dict ai_platform_tuning_args.
        - working_dir: working dir for tuning.
        - train_files: List of file paths containing training tf.Example data.
        - eval_files: List of file paths containing eval tf.Example data.
        - train_steps: number of train steps.
        - eval_steps: number of eval steps.

    Returns:
      A namedtuple containing:
        - tuner: A BaseTuner that will be used for tuning.
        - fit_kwargs: Args to pass to the tuner's run_trial function for
          fitting the model, e.g. the training and validation datasets.
          Required args depend on the tuner's implementation.
    """
    transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

    tuning_args = fn_args.custom_config['ai_platform_tuning_args']

    # Materialize the transformed train/eval datasets up front; they are
    # handed to the tuner through fit_kwargs below.
    train_dataset = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              transform_graph,
                              batch_size=_TRAIN_BATCH_SIZE)
    eval_dataset = _input_fn(fn_args.eval_files,
                             fn_args.data_accessor,
                             transform_graph,
                             batch_size=_EVAL_BATCH_SIZE)

    # CloudTuner is a subclass of kerastuner.Tuner (and hence of BaseTuner).
    # The project/region configurations target the Cloud Vizier service and
    # its trial executions. This example reuses the CAIP Training service
    # configuration for distributed tuning flock management, so all of the
    # pipeline's jobs and resources live in the same project; they can also
    # be configured separately.
    tuner = CloudTuner(
        _build_keras_model,
        project_id=tuning_args['project'],
        region=tuning_args['region'],
        objective=kerastuner.Objective('val_sparse_categorical_accuracy',
                                       'max'),
        hyperparameters=_get_hyperparameters(),
        max_trials=8,  # Optional.
        directory=fn_args.working_dir)

    return TunerFnResult(
        tuner=tuner,
        fit_kwargs={
            'x': train_dataset,
            'validation_data': eval_dataset,
            'steps_per_epoch': fn_args.train_steps,
            'validation_steps': fn_args.eval_steps,
        })
Пример #24
0
def train_and_evaluate(model_dir,
                       input_feature_spec,
                       target,
                       train_files_pattern,
                       eval_files_pattern,
                       batch_size=64,
                       train_max_steps=1000):
    """Trains and evaluates an RNN estimator wrapped from a Keras model.

    The input functions are generated by the preprocessing function.

    Args:
        model_dir: Directory for checkpoints and the exported model. Any
            existing contents are deleted first.
        input_feature_spec: Raw (pre-transform) feature spec used to build
            the serving input receiver.
        target: Name of the target feature.
        train_files_pattern: Glob pattern of transformed training TFRecords.
        eval_files_pattern: Glob pattern of transformed eval TFRecords.
        batch_size: Batch size for both training and evaluation.
        train_max_steps: Maximum number of training steps.
    """
    # Start from a clean model directory so stale checkpoints are not reused.
    if tf.io.gfile.exists(model_dir):
        tf.io.gfile.rmtree(model_dir)
    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=model_dir)
    # Summarize every step for a more granular visualization of the training.
    run_config = run_config.replace(save_summary_steps=1)

    # No built-in RNN estimator in TF yet, so wrap a compiled Keras model.
    model = make_simple_rnn()
    model.compile(loss=tf.losses.MeanSquaredError(),
                  optimizer=tf.optimizers.Adam(),
                  metrics=[tf.metrics.MeanAbsoluteError()])
    # BUG FIX: run_config was built above but never passed to the estimator,
    # so the model_dir / save_summary_steps settings were silently ignored.
    estimator = tf.keras.estimator.model_to_estimator(
        keras_model=model,
        config=run_config
    )

    # Wrapper around the output of tf.Transform; the transform output is
    # expected to live next to the training files.
    tft_output = tft.TFTransformOutput(os.path.split(train_files_pattern)[0])
    feature_spec = tft_output.transformed_feature_spec()

    # Create the training and evaluation specifications.
    train_spec = tf.estimator.TrainSpec(
        input_fn=make_input_fn(
            tfrecord_pattern=train_files_pattern,
            feature_spec=feature_spec,
            target=target,
            batch_size=batch_size,
            mode=tf.estimator.ModeKeys.TRAIN),
        max_steps=train_max_steps
        )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=make_input_fn(
            tfrecord_pattern=eval_files_pattern,
            feature_spec=feature_spec,
            target=target,
            batch_size=batch_size,
            mode=tf.estimator.ModeKeys.EVAL))
    # Train and evaluate the model.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    # Export the saved model for serving on the raw feature spec.
    estimator.export_saved_model(
        model_dir,
        serving_input_receiver_fn=make_serving_input_fn(
            tft_output, input_feature_spec, target
            ))
Пример #25
0
    def predict(self):
        """Runs the exported SavedModel in TARGET_DIR and prints its scores.

        NOTE(review): this method looks broken as written — see the comments
        on the individual lines below.
        """
        # Wrapper around the tf.Transform output stored in TARGET_DIR.
        tf_transform_output = tft.TFTransformOutput(TARGET_DIR)

        # NOTE(review): this input fn is created but never used below —
        # presumably it was meant to feed predict_fn; confirm intent.
        eval_sample_input_fn = self.model.make_training_input_fn(
            tf_transform_output, exp_log_data_file_train_tfrecord + '*', 1)

        # Load a prediction function from the SavedModel in TARGET_DIR.
        predict_fn = predictor.from_saved_model(TARGET_DIR)
        # NOTE(review): `xx` is not defined anywhere in this method — this
        # raises NameError unless `xx` is a module-level global; verify.
        predictions = predict_fn(xx)
        # Assumes the SavedModel's output dict has a 'scores' key — confirm
        # against the export signature.
        print(predictions['scores'])
Пример #26
0
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Trains the model on training data and evaluates it on test data.

    Args:
      working_dir: Directory to read transformed data and metadata from and
          to write the exported model to.
      num_train_instances: Number of instances in the train set.
      num_test_instances: Number of instances in the test set.

    Returns:
      The results from the estimator's 'evaluate' method.
    """
    tf_transform_output = tft.TFTransformOutput(working_dir)

    # Numeric features become real-valued scalar columns; categorical ones
    # are backed by the vocabulary files that tf.Transform computed.
    feature_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in NUMERIC_FEATURE_KEYS
    ]
    feature_columns.extend(
        tf.feature_column.categorical_column_with_vocabulary_file(
            key=key,
            vocabulary_file=tf_transform_output.vocabulary_file_by_name(
                vocab_filename=key))
        for key in CATEGORICAL_FEATURE_KEYS)

    estimator = tf.estimator.LinearClassifier(
        feature_columns=feature_columns,
        config=tf.estimator.RunConfig())

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE)
    estimator.train(
        input_fn=train_input_fn,
        max_steps=TRAIN_NUM_EPOCHS * num_train_instances / TRAIN_BATCH_SIZE)

    # Evaluation reads the test set one example at a time.
    eval_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
        batch_size=1)

    # Export a serving-ready SavedModel before evaluating.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    estimator.export_savedmodel(
        os.path.join(working_dir, EXPORTED_MODEL_DIR), serving_input_fn)

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
Пример #27
0
def train_and_maybe_evaluate(hparams):
  """Runs training (and evaluation) using the high level Estimator API.

  Args:
    hparams: Holds hyperparameters used to train the model as name/value
      pairs.

  Returns:
    The estimator that was used for training (and maybe eval).
  """
  schema = taxi.read_schema(hparams.schema_file)
  tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)

  def train_input():
    # Training input fed from the transformed training files.
    return model.input_fn(
        hparams.train_files, tf_transform_output, batch_size=TRAIN_BATCH_SIZE)

  def eval_input():
    # Evaluation input fed from the transformed eval files.
    return model.input_fn(
        hparams.eval_files, tf_transform_output, batch_size=EVAL_BATCH_SIZE)

  def serving_receiver_fn():
    # Serving receiver built from the transform output and the raw schema.
    return model.example_serving_receiver_fn(tf_transform_output, schema)

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=hparams.train_steps)

  exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=hparams.eval_steps,
      exporters=[exporter],
      name='chicago-taxi-eval')

  serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
  run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=999,
      keep_checkpoint_max=1).replace(model_dir=serving_model_dir)

  # Layer sizes decay exponentially from the first DNN layer, floored at 2.
  hidden_units = [
      max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
      for i in range(NUM_DNN_LAYERS)
  ]
  estimator = model.build_estimator(
      tf_transform_output, hidden_units=hidden_units, config=run_config)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

  return estimator
Пример #28
0
def train_and_evaluate(
    work_dir,
    input_feature_spec,
    labels,
    train_files_pattern,
    eval_files_pattern,
    batch_size=64,
    train_max_steps=1000):
  """Trains and evaluates the estimator given.

  The input functions are generated by the preprocessing function.

  Args:
    work_dir: Working directory; the model is stored under '<work_dir>/model'
      and the tf.Transform output is read from work_dir itself.
    input_feature_spec: Raw feature spec used for the serving input fn.
    labels: Label feature name(s) passed to the input/serving fns.
    train_files_pattern: Glob pattern of transformed training TFRecords.
    eval_files_pattern: Glob pattern of transformed eval TFRecords.
    batch_size: Batch size for both training and evaluation.
    train_max_steps: Maximum number of training steps.
  """
  # Start from a clean model directory.
  model_dir = os.path.join(work_dir, 'model')
  if tf.gfile.Exists(model_dir):
    tf.gfile.DeleteRecursively(model_dir)

  # Specify where to store our model, and summarize every 10 steps for a
  # more granular visualization of the training.
  run_config = (tf.estimator.RunConfig()
                .replace(model_dir=model_dir)
                .replace(save_summary_steps=10))

  # Create a Deep Neural Network Regressor estimator over the four
  # normalized element-count features.
  element_features = [
      tf.feature_column.numeric_column(name, dtype=tf.float32)
      for name in ('NormalizedC', 'NormalizedH', 'NormalizedO', 'NormalizedN')
  ]
  estimator = tf.estimator.DNNRegressor(
      feature_columns=element_features,
      hidden_units=[128, 64],
      dropout=0.5,
      config=run_config)

  # Get the transformed feature_spec from the tf.Transform output.
  tft_output = tft.TFTransformOutput(work_dir)
  feature_spec = tft_output.transformed_feature_spec()

  # Create the training and evaluation specifications.
  train_spec = tf.estimator.TrainSpec(
      input_fn=make_train_input_fn(
          feature_spec, labels, train_files_pattern, batch_size),
      max_steps=train_max_steps)

  final_exporter = tf.estimator.FinalExporter(
      'final', make_serving_input_fn(tft_output, input_feature_spec, labels))

  eval_spec = tf.estimator.EvalSpec(
      input_fn=make_eval_input_fn(
          feature_spec, labels, eval_files_pattern, batch_size),
      exporters=[final_exporter])

  # Train and evaluate the model.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Пример #29
0
def run_fn(fn_args: TrainerFnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(
      fn_args.eval_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_EVAL_BATCH_SIZE)

  if fn_args.hyperparameters:
    hparams = kerastuner.HyperParameters.from_config(fn_args.hyperparameters)
  else:
    # This covers the case when hyperparameters are decided and the Tuner is
    # removed from the pipeline. Users can also inline the hyperparameters
    # directly in _build_keras_model.
    hparams = _get_hyperparameters()
  absl.logging.info('HyperParameters for training: %s' % hparams.get_config())

  # Build the model inside the strategy scope so variables are mirrored.
  mirrored_strategy = tf.distribute.MirroredStrategy()
  with mirrored_strategy.scope():
    model = _build_keras_model(hparams)

  # BUG FIX: true division produced a float, but Keras' Model.fit requires an
  # integer steps_per_epoch. Use floor division, and keep at least one step
  # per epoch (and one epoch) so fit never receives a zero.
  steps_per_epoch = max(1, _TRAIN_DATA_SIZE // _TRAIN_BATCH_SIZE)

  # Write TensorBoard logs to the model run directory.
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=fn_args.model_run_dir, update_freq='batch')

  model.fit(
      train_dataset,
      epochs=max(1, fn_args.train_steps // steps_per_epoch),
      steps_per_epoch=steps_per_epoch,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps,
      callbacks=[tensorboard_callback])

  # Export with a serving signature that parses serialized tf.Examples.
  signatures = {
      'serving_default':
          _get_serve_tf_examples_fn(model,
                                    tf_transform_output).get_concrete_function(
                                        tf.TensorSpec(
                                            shape=[None],
                                            dtype=tf.string,
                                            name='examples')),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
Пример #30
0
def tuner_fn(fn_args: TrainerFnArgs) -> TunerFnResult:
    """Builds the tuner using the KerasTuner API.

    Args:
      fn_args: Holds args as name/value pairs.
        - working_dir: working dir for tuning.
        - train_files: List of file paths containing training tf.Example data.
        - eval_files: List of file paths containing eval tf.Example data.
        - train_steps: number of train steps.
        - eval_steps: number of eval steps.
        - schema_path: optional schema of the input data.
        - transform_graph_path: optional transform graph produced by TFT.

    Returns:
      A namedtuple containing:
        - tuner: A BaseTuner that will be used for tuning.
        - fit_kwargs: Args to pass to the tuner's run_trial function for
          fitting the model, e.g. the training and validation datasets.
          Required args depend on the tuner's implementation.
    """
    transform_graph = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Bind the transform output so the model-building fn only takes
    # hyperparameters (supplied by _get_hyperparameters) as input.
    build_keras_model_fn = functools.partial(
        _build_keras_model, tf_transform_output=transform_graph)

    # BayesianOptimization is a kerastuner.Tuner subclass (hence a BaseTuner).
    # New hyperparameter entries stay allowed so per-layer n_units can be
    # constructed conditionally on the selected n_layers.
    tuner = kerastuner.BayesianOptimization(
        build_keras_model_fn,
        max_trials=10,
        hyperparameters=_get_hyperparameters(),
        objective=kerastuner.Objective('val_sparse_categorical_accuracy',
                                       'max'),
        directory=fn_args.working_dir,
        project_name='covertype_tuning')

    train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor,
                              transform_graph, batch_size=TRAIN_BATCH_SIZE)
    eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor,
                             transform_graph, batch_size=EVAL_BATCH_SIZE)

    fit_kwargs = {
        'x': train_dataset,
        'validation_data': eval_dataset,
        'steps_per_epoch': fn_args.train_steps,
        'validation_steps': fn_args.eval_steps,
    }
    return TunerFnResult(tuner=tuner, fit_kwargs=fit_kwargs)