Example #1
  def model_fn(self, features, labels, mode, params):
    """TPUEstimator compatible model_fn."""
    del params
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    update_ops = []

    # First, embed the context and answer panels.
    if self.embedding_model_class == "values":
      # Use the integer values of the ground-truth factors.
      context_embeddings = features["context_factor_values"]
      answer_embeddings = features["answers_factor_values"]
    elif self.embedding_model_class == "onehot":
      # Use one-hot embeddings of the ground-truth factors.
      context_embeddings = features["context_factors_onehot"]
      answer_embeddings = features["answers_factors_onehot"]
    else:
      embedding_model = self.embedding_model_class()
      context_embeddings, answer_embeddings = embedding_model(
          [
              features["context"],
              features["answers"],
          ],
          training=is_training,
      )
      embedding_model.summary(print_fn=tf.logging.info)
      update_ops += embedding_model.updates

    # Apply the reasoning model.
    reasoning_model = self.reasoning_model_class()
    logits = reasoning_model([context_embeddings, answer_embeddings],
                             training=is_training)
    reasoning_model.summary(print_fn=tf.logging.info)
    update_ops += reasoning_model.updates

    loss_vec = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss_mean = tf.reduce_mean(loss_vec)

    if mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(labels, logits):
        predictions = tf.argmax(logits, 1)
        return {
            "accuracy":
                tf.metrics.accuracy(labels=labels, predictions=predictions),
        }

      return contrib_tpu.TPUEstimatorSpec(
          mode=mode, loss=loss_mean, eval_metrics=(metric_fn, [labels, logits]))

    if mode == tf.estimator.ModeKeys.TRAIN:
      # In case we use batch norm, the following is required.
      with tf.control_dependencies(update_ops):
        optimizer = self.optimizer_fn()
        train_op = optimizer.minimize(
            loss=loss_mean, global_step=tf.train.get_global_step())
      return contrib_tpu.TPUEstimatorSpec(
          mode=mode, loss=loss_mean, train_op=train_op)
    raise NotImplementedError("Unsupported mode.")
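Example #1 gathers the Keras models' updates by hand and wraps optimizer.minimize in tf.control_dependencies so that batch-norm moving averages are refreshed on every training step. A minimal sketch of the same pattern using the graph-level UPDATE_OPS collection instead (the helper name and learning rate below are illustrative, not from the original):

import tensorflow as tf

def train_op_with_batch_norm(loss, learning_rate=0.01):
    """Sketch: run batch-norm update ops together with the train step."""
    # tf.layers.batch_normalization registers its moving-average updates in
    # the UPDATE_OPS collection; they must run alongside the train op.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        return optimizer.minimize(
            loss, global_step=tf.train.get_global_step())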
Example #2
    def model_fn(self, features, labels, mode, params):
        """TPUEstimator compatible model function."""
        del labels
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        data_shape = features.get_shape().as_list()[1:]
        batch_size = tf.shape(features)[0]
        z_mean, z_logvar = self.gaussian_encoder(features,
                                                 is_training=is_training)
        z_sampled = self.sample_from_latent_distribution(z_mean, z_logvar)

        # z_sampled_sum = z_sampled[:batch_size // 2] + \
        # z_sampled[batch_size // 2:]
        # z_sampled_all = tf.concat([z_sampled, z_sampled_sum], axis=0)
        z_sampled_all = z_sampled
        reconstructions, group_feats_G, lie_alg_basis = self.decode_with_gfeats(
            z_sampled_all, data_shape, is_training)

        per_sample_loss = losses.make_reconstruction_loss(
            features, reconstructions[:batch_size])
        reconstruction_loss = tf.reduce_mean(per_sample_loss)
        kl_loss = compute_gaussian_kl(z_mean, z_logvar)
        regularizer = self.regularizer(kl_loss, z_mean, z_logvar, z_sampled,
                                       group_feats_G, lie_alg_basis,
                                       batch_size)
        loss = tf.add(reconstruction_loss, regularizer, name="loss")
        elbo = tf.add(reconstruction_loss, kl_loss, name="elbo")
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = optimizers.make_vae_optimizer()
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            train_op = tf.group([train_op, update_ops])
            tf.summary.scalar("reconstruction_loss", reconstruction_loss)
            tf.summary.scalar("elbo", -elbo)

            logging_hook = tf.train.LoggingTensorHook(
                {
                    "loss": loss,
                    "reconstruction_loss": reconstruction_loss,
                    "elbo": -elbo
                },
                every_n_iter=100)
            return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                loss=loss,
                                                train_op=train_op,
                                                training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            return contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=loss,
                eval_metrics=(make_metric_fn("reconstruction_loss", "elbo",
                                             "regularizer", "kl_loss"), [
                                                 reconstruction_loss, -elbo,
                                                 regularizer, kl_loss
                                             ]))
        else:
            raise NotImplementedError("Unsupported mode.")
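Example #2 calls sample_from_latent_distribution and compute_gaussian_kl without showing them. Assuming the standard VAE formulation (reparameterization trick and the analytic KL between a diagonal Gaussian and a standard normal), plausible definitions look like this sketch; the exact reductions in the original code base may differ:

import tensorflow as tf

def sample_from_latent_distribution(z_mean, z_logvar):
    """Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I)."""
    return z_mean + tf.exp(z_logvar / 2.0) * tf.random_normal(
        tf.shape(z_mean), 0, 1)

def compute_gaussian_kl(z_mean, z_logvar):
    """Analytic KL(q(z|x) || N(0, I)), averaged over the batch."""
    return tf.reduce_mean(
        0.5 * tf.reduce_sum(
            tf.square(z_mean) + tf.exp(z_logvar) - z_logvar - 1.0, axis=1))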
Example #3
def my_model(features, labels, mode, params):
    """Deep Neural Network(DNN) model.

  This is a DNN Model with 3 hidden layers. First 2 hidden layers are having
  10 neurons in each. And number of neurons in the last layer is equal to the
  number of output classes. This is a densely connected network where each
  neuron of previous layer is connected to each neuron of next layer.

  Args:
    features: Feature values for input samples.
    labels: label/class assigned to the corresponding input sample.
    mode: "TRAIN"/"EVAL"/"PREDICT"
    params: Dictionary used to pass extra parameters to model function from
      the main function.

  Returns:
    TPUEstimator object.

  """

    # Create three fully connected layers.
    net = tf.feature_column.input_layer(features, params["feature_columns"])
    for units in params["hidden_units"]:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)

    # Compute logits (1 per class).
    logits = tf.layers.dense(net, params["n_classes"], activation=None)

    # Compute predictions.
    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "class_ids": predicted_classes[:, tf.newaxis],
            "probabilities": tf.nn.softmax(logits),
            "logits": logits,
        }
        return contrib_tpu.TPUEstimatorSpec(mode, predictions=predictions)

    # Compute loss.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.EVAL:
        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=loss,
                                            eval_metrics=(metric_fn,
                                                          [labels, logits]))

    # Create training op.
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return contrib_tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op)
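Example #3 hands eval_metrics a metric_fn that the snippet never defines. Given the (metric_fn, [labels, logits]) contract used throughout these examples, a minimal hypothetical implementation would be:

import tensorflow as tf

def metric_fn(labels, logits):
    """Hypothetical eval metrics; receives the tensors listed in eval_metrics."""
    predicted_classes = tf.argmax(logits, 1)
    return {
        "accuracy":
            tf.metrics.accuracy(labels=labels, predictions=predicted_classes),
    }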
Example #4
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(is_training, input_ids, input_mask,
                                       segment_ids, label_ids, num_labels,
                                       albert_hub_module_handle)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(label_ids, predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, eval_metrics=eval_metrics)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode, predictions={"probabilities": probabilities})
        else:
            raise ValueError(
                "Only TRAIN, EVAL and PREDICT modes are supported: %s" %
                (mode))

        return output_spec
Example #5
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        utils.log("Building model")

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = MultitaskModel(config, tasks, task_weights, is_training,
                               features, num_train_steps)

        # Load pre-trained weights from checkpoint
        tvars = tf.trainable_variables()
        scaffold_fn = None
        if not config.debug:
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, config.init_checkpoint)
            if config.use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(config.init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(config.init_checkpoint,
                                              assignment_map)

        # Run training or prediction
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(config, model.loss,
                                                     num_train_steps)
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=model.loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
                training_hooks=[
                    training_utils.ETAHook(
                        config,
                        {} if config.use_tpu else dict(loss=model.loss),
                        num_train_steps)
                ])
        else:
            assert mode == tf.estimator.ModeKeys.PREDICT
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=utils.flatten_dict(model.outputs),
                scaffold_fn=scaffold_fn)

        utils.log("Building complete")
        return output_spec
Example #6
def model_fn(features, labels, mode, params):
    im_mode = MODEKEY_TO_MODE[mode]
    model_config = configuration.ModelConfig()
    training_config = configuration.TrainingConfig()
    model = show_and_tell_model.ShowAndTellModel(
        model_config, mode=im_mode, train_inception=FLAGS.train_inception)
    model.build_model_for_tpu(images=features["images"],
                              input_seqs=features["input_seqs"],
                              target_seqs=features["target_seqs"],
                              input_mask=features["input_mask"])

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=training_config.initial_learning_rate)
    optimizer = contrib_estimator.clip_gradients_by_norm(
        optimizer, training_config.clip_gradients)
    if FLAGS.use_tpu:
        optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
    train_op = optimizer.minimize(
        model.total_loss, global_step=tf.train.get_or_create_global_step())

    def scaffold_fn():
        """Load pretrained Inception checkpoint at initialization time."""
        return tf.train.Scaffold(init_fn=model.init_fn)

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=model.total_loss,
                                        train_op=train_op,
                                        scaffold_fn=scaffold_fn)
Example #7
def _get_estimator_spec_with_metrics(
        logits,  # type: tf.Tensor
        softmax_logits,  # type: tf.Tensor
        duplicate_mask,  # type: tf.Tensor
        num_training_neg,  # type: int
        match_mlperf=False,  # type: bool
        use_tpu_spec=False  # type: bool
):
    """Returns a EstimatorSpec that includes the metrics."""
    cross_entropy, \
    metric_fn, \
    in_top_k, \
    ndcg, \
    metric_weights = compute_eval_loss_and_metrics_helper(
        logits,
        softmax_logits,
        duplicate_mask,
        num_training_neg,
        match_mlperf,
        use_tpu_spec)

    if use_tpu_spec:
        return contrib_tpu.TPUEstimatorSpec(
            mode=tf.estimator.ModeKeys.EVAL,
            loss=cross_entropy,
            eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights]))

    return tf.estimator.EstimatorSpec(mode=tf.estimator.ModeKeys.EVAL,
                                      loss=cross_entropy,
                                      eval_metric_ops=metric_fn(
                                          in_top_k, ndcg, metric_weights))
Example #8
def model_fn(features, labels, mode, params):
    """TPUEstimatorSpec for the Squeezenet model."""
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    logits = squeezenet(features,
                        is_training=is_training,
                        num_classes=params["num_classes"])

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    global_batch_size = (params["train"]["num_cores_per_replica"] *
                         params["train"]["train_batch_size"])
    decay_steps = (params["train"]["num_examples_per_epoch"] *
                   params["train"]["num_epochs"]) // global_batch_size
    learning_rate = tf.train.polynomial_decay(
        params["train"]["learning_rate"]["init_learning_rate"],
        global_step=tf.train.get_or_create_global_step(),
        end_learning_rate=params["train"]["learning_rate"]
        ["end_learning_rate"],
        decay_steps=decay_steps,
        power=1.0,
        cycle=False)

    # TODO(power): Hack copied from resnet: remove when summaries are working.
    lr_repeat = tf.reshape(
        tf.tile(tf.expand_dims(learning_rate, 0), [
            params["train"]["train_batch_size"],
        ]), [params["train"]["train_batch_size"], 1])

    if params["train"]["optimizer"]["type"] == "adam":
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    elif params["train"]["optimizer"]["type"] == "rmsprop":
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=learning_rate,
            momentum=params["train"]["optimizer"]["momentum"],
            epsilon=1.0)
    else:
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=params["train"]["optimizer"]["momentum"],
            use_nesterov=True)

    if params["use_tpu"]:
        optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

    train_op = optimizer.minimize(loss, tf.train.get_global_step())

    return contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metrics=(metric_fn, [labels, logits, lr_repeat]),
        predictions={
            "classes": tf.argmax(input=logits, axis=1),
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        },
    )
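The Squeezenet model_fn above tiles the scalar learning rate to batch size (lr_repeat) because tensors passed through eval_metrics must carry a leading batch dimension on TPU. Its metric_fn is not shown; a hypothetical version matching the (metric_fn, [labels, logits, lr_repeat]) argument list:

import tensorflow as tf

def metric_fn(labels, logits, lr_repeat):
    """Hypothetical metrics; lr_repeat carries the per-example learning rate."""
    predictions = tf.argmax(logits, axis=1)
    return {
        "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions),
        "learning_rate": tf.metrics.mean(lr_repeat),
    }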
Example #9
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_id": unique_ids,
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                   predictions=predictions,
                                                   scaffold_fn=scaffold_fn)
        return output_spec
Example #10
def model_fn(features, labels, mode, params):
    """model_fn constructs the ML model used to predict handwritten digits."""

    del params
    image = features
    if isinstance(image, dict):
        image = features["image"]

    model = mnist.create_model("channels_last")

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits = model(image, training=False)
        predictions = {
            'class_ids': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits),
        }
        return contrib_tpu.TPUEstimatorSpec(mode, predictions=predictions)

    logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN))
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   tf.train.get_global_step(),
                                                   decay_steps=100000,
                                                   decay_rate=0.96)
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=loss,
                                            train_op=optimizer.minimize(
                                                loss,
                                                tf.train.get_global_step()))

    if mode == tf.estimator.ModeKeys.EVAL:
        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=loss,
                                            eval_metrics=(metric_fn,
                                                          [labels, logits]))
Example #11
def model_fn(features, labels, mode, params):
    """Inception v3 model using Estimator API."""
    del params

    if mode != tf.estimator.ModeKeys.TRAIN:
        raise RuntimeError('mode {} is not supported yet'.format(mode))

    num_labels = FLAGS.num_labels

    with slim.arg_scope(inception_v3_arg_scope(is_training=True)):
        logits, end_points = inception.inception_v3(
            features,
            num_labels,
            is_training=True,
            depth_multiplier=FLAGS.depth_multiplier)

    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32),
                               depth=num_labels)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(end_points['AuxLogits'],
                                        onehot_labels,
                                        label_smoothing=0.1,
                                        weights=0.4,
                                        scope='aux_loss')
    tf.losses.softmax_cross_entropy(logits,
                                    onehot_labels,
                                    label_smoothing=0.1,
                                    weights=1.0)
    loss = tf.losses.get_total_loss()

    if FLAGS.optimizer == 'sgd':
        tf.logging.info('Using SGD optimizer')
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'momentum':
        tf.logging.info('Using Momentum optimizer')
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=FLAGS.learning_rate, momentum=0.9)
    else:
        tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

    if FLAGS.use_tpu:
        optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

    train_op = optimizer.minimize(
        loss, global_step=tf.train.get_or_create_global_step())

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op)
Example #12
        def _define_model(features, labels, mode, params):
            data_source = (features, labels)
            self.outputs = {}
            self.losses = {}
            self.otters = {}
            outputs, losses, others = self.define_model(data_source, mode)

            if mode == tf.estimator.ModeKeys.EVAL:
                return tpu.TPUEstimatorSpec(mode=mode,
                                            loss=losses,
                                            eval_metrics=others)
            if mode == tf.estimator.ModeKeys.PREDICT:
                return tpu.TPUEstimatorSpec(mode=mode, predictions=outputs)
            if mode == tf.estimator.ModeKeys.TRAIN:
                self.losses['train'] = losses
                self._build_optimizer(tpu_support=True)
                if len(self.optimize_ops) != 1:
                    logging.error(
                        'Implementation error: more than one optimizer defined')
                    logging.warning(' [*] Selecting only the first optimizer')
                return tpu.TPUEstimatorSpec(mode=mode,
                                            loss=losses[0],
                                            train_op=self.optimize_ops[0])
Example #13
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        global initialized_variable_names
        # tf.logging.info("*** Features ***")
        # for name in sorted(features.keys()):
        #     tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        predictions = _creat_bert(is_training, features, bert_config,
                                  use_one_hot_embeddings, init_checkpoint)

        # The concatenation of the predictions is the output of the BERT
        # encoder and is used as the input to the other modules.
        total_loss, logits = _create_cqa_modules(is_training, predictions)

        scaffold_fn = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                               loss=total_loss,
                                               train_op=train_op,
                                               scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                               predictions=logits,
                                               scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #14
def model_fn(features, labels, mode, params):
    """Define a CIFAR model in Keras."""
    del params  # unused
    layers = contrib_keras.layers

    # Pass our input tensor to initialize the Keras input layer.
    v = layers.Input(tensor=features)
    v = layers.Conv2D(filters=32,
                      kernel_size=5,
                      activation="relu",
                      padding="same")(v)
    v = layers.MaxPool2D(pool_size=2)(v)
    v = layers.Conv2D(filters=64,
                      kernel_size=5,
                      activation="relu",
                      padding="same")(v)
    v = layers.MaxPool2D(pool_size=2)(v)
    v = layers.Flatten()(v)
    fc1 = layers.Dense(units=512, activation="relu")(v)
    logits = layers.Dense(units=10)(fc1)

    # Instead of constructing a Keras model for training, build our loss
    # function and optimizer in TensorFlow.
    #
    # N.B.  This construction omits some features that are important for more
    # complex models (e.g. regularization, batch-norm).  Once
    # `model_to_estimator` support is added for TPUs, it should be used instead.
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=labels))
    optimizer = tf.train.AdamOptimizer()
    if FLAGS.use_tpu:
        optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        predictions={
                                            "classes":
                                            tf.argmax(input=logits, axis=1),
                                            "probabilities":
                                            tf.nn.softmax(
                                                logits, name="softmax_tensor")
                                        })
Example #15
def model_fn(features, labels, mode, params):

    # Build graph
    logits = tf.layers.dense(features, 10)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
    optim = tf.train.GradientDescentOptimizer(learning_rate=1e-2)

    # NOTE: When using TPUs, you have to use CrossShardOptimizer, which
    # aggregates gradients with an all-reduce.
    if params["use_tpu"]:
        optim = tpu.CrossShardOptimizer(optim)

    train_op = optim.minimize(loss=loss,
                              global_step=tf.train.get_or_create_global_step())
    # Create EstimatorSpec
    estimator_spec = tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
    )
    return estimator_spec
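Example #15 is the smallest complete model_fn in this collection. For context, here is a sketch of how such a model_fn is typically wired into a TPUEstimator; the TPU address, directories, batch size, and train_input_fn below are placeholders, not part of the original snippet:

import tensorflow as tf
from tensorflow.contrib import tpu

run_config = tpu.RunConfig(
    master="grpc://10.0.0.1:8470",  # placeholder TPU worker address
    model_dir="/tmp/model",
    tpu_config=tpu.TPUConfig(iterations_per_loop=100))

estimator = tpu.TPUEstimator(
    model_fn=model_fn,  # the model_fn defined above
    config=run_config,
    use_tpu=True,
    train_batch_size=1024,
    params={"use_tpu": True})  # surfaced as params["use_tpu"] in model_fn

estimator.train(input_fn=train_input_fn, max_steps=10000)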
Example #16
def _model_fn(features,
              labels,
              mode,
              params,
              model,
              use_tpu_estimator_spec,
              variable_filter_fn=None):
    """Model defination for the RetinaNet model based on ResNet.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in dataloader.py
    mode: the mode of TPUEstimator/Estimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary that defines the hyperparameters of the model. The
      default settings are in the default_hparams function in this file.
    model: the RetinaNet model outputs class logits and box regression outputs.
    use_tpu_estimator_spec: Whether to use TPUEstimatorSpec or EstimatorSpec.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
  """

    # In predict mode, features is a dict and the input tensor is stored
    # under the 'inputs' key.
    image_info = None
    if (mode == tf.estimator.ModeKeys.PREDICT and isinstance(features, dict)
            and 'inputs' in features):
        image_info = features['image_info']
        labels = None
        if 'labels' in features:
            labels = features['labels']
        features = features['inputs']

    def _model_outputs():
        return model(features,
                     min_level=params['min_level'],
                     max_level=params['max_level'],
                     num_classes=params['num_classes'],
                     num_anchors=len(params['aspect_ratios'] *
                                     params['num_scales']),
                     resnet_depth=params['resnet_depth'],
                     is_training_bn=params['is_training_bn'])

    if params['use_bfloat16']:
        with contrib_tpu.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            levels = cls_outputs.keys()
            for level in levels:
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()
        levels = cls_outputs.keys()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Postprocess on host; memory layout for NMS on TPU is very inefficient.
        def _predict_postprocess_wrapper(args):
            return _predict_postprocess(*args)

        predictions = contrib_tpu.outside_compilation(
            _predict_postprocess_wrapper,
            (cls_outputs, box_outputs, labels, params))

        # Include resizing information on prediction output to help bbox drawing.
        if image_info is not None:
            predictions.update({
                'image_info':
                tf.identity(image_info, 'ImageInfo'),
            })

        return contrib_tpu.TPUEstimatorSpec(mode=tf.estimator.ModeKeys.PREDICT,
                                            predictions=predictions)

    # Load pretrained model from checkpoint.
    if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            tf.train.init_from_checkpoint(
                params['resnet_checkpoint'], {
                    '/': 'resnet%s/' % params['resnet_depth'],
                })
            return tf.train.Scaffold()
    else:
        scaffold_fn = None

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_global_step()
    learning_rate = learning_rate_schedule(params['adjusted_learning_rate'],
                                           params['lr_warmup_init'],
                                           params['lr_warmup_step'],
                                           params['first_lr_drop_step'],
                                           params['second_lr_drop_step'],
                                           global_step)
    # cls_loss and box_loss are for logging. only total_loss is optimized.
    total_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
                                                    labels, params)
    total_loss += _WEIGHT_DECAY * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
        else:
            if params['auto_mixed_precision']:
                optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimizer)

        # Batch norm requires `update_ops` to be executed alongside `train_op`.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = variable_filter_fn(
            tf.trainable_variables(),
            params['resnet_depth']) if variable_filter_fn else None

        minimize_op = optimizer.minimize(total_loss,
                                         global_step,
                                         var_list=var_list)
        train_op = tf.group(minimize_op, update_ops)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            batch_size = params['batch_size']
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                          params['val_json_file'], **kwargs)

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
        }
        add_metric_fn_inputs(params, cls_outputs, box_outputs,
                             metric_fn_inputs)
        eval_metrics = (metric_fn, metric_fn_inputs)

    if use_tpu_estimator_spec:
        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=total_loss,
                                            train_op=train_op,
                                            eval_metrics=eval_metrics,
                                            scaffold_fn=scaffold_fn)
    else:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            # TODO(rostam): Fix bug to get scaffold working.
            # scaffold=scaffold_fn(),
            train_op=train_op)
Example #17
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tfes.estimator.ModeKeys.TRAIN)

        (start_logits, end_logits) = create_model(
            bert_config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # tf.logging.info("**** Trainable Variables ****")
        # for var in tvars:
        #     init_string = ""
        #     if var.name in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                     init_string)

        output_spec = None
        if mode == tfes.estimator.ModeKeys.TRAIN:
            seq_length = modeling.get_shape_list(input_ids)[1]

            def compute_loss(logits, positions):
                one_hot_positions = tf.one_hot(positions,
                                               depth=seq_length,
                                               dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                loss = -tf.reduce_mean(
                    tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
                return loss

            start_positions = features["start_positions"]
            end_positions = features["end_positions"]

            start_loss = compute_loss(start_logits, start_positions)
            end_loss = compute_loss(end_logits, end_positions)

            total_loss = (start_loss + end_loss) / 2.0

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                               loss=total_loss,
                                               train_op=train_op,
                                               scaffold_fn=scaffold_fn)
        elif mode == tfes.estimator.ModeKeys.PREDICT:

            # outer = tf.matmul(tf.expand_dims(tf.nn.softmax(start_logits), axis=2),
            #                   tf.expand_dims(tf.nn.softmax(end_logits), axis=1))
            # outer = tf.matrix_band_part(outer, -1, 15)  # keep the upper triangle within 15 diagonals: the answer spans at most 15 + 1 tokens
            # yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)  # index of the maximum along axis 1
            # yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

            predictions = {
                "unique_ids": unique_ids,
                "start_logits": start_logits,
                "end_logits": end_logits,
                # "yp1": yp1,
                # "yp2": yp2,
            }
            output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                               predictions=predictions,
                                               scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                             (mode))

        return output_spec
Example #18
def resnet_model_fn_w_pruning(features, labels, mode, params):
    """The model_fn for ResNet-50 with pruning.

  Args:
    features: A float32 batch of images.
    labels: A int32 batch of labels.
    mode: Specifies whether training or evaluation.
    params: Dictionary of parameters passed to the model.

  Returns:
    A TPUEstimatorSpec for the model
  """

    width = 1. if FLAGS.width <= 0 else FLAGS.width

    if isinstance(features, dict):
        features = features['feature']

    if FLAGS.data_format == 'channels_first':
        assert not FLAGS.transpose_input  # channels_first only for GPU
        features = tf.transpose(features, [0, 3, 1, 2])

    if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    training_method = params['training_method']
    use_tpu = params['use_tpu']

    def build_network():
        """Construct the network in the graph."""
        if FLAGS.model_architecture == 'mobilenet_v2':
            network_func = functools.partial(
                mobilenetv2_model.mobilenet_v2,
                expansion_factor=FLAGS.expansion_factor)
        elif FLAGS.model_architecture == 'mobilenet_v1':
            network_func = functools.partial(mobilenetv1_model.mobilenet_v1)
        elif FLAGS.model_architecture == 'resnet':
            prune_first_layer = FLAGS.first_layer_sparsity != 0.
            network_func = functools.partial(
                resnet_model.resnet_v1_,
                resnet_depth=FLAGS.resnet_depth,
                init_method=FLAGS.init_method,
                end_sparsity=FLAGS.end_sparsity,
                prune_first_layer=prune_first_layer)
        else:
            raise ValueError('Unknown architecture ' + FLAGS.model_architecture)
        prune_last_layer = FLAGS.last_layer_sparsity != 0.
        network = network_func(
            num_classes=FLAGS.num_label_classes,
            # TODO remove the pruning_method option.
            pruning_method='threshold',
            width=width,
            prune_last_layer=prune_last_layer,
            data_format=FLAGS.data_format,
            weight_decay=FLAGS.weight_decay)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if FLAGS.use_batch_statistics:
            is_training = True
        return network(inputs=features, is_training=is_training)

    if FLAGS.precision == 'bfloat16':
        with contrib_tpu.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif FLAGS.precision == 'float32':
        logits = build_network()

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    output_dir = params['output_dir']
    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    one_hot_labels = tf.one_hot(labels, FLAGS.num_label_classes)

    # Make sure we reuse the same label smoothing parameter if we're doing
    # scratch / lottery ticket experiments.
    label_smoothing = FLAGS.label_smoothing
    if FLAGS.training_method == 'scratch' and FLAGS.load_mask_dir:
        scratch_stripped = FLAGS.load_mask_dir.replace('/scratch', '')
        label_smoothing = float(scratch_stripped.split('/')[15])
        tf.logging.info('LABEL SMOOTHING USED: %.2f' % label_smoothing)
    cross_loss = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_labels,
        label_smoothing=label_smoothing)
    # Add regularization loss term
    reg_loss = tf.losses.get_regularization_loss()
    loss = cross_loss + reg_loss

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        host_call, train_op = train_function(training_method, loss, cross_loss,
                                             reg_loss, output_dir, use_tpu)
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits, cross_loss, reg_loss):
            """Calculate eval metrics."""
            logging.info('In metric function')
            eval_metrics = {}
            predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            eval_metrics['top_5_eval_accuracy'] = tf.metrics.mean(in_top_5)
            eval_metrics['cross_loss'] = tf.metrics.mean(cross_loss)
            eval_metrics['reg_loss'] = tf.metrics.mean(reg_loss)
            eval_metrics['eval_accuracy'] = tf.metrics.accuracy(
                labels=labels, predictions=predictions)

            # If evaluating once, let's also calculate sparsities.
            if FLAGS.mode == 'eval_once':
                sparsity_summaries = utils.mask_summaries(pruning.get_masks())
                # We call mean on a scalar to create tensor, update_op pairs.
                sparsity_summaries = {
                    k: tf.metrics.mean(v)
                    for k, v in sparsity_summaries.items()
                }
                eval_metrics.update(sparsity_summaries)
            return eval_metrics

        tensors = [
            labels, logits,
            tf.broadcast_to(cross_loss, tf.shape(labels)),
            tf.broadcast_to(reg_loss, tf.shape(labels))
        ]

        eval_metrics = (metric_fn, tensors)

    if (FLAGS.load_mask_dir
            and FLAGS.training_method not in ('snip', 'baseline')):

        def scaffold_fn():
            """For initialization, passed to the estimator."""
            utils.initialize_parameters_from_ckpt(FLAGS.load_mask_dir,
                                                  FLAGS.output_dir,
                                                  MASK_SUFFIX)
            if FLAGS.initial_value_checkpoint:
                utils.initialize_parameters_from_ckpt(
                    FLAGS.initial_value_checkpoint, FLAGS.output_dir,
                    PARAM_SUFFIXES)
            return tf.train.Scaffold()
    elif (FLAGS.mask_init_method
          and FLAGS.training_method not in ('snip', 'baseline')):

        def scaffold_fn():
            """For initialization, passed to the estimator."""
            if FLAGS.initial_value_checkpoint:
                utils.initialize_parameters_from_ckpt(
                    FLAGS.initial_value_checkpoint, FLAGS.output_dir,
                    PARAM_SUFFIXES)
            all_masks = pruning.get_masks()
            assigner = sparse_utils.get_mask_init_fn(all_masks,
                                                     FLAGS.mask_init_method,
                                                     FLAGS.end_sparsity,
                                                     CUSTOM_SPARSITY_MAP)

            def init_fn(scaffold, session):
                """A callable for restoring variable from a checkpoint."""
                del scaffold  # Unused.
                session.run(assigner)

            return tf.train.Scaffold(init_fn=init_fn)
    else:
        assert FLAGS.training_method in ('snip', 'baseline')
        scaffold_fn = None
        tf.logging.info('No mask is set, starting dense.')

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics,
                                        scaffold_fn=scaffold_fn)
Example #19
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = tf.reshape(features["input_ids"], [-1, FLAGS.max_seq_length])
    input_mask = tf.reshape(features["input_mask"], [-1, FLAGS.max_seq_length])
    segment_ids = tf.reshape(features["segment_ids"],
                             [-1, FLAGS.max_seq_length])

    label_types = features["label_types"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_real_example = tf.reduce_sum(
        tf.one_hot(label_types, FLAGS.k_size * 2), axis=1)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (cpc_loss, _, logits, probabilities) = bilin_model_builder.create_model(
        model, label_ids, label_types, num_choices, k_size=FLAGS.k_size)

    if add_masking:
      mask_rate = FLAGS.mask_rate  # search alternatives?
      max_predictions_per_seq = int(math.ceil(FLAGS.max_seq_length * mask_rate))
      masked_lm_positions = tf.reshape(features["mask_indices"],
                                       [-1, max_predictions_per_seq])
      masked_lm_ids = tf.reshape(features["target_token_ids"],
                                 [-1, max_predictions_per_seq])
      masked_lm_weights = tf.reshape(features["target_token_weights"],
                                     [-1, max_predictions_per_seq])
      (masked_lm_loss, _, _) = bilin_model_builder.get_masked_lm_output(
          bert_config, model.get_sequence_output(), model.get_embedding_table(),
          masked_lm_positions, masked_lm_ids, masked_lm_weights)
      total_loss = cpc_loss + masked_lm_loss
    else:
      total_loss = cpc_loss
      masked_lm_loss = tf.constant([0])

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(total_loss, learning_rate,
                                               num_train_steps,
                                               num_warmup_steps, use_tpu)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(cpc_loss, mlm_loss, label_ids, logits, is_real_example):
        """Collect metrics for function."""

        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions, weights=is_real_example)
        cpc_loss_metric = tf.metrics.mean(values=cpc_loss)
        mlm_loss_metric = tf.metrics.mean(values=mlm_loss)
        metric_dict = {
            "eval_accuracy": accuracy,
            "eval_cpc_loss": cpc_loss_metric,
            "eval_mlm_loss": mlm_loss_metric
        }
        for i in range(FLAGS.k_size * 2):
          metric_dict["acc" + str(i)] = tf.metrics.accuracy(
              labels=label_ids[:, i],
              predictions=predictions[:, i],
              weights=is_real_example[:, i])
        return metric_dict

      eval_metrics = (metric_fn, [
          cpc_loss, masked_lm_loss, label_ids, logits, is_real_example
      ])
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec
Example #20
def _model_fn(features, labels, mode, params):
    """Estimator model_fn for an autoencoder with adaptive damping."""
    del params

    training_model = classifier_mnist.Model()
    layer_collection = kfac.LayerCollection()

    def loss_fn(minibatch, logits=None, return_error=False):

        features, labels = minibatch
        if logits is None:
            # Note we do not need to do anything like
            # `with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):`
            # here because Sonnet takes care of variable reuse for us as long as we
            # call the same `training_model` module.  Otherwise we would need to
            # use variable reusing here.
            logits = training_model(features)

        return classifier_mnist.compute_loss(logits=logits,
                                             labels=labels,
                                             return_error=return_error)

    logits = training_model(features)

    pre_update_batch_loss, pre_update_batch_error = loss_fn((features, labels),
                                                            logits=logits,
                                                            return_error=True)

    global_step = tf.train.get_or_create_global_step()

    if mode == tf.estimator.ModeKeys.TRAIN:
        layer_collection.register_softmax_cross_entropy_loss(logits,
                                                             seed=FLAGS.seed +
                                                             1)
        layer_collection.auto_register_layers()

        train_op, kfac_optimizer = make_train_op(
            (features, labels), pre_update_batch_loss, layer_collection,
            loss_fn)

        tensors_to_print = {
            'learning_rate': tf.expand_dims(kfac_optimizer.learning_rate, 0),
            'momentum': tf.expand_dims(kfac_optimizer.momentum, 0),
            'damping': tf.expand_dims(kfac_optimizer.damping, 0),
            'global_step': tf.expand_dims(global_step, 0),
            'loss': tf.expand_dims(pre_update_batch_loss, 0),
            'error': tf.expand_dims(pre_update_batch_error, 0),
        }

        if FLAGS.adapt_damping:
            tensors_to_print['qmodel_change'] = tf.expand_dims(
                kfac_optimizer.qmodel_change, 0)
            tensors_to_print['rho'] = tf.expand_dims(kfac_optimizer.rho, 0)

        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=pre_update_batch_loss,
                                            train_op=train_op,
                                            host_call=(print_tensors,
                                                       tensors_to_print),
                                            eval_metrics=None)

    else:  # mode == tf.estimator.ModeKeys.{EVAL, PREDICT}:
        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=pre_update_batch_loss,
                                            eval_metrics=None)
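For orientation, a hedged sketch of how a `model_fn` like `_model_fn` above is typically wired into `TPUEstimator`. The run-config values and the toy `input_fn` are illustrative assumptions, and the snippet still depends on the surrounding module (`FLAGS`, `classifier_mnist`, `kfac`) from the original example:

import tensorflow.compat.v1 as tf
from tensorflow.contrib import tpu as contrib_tpu

def toy_input_fn(params):
    """Hypothetical stand-in for the real MNIST input pipeline."""
    del params
    images = tf.zeros([512, 28, 28, 1])
    labels = tf.zeros([512], dtype=tf.int32)
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    return dataset.repeat().batch(128, drop_remainder=True)

run_config = contrib_tpu.RunConfig(
    model_dir='/tmp/kfac_mnist',  # hypothetical output directory
    tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=100))
estimator = contrib_tpu.TPUEstimator(
    model_fn=_model_fn,
    config=run_config,
    use_tpu=False,  # TPUEstimatorSpec also runs on CPU/GPU in this mode
    train_batch_size=128)
estimator.train(input_fn=toy_input_fn, max_steps=1000)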
Example #21
    def estimator_spec_train(self, loss, num_async_replicas=1, use_tpu=False):
        """Constructs `tf.estimator.EstimatorSpec` for TRAIN (training) mode."""
        train_op = self.optimize(loss,
                                 num_async_replicas=num_async_replicas,
                                 use_tpu=use_tpu)

        sparsity_technique = self._hparams.get("sparsity_technique")
        if "pruning" in sparsity_technique:
            if not self._hparams.load_masks_from:
                # Only add the mask update step when we are not loading
                # trained masks; loaded masks stay static during training.
                with tf.control_dependencies([train_op]):
                    mp_hparams = pruning_hparams(
                        self._hparams, use_tpu,
                        sparsity_technique == "random_pruning")
                    p = magnitude_pruning.Pruning(
                        mp_hparams, global_step=tf.train.get_global_step())
                    mask_update_op = p.conditional_mask_update_op()
                    train_op = mask_update_op
            check_global_sparsity()

        if use_tpu:
            if self._hparams.warm_start_from:

                def scaffold_fn():
                    self.initialize_from_ckpt(self._hparams.warm_start_from)
                    return tf.train.Scaffold()
            elif self._hparams.load_masks_from and self._hparams.load_weights_from:

                def scaffold_fn():
                    self.initialize_masks_from_ckpt(
                        self._hparams.load_masks_from)
                    self.initialize_non_masks_from_ckpt(
                        self._hparams.load_weights_from)
                    return tf.train.Scaffold()
            elif self._hparams.load_masks_from:

                def scaffold_fn():
                    self.initialize_masks_from_ckpt(
                        self._hparams.load_masks_from)
                    return tf.train.Scaffold()
            else:
                scaffold_fn = None

            # Note: important to call this before remove_summaries()
            if self.hparams.tpu_enable_host_call:
                host_call = t2t_model.create_host_call(self.hparams.model_dir)
            else:
                host_call = None

            t2t_model.remove_summaries()

            return contrib_tpu.TPUEstimatorSpec(tf_estimator.ModeKeys.TRAIN,
                                                loss=loss,
                                                train_op=train_op,
                                                host_call=host_call,
                                                scaffold_fn=scaffold_fn)
        else:
            if self._hparams.warm_start_from:
                self.initialize_from_ckpt(self._hparams.warm_start_from)
            elif self._hparams.load_masks_from:
                self.initialize_masks_from_ckpt(self._hparams.load_masks_from)

            return tf_estimator.EstimatorSpec(tf_estimator.ModeKeys.TRAIN,
                                              loss=loss,
                                              train_op=train_op)
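The TPU branch above follows a common idiom: on TPU, `tf.train.init_from_checkpoint` has to be deferred into a `scaffold_fn` that runs on the host after the graph is rewritten, while on CPU/GPU it can be called directly during graph construction. A minimal hedged sketch of that pattern, with `ckpt_path` as an assumed placeholder and `{'/': '/'}` mapping every checkpoint variable to the same name in the graph:

import tensorflow.compat.v1 as tf

def make_scaffold_fn(ckpt_path, use_tpu):
    """Returns a scaffold_fn for TPU mode; initializes eagerly otherwise."""
    if use_tpu:
        def scaffold_fn():
            # Deferred: executed on the host once the TPU graph exists.
            tf.train.init_from_checkpoint(ckpt_path, {'/': '/'})
            return tf.train.Scaffold()
        return scaffold_fn
    # Non-TPU: safe to initialize while the graph is being built.
    tf.train.init_from_checkpoint(ckpt_path, {'/': '/'})
    return None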
Example #22
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, probabilities, logits, predictions) = \
            create_model(albert_config, is_training, input_ids, input_mask,
                         segment_ids, label_ids, num_labels, use_one_hot_embeddings,
                         task_name, hub_module)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            if task_name not in ["sts-b", "cola"]:

                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    accuracy = tf.metrics.accuracy(labels=label_ids,
                                                   predictions=predictions,
                                                   weights=is_real_example)
                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)
                    return {
                        "eval_accuracy": accuracy,
                        "eval_loss": loss,
                    }
            elif task_name == "sts-b":

                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    """Compute Pearson correlations for STS-B."""
                    # Display labels and predictions
                    concat1 = contrib_metrics.streaming_concat(logits)
                    concat2 = contrib_metrics.streaming_concat(label_ids)

                    # Compute Pearson correlation
                    pearson = contrib_metrics.streaming_pearson_correlation(
                        logits, label_ids, weights=is_real_example)

                    # Compute MSE
                    # mse = tf.metrics.mean(per_example_loss)
                    mse = tf.metrics.mean_squared_error(
                        label_ids, logits, weights=is_real_example)

                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)

                    return {
                        "pred": concat1,
                        "label_ids": concat2,
                        "pearson": pearson,
                        "MSE": mse,
                        "eval_loss": loss,
                    }
            elif task_name == "cola":

                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    """Compute Matthew's correlations for STS-B."""
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
                    tp, tp_op = tf.metrics.true_positives(
                        predictions, label_ids, weights=is_real_example)
                    tn, tn_op = tf.metrics.true_negatives(
                        predictions, label_ids, weights=is_real_example)
                    fp, fp_op = tf.metrics.false_positives(
                        predictions, label_ids, weights=is_real_example)
                    fn, fn_op = tf.metrics.false_negatives(
                        predictions, label_ids, weights=is_real_example)

                    # Compute Matthew's correlation
                    mcc = tf.div_no_nan(
                        tp * tn - fp * fn,
                        tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn),
                               0.5))

                    # Compute accuracy
                    accuracy = tf.metrics.accuracy(labels=label_ids,
                                                   predictions=predictions,
                                                   weights=is_real_example)

                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)

                    return {
                        "matthew_corr":
                        (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)),
                        "eval_accuracy": accuracy,
                        "eval_loss": loss,
                    }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions={
                                                           "probabilities":
                                                           probabilities,
                                                           "predictions":
                                                           predictions
                                                       },
                                                       scaffold_fn=scaffold_fn)
        return output_spec
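As a quick sanity check on the Matthews-correlation expression in the CoLA branch above, the same formula computed on made-up confusion counts (plain Python, no TensorFlow; the numbers are illustrative):

import math

tp, tn, fp, fn = 6.0, 3.0, 1.0, 2.0
denominator = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
mcc = (tp * tn - fp * fn) / denominator if denominator else 0.0
print(round(mcc, 4))  # 0.4781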
Example #23
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        # Note: we keep the feature name `next_sentence_labels` to stay
        # compatible with the original data created by lanzhzh@. In the
        # ALBERT case, however, it actually holds the sentence-order labels.
        sentence_order_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.AlbertModel(
            config=albert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             albert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (sentence_order_loss, sentence_order_example_loss,
         sentence_order_log_probs) = get_sentence_order_output(
             albert_config, model.get_pooled_output(), sentence_order_labels)

        total_loss = masked_lm_loss + sentence_order_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            tf.logging.info("number of hidden group %d to initialize",
                            albert_config.num_hidden_groups)
            num_of_initialize_group = 1
            if FLAGS.init_from_group0:
                num_of_initialize_group = albert_config.num_hidden_groups
                if albert_config.net_structure_type > 0:
                    num_of_initialize_group = albert_config.num_hidden_layers
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint, num_of_initialize_group)
            if use_tpu:

                def tpu_scaffold():
                    for gid in range(num_of_initialize_group):
                        tf.logging.info("initialize the %dth layer", gid)
                        tf.logging.info(assignment_map[gid])
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map[gid])
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                for gid in range(num_of_initialize_group):
                    tf.logging.info("initialize the %dth layer", gid)
                    tf.logging.info(assignment_map[gid])
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map[gid])

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu,
                                                     optimizer, poly_power,
                                                     start_warmup_step)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(*args):
                """Computes the loss and accuracy of the model."""
                (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                 masked_lm_weights, sentence_order_example_loss,
                 sentence_order_log_probs, sentence_order_labels) = args[:7]

                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                metrics = {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

                sentence_order_log_probs = tf.reshape(
                    sentence_order_log_probs,
                    [-1, sentence_order_log_probs.shape[-1]])
                sentence_order_predictions = tf.argmax(
                    sentence_order_log_probs, axis=-1, output_type=tf.int32)
                sentence_order_labels = tf.reshape(sentence_order_labels, [-1])
                sentence_order_accuracy = tf.metrics.accuracy(
                    labels=sentence_order_labels,
                    predictions=sentence_order_predictions)
                sentence_order_mean_loss = tf.metrics.mean(
                    values=sentence_order_example_loss)
                metrics.update({
                    "sentence_order_accuracy": sentence_order_accuracy,
                    "sentence_order_loss": sentence_order_mean_loss
                })
                return metrics

            metric_values = [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, sentence_order_example_loss,
                sentence_order_log_probs, sentence_order_labels
            ]

            eval_metrics = (metric_fn, metric_values)

            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #24
def resnet_model_fn_w_pruning(features, labels, mode, params):
    """The model_fn for ResNet-50 with pruning.

  Args:
    features: A float32 batch of images.
    labels: A int32 batch of labels.
    mode: Specifies whether training or evaluation.
    params: Dictionary of parameters passed to the model.

  Returns:
    A TPUEstimatorSpec for the model
  """

    width = 1. if FLAGS.width <= 0 else FLAGS.width
    if isinstance(features, dict):
        features = features['feature']

    if FLAGS.data_format == 'channels_first':
        assert not FLAGS.transpose_input  # channels_first only for GPU
        features = tf.transpose(features, [0, 3, 1, 2])

    if FLAGS.transpose_input and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    pruning_method = params['pruning_method']
    use_tpu = params['use_tpu']
    log_alpha_threshold = params['log_alpha_threshold']

    def build_network():
        """Construct the network in the graph."""
        model_pruning_method = pruning_method
        if pruning_method == 'scratch':
            model_pruning_method = 'threshold'

        network = resnet_model.resnet_v1_(
            resnet_depth=FLAGS.resnet_depth,
            num_classes=FLAGS.num_label_classes,
            # we need to construct the model with the pruning masks, but they won't
            # be updated if we're doing scratch training
            pruning_method=model_pruning_method,
            init_method=FLAGS.init_method,
            width=width,
            prune_first_layer=FLAGS.prune_first_layer,
            prune_last_layer=FLAGS.prune_last_layer,
            data_format=FLAGS.data_format,
            end_sparsity=FLAGS.end_sparsity,
            clip_log_alpha=FLAGS.clip_log_alpha,
            log_alpha_threshold=log_alpha_threshold,
            weight_decay=FLAGS.weight_decay)
        return network(inputs=features,
                       is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    if FLAGS.precision == 'bfloat16':
        with contrib_tpu.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif FLAGS.precision == 'float32':
        logits = build_network()
    else:
        raise ValueError('Unknown precision: %s' % FLAGS.precision)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    output_dir = params['output_dir']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    one_hot_labels = tf.one_hot(labels, FLAGS.num_label_classes)

    # Make sure we reuse the same label-smoothing parameter if we're doing
    # scratch / lottery-ticket experiments.
    label_smoothing = FLAGS.label_smoothing
    if FLAGS.pruning_method == 'scratch':
        label_smoothing = float(FLAGS.load_mask_dir.split('/')[15])
    loss = tf.losses.softmax_cross_entropy(logits=logits,
                                           onehot_labels=one_hot_labels,
                                           label_smoothing=label_smoothing)
    # Add regularization loss term
    loss += tf.losses.get_regularization_loss()

    if pruning_method == 'variational_dropout':
        reg_loss = utils.variational_dropout_dkl_loss(
            reg_scalar=FLAGS.reg_scalar,
            start_reg_ramp_up=FLAGS.sparsity_begin_step,
            end_reg_ramp_up=FLAGS.sparsity_end_step,
            warm_up=FLAGS.is_warm_up,
            use_tpu=use_tpu)
        loss += reg_loss
        tf.losses.add_loss(reg_loss, loss_collection=tf.GraphKeys.LOSSES)
    elif pruning_method == 'l0_regularization':
        reg_loss = utils.l0_regularization_loss(
            reg_scalar=FLAGS.reg_scalar,
            start_reg_ramp_up=FLAGS.sparsity_begin_step,
            end_reg_ramp_up=FLAGS.sparsity_end_step,
            warm_up=FLAGS.is_warm_up,
            use_tpu=use_tpu)
        loss += reg_loss
        tf.losses.add_loss(reg_loss, loss_collection=tf.GraphKeys.LOSSES)

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        host_call, train_op = train_function(pruning_method, loss, output_dir,
                                             use_tpu)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Calculate eval metrics."""
            logging.info('In metric function')
            eval_metrics = {}
            predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            eval_metrics['top_5_eval_accuracy'] = tf.metrics.mean(in_top_5)
            eval_metrics['eval_accuracy'] = tf.metrics.accuracy(
                labels=labels, predictions=predictions)

            return eval_metrics

        def vd_metric_fn(labels, logits, global_sparsity):
            eval_metrics = metric_fn(labels, logits)
            eval_metrics['global_sparsity'] = tf.metrics.mean(global_sparsity)
            return eval_metrics

        tensors = [labels, logits]
        metric_function = metric_fn

        if FLAGS.pruning_method == 'variational_dropout':
            batch_size = labels.shape[0]
            ones = tf.ones([batch_size, 1])
            mask_metrics = utils.add_vd_pruning_summaries(
                threshold=FLAGS.log_alpha_threshold)
            tensors.append(mask_metrics['global_sparsity'] * ones)
            metric_function = vd_metric_fn

        eval_metrics = (metric_function, tensors)

    # define a custom scaffold function to enable initializing the mask from an
    # already trained checkpoint.
    def initialize_mask_from_ckpt(ckpt_path):
        """Load mask from an existing checkpoint."""
        model_dir = FLAGS.output_dir
        already_has_ckpt = model_dir and tf.train.latest_checkpoint(
            model_dir) is not None
        if already_has_ckpt:
            tf.logging.info(
                'Training already started on this model, not loading masks '
                'from previously trained model')
            return

        reader = tf.train.NewCheckpointReader(ckpt_path)
        mask_names = reader.get_variable_to_shape_map().keys()
        mask_names = [x for x in mask_names if x.endswith('mask')]

        variable_map = {}
        for var in tf.global_variables():
            var_name = var.name.split(':')[0]
            if var_name in mask_names:
                tf.logging.info('Loading mask variable from checkpoint: %s',
                                var_name)
                variable_map[var_name] = var
            elif 'mask' in var_name:
                tf.logging.info(
                    'Cannot find mask variable in checkpoint, skipping: %s',
                    var_name)
        tf.train.init_from_checkpoint(ckpt_path, variable_map)

    def initialize_parameters_from_ckpt(ckpt_path):
        """Load parameters from an existing checkpoint."""
        model_dir = FLAGS.output_dir
        already_has_ckpt = model_dir and tf.train.latest_checkpoint(
            model_dir) is not None
        if already_has_ckpt:
            tf.logging.info(
                'Training already started on this model, not loading '
                'parameters from previously trained model')
            return

        reader = tf.train.NewCheckpointReader(ckpt_path)
        param_names = reader.get_variable_to_shape_map().keys()
        param_names = [x for x in param_names if not x.endswith('mask')]

        variable_map = {}
        for var in tf.global_variables():
            var_name = var.name.split(':')[0]
            if var_name in param_names:
                tf.logging.info(
                    'Loading parameter variable from checkpoint: %s', var_name)
                variable_map[var_name] = var
            elif 'mask' not in var_name:
                tf.logging.info(
                    'Cannot find parameter variable in checkpoint, skipping: %s',
                    var_name)
        tf.train.init_from_checkpoint(ckpt_path, variable_map)

    if FLAGS.pruning_method == 'scratch':
        if FLAGS.load_mask_dir:

            def scaffold_fn():
                initialize_mask_from_ckpt(FLAGS.load_mask_dir)
                if FLAGS.initial_value_checkpoint:
                    initialize_parameters_from_ckpt(
                        FLAGS.initial_value_checkpoint)
                return tf.train.Scaffold()
        else:
            raise ValueError(
                'Must supply a mask directory to use scratch method')
    else:
        scaffold_fn = None

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics,
                                        scaffold_fn=scaffold_fn)
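The two initializer helpers above partition checkpoint variables purely by name suffix; a hedged standalone illustration of that split, with made-up variable names:

all_names = ['conv1/kernel', 'conv1/mask', 'dense/kernel', 'dense/mask']
mask_names = [n for n in all_names if n.endswith('mask')]
param_names = [n for n in all_names if not n.endswith('mask')]
print(mask_names)   # ['conv1/mask', 'dense/mask']
print(param_names)  # ['conv1/kernel', 'dense/kernel']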
Example #25
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        num_choices = 2

        read_size = num_choices + 1
        input_ids = [
            features["input_ids" + str(i)] for i in range(0, read_size)
        ]
        input_mask = [
            features["input_mask" + str(i)] for i in range(0, read_size)
        ]
        segment_ids = [
            features["segment_ids" + str(i)] for i in range(0, read_size)
        ]
        label_ids = features["labels"]
        label_ids = label_ids[:, 4]

        seq_length = input_ids[0].shape[-1]
        input_ids = tf.reshape(tf.stack(input_ids, axis=1), [-1, seq_length])
        input_mask = tf.reshape(tf.stack(input_mask, axis=1), [-1, seq_length])
        segment_ids = tf.reshape(tf.stack(segment_ids, axis=1),
                                 [-1, seq_length])

        is_training = (mode == tf_estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if FLAGS.bilin_preproc:
            (total_loss, per_example_loss, logits,
             probabilities) = model_builder.create_model_bilin(
                 model, label_ids, num_choices)
        else:
            (total_loss, per_example_loss, logits,
             probabilities) = model_builder.create_model(
                 model, label_ids, num_choices)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf_estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)

        elif mode == tf_estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
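To make the stack-then-reshape above concrete: each feature arrives as `read_size` tensors of shape `[batch, seq_length]`, and stacking on axis 1 before flattening yields `[batch * read_size, seq_length]` with each example's choices kept adjacent. A hedged toy check with assumed sizes batch=2, read_size=3, seq_length=4:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
batch, read_size, seq_length = 2, 3, 4
ids = [tf.fill([batch, seq_length], i) for i in range(read_size)]
stacked = tf.reshape(tf.stack(ids, axis=1), [-1, seq_length])
with tf.Session() as sess:
    out = sess.run(stacked)
print(out.shape)  # (6, 4): rows 0-2 belong to example 0, rows 3-5 to example 1
print(out[:, 0])  # [0 1 2 0 1 2]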
Example #26
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            initialized_variable_names = []

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       loss=total_loss,
                                                       train_op=train_op,
                                                       scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(label_ids, predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {"probabilities": probabilities}
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions=predictions,
                                                       scaffold_fn=scaffold_fn)

        else:
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
        return output_spec
Example #27
def inception_model_fn(features, labels, mode, params):
    """Inception v2 model using Estimator API."""
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['input_perm'])

    if FLAGS.clear_update_collections:
        # updates_collections must be set to None in order to use fused batchnorm
        with arg_scope(
                inception.inception_v2_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = inception.inception_v2(
                features,
                num_classes,
                is_training=is_training,
                replace_separable_convolution=True)
    else:
        with arg_scope(
                inception.inception_v2_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = inception.inception_v2(
                features,
                num_classes,
                is_training=is_training,
                replace_separable_convolution=True)

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)
    loss = tf.losses.get_total_loss(add_regularization_losses=True)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32), wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)), wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `host_call`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `host_call`.

      Args:
        gs: `Tensor` with shape `[batch]` for the global_step.
        loss: `Tensor` with shape `[batch]` for the training loss.
        lr: `Tensor` with shape `[batch]` for the learning_rate.
        ce: `Tensor` with shape `[batch]` for the current_epoch.

      Returns:
        List of summary ops to run on the CPU host.
      """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()

        host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, ]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
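The reshape-to-`[1]` calls above follow the `host_call` convention: every tensor handed to the host needs a leading batch dimension, because the runtime concatenates the per-step values along that dimension before invoking the host function. A hedged NumPy-only illustration of the resulting shapes (the step count and loss values are made up):

import numpy as np

num_steps = 4  # e.g. device-loop iterations accumulated before one host call
# Each step contributes a [1]-shaped value, e.g. tf.reshape(loss, [1]).
per_step_losses = [np.array([0.9 - 0.1 * i], dtype=np.float32)
                   for i in range(num_steps)]
# The host function then receives the concatenation along axis 0.
received = np.concatenate(per_step_losses)
print(received.shape)            # (4,)
print(float(np.mean(received)))  # ~0.75, what tf.reduce_mean sees on the host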
Example #28
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        if FLAGS.use_next_sentence_prediction:
            sample_weights = None
            if FLAGS.no_nsp_while_masking:
                sample_weights = tf.cast(
                    tf.math.greater_equal(
                        tf.reduce_sum(masked_lm_weights, axis=1), 0.0),
                    tf.float32)
            (next_sentence_loss, next_sentence_example_loss,
             next_sentence_log_probs) = get_next_sentence_output(
                 bert_config, model.get_pooled_output(), next_sentence_labels,
                 sample_weights)

            # Compute total weighted loss:
            #   if mlm_loss_weight=1, this amounts to summing up the losses.
            total_loss = (bert_config.mlm_loss_weight * masked_lm_loss +
                          next_sentence_loss) / (
                              1 + bert_config.mlm_loss_weight) * 2

            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            next_sentence_accuracy = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
        else:
            total_loss = masked_lm_loss

        masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                         [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                          axis=-1,
                                          output_type=tf.int32)
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.logging.info("**** Assignment map **** %s" % assignment_map)
            for x in assignment_map:
                tf.logging.info(x)

            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            # Host function for saving summaries.
            def _host_fn(**kwargs):
                global_step = kwargs.pop("global_step")[0]
                with tf.compat.v2.summary.create_file_writer(
                        os.path.join(FLAGS.output_dir, "train")).as_default():
                    with tf.compat.v2.summary.record_summaries_every_n_global_steps(
                            FLAGS.steps_per_summary, global_step):
                        for name, tensor in kwargs.items():
                            tf.compat.v2.summary.scalar(name,
                                                        tf.reduce_mean(tensor),
                                                        step=global_step)
                    return tf.summary.all_v2_summary_ops()

            global_step = tf.train.get_or_create_global_step()
            if FLAGS.use_next_sentence_prediction:
                host_inputs = {
                    "global_step":
                    tf.expand_dims(global_step, 0),
                    "loss/mlm_loss":
                    tf.expand_dims(masked_lm_loss, 0),
                    "loss/cls_loss":
                    tf.expand_dims(next_sentence_loss, 0),
                    "loss/total_loss":
                    tf.expand_dims(total_loss, 0),
                    "accuracy/mlm_accuracy":
                    tf.expand_dims(masked_lm_accuracy, 0),
                    "accuracy/cls_accuracy":
                    tf.expand_dims(next_sentence_accuracy, 0),
                }
            else:
                host_inputs = {
                    "global_step": tf.expand_dims(global_step, 0),
                    "loss/mlm_loss": tf.expand_dims(masked_lm_loss, 0),
                    "loss/total_loss": tf.expand_dims(total_loss, 0),
                    "accuracy/mlm_accuracy":
                    tf.expand_dims(masked_lm_accuracy, 0),
                }
            host_call = (_host_fn, host_inputs)
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = contrib_tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
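To verify the loss-weighting comment in the training branch above: with `mlm_loss_weight = 1`, the expression `(w * mlm + nsp) / (1 + w) * 2` reduces to the plain sum of the two losses. A hedged arithmetic check with made-up loss values:

w = 1.0  # stands in for bert_config.mlm_loss_weight
mlm_loss, nsp_loss = 2.5, 0.7
total = (w * mlm_loss + nsp_loss) / (1.0 + w) * 2.0
print(total)  # 3.2, equal to mlm_loss + nsp_loss when w == 1
assert abs(total - (mlm_loss + nsp_loss)) < 1e-9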
Example #29
def get_estimator_spec(hparams, mode, features, labels, frame_logits,
                       onset_logits, offset_logits, velocity_values,
                       offset_network=True):
  """Create TPUEstimatorSpec."""
  loss_metrics = {}
  loss = None
  if (mode == tf.estimator.ModeKeys.TRAIN or
      mode == tf.estimator.ModeKeys.EVAL):
    onset_losses = tf.losses.sigmoid_cross_entropy(
        labels.onsets[:, :, :constants.MIDI_PITCHES],
        onset_logits[:, :, :constants.MIDI_PITCHES],
        weights=tf.expand_dims(
            tf.sequence_mask(
                features.length, maxlen=tf.shape(labels.onsets)[1]),
            axis=2))
    loss_metrics['onset'] = onset_losses

    if offset_network and not hparams.drums_only:
      offset_losses = tf.losses.sigmoid_cross_entropy(
          labels.offsets[:, :, :constants.MIDI_PITCHES],
          offset_logits[:, :, :constants.MIDI_PITCHES],
          weights=tf.expand_dims(
              tf.sequence_mask(
                  features.length, maxlen=tf.shape(labels.offsets)[1]),
              axis=2))
      loss_metrics['offset'] = offset_losses

    velocity_losses = tf.losses.mean_squared_error(
        labels.velocities, velocity_values,
        weights=labels.onsets * hparams.velocity_loss_weight)
    loss_metrics['velocity'] = velocity_losses

    if not hparams.drums_only:
      frame_losses = tf.losses.sigmoid_cross_entropy(
          labels.labels[:, :, :constants.MIDI_PITCHES],
          frame_logits[:, :, :constants.MIDI_PITCHES],
          weights=tf.expand_dims(
              tf.sequence_mask(
                  features.length, maxlen=tf.shape(labels.labels)[1]),
              axis=2))
      loss_metrics['frame'] = frame_losses

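    # get_total_loss() sums every loss registered above through the
    # tf.losses.* calls, plus any regularization losses in the collection.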
    loss = tf.losses.get_total_loss()

  if (mode == tf.estimator.ModeKeys.EVAL or
      mode == tf.estimator.ModeKeys.PREDICT):
    frame_probs = tf.sigmoid(frame_logits)
    onset_probs = tf.sigmoid(onset_logits)
    if offset_network:
      offset_probs = tf.sigmoid(offset_logits)
    else:
      offset_probs = tf.zeros_like(onset_probs)
    frame_predictions = frame_probs > hparams.predict_frame_threshold
    onset_predictions = onset_probs > hparams.predict_onset_threshold
    offset_predictions = offset_probs > hparams.predict_offset_threshold

    if hparams.drum_prediction_map:
      map_predictions = functools.partial(
          drum_mappings.map_pianoroll,
          mapping_name=hparams.drum_prediction_map,
          reduce_mode='any',
          min_pitch=constants.MIN_MIDI_PITCH)
      frame_predictions = tf.map_fn(map_predictions, frame_predictions)
      onset_predictions = tf.map_fn(map_predictions, onset_predictions)
      offset_predictions = tf.map_fn(map_predictions, offset_predictions)
      map_values = functools.partial(
          drum_mappings.map_pianoroll,
          mapping_name=hparams.drum_prediction_map,
          reduce_mode='max',
          min_pitch=constants.MIN_MIDI_PITCH)
      velocity_values = tf.map_fn(map_values, velocity_values)

    metrics_values = get_metrics(features, labels, frame_probs, onset_probs,
                                 frame_predictions, onset_predictions,
                                 offset_predictions, velocity_values, hparams)

    for label, loss_collection in loss_metrics.items():
      loss_label = 'losses/' + label
      metrics_values[loss_label] = loss_collection

  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = contrib_layers.optimize_loss(
        name='training',
        loss=loss,
        global_step=tf.train.get_or_create_global_step(),
        learning_rate=hparams.learning_rate,
        learning_rate_decay_fn=functools.partial(
            tf.train.exponential_decay,
            decay_steps=hparams.decay_steps,
            decay_rate=hparams.decay_rate,
            staircase=True),
        clip_gradients=hparams.clip_norm,
        summaries=[],
        optimizer=lambda lr: contrib_tpu.CrossShardOptimizer(  # pylint:disable=g-long-lambda
            tf.train.AdamOptimizer(lr)))

    return contrib_tpu.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op)
  elif mode == tf.estimator.ModeKeys.EVAL:
    metric_ops = {k: tf.metrics.mean(v) for k, v in metrics_values.items()}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=metric_ops)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'frame_probs': frame_probs,
        'onset_probs': onset_probs,
        'frame_predictions': frame_predictions,
        'onset_predictions': onset_predictions,
        'offset_predictions': offset_predictions,
        'velocity_values': velocity_values,
        'sequence_predictions': _predict_sequences(
            frame_probs=frame_probs,
            onset_probs=onset_probs,
            frame_predictions=frame_predictions,
            onset_predictions=onset_predictions,
            offset_predictions=offset_predictions,
            velocity_values=velocity_values,
            hparams=hparams),
        # Include some features and labels in the output because the
        # Estimator 'predict' API does not give access to them otherwise.
        'sequence_ids': features.sequence_id,
        'sequence_labels': labels.note_sequence,
        'frame_labels': labels.labels,
        'onset_labels': labels.onsets,
    }
    for k, v in metrics_values.items():
      predictions[k] = tf.stack(v)

    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
  else:
    raise ValueError('Unsupported mode: %s' % mode)
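The weighting trick used throughout get_estimator_spec is worth isolating: tf.sequence_mask turns per-example lengths into a [batch, time] boolean mask, and expand_dims broadcasts it across the pitch axis so padded timesteps contribute nothing to the loss. A self-contained sketch, whose shapes and dummy tensors are assumptions for demonstration only:

import tensorflow as tf

labels = tf.zeros([2, 5, 88])   # [batch, time, pitches], dummy data
logits = tf.zeros([2, 5, 88])
lengths = tf.constant([5, 3])   # valid timesteps per example

mask = tf.sequence_mask(lengths, maxlen=tf.shape(labels)[1])  # [2, 5] bool
weights = tf.expand_dims(tf.cast(mask, tf.float32), axis=2)   # [2, 5, 1]

# Padded positions (timesteps 3-4 of the second example) get zero weight.
loss = tf.losses.sigmoid_cross_entropy(labels, logits, weights=weights)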
Example #30
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (start_logits, end_logits, answer_type_logits) = create_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names) = (
          modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
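      # On TPU, checkpoint initialization has to happen inside scaffold_fn,
      # so it is deferred to a Scaffold instead of being run here directly.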
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      seq_length = modeling.get_shape_list(input_ids)[1]

      # Computes the loss for positions.
      def compute_loss(logits, positions):
        one_hot_positions = tf.one_hot(
            positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        loss = -tf.reduce_mean(
            tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
        return loss

      # Computes the loss for labels.
      def compute_label_loss(logits, labels):
        one_hot_labels = tf.one_hot(
            labels, depth=len(AnswerType), dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        loss = -tf.reduce_mean(
            tf.reduce_sum(one_hot_labels * log_probs, axis=-1))
        return loss

      start_positions = features["start_positions"]
      end_positions = features["end_positions"]
      answer_types = features["answer_types"]

      start_loss = compute_loss(start_logits, start_positions)
      end_loss = compute_loss(end_logits, end_positions)
      answer_type_loss = compute_label_loss(answer_type_logits, answer_types)

      total_loss = (start_loss + end_loss + answer_type_loss) / 3.0

      train_op = optimization.create_optimizer(total_loss, learning_rate,
                                               num_train_steps,
                                               num_warmup_steps, use_tpu)

      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
      predictions = {
          "unique_ids": unique_ids,
          "start_logits": start_logits,
          "end_logits": end_logits,
          "answer_type_logits": answer_type_logits,
      }
      output_spec = contrib_tpu.TPUEstimatorSpec(
          mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    else:
      raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                       mode)

    return output_spec
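The two loss helpers above are hand-rolled cross-entropy: a one-hot target dotted with log-softmax recovers the log-probability of the target index, so compute_loss is equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits. A quick numerical check as a sketch (the seq_length of 384 and the positions are arbitrary assumptions):

import tensorflow as tf

logits = tf.random_normal([4, 384])        # [batch, seq_length]
positions = tf.constant([0, 5, 17, 383])   # target token indices

one_hot = tf.one_hot(positions, depth=384, dtype=tf.float32)
log_probs = tf.nn.log_softmax(logits, axis=-1)
manual = -tf.reduce_mean(tf.reduce_sum(one_hot * log_probs, axis=-1))

builtin = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=positions, logits=logits))

with tf.Session() as sess:
  manual_val, builtin_val = sess.run([manual, builtin])
  print(manual_val, builtin_val)  # equal up to float precision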