def model_fn(features, labels, mode, params):
    unique_id = features["unique_id"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    input_type_ids = features["input_type_ids"]
    tokens = features["tokens"]
    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=False)
    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))
    tvars = tf.trainable_variables()
    scaffold_fn = None
    (assignment_map, initialized_variable_names
     ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    all_layers = model.get_all_encoder_layers()

    predictions = {
        "unique_id": unique_id,
        "tokens": tokens,
    }

    for (i, layer_index) in enumerate(layer_indexes):
        predictions["layer_output_%d" % i] = all_layers[layer_index]

    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
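For context, a feature-extraction model_fn like the one above is normally handed to a TPUEstimator whose predictions are iterated on the client side. The following is a minimal sketch of that wiring; run_config, input_fn and the batch size are assumptions standing in for whatever the enclosing script defines.

# Sketch: wiring a PREDICT-only model_fn into TPUEstimator (illustrative values).
run_config = tf.contrib.tpu.RunConfig(
    master=None,
    tpu_config=tf.contrib.tpu.TPUConfig(num_shards=8))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=False,  # the model_fn above only supports PREDICT
    model_fn=model_fn,
    config=run_config,
    predict_batch_size=32)

for result in estimator.predict(input_fn, yield_single_examples=True):
    unique_id = int(result["unique_id"])
    layer_0 = result["layer_output_0"]  # [seq_length, hidden_size] features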
Example #2
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    input_type_ids = features["input_type_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
      raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    tvars = tf.trainable_variables()
    scaffold_fn = None
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    all_layers = model.get_all_encoder_layers()

    predictions = {
        "unique_id": unique_ids,
    }

    for (i, layer_index) in enumerate(layer_indexes):
      predictions["layer_output_%d" % i] = all_layers[layer_index]

    output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec
Example #3
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
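Variables such as bert_config, num_labels, init_checkpoint, learning_rate and use_tpu are not arguments of model_fn; they are captured from an enclosing builder function. A minimal sketch of that closure pattern (the builder name and argument list are assumptions modelled on the BERT reference code):

def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):
    """Returns a `model_fn` closure for TPUEstimator."""

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        # The body is the code shown in Example #3 above; it reads the
        # builder arguments through the closure instead of through `params`.
        raise NotImplementedError("fill in with the Example #3 body")

    return model_fn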
Example #4
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    (masked_lm_loss,
     masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                    masked_lm_weights, next_sentence_example_loss,
                    next_sentence_log_probs, next_sentence_labels):
        """Computes the loss and accuracy of the model."""
        masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                         [-1, masked_lm_log_probs.shape[-1]])
        masked_lm_predictions = tf.argmax(
            masked_lm_log_probs, axis=-1, output_type=tf.int32)
        masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        masked_lm_accuracy = tf.metrics.accuracy(
            labels=masked_lm_ids,
            predictions=masked_lm_predictions,
            weights=masked_lm_weights)
        masked_lm_mean_loss = tf.metrics.mean(
            values=masked_lm_example_loss, weights=masked_lm_weights)

        next_sentence_log_probs = tf.reshape(
            next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        next_sentence_predictions = tf.argmax(
            next_sentence_log_probs, axis=-1, output_type=tf.int32)
        next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
        next_sentence_accuracy = tf.metrics.accuracy(
            labels=next_sentence_labels, predictions=next_sentence_predictions)
        next_sentence_mean_loss = tf.metrics.mean(
            values=next_sentence_example_loss)

        return {
            "masked_lm_accuracy": masked_lm_accuracy,
            "masked_lm_loss": masked_lm_mean_loss,
            "next_sentence_accuracy": next_sentence_accuracy,
            "next_sentence_loss": next_sentence_mean_loss,
        }

      eval_metrics = (metric_fn, [
          masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
          masked_lm_weights, next_sentence_example_loss,
          next_sentence_log_probs, next_sentence_labels
      ])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
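The eval_metrics value is a (metric_fn, tensors) pair because TPUEstimatorSpec runs the metric function on the host after pulling the listed tensors off the TPU. On CPU/GPU with a plain Estimator, the usual equivalent is to call the function directly and pass its dict as eval_metric_ops; a hedged sketch of that substitution, assuming the same tensors are in scope:

# Sketch: CPU/GPU equivalent of the TPU eval branch above.
eval_metric_ops = metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                            masked_lm_ids, masked_lm_weights,
                            next_sentence_example_loss,
                            next_sentence_log_probs, next_sentence_labels)
output_spec = tf.estimator.EstimatorSpec(
    mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)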
Example #5
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #6
def create_model(bert_config,
                 num_labels,
                 max_seq_length,
                 sess,
                 init_checkpoint=None,
                 use_GPU=False,
                 label_smoothing=0.0,
                 cycle=1):
    """Creates a classification model."""
    GPUs = get_available_gpus()
    default_device = '/cpu:0'
    if use_GPU and len(GPUs) != 0:
        default_device = '/gpu:{}'.format(GPUs[0])
    # Place all ops on the CPU (or the first GPU) by default
    with tf.device(default_device):
        tower_grads = []
        loss_list = []
        logits_list = []
        probabilities_list = []
        train_op = None
        loss = None
        logits = None
        probabilities = None
        global_step = tf.train.get_or_create_global_step()
        # input placeholder
        _input_ids = tf.placeholder(tf.int64, shape=(None, max_seq_length))
        _input_mask = tf.placeholder(tf.int64, shape=(None, max_seq_length))
        _segment_ids = tf.placeholder(tf.int64, shape=(None, max_seq_length))
        _label_ids = tf.placeholder(tf.int64, shape=None)
        _sample_weight = tf.placeholder(tf.float32, shape=None)
        _output_dropout_keep_prob = tf.placeholder(tf.float32, shape=None)
        _hidden_dropout_prob = tf.placeholder(tf.float32, shape=None)
        _attention_probs_dropout_prob = tf.placeholder(tf.float32, shape=None)
        # optimizer placeholder
        _learning_rate = tf.placeholder(tf.float32, shape=None)
        _num_train_steps = tf.placeholder(tf.int32, shape=None)
        _num_warmup_steps = tf.placeholder(tf.int32, shape=None)
        _batch_size = tf.placeholder(tf.int32, shape=None)
        # feed dict
        feed_dict = {
            'input_ids': _input_ids,
            'input_mask': _input_mask,
            'segment_ids': _segment_ids,
            'label_ids': _label_ids,
            'sample_weight': _sample_weight,
            'output_dropout_keep_prob': _output_dropout_keep_prob,
            'hidden_dropout_prob': _hidden_dropout_prob,
            'attention_probs_dropout_prob': _attention_probs_dropout_prob,
            'learning_rate': _learning_rate,
            'num_train_steps': _num_train_steps,
            'num_warmup_steps': _num_warmup_steps,
            'batch_size': _batch_size
        }

        optimizer = optimization.create_optimizer(
            _learning_rate, tf.cast((_num_train_steps / cycle), tf.int32),
            _num_warmup_steps)
        if use_GPU:
            batch_size = tf.to_int32(_batch_size / len(GPUs))
            for i in range(len(GPUs)):
                # with tf.device(assign_to_device('/gpu:{}'.format(GPUs[i]), ps_device='/gpu:0')):
                with tf.device('/gpu:{}'.format(GPUs[i])):
                    # split input data for every gpu device.
                    with tf.name_scope("input_slice"):
                        input_ids = _input_ids[i * batch_size:(i + 1) *
                                               batch_size]
                        input_mask = _input_mask[i * batch_size:(i + 1) *
                                                 batch_size]
                        segment_ids = _segment_ids[i * batch_size:(i + 1) *
                                                   batch_size]
                        label_ids = _label_ids[i * batch_size:(i + 1) *
                                               batch_size]
                        sample_weight = _sample_weight[i * batch_size:(i + 1) *
                                                       batch_size]

                    # build model
                    model = modeling.BertModel(
                        config=bert_config,
                        input_ids=input_ids,
                        input_mask=input_mask,
                        token_type_ids=segment_ids,
                        hidden_dropout_prob=_hidden_dropout_prob,
                        attention_probs_dropout_prob=
                        _attention_probs_dropout_prob,
                        scope="bert")
                    # If you want to use the token-level output, use model.get_sequence_output() instead.
                    output_layer = model.get_pooled_output()
                    hidden_size = output_layer.shape[-1].value
                    with tf.variable_scope("output", reuse=tf.AUTO_REUSE):
                        output_weights = tf.get_variable(
                            "output_weights", [num_labels, hidden_size],
                            initializer=tf.truncated_normal_initializer(
                                stddev=0.02))

                        output_bias = tf.get_variable(
                            "output_bias", [num_labels],
                            initializer=tf.zeros_initializer())

                        with tf.variable_scope("loss"):
                            # I.e., 0.1 dropout
                            output_layer = tf.nn.dropout(
                                output_layer,
                                keep_prob=_output_dropout_keep_prob)

                            logits_ = tf.matmul(output_layer,
                                                output_weights,
                                                transpose_b=True)
                            logits_ = tf.nn.bias_add(logits_, output_bias)
                            probabilities_ = tf.nn.softmax(logits_, axis=-1)

                            one_hot_labels = tf.one_hot(label_ids,
                                                        depth=num_labels,
                                                        dtype=tf.float32)

                            loss_ = tf.losses.softmax_cross_entropy(
                                one_hot_labels,
                                logits_,
                                weights=sample_weight,
                                label_smoothing=label_smoothing)

                            grads_ = optimizer.compute_gradients(loss_)
                            tower_grads.append(grads_)
                            loss_list.append(loss_)
                            logits_list.append(logits_)
                            probabilities_list.append(probabilities_)

            loss = tf.reduce_mean(loss_list)
            if len(GPUs) == 1:
                logits = tf.squeeze(logits_list, [0])
                probabilities = tf.squeeze(probabilities_list, [0])
            else:
                logits = tf.keras.layers.concatenate(logits_list, axis=0)
                probabilities = tf.keras.layers.concatenate(probabilities_list,
                                                            axis=0)
            # Merge grads
            with tf.name_scope("merge_grads"):
                grads = average_gradients(tower_grads)
                capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                              for grad, var in grads]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)
        else:
            # build model
            model = modeling.BertModel(
                config=bert_config,
                input_ids=_input_ids,
                input_mask=_input_mask,
                token_type_ids=_segment_ids,
                hidden_dropout_prob=_hidden_dropout_prob,
                attention_probs_dropout_prob=_attention_probs_dropout_prob,
                scope="bert")
            # If you want to use the token-level output, use model.get_sequence_output() instead.
            output_layer = model.get_pooled_output()
            hidden_size = output_layer.shape[-1].value
            with tf.variable_scope("output", reuse=tf.AUTO_REUSE):
                output_weights = tf.get_variable(
                    "output_weights", [num_labels, hidden_size],
                    initializer=tf.truncated_normal_initializer(stddev=0.02))

                output_bias = tf.get_variable(
                    "output_bias", [num_labels],
                    initializer=tf.zeros_initializer())

                with tf.variable_scope("loss"):
                    # I.e., 0.1 dropout
                    output_layer = tf.nn.dropout(
                        output_layer, keep_prob=_output_dropout_keep_prob)

                    logits = tf.matmul(output_layer,
                                       output_weights,
                                       transpose_b=True)
                    logits = tf.nn.bias_add(logits, output_bias)
                    probabilities = tf.nn.softmax(logits, axis=-1)

                    one_hot_labels = tf.one_hot(_label_ids,
                                                depth=num_labels,
                                                dtype=tf.float32)

                    loss = tf.losses.softmax_cross_entropy(
                        one_hot_labels,
                        logits,
                        weights=_sample_weight,
                        label_smoothing=label_smoothing)
            with tf.name_scope("merge_grads"):
                grads = optimizer.compute_gradients(loss)
                capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                              for grad, var in grads]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

        # initial model's variables.
        tf.logging.info("Load model checkpoint : %s" % init_checkpoint)
        tvars = tf.trainable_variables()
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # # print variables
        # tf.logging.info("**** Trainable Variables ****")
        # for var in tvars:
        #     init_string = ""
        #     if var.name in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                     init_string)
        # attention_probs = model.get_all_layer_attention_probs()
        # return (train_op, loss, logits, probabilities, feed_dict, attention_probs)
        return (train_op, loss, logits, probabilities, feed_dict)
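Example #6 calls average_gradients(tower_grads) and get_available_gpus() without showing them. The helper below is the widely used tower-averaging implementation from the TensorFlow multi-GPU tutorials; treat it as an illustrative assumption rather than this repository's exact code.

def average_gradients(tower_grads):
    """Averages (gradient, variable) pairs computed on each GPU tower."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_gpu0, var), (grad_gpu1, var), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Every tower shares the same variable object, so take it from tower 0.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads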
Example #7
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        batch_size = modeling.get_shape_list(input_ids)[0]
        masked_lm_positions = tf.constant([
            sorted(
                random.sample(range(1, FLAGS.max_seq_length - 2),
                              FLAGS.max_predictions_per_seq))
            for i in range(batch_size)
        ])
        masks_list = tf.constant([MASK_ID] *
                                 (FLAGS.max_predictions_per_seq * batch_size))
        masked_lm_weights = tf.multiply(
            tf.ones(modeling.get_shape_list(masked_lm_positions)),
            tf.cast(gather_indexes_rank2(input_mask, masked_lm_positions),
                    tf.float32))

        masked_input_ids = replace_elements_by_indices(input_ids, masks_list,
                                                       masked_lm_positions)
        masked_input_ids = tf.multiply(masked_input_ids, input_mask)
        masked_lm_ids = gather_indexes_rank2(input_ids, masked_lm_positions)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=masked_input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            train_pooler=False)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        model_summary()

        total_loss = masked_lm_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            if FLAGS.opt == 'lamb':
                train_op = optimization.create_lamb_optimizer(
                    total_loss,
                    learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    use_tpu,
                    weight_decay=0.01)

            elif FLAGS.opt == 'adam':
                train_op = optimization.create_adam_optimizer(
                    total_loss,
                    learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    use_tpu,
                    weight_decay=0.01)

            else:
                sys.exit("%s does not exist." % FLAGS.opt)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
            """
      tf.profiler.profile(
          tf.get_default_graph(),
          options=tf.profiler.ProfileOptionBuilder.float_operation())
      """

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights):
                """Computes the masked LM loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                # This model_fn trains the masked LM only, so next-sentence
                # metrics are omitted; the signature matches the tensors
                # passed in eval_metrics below.
                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
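Example #7 also depends on helpers such as gather_indexes_rank2 and replace_elements_by_indices that are not shown. A hedged sketch of what gather_indexes_rank2 plausibly does, adapted from the flat-offset trick used in the BERT pretraining code (this is an assumption, not the original helper):

def gather_indexes_rank2(sequence_tensor, positions):
    """Gathers `positions` from each row of a [batch, seq_length] tensor and
    keeps the result as [batch, num_positions] (illustrative sketch)."""
    batch_size, num_positions = modeling.get_shape_list(positions, expected_rank=2)
    seq_length = modeling.get_shape_list(sequence_tensor, expected_rank=2)[1]
    # Convert per-row positions into offsets in the flattened tensor.
    flat_offsets = tf.reshape(
        tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
    flat_positions = tf.reshape(positions + flat_offsets, [-1])
    flat_sequence = tf.reshape(sequence_tensor, [-1])
    gathered = tf.gather(flat_sequence, flat_positions)
    return tf.reshape(gathered, [batch_size, num_positions])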
Example #8
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, predicts) = create_model(
            bert_config, is_training, input_ids, input_mask, label_mask,
            segment_ids, label_ids, num_labels, use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            hook_dict = {}
            hook_dict['loss'] = total_loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(hook_dict,
                                                      every_n_iter=200)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
                training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # labels = []
                # for i, x in enumerate()
                predict_labels = []
                # for i in range(1, num_labels - 4):
                #     predict_labels.append(i)
                # precision = tf_metrics.precision(label_ids, predictions, num_labels, predict_labels, average="macro")
                # recall = tf_metrics.recall(label_ids, predictions, num_labels, predict_labels, average="macro")
                # f = tf_metrics.f1(label_ids, predictions, num_labels, predict_labels, average="macro")

                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 num_labels,
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           num_labels,
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  num_labels,
                                  average="macro")

                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
Example #9
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    
    loss_type = ft_params[1]
    tf.logging.info("Using loss type: %s" % loss_type)

    (total_loss, per_example_loss, log_probs, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids, num_labels,
        ft_params)
    
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, log_probs, is_real_example):
        predictions = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

        accuracy = tf.compat.v1.metrics.accuracy(labels=label_ids, predictions=predictions, weights=is_real_example)
        loss = tf.compat.v1.metrics.mean(values=per_example_loss, weights=is_real_example)
        f1_score = tf.contrib.metrics.f1_score(label_ids, predictions)
        auc = tf.compat.v1.metrics.auc(label_ids, predictions)
        recall = tf.compat.v1.metrics.recall(label_ids, predictions)
        precision = tf.compat.v1.metrics.precision(label_ids, predictions)
        true_pos = tf.compat.v1.metrics.true_positives(label_ids, predictions)
        true_neg = tf.compat.v1.metrics.true_negatives(label_ids, predictions)
        false_pos = tf.compat.v1.metrics.false_positives(label_ids, predictions)  
        false_neg = tf.compat.v1.metrics.false_negatives(label_ids, predictions)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
            "F1_Score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = (metric_fn, [per_example_loss, label_ids, log_probs, is_real_example])

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec
Example #10
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        """
        features: This is batch_features from input_fn
        labels: This is batch_labels from input_fn
        mode:   An instance of tf.estimator.ModeKeys
        params: Additional configuration
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_sequence = features["input_sequence"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        edit_sequence = features["edit_sequence"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss,
         logits, probabilities) = gec_create_model(
             bert_config, is_training, input_sequence, input_mask, segment_ids,
             edit_sequence, use_one_hot_embeddings, mode, copy_weight,
             use_bert_more, insert_ids, multitoken_insert_ids,
             subtract_replaced_from_replacement)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            # If an init checkpoint was given, build the assignment map
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            if FLAGS.use_tpu and FLAGS.tpu_name:
                # TPU train
                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps, use_tpu)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
            else:
                # GPUs or CPU train
                train_op = custom_optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps)
                output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                         loss=total_loss,
                                                         train_op=train_op)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, edit_sequence, logits):
                predictions = tf.argmax(
                    logits[:, :, 3:], axis=-1, output_type=tf.int32) + 3
                mask = tf.equal(edit_sequence, 0)
                mask = tf.logical_or(mask, tf.equal(edit_sequence, 1))
                mask = tf.logical_or(mask, tf.equal(edit_sequence, 2))
                mask = tf.logical_or(mask, tf.equal(edit_sequence, 3))
                mask = tf.to_float(tf.logical_not(mask))
                accuracy = tf.metrics.accuracy(edit_sequence, predictions,
                                               mask)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, edit_sequence, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # first three edit ids unk, sos, eos are dummy. We do not consider them in predictions
            predictions = tf.argmax(
                logits[:, :, 3:], axis=-1, output_type=tf.int32) + 3
            if FLAGS.use_tpu and FLAGS.tpu_name:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={
                        "predictions": predictions,
                        "logits": logits
                    },
                    scaffold_fn=scaffold_fn)
            else:
                # multiple GPUs
                output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                         predictions={
                                                             "predictions":
                                                             predictions,
                                                             "logits": logits
                                                         })
        return output_spec
Example #11
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        # is_real_example = None
        # if "is_real_example" in features:
        #     is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        # else:
        #     is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        # Build the model from the hyperparameters; input_ids are the token-id
        # representations of the inputs, label_ids the label indices
        if FLAGS.task_name.lower() == 'ner':
            (total_loss, per_example_loss, logits, predicts) = create_model(
                bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
                num_labels, use_one_hot_embeddings)
        else:
            (total_loss, per_example_loss, logits, probabilities) = create_model(
                bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
                num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        # Load the BERT checkpoint (on TPU the actual restore happens inside the scaffold)
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log which variables were loaded from the checkpoint
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            ### Metric design for the tagging (NER) task
            if FLAGS.task_name.lower() == 'ner':
                def metric_fn(per_example_loss, label_ids, logits):
                    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                    precision = tf_metrics.precision(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                    recall = tf_metrics.recall(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                    f = tf_metrics.f1(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                    return {
                        "eval_precision": precision,
                        "eval_recall": recall,
                        "eval_f": f,
                    }

                eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
            else:
                def metric_fn(per_example_loss, label_ids, logits):
                    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                    accuracy = tf.metrics.accuracy(
                        labels=label_ids, predictions=predictions)
                    loss = tf.metrics.mean(values=per_example_loss)
                    return {
                        "eval_accuracy": accuracy,
                        "eval_loss": loss,
                    }

                eval_metrics = (metric_fn,
                                [per_example_loss, label_ids, logits])
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
        else:
            if FLAGS.task_name.lower() == 'ner':
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode, predictions=predicts, scaffold_fn=scaffold_fn
                )
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"probabilities": probabilities},
                    scaffold_fn=scaffold_fn)
        return output_spec
Example #12
def optimize_graph(logger=None, verbose=False):
    if not logger:
        logger = set_logger(colored('BERT_VEC', 'yellow'), verbose)
    try:
        # we don't need GPU for optimizing the graph
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
        tf.gfile.MakeDirs(args.output_dir_sim)
        tf.gfile.MakeDirs(args.output_dir)
        config_fp = args.config_name
        logger.info('model config: %s' % config_fp)

        # Load the BERT config file
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len),
                                   'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len),
                                    'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len),
                                        'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(config=bert_config,
                                       is_training=False,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=input_type_ids,
                                       use_one_hot_embeddings=False)

            # Get all trainable variables
            tvars = tf.trainable_variables()

            init_checkpoint = args.ckpt_name
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Pool the selected encoder layers under a shared scope
            with tf.variable_scope("pooling"):
                # If only one layer is requested, take that layer's output directly
                if len(args.layer_indexes) == 1:
                    encoder_layer = model.all_encoder_layers[
                        args.layer_indexes[0]]
                else:
                    # Otherwise iterate over the requested layers and concatenate their outputs (shape: 768 * number of layers)
                    all_layers = [
                        model.all_encoder_layers[l] for l in args.layer_indexes
                    ]
                    encoder_layer = tf.concat(all_layers, -1)

            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(
                mul_mask(x, m), axis=1) / (tf.reduce_sum(
                    m, axis=1, keepdims=True) + 1e-10)

            input_mask = tf.cast(input_mask, tf.float32)
            # The following builds the sentence vector as a masked mean over the sequence, with input_mask acting as the mask
            pooled = masked_reduce_mean(encoder_layer, input_mask)
            pooled = tf.identity(pooled, 'final_encodes')

            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        # allow_soft_placement: choose devices automatically, but it tends to claim every GPU and can raise errors
        #config = tf.ConfigProto(allow_soft_placement=True)
        # Optionally cap the fraction of GPU memory used (e.g. 50%)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        #config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
        #config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            logger.info('freeze...')
            # Freeze the graph, keeping only the named output tensors
            tmp_g = tf.graph_util.convert_variables_to_constants(
                sess, tmp_g, [n.name[:-2] for n in output_tensors])
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g, [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes], False)
        # Create a temp file; delete=True removes it automatically on close
        tmp_file = tempfile.NamedTemporaryFile('w',
                                               delete=True,
                                               dir=args.output_dir_sim).name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file
    except Exception as e:
        logger.error('failed to optimize the graph!')
        logger.error(e)
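
# A minimal sketch of consuming the frozen graph file written by optimize_graph(). It assumes
# only the tensor names created above ('input_ids', 'input_mask', 'input_type_ids',
# 'final_encodes'); producing the id/mask arrays from text is left to the caller.
import tensorflow as tf

def encode_with_frozen_graph(graph_path, ids_arr, mask_arr, type_ids_arr):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(graph_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as g:
        tf.import_graph_def(graph_def, name='')
        with tf.Session(graph=g) as sess:
            # Run the pooled sentence vectors exposed as 'final_encodes' above
            return sess.run('final_encodes:0', feed_dict={
                'input_ids:0': ids_arr,
                'input_mask:0': mask_arr,
                'input_type_ids:0': type_ids_arr,
            })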
Example #13
    def __init__(self, is_training):
        self.is_training = is_training
        self.input_ids = tf.compat.v1.placeholder(
            tf.int32, shape=[None, hp.sequence_length], name='input_ids')
        self.input_masks = tf.compat.v1.placeholder(
            tf.int32, shape=[None, hp.sequence_length], name='input_masks')
        self.segment_ids = tf.compat.v1.placeholder(
            tf.int32, shape=[None, hp.sequence_length], name='segment_ids')
        self.label_ids = tf.compat.v1.placeholder(tf.int32,
                                                  shape=[None],
                                                  name='label_ids')
        # Load the pre-trained ALBERT LM
        self.model = modeling.AlbertModel(config=bert_config,
                                          is_training=self.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_masks,
                                          token_type_ids=self.segment_ids,
                                          use_one_hot_embeddings=False)

        # Get the 3-D sequence output: (batch_size, sequence_length, hidden_size)
        output_layer_init = self.model.get_sequence_output()
        # Cell textcnn
        output_layer = cell_textcnn(output_layer_init, self.is_training)
        # Hidden size
        #hidden_size = output_layer.shape[-1].value
        hidden_size = output_layer.shape[-1]
        # Dense
        with tf.name_scope("Full-connection"):
            output_weights = tf.compat.v1.get_variable(
                "output_weights", [num_labels, hidden_size],
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.02))

            output_bias = tf.compat.v1.get_variable(
                "output_bias", [num_labels],
                initializer=tf.zeros_initializer())
            # Logit
            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            self.logits = tf.nn.bias_add(logits, output_bias)
            print('logits: ', self.logits)
            self.probabilities = tf.nn.softmax(self.logits, axis=-1)
        # Prediction
        with tf.compat.v1.variable_scope("Prediction"):
            self.preds = tf.argmax(self.logits, axis=-1, output_type=tf.int32)
            print('preds:', self.preds)
        # Summary for tensorboard
        with tf.compat.v1.variable_scope("Loss"):
            if self.is_training:
                self.accuracy = tf.reduce_mean(
                    tf.compat.v1.to_float(tf.equal(self.preds,
                                                   self.label_ids)))
                tf.summary.scalar('Accuracy', self.accuracy)

            # Check whether a trained checkpoint already exists
            ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
            checkpoint_suffix = ".index"
            if ckpt and tf.compat.v1.gfile.Exists(ckpt.model_checkpoint_path +
                                                  checkpoint_suffix):
                print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
                print("%s - Restoring model from checkpoint ~%s" %
                      (time_now_string(), ckpt.model_checkpoint_path))
            else:
                # Otherwise initialize from the pre-trained checkpoint
                print('=' * 10, 'First time load BERT model!', '=' * 10)
                tvars = tf.compat.v1.trainable_variables()
                if hp.init_checkpoint:
                    (assignment_map, initialized_variable_names) = \
                        modeling.get_assignment_map_from_checkpoint(tvars,
                                                                    hp.init_checkpoint)
                    tf.compat.v1.train.init_from_checkpoint(
                        hp.init_checkpoint, assignment_map)

            # Optimization
            if self.is_training:
                # Global_step
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                # Loss
                log_probs = tf.nn.log_softmax(self.logits, axis=-1)  # predicted log-probabilities
                one_hot_labels = tf.one_hot(
                    self.label_ids, depth=num_labels,
                    dtype=tf.float32)  # one-hot labels (used for loss and accuracy)
                per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                                  axis=-1)
                self.loss = tf.reduce_mean(per_example_loss)
                # Optimizer
                train_examples = processor.get_train_examples(hp.data_dir)
                num_train_steps = int(
                    len(train_examples) / hp.batch_size * hp.num_train_epochs)
                num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
                print('num_train_steps', num_train_steps)
                self.optimizer = optimization.create_optimizer(
                    self.loss,
                    hp.learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    hp.use_tpu,
                )
                # Summary for tensorboard
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.compat.v1.summary.merge_all()

        # Count the parameters
        count_model_params()
        vs = tf.compat.v1.trainable_variables()
        for l in vs:
            print(l)
        print('=' * 40)
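
# cell_textcnn() is defined elsewhere in this project and is not shown here. Purely as a
# hypothetical sketch (kernel sizes, filter count and dropout rate are assumptions, not the
# project's actual values), a TextCNN pooling cell over the ALBERT sequence output could
# look like this:
import tensorflow as tf

def cell_textcnn_sketch(inputs, is_training, filter_sizes=(2, 3, 4), num_filters=128):
    # inputs: (batch_size, sequence_length, hidden_size)
    pooled = []
    for size in filter_sizes:
        conv = tf.compat.v1.layers.conv1d(inputs, num_filters, size,
                                          padding='same', activation=tf.nn.relu)
        pooled.append(tf.reduce_max(conv, axis=1))  # max-pool over the time axis
    output = tf.concat(pooled, axis=-1)  # (batch_size, num_filters * len(filter_sizes))
    return tf.compat.v1.layers.dropout(output, rate=0.1, training=is_training)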
Example #14
def predict_loop(opts, finetuned_checkpoint_path=None):
    i = 0
    eval_examples = squad_data.read_squad_examples(opts["predict_file"],
                                                   opts,
                                                   is_training=False)

    tfrecord_dir = opts['tfrecord_dir']
    if not os.path.exists(tfrecord_dir):
        os.makedirs(tfrecord_dir)

    eval_writer = squad_data.FeatureWriter(filename=os.path.join(
        tfrecord_dir, "eval.tf_record"),
                                           is_training=False)
    eval_features = []

    tokenizer = tokenization.FullTokenizer(vocab_file=opts['vocab_file'],
                                           do_lower_case=opts['do_lower_case'])

    def append_feature(feature):
        eval_features.append(feature)
        eval_writer.process_feature(feature)

    # Create eval.tfrecord
    num_features = squad_data.convert_examples_to_features(
        examples=eval_examples,
        tokenizer=tokenizer,
        max_seq_length=opts["seq_length"],
        doc_stride=opts["doc_stride"],
        max_query_length=opts["max_query_length"],
        is_training=False,
        output_fn=append_feature)

    eval_writer.close()
    iterations_per_step = 1
    predict = build_graph(opts, iterations_per_step, is_training=False)
    predict.session.run(predict.init)
    predict.session.run(predict.iterator.initializer)

    if opts["init_checkpoint"] and not finetuned_checkpoint_path:
        finetuned_checkpoint_path = opts['init_checkpoint']

    # Note that finetuned_checkpoint_path could be already set during "do_predict"
    if finetuned_checkpoint_path and not opts.get('generated_data', False):
        (assignment_map, _initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             predict.tvars, finetuned_checkpoint_path)
        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(predict.session, finetuned_checkpoint_path)
        assert len(assignment_map) >= 127

    all_results = []
    if (opts['micro_batch_size'] * opts['gradient_accumulation_count']) == 1:
        iterations = len(eval_features) // (
            opts['micro_batch_size'] * opts['gradient_accumulation_count'] *
            opts['replicas'])
    else:
        iterations = len(eval_features) // (
            opts['micro_batch_size'] * opts['gradient_accumulation_count'] *
            opts['replicas']) + 1
    logger.info(f"Total iterations: {iterations}")
    all_time_consumption = []
    while i < iterations:
        try:
            # start = time.time()
            unique_ids, start_logits, end_logits, batch_duration = predict_step(
                predict)
            # duration = time.time() - start
            # all_time_consumption.append(duration)
            all_time_consumption.append(batch_duration /
                                        opts["batches_per_step"])
        except tf.errors.OpError as e:
            raise tf.errors.ResourceExhaustedError(e.node_def, e.op, e.message)

        i += iterations_per_step

        if len(all_results) % 1000 == 0:
            logger.info(f"Procesing example: {len(all_results)}")

        # The outfeed shape is [batches_per_step, num_replicas (if replication enabled), micro_batch_size, seq_len].
        # Flatten to keep only the last dimension
        num_samples = np.prod(unique_ids.shape)
        seq_len = opts['seq_length']
        unique_ids = unique_ids.reshape([num_samples])
        start_logits = start_logits.reshape([num_samples, seq_len])
        end_logits = end_logits.reshape([num_samples, seq_len])

        for j in range(num_samples):
            unique_id = unique_ids[j]
            start_logit = start_logits[j, :].tolist()
            end_logit = end_logits[j, :].tolist()
            all_results.append(
                squad_results.RawResult(unique_id=unique_id,
                                        start_logits=start_logit,
                                        end_logits=end_logit))

    # The first 10 steps are warm-up and not stable, so drop them from the timing
    # measurement when enough steps have been recorded.
    if len(all_time_consumption) >= 10 * 2:
        all_time_consumption = np.array(all_time_consumption[10:])
    else:
        logger.warning(
            "Since the first 10 steps are included, the throughput and latency measurements may not be accurate."
        )
        all_time_consumption = np.array(all_time_consumption)

    logger.info((
        f"inference throughput: { (opts['micro_batch_size'] * opts['gradient_accumulation_count'] if should_be_pipeline_when_inference(opts) else opts['micro_batch_size']) / all_time_consumption.mean() } "
        f"exmples/sec - Latency: {all_time_consumption.mean()} {all_time_consumption.min()} "
        f"{all_time_consumption.max()} (mean min max) sec "))
    # Done predictions

    output_dir = opts['output_dir']
    if output_dir is None:
        if 'adamw' in finetuned_checkpoint_path:
            output_dir = finetuned_checkpoint_path.split('/ckpt')[0]
    else:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    output_prediction_file = os.path.join(output_dir, "predictions.json")
    output_nbest_file = os.path.join(output_dir, "best_predictions.json")
    output_null_log_odds_file = os.path.join(output_dir, "null_odds.json")
    eval_features = eval_features[:num_features]
    squad_results.write_predictions(
        eval_examples, eval_features, all_results, opts["n_best_size"],
        opts["max_answer_length"], opts["do_lower_case"],
        output_prediction_file, output_nbest_file, output_null_log_odds_file,
        opts["version_2_with_negative"], opts["null_score_diff_threshold"],
        opts["verbose_logging"])

    predict.session.close()

    if opts['do_evaluation']:
        evaluate_squad(output_prediction_file, opts)
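
# The reshape step above flattens an outfeed of shape
# [batches_per_step, num_replicas, micro_batch_size, seq_len] into per-sample rows.
# A small NumPy illustration with assumed sizes (2 batches per step, 2 replicas,
# micro batch size 4, seq_len 384):
import numpy as np

unique_ids_demo = np.arange(2 * 2 * 4).reshape([2, 2, 4])
start_logits_demo = np.zeros([2, 2, 4, 384])
num_samples_demo = np.prod(unique_ids_demo.shape)                 # 16 samples in this step
print(unique_ids_demo.reshape([num_samples_demo]).shape)          # (16,)
print(start_logits_demo.reshape([num_samples_demo, 384]).shape)   # (16, 384)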
Example #15
def training_loop(opts):

    consume_time = None

    if opts["version_2_with_negative"]:
        base_name_train = f"{opts['seq_length']}_{opts['doc_stride']}_{opts['max_query_length']}_SQuAD20"
    else:
        base_name_train = f"{opts['seq_length']}_{opts['doc_stride']}_{opts['max_query_length']}_SQuAD11"

    train_metafile = os.path.join(opts["tfrecord_dir"],
                                  "train_" + base_name_train + ".metadata")
    if os.path.exists(train_metafile):
        with open(train_metafile) as f:
            total_samples = int(f.readline())
    else:
        if opts["version_2_with_negative"]:
            logger.info(
                f"SQUAD 2.0 DATASET SIZE 131944 (based on no. of features).")
            total_samples = 131944
        else:
            logger.info(
                f"SQUAD 1.1 DATASET SIZE 88641 (based on no. of features).")
            total_samples = 88641

    logger.info(f"Total samples {total_samples}")
    iterations_per_epoch = total_samples // opts["total_batch_size"]
    log_iterations = opts['batches_per_step'] * opts["steps_per_logs"]
    ckpt_iterations = opts['batches_per_step'] * opts["steps_per_ckpts"]

    if opts.get('num_train_steps'):
        # total iterations
        iterations = opts['num_train_steps'] * opts['batches_per_step']
    elif opts.get('epochs'):
        iterations = iterations_per_epoch * opts['epochs']
    else:
        logger.error("One between epochs and num_train_step must be set")
        sys.exit(os.EX_OK)

    logger.info(
        f"Training will last {iterations} iterations and {iterations//opts['batches_per_step']} steps will be executed."
    )

    # This many iterations are run in each step.
    iterations_per_step = opts['batches_per_step']
    # Avoid a NaN when the queue length would otherwise be zero.
    queue_len = iterations_per_epoch // iterations_per_step
    if queue_len == 0:
        queue_len = 1
    batch_times = deque(maxlen=queue_len)

    total_steps = (iterations //
                   opts['batches_per_step']) * opts['batches_per_step']

    # Learning rate schedule
    lr_schedule_name = opts['lr_schedule']
    logger.info(f"Using learning rate schedule {lr_schedule_name}")
    learning_rate_schedule = make_lr_schedule(lr_schedule_name, opts,
                                              total_steps)

    # -------------- BUILD TRAINING GRAPH ----------------
    train = build_graph(opts, iterations_per_step, is_training=True)
    train.session.run(train.init)
    train.session.run(train.iterator.initializer)

    # Checkpoints restore and save
    init_checkpoint_path = opts['init_checkpoint']
    if init_checkpoint_path and not opts.get('generated_data', False):
        if os.path.isfile(init_checkpoint_path):
            init_checkpoint_path = os.path.splitext(init_checkpoint_path)[0]

        (assignment_map, initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             train.tvars, init_checkpoint_path)

        for var in train.tvars:
            if var.name in initialized_variable_names:
                mark = "*"
            else:
                mark = " "
            logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                        var.dtype.name)

        reader = tf.train.NewCheckpointReader(init_checkpoint_path)
        load_vars = reader.get_variable_to_shape_map()

        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(train.session, init_checkpoint_path)

    if opts['steps_per_ckpts']:
        filepath = train.saver.save(train.session,
                                    opts["checkpoint_path"],
                                    global_step=0)
        logger.info(f"Saved checkpoint to {filepath}")

    if opts.get('restore_dir'):
        restore_path = opts['restore_dir']
        if os.path.isfile(restore_path):
            latest_checkpoint = os.path.splitext(restore_path)[0]
        else:
            latest_checkpoint = tf.train.latest_checkpoint(restore_path)
        ckpt_pattern = re.compile(".*ckpt-([0-9]+)$")
        i = int(ckpt_pattern.match(latest_checkpoint).groups()[0]) + 1
        train.saver.restore(train.session, latest_checkpoint)
        epoch = float(opts["total_batch_size"] *
                      (i + iterations_per_step)) / total_samples
    else:
        i = 0

    # Tensorboard logs path
    log_path = os.path.join(opts["logs_path"], 'event')
    logger.info("Tensorboard event file path {}".format(log_path))
    summary_writer = tf.summary.FileWriter(log_path,
                                           train.graph,
                                           session=train.session)
    start_time = datetime.datetime.now()

    # Training loop
    print_format = (
        "step: {step:6d}, iteration: {iteration:6d}, epoch: {epoch:6.2f}, lr: {lr:6.4g}, loss: {loss:6.3f}, "
        "throughput {throughput_samples_per_sec:6.2f} samples/sec, batch time: {avg_batch_time:8.6f} s, total_time: {total_time:8.1f} s"
    )
    step = 0
    start_all = time.time()

    while i < iterations:
        step += 1
        epoch = float(opts["total_batch_size"] * i) / total_samples

        learning_rate = learning_rate_schedule.get_at_step(step)

        try:
            loss, batch_time = training_step(train, learning_rate)
        except tf.errors.OpError as e:
            raise tf.errors.ResourceExhaustedError(e.node_def, e.op, e.message)

        batch_time /= iterations_per_step

        if i != 0:
            batch_times.append([batch_time])
            avg_batch_time = np.mean(batch_times)
        else:
            avg_batch_time = batch_time

        if i % log_iterations == 0:
            throughput = opts['total_batch_size'] / avg_batch_time

            # flush times every time it is reported
            batch_times.clear()

            total_time = time.time() - start_all

            stats = OrderedDict([('step', step),
                                 ('iteration', i + iterations_per_step),
                                 ('epoch', epoch), ('lr', learning_rate),
                                 ('loss', loss),
                                 ('avg_batch_time', avg_batch_time),
                                 ('throughput_samples_per_sec', throughput),
                                 ('total_time', total_time),
                                 ('learning_rate', learning_rate)])
            logger.info(print_format.format(**stats))

            train_summary = tf.Summary()
            train_summary.value.add(tag='epoch', simple_value=epoch)
            train_summary.value.add(tag='loss', simple_value=loss)
            train_summary.value.add(tag='learning_rate',
                                    simple_value=learning_rate)
            train_summary.value.add(tag='throughput', simple_value=throughput)

            if opts['wandb']:
                wandb.log(dict(stats))

            summary_writer.add_summary(train_summary, step)
            summary_writer.flush()

        if i % ckpt_iterations == 0:
            filepath = train.saver.save(train.session,
                                        opts["checkpoint_path"],
                                        global_step=i + iterations_per_step)
            logger.info(f"Saved checkpoint to {filepath}")

        i += iterations_per_step

    # We save the final checkpoint
    finetuned_checkpoint_path = train.saver.save(train.session,
                                                 opts["checkpoint_path"],
                                                 global_step=i +
                                                 iterations_per_step)
    logger.info(f"Saved checkpoint to {finetuned_checkpoint_path}")
    train.session.close()
    end_time = datetime.datetime.now()
    consume_time = (end_time - start_time).seconds
    logger.info(f"training times: {consume_time} s")
    return finetuned_checkpoint_path
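
# make_lr_schedule() is provided elsewhere in this code base; the only interface used above
# is get_at_step(). A minimal sketch of such a schedule object, assuming linear warm-up
# followed by linear decay (the real schedules may differ):
class LinearWarmupDecayScheduleSketch:
    def __init__(self, base_lr, warmup_steps, total_steps):
        self.base_lr = base_lr
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps

    def get_at_step(self, step):
        if step < self.warmup_steps:
            return self.base_lr * step / max(1, self.warmup_steps)
        remaining = max(0, self.total_steps - step)
        return self.base_lr * remaining / max(1, self.total_steps - self.warmup_steps)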
Example #16
def main():
    print("print start load the params...")
    print(json.dumps(config, ensure_ascii=False, indent=2))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.gfile.MakeDirs(config["out"])
    train_examples_len = config["train_examples_len"]
    dev_examples_len = config["dev_examples_len"]
    learning_rate = config["learning_rate"]
    eval_per_step = config["eval_per_step"]
    num_labels = config["num_labels"]
    num_train_steps = math.ceil(train_examples_len / config["train_batch_size"])
    num_dev_steps = math.ceil(dev_examples_len / config["dev_batch_size"])
    num_warmup_steps = math.ceil(num_train_steps * config["num_train_epochs"] * config["warmup_proportion"])
    print("num_train_steps:{},  num_dev_steps:{},  num_warmup_steps:{}".format(num_train_steps, num_dev_steps,
                                                                               num_warmup_steps))
    use_one_hot_embeddings = False
    is_training = True
    use_tpu = False
    seq_len = config["max_seq_len"]
    init_checkpoint = config["init_checkpoint"]
    print("print start compile the bert model...")
    # 定义输入输出
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, seq_len], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, acc, logits, probabilities) = create_model(bert_config_, is_training, input_ids,
                                                                         input_mask, segment_ids, labels, keep_prob,
                                                                         num_labels, use_one_hot_embeddings)
    train_op = optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps * config["num_train_epochs"], num_warmup_steps, False)
    print("print start train the bert model...")

    batch_size = config["train_batch_size"]
    dev_batch_size = config["dev_batch_size"]

    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver([v for v in tf.global_variables() if 'adam_v' not in v.name and 'adam_m' not in v.name],
                           max_to_keep=2)  # keep the most recent checkpoints (Adam slot variables excluded)

    with tf.Session() as sess:
        sess.run(init_global)
        print("start load the pre train model")

        if init_checkpoint:
            # tvars = tf.global_variables()
            tvars = tf.trainable_variables()
            print("global_variables", len(tvars))
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                                                                       init_checkpoint)
            print("initialized_variable_names:", len(initialized_variable_names))
            saver_ = tf.train.Saver([v for v in tvars if v.name in initialized_variable_names])
            saver_.restore(sess, init_checkpoint)
            tvars = tf.global_variables()
            initialized_vars = [v for v in tvars if v.name in initialized_variable_names]
            not_initialized_vars = [v for v in tvars if v.name not in initialized_variable_names]
            tf.logging.info('--all size %s; not initialized size %s' % (len(tvars), len(not_initialized_vars)))
            if len(not_initialized_vars):
                sess.run(tf.variables_initializer(not_initialized_vars))
            for v in initialized_vars:
                print('--initialized: %s, shape = %s' % (v.name, v.shape))
            for v in not_initialized_vars:
                print('--not initialized: %s, shape = %s' % (v.name, v.shape))
        else:
            sess.run(tf.global_variables_initializer())
        # if init_checkpoint:
        #     saver.restore(sess, init_checkpoint)
        #     print("checkpoint restored from %s" % init_checkpoint)
        print("********* train start *********")

        # tf.summary.FileWriter("output/",sess.graph)
        # albert remove dropout
        def train_step(ids, mask, segment, y, step):
            feed = {input_ids: ids,
                    input_mask: mask,
                    segment_ids: segment,
                    labels: y,
                    keep_prob: 0.9}
            _, out_loss, acc_, p_ = sess.run([train_op, total_loss, acc, probabilities], feed_dict=feed)
            print("step :{}, lr:{}, loss :{}, acc :{}".format(step, _[1], out_loss, acc_))
            return out_loss, p_, y

        def dev_step(ids, mask, segment, y):
            feed = {input_ids: ids,
                    input_mask: mask,
                    segment_ids: segment,
                    labels: y,
                    keep_prob: 1.0
                    }
            out_loss, acc_, p_ = sess.run([total_loss, acc, probabilities], feed_dict=feed)
            print("loss :{}, acc :{}".format(out_loss, acc_))
            return out_loss, p_, y

        min_total_loss_dev = 999999
        step = 0
        for epoch in range(config["num_train_epochs"]):
            _ = "{:*^100s}".format(("epoch-" + str(epoch)).center(20))
            print(_)
            # Read the training data
            total_loss_train = 0
            # total_pre_train = []
            # total_true_train = []

            input_ids2, input_mask2, segment_ids2, labels2 = get_input_data(config["in_1"], seq_len, batch_size)
            for i in range(num_train_steps):
                step += 1
                ids_train, mask_train, segment_train, y_train = sess.run(
                    [input_ids2, input_mask2, segment_ids2, labels2])
                out_loss, pre, y = train_step(ids_train, mask_train, segment_train, y_train, step)
                total_loss_train += out_loss
                # total_pre_train.extend(pre)
                # total_true_train.extend(y)

                if step % eval_per_step == 0 and step >= config["eval_start_step"]:
                    total_loss_dev = 0
                    dev_input_ids2, dev_input_mask2, dev_segment_ids2, dev_labels2 = get_input_data(config["in_2"],
                                                                                                    seq_len,
                                                                                                    dev_batch_size,
                                                                                                    False)
                    # total_pre_dev = []
                    # total_true_dev = []
                    for j in range(num_dev_steps):  # number of steps in one pass over the dev set
                        ids_dev, mask_dev, segment_dev, y_dev = sess.run(
                            [dev_input_ids2, dev_input_mask2, dev_segment_ids2, dev_labels2])
                        out_loss, pre, y = dev_step(ids_dev, mask_dev, segment_dev, y_dev)
                        total_loss_dev += out_loss
                        # total_pre_dev.extend(pre)
                        # total_true_dev.extend(y_dev)
                    print("total_loss_dev:{}".format(total_loss_dev))
                    # print(classification_report(total_true_dev, total_pre_dev, digits=4))

                    if total_loss_dev < min_total_loss_dev:
                        print("save model:\t%f\t>%f" % (min_total_loss_dev, total_loss_dev))
                        min_total_loss_dev = total_loss_dev
                        saver.save(sess, config["out"] + 'bert.ckpt', global_step=step)
                elif step < config["eval_start_step"] and step % config["auto_save"] == 0:
                    saver.save(sess, config["out"] + 'bert.ckpt', global_step=step)
            _ = "{:*^100s}".format(("epoch-" + str(epoch) + " report:").center(20))
            print("total_loss_train:{}".format(total_loss_train))
            # print(classification_report(total_true_train, total_pre_train, digits=4))
    sess.close()

    # remove dropout

    print("remove dropout in predict")
    tf.reset_default_graph()
    is_training = False
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, seq_len], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, _, logits, probabilities) = create_model(bert_config_, is_training, input_ids,
                                                                         input_mask, segment_ids, labels, keep_prob,
                                                                         num_labels, use_one_hot_embeddings)

    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)  # keep only the latest checkpoint

    try:
        checkpoint = tf.train.get_checkpoint_state(config["out"])
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    except Exception as e:
        input_checkpoint = config["out"]
        print("[INFO] Model folder", config["out"], repr(e))

    with tf.Session() as sess:
        sess.run(init_global)
        saver.restore(sess, input_checkpoint)
        saver.save(sess, config["out_1"] + 'bert.ckpt')
    sess.close()
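
# get_input_data() is defined elsewhere; a rough TF1 sketch of a TFRecord input pipeline with
# the same calling convention (the feature names/shapes and the meaning of the last argument
# as a shuffle flag are assumptions inferred from how the tensors are consumed above):
import tensorflow as tf

def get_input_data_sketch(tfrecord_path, seq_len, batch_size, shuffle=True):
    feature_spec = {
        "input_ids": tf.FixedLenFeature([seq_len], tf.int64),
        "input_mask": tf.FixedLenFeature([seq_len], tf.int64),
        "segment_ids": tf.FixedLenFeature([seq_len], tf.int64),
        "labels": tf.FixedLenFeature([seq_len], tf.int64),
    }

    def _parse(record):
        parsed = tf.parse_single_example(record, feature_spec)
        return (parsed["input_ids"], parsed["input_mask"],
                parsed["segment_ids"], parsed["labels"])

    dataset = tf.data.TFRecordDataset(tfrecord_path).map(_parse)
    if shuffle:
        dataset = dataset.shuffle(10000)
    dataset = dataset.repeat().batch(batch_size)
    return dataset.make_one_shot_iterator().get_next()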
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for Estimator."""
        def metric_fn(per_example_loss, label_ids, logits):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            if task_name == "cola":
                FN, FN_op = tf.metrics.false_negatives(labels=label_ids,
                                                       predictions=predictions)
                FP, FP_op = tf.metrics.false_positives(labels=label_ids,
                                                       predictions=predictions)
                TP, TP_op = tf.metrics.true_positives(labels=label_ids,
                                                      predictions=predictions)
                TN, TN_op = tf.metrics.true_negatives(labels=label_ids,
                                                      predictions=predictions)

                MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) *
                                             (TN + FP) * (TN + FN))**0.5
                MCC_op = tf.group(FN_op, TN_op, TP_op, FP_op,
                                  tf.identity(MCC, name="MCC"))
                return {"MCC": (MCC, MCC_op)}
            elif task_name == "mrpc":
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                f1 = tf_metrics.f1(labels=label_ids,
                                   predictions=predictions,
                                   num_classes=2,
                                   pos_indices=[1])
                return {
                    "eval_accuracy": accuracy,
                    "eval_f1": f1,
                    "eval_loss": loss,
                }
            else:
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

        tf.compat.v1.logging.info("*** Features ***")
        tf.compat.v1.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.compat.v1.logging.info("  name = %s, shape = %s" %
                                      (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        if not is_training and FLAGS.use_trt:
            trt_graph = get_frozen_tftrt_model(bert_config, input_ids.shape,
                                               num_labels,
                                               use_one_hot_embeddings,
                                               init_checkpoint)
            (total_loss, per_example_loss, logits,
             probabilities) = tf.import_graph_def(
                 trt_graph,
                 input_map={
                     'input_ids': input_ids,
                     'input_mask': input_mask,
                     'segment_ids': segment_ids,
                     'label_ids': label_ids
                 },
                 return_elements=[
                     'loss/cls_loss:0', 'loss/cls_per_example_loss:0',
                     'loss/cls_logits:0', 'loss/cls_probabilities:0'
                 ],
                 name='')
            if mode == tf.estimator.ModeKeys.PREDICT:
                predictions = {"probabilities": probabilities}
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            elif mode == tf.estimator.ModeKeys.EVAL:
                eval_metric_ops = metric_fn(per_example_loss, label_ids,
                                            logits)
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=eval_metric_ops)
            return output_spec
        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint and (hvd is None or hvd.rank() == 0):
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if FLAGS.verbose_logging:
            tf.compat.v1.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.compat.v1.logging.info("  name = %s, shape = %s%s",
                                          var.name, var.shape, init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                hvd, False, FLAGS.amp, FLAGS.num_accumulation_steps,
                FLAGS.optimizer_type)
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:
            dummy_op = tf.no_op()
            # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
            if FLAGS.amp:
                loss_scaler = tf.train.experimental.FixedLossScale(1)
                dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)
            eval_metric_ops = metric_fn(per_example_loss, label_ids, logits)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)
        else:
            dummy_op = tf.no_op()
            # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
            if FLAGS.amp:
                dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimization.LAMBOptimizer(learning_rate=0.0))
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=probabilities)
        return output_spec
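
# For reference, the 'cola' branch of metric_fn above combines streaming TP/TN/FP/FN counts
# into the Matthews correlation coefficient. The same formula on plain integer counts
# (the numbers below are made up purely for illustration):
def mcc_from_counts(tp, tn, fp, fn):
    denom = ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5
    return 0.0 if denom == 0 else (tp * tn - fp * fn) / denom

print(mcc_from_counts(tp=40, tn=45, fp=5, fn=10))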
Example #18
segment_ids = tf.placeholder(shape=[1, Config.max_seq_length], dtype=tf.int32, name="segment_ids")
# Create the BERT model
model = modeling.BertModel(
    config=Config.bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,  # input_mask marks which tokens in the sample are real (non-padding)
    token_type_ids=segment_ids,  # segment_ids mark which sentence each token belongs to
    use_one_hot_embeddings=False
)
embedding = model.get_sequence_output()  # get the token-level embeddings

tvars = tf.trainable_variables()  # BERT trainable variables
# Load the BERT model parameters from the checkpoint
(assignment_map, initialized_variable_names) = \
    modeling.get_assignment_map_from_checkpoint(tvars,
                                                Config.init_checkpoint)
tf.train.init_from_checkpoint(Config.init_checkpoint, assignment_map)

session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
texts = ['在 关 系 数 据 库 中 , 对 关 系 的 最 基 本 要 求 的 满 足 第 一 范 式']
tokenizer = tokenization.FullTokenizer(vocab_file='chinese_L-12_H-768_A-12/vocab.txt', do_lower_case=True)
input_ids_list, input_mask_list, segment_ids_list = word_ids(texts, tokenizer, Config.max_seq_length)
input_ids_list = np.reshape(input_ids_list, newshape=[-1, Config.batch_size, Config.max_seq_length])
input_mask_list = np.reshape(input_mask_list, newshape=[-1, Config.batch_size, Config.max_seq_length])
segment_ids_list = np.reshape(segment_ids_list, newshape=[-1, Config.batch_size, Config.max_seq_length])
embedding_r = session.run(embedding, feed_dict={input_ids: input_ids_list[0],
                                                   input_mask: input_mask_list[0],
                                                   segment_ids: segment_ids_list[0]})
print(type(embedding_r))
print(embedding_r.shape)
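
# word_ids() is defined elsewhere; a rough sketch of how such a helper could turn raw texts
# into padded id/mask/segment lists with the FullTokenizer used above (the project's actual
# helper may differ in details such as truncation and padding):
def word_ids_sketch(texts, tokenizer, max_seq_length):
    ids_list, mask_list, seg_list = [], [], []
    for text in texts:
        tokens = ["[CLS]"] + tokenizer.tokenize(text)[:max_seq_length - 2] + ["[SEP]"]
        ids = tokenizer.convert_tokens_to_ids(tokens)
        mask = [1] * len(ids)
        pad = max_seq_length - len(ids)
        ids_list.append(ids + [0] * pad)
        mask_list.append(mask + [0] * pad)
        seg_list.append([0] * max_seq_length)
    return ids_list, mask_list, seg_list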
Example #19
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        tag_to_id, id_to_tag, num_tags = get_tag_map_tensors(params)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        tag_ids = features["tag_ids"]
        osentences_len = features["sentence_len"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (logits, crf_params, pred_ids,
         sentence_len) = create_model(bert_config, is_training, input_ids,
                                      input_mask, segment_ids, num_tags,
                                      osentences_len)

        if mode == tf.estimator.ModeKeys.PREDICT:
            pred_tags = id_to_tag.lookup(tf.to_int64(pred_ids))
            predictions = {"pred_ids": pred_ids, "pred_string": pred_tags}
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
            )
            return output_spec

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars,
                                                            init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # CRF negative log-likelihood loss
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tag_ids, osentences_len, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # metric
        weights = tf.sequence_mask(osentences_len, sentence_len - 1)
        metrics = {
            'acc': tf.metrics.accuracy(tag_ids, pred_ids, weights),
            'loss': loss,
        }

        # write summary
        for metric_name, op in metrics.items():
            if metric_name == 'loss':
                tf.summary.scalar(metric_name, op)
            else:
                tf.summary.scalar(metric_name, op[1])
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(loss,
                                                     learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps,
                                                     use_tpu=False)

            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     train_op=train_op,
                                                     loss=loss)
        elif mode == tf.estimator.ModeKeys.EVAL:

            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=loss,
                                                     eval_metric_ops=metrics)
        return output_spec
def get_frozen_tftrt_model(bert_config, shape, num_labels,
                           use_one_hot_embeddings, init_checkpoint):
    tf_config = tf.compat.v1.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    output_node_names = [
        'loss/cls_loss', 'loss/cls_per_example_loss', 'loss/cls_logits',
        'loss/cls_probabilities'
    ]

    with tf.Session(config=tf_config) as tf_sess:
        input_ids = tf.placeholder(tf.int32, shape, 'input_ids')
        input_mask = tf.placeholder(tf.int32, shape, 'input_mask')
        segment_ids = tf.placeholder(tf.int32, shape, 'segment_ids')
        label_ids = tf.placeholder(tf.int32, (None), 'label_ids')

        create_model(bert_config, False, input_ids, input_mask, segment_ids,
                     label_ids, num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf_sess.run(tf.global_variables_initializer())
        print("LOADED!")
        tf.compat.v1.logging.info("**** Trainable Variables ****")
        for var in tvars:
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            else:
                init_string = ", *NOT_INIT_FROM_CKPT*"
            tf.compat.v1.logging.info("  name = %s, shape = %s%s",
                                      var.name, var.shape, init_string)

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            tf_sess, tf_sess.graph.as_graph_def(), output_node_names)

        num_nodes = len(frozen_graph.node)
        print('Converting graph using TensorFlow-TensorRT...')
        from tensorflow.python.compiler.tensorrt import trt_convert as trt
        converter = trt.TrtGraphConverter(
            input_graph_def=frozen_graph,
            nodes_blacklist=output_node_names,
            max_workspace_size_bytes=(4096 << 20) - 1000,
            precision_mode="FP16" if FLAGS.amp else "FP32",
            minimum_segment_size=4,
            is_dynamic_op=True,
            maximum_cached_engines=1000)
        frozen_graph = converter.convert()

        print('Total node count before and after TF-TRT conversion:',
              num_nodes, '->', len(frozen_graph.node))
        print(
            'TRT node count:',
            len([1 for n in frozen_graph.node if str(n.op) == 'TRTEngineOp']))

        with tf.io.gfile.GFile("frozen_modelTRT.pb", "wb") as f:
            f.write(frozen_graph.SerializeToString())

    return frozen_graph
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        ##******************************************************
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        one_hot_labels = tf.one_hot(label_ids,
                                    depth=num_labels,
                                    dtype=tf.float32)
        #output_weights = tf.get_variable("output_weights", [num_labels, hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))
        '''
    tf.logging.info("**** label_ids **** is: %s", label_ids) #shape=(32,)
    tf.logging.info("**** logits **** is: %s", logits) #shape=(32, 2)
    tf.logging.info("**** probabilities **** is: %s", probabilities) #shape=(32, 2)
    tf.logging.info("**** predictions **** is: %s", predictions) #shape=(32,)
    tf.logging.info("**** one_hot_labels **** is: %s", one_hot_labels) #shape=(32, 2)
    
    ## add loss to tensorboard (ok)
    tf.summary.scalar('total_loss', total_loss)
    
    # add cross_entropy to tensorboard (ok)
    with tf.variable_scope('cross_entropy'):
      diff = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)
      cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)
    
    # add learning_rate to tensorboard (ok)
    #with tf.name_scope('learning_rate'):
    #  train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy)
    #tf.summary.scalar('learning_rate', learning_rate)
    '''
        # add accuracy to tensorboard (ok)
        with tf.name_scope('accuracy'):
            #accuracy = tf.metrics.accuracy(label_ids, predictions)
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, axis=1),
                                          tf.argmax(logits, axis=1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

        # add weigth to tensorboard
        #with tf.name_scope('weight'):
        #  tf.summary.histogram('weight', output_weights)

        #merged = tf.summary.merge_all()
        #train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)

        ##******************************************************

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        # *** Train: optimize the model's final fully-connected layer parameters (via AdamWeightDecayOptimizer())
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # *** Evaluation metrics for the predictions
            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(
                    logits, axis=-1,
                    output_type=tf.int32)  # *** index of the maximum value along the last axis
                #predictions = tf.cast(predictions,tf.float32) # new add
                accuracy = tf.metrics.accuracy(label_ids, predictions)
                loss = tf.metrics.mean(per_example_loss)
                precision = tf.metrics.precision(label_ids, predictions)
                recall = tf.metrics.recall(label_ids, predictions)
                fn = tf.metrics.false_negatives(label_ids, predictions)
                fp = tf.metrics.false_positives(label_ids, predictions)
                tn = tf.metrics.true_negatives(label_ids, predictions)
                tp = tf.metrics.true_positives(label_ids, predictions)
                f1 = tf.contrib.metrics.f1_score(label_ids, predictions)

                return collections.OrderedDict({
                    #'eval_accuracy': accuracy,
                    'accuracy': accuracy,
                    'eval_precision': precision,
                    'eval_recall': recall,
                    'eval_tp': tp,
                    'eval_tn': tn,
                    'eval_fp': fp,
                    'eval_fn': fn,
                    'eval_f1': f1,
                    'eval_loss': loss,
                })
                #return {
                #    "eval_accuracy": accuracy,
                #    "eval_loss": loss,
                #}

                #tf.summary.scalar('accuracy', accuracy)
                #tf.summary.scalar('precision', precision)
                #tf.summary.scalar('recall', recall)
                #tf.summary.scalar('loss', loss)

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # *** Prediction output
            '''
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
      '''
            # used for savedmodel
            # Generate Predictions
            # v15 -- predict (this is work!)
            predictions = tf.argmax(
                probabilities, axis=-1,
                output_type=tf.int32)  #logits-->probabilities
            export_outputs = {
                'classes':
                tf.estimator.export.PredictOutput({
                    "probabilities": probabilities,
                    "classid": predictions
                })
            }
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=probabilities,
                scaffold_fn=scaffold_fn,
                export_outputs=export_outputs)
            '''
      # v13 -- classify
      predictions = tf.argmax(probabilities, axis=-1, output_type=tf.int32) 
      export_outputs = {
              'classes': tf.estimator.export.ClassificationOutput(
                      scores=probabilities, 
                      classes=tf.as_string(predictions))
              }
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn, export_outputs=export_outputs)
      '''

        return output_spec
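
# The export_outputs above only take effect when the estimator is exported as a SavedModel.
# A minimal sketch of a matching serving input function (the feature names follow the
# model_fn above; max_seq_length and the export call itself are assumptions):
import tensorflow as tf

def serving_input_receiver_fn_sketch(max_seq_length=128):
    features = {
        "input_ids": tf.placeholder(tf.int32, [None, max_seq_length], name="input_ids"),
        "input_mask": tf.placeholder(tf.int32, [None, max_seq_length], name="input_mask"),
        "segment_ids": tf.placeholder(tf.int32, [None, max_seq_length], name="segment_ids"),
        "label_ids": tf.placeholder(tf.int32, [None], name="label_ids"),
    }
    return tf.estimator.export.ServingInputReceiver(features, features)

# estimator.export_savedmodel(export_dir, lambda: serving_input_receiver_fn_sketch(128))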
def main():
    print("print start load the params...")
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.gfile.MakeDirs(config["out"])
    train_examples_len = config["train_examples_len"]
    dev_examples_len = config["dev_examples_len"]
    learning_rate = config["learning_rate"]
    eval_per_step = config["eval_per_step"]
    num_labels = config["num_labels"]
    print(num_labels)
    num_train_steps = int(train_examples_len / config["train_batch_size"] *
                          config["num_train_epochs"])
    print("num_train_steps:", num_train_steps)
    num_dev_steps = int(dev_examples_len / config["dev_batch_size"])
    num_warmup_steps = int(num_train_steps * config["warmup_proportion"])
    use_one_hot_embeddings = False
    is_training = True
    use_tpu = False
    seq_len = config["max_seq_len"]
    init_checkpoint = config["init_checkpoint"]
    print("print start compile the bert model...")
    # 定义输入输出
    input_ids = tf.placeholder(tf.int64,
                               shape=[None, seq_len],
                               name='input_ids')
    input_mask = tf.placeholder(tf.int64,
                                shape=[None, seq_len],
                                name='input_mask')
    segment_ids = tf.placeholder(tf.int64,
                                 shape=[None, seq_len],
                                 name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[
        None,
    ], name='labels')
    keep_prob = tf.placeholder(tf.float32,
                               name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, per_example_loss, logits,
     probabilities) = create_model(bert_config_, is_training, input_ids,
                                   input_mask, segment_ids, labels, keep_prob,
                                   num_labels, use_one_hot_embeddings)
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             False)
    print("start training the bert model (multi-class)...")

    batch_size = config["train_batch_size"]
    input_ids2, input_mask2, segment_ids2, labels2 = get_input_data(
        config["in_1"], seq_len, batch_size)

    dev_batch_size = config["dev_batch_size"]

    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)  # keep the 3 most recent checkpoints

    with tf.Session() as sess:
        sess.run(init_global)
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        print("start loading the pretrained model")
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                # var.trainable = False
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # if init_checkpoint:
        #     saver.restore(sess, init_checkpoint)
        #     print("checkpoint restored from %s" % init_checkpoint)
        print("********* bert_multi_class_train start *********")

        # tf.summary.FileWriter("output/",sess.graph)
        def train_step(ids, mask, segment, y, step):
            feed = {
                input_ids: ids,
                input_mask: mask,
                segment_ids: segment,
                labels: y,
                keep_prob: 0.9
            }
            _, out_loss, out_logits, p_ = sess.run(
                [train_op, total_loss, logits, probabilities], feed_dict=feed)
            pre = np.argmax(p_, axis=-1)
            acc = np.sum(np.equal(pre, y)) / len(pre)
            print("step: {}, loss: {}, acc: {}".format(step, out_loss, acc))
            return out_loss, pre, y

        def dev_step(ids, mask, segment, y):
            feed = {
                input_ids: ids,
                input_mask: mask,
                segment_ids: segment,
                labels: y,
                keep_prob: 1.0
            }
            out_loss, out_logits, p_ = sess.run(
                [total_loss, logits, probabilities], feed_dict=feed)
            pre = np.argmax(p_, axis=-1)
            acc = np.sum(np.equal(pre, y)) / len(pre)
            print("loss: {}, acc: {}".format(out_loss, acc))
            return out_loss, pre, y

        min_total_loss_dev = 999999
        for i in range(num_train_steps):
            # fetch one training batch; shift i to a 1-based step counter for logging
            i += 1
            ids_train, mask_train, segment_train, y_train = sess.run(
                [input_ids2, input_mask2, segment_ids2, labels2])
            train_step(ids_train, mask_train, segment_train, y_train, i)

            if i % eval_per_step == 0:
                total_loss_dev = 0
                dev_input_ids2, dev_input_mask2, dev_segment_ids2, dev_labels2 = get_input_data(
                    config["in_2"], seq_len, dev_batch_size)
                total_pre_dev = []
                total_true_dev = []
                for j in range(num_dev_steps):  # number of steps in one dev epoch
                    ids_dev, mask_dev, segment_dev, y_dev = sess.run([
                        dev_input_ids2, dev_input_mask2, dev_segment_ids2,
                        dev_labels2
                    ])
                    out_loss, pre, y = dev_step(ids_dev, mask_dev, segment_dev,
                                                y_dev)
                    total_loss_dev += out_loss
                    total_pre_dev.extend(pre)
                    total_true_dev.extend(y_dev)
                #
                print("dev result report:")
                print(classification_report(total_true_dev, total_pre_dev))

                if total_loss_dev < min_total_loss_dev:
                    print("saving model: dev loss improved from %f to %f" %
                          (min_total_loss_dev, total_loss_dev))
                    min_total_loss_dev = total_loss_dev
                    saver.save(sess,
                               config["out"] + 'bert.ckpt',
                               global_step=i)
    sess.close()

    # rebuild the graph without dropout for prediction

    print("rebuild the graph without dropout for prediction")
    tf.reset_default_graph()
    is_training = False
    input_ids = tf.placeholder(tf.int64,
                               shape=[None, seq_len],
                               name='input_ids')
    input_mask = tf.placeholder(tf.int64,
                                shape=[None, seq_len],
                                name='input_mask')
    segment_ids = tf.placeholder(tf.int64,
                                 shape=[None, seq_len],
                                 name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[
        None,
    ], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, per_example_loss, logits,
     probabilities) = create_model(bert_config_, is_training, input_ids,
                                   input_mask, segment_ids, labels, keep_prob,
                                   num_labels, use_one_hot_embeddings)

    init_global = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)  # keep only the latest checkpoint

    try:
        checkpoint = tf.train.get_checkpoint_state(config["out"])
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    except Exception as e:
        input_checkpoint = config["out"]
        print("[INFO] Model folder", config["out"], repr(e))

    with tf.Session() as sess:
        sess.run(init_global)
        saver.restore(sess, input_checkpoint)
        saver.save(sess, config["out_1"] + 'bert.ckpt')
    sess.close()
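
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original script): restoring the
# dropout-free checkpoint written to config["out_1"] and classifying one
# batch. The tensors are the placeholders/outputs built in main() above and
# are passed in explicitly; ids/mask/segment are assumed to be int64 numpy
# arrays of shape [batch, seq_len]; tf and np are the modules already
# imported by this script.
# ---------------------------------------------------------------------------
def predict_batch(ckpt_path, tensors, ids, mask, segment):
    input_ids, input_mask, segment_ids, labels, keep_prob, probabilities = tensors
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, ckpt_path)  # load the exported weights
        probs = sess.run(probabilities,
                         feed_dict={input_ids: ids,
                                    input_mask: mask,
                                    segment_ids: segment,
                                    labels: np.zeros(len(ids), dtype=np.int64),  # dummy labels
                                    keep_prob: 1.0})                             # no dropout
    return np.argmax(probs, axis=-1)
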
Example #23
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, per_example_loss, logits,
         predicts) = create_model(bert_config, is_training, input_ids,
                                  input_mask, segment_ids, label_ids,
                                  num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # how eval metrics are computed: metric_fn must be defined and adapted by hand
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # Evaluation metrics: precision, recall and F1. If the label set changes,
                # update the numbers below: 10 is the total number of classes and 1-6 are
                # the useful classes (the B/I/E tags).
                # See the functions in tf.metrics for details.
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 10, [1, 2, 3, 4, 5, 6],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           10, [1, 2, 3, 4, 5, 6],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  10, [1, 2, 3, 4, 5, 6],
                                  average="macro")

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }

            # def metric_fn(per_example_loss, label_ids, logits, is_real_example):
            #     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            #     accuracy = tf.metrics.accuracy(
            #         labels=label_ids, predictions=predictions, weights=is_real_example)
            #     loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
            #     return {
            #         "eval_accuracy": accuracy,
            #         "eval_loss": loss,
            #     }

            # eval_metrics = (metric_fn,
            #                 [per_example_loss, label_ids, logits, is_real_example])
            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
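
# ---------------------------------------------------------------------------
# Hedged reference sketch (not part of the original script): the same
# macro-averaged metrics computed offline with scikit-learn. The label list
# [1..6] out of 10 classes mirrors the tf_metrics calls above; the arrays
# are made-up examples.
# ---------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

y_true = np.array([1, 2, 3, 0, 6, 5, 1])    # made-up gold label ids
y_pred = np.array([1, 2, 4, 0, 6, 5, 2])    # made-up predicted label ids
useful = [1, 2, 3, 4, 5, 6]                 # classes that count, as in metric_fn

print("precision:", precision_score(y_true, y_pred, labels=useful, average="macro"))
print("recall:   ", recall_score(y_true, y_pred, labels=useful, average="macro"))
print("f1:       ", f1_score(y_true, y_pred, labels=useful, average="macro"))
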
Example #24
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, per_example_loss,
         log_probs) = create_model(bert_config, is_training, input_ids,
                                   input_mask, segment_ids, label_ids,
                                   num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        scaffold_fn = None
        initialized_variable_names = []
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "log_probs": log_probs,
                    "label_ids": label_ids,
                },
                scaffold_fn=scaffold_fn)

        else:
            raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                             (mode))

        return output_spec
Example #25
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None  # used as the weights argument of the tf.metrics functions
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model (the computation graph), including the downstream task head.
        # Note how the TF API is structured: nothing is computed at this point, only the graph
        # is constructed. create_model looks like a function that returns a loss, but at the code
        # level it only builds the graph, hence the name "create" rather than "calculate".
        # Computation only starts when the estimator accesses the graph through a session.
        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()  # once the graph is built, its trainable variables exist as well
        initialized_variable_names = {}
        scaffold_fn = None  # only needed on TPU
        if init_checkpoint:  # the pretrained BERT (or previously fine-tuned) .ckpt passed on the command line
            (
                assignment_map,
                initialized_variable_names  # the variables usable from init_checkpoint (the intersection of the pretrained model and the task graph)
            ) = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # TPUEstimator has a bug that hides the loss when training off-TPU, so add a logging hook
            logging_hook = tf.train.LoggingTensorHook({"loss": total_loss},
                                                      every_n_iter=10)
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits, probabilities,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # These metrics are not explicitly named; TF records them by argument name and
                # variable scope, which raises the question of how they are identified when called
                # again. The update op is defined as a node of the computation graph: running
                # TensorFlow means executing that graph through a session, and the metric is
                # updated each time its node is reached, not by calling this Python code repeatedly.
                metrics = {}
                metrics["eval_accuracy"] = tf.metrics.accuracy(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example)

                num_options = 10  # 10 candidate options per question
                shaped_probabilities = tf.reshape(
                    tf.slice(probabilities, [0, 1], [-1, 1]),
                    [-1, num_options])
                shaped_label = tf.zeros(
                    [tf.shape(shaped_probabilities)[0], 1],
                    dtype=tf.int64)  # the correct index within each group is always 0, by construction of the data
                # shaped_label = tf.argmax(tf.reshape(label_ids, [-1, num_options]), 1)  # argmax over axis 1 (values are only 0/1); even if more than one answer is correct this returns a single (the first) index, never several
                # shaped_label = tf.reshape(tf.slice(tf.where(tf.equal(tf.reshape(label_ids, [-1, num_options]), 1)), [0, 1], [-1, 1]), [-1, num_true])  # the 1 inside equal is broadcast to shape; if the number of correct options is fixed, this line can return several indices per question
                shaped_is_real_example = tf.slice(
                    tf.reshape(is_real_example, [-1, num_options]), [0, 0],
                    [-1, 1])
                shaped_is_real_example = None  # overridden: recall@k below is computed unweighted
                for k in [1, 2, 5, 9, 10]:
                    metrics["recall@{}".format(k)] = tf.metrics.recall_at_k(
                        labels=shaped_label,
                        predictions=shaped_probabilities,
                        weights=shaped_is_real_example,
                        k=k)

                #metrics["loss"] = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
                return metrics

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, probabilities,
                is_real_example
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
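
# ---------------------------------------------------------------------------
# Hedged numpy sketch (not part of the original script) of the reshape logic
# in metric_fn above: each group of num_options consecutive examples is one
# question whose correct candidate comes first; the values below are made up.
# ---------------------------------------------------------------------------
import numpy as np

num_options = 10
probabilities = np.random.rand(2 * num_options, 2)      # [P(label=0), P(label=1)] per candidate

# column 1 = score that the candidate is the answer, regrouped per question
scores = probabilities[:, 1].reshape(-1, num_options)   # shape [2, 10]
labels = np.zeros(scores.shape[0], dtype=np.int64)      # correct candidate is index 0

for k in (1, 2, 5):
    topk = np.argsort(-scores, axis=1)[:, :k]            # indices of the k best candidates
    hit_rate = np.mean([labels[i] in topk[i] for i in range(len(labels))])
    print("recall@%d = %.2f" % (k, hit_rate))
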
Example #26
        word_id_list.append(word_ids)
    return word_id_list


# Initialize BERT
model = modeling.BertModel(config=bert_config,
                           is_training=False,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=segment_ids,
                           use_one_hot_embeddings=False)

# Load the BERT checkpoint weights
tvars = tf.trainable_variables()
(assignment,
 initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
     tvars, init_checkpoint)
tf.train.init_from_checkpoint(init_checkpoint, assignment)
# Get the last and the second-to-last encoder layers
encoder_last_layer = model.get_sequence_output()
encoder_last2_layer = model.all_encoder_layers[-2]

# Read the data
token = tokenization.FullTokenizer(vocab_file=bert_vocab_file)

# input_train_data = read_input(file_dir='../data/legal_domain/train_x_c.txt')
input_train_data = read_input(file_dir='../data/cnews/train_x.txt')
# input_val_data = read_input(file_dir='../data/legal_domain/val_x_c.txt')
input_val_data = read_input(file_dir='../data/cnews/val_x.txt')
# input_test_data = read_input(file_dir='../data/legal_domain/test_x_c.txt')
input_test_data = read_input(file_dir='../data/cnews/test_x.txt')
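
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original snippet): actually fetching
# the last-layer features for one batch. It assumes input_ids/input_mask/
# segment_ids are the placeholders fed into BertModel above and that
# ids/mask/segments are int32 numpy arrays of shape [batch, seq_len].
# ---------------------------------------------------------------------------
def extract_last_layer_features(ids, mask, segments):
    with tf.Session() as sess:
        # init_from_checkpoint above already mapped the variable initializers
        sess.run(tf.global_variables_initializer())
        return sess.run(encoder_last_layer,        # [batch, seq_len, hidden_size]
                        feed_dict={input_ids: ids,
                                   input_mask: mask,
                                   segment_ids: segments})
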
Example #27
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    print_configuration_op(FLAGS)
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    root_path = FLAGS.output_dir
    if not os.path.exists(root_path):
        os.makedirs(root_path)

    timestamp = str(int(time()))
    root_path = os.path.join(root_path, timestamp)
    tf.logging.info('root_path: {}'.format(root_path))
    if not os.path.exists(root_path):
        os.makedirs(root_path)

    train_data_size = total_sample(FLAGS.train_dir)
    tf.logging.info('train data size: {}'.format(train_data_size))
    valid_data_size = total_sample(FLAGS.valid_dir)
    tf.logging.info('valid data size: {}'.format(valid_data_size))

    num_train_steps = train_data_size // FLAGS.train_batch_size * FLAGS.num_train_epochs
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    filenames = tf.placeholder(tf.string, shape=[None])
    shuffle_size = tf.placeholder(tf.int64)
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(parse_exmp)  # Parse the record into tensors.
    dataset = dataset.repeat(1)
    # the shuffle buffer size is fed at run time through the shuffle_size placeholder
    dataset = dataset.shuffle(shuffle_size)
    dataset = dataset.batch(FLAGS.train_batch_size)
    iterator = dataset.make_initializable_iterator()
    ques_ids, ans_ids, sents, mask, segmentids, labels = iterator.get_next()  # one batch of parsed tensors
    pair_ids = [ques_ids, ans_ids, labels]


    training = tf.placeholder(tf.bool)
    mean_loss, logits, probabilities, accuracy, model = create_model(bert_config,
                                                                     is_training = training,
                                                                     input_ids = sents,
                                                                     input_mask = mask,
                                                                     segment_ids = segmentids,
                                                                     labels = labels,
                                                                     ques_ids = ques_ids,
                                                                     ans_ids = ans_ids,
                                                                     num_labels = 1,
                                                                     use_one_hot_embeddings = False)


    # init model with pre-training
    tvars = tf.trainable_variables()
    if FLAGS.init_checkpoint:
        (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,FLAGS.init_checkpoint)
        tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)


    train_opt = optimization.create_optimizer(mean_loss, FLAGS.learning_rate, num_train_steps, num_warmup_steps, False)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True


    if FLAGS.do_train:
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(FLAGS.num_train_epochs):
                tf.logging.info('Epoch {} training begin'.format(epoch))
                sess.run(iterator.initializer,
                         feed_dict={filenames: [FLAGS.train_dir], shuffle_size: 1024})
                run_epoch(epoch, "train", sess, training, logits, accuracy, mean_loss, train_opt)

                tf.logging.info('Valid begin')
                sess.run(iterator.initializer,
                         feed_dict={filenames: [FLAGS.valid_dir], shuffle_size: 1})
                run_test(epoch, root_path, "valid", sess, training, accuracy, probabilities, pair_ids)
Example #28
    def model_fn(features, labels, mode, params):
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s" %
                         (name, features[name].shape))
        input_ids = features["input_ids"]
        mask = features["mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        if FLAGS.crf:  # note: both branches call create_model identically; the CRF choice is presumably handled inside it
            (total_loss, logits,
             predicts) = create_model(bert_config, is_training, input_ids,
                                      mask, segment_ids, label_ids, num_labels,
                                      use_one_hot_embeddings)

        else:
            (total_loss, logits,
             predicts) = create_model(bert_config, is_training, input_ids,
                                      mask, segment_ids, label_ids, num_labels,
                                      use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        scaffold_fn = None
        initialized_variable_names = {}  # use a dict so the membership check below works when no checkpoint is given
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:

                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        # logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            # logging.info("  name = %s, shape = %s%s", var.name, var.shape,init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, logits, num_labels, mask):
                predictions = tf.math.argmax(logits,
                                             axis=-1,
                                             output_type=tf.int32)
                cm = metrics.streaming_confusion_matrix(label_ids,
                                                        predictions,
                                                        num_labels - 1,
                                                        weights=mask)
                return {"confusion_matrix": cm}
                #

            eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
Example #29
    def model_fn(features, labels, mode, params):
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        #label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         predictsDict) = create_model(bert_config, is_training, input_ids,
                                      input_mask, segment_ids, label_ids,
                                      num_labels, use_one_hot_embeddings)

        if not FLAGS.create_SavedModel:
            predictsDict[
                "input_mask"] = input_mask  # including input_mask breaks the SavedModel export

        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits, num_labels):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 num_labels, [1, 2],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           num_labels, [1, 2],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  num_labels, [1, 2],
                                  average="macro")
                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    #"eval_loss": loss,
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, logits, num_labels])
            # eval_metrics = (metric_fn, [label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictsDict, scaffold_fn=scaffold_fn)
        return output_spec
Example #30
def main(opts):
    tf.logging.set_verbosity(tf.logging.INFO)
    """
    Set up for synthetic data.
    """
    if opts["synthetic_data"] or opts["generated_data"]:
        opts['task_name'] = 'synthetic'
        if opts['task_type'] == 'regression':
            opts['task_name'] = 'synthetic_regression'
    print(opts['task_name'])
    print(opts['task_type'])
    processors = {
        "cola": glue_data.ColaProcessor,
        "mnli": glue_data.MnliProcessor,
        "mrpc": glue_data.MrpcProcessor,
        "sst2": glue_data.Sst2Processor,
        "stsb": glue_data.StsbProcessor,
        "qqp": glue_data.QqpProcessor,
        "qnli": glue_data.QnliProcessor,
        "rte": glue_data.RteProcessor,
        "wnli": glue_data.WnliProcessor,
        "mnli-mm": glue_data.MnliMismatchProcessor,
        "ax": glue_data.AxProcessor,
        "synthetic": glue_data.SyntheticProcessor,
        "synthetic_regression": glue_data.SyntheticProcessorRegression
    }

    tokenization.validate_case_matches_checkpoint(
        do_lower_case=opts["do_lower_case"],
        init_checkpoint=opts["init_checkpoint"])

    tf.gfile.MakeDirs(opts["output_dir"])

    task_name = opts["task_name"].lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=opts["vocab_file"],
                                           do_lower_case=opts["do_lower_case"])
    opts["pass_in"] = (processor, label_list, tokenizer)

    train_examples = None
    # num_train_steps = None
    num_warmup_steps = None
    # This many iterations are run per step.
    iterations_per_step = opts['batches_per_step']
    # Avoid NaN issues caused by a zero queue length.
    if opts["do_training"]:
        train_examples = processor.get_train_examples(opts["data_dir"])
        num_train_steps = int(
            len(train_examples) / opts["total_batch_size"] * opts['epochs'])
        iterations_per_epoch = len(train_examples) // opts["total_batch_size"]
        if opts.get('num_train_steps'):
            # total iterations
            iterations = opts['num_train_steps'] * opts['batches_per_step']
        else:
            iterations = iterations_per_epoch * opts['epochs']
        num_warmup_steps = int(iterations * opts["warmup"])

        tf.logging.info("***** Running training *****")
        tf.logging.info(f"  Num examples = {len(train_examples)}")
        tf.logging.info(f"  Micro batch size = {opts['micro_batch_size']}")
        tf.logging.info(f"  Num steps / epoch = {iterations_per_epoch}")
        tf.logging.info(f"  Num iterations = {iterations}")
        tf.logging.info(f"  Num steps = {num_train_steps}")
        tf.logging.info(f"  Warm steps = {num_warmup_steps}")
        tf.logging.info(f"  Warm frac = {opts['warmup']}")
        # Learning rate schedule
        lr_schedule_name = opts['lr_schedule']
        logger.info(f"Using learning rate schedule {lr_schedule_name}")
        learning_rate_schedule = make_lr_schedule(lr_schedule_name, opts,
                                                  iterations)

    if opts["do_training"]:
        log_iterations = opts['batches_per_step'] * opts["steps_per_logs"]

        # -------------- BUILD TRAINING GRAPH ----------------
        opts['current_mode'] = 'train'
        train = build_graph(opts, iterations_per_step, is_training=True)
        train.session.run(train.init)
        train.session.run(train.iterator.initializer)

        # Checkpoints load and save
        init_checkpoint_path = opts['init_checkpoint']
        if init_checkpoint_path:
            if os.path.isfile(init_checkpoint_path):
                init_checkpoint_path = os.path.splitext(
                    init_checkpoint_path)[0]

            (assignment_map, initialized_variable_names
             ) = bert_ipu.get_assignment_map_from_checkpoint(
                 train.tvars, init_checkpoint_path)

            for var in train.tvars:
                if var.name in initialized_variable_names:
                    mark = "*"
                else:
                    mark = " "
                logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                            var.dtype.name)

            reader = tf.train.NewCheckpointReader(init_checkpoint_path)
            load_vars = reader.get_variable_to_shape_map()

            saver_restore = tf.train.Saver(assignment_map)
            saver_restore.restore(train.session, init_checkpoint_path)

        if opts['steps_per_ckpts']:
            filepath = train.saver.save(train.session,
                                        opts["checkpoint_path"],
                                        global_step=0)
            logger.info(f"Saved checkpoint to {filepath}")
            ckpt_iterations = opts['batches_per_step'] * \
                opts["steps_per_ckpts"]

        else:
            i = 0

        # Tensorboard logs path
        log_path = os.path.join(opts["logs_path"], 'event')
        logger.info("Tensorboard event file path {}".format(log_path))
        summary_writer = tf.summary.FileWriter(log_path,
                                               train.graph,
                                               session=train.session)
        start_time = datetime.datetime.now()
        # Training loop
        if opts['task_type'] == 'regression':
            print_format = (
                "step: {step:6d}, iteration: {iteration:6d} ({percent_done:.3f}%),  epoch: {epoch:6.2f}, lr: {lr:6.4g}, loss: {loss:6.3f}, pearson: {pearson:6.3f}, spearman: {spearman:6.3f}, "
                "throughput {throughput_samples_per_sec:6.2f} samples/sec, batch time: {avg_batch_time:8.6f} s, total_time: {total_time:8.1f} s"
            )
        else:
            print_format = (
                "step: {step:6d}, iteration: {iteration:6d} ({percent_done:.3f}%),  epoch: {epoch:6.2f}, lr: {lr:6.4g}, loss: {loss:6.3f}, acc: {acc:6.3f}, "
                "throughput {throughput_samples_per_sec:6.2f} samples/sec, batch time: {avg_batch_time:8.6f} s, total_time: {total_time:8.1f} s"
            )
        step = 0
        start_all = time.time()
        i = 0
        total_samples = len(train_examples)

        while i < iterations:
            step += 1
            epoch = float(opts["total_batch_size"] * i) / total_samples

            learning_rate = learning_rate_schedule.get_at_step(step)

            try:
                if opts['task_type'] == 'regression':
                    loss, pred, batch_time, pearson, spearman = training_step(
                        train, learning_rate, i, opts)
                else:
                    loss, batch_time, acc, mean_preds = training_step(
                        train, learning_rate, i, opts)
            except tf.errors.OpError as e:
                raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                       e.message)

            batch_time /= iterations_per_step

            avg_batch_time = batch_time

            if i % log_iterations == 0:
                throughput = opts['total_batch_size'] / avg_batch_time

                # flush times every time it is reported
                # batch_times.clear()

                total_time = time.time() - start_all
                if opts['task_type'] == 'regression':
                    stats = OrderedDict([
                        ('step', step), ('iteration', i + iterations_per_step),
                        ('percent_done', i / iterations * 100), ('epoch',
                                                                 epoch),
                        ('lr', learning_rate), ('loss', loss),
                        ('pearson', pearson), ('spearman', spearman),
                        ('avg_batch_time', avg_batch_time),
                        ('throughput_samples_per_sec', throughput),
                        ('total_time', total_time),
                        ('learning_rate', learning_rate)
                    ])
                else:
                    stats = OrderedDict([
                        ('step', step), ('iteration', i + iterations_per_step),
                        ('percent_done', i / iterations * 100), ('epoch',
                                                                 epoch),
                        ('lr', learning_rate), ('loss', loss), ('acc', acc),
                        ('avg_batch_time', avg_batch_time),
                        ('throughput_samples_per_sec', throughput),
                        ('total_time', total_time),
                        ('learning_rate', learning_rate)
                    ])
                logger.info(print_format.format(**stats))

                train_summary = tf.Summary()
                train_summary.value.add(tag='epoch', simple_value=epoch)
                train_summary.value.add(tag='loss', simple_value=loss)
                if opts['task_type'] == 'regression':
                    train_summary.value.add(tag='pearson',
                                            simple_value=pearson)
                    train_summary.value.add(tag='spearman',
                                            simple_value=spearman)
                else:
                    train_summary.value.add(tag='acc', simple_value=acc)
                train_summary.value.add(tag='learning_rate',
                                        simple_value=learning_rate)
                train_summary.value.add(tag='throughput',
                                        simple_value=throughput)

                if opts['wandb']:
                    wandb.log(dict(stats))

                summary_writer.add_summary(train_summary, step)
                summary_writer.flush()

            if i % ckpt_iterations == 0 and i > 1:
                filepath = train.saver.save(train.session,
                                            opts["checkpoint_path"],
                                            global_step=i +
                                            iterations_per_step)
                logger.info(f"Saved checkpoint to {filepath}")

            i += iterations_per_step

        # We save the final checkpoint
        finetuned_checkpoint_path = train.saver.save(train.session,
                                                     opts["checkpoint_path"],
                                                     global_step=i +
                                                     iterations_per_step)
        logger.info(f"Saved checkpoint to {finetuned_checkpoint_path}")
        train.session.close()
        end_time = datetime.datetime.now()
        consume_time = (end_time - start_time).seconds
        logger.info(f"training times: {consume_time} s")

    if opts["do_eval"]:
        eval_examples = processor.get_dev_examples(opts["data_dir"])
        num_actual_eval_examples = len(eval_examples)
        opts["eval_batch_size"] = opts['micro_batch_size'] * \
            opts['gradient_accumulation_count']

        eval_file = os.path.join(opts["output_dir"], "eval.tf_record")

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Evaluate batch size = %d", opts["eval_batch_size"])

        iterations_per_step = 1
        opts['current_mode'] = 'eval'
        predict = build_graph(opts, iterations_per_step, is_training=False)
        predict.session.run(predict.init)
        predict.session.run(predict.iterator.initializer)

        if opts["init_checkpoint"] and not opts['do_training'] and opts[
                'do_eval']:
            finetuned_checkpoint_path = opts['init_checkpoint']

        if finetuned_checkpoint_path:
            print("********** RESTORING FROM CHECKPOINT *************")
            (assignment_map, _initialized_variable_names
             ) = bert_ipu.get_assignment_map_from_checkpoint(
                 predict.tvars, finetuned_checkpoint_path)
            saver_restore = tf.train.Saver(assignment_map)
            saver_restore.restore(predict.session, finetuned_checkpoint_path)
            print("Done.")

        i = 0
        all_time_consumption = []

        iterations = int(
            len(eval_examples) //
            (opts['micro_batch_size'] * opts['gradient_accumulation_count']) +
            1)

        all_accs = []
        all_pearson = []
        all_spearman = []
        all_loss = []
        while i < iterations:
            try:
                start = time.time()
                tmp_output = predict_step(predict)
                if opts['task_type'] == 'regression':
                    all_pearson.append(tmp_output['pearson'])
                    all_spearman.append(tmp_output['spearman'])
                else:
                    all_accs.append(tmp_output['acc'])
                all_loss.append(tmp_output['loss'])
                output_eval_file = os.path.join(opts['output_dir'],
                                                "eval_results.txt")
                duration = time.time() - start
                all_time_consumption.append(duration /
                                            opts["batches_per_step"])
            except tf.errors.OpError as e:
                raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                       e.message)

            i += iterations_per_step

            if len(all_loss) % 1000 == 0:
                logger.info(f"Processing example: {len(all_loss)}")
        if opts['task_type'] == 'regression':
            tmp_output['average_pearson'] = np.mean(all_pearson)
            tmp_output['average_spearman'] = np.mean(all_spearman)
        else:
            tmp_output['average_acc'] = np.mean(all_accs)
        tmp_output['average_loss'] = np.mean(all_loss)

        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(tmp_output.keys()):
                tf.logging.info("  %s = %s", key, str(tmp_output[key]))
                writer.write("%s = %s\n" % (key, str(tmp_output[key])))
        # The timing of the first 10 steps is not stable enough for measurement.
        if len(all_time_consumption) >= 10 * 2:
            all_time_consumption = np.array(all_time_consumption[10:])
        else:
            logger.warning(
                "The first 10 steps are included, so the throughput and latency "
                "measurements may not be accurate.")
            all_time_consumption = np.array(all_time_consumption)

        logger.info((
            f"inference throughput: { (opts['micro_batch_size'] * opts['gradient_accumulation_count'] ) / all_time_consumption.mean() } "
            f"examples/sec - Latency: {all_time_consumption.mean()} {all_time_consumption.min()} "
            f"{all_time_consumption.max()} (mean min max) sec "))
        # Done evaluations

    if opts["do_predict"]:
        predict_examples = processor.get_test_examples(opts["data_dir"])
        num_actual_predict_examples = len(predict_examples)
        opts["predict_batch_size"] = opts['micro_batch_size'] * \
            opts['gradient_accumulation_count']
        tf.logging.info("***** Running prediction *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Predict batch size = %d",
                        opts["predict_batch_size"])

        iterations_per_step = 1
        opts['current_mode'] = 'predict'
        prediction = build_graph(opts, iterations_per_step, is_training=False)
        prediction.session.run(prediction.init)
        prediction.session.run(prediction.iterator.initializer)

        if opts["init_checkpoint"] and not opts['do_training'] and opts[
                'do_predict']:
            finetuned_checkpoint_path = opts['init_checkpoint']
        else:
            finetuned_checkpoint_path = False

        if finetuned_checkpoint_path:
            print("********** RESTORING FROM CHECKPOINT *************")
            (assignment_map, _initialized_variable_names
             ) = bert_ipu.get_assignment_map_from_checkpoint(
                 prediction.tvars, finetuned_checkpoint_path)
            saver_restore = tf.train.Saver(assignment_map)
            saver_restore.restore(prediction.session,
                                  finetuned_checkpoint_path)
            print("Done.")

        all_results = []
        i = 0
        all_time_consumption = []

        iterations = int(
            len(predict_examples) //
            (opts['micro_batch_size'] * opts['gradient_accumulation_count']) +
            1)

        all_preds = []
        while i < iterations:
            try:
                start = time.time()
                tmp_output = predict_step(prediction)
                all_preds.append(tmp_output['preds'])

                output_predict_file = os.path.join(opts['output_dir'],
                                                   "predict_results.txt")
                duration = time.time() - start
                all_time_consumption.append(duration /
                                            opts["batches_per_step"])
            except tf.errors.OpError as e:
                raise tf.errors.ResourceExhaustedError(e.node_def, e.op,
                                                       e.message)

            i += iterations_per_step

        all_preds = np.array(all_preds)
        all_preds = all_preds.flatten()
        headers = ["index", "prediction"]
        name_list = ["mnli", "mnli-mm", "ax", "qnli", "rte"]
        if task_name in name_list:
            all_preds = glue_data.get_output_labels(opts, all_preds)

        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Writing predict results *****")
            for i in range(len(predict_examples)):
                if i == 0:
                    writer.write("%s\t%s\n" %
                                 (str(headers[0]), str(headers[1])))
                output_line = "%s\t%s\n" % (i, all_preds[i])
                writer.write(output_line)
Example #31
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (start_logits, end_logits) = create_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      seq_length = modeling.get_shape_list(input_ids)[1]

      def compute_loss(logits, positions):
        one_hot_positions = tf.one_hot(
            positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        loss = -tf.reduce_mean(
            tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
        return loss

      start_positions = features["start_positions"]
      end_positions = features["end_positions"]

      start_loss = compute_loss(start_logits, start_positions)
      end_loss = compute_loss(end_logits, end_positions)

      total_loss = (start_loss + end_loss) / 2.0

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
      predictions = {
          "unique_ids": unique_ids,
          "start_logits": start_logits,
          "end_logits": end_logits,
      }
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    else:
      raise ValueError(
          "Only TRAIN and PREDICT modes are supported: %s" % (mode))

    return output_spec
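
# -----------------------------------------------------------------------------
# Hedged numpy restatement (not part of the original script) of compute_loss
# above: the cross-entropy between the position logits and the gold start/end
# indices. The logits and positions below are made-up values.
# -----------------------------------------------------------------------------
import numpy as np

def span_loss(logits, positions):
    # logits: [batch, seq_length] scores over token positions
    # positions: [batch] gold position indices
    log_probs = logits - np.log(np.sum(np.exp(logits), axis=-1, keepdims=True))
    return -np.mean(log_probs[np.arange(len(positions)), positions])

logits = np.array([[2.0, 0.5, -1.0], [0.1, 0.2, 3.0]])  # seq_length = 3
start_positions = np.array([0, 2])
print(span_loss(logits, start_positions))  # loss over the start positions only
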
Example #32
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (start_logits, end_logits) = create_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      seq_length = modeling.get_shape_list(input_ids)[1]

      def compute_loss(logits, positions):
        one_hot_positions = tf.one_hot(
            positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        loss = -tf.reduce_mean(
            tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
        return loss

      start_positions = features["start_positions"]
      end_positions = features["end_positions"]

      start_loss = compute_loss(start_logits, start_positions)
      end_loss = compute_loss(end_logits, end_positions)

      total_loss = (start_loss + end_loss) / 2.0

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
      predictions = {
          "unique_ids": unique_ids,
          "start_logits": start_logits,
          "end_logits": end_logits,
      }
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    else:
      raise ValueError(
          "Only TRAIN and PREDICT modes are supported: %s" % (mode))

    return output_spec
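The compute_loss helper in this example averages a one-hot cross-entropy over the predicted start and end positions. For reference, the same quantity can be written with the sparse cross-entropy op; the standalone sketch below is an equivalent reformulation, not code taken from the example.

import tensorflow as tf

# Equivalent, standalone form of the start/end span loss used above:
# -mean(sum(one_hot(pos) * log_softmax(logits), axis=-1)) equals the sparse
# softmax cross-entropy between the logits and the integer positions.
def span_loss(start_logits, end_logits, start_positions, end_positions):
  start_loss = tf.reduce_mean(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=start_positions, logits=start_logits))
  end_loss = tf.reduce_mean(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=end_positions, logits=end_logits))
  return (start_loss + end_loss) / 2.0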
Example #33
    def model_fn(features, labels, mode, params):
        tf.compat.v1.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.compat.v1.logging.info("  name = %s, shape = %s" %
                                      (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint and (hvd is None or hvd.rank() == 0):
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.compat.v1.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name,
                                      var.shape, init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, hvd,
                                                     False, use_fp16)

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int64)
                accuracy = tf.compat.v1.metrics.accuracy(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example)
                return {"eval_accuracy": accuracy}

            eval_metric_ops = metric_fn(per_example_loss, label_ids, logits,
                                        is_real_example)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                eval_metric_ops=eval_metric_ops)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int64)
                accuracy = tf.compat.v1.metrics.accuracy(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example)
                loss = tf.compat.v1.metrics.mean(values=per_example_loss,
                                                 weights=is_real_example)
                #recall = tf.compat.v1.metrics.recall(label_ids,predictions,num_labels)
                recall, op_rec = tf.compat.v1.metrics.recall(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example)
                #precision = tf.compat.v1.metrics.precision(label_ids,predictions,num_labels)
                precision, op_prec = tf.compat.v1.metrics.precision(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example)
                #f = tf_metrics.f1(label_ids,predictions,num_labels)
                FN = tf.metrics.false_negatives(labels=label_ids,
                                                predictions=predictions)
                FP = tf.metrics.false_positives(labels=label_ids,
                                                predictions=predictions)
                TP = tf.metrics.true_positives(labels=label_ids,
                                               predictions=predictions)
                TN = tf.metrics.true_negatives(labels=label_ids,
                                               predictions=predictions)

                #MCC = (TP * TN - FP * FN) / ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) ** 0.5
                #MCC_op = tf.group(FN_op, TN_op, TP_op, FP_op, tf.identity(MCC, name="MCC"))
                f1 = 2 * (precision * recall) / (precision + recall)
                f1_op = tf.group(op_rec, op_prec, tf.identity(f1, name="f1"))

                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                    "recall": (recall, op_rec),
                    "precision": (precision, op_prec),
                    "f-score": (f1, f1_op),
                    "tp": TP,
                    "tn": TN,
                    "fp": FP,
                    "fn": FN,
                    #"MCC": (MCC, MCC_op)
                }

            eval_metric_ops = metric_fn(per_example_loss, label_ids, logits,
                                        is_real_example)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)
        else:
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions={
                                                         "probabilities":
                                                         probabilities,
                                                         "logits": logits
                                                     })
        return output_spec
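One caveat about the eval branch above: f1 = 2 * (precision * recall) / (precision + recall) evaluates to NaN whenever precision and recall are both zero. An optional guard, sketched here and not part of the original snippet, is tf.math.divide_no_nan (assuming a TF version that provides it):

# Sketch: NaN-safe F1 for the metric_fn above. Assumes `precision`, `recall`,
# `op_prec` and `op_rec` are the metric value/update tensors computed there.
f1 = tf.math.divide_no_nan(2.0 * precision * recall, precision + recall)
f1_op = tf.group(op_rec, op_prec, tf.identity(f1, name="f1"))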
Example #34
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions, weights=is_real_example)
        loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }

      eval_metrics = (metric_fn,
                      [per_example_loss, label_ids, logits, is_real_example])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec
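Note that the EVAL branch above packs metric_fn and its tensor arguments into a (fn, tensor_list) tuple because TPUEstimatorSpec evaluates metrics on the host rather than on the TPU. On the prediction side, the probabilities exported by the final branch come back as plain NumPy arrays; a minimal consumption sketch, assuming an estimator built from this model_fn and a predict_input_fn yielding the same features:

import numpy as np

# Sketch: reading back the PREDICT output of the model_fn above.
# `estimator` and `predict_input_fn` are assumed to exist elsewhere.
for prediction in estimator.predict(input_fn=predict_input_fn,
                                    yield_single_examples=True):
  probabilities = prediction["probabilities"]   # shape: [num_labels]
  predicted_label = int(np.argmax(probabilities))
  print(predicted_label, probabilities)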
Example #35
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            from tensorflow.python.estimator.model_fn import EstimatorSpec

            tf.compat.v1.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.compat.v1.logging.info("  name = %s, shape = %s" %
                                          (name, features[name].shape))

            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]
            label_ids = features["label_ids"]

            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            (total_loss, per_example_loss, logits,
             probabilities) = BertSim.create_model(bert_config, is_training,
                                                   input_ids, input_mask,
                                                   segment_ids, label_ids,
                                                   num_labels,
                                                   use_one_hot_embeddings)

            tvars = tf.compat.v1.trainable_variables()
            initialized_variable_names = {}

            if init_checkpoint:
                (assignment_map, initialized_variable_names) \
                    = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
                tf.compat.v1.train.init_from_checkpoint(
                    init_checkpoint, assignment_map)

            tf.compat.v1.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.compat.v1.logging.info("  name = %s, shape = %s%s",
                                          var.name, var.shape, init_string)

            if mode == tf.estimator.ModeKeys.TRAIN:

                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps, False)

                output_spec = EstimatorSpec(mode=mode,
                                            loss=total_loss,
                                            train_op=train_op)
            elif mode == tf.estimator.ModeKeys.EVAL:

                def metric_fn(per_example_loss, label_ids, logits):
                    predictions = tf.argmax(input=logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    accuracy = tf.compat.v1.metrics.accuracy(
                        label_ids, predictions)
                    auc = tf.compat.v1.metrics.auc(label_ids, predictions)
                    loss = tf.compat.v1.metrics.mean(per_example_loss)
                    return {
                        "eval_accuracy": accuracy,
                        "eval_auc": auc,
                        "eval_loss": loss,
                    }

                eval_metrics = metric_fn(per_example_loss, label_ids, logits)
                output_spec = EstimatorSpec(mode=mode,
                                            loss=total_loss,
                                            eval_metric_ops=eval_metrics)
            else:
                output_spec = EstimatorSpec(mode=mode,
                                            predictions=probabilities)

            return output_spec
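This last variant returns a plain EstimatorSpec rather than a TPUEstimatorSpec, so it is meant for an ordinary tf.estimator.Estimator on CPU/GPU. A minimal sketch of that setup follows; the model directory, batch size, train_input_fn and num_train_steps are illustrative assumptions.

# Sketch: running the EstimatorSpec-based model_fn above on a regular
# tf.estimator.Estimator. All literal values here are placeholders.
run_config = tf.estimator.RunConfig(
    model_dir="./bert_sim_output",
    save_checkpoints_steps=1000)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,                  # the closure defined above
    config=run_config,
    params={"batch_size": 32})          # forwarded to model_fn as `params`

estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)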