Example #1
0
    is_training=training,
    input_ids=input_ids,
    input_mask=input_mask,
    segment_ids=segment_ids,
    history_answer_marker=history_answer_marker,
    use_one_hot_embeddings=False
    )
    
(start_logits, end_logits) = cqa_model(bert_representation)


tvars = tf.trainable_variables()

initialized_variable_names = {}
if FLAGS.init_checkpoint:
    (assignment_map, initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(tvars, 
                                                                                              FLAGS.init_checkpoint)
    tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

# compute loss
seq_length = modeling.get_shape_list(input_ids)[1]
def compute_loss(logits, positions):
    one_hot_positions = tf.one_hot(
        positions, depth=seq_length, dtype=tf.float32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    loss = -tf.reduce_mean(tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
    return loss

# get the max prob for the predicted start/end position
start_probs = tf.nn.softmax(start_logits, axis=-1)
start_prob = tf.reduce_max(start_probs, axis=-1)
end_probs = tf.nn.softmax(end_logits, axis=-1)
Example #2
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss,
         masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(), model.get_embedding_table(),
             masked_lm_positions, masked_lm_ids, masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map,
             initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                          masked_lm_weights, next_sentence_example_loss,
                          next_sentence_log_probs, next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                                 [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(
                    masked_lm_log_probs, axis=-1, output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(
                    masked_lm_example_loss, [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(
                    next_sentence_log_probs, axis=-1, output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels, predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError(
                "Only TRAIN and EVAL modes are supported: %s" % (mode))

        return output_spec
Example #3
0
def _creat_bert(is_training, features, bert_config, use_one_hot_embeddings,
                init_checkpoint):
    global initialized_variable_names
    input_ids = features["input_ids"]
    if "input_extract" in features:
        input_extract = features["input_extract"]
        input1_extract = None
        input2_extract = None
    else:
        input_extract = None
        input1_extract = features["input1_extract"]
        input2_extract = features["input2_extract"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    q_type = features["q_type"]
    label_ids = features["label_ids"]

    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)

    tvars = tf.trainable_variables()

    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assigment_map_from_checkpoint(
             tvars, init_checkpoint)

        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        print("initialing checkpoint finished")

    # tf.logging.info("**** Trainable Variables ****")
    # residue = []
    # for var in tvars:
    #     init_string = ""
    #     if var.name in initialized_variable_names:
    #         init_string = ", *INIT_FROM_CKPT*"
    #     else:
    #         residue.append(var)
    #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
    #                     init_string)

    all_layers = model.get_all_encoder_layers()
    layer_indexes = [-1, -2, -3, -4]

    predictions = {
        "input_extract": input_extract,
        "input1_extract": input1_extract,
        "input2_extract": input2_extract,
        "embedding": model.get_embedding_output(),
        "input_mask": input_mask,
        "q_type": q_type,
        "label_ids": label_ids
    }
    for (i, layer_index) in enumerate(layer_indexes):
        predictions["layer_output_%d" % i] = all_layers[layer_index]

    return predictions
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["hist_len"]
        hist_len = features['hist_len']

        print('hist_len的形状是', hist_len)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss,
         logits) = create_model(bert_config, is_training, input_ids,
                                input_mask, segment_ids, label_ids, hist_len,
                                num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(label_ids, predictions)
                loss = tf.metrics.mean(per_example_loss)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #5
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        num_gpus = n_gpus
        if is_training:
            optimizer = optimization.create_optimizer_mgpu(
                learning_rate, num_train_steps, num_warmup_steps)
        else:
            num_gpus = 1

        input_ids_list = tf.split(features["input_ids"],
                                  num_or_size_splits=num_gpus,
                                  axis=0)
        input_mask_list = tf.split(features["input_mask"],
                                   num_or_size_splits=num_gpus,
                                   axis=0)
        segment_ids_list = tf.split(features["segment_ids"],
                                    num_or_size_splits=num_gpus,
                                    axis=0)
        masked_lm_positions_list = tf.split(features["masked_lm_positions"],
                                            num_or_size_splits=num_gpus,
                                            axis=0)
        masked_lm_ids_list = tf.split(features["masked_lm_ids"],
                                      num_or_size_splits=num_gpus,
                                      axis=0)
        masked_lm_weights_list = tf.split(features["masked_lm_weights"],
                                          num_or_size_splits=num_gpus,
                                          axis=0)
        next_sentence_labels_list = tf.split(features["next_sentence_labels"],
                                             num_or_size_splits=num_gpus,
                                             axis=0)

        tower_grads = []
        train_perplexity = 0
        for index in range(num_gpus):
            with tf.name_scope('replica_%d' % index):
                with tf.device('/gpu:%d' % index):
                    model = modeling.BertModel(
                        config=bert_config,
                        is_training=is_training,
                        input_ids=input_ids_list[index],
                        input_mask=input_mask_list[index],
                        token_type_ids=segment_ids_list[index],
                        use_one_hot_embeddings=use_one_hot_embeddings)

                    (masked_lm_loss, masked_lm_example_loss,
                     masked_lm_log_probs) = get_masked_lm_output(
                         bert_config, model.get_sequence_output(),
                         model.get_embedding_table(),
                         masked_lm_positions_list[index],
                         masked_lm_ids_list[index],
                         masked_lm_weights_list[index])

                    (next_sentence_loss, next_sentence_example_loss,
                     next_sentence_log_probs) = get_next_sentence_output(
                         bert_config, model.get_pooled_output(),
                         next_sentence_labels_list[index])

                    total_loss = masked_lm_loss + next_sentence_loss

                    tvars = tf.trainable_variables()

                    scaffold_fn = None
                    initialized_variable_names = {}
                    if init_checkpoint and index == 0:
                        (assignment_map, initialized_variable_names
                         ) = modeling.get_assigment_map_from_checkpoint(
                             tvars, init_checkpoint)
                        for var in tvars:
                            param_name = var.name[:-2]
                            tf.get_variable(name=param_name + "/adam_m",
                                            shape=var.shape.as_list(),
                                            dtype=tf.float32,
                                            trainable=False,
                                            initializer=tf.zeros_initializer())
                            tf.get_variable(name=param_name + "/adam_v",
                                            shape=var.shape.as_list(),
                                            dtype=tf.float32,
                                            trainable=False,
                                            initializer=tf.zeros_initializer())

                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)

                        tf.logging.info("**** Trainable Variables ****")
                        for var in tvars:
                            init_string = ""
                            if var.name in initialized_variable_names:
                                init_string = ", *INIT_FROM_CKPT*"
                            tf.logging.info("  name = %s, shape = %s%s",
                                            var.name, var.shape, init_string)
                    if is_training:
                        # reuse variables
                        tf.get_variable_scope().reuse_variables()
                        loss = total_loss
                        # get gradients
                        grads = optimizer.compute_gradients(
                            loss,
                            aggregation_method=tf.AggregationMethod.
                            EXPERIMENTAL_TREE,
                        )
                        tower_grads.append(grads)
                        # keep track of loss across all GPUs
                        train_perplexity += loss

        if mode == tf.estimator.ModeKeys.TRAIN:
            global_step = tf.train.get_or_create_global_step()
            new_global_step = global_step + 1

            average_grads = average_gradients(tower_grads, None, None)
            average_grads, norm_summary_ops = clip_grads(
                average_grads, 1.0, True, global_step)

            train_op = optimizer.apply_gradients(average_grads)
            train_op = tf.group(train_op,
                                [global_step.assign(new_global_step)])

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=train_perplexity / num_gpus,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs,
                masked_lm_ids_list[0], masked_lm_weights_list[0],
                next_sentence_example_loss, next_sentence_log_probs,
                next_sentence_labels_list[0]
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #6
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (start_logits, end_logits) = create_model(
            bert_config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map,
             initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            seq_length = modeling.get_shape_list(input_ids)[1]

            def compute_loss(logits, positions):
                one_hot_positions = tf.one_hot(
                    positions, depth=seq_length, dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                loss = -tf.reduce_mean(
                    tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
                return loss

            start_positions = features["start_positions"]
            end_positions = features["end_positions"]

            start_loss = compute_loss(start_logits, start_positions)
            end_loss = compute_loss(end_logits, end_positions)

            total_loss = (start_loss + end_loss) / 2.0

            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "unique_ids": unique_ids,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        else:
            raise ValueError(
                "Only TRAIN and PREDICT modes are supported: %s" % (mode))

        return output_spec
Example #7
0
        new_bert_representation, new_mtl_input, attention_weights = history_attention_net(
            bert_representation, history_attention_input, mtl_input,
            slice_mask, slice_num)

(start_logits, end_logits) = cqa_model(new_bert_representation)
yesno_logits = yesno_model(new_mtl_input)
followup_logits = followup_model(new_mtl_input)
domain_logits = domain_model(new_mtl_input)

tvars = tf.trainable_variables()
# print(tvars)

initialized_variable_names = {}
if FLAGS.init_checkpoint:
    (assignment_map,
     initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
         tvars, FLAGS.init_checkpoint)
    tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)
# print('tvars',tvars)
# print('initialized_variable_names',initialized_variable_names)

# compute loss
seq_length = modeling.get_shape_list(input_ids)[1]


def compute_loss(logits, positions):
    one_hot_positions = tf.one_hot(positions,
                                   depth=seq_length,
                                   dtype=tf.float32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    loss = -tf.reduce_mean(
        tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
Example #8
0
    def model_fn(features, labels, mode, params):
        example_id = features["example_id"]

        pos_input_ids = features["pos_input_ids"]
        pos_input_mask = features["pos_input_mask"]
        pos_segment_ids = features["pos_segment_ids"]

        neg_input_ids = features["neg_input_ids"]
        neg_input_mask = features["neg_input_mask"]
        neg_segment_ids = features["neg_segment_ids"]

        is_training = (mode == tfes.estimator.ModeKeys.TRAIN)

        pos_logits = create_model_or_use_model(bert_config=bert_config,
                                               is_training=is_training,
                                               input_ids=pos_input_ids,
                                               input_mask=pos_input_mask,
                                               segment_ids=pos_segment_ids)
        neg_logits = create_model_or_use_model(bert_config=bert_config,
                                               is_training=is_training,
                                               input_ids=neg_input_ids,
                                               input_mask=neg_input_mask,
                                               segment_ids=neg_segment_ids)

        tvars = tf.trainable_variables()

        scaffold_fn = None
        initialized_variable_names = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # tf.logging.info("**** Trainable Variables ****")
        # for var in tvars:
        #     init_string = ""
        #     if var.name[6:] in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                     init_string)

        if mode == tfes.estimator.ModeKeys.TRAIN:
            original_loss = tf.nn.relu(margin - pos_logits + neg_logits)
            total_loss = tf.reduce_mean(original_loss)
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, False)

            output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                               loss=total_loss,
                                               train_op=train_op,
                                               scaffold_fn=scaffold_fn)
        elif mode == tfes.estimator.ModeKeys.PREDICT:
            predictions = {
                "pos_logit": pos_logits,
                "neg_logit": neg_logits,
                "example_id": example_id
            }
            output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                               predictions=predictions,
                                               scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                             mode)

        return output_spec
Example #9
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss,
         logits) = create_model(bert_config, is_training, input_ids,
                                input_mask, segment_ids, label_ids,
                                use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            #       def metric_fn(per_example_loss, label_ids, logits):
            #         predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            #         accuracy = tf.metrics.accuracy(label_ids, predictions)
            #         loss = tf.metrics.mean(per_example_loss)
            #         return {
            #             "eval_accuracy": accuracy,
            #             "eval_loss": loss,
            #         }
            def metric_fn(per_example_loss, label_ids, logits):
                # Display labels and predictions
                concat1 = tf.contrib.metrics.streaming_concat(logits)
                concat2 = tf.contrib.metrics.streaming_concat(label_ids)

                # Compute Pearson correlation
                pearson = tf.contrib.metrics.streaming_pearson_correlation(
                    logits, label_ids)

                # Compute MSE
                # mse = tf.metrics.mean(per_example_loss)
                mse = tf.metrics.mean_squared_error(label_ids, logits)

                # Compute Spearman correlation
                size = tf.size(logits)
                indice_of_ranks_pred = tf.nn.top_k(logits, k=size)[1]
                indice_of_ranks_label = tf.nn.top_k(label_ids, k=size)[1]
                rank_pred = tf.nn.top_k(-indice_of_ranks_pred, k=size)[1]
                rank_label = tf.nn.top_k(-indice_of_ranks_label, k=size)[1]
                rank_pred = tf.to_float(rank_pred)
                rank_label = tf.to_float(rank_label)
                spearman = tf.contrib.metrics.streaming_pearson_correlation(
                    rank_pred, rank_label)

                return {
                    'pred': concat1,
                    'label_ids': concat2,
                    'pearson': pearson,
                    'spearman': spearman,
                    'MSE': mse
                }

            eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
Example #10
0
def _creat_bert(is_training, features, bert_config, use_one_hot_embeddings, init_checkpoint, layer_num, plus_position):
    """Build a BERT graph over `features` and return tensors for prediction.

    The (misspelled) name is kept unchanged for existing callers.

    Args:
        is_training: bool, enables dropout etc. inside BertModel.
        features: dict of input tensors. Must contain "input_ids",
            "input_mask", "segment_ids", "q_type", "label_ids" plus one of
            the *_extract layouts documented below.
        bert_config: BertConfig forwarded to modeling.BertModel.
        use_one_hot_embeddings: bool, forwarded to modeling.BertModel.
        init_checkpoint: optional checkpoint path; when set, matching
            variables are restored from it.
        layer_num: encoder layer index exposed as "output_layer".
        plus_position: forwarded to modeling.BertModel.

    Returns:
        dict of named tensors: the extract markers (unused ones are None),
        embedding output, input mask, question type, labels, the selected
        encoder layer, and the final sequence output.
    """
    # Module-level cache of which variables came from the checkpoint;
    # read by other code at file scope.
    global initialized_variable_names

    input_ids = features["input_ids"]
    # Three mutually exclusive extract layouts:
    #   1. a single combined "input_extract";
    #   2. "input1_extract" + "input2_extract" (no third input);
    #   3. "input1_extract" + "input2_extract" + "input3_extract".
    if "input_extract" in features:
        input_extract = features["input_extract"]
        input1_extract = None
        input2_extract = None
        input3_extract = None
    elif "input3_extract" not in features:
        input_extract = None
        input1_extract = features["input1_extract"]
        input2_extract = features["input2_extract"]
        input3_extract = None
    else:
        input_extract = None
        input1_extract = features["input1_extract"]
        input2_extract = features["input2_extract"]
        input3_extract = features["input3_extract"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    q_type = features["q_type"]
    label_ids = features["label_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        output_layer_index=layer_num,
        plus_position=plus_position)

    tvars = tf.trainable_variables()

    if init_checkpoint:
        # Map checkpoint variables onto the freshly built graph and restore.
        (assignment_map,
         initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    predictions = {"input_extract": input_extract,
                   "input1_extract": input1_extract,
                   "input2_extract": input2_extract,
                   "input3_extract": input3_extract,
                   "embedding": model.get_embedding_output(),
                   "input_mask": input_mask,
                   "q_type": q_type,
                   "label_ids": label_ids,
                   "output_layer": model.get_output_layer(),
                   "last_layer": model.get_sequence_output()}

    return predictions
Пример #11
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (PREDICT only).

        Builds a non-training BERT graph over the batch in `features`,
        optionally restores weights from `init_checkpoint`, and exposes the
        final encoder layer plus pass-through fields as predictions.
        Raises ValueError for any mode other than PREDICT.
        """
        # Module-level cache of checkpoint-initialized variable names;
        # also read outside this function.
        global initialized_variable_names

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_extract = features["input_extract"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        q_type = features["q_type"]
        label_ids = features["label_ids"]

        # is_training=False: this model_fn only serves inference.
        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()

        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:
                # On TPU the restore must run inside the Scaffold so it
                # executes on the worker, not at graph-build time.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # Export only the final encoder layer (index -1), keyed as
        # "layer_output_0" for compatibility with downstream consumers.
        all_layers = model.get_all_encoder_layers()
        layer_indexes = [-1]

        predictions = {
            "input_extract": input_extract,
            "q_type": q_type,
            "label_ids": label_ids
        }
        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]
        output_spec = tpu.TPUEstimatorSpec(mode=mode,
                                           predictions=predictions,
                                           scaffold_fn=scaffold_fn)

        return output_spec
Пример #12
0
  def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator.

    Data-parallel multi-GPU variant: at train time the input batch is split
    into `n_gpus` shards, one model replica ("tower") is built per GPU,
    per-tower gradients are averaged, and a single train op is returned.
    Outside training only one replica is built.

    NOTE(review): after the tower loop, `logits`, `per_example_loss`,
    `total_loss` and `scaffold_fn` are whatever the LAST tower produced.
    Since num_gpus is forced to 1 when not training, that tower covers the
    whole batch in PREDICT/EVAL — confirm callers never evaluate with more
    than one replica.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    num_gpus = n_gpus
    if is_training:
      optimizer = optimization.create_optimizer_mgpu(learning_rate, num_train_steps, num_warmup_steps)
    else:
      # Eval/predict run a single replica so the whole batch stays together.
      num_gpus=1

    # Shard every input tensor along the batch axis, one slice per replica.
    input_ids_list = tf.split(features["input_ids"], num_or_size_splits=num_gpus, axis=0)
    input_mask_list = tf.split(features["input_mask"], num_or_size_splits=num_gpus, axis=0)
    segment_ids_list = tf.split(features["segment_ids"], num_or_size_splits=num_gpus, axis=0)
    label_ids_list = tf.split(features["label_ids"], num_or_size_splits=num_gpus, axis=0)

    tower_grads = []  # per-GPU gradient lists, averaged after the loop
    train_perplexity = 0  # accumulates per-tower loss (despite the name)
    for index in range(num_gpus):
      with tf.name_scope('replica_%d' % index):
        with tf.device('/gpu:%d' % index):
          (total_loss, per_example_loss, logits) = create_model(
              bert_config, is_training,
              input_ids_list[index], input_mask_list[index], segment_ids_list[index], label_ids_list[index],
              num_labels, use_one_hot_embeddings)

          tvars = tf.trainable_variables()

          scaffold_fn = None
          if init_checkpoint:
            (assignment_map,
             initialized_variable_names) = modeling.get_assigment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # Pre-create the Adam slot variables for every trainable
            # variable — presumably so optimizer state in the checkpoint can
            # be mapped onto them; TODO confirm against the checkpoint layout.
            for var in tvars:
              param_name = var.name[:-2]  # strip the trailing ":0"
              tf.get_variable(
                name=param_name + "/adam_m",
                shape=var.shape.as_list(),
                dtype=tf.float32,
                trainable=False,
                initializer=tf.zeros_initializer())
              tf.get_variable(
                name=param_name + "/adam_v",
                shape=var.shape.as_list(),
                dtype=tf.float32,
                trainable=False,
                initializer=tf.zeros_initializer())
            if use_tpu:
              # On TPU the restore has to run inside the Scaffold.
              def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

              scaffold_fn = tpu_scaffold
            else:
              tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

          tf.logging.info("**** Trainable Variables ****")
          tf.logging.info('device: %d init' % index)
          # Variables are shared across towers; log them only once.
          if index == 0:
            for var in tvars:
              init_string = ""
              if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
              tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                              init_string)
          if is_training:
            # reuse variables
            # Subsequent towers must reuse the variables created by tower 0.
            tf.get_variable_scope().reuse_variables()
            loss = total_loss
            # get gradients
            grads = optimizer.compute_gradients(
              loss,
              aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE,
            )
            tower_grads.append(grads)
            # keep track of loss across all GPUs
            train_perplexity += loss

    if mode == tf.estimator.ModeKeys.TRAIN:
      # The global step is incremented manually because apply_gradients is
      # not handed a global_step argument here.
      global_step = tf.train.get_or_create_global_step()
      new_global_step = global_step + 1

      average_grads = average_gradients(tower_grads, None, None)
      #average_grads, norm_summary_ops = clip_grads(average_grads, 1.0, True, global_step)
      train_op = optimizer.apply_gradients(average_grads)
      train_op = tf.group(train_op, [global_step.assign(new_global_step)])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=train_perplexity / n_gpus,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:
      # Single-replica path: `logits` comes from the one tower built above.
      predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={
              'predictions': predictions,
          })
    elif mode == tf.estimator.ModeKeys.EVAL:
      def metric_fn(per_example_loss, label_ids, logits):
        # Classification eval: accuracy over argmax predictions + mean loss.
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(label_ids, predictions)
        loss = tf.metrics.mean(per_example_loss)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }

      eval_metrics = (metric_fn, [per_example_loss, label_ids_list[0], logits])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec