Пример #1
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Builds the start/end span model and returns a `TPUEstimatorSpec`.

    `features` must provide `unique_ids`, `input_ids`, `input_mask` and
    `segment_ids`; in TRAIN mode it must also provide `start_positions` and
    `end_positions`. `labels` and `params` are accepted but not used.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN or PREDICT.

    Raises:
      ValueError: if `mode` is anything other than TRAIN or PREDICT.
    """
    mode_keys = tf.estimator.ModeKeys

    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" %
                      (feature_name, features[feature_name].shape))

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    (start_logits, end_logits) = create_model(
        bert_config=bert_config,
        is_training=(mode == mode_keys.TRAIN),
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()

    # Names of variables that will be restored from `init_checkpoint`;
    # stays empty when no checkpoint was supplied.
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
      assignment_map, restored_names = (
          modeling.get_assignment_map_from_checkpoint(trainable_vars,
                                                      init_checkpoint))

      def tpu_scaffold():
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      if use_tpu:
        # On TPU the restore is deferred to the scaffold function.
        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
      suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
      tf.logging.info("  name = %s, shape = %s%s", variable.name,
                      variable.shape, suffix)

    if mode == mode_keys.TRAIN:
      seq_length = modeling.get_shape_list(input_ids)[1]

      def compute_loss(logits, positions):
        """Mean negative log-likelihood of `positions` under `logits`."""
        position_one_hot = tf.one_hot(
            positions, depth=seq_length, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        picked_log_probs = tf.reduce_sum(
            position_one_hot * log_probs, axis=-1)
        return -tf.reduce_mean(picked_log_probs)

      start_positions = features["start_positions"]
      end_positions = features["end_positions"]

      start_loss = compute_loss(start_logits, start_positions)
      end_loss = compute_loss(end_logits, end_positions)

      # Average of the start-position and end-position losses.
      total_loss = (start_loss + end_loss) / 2.0

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    if mode == mode_keys.PREDICT:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={
              "unique_ids": unique_ids,
              "start_logits": start_logits,
              "end_logits": end_logits,
          },
          scaffold_fn=scaffold_fn)

    raise ValueError(
        "Only TRAIN and PREDICT modes are supported: %s" % (mode))
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (classifier with per-class AUC).

        Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
        `features`, builds the model through `create_model`, optionally
        restores variables from `init_checkpoint`, and returns a
        `tf.contrib.tpu.TPUEstimatorSpec` for the requested `mode`:

        * TRAIN: loss plus the train op from `optimization.create_optimizer`.
        * EVAL: one AUC metric per label column (keys "0", "1", ...) and
          "eval_loss".
        * any other mode: `probabilities` as predictions, plus an export
          signature exposing `probabilities` and the argmax `classid`.

        `labels` and `params` are accepted but not used.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Build the training op for the total loss.
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, probabilities):
                """Returns per-class AUC metrics and the mean eval loss.

                All tensors are received as arguments rather than captured
                from the enclosing scope, so the function only depends on
                what is handed over through `eval_metrics`.
                """
                probs_per_class = tf.split(probabilities, num_labels, axis=-1)
                labels_per_class = tf.split(label_ids, num_labels, axis=-1)
                # One AUC metric per class, keyed by the class index.
                eval_dict = {}
                for j, class_probs in enumerate(probs_per_class):
                    class_labels = tf.cast(labels_per_class[j], dtype=tf.int32)
                    current_auc, update_op_auc = tf.metrics.auc(
                        class_labels, class_probs)
                    eval_dict[str(j)] = (current_auc, update_op_auc)
                eval_dict['eval_loss'] = tf.metrics.mean(
                    values=per_example_loss)
                return eval_dict

            # Pass `probabilities` (what the AUC is computed on) explicitly
            # instead of `logits`, which metric_fn never used.
            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, probabilities])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            # Prediction / SavedModel export.
            # NOTE(review): TPUEstimator may require `predictions` to be a
            # dict when predicting on TPU -- confirm before running there.
            predictions = tf.argmax(
                probabilities, axis=-1, output_type=tf.int32)
            export_outputs = {
                'classes':
                tf.estimator.export.PredictOutput({
                    "probabilities": probabilities,
                    "classid": predictions
                })
            }
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=probabilities,
                scaffold_fn=scaffold_fn,
                export_outputs=export_outputs)

        return output_spec
Пример #3
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Builds the pre-training graph and returns a `TPUEstimatorSpec`.

        Reads the masked-LM and next-sentence inputs from `features`, builds
        `modeling.BertModel`, and sums the masked-LM loss and the
        next-sentence loss into the total loss. `labels` and `params` are
        accepted but not used.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN or EVAL.

        Raises:
            ValueError: if `mode` is neither TRAIN nor EVAL.
        """
        mode_keys = tf.estimator.ModeKeys

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=(mode == mode_keys.TRAIN),
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        # Masked-LM head; it receives both embedding tables of the model.
        masked_lm_outputs = get_masked_lm_output(
            bert_config, model.get_sequence_output(),
            model.get_embedding_table(), model.get_embedding_table_2(),
            masked_lm_positions, masked_lm_ids, masked_lm_weights)
        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = masked_lm_outputs

        # Next-sentence head on top of the pooled output.
        next_sentence_outputs = get_next_sentence_output(
            bert_config, model.get_pooled_output(), next_sentence_labels)
        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = next_sentence_outputs

        total_loss = masked_lm_loss + next_sentence_loss

        trainable_vars = tf.trainable_variables()

        restored_names = {}
        print("init_checkpoint:", init_checkpoint)
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            if use_tpu:
                # On TPU the restore is deferred to the scaffold function.
                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if mode == mode_keys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode != mode_keys.EVAL:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Returns accuracy and mean loss for both pre-training tasks."""
            # Masked-LM metrics, weighted by `masked_lm_weights`.
            lm_log_probs_2d = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            lm_predictions = tf.argmax(
                lm_log_probs_2d, axis=-1, output_type=tf.int32)
            lm_loss_flat = tf.reshape(masked_lm_example_loss, [-1])
            lm_ids_flat = tf.reshape(masked_lm_ids, [-1])
            lm_weights_flat = tf.reshape(masked_lm_weights, [-1])
            lm_accuracy = tf.metrics.accuracy(
                labels=lm_ids_flat,
                predictions=lm_predictions,
                weights=lm_weights_flat)
            lm_mean_loss = tf.metrics.mean(
                values=lm_loss_flat, weights=lm_weights_flat)

            # Next-sentence metrics (unweighted).
            ns_log_probs_2d = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            ns_predictions = tf.argmax(
                ns_log_probs_2d, axis=-1, output_type=tf.int32)
            ns_labels_flat = tf.reshape(next_sentence_labels, [-1])
            ns_accuracy = tf.metrics.accuracy(
                labels=ns_labels_flat, predictions=ns_predictions)
            ns_mean_loss = tf.metrics.mean(values=next_sentence_example_loss)

            return {
                "masked_lm_accuracy": lm_accuracy,
                "masked_lm_loss": lm_mean_loss,
                "next_sentence_accuracy": ns_accuracy,
                "next_sentence_loss": ns_mean_loss,
            }

        metric_inputs = [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)
Пример #4
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (model predicting `vals`).

        Reads `input_ids`, `input_mask`, `segment_ids` and `vals` from
        `features`, builds the model through `create_model`, optionally
        restores variables from `init_checkpoint`, and returns a
        `tf.contrib.tpu.TPUEstimatorSpec` for the requested `mode`:

        * TRAIN: loss plus the train op from `optimization.create_optimizer`.
        * EVAL: "eval_loss", the mean squared error between `vals` and the
          model's predictions.
        * any other mode: the predicted values as predictions.

        `labels` and `params` are accepted but not used.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        vals = features["vals"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, pred_vals) = create_model(bert_config, is_training,
                                               input_ids, input_mask,
                                               segment_ids, vals,
                                               use_one_hot_embeddings)

        tvars = tf.trainable_variables()

        # Must be bound even without a checkpoint: the logging loop below
        # reads it unconditionally and would otherwise raise NameError.
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(preds, vals):
                """Returns the streaming MSE between `vals` and `preds`."""
                return {
                    "eval_loss": tf.metrics.mean_squared_error(vals, preds),
                }

            eval_metrics = (metric_fn, [pred_vals, vals])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=pred_vals, scaffold_fn=scaffold_fn)
        return output_spec
Пример #5
0
    def build(self, is_training=True):
        """Constructs the classification graph inside `self.graph`.

        Creates the `self.x` / `self.y` placeholders, optionally builds an
        attention mask and a dense pre-embedding, stacks `self.max_depth`
        `transformer_encoder` layers, pools or flattens the result, and adds
        a dense layer producing `self.logits`, `self.preds` and `self.acc`.
        When `is_training` is true it also creates the label-smoothed
        cross-entropy loss, the train op, and the merged summaries. A
        `tf.train.Saver` is always created at the end.

        Args:
            is_training: whether to add the loss, optimizer and summaries.
        """
        banner_width = 100
        print("#" * banner_width)
        print(self.prembed)
        with self.graph.as_default():
            self.x = tf.placeholder(
                tf.float32, [None, self.max_len, self.n_channel])
            self.y = tf.placeholder(tf.int32, [None])

            num_data = tf.shape(self.x)[0]

            # Optional attention mask: one mask row from `get_mask`, tiled
            # across the batch.
            masks = None
            if self.masking:
                print("*" * banner_width)
                print("Masking")
                pitch_size = int(np.sqrt(self.max_len))
                flat_mask = self.get_mask(pitch_size=pitch_size).flatten()
                mask_row = tf.convert_to_tensor(
                    np.expand_dims(flat_mask, axis=0), dtype=tf.bool)
                tiled_mask = tf.tile(mask_row, [num_data, 1])
                masks = create_attention_mask_from_input_mask(
                    self.x, tiled_mask)

            print("self.prembed", self.prembed)
            emb_in = self.x
            if self.prembed:
                print("#prembed with positional embedding no bias")
                projected = tf.layers.dense(
                    emb_in,
                    self.prembed_dim,
                    activation=tf.tanh,
                    use_bias=False)
                emb_in = embedding_postprocessor(
                    projected, max_position_embeddings=200)

            # Static size of the last dimension of the encoder input.
            print("my_emb_dim", emb_in.get_shape().as_list()[-1])
            emb_dim = emb_in.get_shape().as_list()[-1]
            print("emb_dim", emb_dim)

            # Encoder stack: the first layer consumes `emb_in`, each later
            # layer consumes the previous layer's output.
            attens = []
            layer_input = emb_in
            for depth in range(self.max_depth):
                enc_embs, atten = transformer_encoder(
                    layer_input,
                    num_units=emb_dim,
                    num_heads=self.num_head,
                    num_hidden=self.num_hidden,
                    dropout_rate=self.drop_rate,
                    attention_dropout=self.attention_dropout,
                    mask=masks,
                    scope="Transformer_Encoder_" + str(depth))
                attens.append(atten)
                layer_input = enc_embs

            if self.pooling:
                print("*" * banner_width)
                print("Pooling")
                pooled_data = self.simple_pooler(enc_embs)
                enc_embs = tf.reshape(pooled_data, [num_data, emb_dim])
            else:
                print("8" * banner_width)
                print("No Pooling")
                enc_embs = tf.reshape(
                    enc_embs, [num_data, self.max_len * emb_dim])

            self.enc_embs = enc_embs
            # 1.0 for every label different from -99, 0.0 otherwise.
            self.istarget = tf.to_float(tf.not_equal(self.y, -99))

            self.logits = tf.layers.dense(enc_embs, self.num_classes)
            self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))
            num_correct = tf.reduce_sum(
                tf.to_float(tf.equal(self.preds, self.y)))
            self.acc = num_correct / tf.reduce_sum(self.istarget)
            self.attens = attens
            tf.summary.scalar('acc', self.acc)

            if is_training:
                # Loss: cross-entropy against label-smoothed one-hot targets.
                one_hot_labels = tf.one_hot(self.y, depth=self.num_classes)
                self.y_smoothed = label_smoothing(one_hot_labels)
                self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits, labels=self.y_smoothed)
                print("self.loss")
                self.mean_loss = (tf.reduce_sum(self.loss) /
                                  tf.reduce_sum(self.istarget))

                # Training scheme: `create_optimizer` returns the train op
                # together with the learning-rate tensor.
                optimizer_outputs = optimization.create_optimizer(
                    self.mean_loss,
                    self.start_learning_rate,
                    num_train_steps=10000,
                    num_warmup_steps=200,
                    use_tpu=False)
                self.train_op, self.learning_rate = optimizer_outputs

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                tf.summary.scalar('learning rate', self.learning_rate)
                self.merged = tf.summary.merge_all()
            self.saver = tf.train.Saver()
Пример #6
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for a plain Estimator (classifier, per-class AUC).

        Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
        `features`, builds the model through `create_model`, optionally
        restores variables from `init_checkpoint`, and returns a
        `tf.estimator.EstimatorSpec` for the requested `mode`:

        * TRAIN: loss, train op, and a hook logging loss/accuracy every 10
          iterations.
        * EVAL: one AUC metric per label column (keys "0", "1", ...),
          "eval_loss", and a hook logging loss/accuracy every 2 iterations.
        * any other mode: `{"probabilities": probabilities}` as predictions.

        `labels` and `params` are accepted but not used.
        """
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, probabilities) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        # Threshold the probabilities at 0.5 and track a single streaming
        # accuracy. The same metric feeds the summary and the logging hooks,
        # so the summary reads the variables that `acc_op` updates.
        prediction = tf.cast(
            tf.greater(tf.cast(probabilities, tf.float32), 0.5), tf.int64)
        acc, acc_op = tf.metrics.accuracy(label_ids, prediction)

        with tf.name_scope('summary'):
            tf.summary.scalar('total_loss', total_loss)
            tf.summary.scalar('accuracy', acc)

        tvars = tf.trainable_variables()
        if init_checkpoint:
            (assignment_map, _
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            # `tf.estimator.EstimatorSpec` takes a `tf.train.Scaffold`
            # instance, not a scaffold *function*, so the checkpoint is
            # always restored directly here, regardless of `use_tpu`.
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            logging_hook = tf.train.LoggingTensorHook(
                {"loss": total_loss, "accuracy": acc_op}, every_n_iter=10)

            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook],
            )

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, probabilities):
                """Returns per-class AUC metrics and the mean eval loss."""
                probs_per_class = tf.split(probabilities, num_labels, axis=-1)
                labels_per_class = tf.split(label_ids, num_labels, axis=-1)
                # One AUC metric per class, keyed by the class index.
                eval_dict = {}
                for j, class_probs in enumerate(probs_per_class):
                    class_labels = tf.cast(labels_per_class[j], dtype=tf.int32)
                    current_auc, update_op_auc = tf.metrics.auc(
                        class_labels, class_probs)
                    eval_dict[str(j)] = (current_auc, update_op_auc)
                eval_dict['eval_loss'] = tf.metrics.mean(values=per_example_loss)
                return eval_dict

            # Build the metric ops exactly once; a second call would create
            # a duplicate set of metric variables that is never updated.
            eval_metrics = metric_fn(per_example_loss, label_ids, probabilities)
            logging_hook = tf.train.LoggingTensorHook(
                {"loss": total_loss, "accuracy": acc_op}, every_n_iter=2)

            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=eval_metrics,
                evaluation_hooks=[logging_hook]
            )

        print("mode:", mode, "probabilities:", probabilities)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities})
Пример #7
0
  def model_fn(features, labels, mode, params):  
    """The `model_fn` for TPUEstimator."""

    # reading features input
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    
    # checking if training mode
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    
    # create simple classification model
    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)
    
    # getting variables for intialization and using pretrained init checkpoint
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      # defining optimizar function
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
      
      # Training estimator spec
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # accuracy, loss, auc, F1, precision and recall metrics for evaluation
      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
        accuracy = tf.metrics.accuracy(
            labels=label_ids, predictions=predictions, weights=is_real_example)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predictions)
        auc = tf.metrics.auc(
            label_ids,
            predictions)
        recall = tf.metrics.recall(
            label_ids,
            predictions)
        precision = tf.metrics.precision(
            label_ids,
            predictions) 
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall
        }

      eval_metrics = (metric_fn,
                      [per_example_loss, label_ids, logits, is_real_example])
      # estimator spec for evaluation
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      # estimator spec for predictions
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    return output_spec
Пример #8
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `tf.estimator.EstimatorSpec` for the classifier.

        Args:
            features: dict of input tensors; reads "input_ids", "input_mask",
                "segment_ids", "header_ids", "extra_features" and
                "label_ids".
            labels: unused; labels are taken from `features["label_ids"]`.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            An `EstimatorSpec` with a train op (TRAIN), an accuracy metric
            (EVAL), or the class probabilities as predictions (any other
            mode).
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        header_ids = features["header_ids"]
        extra_features = features["extra_features"]
        label_ids = features["label_ids"]

        training = mode == tf.estimator.ModeKeys.TRAIN

        # Only the scalar loss and the probabilities are consumed below.
        total_loss, _, _, probabilities = create_model(
            bert_config, training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings, header_ids,
            extra_features)

        trainable_vars = tf.trainable_variables()
        restored_names = {}
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            # Tag the variables whose initial value comes from the checkpoint.
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if training:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                False)
            return tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op)

        if mode == tf.estimator.ModeKeys.EVAL:
            accuracy = tf.metrics.accuracy(
                labels=label_ids,
                predictions=tf.argmax(probabilities, 1),
                name='accuracy')
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops={'accuracy': accuracy})

        # Any remaining mode is served as prediction.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=probabilities)
Пример #9
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator: BERT logits feeding a CNN head.

    Only the loss returned by `create_cnn_model` is optimised; the scalar
    loss returned by `create_model` is discarded.

    Args:
      features: dict of tensors; reads "input_ids", "input_mask",
        "segment_ids", "label_ids" and "word_ids".
      labels: unused; labels are taken from `features["label_ids"]`.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN, EVAL, or (any other
      mode) prediction.
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    word_ids = features["word_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # The BERT scalar loss is not used: the objective below is the CNN loss.
    (_, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    alpha = 0.1
    tf.logging.info("alpha = %s", alpha)
    # NOTE(review): [3, 4, 5], 128 and 0.2 are presumably the filter sizes,
    # filter count and dropout rate -- confirm against create_cnn_model.
    cnn_loss, _ = create_cnn_model(
        is_training, word_ids, label_ids, bert_config, [3, 4, 5], 128, 0.2,
        num_labels, logits, alpha)
    # A blended objective (alpha * bert_loss + (1 - alpha) * cnn_loss) is
    # currently disabled; training uses the CNN loss alone.
    total_loss = cnn_loss

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, _
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
      logging_hook = tf.train.LoggingTensorHook({"loss": total_loss}, every_n_iter=10)
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          training_hooks=[logging_hook],
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, logits):
        """Return accuracy, macro F1 and mean loss metric ops."""
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        accuracy = tf.metrics.accuracy(label_ids, predictions)
        # Use the same class count the model was built with instead of a
        # hard-coded 2, so the metric stays valid for other label sets.
        f1 = tf_metrics.f1(label_ids, predictions, average="macro",
                           num_classes=num_labels)
        loss = tf.metrics.mean(per_example_loss)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
            "eval_f1": f1,
        }

      # NOTE(review): the metrics are computed from the `create_model`
      # outputs (per_example_loss, logits) while the reported spec loss is
      # the CNN loss -- confirm this mix is intended.
      eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
    return output_spec
Пример #10
0
    def model_fn(features, labels, mode, params):
        """Build the TPUEstimator spec for the generator/discriminator model.

        Args:
            features: dict of tensors; reads "input_ids", "input_mask",
                "segment_ids", "label_ids", "label_mask" and, when present,
                "is_real_example".
            labels: unused; labels are taken from `features["label_ids"]`.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN, EVAL, or (any
            other mode) prediction.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        label_mask = features["label_mask"]

        # Per-example weights for the eval metrics; all ones when the
        # feature is not supplied.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        training = mode == tf.estimator.ModeKeys.TRAIN

        d_loss, g_loss, per_example_loss, logits, probabilities = create_model(
            bert_config, training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings, label_mask)

        all_vars = tf.trainable_variables()

        def vars_tagged(tag):
            """Return the trainable variables whose name contains `tag`."""
            return [v for v in all_vars if tag in v.name]

        # The discriminator update also trains the BERT variables; the
        # generator update touches only the generator's own variables.
        d_vars = vars_tagged('bert') + vars_tagged('Discriminator')
        g_vars = vars_tagged('Generator')

        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    all_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in all_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if training:
            d_train_op = optimization.create_optimizer(
                "d", d_vars, d_loss, learning_rate, num_train_steps,
                num_warmup_steps, use_tpu)
            g_train_op = optimization.create_optimizer(
                "g", g_vars, g_loss, learning_rate, num_train_steps,
                num_warmup_steps, use_tpu)

            watched_tensors = {
                "d_loss": d_loss,
                "g_loss": g_loss,
                "per_example_loss": per_example_loss
            }
            logging_hook = tf.train.LoggingTensorHook(watched_tensors,
                                                      every_n_iter=1)

            # Both updates run in the same step; the reported loss is their
            # sum.
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=d_loss + g_loss,
                train_op=tf.group(d_train_op, g_train_op),
                training_hooks=[logging_hook],
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                """Return weighted accuracy, precision, recall, F1 and loss."""
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                common = dict(labels=label_ids,
                              predictions=predictions,
                              weights=is_real_example)
                per_class = dict(common, num_classes=num_labels)
                return {
                    "eval_accuracy":
                    tf.metrics.accuracy(**common),
                    "eval_precision":
                    tf_metrics.precision(**per_class),
                    "eval_recall":
                    tf_metrics.recall(**per_class),
                    "eval_f1_micro":
                    tf_metrics.f1(average='micro', **per_class),
                    "eval_f1_macro":
                    tf_metrics.f1(average='macro', **per_class),
                    "eval_loss":
                    tf.metrics.mean(values=per_example_loss,
                                    weights=is_real_example),
                }

            metric_inputs = [
                per_example_loss, label_ids, logits, is_real_example
            ]
            # Evaluation reports the discriminator loss only.
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=d_loss,
                eval_metrics=(metric_fn, metric_inputs),
                scaffold_fn=scaffold_fn)

        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
Пример #11
0
    # Classification head: project `output_layer` through `output_weights`
    # (transposed) and add `output_bias` to obtain the logits.
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # One-hot depth is the number of entries in `labels`
    # (presumably the list of class names -- TODO confirm).
    one_hot_labels = tf.one_hot(label_ids, depth=len(labels), dtype=tf.float32)

    # Cross-entropy written out by hand: negative log-probability of the
    # true class per example, then averaged over the batch.
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    # The summary reads from a placeholder rather than from `loss` itself;
    # presumably the training loop feeds in a value it computed -- verify
    # against the caller.
    loss_pl = tf.placeholder(tf.float32)
    loss_summary = tf.summary.scalar('Loss', loss_pl)

# Optimisation function
# NOTE(review): the literal 0 is presumably the number of warm-up steps and
# False the `use_tpu` flag -- confirm against optimization.create_optimizer.
with tf.name_scope('optimizer'):
    optimizer = optimization.create_optimizer(loss, learning_rate,
                                              num_training_steps, 0, False)

# Calculate accuracy
with tf.name_scope('accuracy'):
    # Fraction of examples whose argmax prediction equals the label.
    correct_prediction = tf.equal(predictions, tf.cast(label_ids, tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # As with the loss, the summary is fed through a placeholder.
    accuracy_pl = tf.placeholder(tf.float32)
    accuracy_summary = tf.summary.scalar('Accuracy', accuracy_pl)

# Restore model from checkpoint
trainable_vars = tf.trainable_variables()
if init_checkpoint:
    # Only the assignment map is needed; the second return value is ignored.
    assignment_map, _ = bert.get_assignment_map_from_checkpoint(
        trainable_vars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
Пример #12
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the TPUEstimator spec for the sequence-labelling model.

        Args:
            features: dict of tensors; reads "input_ids", "input_mask",
                "segment_ids", "label_ids" and, when present,
                "is_real_example".
            labels: unused; labels are taken from `features["label_ids"]`.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN, EVAL, or (any
            other mode) prediction of `pred_ids`.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        # Built as in the other model functions, but not consumed below.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        training = mode == tf.estimator.ModeKeys.TRAIN

        # Build the model from the ids of the input tokens (input_ids) and
        # the ids of their labels (label_ids). Only the loss and the
        # predicted ids are used afterwards.
        total_loss, _logits, _trans, pred_ids = create_model(
            bert_config, training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings)

        trainable_vars = tf.trainable_variables()
        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if training:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            # Adapted for NER: the metrics compare the decoded label ids with
            # the gold label ids.
            def metric_fn(label_ids, pred_ids):
                """Return precision, recall and F1 over the scored labels."""
                # Which label ids the metrics score; they correspond to
                # positions in the label list.
                scored_labels = [2, 3]
                # NOTE(review): with a scalar length this looks like an
                # all-True mask of length max_seq_length, so padding would
                # not be masked out -- confirm whether input_mask was meant.
                mask = tf.sequence_mask(FLAGS.max_seq_length)
                metric_args = (label_ids, pred_ids, num_labels, scored_labels,
                               mask)
                return {
                    "eval_precision": tf_metrics.precision(*metric_args),
                    "eval_recall": tf_metrics.recall(*metric_args),
                    "eval_f": tf_metrics.f1(*metric_args),
                }

            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [label_ids, pred_ids]),
                scaffold_fn=scaffold_fn)

        # Every other mode is served as prediction of the decoded ids.
        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               predictions=pred_ids,
                                               scaffold_fn=scaffold_fn)
Пример #13
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the TPUEstimator spec; only TRAIN and PREDICT are supported.

        Args:
            features: dict of tensors; reads "input_ids", "input_mask",
                "segment_ids", "label_ids" and "len_gt_titles".
            labels: unused; labels are taken from `features["label_ids"]`.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN or PREDICT.

        Raises:
            ValueError: if `mode` is neither TRAIN nor PREDICT.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        len_gt_titles = features["len_gt_titles"]

        training = mode == tf.estimator.ModeKeys.TRAIN
        # The per-example loss is returned by create_model but not used here.
        total_loss, _, logits = create_model(
            bert_config, training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings)

        trainable_vars = tf.trainable_variables()

        scaffold_fn = None
        restored_names = []
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if training:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.PREDICT:
            # The labels and title lengths are passed through next to the
            # logits so they are available with each prediction.
            outputs = {
                "logits": logits,
                "label_ids": label_ids,
                "len_gt_titles": len_gt_titles,
            }
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   predictions=outputs,
                                                   scaffold_fn=scaffold_fn)

        raise ValueError(
            "Only TRAIN and PREDICT modes are supported: %s" % mode)
Пример #14
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `EstimatorSpec` for the paired-input (x / y) BERT model.

        Args:
            features: dict of tensors; reads "input_ids_x", "input_mask_x",
                "label_ids_x", "info_x" and the matching "*_y" keys.
            labels: unused.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            A `tf.estimator.EstimatorSpec` for TRAIN or EVAL.

        Raises:
            ValueError: if `mode` is neither TRAIN nor EVAL.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids_x = features["input_ids_x"]
        input_mask_x = features["input_mask_x"]
        label_ids_x = features["label_ids_x"]
        info_x = features["info_x"]

        input_ids_y = features["input_ids_y"]
        input_mask_y = features["input_mask_y"]
        label_ids_y = features["label_ids_y"]
        # Read but not used further in this function.
        info_y = features["info_y"]

        training = mode == tf.estimator.ModeKeys.TRAIN
        print('Istrain:', training)

        model = modeling_coverage_learning_loss.BertModel(
            config=bert_config,
            gpu=FLAGS.gpu,
            is_training=training,
            input_ids_x=input_ids_x,
            input_ids_y=input_ids_y,
            label_ids_x=label_ids_x,
            label_ids_y=label_ids_y,
            input_mask_x=input_mask_x,
            input_mask_y=input_mask_y,
        )

        # Pre-training objective. For fine-tuning the objective would be
        # model.loss_x + model.loss_y instead.
        total_loss = model.loss_ssl

        masked_lm_log_probs = model.log_probs_x

        trainable_vars = tf.trainable_variables()

        restored_names = {}
        # No scaffold is ever built here, so the specs receive scaffold=None.
        scaffold = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling_coverage_learning_loss
                .get_assignment_map_from_checkpoint(trainable_vars,
                                                    init_checkpoint))
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, suffix)

        if training:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op,
                                              scaffold=scaffold)

        if mode == tf.estimator.ModeKeys.EVAL:
            # Expose the evaluation tensors through the 'eval_sp' graph
            # collection, in this fixed order.
            for tensor in (masked_lm_log_probs, input_ids_x, label_ids_x,
                           info_x):
                tf.add_to_collection('eval_sp', tensor)

            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              scaffold=scaffold)

        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % mode)
Пример #15
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the TPUEstimator spec for the BERT classifier.

    Args:
      features: dict of tensors; reads "input_ids", "input_mask",
        "segment_ids", "label_ids" and, when present, "is_real_example".
      labels: unused; labels are taken from `features["label_ids"]`.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` for TRAIN, EVAL, or (any other
      mode) prediction of the class probabilities.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
      tf.logging.info("  name = %s, shape = %s", feature_name,
                      features[feature_name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Per-example weights for the eval metrics; all ones when the feature is
    # not supplied.
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    training = mode == tf.estimator.ModeKeys.TRAIN

    total_loss, per_example_loss, logits, probabilities = create_model(
        bert_config, training, input_ids, input_mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
      assignment_map, restored_names = (
          modeling.get_assignment_map_from_checkpoint(
              trainable_vars, init_checkpoint))
      if not use_tpu:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
      else:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
      suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
      tf.logging.info("  name = %s, shape = %s%s", variable.name,
                      variable.shape, suffix)

    if training:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, logits, is_real_example):
        """Return weighted accuracy and mean loss metric ops."""
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        return {
            "eval_accuracy":
                tf.metrics.accuracy(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example),
            "eval_loss":
                tf.metrics.mean(
                    values=per_example_loss, weights=is_real_example),
        }

      metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=(metric_fn, metric_inputs),
          scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
Пример #16
0
    def model_fn(features, labels, mode, params):
        """Build the `tf.estimator.EstimatorSpec` for the BERT model.

        Args:
            features: dict of tensors; reads "input_ids", "input_mask",
                "segment_ids" and "label_ids".
            labels: unused; labels are taken from `features["label_ids"]`.
            mode: a `tf.estimator.ModeKeys` value.
            params: unused.

        Returns:
            An `EstimatorSpec` carrying the train op (TRAIN), the macro
            precision/recall/F1 metric ops (EVAL), or the `predicts` output
            of `create_model` (any other mode).
        """
        tf.compat.v1.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.compat.v1.logging.info("  name = %s, shape = %s" %
                                      (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         predicts) = create_model(bert_config, is_training, input_ids,
                                  input_mask, segment_ids, label_ids,
                                  num_labels, use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        # Without Horovod, or on rank 0 only, initialise from the checkpoint.
        # NOTE(review): other ranks presumably receive the weights through a
        # broadcast elsewhere -- confirm against the caller.
        if init_checkpoint and (hvd is None or hvd.rank() == 0):
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # A single call is sufficient to point the variables'
            # initializers at the checkpoint values.
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.compat.v1.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name,
                                      var.shape, init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, hvd,
                                                     False, amp)
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # The return value is not used; the calls are made for their
            # effect on the graph.
            dummy_op = tf.no_op()
            # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
            if amp:
                dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimization.LAMBOptimizer(learning_rate=0.0))

            def metric_fn(per_example_loss, label_ids, logits):
                """Return macro precision, recall and F1 over labels 1 and 2.

                `per_example_loss` is accepted but not used.
                """
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 num_labels, [1, 2],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           num_labels, [1, 2],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  num_labels, [1, 2],
                                  average="macro")
                return {
                    "precision": precision,
                    "recall": recall,
                    "f1": f,
                }

            eval_metric_ops = metric_fn(per_example_loss, label_ids, logits)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)
        else:

            # Same graph-rewrite set-up as in the EVAL branch.
            dummy_op = tf.no_op()
            # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
            if amp:
                dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimization.LAMBOptimizer(learning_rate=0.0))

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, predictions=predicts)
        return output_spec
Пример #17
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `tf.estimator.EstimatorSpec` for the requested mode.

        Reads the input tensors from `features`, builds the classifier through
        `create_model`, optionally warm-starts from `init_checkpoint`, and
        returns a spec carrying a train op (TRAIN), accuracy/loss metrics
        (EVAL) or the class probabilities (any other mode).
        """
        log_info = tf.compat.v1.logging.info

        log_info("*** Features ***")
        for feature_name in sorted(features.keys()):
            log_info("  name = %s, shape = %s" %
                     (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        # Per-example weights: taken from the features when present,
        # otherwise every example gets weight 1.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        model_outputs = create_model(bert_config, is_training, input_ids,
                                     input_mask, segment_ids, label_ids,
                                     num_labels, use_one_hot_embeddings)
        total_loss, per_example_loss, logits, probabilities = model_outputs

        trainable_vars = tf.trainable_variables()
        initialized_variable_names = {}
        # The checkpoint is only mapped when Horovod is absent or this is
        # rank 0.
        if init_checkpoint and (hvd is None or hvd.rank() == 0):
            checkpoint_mapping = modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint)
            assignment_map, initialized_variable_names = checkpoint_mapping
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        log_info("**** Trainable Variables ****")
        for variable in trainable_vars:
            ckpt_marker = (", *INIT_FROM_CKPT*"
                           if variable.name in initialized_variable_names
                           else "")
            log_info("  name = %s, shape = %s%s", variable.name,
                     variable.shape, ckpt_marker)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, hvd,
                                                     False, amp)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op)

        if mode == tf.estimator.ModeKeys.EVAL:
            tf.no_op()
            # Need to call mixed precision graph rewrite if fp16 to enable
            # graph rewrite; the returned optimizer wrapper itself is unused.
            if amp:
                loss_scaler = tf.train.experimental.FixedLossScale(1)
                tf.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)

            predicted_labels = tf.argmax(logits,
                                         axis=-1,
                                         output_type=tf.int32)
            accuracy_metric = tf.metrics.accuracy(labels=label_ids,
                                                  predictions=predicted_labels,
                                                  weights=is_real_example)
            loss_metric = tf.metrics.mean(values=per_example_loss,
                                          weights=is_real_example)
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops={
                    "eval_accuracy": accuracy_metric,
                    "eval_loss": loss_metric,
                })

        # Any remaining mode is served as prediction.
        tf.no_op()
        # Same graph-rewrite activation as in EVAL, here without an explicit
        # loss scale argument.
        if amp:
            tf.train.experimental.enable_mixed_precision_graph_rewrite(
                optimization.LAMBOptimizer(learning_rate=0.0))

        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={"probabilities": probabilities})
Пример #18
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build a `TPUEstimatorSpec`; training uses an embedding perturbation.

        In TRAIN mode the variables whose name contains 'word_embeddings' are
        shifted by `FLAGS.epsilon` along their normalized gradient, the model
        is rebuilt with `reuse=True` to get a second loss and gradient set,
        the shift is undone, and the two gradient sets are averaged before
        being handed to `optimization.create_optimizer`. In EVAL mode
        accuracy, precision, recall and mean loss are reported, weighted by
        `is_real_example`. Any other mode returns the probabilities as
        predictions.
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)
        print("total_loss::", total_loss)
        print("per_example_loss::", per_example_loss)
        print("logits::", logits)
        print("probabilities::", probabilities)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            print("从{}恢复参数".format(init_checkpoint))
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        def get_embedding_vars(tvars, grads, layer_name='word_embeddings'):
            """Return the embedding variables and their normalized gradients.

            A variable is selected when `layer_name` occurs in its name; its
            gradient is divided by (norm + 1e-10) to avoid division by zero.
            """
            embed_vars, embed_norm_grads = [], []
            for var, grad in zip(tvars, grads):
                if layer_name in var.name:
                    embed_vars.append(var)
                    norm = tf.norm(grad) + 1e-10
                    embed_norm_grads.append(tf.div(grad, norm))
            return embed_vars, embed_norm_grads

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            # Compute the ordinary (unperturbed) gradients.
            grads = tf.gradients(total_loss, tvars)

            # Get the embedding gradients and normalize them; they also act
            # as the backup that lets the embeddings be restored later.
            embed_vars, embed_norm_grads = get_embedding_vars(tvars, grads)

            # Add the perturbation to the embeddings (the "attack").
            with tf.control_dependencies(embed_norm_grads):
                attack_op = []
                for param, norm_grad in zip(embed_vars, embed_norm_grads):
                    attack_op.append(
                        param.assign(param +
                                     tf.multiply(FLAGS.epsilon, norm_grad)))
                attack_op = tf.group(*attack_op, name='attack')

            # Recompute loss and gradients after the attack.
            with tf.control_dependencies([attack_op]):
                (att_total_loss, att_per_example_loss, att_logits,
                 att_probabilities) = create_model(bert_config,
                                                   is_training,
                                                   input_ids,
                                                   input_mask,
                                                   segment_ids,
                                                   label_ids,
                                                   num_labels,
                                                   use_one_hot_embeddings,
                                                   reuse=True)
                att_grads = tf.gradients(att_total_loss,
                                         tvars,
                                         name='att_gradients')  # original author's note: "returns None"

            # Once the post-attack gradients exist, restore the parameters.
            with tf.control_dependencies(att_grads):
                restore_op = []
                for param, norm_grad in zip(embed_vars, embed_norm_grads):
                    restore_op.append(
                        param.assign(param -
                                     tf.multiply(FLAGS.epsilon, norm_grad)))
                restore_op = tf.group(*restore_op, name='restore')

            # Accumulate the gradients: clean and perturbed passes are
            # treated as one batch and averaged.
            with tf.control_dependencies([restore_op]):
                acc_grads = [
                    tf.div(tf.add(grad, att_grad), 2.0)
                    for grad, att_grad in zip(grads, att_grads)
                ]
                acc_total_loss = tf.div(tf.add(total_loss, att_total_loss),
                                        2.0)

            # Update the parameters; this must happen after the restore.
            with tf.control_dependencies(acc_grads):
                train_op = optimization.create_optimizer(
                    tvars, acc_grads, learning_rate, num_train_steps,
                    num_warmup_steps, use_tpu)

            logging_hook1 = tf.train.LoggingTensorHook(
                {"total_loss": acc_total_loss}, every_n_iter=10)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=acc_total_loss,
                train_op=train_op,
                training_hooks=[logging_hook1],
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, probabilities,
                          is_real_example):
                """Binary metrics from the probability in column 1 (> 0.5)."""
                probabilities = probabilities[:, 1]
                predictions = tf.cast(probabilities > 0.5, tf.int32)
                # Weight every metric by is_real_example so that examples
                # with weight 0 do not influence the reported numbers.
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                precision = tf.metrics.precision(labels=label_ids,
                                                 predictions=predictions,
                                                 weights=is_real_example)
                recall = tf.metrics.recall(labels=label_ids,
                                           predictions=predictions,
                                           weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, probabilities, is_real_example
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
        return output_spec
Пример #19
0
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for the tagging model.

        Reads the input tensors from `features`, builds the model through
        `create_model`, optionally warm-starts from `init_checkpoint`, and
        returns a spec for TRAIN, EVAL (macro precision/recall/F1 over label
        ids 1 and 2) or PREDICT (the prediction dict from `create_model`
        plus the input mask). For any other mode `None` is returned.
        """
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        #label_mask = features["label_mask"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         predictsDict) = create_model(bert_config, is_training, input_ids,
                                      input_mask, segment_ids, label_ids,
                                      num_labels, use_one_hot_embeddings)
        predictsDict["input_mask"] = input_mask
        tvars = tf.trainable_variables()
        # Must exist even without a checkpoint: the logging loop below
        # looks variable names up in it.
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            # Initialise from the checkpoint exactly once: inside the
            # scaffold on TPU, directly otherwise.
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf.logging.info("**** Trainable Variables ****")

        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)
        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits, num_labels):
                """Macro precision, recall and F1 restricted to labels 1 and 2."""
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 num_labels, [1, 2],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           num_labels, [1, 2],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  num_labels, [1, 2],
                                  average="macro")
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, logits, num_labels])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictsDict, scaffold_fn=scaffold_fn)
        return output_spec
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `tf.estimator.EstimatorSpec` for the classifier.

        Outside training, when `FLAGS.use_trt` is set, the outputs come from
        a frozen graph returned by `get_frozen_tftrt_model` and imported with
        the feature tensors mapped onto its inputs. Otherwise the model is
        built through `create_model` and optionally warm-started from
        `init_checkpoint`.
        """

        def _accuracy_and_loss(example_losses, gold_labels, class_logits):
            """Return the accuracy and mean-loss metrics used in EVAL mode."""
            predicted = tf.argmax(class_logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy":
                tf.metrics.accuracy(labels=gold_labels, predictions=predicted),
                "eval_loss":
                tf.metrics.mean(values=example_losses),
            }

        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        if not is_training and FLAGS.use_trt:
            trt_graph = get_frozen_tftrt_model(bert_config, input_ids.shape,
                                               num_labels,
                                               use_one_hot_embeddings,
                                               init_checkpoint)
            # Feed the feature tensors into the frozen graph and pull the
            # four output tensors back out by name.
            feed_map = {
                'input_ids': input_ids,
                'input_mask': input_mask,
                'segment_ids': segment_ids,
                'label_ids': label_ids
            }
            fetch_names = [
                'loss/cls_loss:0', 'loss/cls_per_example_loss:0',
                'loss/cls_logits:0', 'loss/cls_probabilities:0'
            ]
            imported = tf.import_graph_def(trt_graph,
                                           input_map=feed_map,
                                           return_elements=fetch_names,
                                           name='')
            total_loss, per_example_loss, logits, probabilities = imported

            if mode == tf.estimator.ModeKeys.PREDICT:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions={"probabilities": probabilities})
            elif mode == tf.estimator.ModeKeys.EVAL:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=_accuracy_and_loss(per_example_loss,
                                                       label_ids, logits))
            return output_spec

        model_outputs = create_model(bert_config, is_training, input_ids,
                                     input_mask, segment_ids, label_ids,
                                     num_labels, use_one_hot_embeddings)
        total_loss, per_example_loss, logits, probabilities = model_outputs

        trainable_vars = tf.trainable_variables()
        initialized_variable_names = {}
        # The checkpoint is only mapped when Horovod is absent or this is
        # rank 0.
        if init_checkpoint and (hvd is None or hvd.rank() == 0):
            checkpoint_mapping = modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint)
            assignment_map, initialized_variable_names = checkpoint_mapping
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if FLAGS.verbose_logging:
            tf.logging.info("**** Trainable Variables ****")
            for variable in trainable_vars:
                ckpt_marker = (", *INIT_FROM_CKPT*"
                               if variable.name in initialized_variable_names
                               else "")
                tf.logging.info("  name = %s, shape = %s%s", variable.name,
                                variable.shape, ckpt_marker)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, hvd,
                                                     FLAGS.use_fp16)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=_accuracy_and_loss(per_example_loss,
                                                   label_ids, logits))

        # Any remaining mode returns the raw probabilities tensor.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=probabilities)
Пример #21
0
    def model_fn(features, labels, mode, params):
        """Build the `TPUEstimatorSpec` for language-model training or eval.

        The loss is the mean sparse softmax cross-entropy between the logits
        at every position but the last and the input ids shifted one step to
        the left. Only TRAIN and EVAL are supported; any other mode raises
        `ValueError`.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (feature_name, features[feature_name].shape))

        input_ids = features["input_ids"]

        output = model.model(hparams=hparams, X=input_ids)
        token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=input_ids[:, 1:], logits=output["logits"][:, :-1])
        loss = tf.reduce_mean(input_tensor=token_losses)

        trainable_vars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            checkpoint_mapping = get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint)
            assignment_map, initialized_variable_names = checkpoint_mapping
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    """Initialise from the checkpoint when the scaffold is built."""
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            ckpt_marker = (", *INIT_FROM_CKPT*"
                           if variable.name in initialized_variable_names
                           else "")
            tf.logging.info("  name = %s, shape = %s%s", variable.name,
                            variable.shape, ckpt_marker)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                loss,
                learning_rate,
                num_train_steps,
                num_warmup_steps,
                use_tpu,
                optimizer,
                poly_power,
                start_warmup_step,
                use_memory_saving_gradients=use_memory_saving_gradients)
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
            )

        if mode != tf.estimator.ModeKeys.EVAL:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        def metric_fn(tiled_loss):
            """Turn the tiled loss into "perplexity" and "bpc" metrics."""
            perplexity = tf.exp(tf.reduce_mean(tiled_loss))
            # Dividing by ln(2) re-expresses the natural-log loss in base 2.
            bpc = tf.reduce_mean(tiled_loss) / tf.constant(math.log(2))
            return {
                "perplexity": tf.metrics.mean(perplexity),
                "bpc": tf.metrics.mean(bpc),
            }

        eval_loss = loss
        if FLAGS.use_tpu:
            # Sum the loss over replicas and divide by the core count.
            with tf.colocate_with(loss):
                eval_loss = tf.contrib.tpu.cross_replica_sum(loss) \
                          / FLAGS.num_tpu_cores
        # The scalar loss is reshaped to [1, 1] and repeated once per
        # evaluation example before being passed to metric_fn.
        metric_loss = tf.tile(tf.reshape(eval_loss, [1, 1]),
                              [FLAGS.eval_batch_size, 1])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=eval_loss,
            eval_metrics=(metric_fn, [metric_loss]),
            scaffold_fn=scaffold_fn)
    def model_fn(features, labels, mode, params, config=None):  # pylint: disable=unused-argument
        """
        The `model_fn` for TPUEstimator (or a plain Estimator on multi-GPU).

        A model has three phases: training, evaluation and prediction, and
        the data is handled differently in each. During training the data is
        fed to the model, the model produces predictions, the loss is computed
        from predictions and labels, and the loss is used to update the
        network parameters. During evaluation no back-propagation is needed.
        In other words, model_fn needs one code path per mode.

        Args:
            features: dict of Tensor, the feature batch returned by input_fn.
                The keys "input_ids" and "label_ids" are read here, plus the
                optional "is_real_example".
            labels: the label batch returned by input_fn (not read here).
            mode: An instance of tf.estimator.ModeKeys
            params: Additional configuration for hyper-parameters; a dict that
                can carry settings for building the network or the training
                procedure (not read here).
            config: not read here.

        Return:
            tf.estimator.EstimatorSpec when FLAGS.num_gpu_cores > 1, otherwise
            tf.contrib.tpu.TPUEstimatorSpec.

        """

        print("features={}".format(features))
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        label_ids = features["label_ids"]
        if "is_real_example" in features:
            # Cast to tf.float32 so it can be used as metric weights.
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            # No flag supplied: weight every example with 1.
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        # Training mode is decided from `mode`.
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Build the model from the features and compute the loss etc.
        print("create_model:\ninput_ids={}".format(input_ids.shape))
        print("label_ids={}".format(label_ids.shape))
        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       None, None, label_ids, num_labels,
                                       use_one_hot_embeddings, None)

        print("total_loss={}".format(total_loss))
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        # Training mode
        if mode == tf.estimator.ModeKeys.TRAIN:
            if FLAGS.num_gpu_cores > 1:
                train_op = custom_optimization.create_optimizer(
                    total_loss,
                    learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    fp16=fp16)

                # EstimatorSpec expects a tf.train.Scaffold instance (or
                # None), not the scaffold-building function, so call it.
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold=scaffold_fn() if scaffold_fn else None)
            else:
                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps, use_tpu)

                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
        # Evaluation mode
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                """Accuracy, loss, precision and recall weighted by is_real_example."""
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                # add more metrics
                pr, pr_op = tf.metrics.precision(labels=label_ids,
                                                 predictions=predictions,
                                                 weights=is_real_example)
                re, re_op = tf.metrics.recall(labels=label_ids,
                                              predictions=predictions,
                                              weights=is_real_example)
                # Result: dict mapping a metric name to a
                # (metric_tensor, update_op) tuple.
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                    "eval_precision": (pr, pr_op),
                    "eval_recall": (re, re_op),
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            if FLAGS.num_gpu_cores > 1:
                # Same scaffold handling as in the multi-GPU training path.
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(per_example_loss, label_ids,
                                              logits, is_real_example),
                    scaffold=scaffold_fn() if scaffold_fn else None,
                )
            else:
                # eval on single-gpu only
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
        else:
            # tf.estimator.ModeKeys.PREDICT: prediction mode.
            # The predicted label is the index of the largest logit.
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            if FLAGS.num_gpu_cores > 1:
                # Multiple GPUs
                output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                         predictions={
                                                             "probabilities":
                                                             probabilities,
                                                             "predictions":
                                                             predictions
                                                         })
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={
                        "probabilities": probabilities,
                        "predictions": predictions
                    },
                    scaffold_fn=scaffold_fn)
        return output_spec
Пример #23
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `TPUEstimatorSpec` for TRAIN or PREDICT mode.

        The graph is produced by `create_model`, which receives both the
        target-side inputs and the `source_*` inputs read from `features`.

        * TRAIN: the loss is the mean of the start/end span cross-entropies,
          plus the same quantity for the source side scaled by
          `max(0, 1 - cosine_distance)` of the two span tensors returned by
          `create_model`.
        * PREDICT: returns `unique_ids` together with the log-softmax of the
          start and end logits.

        Args:
          features: dict of input tensors; `unique_ids` and every key in
            `model_input_keys` below must be present.
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value.
          params: unused.

        Returns:
          A `tf.contrib.tpu.TPUEstimatorSpec`.

        Raises:
          ValueError: if `mode` is neither TRAIN nor PREDICT.
        """
        tf.logging.info("*** Features ***")
        for key in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", key,
                            features[key].shape)

        unique_ids = features["unique_ids"]
        # The feature keys are identical to the keyword names that
        # create_model takes, so the lookups can be forwarded as **kwargs.
        model_input_keys = (
            "input_ids",
            "input_mask",
            "segment_ids",
            "input_span_mask",
            "output_span_mask",
            "source_input_ids",
            "source_input_mask",
            "source_segment_ids",
            "source_input_span_mask",
            "source_output_span_mask",
            "start_positions",
            "end_positions",
            "source_start_positions",
            "source_end_positions",
        )
        model_inputs = {key: features[key] for key in model_input_keys}

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        model_outputs = create_model(
            bert_config=bert_config,
            is_training=training,
            use_one_hot_embeddings=use_one_hot_embeddings,
            **model_inputs)
        (start_logits, end_logits, source_start_logits, source_end_logits,
         target_span_tensor, source_span_tensor) = model_outputs

        trainable_vars = tf.trainable_variables()

        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def restore_in_scaffold():
                    """Run the checkpoint restore when the scaffold is built."""
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = restore_in_scaffold

        # Report every trainable variable and whether the checkpoint covers it.
        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s" %
                            (variable.name, variable.shape, suffix))

        if training:
            seq_length = modeling.get_shape_list(model_inputs["input_ids"])[1]

            def span_loss(logits, positions):
                """Mean negative log-probability of `positions` under `logits`."""
                targets = tf.one_hot(positions,
                                     depth=seq_length,
                                     dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                picked = tf.reduce_sum(targets * log_probs, axis=-1)
                return -tf.reduce_mean(picked)

            start_loss = span_loss(start_logits,
                                   model_inputs["start_positions"])
            end_loss = span_loss(end_logits, model_inputs["end_positions"])
            main_loss = (start_loss + end_loss) / 2.0

            # Cosine similarity expressed as 1 - cosine distance (axis 0).
            similarity = 1 - tf.losses.cosine_distance(
                target_span_tensor, source_span_tensor, axis=0)
            source_start_loss = span_loss(
                source_start_logits, model_inputs["source_start_positions"])
            source_end_loss = span_loss(
                source_end_logits, model_inputs["source_end_positions"])
            # Negative similarity contributes nothing to the auxiliary term.
            aux_loss = tf.maximum(
                0.0, similarity) * (source_start_loss + source_end_loss) / 2.0

            total_loss = main_loss + aux_loss

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "unique_ids": unique_ids,
                    "start_logits": tf.nn.log_softmax(start_logits, axis=-1),
                    "end_logits": tf.nn.log_softmax(end_logits, axis=-1),
                },
                scaffold_fn=scaffold_fn)

        raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                         (mode))
Пример #24
0
def main():
    """Train the two-input BERT classifier, then re-save it with is_training=False.

    Every setting is read from the module-level ``config`` mapping.  The
    function runs in two phases:

    1. Build the training graph via ``create_model`` (``is_training=True``),
       restore the variables covered by ``config["init_checkpoint"]`` when one
       is given, and run ``num_train_steps`` optimisation steps on batches
       from ``get_input_data(config["in_1"], ...)``.  From
       ``config["eval_start_step"]`` on, the dev data (``config["in_2"]``) is
       evaluated every ``config["eval_per_step"]`` steps and a checkpoint is
       written into ``config["out"]`` whenever the summed dev loss improves;
       before that point a checkpoint is written every 1000 steps.
    2. Reset the default graph, rebuild the model with ``is_training=False``,
       restore the latest checkpoint found in ``config["out"]`` and save it
       into ``config["out_1"]``.
    """
    import os  # function-scope import: the file's import block is not visible here

    print("print start load the params...")
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.gfile.MakeDirs(config["out"])
    train_examples_len = config["train_examples_len"]
    dev_examples_len = config["dev_examples_len"]
    learning_rate = config["learning_rate"]
    eval_per_step = config["eval_per_step"]
    eval_start_step = config["eval_start_step"]
    num_labels = config["num_labels"]
    print(num_labels)
    num_train_steps = int(train_examples_len / config["train_batch_size"] *
                          config["num_train_epochs"])
    print("num_train_steps:", num_train_steps)
    num_dev_steps = int(dev_examples_len / config["dev_batch_size"])
    num_warmup_steps = int(num_train_steps * config["warmup_proportion"])
    use_one_hot_embeddings = False
    seq_len = config["max_seq_len"]
    init_checkpoint = config["init_checkpoint"]
    # Joining (instead of concatenating) keeps the checkpoint inside
    # config["out"] even when that path has no trailing separator, which is
    # where tf.train.get_checkpoint_state() looks for it in phase 2.
    train_ckpt_prefix = os.path.join(config["out"], 'bert.ckpt')

    def build_graph(is_training):
        """Create the feed placeholders and the classifier in the default graph.

        Returns ``(token_inputs, labels, keep_prob, total_loss,
        probabilities)`` where ``token_inputs`` lists the six [None, seq_len]
        int64 placeholders in the order ``create_model`` takes them.
        """
        token_inputs = [
            tf.placeholder(tf.int64, shape=[None, seq_len], name=name)
            for name in ('input_ids_1', 'input_mask_1', 'segment_ids_1',
                         'input_ids_2', 'input_mask_2', 'segment_ids_2')
        ]
        labels = tf.placeholder(tf.int64, shape=[None], name='labels')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        bert_config_ = load_bert_config(config["bert_config"])
        (ids_1, mask_1, segment_1, ids_2, mask_2, segment_2) = token_inputs
        total_loss, _, _, probabilities = create_model(
            bert_config_, is_training, ids_1, mask_1, segment_1, ids_2,
            mask_2, segment_2, labels, keep_prob, num_labels,
            use_one_hot_embeddings)
        return token_inputs, labels, keep_prob, total_loss, probabilities

    # ------------------------------------------------------------------
    # Phase 1: training graph.
    # ------------------------------------------------------------------
    print("print start compile the bert model...")
    print("{:*^100s}".format("v2"))
    (token_inputs, labels, keep_prob, total_loss,
     probabilities) = build_graph(is_training=True)
    print("{:*^100s}".format("v2"))
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             False)
    print("print start train the bert model(multi class)...")

    batch_size = config["train_batch_size"]
    # Seven tensors: the six token-level inputs followed by the labels.
    train_tensors = list(get_input_data(config["in_1"], seq_len, batch_size))

    dev_batch_size = config["dev_batch_size"]

    init_global = tf.global_variables_initializer()
    # Keep only the two most recent training checkpoints.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)

    with tf.Session() as sess:
        sess.run(init_global)
        print("start load the pretrain model")
        if init_checkpoint:
            tvars = tf.trainable_variables()
            print("trainable_variables", len(tvars))
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            print("initialized_variable_names:",
                  len(initialized_variable_names))
            # Restore only the variables that the checkpoint actually covers.
            saver_ = tf.train.Saver(
                [v for v in tvars if v.name in initialized_variable_names])
            saver_.restore(sess, init_checkpoint)
            all_vars = tf.global_variables()
            not_initialized_vars = [
                v for v in all_vars
                if v.name not in initialized_variable_names
            ]
            tf.logging.info('--all size %s; not initialized size %s' %
                            (len(all_vars), len(not_initialized_vars)))
            if not_initialized_vars:
                sess.run(tf.variables_initializer(not_initialized_vars))
            for v in not_initialized_vars:
                tf.logging.info('--not initialized: %s, shape = %s' %
                                (v.name, v.shape))
        # Without an init checkpoint nothing more is needed: init_global has
        # already initialised every variable.

        print("********* bert_multi_class_train start *********")

        def run_batch(fetches, batch, y, dropout_keep):
            """Feed one batch (six token arrays plus labels) and run `fetches`."""
            feed = dict(zip(token_inputs, batch))
            feed[labels] = y
            feed[keep_prob] = dropout_keep
            return sess.run(fetches, feed_dict=feed)

        def train_step(batch, y, step):
            """Run one optimisation step and print its loss and accuracy."""
            _, out_loss, p_ = run_batch(
                [train_op, total_loss, probabilities], batch, y, 0.9)
            pre = np.argmax(p_, axis=-1)
            acc = np.mean(np.equal(pre, y))
            print("step :{},loss :{}, acc :{}".format(step, out_loss, acc))
            return out_loss, pre

        def dev_step(batch, y):
            """Evaluate one batch with dropout disabled (keep_prob = 1.0)."""
            out_loss, p_ = run_batch([total_loss, probabilities], batch, y,
                                     1.0)
            pre = np.argmax(p_, axis=-1)
            acc = np.mean(np.equal(pre, y))
            print("loss :{}, acc :{}".format(out_loss, acc))
            return out_loss, pre

        min_total_loss_dev = 999999
        for step in range(1, num_train_steps + 1):
            train_values = sess.run(train_tensors)
            train_step(train_values[:-1], train_values[-1], step)

            if step % eval_per_step == 0 and step >= eval_start_step:
                # NOTE(review): this rebuilds the dev input pipeline on every
                # evaluation, so the graph grows each time.  Hoisting it out
                # of the loop is only safe if get_input_data() yields a
                # restartable/repeating source -- confirm before changing.
                dev_tensors = list(
                    get_input_data(config["in_2"], seq_len, dev_batch_size))

                total_loss_dev = 0
                total_pre_dev = []
                total_true_dev = []
                for _ in range(num_dev_steps):  # one pass over the dev set
                    dev_values = sess.run(dev_tensors)
                    y_dev = dev_values[-1]
                    out_loss, pre = dev_step(dev_values[:-1], y_dev)
                    total_loss_dev += out_loss
                    total_pre_dev.extend(pre)
                    total_true_dev.extend(y_dev)

                print("dev result report:")
                print(classification_report(total_true_dev, total_pre_dev))

                if total_loss_dev < min_total_loss_dev:
                    print("save model:\t%f\t>%f" %
                          (min_total_loss_dev, total_loss_dev))
                    min_total_loss_dev = total_loss_dev
                    saver.save(sess, train_ckpt_prefix, global_step=step)
            elif step < eval_start_step and step % 1000 == 0:
                print("auto saved model.")
                saver.save(sess, train_ckpt_prefix, global_step=step)
    # The `with` block has already closed the session.

    # ------------------------------------------------------------------
    # Phase 2: rebuild with is_training=False and re-save the checkpoint.
    # ------------------------------------------------------------------
    print("remove dropout in predict")
    tf.reset_default_graph()
    build_graph(is_training=False)

    export_init = tf.global_variables_initializer()
    # Only the single exported checkpoint is kept.
    export_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    # get_checkpoint_state() returns None when the directory holds no
    # checkpoint state; test for that explicitly instead of catching every
    # exception.
    checkpoint = tf.train.get_checkpoint_state(config["out"])
    if checkpoint is not None:
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    else:
        # Same fallback as before: try the configured path itself.
        input_checkpoint = config["out"]
        print("[INFO] Model folder", config["out"],
              "contains no checkpoint state")

    # Saver.save() does not create missing parent directories.
    tf.gfile.MakeDirs(config["out_1"])
    with tf.Session() as sess:
        sess.run(export_init)
        export_saver.restore(sess, input_checkpoint)
        export_saver.save(sess, os.path.join(config["out_1"], 'bert.ckpt'))
Пример #25
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the masked-LM `TPUEstimatorSpec` for TRAIN or EVAL mode.

        A `modeling.BertModel` is created without token type ids and its
        sequence output is fed to `get_masked_lm_output`; the resulting
        masked-LM loss is the total loss.

        * TRAIN: returns a spec carrying the optimizer's train op.
        * EVAL: returns a spec with weighted accuracy / mean-loss metrics and
          an `eval_hook` that receives the inputs, the per-sequence loss, the
          reshaped log-probabilities, the per-sequence accuracy and
          `features["seq"]`.

        Args:
          features: dict of input tensors (`input_ids`, `input_mask`,
            `masked_lm_positions`, `masked_lm_ids`, `masked_lm_weights`; EVAL
            additionally reads `seq`).
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value.
          params: unused.

        Returns:
          A `TPUEstimatorSpec`.

        Raises:
          ValueError: if `mode` is neither TRAIN nor EVAL.
        """
        tf.logging.info("*** Features ***")
        for key in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", key,
                            features[key].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        mlm_positions = features["masked_lm_positions"]
        mlm_ids = features["masked_lm_ids"]
        mlm_weights = features["masked_lm_weights"]

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        bert = modeling.BertModel(
            config=bert_config,
            is_training=training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=None,
            use_one_hot_embeddings=use_one_hot_embeddings)

        mlm_loss, mlm_example_loss, mlm_log_probs = get_masked_lm_output(
            bert_config, bert.get_sequence_output(),
            bert.get_embedding_table(), mlm_positions, mlm_ids, mlm_weights)

        total_loss = mlm_loss

        trainable_vars = tf.trainable_variables()

        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def restore_in_scaffold():
                    """Run the checkpoint restore when the scaffold is built."""
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = restore_in_scaffold

        # Report every trainable variable and whether the checkpoint covers it.
        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable_vars:
            suffix = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info("  name = %s, shape = %s%s" %
                            (variable.name, variable.shape, suffix))
        parameter_count = np.sum(
            [np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("**** {} parameters ****".format(parameter_count))

        # Accuracy per sequence: regroup the log-probabilities as
        # [-1, n_predictions, vocab_size] and compare the argmax to the ids.
        n_predictions = mlm_ids.get_shape().as_list()[-1]
        probs = tf.reshape(mlm_log_probs,
                           [-1, n_predictions, bert_config.vocab_size])
        predicted_ids = tf.argmax(probs, axis=-1, output_type=tf.int32)
        hits = tf.cast(tf.equal(predicted_ids, mlm_ids), tf.float32)
        accuracy_per_seq = tf.reduce_mean(hits, axis=1)

        accuracy = tf.reduce_mean(accuracy_per_seq)

        if training:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=total_loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(example_loss, log_probs, ids, weights):
                """Return the weighted masked-LM accuracy and mean-loss metrics."""
                tf.summary.scalar("train_accuracy", accuracy)
                flat_weights = tf.reshape(weights, [-1])
                predictions = tf.argmax(log_probs,
                                        axis=-1,
                                        output_type=tf.int32)
                flat_ids = tf.reshape(ids, [-1])
                accuracy_metric = tf.metrics.accuracy(labels=flat_ids,
                                                      predictions=predictions,
                                                      weights=flat_weights)
                flat_loss = tf.reshape(example_loss, [-1])
                loss_metric = tf.metrics.mean(values=flat_loss,
                                              weights=flat_weights)
                return {
                    "masked_lm_accuracy": accuracy_metric,
                    "masked_lm_loss": loss_metric
                }

            eval_metrics = (metric_fn, [
                mlm_example_loss, mlm_log_probs, mlm_ids, mlm_weights
            ])

            # Mean example loss within each sequence.
            loss_per_seq = tf.reduce_mean(tf.reshape(mlm_example_loss,
                                                     [-1, n_predictions]),
                                          axis=1)
            variables_to_export = [
                input_ids, input_mask, mlm_positions, mlm_ids, mlm_weights,
                loss_per_seq, probs, accuracy_per_seq, features["seq"]
            ]

            export_hook = eval_hook(variables_to_export, FLAGS.output_dir)
            return TPUEstimatorSpec(mode=mode,
                                    loss=total_loss,
                                    eval_metrics=eval_metrics,
                                    scaffold_fn=scaffold_fn,
                                    evaluation_hooks=[export_hook])

        raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                         (mode))
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the pre-training `EstimatorSpec` (masked LM + next sentence).

        A `modeling.BertModel` is created with compute type float16 when
        `FLAGS.manual_fp16` is set and float32 otherwise.  The total loss is
        the sum of the masked-LM loss and the next-sentence loss; the three
        loss tensors are exposed under the names `mlm_loss`, `nsp_loss` and
        `total_loss`.  The init checkpoint is only loaded when `hvd` is None
        or this process has Horovod rank 0.

        Args:
          features: dict of input tensors (`input_ids`, `input_mask`,
            `segment_ids`, `masked_lm_positions`, `masked_lm_ids`,
            `masked_lm_weights`, `next_sentence_labels`).
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value.
          params: unused.

        Returns:
          A `tf.estimator.EstimatorSpec` for TRAIN or EVAL.

        Raises:
          ValueError: if `mode` is neither TRAIN nor EVAL.
        """
        tf.logging.info("*** Features ***")
        for key in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", key,
                            features[key].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        mlm_positions = features["masked_lm_positions"]
        mlm_ids = features["masked_lm_ids"]
        mlm_weights = features["masked_lm_weights"]
        nsp_labels = features["next_sentence_labels"]

        training = (mode == tf.estimator.ModeKeys.TRAIN)
        compute_type = tf.float16 if FLAGS.manual_fp16 else tf.float32

        bert = modeling.BertModel(
            config=bert_config,
            is_training=training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            compute_type=compute_type)

        mlm_loss, mlm_example_loss, mlm_log_probs = get_masked_lm_output(
            bert_config, bert.get_sequence_output(),
            bert.get_embedding_table(), mlm_positions, mlm_ids, mlm_weights)

        nsp_loss, nsp_example_loss, nsp_log_probs = get_next_sentence_output(
            bert_config, bert.get_pooled_output(), nsp_labels)

        # Give the loss tensors stable names so they can be looked up by name.
        mlm_loss = tf.identity(mlm_loss, name="mlm_loss")
        nsp_loss = tf.identity(nsp_loss, name="nsp_loss")
        total_loss = tf.identity(mlm_loss + nsp_loss, name='total_loss')

        trainable_vars = tf.trainable_variables()

        restored_names = {}
        if init_checkpoint and (hvd is None or hvd.rank() == 0):
            print("Loading checkpoint", init_checkpoint)
            assignment_map, restored_names = (
                modeling.get_assignment_map_from_checkpoint(
                    trainable_vars, init_checkpoint))

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        if FLAGS.verbose_logging:
            tf.logging.info("**** Trainable Variables ****")
            for variable in trainable_vars:
                suffix = (", *INIT_FROM_CKPT*"
                          if variable.name in restored_names else "")
                rank = hvd.rank() if hvd is not None else 0
                tf.logging.info("  %d :: name = %s, shape = %s%s" %
                                (rank, variable.name, variable.shape, suffix))

        if training:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                hvd, FLAGS.manual_fp16, FLAGS.use_fp16,
                FLAGS.num_accumulation_steps, FLAGS.optimizer_type,
                FLAGS.allreduce_post_accumulation)

            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(lm_example_loss, lm_log_probs, lm_ids, lm_weights,
                          ns_example_loss, ns_log_probs, ns_labels):
                """Return accuracy and mean-loss metrics for both objectives."""
                # Masked-LM metrics, weighted by the masked-LM weights.
                lm_log_probs = tf.reshape(lm_log_probs,
                                          [-1, lm_log_probs.shape[-1]])
                lm_predictions = tf.argmax(lm_log_probs,
                                           axis=-1,
                                           output_type=tf.int32)
                lm_example_loss = tf.reshape(lm_example_loss, [-1])
                lm_ids = tf.reshape(lm_ids, [-1])
                lm_weights = tf.reshape(lm_weights, [-1])
                lm_accuracy = tf.metrics.accuracy(labels=lm_ids,
                                                  predictions=lm_predictions,
                                                  weights=lm_weights)
                lm_mean_loss = tf.metrics.mean(values=lm_example_loss,
                                               weights=lm_weights)

                # Next-sentence metrics, unweighted.
                ns_log_probs = tf.reshape(ns_log_probs,
                                          [-1, ns_log_probs.shape[-1]])
                ns_predictions = tf.argmax(ns_log_probs,
                                           axis=-1,
                                           output_type=tf.int32)
                ns_labels = tf.reshape(ns_labels, [-1])
                ns_accuracy = tf.metrics.accuracy(labels=ns_labels,
                                                  predictions=ns_predictions)
                ns_mean_loss = tf.metrics.mean(values=ns_example_loss)

                return {
                    "masked_lm_accuracy": lm_accuracy,
                    "masked_lm_loss": lm_mean_loss,
                    "next_sentence_accuracy": ns_accuracy,
                    "next_sentence_loss": ns_mean_loss,
                }

            metric_ops = metric_fn(mlm_example_loss, mlm_log_probs, mlm_ids,
                                   mlm_weights, nsp_example_loss,
                                   nsp_log_probs, nsp_labels)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              eval_metric_ops=metric_ops)

        raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                         (mode))
Пример #27
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Builds the classification graph and returns a `TPUEstimatorSpec`.

        Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
        `features`. `is_real_example` is optional; when it is absent every
        example gets a weight of 1.0.

        `bert_config`, `num_labels`, `use_one_hot_embeddings`,
        `init_checkpoint`, `use_tpu`, `learning_rate`, `num_train_steps` and
        `num_warmup_steps` are free variables taken from the enclosing scope.

        Returns:
          TRAIN: a spec carrying the loss and the optimizer train op.
          EVAL: a spec carrying the loss and weighted accuracy / mean loss.
          Any other mode: a spec exposing `probabilities` as predictions.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        # Per-example weights used by the eval metrics.
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        total_loss, per_example_loss, logits, probabilities = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids,
            label_ids, num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, initialized_variable_names = (
                modeling.get_assignment_map_from_checkpoint(
                    tvars, init_checkpoint))
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:

                def tpu_scaffold():
                    # On TPU the restore is deferred until the scaffold is
                    # requested by the estimator.
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            restored = var.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if restored else ""
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu)
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                """Weighted accuracy and mean loss over the eval set."""
                predicted_ids = tf.argmax(logits, axis=-1,
                                          output_type=tf.int32)
                return {
                    "eval_accuracy": tf.metrics.accuracy(
                        labels=label_ids,
                        predictions=predicted_ids,
                        weights=is_real_example),
                    "eval_loss": tf.metrics.mean(
                        values=per_example_loss, weights=is_real_example),
                }

            metric_inputs = [
                per_example_loss, label_ids, logits, is_real_example
            ]
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, metric_inputs),
                scaffold_fn=scaffold_fn)

        # Remaining modes only expose the class probabilities.
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)
Пример #28
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Builds the ALBERT pretraining graph and returns a TPU spec.

        Combines the masked-LM loss and the sentence-order loss into
        `total_loss`. `albert_config`, `use_one_hot_embeddings`,
        `init_checkpoint`, `use_tpu`, `learning_rate`, `num_train_steps`,
        `num_warmup_steps`, `optimizer`, `poly_power`, `start_warmup_step`
        and `FLAGS` are free variables taken from the enclosing scope.

        Returns:
          A `TPUEstimatorSpec` for TRAIN (with train op) or EVAL (with
          accuracy / mean-loss metrics for both objectives).

        Raises:
          ValueError: if `mode` is neither TRAIN nor EVAL.
        """
        tf.logging.info("*** Features ***")
        for feature_name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", feature_name,
                            features[feature_name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        # Note: We keep this feature name `next_sentence_labels` to be compatible
        # with the original data created by lanzhzh@. However, in the ALBERT case
        # it does represent sentence_order_labels.
        sentence_order_labels = features["next_sentence_labels"]

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        model = modeling.AlbertModel(
            config=albert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs = (
            get_masked_lm_output(
                albert_config, model.get_sequence_output(),
                model.get_embedding_table(), masked_lm_positions,
                masked_lm_ids, masked_lm_weights))

        (sentence_order_loss, sentence_order_example_loss,
         sentence_order_log_probs) = get_sentence_order_output(
             albert_config, model.get_pooled_output(), sentence_order_labels)

        total_loss = masked_lm_loss + sentence_order_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            tf.logging.info("number of hidden group %d to initialize",
                            albert_config.num_hidden_groups)
            # One assignment map is restored per group; only a single group
            # is restored unless `FLAGS.init_from_group0` is set.
            num_of_initialize_group = 1
            if FLAGS.init_from_group0:
                num_of_initialize_group = albert_config.num_hidden_groups
                if albert_config.net_structure_type > 0:
                    num_of_initialize_group = albert_config.num_hidden_layers
            assignment_map, initialized_variable_names = (
                modeling.get_assignment_map_from_checkpoint(
                    tvars, init_checkpoint, num_of_initialize_group))

            def restore_groups():
                """Restores every per-group assignment map in order."""
                for gid in range(num_of_initialize_group):
                    group_map = assignment_map[gid]
                    tf.logging.info("initialize the %dth layer", gid)
                    tf.logging.info(group_map)
                    tf.train.init_from_checkpoint(init_checkpoint, group_map)

            if use_tpu:

                def tpu_scaffold():
                    # On TPU the restore is deferred until the scaffold is
                    # requested by the estimator.
                    restore_groups()
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                restore_groups()

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            restored = var.name in initialized_variable_names
            init_string = ", *INIT_FROM_CKPT*" if restored else ""
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu, optimizer, poly_power, start_warmup_step)
            return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op,
                                                scaffold_fn=scaffold_fn)

        if mode != tf.estimator.ModeKeys.EVAL:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        def metric_fn(*args):
            """Computes the loss and accuracy of the model."""
            (masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
             masked_lm_weights, sentence_order_example_loss,
             sentence_order_log_probs, sentence_order_labels) = args[:7]

            # Masked LM: flatten everything to one row per masked position.
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            metrics = {
                "masked_lm_accuracy": tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights),
                "masked_lm_loss": tf.metrics.mean(
                    values=masked_lm_example_loss,
                    weights=masked_lm_weights),
            }

            # Sentence order: unweighted accuracy and mean loss.
            sentence_order_log_probs = tf.reshape(
                sentence_order_log_probs,
                [-1, sentence_order_log_probs.shape[-1]])
            sentence_order_predictions = tf.argmax(
                sentence_order_log_probs, axis=-1, output_type=tf.int32)
            sentence_order_labels = tf.reshape(sentence_order_labels, [-1])
            metrics["sentence_order_accuracy"] = tf.metrics.accuracy(
                labels=sentence_order_labels,
                predictions=sentence_order_predictions)
            metrics["sentence_order_loss"] = tf.metrics.mean(
                values=sentence_order_example_loss)
            return metrics

        metric_values = [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, sentence_order_example_loss,
            sentence_order_log_probs, sentence_order_labels
        ]
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_values),
            scaffold_fn=scaffold_fn)
Пример #29
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator (edit-sequence prediction).

    Reads `input_sequence`, `input_mask`, `segment_ids` and `edit_sequence`
    from `features` and builds the model through `gec_create_model`.
    `bert_config`, `use_one_hot_embeddings`, `copy_weight`, `use_bert_more`,
    `insert_ids`, `multitoken_insert_ids`,
    `subtract_replaced_from_replacement`, `init_checkpoint`, `use_tpu`,
    `learning_rate`, `num_train_steps` and `num_warmup_steps` are free
    variables taken from the enclosing scope.

    Returns:
      TRAIN: a `TPUEstimatorSpec` with the loss and the train op.
      EVAL: a `TPUEstimatorSpec` with masked accuracy and mean loss.
      Any other mode: a `TPUEstimatorSpec` whose predictions hold the
        arg-max edit ids (`"predictions"`) and the raw `"logits"`.
    """

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_sequence = features["input_sequence"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    edit_sequence = features["edit_sequence"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, probabilities) = gec_create_model(
        bert_config, is_training, input_sequence,
        input_mask, segment_ids, edit_sequence,
        use_one_hot_embeddings, mode,
        copy_weight,
        use_bert_more,
        insert_ids,
        multitoken_insert_ids,
        subtract_replaced_from_replacement)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
      # The second element (names of restored variables) is not used here.
      (assignment_map, _
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          # On TPU the restore is deferred until the scaffold is requested.
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
      def metric_fn(per_example_loss, edit_sequence, logits):
        """Accuracy over positions whose gold edit id is not 0-3, plus loss."""
        # Edit ids 0-2 are never predicted: arg-max runs over ids >= 3 only.
        predictions = tf.argmax(logits[:,:,3:], axis=-1, output_type=tf.int32) + 3
        # Positions whose gold edit id is 0, 1, 2 or 3 get zero weight.
        # NOTE(review): the meaning of id 3 is not visible here - confirm
        # that excluding it from the accuracy is intended.
        excluded = tf.equal(edit_sequence, 0)
        for edit_id in (1, 2, 3):
          excluded = tf.logical_or(excluded, tf.equal(edit_sequence, edit_id))
        mask = tf.to_float(tf.logical_not(excluded))
        accuracy = tf.metrics.accuracy(edit_sequence, predictions, mask)
        loss = tf.metrics.mean(per_example_loss)
        return {
            "eval_accuracy": accuracy,
            "eval_loss": loss,
        }

      eval_metrics = (metric_fn, [per_example_loss, edit_sequence, logits])
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics,
          scaffold_fn=scaffold_fn)
    else:
      # First three edit ids (unk, sos, eos) are dummy; they are not
      # considered in predictions.
      predictions = tf.argmax(logits[:,:,3:], axis=-1, output_type=tf.int32) + 3
      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"predictions": predictions, "logits":logits},
          scaffold_fn=scaffold_fn)
    return output_spec
Пример #30
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator (multi-label classification).

        Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
        `features`. `is_real_example` is optional; when it is absent a
        tensor of ones shaped like `label_ids` is used as the weights.

        `bert_config`, `use_one_hot_embeddings`, `fp16`, `init_checkpoint`,
        `use_tpu`, `use_gpu`, `num_gpu_cores`, `learning_rate`,
        `num_train_steps`, `num_warmup_steps` and `NUM_LABELS` are free
        variables taken from the enclosing scope.

        Returns:
          A `tf.estimator.EstimatorSpec` for TRAIN / PREDICT when running on
          two or more GPUs, otherwise a `tf.contrib.tpu.TPUEstimatorSpec`.
          EVAL always returns a `TPUEstimatorSpec` with per-label accuracy,
          precision, recall and F1 plus the mean loss.
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       use_one_hot_embeddings, fp16)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    # On TPU the restore is deferred until the scaffold is
                    # requested.
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        is_multi_gpu = use_gpu and int(num_gpu_cores) >= 2
        if mode == tf.estimator.ModeKeys.TRAIN:
            if is_multi_gpu:
                train_op = custom_optimization.create_optimizer(
                    total_loss,
                    learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    fp16=fp16)
                # `EstimatorSpec.scaffold` must be a `tf.train.Scaffold`
                # instance, not a factory, so build it here when one exists.
                scaffold = scaffold_fn() if scaffold_fn is not None else None
                output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                         loss=total_loss,
                                                         train_op=train_op,
                                                         scaffold=scaffold)
            else:
                train_op = optimization.create_optimizer(total_loss,
                                                         learning_rate,
                                                         num_train_steps,
                                                         num_warmup_steps,
                                                         use_tpu,
                                                         fp16=fp16)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, probabilities,
                          is_real_example):
                """Per-label accuracy / precision / recall / F1 and mean loss."""
                # One slice per label along the last axis; a probability
                # above 0.5 counts as a positive prediction.
                probs_split = tf.split(probabilities, NUM_LABELS, axis=-1)
                preds_split = tf.cast(
                    tf.greater(probs_split, tf.constant(0.5)), tf.int32)
                label_ids_split = tf.split(label_ids, NUM_LABELS, axis=-1)

                eval_dict = {}
                for idx in range(NUM_LABELS):
                    label_ids_spec = label_ids_split[idx]
                    preds_spec = preds_split[idx]
                    acc, acc_op = tf.metrics.accuracy(labels=label_ids_spec,
                                                      predictions=preds_spec,
                                                      weights=is_real_example)
                    pr, pr_op = tf.metrics.precision(labels=label_ids_spec,
                                                     predictions=preds_spec,
                                                     weights=is_real_example)
                    re, re_op = tf.metrics.recall(labels=label_ids_spec,
                                                  predictions=preds_spec,
                                                  weights=is_real_example)
                    # F1 is undefined when precision + recall == 0; report
                    # 0.0 in that case instead of NaN.
                    pr_plus_re = pr + re
                    f1 = tf.where(tf.greater(pr_plus_re, 0.0),
                                  (2 * pr * re) / pr_plus_re,
                                  tf.zeros_like(pr_plus_re))

                    eval_dict["eval_accuracy_" + str(idx)] = (acc, acc_op)
                    eval_dict["eval_precision_" + str(idx)] = (pr, pr_op)
                    eval_dict["eval_recall_" + str(idx)] = (re, re_op)
                    # F1 has no state of its own; it is derived from the
                    # precision / recall value tensors.
                    eval_dict["eval_f1score_" + str(idx)] = (f1,
                                                             tf.identity(f1))

                eval_dict["eval_loss"] = tf.metrics.mean(
                    values=per_example_loss, weights=is_real_example)
                return eval_dict

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, probabilities, is_real_example
            ])
            # eval on single-gpu only
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            if is_multi_gpu:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions={"probabilities": probabilities})
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"probabilities": probabilities},
                    scaffold_fn=scaffold_fn)

        return output_spec
Пример #31
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Builds the BERT pretraining graph and returns a `TPUEstimatorSpec`.

    The total loss is the sum of the masked-LM loss and the next-sentence
    loss. `bert_config`, `use_one_hot_embeddings`, `init_checkpoint`,
    `use_tpu`, `learning_rate`, `num_train_steps` and `num_warmup_steps` are
    free variables taken from the enclosing scope.

    Returns:
      A `TPUEstimatorSpec` for TRAIN (with train op) or EVAL (with accuracy
      and mean-loss metrics for both objectives).

    Raises:
      ValueError: if `mode` is neither TRAIN nor EVAL.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s", feature_name,
                      features[feature_name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    masked_lm_loss, masked_lm_example_loss, masked_lm_log_probs = (
        get_masked_lm_output(
            bert_config, model.get_sequence_output(),
            model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
            masked_lm_weights))

    next_sentence_loss, next_sentence_example_loss, next_sentence_log_probs = (
        get_next_sentence_output(
            bert_config, model.get_pooled_output(), next_sentence_labels))

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      assignment_map, initialized_variable_names = (
          modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
      if not use_tpu:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
      else:

        def tpu_scaffold():
          # On TPU the restore is deferred until the scaffold is requested.
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      restored = var.name in initialized_variable_names
      init_string = ", *INIT_FROM_CKPT*" if restored else ""
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    if mode != tf.estimator.ModeKeys.EVAL:
      raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                  masked_lm_weights, next_sentence_example_loss,
                  next_sentence_log_probs, next_sentence_labels):
      """Computes the loss and accuracy of the model."""
      # Masked LM: flatten everything to one row per masked position.
      masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                       [-1, masked_lm_log_probs.shape[-1]])
      masked_lm_predictions = tf.argmax(
          masked_lm_log_probs, axis=-1, output_type=tf.int32)
      masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
      masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
      masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
      metrics = {
          "masked_lm_accuracy": tf.metrics.accuracy(
              labels=masked_lm_ids,
              predictions=masked_lm_predictions,
              weights=masked_lm_weights),
          "masked_lm_loss": tf.metrics.mean(
              values=masked_lm_example_loss, weights=masked_lm_weights),
      }

      # Next sentence: unweighted accuracy and mean loss.
      next_sentence_log_probs = tf.reshape(
          next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
      next_sentence_predictions = tf.argmax(
          next_sentence_log_probs, axis=-1, output_type=tf.int32)
      next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
      metrics["next_sentence_accuracy"] = tf.metrics.accuracy(
          labels=next_sentence_labels, predictions=next_sentence_predictions)
      metrics["next_sentence_loss"] = tf.metrics.mean(
          values=next_sentence_example_loss)
      return metrics

    metric_inputs = [
        masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        masked_lm_weights, next_sentence_example_loss,
        next_sentence_log_probs, next_sentence_labels
    ]
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, metric_inputs),
        scaffold_fn=scaffold_fn)
Пример #32
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Builds the regression graph and returns a `TPUEstimatorSpec`.

    Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids` from
    `features`. `bert_config`, `use_one_hot_embeddings`, `init_checkpoint`,
    `use_tpu`, `learning_rate`, `num_train_steps` and `num_warmup_steps` are
    free variables taken from the enclosing scope.

    Returns:
      TRAIN: a spec with the loss and the train op.
      EVAL: a spec with streamed predictions/labels, Pearson and
        rank-based correlation, and mean squared error.
      Any other mode: a spec whose predictions are the raw `logits`.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s", feature_name,
                      features[feature_name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    total_loss, per_example_loss, logits = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
        use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      assignment_map, initialized_variable_names = (
          modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint))
      if not use_tpu:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
      else:

        def tpu_scaffold():
          # On TPU the restore is deferred until the scaffold is requested.
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      restored = var.name in initialized_variable_names
      init_string = ", *INIT_FROM_CKPT*" if restored else ""
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(per_example_loss, label_ids, logits):
        """Regression metrics; `per_example_loss` is accepted but unused."""
        # Stream the raw predictions and labels so they can be inspected.
        streamed_preds = tf.contrib.metrics.streaming_concat(logits)
        streamed_labels = tf.contrib.metrics.streaming_concat(label_ids)

        pearson = tf.contrib.metrics.streaming_pearson_correlation(
            logits, label_ids)

        # MSE is computed from labels and logits directly rather than from
        # `per_example_loss`.
        mse = tf.metrics.mean_squared_error(label_ids, logits)

        # Rank-based correlation: top_k over every element returns the
        # sorting order; applying top_k to the negated order converts it
        # into ranks, which are then fed to the Pearson metric.
        # NOTE(review): ranks are derived from the tensors passed to this
        # call only, so this may differ from a Spearman coefficient over
        # the whole eval set - confirm.
        num_elements = tf.size(logits)
        order_pred = tf.nn.top_k(logits, k=num_elements)[1]
        order_label = tf.nn.top_k(label_ids, k=num_elements)[1]
        rank_pred = tf.nn.top_k(-order_pred, k=num_elements)[1]
        rank_label = tf.nn.top_k(-order_label, k=num_elements)[1]
        rank_pred = tf.to_float(rank_pred)
        rank_label = tf.to_float(rank_label)
        spearman = tf.contrib.metrics.streaming_pearson_correlation(
            rank_pred, rank_label)

        return {
            'pred': streamed_preds,
            'label_ids': streamed_labels,
            'pearson': pearson,
            'spearman': spearman,
            'MSE': mse,
        }

      metric_inputs = [per_example_loss, label_ids, logits]
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=(metric_fn, metric_inputs),
          scaffold_fn=scaffold_fn)

    # Remaining modes expose the raw logits as predictions.
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=logits, scaffold_fn=scaffold_fn)
Пример #33
0
    return loss

# Confidence of the predicted start/end position: the largest softmax
# probability along the last axis of the logits.
start_probs = tf.nn.softmax(start_logits, axis=-1)
start_prob = tf.reduce_max(start_probs, axis=-1)
end_probs = tf.nn.softmax(end_logits, axis=-1)
end_prob = tf.reduce_max(end_probs, axis=-1)

# The total loss is the average of the start- and end-position losses.
start_loss = compute_loss(start_logits, start_positions)
end_loss = compute_loss(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2.0
tf.summary.scalar('total_loss', total_loss)


if FLAGS.do_train:
    train_op = optimization.create_optimizer(total_loss, FLAGS.learning_rate, num_train_steps, num_warmup_steps, False)

    # `print` does not interpolate extra positional arguments the way the
    # logging API does, so the values are formatted explicitly with `%`.
    print("***** Running training *****")
    print("  Num orig examples = %d" % len(train_examples))
    print("  Num train_features = %d" % len(train_features))
    print("  Batch size = %d" % FLAGS.train_batch_size)
    print("  Num steps = %d" % num_train_steps)

merged_summary_op = tf.summary.merge_all()

RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])

saver = tf.train.Saver()
# Create the variable initializer, then freeze the graph so that no further
# ops can be added to it.
init = tf.global_variables_initializer()
tf.get_default_graph().finalize()
Пример #34
0
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """The `model_fn` for a plain `tf.estimator.Estimator`.

            Reads `input_ids`, `input_mask`, `segment_ids` and `label_ids`
            from `features` and builds the model via `BertSim.create_model`.
            `bert_config`, `num_labels`, `use_one_hot_embeddings`,
            `init_checkpoint`, `learning_rate`, `num_train_steps` and
            `num_warmup_steps` are free variables taken from the enclosing
            scope.

            Returns:
              TRAIN: an `EstimatorSpec` with the loss and the train op.
              EVAL: an `EstimatorSpec` with accuracy, AUC and mean loss.
              Any other mode: an `EstimatorSpec` whose predictions are the
                `probabilities` tensor.
            """
            tf.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (name, features[name].shape))

            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]
            label_ids = features["label_ids"]

            is_training = (mode == tf.estimator.ModeKeys.TRAIN)

            (total_loss, per_example_loss, logits,
             probabilities) = BertSim.create_model(bert_config, is_training,
                                                   input_ids, input_mask,
                                                   segment_ids, label_ids,
                                                   num_labels,
                                                   use_one_hot_embeddings)

            tvars = tf.trainable_variables()
            initialized_variable_names = {}

            if init_checkpoint:
                (assignment_map, initialized_variable_names) \
                    = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            tf.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, init_string)

            # The public `tf.estimator.EstimatorSpec` is used instead of
            # importing it from the private `tensorflow.python.estimator`
            # module path.
            if mode == tf.estimator.ModeKeys.TRAIN:

                train_op = optimization.create_optimizer(
                    total_loss, learning_rate, num_train_steps,
                    num_warmup_steps, False)

                output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                         loss=total_loss,
                                                         train_op=train_op)
            elif mode == tf.estimator.ModeKeys.EVAL:

                def metric_fn(per_example_loss, label_ids, logits):
                    """Accuracy, AUC and mean loss from arg-max predictions."""
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    accuracy = tf.metrics.accuracy(label_ids, predictions)
                    # NOTE(review): AUC is computed from the hard arg-max
                    # class ids rather than from probabilities - confirm
                    # this is intended.
                    auc = tf.metrics.auc(label_ids, predictions)
                    loss = tf.metrics.mean(per_example_loss)
                    return {
                        "eval_accuracy": accuracy,
                        "eval_auc": auc,
                        "eval_loss": loss,
                    }

                eval_metrics = metric_fn(per_example_loss, label_ids, logits)
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=eval_metrics)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=probabilities)

            return output_spec