Example #1
    def _build_train_graph(self):
        with tf.device('/gpu:%d' % self.hparams.gpu_num[0]):
            bert_model = modeling_base.BertModel(
                config=self.bert_config,
                is_training=False,
                input_ids=self.knowledge_ids_ph,
                input_mask=self.knowledge_mask_ph,
                token_type_ids=self.knowledge_seg_ids_ph,
                use_one_hot_embeddings=False,
                scope='bert',
                hparams=self.hparams)
            # final encoder layer: [batch_size, seq_length, hidden_size]
            self.bert_sequence_outputs = bert_model.get_sequence_output()
Example #2
    def analysis_evaluate(self, saved_file: str):
        # saved_file: path to the checkpoint restored below via tf.train.Saver
        config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
        )
        config.gpu_options.allow_growth = True
        self.train_setup_vars["do_evaluate"] = True
        self.train_setup_vars["is_train_continue"] = True

        tf.reset_default_graph()
        self.sess = tf.Session(config=config)
        self._make_placeholders()
        # self._build_train_graph()

        bert_model = modeling_base.BertModel(
            config=self.bert_config,
            is_training=False,
            input_ids=self.input_ids_ph,
            input_mask=self.input_mask_ph,
            token_type_ids=self.segment_ids_ph,
            dialog_position_ids=None,
            use_adapter_layer=False,
            scope='bert',
            hparams=self.hparams)
        input_phs = (self.input_ids_ph, self.input_mask_ph)
        length_phs = (self.dialog_len_ph, self.response_len_ph,
                      self.knowledge_len_ph)
        label_ids_phs = (self.label_ids_ph, self.knowledge_label_ids_ph)

        created_model = self.hparams.graph.Model(self.hparams,
                                                 self.bert_config, bert_model,
                                                 input_phs, label_ids_phs,
                                                 length_phs, None, None)
        self.logits, self.seq_outputs = created_model.build_graph()

        self.sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        saver.restore(self.sess, saved_file)

        self._logger.info("Evaluation Step - Test")
        self.analysis_run_evaluate()
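For reference, a minimal driver for the evaluation entry point above might look like the following; the trainer class name, hparams object, and checkpoint path are placeholders, since the listing only shows the method itself.

    # Hypothetical usage -- BertTrainer, hparams, and the path are assumed names.
    trainer = BertTrainer(hparams)
    trainer.analysis_evaluate(saved_file="runs/model.ckpt-100000")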
Example #3
    def build_graph(self):
        # Disable dropout when evaluating.
        is_training = not self.train_setup_vars["do_evaluate"]
        use_one_hot_embeddings = False
        input_ids, input_mask, segment_ids = self.input_phs

        # load bert model
        bert_model = modeling_base.BertModel(
            config=self.bert_config,
            dropout_prob=self.dropout_prob_ph,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            scope='bert',
            hparams=self.hparams)
        # pooled output: [batch_size, hidden_size], taken from the [CLS] token
        pooled_output = bert_model.get_pooled_output()

        return self._final_output_layer(pooled_output, self.label_id_phs)
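The `_final_output_layer` helper is not part of the listing. A plausible sketch of such a head, assuming `label_id_phs` carries integer class labels first and that `num_classes` exists on hparams (both assumptions; the repo's real helper may differ), is:

    def _final_output_layer(self, pooled_output, label_id_phs):
        # Hypothetical head: dense projection to class logits plus
        # softmax cross-entropy over the pooled [CLS] vector.
        labels = label_id_phs[0]  # assumed: first placeholder holds class labels
        logits = tf.layers.dense(pooled_output, self.hparams.num_classes)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits))
        return logits, loss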
Example #4
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling_base.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings,
            use_domain_embeddings=True,
            scope="bert")

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        total_loss = masked_lm_loss + next_sentence_loss

        tvars = tf.trainable_variables()
        """fix_start"""
        # vars_in_checkpoint = tf.train.list_variables(init_checkpoint)
        # checkpoint_vars = []
        # for var_name, _ in vars_in_checkpoint:
        #   checkpoint_vars.append(var_name)
        #   print("pretrained_var : ", var_name)
        """fix_end"""
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_example_loss, next_sentence_log_probs,
                          next_sentence_labels):
                """Computes the loss and accuracy of the model."""
                masked_lm_log_probs = tf.reshape(
                    masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
                masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                                  axis=-1,
                                                  output_type=tf.int32)
                masked_lm_example_loss = tf.reshape(masked_lm_example_loss,
                                                    [-1])
                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
                masked_lm_accuracy = tf.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=masked_lm_predictions,
                    weights=masked_lm_weights)
                masked_lm_mean_loss = tf.metrics.mean(
                    values=masked_lm_example_loss, weights=masked_lm_weights)

                next_sentence_log_probs = tf.reshape(
                    next_sentence_log_probs,
                    [-1, next_sentence_log_probs.shape[-1]])
                next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                      axis=-1,
                                                      output_type=tf.int32)
                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
                next_sentence_accuracy = tf.metrics.accuracy(
                    labels=next_sentence_labels,
                    predictions=next_sentence_predictions)
                next_sentence_mean_loss = tf.metrics.mean(
                    values=next_sentence_example_loss)

                return {
                    "masked_lm_accuracy": masked_lm_accuracy,
                    "masked_lm_loss": masked_lm_mean_loss,
                    "next_sentence_accuracy": next_sentence_accuracy,
                    "next_sentence_loss": next_sentence_mean_loss,
                }

            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights, next_sentence_example_loss,
                next_sentence_log_probs, next_sentence_labels
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
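`get_next_sentence_output` is called above but not included in the listing. In the original BERT `run_pretraining.py` it is, up to the initializer helper, essentially the following:

def get_next_sentence_output(bert_config, input_tensor, labels):
    """Binary next-sentence classifier over the pooled [CLS] vector."""
    with tf.variable_scope("cls/seq_relationship"):
        output_weights = tf.get_variable(
            "output_weights",
            shape=[2, bert_config.hidden_size],
            initializer=tf.truncated_normal_initializer(
                stddev=bert_config.initializer_range))
        output_bias = tf.get_variable(
            "output_bias", shape=[2], initializer=tf.zeros_initializer())

        logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        labels = tf.reshape(labels, [-1])
        one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
        # negative log-likelihood of the true label, per example
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, log_probs)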
Example #5
    def _build_train_graph(self):
        gpu_num = len(self.hparams.gpu_num)
        if gpu_num > 1:
            print("-" * 10, "Using %d Multi-GPU" % gpu_num, "-" * 10)
        else:
            print("-" * 10, "Using Single-GPU", "-" * 10)

        # Disable dropout when evaluating.
        is_training = not self.train_setup_vars["do_evaluate"]
        use_one_hot_embeddings = False

        input_ids_ph = tf.split(self.input_ids_ph, gpu_num, 0)
        input_mask_ph = tf.split(self.input_mask_ph, gpu_num, 0)
        segment_ids_ph = tf.split(self.segment_ids_ph, gpu_num, 0)
        dialog_position_ids_ph = tf.split(self.dialog_position_ids_ph, gpu_num,
                                          0)

        dialog_len_ph = tf.split(self.dialog_len_ph, gpu_num, 0)
        response_len_ph = tf.split(self.response_len_ph, gpu_num, 0)

        knowledge_tokens_ph = tf.split(self.knowledge_tokens_ph, gpu_num, 0)
        knowledge_len_ph = tf.split(self.knowledge_len_ph, gpu_num, 0)
        knowledge_label_ids_ph = tf.split(self.knowledge_label_ids_ph, gpu_num,
                                          0)

        similar_input_ids_ph = tf.split(self.similar_input_ids_ph, gpu_num, 0)
        similar_input_mask_ph = tf.split(self.similar_input_mask_ph, gpu_num,
                                         0)
        similar_len_ph = tf.split(self.similar_input_len_ph, gpu_num, 0)

        label_ids_ph = tf.split(self.label_ids_ph, gpu_num, 0)
        # is_real_examples_ph = tf.split(self.is_real_examples_ph, gpu_num, 0)

        tower_grads = []
        tot_losses = []
        tot_logits = []
        tot_labels = []
        tot_outputs = []
        tot_argmax = []

        tvars = []
        for i, gpu_id in enumerate(self.hparams.gpu_num):
            with tf.device('/gpu:%d' % gpu_id):
                with tf.variable_scope('', reuse=tf.AUTO_REUSE):
                    print("bert_graph_multi_gpu :", gpu_id)

                    if self.hparams.do_dialog_state_embedding:
                        each_dialog_position_ids = dialog_position_ids_ph[i]
                    else:
                        each_dialog_position_ids = None

                    bert_model = modeling_base.BertModel(
                        config=self.bert_config,
                        is_training=is_training,
                        input_ids=input_ids_ph[i],
                        input_mask=input_mask_ph[i],
                        token_type_ids=segment_ids_ph[i],
                        dialog_position_ids=each_dialog_position_ids,
                        use_adapter_layer=self.hparams.do_adapter_layer,
                        scope='bert',
                        hparams=self.hparams)

                    input_phs = (input_ids_ph[i], input_mask_ph[i])
                    length_phs = (dialog_len_ph[i], response_len_ph[i],
                                  knowledge_len_ph[i])
                    knowledge_phs = knowledge_tokens_ph[i]
                    similar_phs = None
                    if self.hparams.do_similar_dialog:
                        similar_phs = (similar_input_ids_ph[i],
                                       similar_input_mask_ph[i],
                                       similar_len_ph[i])
                    label_ids_phs = (label_ids_ph[i],
                                     knowledge_label_ids_ph[i])
                    created_model = self.hparams.graph.Model(
                        self.hparams, self.bert_config, bert_model, input_phs,
                        label_ids_phs, length_phs, knowledge_phs, similar_phs)
                    (logits, loss_op, seq_outputs,
                     sentence_argmax) = created_model.build_graph()

                    tot_losses.append(loss_op)
                    tot_logits.append(logits)
                    tot_labels.append(label_ids_ph[i])
                    tot_outputs.append(seq_outputs)
                    tot_argmax.append(sentence_argmax)

                    if i == 0:
                        self._select_train_variables()
                        if self.hparams.do_adam_weight_optimizer:
                            self.optimizer, self.global_step = optimization.create_optimizer(
                                loss_op,
                                self.hparams.learning_rate,
                                self.num_train_steps,
                                self.num_warmup_steps,
                                use_tpu=False)
                        else:
                            self.optimizer = tf.train.AdamOptimizer(
                                self.hparams.learning_rate)
                            self.global_step = tf.Variable(0,
                                                           name="global_step",
                                                           trainable=False)

                    if not self.hparams.do_train_bert:
                        if i == 0:
                            for var in tf.trainable_variables():
                                if var not in self.pretrained_not_train_var_names:
                                    tvars.append(var)
                    else:
                        tvars = tf.trainable_variables()

                    if self.hparams.do_adam_weight_optimizer:
                        # This is how the model was pre-trained.
                        grads = tf.gradients(loss_op, tvars)
                        (grads, _) = tf.clip_by_global_norm(grads,
                                                            clip_norm=1.0)
                        tower_grads.append(zip(grads, tvars))
                    else:
                        grads = self.optimizer.compute_gradients(
                            loss_op, var_list=tvars)
                        tower_grads.append(grads)
                    tf.get_variable_scope().reuse_variables()

        avg_grads = average_gradients(tower_grads)
        self.loss_op = tf.divide(tf.add_n(tot_losses), gpu_num)
        self.logits = tf.concat(tot_logits, axis=0)
        # Note: only the last tower's sequence outputs and argmax are kept;
        # tot_outputs and tot_argmax are gathered but never concatenated.
        self.sequence_outputs = seq_outputs
        self.sentence_argmax = sentence_argmax
        tot_labels = tf.concat(tot_labels, axis=0)

        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            self.train_op = self.optimizer.apply_gradients(
                avg_grads, self.global_step)
            # new_global_step = self.global_step + 1
            # self.train_op = tf.group(self.train_op, [self.global_step.assign(new_global_step)])

        if self.hparams.loss_type == "sigmoid":
            correct_pred = tf.equal(tf.round(tf.nn.sigmoid(self.logits)),
                                    tf.cast(self.label_ids_ph, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        else:
            # renamed from `eval`, which shadows the Python builtin
            correct = tf.nn.in_top_k(self.logits, self.label_ids_ph, 1)
            correct_count = tf.reduce_sum(tf.cast(correct, tf.int32))
            self.accuracy = tf.divide(correct_count,
                                      tf.shape(self.label_ids_ph)[0])
            self.confidence = tf.nn.softmax(self.logits, axis=-1)

        if not self.train_setup_vars["do_evaluate"] and not self.train_setup_vars["on_training"] \
          and not self.train_setup_vars["is_train_continue"]:
            self._initialize_uninitialized_variables()
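`average_gradients` is called at the end of the tower loop but is not shown in the listing. A minimal sketch following the standard TensorFlow multi-GPU pattern, assuming every tower yields a gradient for every variable (no `None` entries), is:

def average_gradients(tower_grads):
    """Average each variable's gradients across all GPU towers."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars pairs one variable with its per-tower gradients:
        # ((grad_gpu0, var), (grad_gpu1, var), ...)
        grads = tf.stack([g for g, _ in grad_and_vars], axis=0)
        grad = tf.reduce_mean(grads, axis=0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads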
Example #6
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]
        next_sentence_labels = features["next_sentence_labels"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling_base.BertModel(config=bert_config,
                                        is_training=is_training,
                                        input_ids=input_ids,
                                        input_mask=input_mask,
                                        token_type_ids=segment_ids,
                                        use_adapter_layer=False,
                                        scope="bert")

        # (masked_lm_loss,
        #  masked_lm_example_loss, masked_lm_log_probs) = get_masked_lm_output(
        #      bert_config, model.get_sequence_output(), model.get_embedding_table(),
        #      masked_lm_positions, masked_lm_ids, masked_lm_weights)

        (next_sentence_loss, next_sentence_example_loss,
         next_sentence_log_probs) = get_next_sentence_output(
             bert_config, model.get_pooled_output(), next_sentence_labels)

        # total_loss = masked_lm_loss + next_sentence_loss

        total_loss = next_sentence_loss  # TODO: add masked_lm_loss back once the MLM head is re-enabled

        # tf.summary.scalar('loss/masked_lm_loss', masked_lm_loss)
        tf.summary.scalar('loss/next_sentence_loss', next_sentence_loss)
        tf.summary.scalar('loss/total_loss', total_loss)

        tvars = tf.trainable_variables()
        global_vars = tf.global_variables()  # renamed: `vars` shadows a builtin
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling_utils.get_assignment_map_from_checkpoint(
                 global_vars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

                for map_val in assignment_map:
                    print(map_val)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            train_logging_hook = tf.train.LoggingTensorHook(
                {
                    "loss": total_loss,
                    # "masked_lm_loss" : masked_lm_loss,
                    "next_sentence_loss": next_sentence_loss
                },
                every_n_iter=100)

            summary_hook = tf.train.SummarySaverHook(
                save_steps=100,
                output_dir=FLAGS.output_dir,
                summary_op=tf.summary.merge_all())  # must be called, not passed as a function

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
                training_hooks=[train_logging_hook],
                evaluation_hooks=[summary_hook],
            )
        # elif mode == tf.estimator.ModeKeys.EVAL:
        #
        #   def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        #                 masked_lm_weights, next_sentence_example_loss,
        #                 next_sentence_log_probs, next_sentence_labels):
        #     """Computes the loss and accuracy of the model."""
        #     masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
        #                                      [-1, masked_lm_log_probs.shape[-1]])
        #     masked_lm_predictions = tf.argmax(
        #         masked_lm_log_probs, axis=-1, output_type=tf.int32)
        #     masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
        #     masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
        #     masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
        #     masked_lm_accuracy = tf.metrics.accuracy(
        #         labels=masked_lm_ids,
        #         predictions=masked_lm_predictions,
        #         weights=masked_lm_weights)
        #     masked_lm_mean_loss = tf.metrics.mean(
        #         values=masked_lm_example_loss, weights=masked_lm_weights)
        #
        #     next_sentence_log_probs = tf.reshape(
        #         next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
        #     next_sentence_predictions = tf.argmax(
        #         next_sentence_log_probs, axis=-1, output_type=tf.int32)
        #     next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
        #     next_sentence_accuracy = tf.metrics.accuracy(
        #         labels=next_sentence_labels, predictions=next_sentence_predictions)
        #     next_sentence_mean_loss = tf.metrics.mean(
        #         values=next_sentence_example_loss)
        #
        #     return {
        #         "masked_lm_accuracy": masked_lm_accuracy,
        #         "masked_lm_loss": masked_lm_mean_loss,
        #         "next_sentence_accuracy": next_sentence_accuracy,
        #         "next_sentence_loss": next_sentence_mean_loss,
        #     }
        #
        #   eval_metrics = (metric_fn, [
        #       masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        #       masked_lm_weights, next_sentence_example_loss,
        #       next_sentence_log_probs, next_sentence_labels
        #   ])
        #
        #   output_spec = tf.contrib.tpu.TPUEstimatorSpec(
        #       mode=mode,
        #       loss=total_loss,
        #       eval_metrics=eval_metrics,
        #       scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
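A `model_fn` like the ones in Examples #4 and #6 is normally handed to `tf.contrib.tpu.TPUEstimator`. A wiring sketch in the style of the BERT pretraining script follows; the batch sizes, `FLAGS` values, and `train_input_fn` are assumptions, not taken from this listing.

# Sketch: construct the estimator and run training with the model_fn above.
run_config = tf.contrib.tpu.RunConfig(
    master=None,
    model_dir=FLAGS.output_dir,
    save_checkpoints_steps=1000,
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1000))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,
    eval_batch_size=8)

estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)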