Example #1
    def evaluation(self, global_step):
        eval_input_fn = self.input_fn_builder(features=self.dev_features,
                                              seq_length=self.max_seq_length,
                                              is_training=False,
                                              drop_remainder=False)

        predictions = self.estimator.predict(eval_input_fn,
                                             yield_single_examples=False)
        res = np.concatenate([a["prob"] for a in predictions], axis=0)

        metrics = PRF(np.array(self.dev_label), res.argmax(axis=-1))

        print('\nGlobal step:', global_step)
        MAP, AvgRec, MRR = eval_reranker(self.dev_cid, self.dev_label,
                                         res[:, 0])

        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR

        metrics['global_step'] = global_step

        print_metrics(metrics, 'dev', save_dir=self._log_save_path)

        return MAP * 100, MRR
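
These examples all rely on a PRF helper that is not shown in the listing. From
the way its result is used (metrics['acc'], metrics['macro_prf'][2],
metrics['matrix'] in Example #2 below), it appears to return a dict of
classification metrics. A minimal sketch of such a helper, assuming
scikit-learn is available; the project's actual implementation may differ:

    import numpy as np
    from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

    def PRF(label, predict):
        # Hypothetical reconstruction: accuracy, macro-averaged
        # precision/recall/F1, and the confusion matrix, matching the
        # keys the surrounding examples index into.
        macro_p, macro_r, macro_f, _ = precision_recall_fscore_support(
            label, predict, average='macro')
        return {
            'acc': float(np.mean(label == predict)),
            'macro_prf': [macro_p, macro_r, macro_f],
            'matrix': confusion_matrix(label, predict),
        }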
Example #2
    def evaluate(self, eva_data, steps_num, eva_type, eva_ID=None):
        label = []
        predict = []
        loss = []
        with tqdm(total=steps_num, ncols=70) as tbar:
            for batch_eva_data in eva_data:
                batch_label = batch_eva_data[-1]

                feed_dict = {
                    inv: array
                    for inv, array in zip(self.input_placeholder,
                                          batch_eva_data)
                }
                batch_loss, batch_predict = self.sess.run(
                    [self.loss, self.predict_prob], feed_dict=feed_dict)

                label.append(batch_label.argmax(axis=1))
                loss.append(batch_loss * batch_label.shape[0])
                predict.append(batch_predict)

                tbar.update(batch_label.shape[0])

        label = np.concatenate(label, axis=0)
        predict = np.concatenate(predict, axis=0)

        loss = sum(loss) / steps_num
        metrics = PRF(label, predict.argmax(axis=1))
        metrics['loss'] = loss

        if eva_ID is not None:
            eval_reranker(eva_ID,
                          label,
                          predict[:, 0],
                          metrics['matrix'],
                          categories_num=self.args.categories_num)

        loss_summ = tf.Summary(value=[
            tf.Summary.Value(tag="{}/loss".format(eva_type),
                             simple_value=metrics['loss']),
        ])
        macro_F_summ = tf.Summary(value=[
            tf.Summary.Value(tag="{}/f1".format(eva_type),
                             simple_value=metrics['macro_prf'][2]),
        ])
        acc = tf.Summary(value=[
            tf.Summary.Value(tag="{}/acc".format(eva_type),
                             simple_value=metrics['acc']),
        ])
        return metrics, [loss_summ, macro_F_summ, acc]
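
Example #2 returns raw tf.Summary protos instead of writing them itself, so
the caller decides where they go. A hedged usage sketch (model, log_dir,
dev_data, steps_num, and global_step are placeholders, not names from the
source):

    # Hypothetical caller: push the evaluation summaries to TensorBoard
    # with a TF 1.x FileWriter.
    writer = tf.summary.FileWriter(log_dir)
    metrics, summaries = model.evaluate(dev_data, steps_num, eva_type='dev')
    for summ in summaries:
        writer.add_summary(summ, global_step=global_step)
    writer.flush()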
Example #3
    def one_train(self, epochs, batch_size, train_data, train_label, dev_data,
                  dev_label):
        self.compile_model()
        for e in range(epochs):
            history = self.model.fit(train_data,
                                     train_label,
                                     batch_size=batch_size,
                                     verbose=1,
                                     validation_data=(dev_data, dev_label))
            dev_out = self.model.predict(dev_data,
                                         batch_size=2 * batch_size,
                                         verbose=1)
            metrics = PRF(dev_label,
                          (dev_out > 0.5).astype('int32').reshape([-1]))
            metrics['epoch'] = e + 1
            # fit() runs a single epoch per loop iteration, so take the
            # scalar rather than the one-element history list.
            metrics['val_loss'] = history.history['val_loss'][-1]
            print_metrics(metrics,
                          metrics_type=self.__class__.__name__ +
                          self.args.selfname,
                          save_dir=self.args.log_dir)
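
The (dev_out > 0.5) threshold in Example #3 implies a model ending in a
single sigmoid unit trained with a binary cross-entropy loss. A minimal
compile_model compatible with that assumption (the layer sizes and
self.args.input_dim are hypothetical):

    def compile_model(self):
        # Hypothetical: any architecture ending in one sigmoid unit fits
        # the thresholding and reshape([-1]) used in one_train above.
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu',
                                  input_shape=(self.args.input_dim,)),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        self.model.compile(optimizer='adam', loss='binary_crossentropy')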
Example #4
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu.TPUConfig(iterations_per_loop=FLAGS.iterations_per_loop,
                                 num_shards=FLAGS.num_tpu_cores,
                                 per_host_input_for_training=is_per_host))

    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    # RunConfig.replace() returns a new config rather than mutating in
    # place, so the result must be kept.
    run_config = run_config.replace(session_config=session_config)

    num_train_steps = None
    num_warmup_steps = None

    with open('cqa_data.pkl', 'rb') as fr:
        train_features, dev_cid, dev_features = pkl.load(fr)
        dev_label = [feature.label_id for feature in dev_features]

    if FLAGS.do_train:
        num_train_steps = int(
            len(train_features) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=2,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                dev_cid=dev_cid)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        # params={'batch_size': FLAGS.train_batch_size},
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_features))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = input_fn_builder(features=train_features,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)

        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=[
                            EvalHook(estimator=estimator,
                                     dev_features=dev_features,
                                     dev_label=dev_label,
                                     dev_cid=dev_cid,
                                     max_seq_length=FLAGS.max_seq_length,
                                     eval_steps=FLAGS.save_checkpoints_steps,
                                     checkpoint_dir=FLAGS.output_dir)
                        ])

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(dev_features))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(dev_features) / FLAGS.eval_batch_size)

        eval_drop_remainder = FLAGS.use_tpu
        eval_input_fn = input_fn_builder(features=dev_features,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=False,
                                         drop_remainder=eval_drop_remainder)

        predictions = estimator.predict(eval_input_fn,
                                        yield_single_examples=False)
        res = np.concatenate(list(predictions), axis=0)
        print(res.shape, np.array(dev_label).shape)
        metrics = PRF(np.array(dev_label), res.argmax(axis=-1))
        MAP, AvgRec, MRR = eval_reranker(dev_cid, dev_label, res[:, 0])
        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR

        print_metrics(metrics, 'dev')
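
eval_reranker is not shown, but the call pattern above implies it groups
candidates by dev_cid, ranks each group by res[:, 0] (the predicted
probability of the relevant class), and scores the resulting ranking. A
sketch of the MRR part under those assumptions, with the relevant-label
convention left as a parameter since the source does not pin it down:

    import numpy as np
    from collections import defaultdict

    def mrr(cids, labels, scores, relevant_label=1):
        # Hypothetical: group by query id, sort by score descending, and
        # average the reciprocal rank of the first relevant candidate.
        groups = defaultdict(list)
        for cid, label, score in zip(cids, labels, scores):
            groups[cid].append((score, label))
        ranks = []
        for cands in groups.values():
            cands.sort(key=lambda x: -x[0])
            hit = next((i + 1 for i, (_, lab) in enumerate(cands)
                        if lab == relevant_label), None)
            ranks.append(1.0 / hit if hit else 0.0)
        return float(np.mean(ranks))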