def evaluation(self, global_step):
    eval_input_fn = self.input_fn_builder(features=self.dev_features,
                                          seq_length=self.max_seq_length,
                                          is_training=False,
                                          drop_remainder=False)
    predictions = self.estimator.predict(eval_input_fn,
                                         yield_single_examples=False)
    # each prediction dict carries a batch of class probabilities
    res = np.concatenate([a["prob"] for a in predictions], axis=0)
    metrics = PRF(np.array(self.dev_label), res.argmax(axis=-1))
    print('\nGlobal step:', global_step)
    # rank candidates by the probability of class 0
    MAP, AvgRec, MRR = eval_reranker(self.dev_cid, self.dev_label, res[:, 0])
    metrics['MAP'] = MAP
    metrics['AvgRec'] = AvgRec
    metrics['MRR'] = MRR
    metrics['global_step'] = global_step
    print_metrics(metrics, 'dev', save_dir=self._log_save_path)
    return MAP * 100, MRR
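# Hedged sketch of the hook class that `evaluation` above belongs to. main()
# below constructs EvalHook with exactly these arguments, but the body here
# (attribute wiring, best-MAP bookkeeping) is an assumption, not the repo's
# actual implementation; `evaluation` (defined above) is a method of this class.
class EvalHook(tf.train.SessionRunHook):

    def __init__(self, estimator, dev_features, dev_label, dev_cid,
                 max_seq_length, eval_steps, checkpoint_dir):
        self.estimator = estimator
        self.dev_features = dev_features
        self.dev_label = dev_label
        self.dev_cid = dev_cid
        self.max_seq_length = max_seq_length
        self.eval_steps = eval_steps
        self.input_fn_builder = input_fn_builder  # module-level builder (assumed)
        self._log_save_path = checkpoint_dir      # where print_metrics logs dev results
        self._best_map = 0.0

    def begin(self):
        # grab the global step tensor before the graph is finalized
        self._global_step_tensor = tf.train.get_global_step()

    def after_run(self, run_context, run_values):
        global_step = run_context.session.run(self._global_step_tensor)
        # evaluate on dev every `eval_steps` training steps
        if global_step > 0 and global_step % self.eval_steps == 0:
            dev_map, dev_mrr = self.evaluation(global_step)
            self._best_map = max(self._best_map, dev_map)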
def evaluate(self, eva_data, steps_num, eva_type, eva_ID=None):
    label = []
    predict = []
    loss = []
    # `steps_num` is the total number of examples: the bar is advanced by
    # batch size below, and the summed loss is normalised by it afterwards.
    with tqdm(total=steps_num, ncols=70) as tbar:
        for batch_eva_data in eva_data:
            batch_label = batch_eva_data[-1]
            feed_dict = {
                inv: array
                for inv, array in zip(self.input_placeholder, batch_eva_data)
            }
            batch_loss, batch_predict = self.sess.run(
                [self.loss, self.predict_prob], feed_dict=feed_dict)
            label.append(batch_label.argmax(axis=1))
            # weight the batch loss by batch size so the mean is per example
            loss.append(batch_loss * batch_label.shape[0])
            predict.append(batch_predict)
            tbar.update(batch_label.shape[0])
    label = np.concatenate(label, axis=0)
    predict = np.concatenate(predict, axis=0)
    loss = sum(loss) / steps_num
    metrics = PRF(label, predict.argmax(axis=1))
    metrics['loss'] = loss
    if eva_ID is not None:
        eval_reranker(eva_ID, label, predict[:, 0],
                      metrics['matrix'],
                      categories_num=self.args.categories_num)
    # TensorBoard summaries for the caller's FileWriter
    loss_summ = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(eva_type),
                         simple_value=metrics['loss']),
    ])
    macro_F_summ = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(eva_type),
                         simple_value=metrics['macro_prf'][2]),
    ])
    acc = tf.Summary(value=[
        tf.Summary.Value(tag="{}/acc".format(eva_type),
                         simple_value=metrics['acc']),
    ])
    return metrics, [loss_summ, macro_F_summ, acc]
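# Hedged usage sketch for `evaluate`: the summaries it returns are plain
# tf.Summary protos meant for a tf.summary.FileWriter. The names `model`,
# `dev_batches`, `dev_size`, and the helper itself are illustrative
# assumptions, not code from the repo.
def log_dev_metrics(model, dev_batches, dev_size, step, log_dir):
    writer = tf.summary.FileWriter(log_dir)
    metrics, summaries = model.evaluate(dev_batches, dev_size, 'dev')
    for summ in summaries:
        writer.add_summary(summ, global_step=step)
    writer.flush()
    return metrics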
def one_train(self, epochs, batch_size, train_data, train_label, dev_data, dev_label):
    self.compile_model()
    for e in range(epochs):
        # one epoch per fit() call, so dev metrics can be logged after each epoch
        history = self.model.fit(train_data, train_label,
                                 batch_size=batch_size,
                                 verbose=1,
                                 validation_data=(dev_data, dev_label))
        dev_out = self.model.predict(dev_data, batch_size=2 * batch_size, verbose=1)
        # sigmoid output thresholded at 0.5 for the binary label
        metrics = PRF(dev_label, (dev_out > 0.5).astype('int32').reshape([-1]))
        metrics['epoch'] = e + 1
        # fit() ran a single epoch, so take its (only) validation loss
        metrics['val_loss'] = history.history['val_loss'][0]
        print_metrics(metrics,
                      metrics_type=self.__class__.__name__ + self.args.selfname,
                      save_dir=self.args.log_dir)
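# Hedged usage sketch: `x_train`/`y_train`/`x_dev`/`y_dev` are placeholder
# numpy arrays and the epoch/batch values are illustrative, not the repo's.
model.one_train(epochs=5, batch_size=64,
                train_data=x_train, train_label=y_train,
                dev_data=x_dev, dev_label=y_dev)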
import pickle as pkl

import numpy as np
import tensorflow as tf
from tensorflow.contrib import tpu

import modeling
# Repo-local helpers assumed importable here: model_fn_builder,
# input_fn_builder, EvalHook, PRF, eval_reranker, print_metrics.


def main():
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    # RunConfig is immutable: replace() returns a new config, so keep the result.
    run_config = run_config.replace(session_config=session_config)

    num_train_steps = None
    num_warmup_steps = None
    with open('cqa_data.pkl', 'rb') as fr:
        train_features, dev_cid, dev_features = pkl.load(fr)
    dev_label = [feature.label_id for feature in dev_features]

    if FLAGS.do_train:
        num_train_steps = int(
            len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=2,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                dev_cid=dev_cid)

    # If a TPU is not available, TPUEstimator falls back to a normal Estimator
    # on CPU or GPU.
    estimator = tpu.TPUEstimator(use_tpu=FLAGS.use_tpu,
                                 model_fn=model_fn,
                                 config=run_config,
                                 train_batch_size=FLAGS.train_batch_size,
                                 predict_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_features))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = input_fn_builder(features=train_features,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=[EvalHook(estimator=estimator,
                                        dev_features=dev_features,
                                        dev_label=dev_label,
                                        dev_cid=dev_cid,
                                        max_seq_length=FLAGS.max_seq_length,
                                        eval_steps=FLAGS.save_checkpoints_steps,
                                        checkpoint_dir=FLAGS.output_dir)])

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(dev_features))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # eval_steps = None tells the estimator to run through the entire set.
        eval_steps = None
        # On the TPU, however, the number of steps must be given explicitly,
        # and eval will be slightly wrong there because the last partial batch
        # is truncated.
        if FLAGS.use_tpu:
            eval_steps = int(len(dev_features) / FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = input_fn_builder(features=dev_features,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=False,
                                         drop_remainder=eval_drop_remainder)
        predictions = estimator.predict(eval_input_fn,
                                        yield_single_examples=False)
        res = np.concatenate([a for a in predictions], axis=0)
        print(res.shape, np.array(dev_label).shape)
        metrics = PRF(np.array(dev_label), res.argmax(axis=-1))
        MAP, AvgRec, MRR = eval_reranker(dev_cid, dev_label, res[:, 0])
        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR
        print_metrics(metrics, 'dev')
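# Hedged invocation sketch: the script name and flag values below are
# placeholders, but every flag is one that main() reads, and main() expects
# `cqa_data.pkl` (train/dev features) in the working directory.
#
#   python run_bert_cqa.py \
#       --do_train=True --do_eval=True \
#       --bert_config_file=$BERT_BASE_DIR/bert_config.json \
#       --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
#       --max_seq_length=128 \
#       --train_batch_size=32 --eval_batch_size=32 \
#       --learning_rate=2e-5 --num_train_epochs=3 \
#       --save_checkpoints_steps=1000 \
#       --output_dir=./output

if __name__ == '__main__':
    main()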