def eval_model(val_loader, epoch):
    """Evaluate on the validation set."""
    net.eval()
    eval_loss = 0.0
    true_labels, predicted_scores = [], []
    # Disable autograd during evaluation to save memory and compute
    with torch.no_grad():
        for batch in val_loader:
            x_val_fb_content, x_val_fb_question, x_val_fb_option, \
                x_val_fb_clens, x_val_fb_qlens, x_val_fb_olens, y_val_fb = batch
            logits, scores = net(x_val_fb_content, x_val_fb_question, x_val_fb_option)
            avg_batch_loss = criterion(scores, y_val_fb)
            eval_loss += avg_batch_loss.item()
            true_labels.extend(y_val_fb[0].tolist())
            predicted_scores.extend(scores[0].tolist())

    # Calculate the metrics
    eval_rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
    eval_r2 = r2_score(true_labels, predicted_scores)
    eval_pcc, eval_doa = dh.evaluation(true_labels, predicted_scores)
    eval_loss = eval_loss / len(val_loader)
    cur_value = eval_rmse

    logger.info("All Validation set: Loss {0:g} | PCC {1:.4f} | DOA {2:.4f} | RMSE {3:.4f} | R2 {4:.4f}"
                .format(eval_loss, eval_pcc, eval_doa, eval_rmse, eval_r2))
    writer.add_scalar('validation loss', eval_loss, epoch)
    writer.add_scalar('validation PCC', eval_pcc, epoch)
    writer.add_scalar('validation DOA', eval_doa, epoch)
    writer.add_scalar('validation RMSE', eval_rmse, epoch)
    writer.add_scalar('validation R2', eval_r2, epoch)
    return cur_value
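# `dh.evaluation` is defined elsewhere in this repo's data helpers; the sketch
# below is NOT its actual implementation, only a minimal illustration of how
# PCC (Pearson correlation coefficient) and DOA (degree of agreement, i.e. the
# fraction of concordant label/score pairs) are commonly computed, assuming
# those standard definitions. The name `_evaluation_sketch` is hypothetical.
def _evaluation_sketch(true_labels, predicted_scores):
    """Hypothetical stand-in for dh.evaluation: returns (pcc, doa)."""
    from scipy.stats import pearsonr

    # Pearson correlation between true labels and predicted scores
    pcc = pearsonr(true_labels, predicted_scores)[0]

    # DOA: over all ordered pairs whose true labels differ, count how often
    # the predicted scores rank them the same way.
    concordant, total = 0, 0
    n = len(true_labels)
    for i in range(n):
        for j in range(n):
            if true_labels[i] > true_labels[j]:
                total += 1
                if predicted_scores[i] > predicted_scores[j]:
                    concordant += 1
    doa = concordant / total if total else 0.0
    return pcc, doa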
def validation_step(x_val_content, x_val_question, x_val_option, y_val, writer=None):
    """Evaluate the model on a validation set."""
    batches_validation = dh.batch_iter(
        list(zip(x_val_content, x_val_question, x_val_option, y_val)),
        args.batch_size, 1)

    eval_counter, eval_loss = 0, 0.0
    true_labels = []
    predicted_scores = []

    for batch_validation in batches_validation:
        x_batch_content, x_batch_question, x_batch_option, y_batch = zip(*batch_validation)
        feed_dict = {
            rmidp.input_x_content: x_batch_content,
            rmidp.input_x_question: x_batch_question,
            rmidp.input_x_option: x_batch_option,
            rmidp.input_y: y_batch,
            rmidp.dropout_keep_prob: 1.0,
            rmidp.is_training: False
        }
        step, summaries, scores, cur_loss = sess.run(
            [rmidp.global_step, validation_summary_op, rmidp.scores, rmidp.loss],
            feed_dict)

        # Prepare for calculating metrics
        true_labels.extend(y_batch)
        predicted_scores.extend(scores)

        eval_loss += cur_loss
        eval_counter += 1

        if writer:
            writer.add_summary(summaries, step)

    eval_loss = float(eval_loss / eval_counter)

    # Calculate PCC & DOA
    pcc, doa = dh.evaluation(true_labels, predicted_scores)
    # Calculate RMSE & R2
    rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
    r2 = r2_score(true_labels, predicted_scores)

    return eval_loss, pcc, doa, rmse, r2
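# `dh.batch_iter` is also repo code; for reference, a minimal sketch of the
# conventional mini-batch generator its call signature suggests (yield batches
# of `data` for `num_epochs` epochs, optionally shuffled). This is an
# assumption about its behavior, not the repo's actual implementation.
import numpy as np

def _batch_iter_sketch(data, batch_size, num_epochs, shuffle=True):
    data = list(data)
    data_size = len(data)
    num_batches_per_epoch = (data_size - 1) // batch_size + 1
    for _ in range(num_epochs):
        # Shuffle the item order each epoch (validation/test passes disable this)
        order = np.random.permutation(data_size) if shuffle else np.arange(data_size)
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield [data[k] for k in order[start:end]]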
def test():
    logger.info("Loading Data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file)
    logger.info("Data padding...")
    test_dataset = dh.MyData(test_data, args.pad_seq_len, device)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

    VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
    criterion = Loss()
    net = HMIDP(args, VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix).to(device)

    # Restore the best checkpoint (the tracked metric is RMSE, so lower is better)
    checkpoint_file = cm.get_best_checkpoint(CPT_DIR, select_maximum_value=False)
    checkpoint = torch.load(checkpoint_file)
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()

    logger.info("Scoring...")
    true_labels, predicted_scores = [], []
    batches = trange(len(test_loader), desc="Batches", leave=True)
    # Disable autograd during inference
    with torch.no_grad():
        for batch_cnt, batch in zip(batches, test_loader):
            x_test_fb_content, x_test_fb_question, x_test_fb_option, \
                x_test_fb_clens, x_test_fb_qlens, x_test_fb_olens, y_test_fb = batch
            logits, scores = net(x_test_fb_content, x_test_fb_question, x_test_fb_option)
            true_labels.extend(y_test_fb[0].tolist())
            predicted_scores.extend(scores[0].tolist())

    # Calculate the metrics
    test_rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
    test_r2 = r2_score(true_labels, predicted_scores)
    test_pcc, test_doa = dh.evaluation(true_labels, predicted_scores)
    logger.info("All Test set: PCC {0:.4f} | DOA {1:.4f} | RMSE {2:.4f} | R2 {3:.4f}"
                .format(test_pcc, test_doa, test_rmse, test_r2))
    logger.info('Test Finished.')

    logger.info('Creating the prediction file...')
    dh.create_prediction_file(save_dir=SAVE_DIR, identifiers=test_data['f_id'],
                              predictions=predicted_scores)
    logger.info('All Finished.')
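# `dh.create_prediction_file` (the PyTorch-side variant called above) is repo
# code; a plausible minimal sketch, assuming it pairs each item identifier
# with its predicted score and dumps them as JSON. The "predictions.json"
# file name and the record layout are assumptions for illustration only.
import json
import os

def _create_prediction_file_sketch(save_dir, identifiers, predictions):
    os.makedirs(save_dir, exist_ok=True)
    records = [{"id": i, "predicted_score": float(s)}
               for i, s in zip(identifiers, predictions)]
    with open(os.path.join(save_dir, "predictions.json"), "w") as f:
        json.dump(records, f, indent=4)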
def test_rmidp():
    """Test RMIDP model."""
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file, data_aug_flag=False)
    logger.info("Data padding...")
    x_test_content, x_test_question, x_test_option, y_test = dh.pad_data(test_data, args.pad_seq_len)

    # Load rmidp model
    OPTION = dh.option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x_content = graph.get_operation_by_name("input_x_content").outputs[0]
            input_x_question = graph.get_operation_by_name("input_x_question").outputs[0]
            input_x_option = graph.get_operation_by_name("input_x_option").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output node names by '|' if you have several output nodes
            output_node_names = "output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph",
                                 "graph-rmidp-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test_content, x_test_question, x_test_option, y_test)),
                                    args.batch_size, 1, shuffle=False)

            test_counter, test_loss = 0, 0.0

            # Collect the predictions here
            true_labels = []
            predicted_scores = []

            for batch_test in batches:
                x_batch_content, x_batch_question, x_batch_option, y_batch = zip(*batch_test)
                feed_dict = {
                    input_x_content: x_batch_content,
                    input_x_question: x_batch_question,
                    input_x_option: x_batch_option,
                    input_y: y_batch,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Prepare for calculating metrics
                true_labels.extend(y_batch)
                predicted_scores.extend(batch_scores)

                test_loss += cur_loss
                test_counter += 1

            # Calculate PCC & DOA
            pcc, doa = dh.evaluation(true_labels, predicted_scores)
            # Calculate RMSE & R2
            rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
            r2 = r2_score(true_labels, predicted_scores)

            test_loss = float(test_loss / test_counter)
            logger.info("All Test Dataset: Loss {0:g} | PCC {1:g} | DOA {2:g} | RMSE {3:g} | R2 {4:g}"
                        .format(test_loss, pcc, doa, rmse, r2))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json",
                                      all_id=test_data.id, all_labels=true_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("All Done.")
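# Once `test_rmidp` has written the frozen graph, it can be reloaded for
# inference without the original model-building code. A minimal sketch using
# the same TF1 APIs as above; the helper name is hypothetical, and the .pb
# path is assumed to follow the "graph/graph-rmidp-{MODEL}.pb" naming used
# when the graph was written.
def _load_frozen_graph_sketch(pb_path):
    # Parse the serialized GraphDef from disk
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, "rb") as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        # Import tensors under a known prefix so they can be fetched by name,
        # e.g. graph.get_tensor_by_name("frozen/output/scores:0")
        tf.import_graph_def(graph_def, name="frozen")
    return graph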
def test_hmidp():
    """Test HMIDP model."""
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding sequence length is: {0}".format(FLAGS.pad_seq_len))
    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file, FLAGS.embedding_dim, data_aug_flag=False)
    logger.info("✔︎ Test data padding...")
    x_test_content, x_test_question, x_test_option, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len)

    # Load hmidp model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha() and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input("✘ Invalid input, please re-enter (B/L): ")
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir, select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x_content = graph.get_operation_by_name("input_x_content").outputs[0]
            input_x_question = graph.get_operation_by_name("input_x_question").outputs[0]
            input_x_option = graph.get_operation_by_name("input_x_option").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output node names by '|' if you have several output nodes
            output_node_names = "output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph",
                                 "graph-hmidp-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test_content, x_test_question, x_test_option, y_test)),
                                    FLAGS.batch_size, 1, shuffle=False)

            test_counter, test_loss = 0, 0.0

            # Collect the predictions here
            true_labels = []
            predicted_scores = []

            for batch_test in batches:
                x_batch_content, x_batch_question, x_batch_option, y_batch = zip(*batch_test)
                feed_dict = {
                    input_x_content: x_batch_content,
                    input_x_question: x_batch_question,
                    input_x_option: x_batch_option,
                    input_y: y_batch,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Prepare for calculating metrics
                true_labels.extend(y_batch)
                predicted_scores.extend(batch_scores)

                test_loss += cur_loss
                test_counter += 1

            # Calculate PCC & DOA
            pcc, doa = dh.evaluation(true_labels, predicted_scores)
            # Calculate RMSE
            rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5

            test_loss = float(test_loss / test_counter)
            logger.info("☛ All Test Dataset: Loss {0:g} | PCC {1:g} | DOA {2:g} | RMSE {3:g}"
                        .format(test_loss, pcc, doa, rmse))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json",
                                      all_id=test_data.id, all_labels=true_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("✔︎ Done.")
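# `cm.get_best_checkpoint`, used by all of the test functions above, follows
# the checkmate-style convention of a JSON index in the checkpoint directory
# that maps checkpoint paths to their tracked validation metric. A sketch
# under that assumption; the "best_checkpoints" index file name and the
# helper name are hypothetical.
import json
import os

def _get_best_checkpoint_sketch(checkpoint_dir, select_maximum_value=True):
    with open(os.path.join(checkpoint_dir, "best_checkpoints")) as f:
        best_checkpoints = json.load(f)  # {checkpoint_path: metric_value}
    # Pick the max for metrics like PCC, the min for metrics like RMSE
    if select_maximum_value:
        return max(best_checkpoints, key=best_checkpoints.get)
    return min(best_checkpoints, key=best_checkpoints.get)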