def convert_model():
    """Build the YOLOv3 inference graph, load weights into it and freeze it.

    Weights are taken from ``config.yolov3_weights_path`` when
    ``config.pre_train`` is True; otherwise the best checkpoint found in
    ``config.model_dir`` is restored through an exponential-moving-average
    aware saver.  The graph is then handed to ``freeze_graph`` together with
    ``config.model_export_path``.

    The session is always closed, even when loading or freezing raises.
    """
    # Getting anchors and labels for the prediction.
    # NOTE(review): neither return value is used below; the calls are kept
    # only because they were part of the original flow -- confirm whether
    # they can be dropped.
    get_classes(config.classes_path)
    read_anchors(config.anchors_path)

    # Defining placeholders for passing the image data onto the model
    image_tensor = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_image')
    # Not wired into the model here; it only adds a node called
    # 'input_shape' to the graph.
    tf.placeholder(dtype=tf.int32, shape=[2], name='input_shape')

    output_nodes = yolo(input_images=image_tensor,
                        is_training=False,
                        config_path=config.yolov3_cfg_path,
                        num_classes=config.num_classes)
    print(output_nodes)

    # Alias the three outputs as 'scale_1' .. 'scale_3' so they can be looked
    # up by name in the graph.
    for index in range(3):
        tf.identity(output_nodes[index], name='scale_%d' % (index + 1))

    ckpt_path = config.model_dir
    exponential_moving_average_obj = tf.train.ExponentialMovingAverage(
        config.weight_decay)
    saver = tf.train.Saver(
        exponential_moving_average_obj.variables_to_restore())
    ckpt = tf.train.get_checkpoint_state(ckpt_path)

    sess = tf.Session()
    try:
        if config.pre_train is True:
            load_ops = load_weights(tf.global_variables(),
                                    config.yolov3_weights_path)
            sess.run(load_ops)
        elif ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            best_checkpoint = checkmate.get_best_checkpoint(ckpt_path)
            print('Restoring model ', best_checkpoint)
            saver.restore(sess, best_checkpoint)
            print('Model Loaded!')
        else:
            # NOTE(review): execution still continues to freeze_graph with
            # whatever state the session has -- confirm this is intended.
            print("No appropriate weights found for creating protobuf file")

        # The export directory is taken to be the second '/'-separated
        # component of the export path (e.g. './dir/model.pb' -> 'dir').
        export_dir = config.model_export_path.split('/')[1]
        if not os.path.exists(export_dir):
            os.mkdir(export_dir)

        freeze_graph(sess, config.model_export_path)
    finally:
        sess.close()
def predict_new(x):
    """Restore the best checkpoint of ``model`` and run a prediction on ``x``.

    For the model named ``'m2'`` the tensor ``model.predictions`` is
    evaluated; for any other model the tensor is built on the fly with
    ``model.q_y_x_model(model.x)``.

    Returns the result of ``session.run`` on a one-element fetch list, i.e. a
    one-element list holding the evaluated tensor.
    """
    restorer = tf.train.Saver()
    with tf.Session() as sess:
        restorer.restore(sess, get_best_checkpoint(model.ckpt_dir))
        # Pick the tensor to evaluate only after the weights are restored,
        # matching the original order of graph construction.
        if model_name == 'm2':
            fetch = model.predictions
        else:
            fetch = model.q_y_x_model(model.x)
        return sess.run([fetch], {model.x: x})
def test():
    """Score the test set with the best HMIDP checkpoint and save predictions.

    Loads the test data, restores the checkpoint selected by
    ``cm.get_best_checkpoint`` (minimum value), runs the network over the
    whole test loader, logs PCC / DOA / RMSE / R2 and writes the predicted
    scores with ``dh.create_prediction_file``.
    """
    logger.info("Loading Data...")

    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file)

    logger.info("Data padding...")
    test_dataset = dh.MyData(test_data, args.pad_seq_len, device)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(
        args.word2vec_file)

    net = HMIDP(args, VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix).to(device)
    checkpoint_file = cm.get_best_checkpoint(CPT_DIR, select_maximum_value=False)
    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be loaded onto the device used for this run.
    checkpoint = torch.load(checkpoint_file, map_location=device)
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()

    logger.info("Scoring...")
    true_labels, predicted_scores = [], []
    batches = trange(len(test_loader), desc="Batches", leave=True)
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        # Zipping with the trange only drives the progress bar.
        for _, batch in zip(batches, test_loader):
            x_test_fb_content, x_test_fb_question, x_test_fb_option, \
                _clens, _qlens, _olens, y_test_fb = batch
            _, scores = net(x_test_fb_content, x_test_fb_question, x_test_fb_option)
            # NOTE(review): only index 0 of the label / score tensors is
            # consumed, as in the original -- confirm the tensor layout.
            true_labels.extend(y_test_fb[0].tolist())
            predicted_scores.extend(scores[0].tolist())

    # Calculate the Metrics
    test_rmse = mean_squared_error(true_labels, predicted_scores)**0.5
    test_r2 = r2_score(true_labels, predicted_scores)
    test_pcc, test_doa = dh.evaluation(true_labels, predicted_scores)
    logger.info(
        "All Test set: PCC {0:.4f} | DOA {1:.4f} | RMSE {2:.4f} | R2 {3:.4f}".
        format(test_pcc, test_doa, test_rmse, test_r2))
    logger.info('Test Finished.')

    logger.info('Creating the prediction file...')
    dh.create_prediction_file(save_dir=SAVE_DIR,
                              identifiers=test_data['f_id'],
                              predictions=predicted_scores)
    logger.info('All Finished.')
def test():
    """Score the test set with the best MOOCNet checkpoint and save labels.

    Loads the test data, restores the checkpoint selected by
    ``cm.get_best_checkpoint`` (minimum value), runs the network over the
    whole test loader, thresholds every score at 0.5 to obtain a 0/1 label
    and writes those labels with ``dh.create_prediction_file``.
    """
    logger.info("Loading Data...")

    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file)
    test_dataset = dh.MyData(test_data.activity, test_data.timestep, test_data.labels)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             collate_fn=dh.collate_fn)

    # Load word2vec model
    COURSE_SIZE = dh.course2vec(args.course2vec_file)

    net = MOOCNet(args, COURSE_SIZE).to(device)
    checkpoint_file = cm.get_best_checkpoint(CPT_DIR, select_maximum_value=False)
    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be loaded onto the device used for this run.
    checkpoint = torch.load(checkpoint_file, map_location=device)
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()

    logger.info("Scoring...")
    # NOTE(review): true_labels and predicted_scores are collected but not
    # consumed below (the metrics section is empty).
    true_labels, predicted_scores, predicted_labels = [], [], []
    batches = trange(len(test_loader), desc="Batches", leave=True)
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        # Zipping with the trange only drives the progress bar.
        for _, batch in zip(batches, test_loader):
            x_test, tsp_test, y_test = create_input_data(batch)
            _, scores = net(x_test, tsp_test)
            batch_scores = scores.tolist()
            true_labels.extend(y_test.tolist())
            predicted_scores.extend(batch_scores)
            predicted_labels.extend(
                1 if score >= 0.5 else 0 for score in batch_scores)

    # Calculate the Metrics
    logger.info('Test Finished.')

    logger.info('Creating the prediction file...')
    dh.create_prediction_file(save_dir=SAVE_DIR,
                              identifiers=test_data.id,
                              predictions=predicted_labels)
    logger.info('All Finished.')
def test_harnn():
    """Test HARNN model.

    Restores either the best or the latest checkpoint (chosen through
    ``dh._option``), exports the restored graph as a frozen ``.pb`` file,
    scores the whole test set, logs micro-averaged precision / recall / F1 /
    AUC / AUPRC for the global output and for each of the four hierarchy
    levels, and finally writes ``predictions.json`` into ``SAVE_DIR``.

    The TensorFlow session is closed when the function returns or raises.
    """
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.num_classes_list,
                                        args.total_classes, args.word2vec_file,
                                        data_aug_flag=False)

    logger.info("Data padding...")
    x_test, y_test, y_test_tuple = dh.pad_data(test_data, args.pad_seq_len)
    y_test_labels = test_data.labels

    # Load harnn model
    OPTION = dh._option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    # Names used by the graph for the four hierarchy levels, in order.
    level_names = ("first", "second", "third", "fourth")
    num_levels = len(level_names)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        # Entering the session makes it the default one and guarantees that
        # it is closed on exit.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def _tensor(op_name):
                """Return the first output tensor of the named graph operation."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Get the placeholders from the graph by name
            input_x = _tensor("input_x")
            level_inputs = [_tensor("input_y_{0}".format(name)) for name in level_names]
            input_y = _tensor("input_y")
            dropout_keep_prob = _tensor("dropout_keep_prob")
            beta = _tensor("beta")
            is_training = _tensor("is_training")

            # Tensors we want to evaluate
            level_score_ops = [_tensor("{0}-output/scores".format(name))
                               for name in level_names]
            scores = _tensor("output/scores")
            loss = _tensor("loss/loss")

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "first-output/scores|second-output/scores|third-output/scores|fourth-output/scores|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph",
                                 "graph-harnn-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test, y_test, y_test_tuple, y_test_labels)),
                                    args.batch_size, 1, shuffle=False)

            test_counter, test_loss = 0, 0.0

            # Collect the predictions here
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # Collect for calculating metrics
            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_ts = []
            # NOTE(review): the top-K predictions are collected but not used
            # by any metric below.
            predicted_onehot_labels_tk = [[] for _ in range(args.topK)]

            # One accumulator per hierarchy level.
            true_onehot_level_labels = [[] for _ in range(num_levels)]
            predicted_onehot_level_scores = [[] for _ in range(num_levels)]
            predicted_onehot_level_labels = [[] for _ in range(num_levels)]

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_tuple, y_batch_test_labels = \
                    zip(*batch_test)
                # Element `idx` of every sample's tuple is its label vector
                # for hierarchy level `idx`.
                y_batch_levels = [[sample[idx] for sample in y_batch_test_tuple]
                                  for idx in range(num_levels)]

                feed_dict = {
                    input_x: x_batch_test,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    beta: args.beta,
                    is_training: False
                }
                feed_dict.update(zip(level_inputs, y_batch_levels))

                batch_level_scores, batch_scores, cur_loss = sess.run(
                    [level_score_ops, scores, loss], feed_dict)

                # Prepare for calculating metrics
                true_onehot_labels.extend(y_batch_test)
                predicted_onehot_scores.extend(batch_scores)
                for idx in range(num_levels):
                    true_onehot_level_labels[idx].extend(y_batch_levels[idx])
                    predicted_onehot_level_scores[idx].extend(batch_level_scores[idx])

                # Get the predicted labels by threshold
                batch_predicted_labels_ts, batch_predicted_scores_ts = \
                    dh.get_label_threshold(scores=batch_scores, threshold=args.threshold)

                # Add results to collection
                true_labels.extend(y_batch_test_labels)
                predicted_labels.extend(batch_predicted_labels_ts)
                predicted_scores.extend(batch_predicted_scores_ts)

                # Get one-hot prediction by threshold
                predicted_onehot_labels_ts.extend(
                    dh.get_onehot_label_threshold(scores=batch_scores,
                                                  threshold=args.threshold))
                for idx in range(num_levels):
                    predicted_onehot_level_labels[idx].extend(
                        dh.get_onehot_label_threshold(scores=batch_level_scores[idx],
                                                      threshold=args.threshold))

                # Get one-hot prediction by topK
                for i in range(args.topK):
                    predicted_onehot_labels_tk[i].extend(
                        dh.get_onehot_label_topk(scores=batch_scores, top_num=i + 1))

                test_loss = test_loss + cur_loss
                test_counter = test_counter + 1

            def _micro_prf(y_true, y_pred):
                """Return micro-averaged (precision, recall, F1)."""
                return (precision_score(y_true=y_true, y_pred=y_pred, average='micro'),
                        recall_score(y_true=y_true, y_pred=y_pred, average='micro'),
                        f1_score(y_true=y_true, y_pred=y_pred, average='micro'))

            # Convert every collection to an array once and reuse it.
            y_true_all = np.array(true_onehot_labels)
            y_score_all = np.array(predicted_onehot_scores)
            y_pred_all = np.array(predicted_onehot_labels_ts)

            # Calculate Precision & Recall & F1
            test_pre_ts, test_rec_ts, test_F1_ts = _micro_prf(y_true_all, y_pred_all)

            level_metrics = []
            for idx in range(num_levels):
                level_true = np.array(true_onehot_level_labels[idx])
                level_pred = np.array(predicted_onehot_level_labels[idx])
                level_score = np.array(predicted_onehot_level_scores[idx])
                level_pre, level_rec, level_f1 = _micro_prf(level_true, level_pred)
                level_prc = average_precision_score(y_true=level_true,
                                                    y_score=level_score,
                                                    average="micro")
                level_metrics.append((level_pre, level_rec, level_f1, level_prc))

            # Calculate the average AUC
            test_auc = roc_auc_score(y_true=y_true_all, y_score=y_score_all, average='micro')
            # Calculate the average PR
            test_prc = average_precision_score(y_true=y_true_all, y_score=y_score_all,
                                               average="micro")

            test_loss = float(test_loss / test_counter)

            logger.info("All Test Dataset: Loss {0:g} | AUC {1:g} | AUPRC {2:g}"
                        .format(test_loss, test_auc, test_prc))

            # Predict by threshold
            logger.info("Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
                        .format(test_pre_ts, test_rec_ts, test_F1_ts))
            for level_no, (level_pre, level_rec, level_f1, level_prc) in \
                    enumerate(level_metrics, start=1):
                logger.info("Predict by threshold in Level-{0}: Precision {1:g}, Recall {2:g}, F1 {3:g}, AUPRC {4:g}"
                            .format(level_no, level_pre, level_rec, level_f1, level_prc))

            # Save the prediction result
            os.makedirs(SAVE_DIR, exist_ok=True)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json",
                                      data_id=test_data.patent_id,
                                      all_labels=true_labels,
                                      all_predict_labels=predicted_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("All Done.")
def train(ckpt_path, log_path, class_path, decay_steps=2000, decay_rate=0.8):
    """
    Function to train the model.

    The gradient of ``config.subdivisions`` consecutive minibatches is
    averaged and applied in a single optimizer step.  Every third epoch the
    mean validation loss is handed to a ``BestCheckpointSaver``.

    ckpt_path: string, path for saving/restoring the model
    log_path: string, path for saving the training/validation logs
    class_path: string, path for the classes of the dataset
    decay_steps: int, steps after which the learning rate is to be decayed
    decay_rate: float, rate to carrying out exponential decay
    """
    # Getting the anchors
    anchors = read_anchors(config.anchors_path)
    if not os.path.exists(config.data_dir):
        os.mkdir(config.data_dir)
    # NOTE(review): the class list is not used below; the call is kept only
    # because it was part of the original flow.
    get_classes(class_path)

    # Building the training pipeline
    graph = tf.get_default_graph()

    with graph.as_default():

        # Getting the training data
        with tf.name_scope('data_parser/'):
            train_reader = Parser('train',
                                  config.data_dir,
                                  config.anchors_path,
                                  config.output_dir,
                                  config.num_classes,
                                  input_shape=config.input_shape,
                                  max_boxes=config.max_boxes)
            train_data = train_reader.build_dataset(
                config.train_batch_size // config.subdivisions)
            train_iterator = train_data.make_one_shot_iterator()

            val_reader = Parser('val',
                                config.data_dir,
                                config.anchors_path,
                                config.output_dir,
                                config.num_classes,
                                input_shape=config.input_shape,
                                max_boxes=config.max_boxes)
            val_data = val_reader.build_dataset(
                config.val_batch_size // config.subdivisions)
            val_iterator = val_data.make_one_shot_iterator()

            # Used for different behaviour of batch normalization
            is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
            # 1 selects the training iterator, anything else the validation one
            mode = tf.placeholder(dtype=tf.int16, shape=[], name='mode_flag')

            def _next_train_batch():
                return train_iterator.get_next()

            def _next_val_batch():
                return val_iterator.get_next()

            images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = tf.cond(
                pred=tf.equal(mode, 1),
                true_fn=_next_train_batch,
                false_fn=_next_val_batch,
                name='train_val_cond')

            images.set_shape([None, config.input_shape, config.input_shape, 3])
            bbox.set_shape([None, config.max_boxes, 5])

            grid_shapes = [
                config.input_shape // 32, config.input_shape // 16,
                config.input_shape // 8
            ]
            draw_box(images, bbox)

        # Extracting the pre-defined yolo graph from the darknet cfg file
        if not os.path.exists(ckpt_path):
            os.mkdir(ckpt_path)
        output = yolo(images, is_training, config.yolov3_cfg_path, config.num_classes)

        # Declaring the parameters for GT
        with tf.name_scope('Targets'):
            bbox_true_13.set_shape([
                None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes
            ])
            bbox_true_26.set_shape([
                None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes
            ])
            bbox_true_52.set_shape([
                None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes
            ])
        y_true = [bbox_true_13, bbox_true_26, bbox_true_52]

        # Compute Loss
        with tf.name_scope('Loss_and_Detect'):
            yolo_loss = compute_loss(output, y_true, anchors, config.num_classes,
                                     print_loss=False)
            l2_loss = tf.losses.get_regularization_loss()
            loss = yolo_loss + l2_loss
            yolo_loss_summary = tf.summary.scalar('yolo_loss', yolo_loss)
            l2_loss_summary = tf.summary.scalar('l2_loss', l2_loss)
            total_loss_summary = tf.summary.scalar('Total_loss', loss)

        # Declaring the parameters for training the model
        with tf.name_scope('train_parameters'):
            global_step = tf.Variable(0, trainable=False, name='global_step')
            learning_rate = tf.train.exponential_decay(config.learning_rate,
                                                       global_step, decay_steps,
                                                       decay_rate)
            tf.summary.scalar('learning rate', learning_rate)

        # Define optimizer for minimizing the computed loss
        with tf.name_scope('Optimizer'):
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # With pre-trained weights only the 'yolo' scope is trained;
                # var_list=None means "all trainable variables".
                if config.pre_train:
                    train_vars = tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, scope='yolo')
                else:
                    train_vars = None
                grads = optimizer.compute_gradients(loss=loss, var_list=train_vars)
                # Variables without a gradient cannot be fetched or fed.
                grads = [(grad, var) for grad, var in grads if grad is not None]
                grad_tensors = [grad for grad, _ in grads]
                # One placeholder per variable: the accumulated gradient is
                # fed through it so that it is applied exactly once per step.
                gradients = [(tf.placeholder(dtype=tf.float32, shape=var.get_shape()), var)
                             for _, var in grads]
                train_step = optimizer.apply_gradients(grads_and_vars=gradients,
                                                       global_step=global_step)

        #################################### Training loop ############################################################
        # A saver object for saving the model
        best_ckpt_saver = checkmate.BestCheckpointSaver(save_dir=ckpt_path,
                                                        num_to_keep=5)
        summary_op = tf.summary.merge_all()
        summary_op_valid = tf.summary.merge(
            [yolo_loss_summary, l2_loss_summary, total_loss_summary])

        init_op = tf.global_variables_initializer()

        # Defining some train loop dependencies
        gpu_config = tf.ConfigProto(log_device_placement=False)
        gpu_config.gpu_options.allow_growth = True
        sess = tf.Session(config=gpu_config)
        tf.logging.set_verbosity(tf.logging.ERROR)
        train_summary_writer = tf.summary.FileWriter(
            os.path.join(log_path, 'train'), sess.graph)
        val_summary_writer = tf.summary.FileWriter(
            os.path.join(log_path, 'val'), sess.graph)

        try:
            # Restoring the model
            ckpt = tf.train.get_checkpoint_state(ckpt_path)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                best_checkpoint = checkmate.get_best_checkpoint(ckpt_path)
                print('Restoring model ', best_checkpoint)
                tf.train.Saver().restore(sess, best_checkpoint)
                print('Model Loaded!')
            elif config.pre_train is True:
                # Initialise everything first: the weight file only covers
                # the 'darknet53' variables, the remaining variables and the
                # optimizer state would otherwise stay uninitialised.
                sess.run(init_op)
                load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                        config.darknet53_weights_path)
                sess.run(load_ops)
            else:
                sess.run(init_op)

            print('Uninitialized variables: ',
                  sess.run(tf.report_uninitialized_variables()))

            epochbar = tqdm(range(config.Epoch))
            for epoch in epochbar:
                epochbar.set_description('Epoch %s of %s' % (epoch, config.Epoch))
                mean_loss_train = []
                mean_loss_valid = []

                trainbar = tqdm(range(config.train_num // config.train_batch_size))
                for k in trainbar:
                    # Sum the gradients of all subdivisions ...
                    accumulated = None
                    for _ in range(config.subdivisions):
                        train_summary, loss_train, batch_grads = sess.run(
                            [summary_op, loss, grad_tensors],
                            feed_dict={
                                is_training: True,
                                mode: 1
                            })
                        if accumulated is None:
                            accumulated = list(batch_grads)
                        else:
                            accumulated = [total + grad for total, grad
                                           in zip(accumulated, batch_grads)]

                    # ... and apply their mean in a single optimizer step.
                    # NOTE(review): train_step sits under the UPDATE_OPS
                    # control dependency, so running it also runs those ops.
                    feed_dict = {is_training: True, mode: 1}
                    for (placeholder, _), grad_sum in zip(gradients, accumulated):
                        feed_dict[placeholder] = grad_sum / config.subdivisions
                    sess.run(train_step, feed_dict=feed_dict)

                    train_summary_writer.add_summary(train_summary, epoch)
                    train_summary_writer.flush()
                    mean_loss_train.append(loss_train)
                    trainbar.set_description('Train loss: %s' % str(loss_train))

                print('Validating.....')
                valbar = tqdm(range(config.val_num // config.val_batch_size))
                for k in valbar:
                    val_summary, loss_valid = sess.run(
                        [summary_op_valid, loss],
                        feed_dict={
                            is_training: False,
                            mode: 0
                        })
                    val_summary_writer.add_summary(val_summary, epoch)
                    val_summary_writer.flush()
                    mean_loss_valid.append(loss_valid)
                    valbar.set_description('Validation loss: %s' % str(loss_valid))

                mean_loss_train = np.mean(mean_loss_train)
                mean_loss_valid = np.mean(mean_loss_valid)

                print('\n')
                print('Train loss after %d epochs is: %f' % (epoch + 1, mean_loss_train))
                print('Validation loss after %d epochs is: %f' % (epoch + 1, mean_loss_valid))
                print('\n\n')

                # Only every third epoch is considered for checkpointing.
                if ((epoch + 1) % 3) == 0:
                    best_ckpt_saver.handle(mean_loss_valid, sess, tf.constant(epoch))

            print('Tuning Completed!!')
        finally:
            # Release the writers and the session even if training aborted.
            train_summary_writer.close()
            val_summary_writer.close()
            sess.close()
def test_cnn():
    """Test ABCNN model.

    Asks on stdin whether the best or the latest checkpoint should be used,
    restores it, exports the restored graph as a frozen ``.pb`` file, scores
    the test set batch by batch (logging loss and accuracy per batch) and
    writes ``predictions.json`` into ``SAVE_DIR``.

    The TensorFlow session is closed when the function returns or raises.
    """
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file, FLAGS.embedding_dim)

    logger.info("✔︎ Test data padding...")
    x_test_front, x_test_behind, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len)
    y_test_labels = test_data.labels

    # Load abcnn model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")
    while not (BEST_OR_LATEST.isalpha() and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input(
            "✘ The format of your input is illegal, please re-input: ")

    # The prompt accepts either case, so the comparison must as well.
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        # Entering the session makes it the default one and guarantees that
        # it is closed on exit.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def _tensor(op_name):
                """Return the first output tensor of the named graph operation."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Get the placeholders from the graph by name
            input_x_front = _tensor("input_x_front")
            input_x_behind = _tensor("input_x_behind")
            input_y = _tensor("input_y")
            dropout_keep_prob = _tensor("dropout_keep_prob")
            is_training = _tensor("is_training")

            # Tensors we want to evaluate
            predictions = _tensor("output/predictions")
            topKPreds = _tensor("output/topKPreds")
            accuracy = _tensor("accuracy/accuracy")
            loss = _tensor("loss/loss")

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "output/logits|output/predictions|output/softmax_scores|output/topKPreds"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph",
                                 "graph-abcnn-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(
                list(zip(x_test_front, x_test_behind, y_test, y_test_labels)),
                FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_labels = []
            all_predicted_labels = []
            all_predicted_values = []

            for index, x_test_batch in enumerate(batches):
                x_batch_front, x_batch_behind, y_batch, y_batch_labels = zip(
                    *x_test_batch)
                feed_dict = {
                    input_x_front: x_batch_front,
                    input_x_behind: x_batch_behind,
                    input_y: y_batch,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }

                # One forward pass yields everything needed for this batch.
                batch_predicted_labels, batch_predicted_values, batch_loss, batch_acc = \
                    sess.run([predictions, topKPreds, loss, accuracy], feed_dict)

                all_labels = np.append(all_labels, y_batch_labels)
                all_predicted_labels = np.concatenate(
                    [all_predicted_labels, batch_predicted_labels])
                all_predicted_values = np.append(all_predicted_values,
                                                 batch_predicted_values)

                logger.info(
                    "✔︎ Test batch {0}: loss {1:g}, accuracy {2:g}.".format(
                        (index + 1), batch_loss, batch_acc))

            # Save the prediction result
            os.makedirs(SAVE_DIR, exist_ok=True)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json",
                                      front_data_id=test_data.front_testid,
                                      behind_data_id=test_data.behind_testid,
                                      all_labels=all_labels,
                                      all_predict_labels=all_predicted_labels,
                                      all_predict_values=all_predicted_values)

    logger.info("✔︎ Done.")
def train(self,
          Data,
          n_epochs,
          l_bs,
          u_bs,
          lr,
          eval_samps=None,
          binarize=False,
          verbose=1,
          decay_ratio=0.75,
          decay_period=200,
          h_opt=False,
          keep_ckpt=True,
          restore=False):
    """
    Method for training the models.

    Builds the optimizer (Adam with gradients clipped to [-1, 1]) and the
    curve tensors, then trains inside ``self.session`` until ``self.epoch``
    reaches ``n_epochs``.  After every finished epoch one row of
    ``self.curve_array`` is filled by ``self.calc_curve_vals``.

    Data: data object providing ``next_batch``, ``_epochs_unlabelled`` and
        ``data['x_test']`` / ``data['y_test']``
    n_epochs: int, number of epochs to train for
    l_bs, u_bs: batch sizes passed to ``Data.next_batch``
    lr: sequence whose first element is the initial learning rate
    eval_samps: forwarded to ``data_init`` and the printing feed dict
    binarize: when True, both input batches go through ``self.binarize``
    verbose: 1, 2 or 3 selects the per-epoch printing routine
    decay_ratio, decay_period: parameters of the 'exp' learning rate schedule
    h_opt: when set, training aborts with an Exception if column 12 of the
        curve array stays below 0.07 after epoch 20 or below 0.1 after
        epoch 40
    keep_ckpt: when set, checkpoints are kept by a BestCheckpointSaver
        driven by column 6 of the curve array
    restore: when set, training resumes from the best checkpoint found in
        ``self.ckpt_dir``

    Returns ``self.curve_array`` with shape (n_epochs + 1, 14).
    """
    self.data_init(Data, eval_samps, l_bs, u_bs)
    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    self.epoch = 0
    self.lr = self.set_learning_rate(
        [lr[0], lr[0] / 10.0, decay_period, decay_ratio], 'exp')

    # define optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
    gvs = optimizer.compute_gradients(self.loss)
    # clip gradients
    capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.optimizer = optimizer.apply_gradients(
            capped_gvs, global_step=self.global_step)

    self.y_pred = self.predict(self.x)
    self.curve_array = np.zeros((n_epochs + 1, 14))

    if self.learning_paradigm == 'unsupervised':
        self.elbo_l_curve = tf.reduce_mean(self.unlabelled_loss(self.x))
        self.qy_ll_curve = tf.reduce_mean(self.qy_loss(self.x))
        self.elbo_u_curve = tf.reduce_mean(self.unlabelled_loss(self.x))
    else:
        if self.model_name == 'adgm' or self.model_name == 'adg_dgm':
            self.elbo_l_curve = tf.reduce_mean(
                self.labelled_loss(self.x, self.y)[0])
            self.qy_ll_curve = tf.reduce_mean(
                self.labelled_loss(self.x, self.y)[1])
        else:
            self.elbo_l_curve = tf.reduce_mean(
                self.labelled_loss(self.x, self.y))
            self.qy_ll_curve = tf.reduce_mean(self.qy_loss(self.x, self.y))
        self.elbo_u_curve = tf.reduce_mean(self.unlabelled_loss(self.x))

    self.compute_accuracies()

    # initialize session and train
    with self.session as sess:

        def _restore_best(restorer):
            """Restore the best checkpoint and return the epoch in its name."""
            best_ckpt = get_best_checkpoint(self.ckpt_dir)
            # The epoch is the trailing number of the checkpoint name; the
            # restorable path is that name without the '-<epoch>' suffix.
            best_epoch = int(re.match('.*?([0-9]+)$', best_ckpt).group(1))
            best_ckpt_usable = re.sub('-([0-9]+)$', "", best_ckpt)
            restorer.restore(sess, best_ckpt_usable)
            return best_epoch

        sess.run(tf.global_variables_initializer())

        if restore:
            self.epoch = _restore_best(tf.train.Saver())

        self.curve_array[self.epoch] = self.calc_curve_vals(sess, Data)
        if verbose == 3:
            self.print_verbose3(self.epoch)

        # Stays None when no checkpoints are kept, so the NaN recovery
        # below can tell whether a saver exists.
        saver = None
        if keep_ckpt:
            saver = BestCheckpointSaver(save_dir=self.ckpt_dir,
                                        num_to_keep=2,
                                        maximize=True)

        while self.epoch < n_epochs:
            x_labelled, labels, x_unlabelled, _ = \
                Data.next_batch(l_bs, u_bs)

            if binarize is True:
                x_labelled = self.binarize(x_labelled)
                x_unlabelled = self.binarize(x_unlabelled)

            fd = self.training_fd(x_labelled, labels, x_unlabelled)
            sess.run([self.optimizer, self.loss], fd)

            # Everything below runs once per finished epoch.
            if Data._epochs_unlabelled > self.epoch:
                self.epoch += 1
                self.curve_array[self.epoch] = \
                    self.calc_curve_vals(sess, Data)

                if h_opt:
                    monitored = self.curve_array[self.epoch, 12]
                    if (self.epoch > 20 and monitored < 0.07) or \
                            (self.epoch > 40 and monitored < 0.1):
                        raise Exception('results too bad')

                if saver is not None:
                    saver.handle(self.curve_array[self.epoch, 6], sess,
                                 self.global_step, self.epoch)

                if verbose == 1:
                    fd = self._printing_feed_dict(Data, x_labelled,
                                                  x_unlabelled, labels,
                                                  eval_samps, binarize)
                    self.print_verbose1(self.epoch, fd, sess)
                elif verbose == 2:
                    fd = self._printing_feed_dict(Data, x_labelled,
                                                  x_unlabelled, labels,
                                                  eval_samps, binarize)
                    self.print_verbose2(self.epoch, fd, sess)
                elif verbose == 3:
                    self.print_verbose3(self.epoch)

                # Dump confusion matrix and predictions every tenth epoch.
                if self.epoch % 10 == 0:
                    y_pred_test = sess.run([self.y_pred], {
                        self.x: Data.data['x_test'],
                        K.learning_phase(): 0
                    })[0]
                    conf_mat = confusion_matrix(
                        Data.data['y_test'].argmax(1),
                        y_pred_test.argmax(1))
                    suffix = '_' + self.name + '_' + str(self.epoch) + '.npy'
                    np.save(
                        os.path.join(self.output_dir, 'conf_mat' + suffix),
                        conf_mat)
                    np.save(
                        os.path.join(self.output_dir, 'y_pred' + suffix),
                        y_pred_test)
                    np.save(
                        os.path.join(self.output_dir, 'y_true' + suffix),
                        Data.data['y_test'])

                if np.isnan(self.curve_array).any():
                    print(
                        'loss is nan, going back to previous best checkpoint'
                    )
                    # Without a BestCheckpointSaver there is no saver to
                    # reuse, so fall back to a plain one.
                    restorer = saver._saver if saver is not None \
                        else tf.train.Saver()
                    self.epoch = _restore_best(restorer)
                    # Rows after the restored epoch will be recomputed;
                    # resetting them removes the NaNs, otherwise this
                    # check would fire again after every epoch.
                    self.curve_array[self.epoch + 1:] = 0

    return self.curve_array
def test_parnn():
    """Evaluate a saved PARNN checkpoint on the test data.

    Asks on stdin whether the best or the latest checkpoint should be used,
    restores it, exports the frozen graph as a ``.pb`` file, runs a single
    un-shuffled pass over the test data, logs the mean loss and the RMSE and
    finally writes the predictions to ``SAVE_DIR``.
    """
    # ---- data -----------------------------------------------------------
    logger.info("✔︎ Loading data...")
    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file,
                                        FLAGS.embedding_dim,
                                        data_aug_flag=False)

    logger.info("✔︎ Test data padding...")
    x_test_content, x_test_question, x_test_option, y_test = dh.pad_data(
        test_data, FLAGS.pad_seq_len)

    # ---- checkpoint selection -------------------------------------------
    # Keep asking until the answer is a single "B"/"L" in either case.
    choice = input("☛ Load Best or Latest Model?(B/L): ")
    while not choice.isalpha() or choice.upper() not in ['B', 'L']:
        choice = input(
            "✘ The format of your input is illegal, please re-input: ")

    use_best = choice.upper() == 'B'
    if use_best:
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Rebuild the graph from the .meta file, then load the weights.
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def first_output(op_name):
                """Return the first output tensor of the named operation."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Placeholders looked up by name
            input_x_content = first_output("input_x_content")
            input_x_question = first_output("input_x_question")
            input_x_option = first_output("input_x_option")
            input_y = first_output("input_y")
            dropout_keep_prob = first_output("dropout_keep_prob")
            is_training = first_output("is_training")

            # Tensors to evaluate
            scores = first_output("output/scores")
            loss = first_output("loss/loss")

            # Several output nodes would be separated by '|'
            output_node_names = "output/scores"

            # Freeze the variables and store the graph as a binary .pb file
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(frozen_graph_def,
                                 "graph",
                                 "graph-parnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # One epoch, original order
            batches = dh.batch_iter(
                (x_test_content, x_test_question, x_test_option, y_test),
                FLAGS.batch_size,
                1,
                shuffle=False)

            test_loss = 0.0
            test_counter = 0
            true_labels = []
            predicted_scores = []

            for batch_test in batches:
                (content_front, content_behind,
                 question_front, question_behind,
                 option_front, option_behind,
                 y_front, y_behind) = zip(*batch_test)

                feed_dict = {
                    input_x_content: [content_front, content_behind],
                    input_x_question: [question_front, question_behind],
                    input_x_option: [option_front, option_behind],
                    input_y: [y_front, y_behind],
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Only the "front" half of each pair is scored for the metric
                true_labels.extend(y_front)
                predicted_scores.extend(batch_scores[0])
                print(predicted_scores)

                test_loss += cur_loss
                test_counter += 1

            # Root mean squared error over everything collected above
            rmse = mean_squared_error(true_labels, predicted_scores)**0.5
            test_loss = float(test_loss / test_counter)

            logger.info("☛ All Test Dataset: Loss {0:g} | RMSE {1:g}".format(
                test_loss, rmse))

            # Persist the predictions
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR +
                                      "/predictions.json",
                                      all_id=test_data.id,
                                      all_labels=true_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("✔︎ Done.")
def run_inference(img_path, output_dir, args):
    """Run the checkpointed model over a directory of images and time it.

    The model is built with ``model(...).forward()``, the best checkpoint
    found under ``config.model_dir + 'valid/'`` is restored (using the
    exponential-moving-average variable mapping), and every image returned by
    ``read_image(img_path)`` is pushed through the graph. One (currently
    empty) label file per image is created in ``<output_dir>/labels``.

    Args:
        img_path: string, directory containing the images; it is passed to
            both ``os.listdir`` and ``read_image``.
        output_dir: string, directory for the outputs; ``images`` and
            ``labels`` sub-directories are created inside it.
        args: argparse object (not read by this function).
    """
    # Output layout: <output_dir>/images and <output_dir>/labels
    output_dir_images = os.path.join(output_dir, 'images')
    output_dir_labels = os.path.join(output_dir, 'labels')
    for directory in (output_dir, output_dir_images, output_dir_labels):
        os.makedirs(directory, exist_ok=True)

    # Reading the images
    # NOTE(review): file_names[i] is assumed to belong to images_batch[i];
    # that only holds if read_image walks the directory in sorted order.
    file_names = sorted(os.listdir(img_path))
    images_batch = read_image(img_path)

    # Labels for the prediction (kept for the plotting step below)
    class_names = get_classes(config.classes_path)
    num_classes = config.num_classes

    # Input shape of the model, i.e. (608x608), (416x416), (320x320)
    input_shape = (config.input_shape, config.input_shape)

    # Placeholders for passing the image data onto the model
    image_tensor = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None, None, 3])
    image_shape = tf.placeholder(dtype=tf.int32, shape=[2])

    # The instance must not be bound to the name ``model``: assigning to that
    # name would make it local to this function and the call itself would
    # raise UnboundLocalError.
    yolo_model = model(image_tensor,
                       is_training=False,
                       num_classes=config.num_classes)
    output_nodes, model_layers = yolo_model.forward()

    print('Summary of the model created.......\n')
    for layer in model_layers:
        print(layer)

    # Creating a session for running the model
    gpu_config = tf.ConfigProto(log_device_placement=False)
    gpu_config.gpu_options.allow_growth = True
    sess = tf.Session(config=gpu_config)

    total_time_pred = []
    try:
        output_values = predict(output_nodes, num_classes, input_shape,
                                image_shape)

        # Restore the moving-average weights from the best validation
        # checkpoint.
        ckpt_path = config.model_dir + 'valid/'
        exponential_moving_average_obj = tf.train.ExponentialMovingAverage(
            config.weight_decay)
        saver = tf.train.Saver(
            exponential_moving_average_obj.variables_to_restore())
        ckpt = tf.train.get_checkpoint_state(ckpt_path)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            best_checkpoint = checkmate.get_best_checkpoint(ckpt_path)
            print('Restoring model ', best_checkpoint)
            saver.restore(sess, best_checkpoint)
            print('Model Loaded!')

        for x, image in enumerate(images_batch):
            image_data = np.array(resize_image(image, input_shape))
            print('Image height: {}\tImage width: {}'.format(
                image.shape[0], image.shape[1]))

            img = image_data / 255.
            img = np.expand_dims(img, 0)  # Adding the batch dimension

            tick = time()
            # Actually run the graph in a tensorflow session
            out_values = sess.run([output_values],
                                  feed_dict={
                                      image_tensor: img,
                                      image_shape:
                                      [image.shape[0], image.shape[1]]
                                  })
            tock = time()
            total_time_pred.append(tock - tick)

            # NOTE(review): the structure returned by predict() is not
            # visible here; the length of the single fetched value is
            # reported as the number of boxes - confirm.
            detections = out_values[0]
            print('Found {} boxes for {} in {}sec'.format(
                len(detections), 'img', tock - tick))

            ######################## Visualization ######################
            font = ImageFont.truetype(font='./font/FiraMono-Medium.otf',
                                      size=np.floor(1e-2 * image.shape[1] +
                                                    0.5).astype(np.int32))
            # Bounding-box line thickness
            thickness = (image.shape[0] + image.shape[1]) // 500
            image = Image.fromarray((image).astype('uint8'), mode='RGB')

            label_path = os.path.join(output_dir_labels,
                                      file_names[x].split('.')[0] + '.txt')
            # The context manager closes the file even if plotting fails.
            with open(label_path, 'w') as output_labels:
                ### DO ALL THE PLOTTING THING IF REQUIRED ###
                ### SAVE THE IMAGE ###
                pass
    finally:
        sess.close()

    # The first run is excluded from the timing (it carries the start-up
    # cost of the first session call).
    timed_images = len(images_batch) - 1
    total_time = sum(total_time_pred[1:])
    if timed_images > 0 and total_time > 0:
        print('FPS of model with post processing over {} images is {}'.format(
            timed_images, timed_images / total_time))
    else:
        print('At least two images are needed to measure the FPS.')
def test_han():
    """Test HAN model.

    Asks on stdin whether the best or the latest checkpoint should be used,
    restores it, exports the frozen graph as a ``.pb`` file, runs one
    un-shuffled pass over the test data, logs the mean loss together with the
    threshold-based recall / precision / F value and writes the predictions
    to ``SAVE_DIR``.
    """
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file,
                                        FLAGS.num_classes,
                                        FLAGS.embedding_dim,
                                        data_aug_flag=False)

    logger.info("✔︎ Test data padding...")
    x_test, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len)
    y_test_labels = test_data.labels

    # Load han model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha()
               and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input(
            "✘ The format of your input is illegal, please re-input: ")

    # The validation above accepts lower-case answers, so the comparison has
    # to be case-insensitive too; otherwise "b" would load the latest model.
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name(
                "is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output nodes name by '|' if you have several output
            # nodes
            output_node_names = "output/logits|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def,
                                 "graph",
                                 "graph-han-{0}.pb".format(MODEL),
                                 as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test, y_test, y_test_labels)),
                                    FLAGS.batch_size,
                                    1,
                                    shuffle=False)

            # Collect the predictions here
            all_labels = []
            all_predicted_labels = []
            all_predicted_values = []

            # Running sums for the metrics; averaged over batches below
            test_counter, test_loss, test_rec, test_pre = 0, 0.0, 0.0, 0.0

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_labels = zip(
                    *batch_test)
                feed_dict = {
                    input_x: x_batch_test,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Predict by threshold
                predicted_labels_threshold, predicted_values_threshold = \
                    dh.get_label_using_scores_by_threshold(
                        scores=batch_scores, threshold=FLAGS.threshold)

                # Per-sample recall / precision, averaged over the batch
                cur_rec, cur_pre = 0.0, 0.0
                for index, predicted_label_threshold in enumerate(
                        predicted_labels_threshold):
                    rec_inc, pre_inc = dh.cal_metric(
                        predicted_label_threshold, y_batch_test[index])
                    cur_rec, cur_pre = cur_rec + rec_inc, cur_pre + pre_inc

                cur_rec = cur_rec / len(y_batch_test)
                cur_pre = cur_pre / len(y_batch_test)

                test_rec, test_pre = test_rec + cur_rec, test_pre + cur_pre

                # Add results to collection
                all_labels.extend(y_batch_test_labels)
                all_predicted_labels.extend(predicted_labels_threshold)
                all_predicted_values.extend(predicted_values_threshold)

                test_loss = test_loss + cur_loss
                test_counter = test_counter + 1

            test_loss = float(test_loss / test_counter)
            test_rec = float(test_rec / test_counter)
            test_pre = float(test_pre / test_counter)
            test_F = dh.cal_F(test_rec, test_pre)

            logger.info("☛ All Test Dataset: Loss {0:g}".format(test_loss))

            # Predict by threshold
            logger.info(
                "☛ Predict by threshold: Recall {0:g}, Precision {1:g}, F {2:g}"
                .format(test_rec, test_pre, test_F))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR +
                                      "/predictions.json",
                                      data_id=test_data.testid,
                                      all_labels=all_labels,
                                      all_predict_labels=all_predicted_labels,
                                      all_predict_values=all_predicted_values)

    logger.info("✔︎ Done.")
def visualize():
    """Dump the per-level attention ("visual") outputs of a HARNN model.

    Restores the best or latest checkpoint (as chosen via ``dh._option``),
    exports the frozen graph as a ``.pb`` file and, for every test batch,
    evaluates the four ``*-output/visual`` tensors and hands the normalised
    values of the first sample to ``create_visual_file``.
    """
    # word2vec vocabulary (the embedding matrix itself is not needed here)
    word2idx, _embedding_matrix = dh.load_word2vec_matrix(args.word2vec_file)

    # Test data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args, args.test_file, word2idx)

    # Checkpoint selection
    OPTION = dh._option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR,
                                                 select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Rebuild the graph from the .meta file, then load the weights.
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def first_output(op_name):
                """Return the first output tensor of the named operation."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Placeholders looked up by name
            input_x = first_output("input_x")
            input_y_first = first_output("input_y_first")
            input_y_second = first_output("input_y_second")
            input_y_third = first_output("input_y_third")
            input_y_fourth = first_output("input_y_fourth")
            input_y = first_output("input_y")
            dropout_keep_prob = first_output("dropout_keep_prob")
            alpha = first_output("alpha")
            is_training = first_output("is_training")

            # Tensors to evaluate, one per hierarchy level
            visual_tensors = [
                first_output("first-output/visual"),
                first_output("second-output/visual"),
                first_output("third-output/visual"),
                first_output("fourth-output/visual"),
            ]

            # Several output nodes are separated by '|'
            output_node_names = "first-output/visual|second-output/visual|third-output/visual|fourth-output/visual|output/scores"

            # Freeze the variables and store the graph as a binary .pb file
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(frozen_graph_def,
                                 "graph",
                                 "graph-harnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # One epoch, original order
            batches = dh.batch_iter(list(create_input_data(test_data)),
                                    args.batch_size,
                                    1,
                                    shuffle=False)

            for batch_id, batch_test in enumerate(batches):
                x, x_content, sec, subsec, group, subgroup, y_onehot = zip(
                    *batch_test)

                feed_dict = {
                    input_x: x,
                    input_y_first: sec,
                    input_y_second: subsec,
                    input_y_third: group,
                    input_y_fourth: subgroup,
                    input_y: y_onehot,
                    dropout_keep_prob: 1.0,
                    alpha: args.alpha,
                    is_training: False
                }
                # One array per level, in the order of ``visual_tensors``
                batch_visual = sess.run(visual_tensors, feed_dict)

                # Only the first sample of the batch is visualised; its
                # values are cut to the shorter of raw and padded length.
                seq_len = len(x_content[0])
                pad_len = len(batch_visual[0][0])
                length = min(seq_len, pad_len)

                visual_list = [
                    normalization(level_visual[0].tolist(), length)
                    for level_visual in batch_visual
                ]

                create_visual_file(batch_id, x_content, visual_list, seq_len)

    logger.info("Done.")
def run_inference(img_path, output_dir, args):
    """Detect objects in every image of a directory and save the results.

    The yolo graph is built from ``config.yolov3_cfg_path``. When
    ``args.darknet_model`` is not None the darknet weights at
    ``config.yolov3_weights_path`` are loaded (and downloaded with ``wget``
    when missing); otherwise the best checkpoint under ``config.model_dir``
    is restored. For each image the annotated picture is written to
    ``<output_dir>/images`` and one ``left,top,right,bottom,class,score``
    line per detection to ``<output_dir>/labels/<image name>.txt``.

    Args:
        img_path: string, directory containing the images; it is passed to
            both ``os.listdir`` and ``read_image``.
        output_dir: string, directory for saving the output.
        args: argparse object; only ``args.darknet_model`` is read.
    """
    # Output layout: <output_dir>/images and <output_dir>/labels
    output_dir_images = os.path.join(output_dir, 'images')
    output_dir_labels = os.path.join(output_dir, 'labels')
    for directory in (output_dir, output_dir_images, output_dir_labels):
        os.makedirs(directory, exist_ok=True)

    # Reading the images
    # NOTE(review): file_names[i] is assumed to belong to images_batch[i];
    # that only holds if read_image walks the directory in sorted order.
    file_names = sorted(os.listdir(img_path))
    images_batch = read_image(img_path)

    # Getting anchors and labels for the prediction
    class_names = get_classes(config.classes_path)
    anchors = read_anchors(config.anchors_path)
    num_classes = config.num_classes

    # Input shape of the model, i.e. (608x608), (416x416), (320x320)
    input_shape = (config.input_shape, config.input_shape)

    # Generate colors for drawing bounding boxes: evenly spaced hues,
    # converted to 0-255 RGB tuples.
    hsv_tuples = [(x / len(class_names), 1., 1.)
                  for x in range(len(class_names))]
    colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    colors = [(int(r * 255), int(g * 255), int(b * 255))
              for r, g, b in colors]
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Decorrelate the colors of adjacent classes.
    random.seed(None)  # Reset seed to default.

    # Placeholders for passing the image data onto the model
    image_tensor = tf.placeholder(dtype=tf.float32,
                                  shape=[None, None, None, 3])
    image_shape = tf.placeholder(dtype=tf.int32, shape=[2])

    # Building the model for running inference
    output_nodes = yolo(input_images=image_tensor,
                        is_training=False,
                        config_path=config.yolov3_cfg_path,
                        num_classes=num_classes)

    # Creating a session for running the model
    gpu_config = tf.ConfigProto(log_device_placement=False)
    gpu_config.gpu_options.allow_growth = True
    sess = tf.Session(config=gpu_config)

    total_time_pred = []
    try:
        boxes, scores, classes = predict(output_nodes, anchors, num_classes,
                                         input_shape, image_shape)

        for x, image in enumerate(images_batch):
            image_data = np.array(resize_image(image, input_shape))
            print('Image height: {}\tImage width: {}'.format(
                image.shape[0], image.shape[1]))

            img = image_data / 255.
            img = np.expand_dims(img, 0)  # Adding the batch dimension

            # The weights are loaded once, right before the first image.
            if x == 0:
                if args.darknet_model is not None:
                    print('Loading pre-trained weights.....')
                    if not os.path.exists(config.yolov3_weights_path):
                        print('yolov3 weights not found.....\n')
                        if not os.path.exists('./yolov3.weights'):
                            os.system(
                                'wget https://pjreddie.com/media/files/yolov3.weights'
                            )
                        os.system(
                            'mv yolov3.weights ./darknet_data/yolov3.weights')
                    load_op = load_weights(
                        tf.global_variables(),
                        weights_file=config.yolov3_weights_path)
                    sess.run(load_op)
                else:
                    ckpt_path = config.model_dir
                    saver = tf.train.Saver()
                    ckpt = tf.train.get_checkpoint_state(ckpt_path)
                    if ckpt and tf.train.checkpoint_exists(
                            ckpt.model_checkpoint_path):
                        best_checkpoint = checkmate.get_best_checkpoint(
                            ckpt_path)
                        print('Restoring model ', best_checkpoint)
                        saver.restore(sess, best_checkpoint)
                        print('Model Loaded!')

            tick = time()
            # Actually run the graph in a tensorflow session
            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    image_tensor: img,
                    image_shape: [image.shape[0], image.shape[1]]
                })
            tock = time()
            total_time_pred.append(tock - tick)

            print('Found {} boxes for {} in {}sec'.format(
                len(out_boxes), 'img', tock - tick))

            ######################## Visualization ######################
            font = ImageFont.truetype(font='./font/FiraMono-Medium.otf',
                                      size=np.floor(1e-2 * image.shape[1] +
                                                    0.5).astype(np.int32))
            # Bounding-box line thickness; 0 (no outline drawn) when height
            # plus width is below 1000 pixels.
            thickness = (image.shape[0] + image.shape[1]) // 1000
            image = Image.fromarray((image).astype('uint8'), mode='RGB')

            # Strip the image extension so "img.jpg" yields "img.txt".
            label_path = os.path.join(
                output_dir_labels,
                os.path.splitext(file_names[x])[0] + '.txt')

            # The context manager closes the file even if drawing fails.
            with open(label_path, 'w') as output_labels:
                for i, c in reversed(list(enumerate(out_classes))):
                    predicted_class = class_names[c]
                    box = out_boxes[i]
                    score = out_scores[i]

                    label = '{} {:.2f}'.format(predicted_class, score)
                    draw = ImageDraw.Draw(image)
                    # NOTE(review): ImageDraw.textsize was removed in
                    # Pillow 10 - confirm the pinned Pillow version.
                    label_size = draw.textsize(label, font)

                    # Round to pixels and clip the box to the image.
                    top, left, bottom, right = box  # y_min, x_min, y_max, x_max
                    top = max(0, np.floor(top + 0.5).astype(np.int32))
                    left = max(0, np.floor(left + 0.5).astype(np.int32))
                    bottom = min(image.size[1],
                                 np.floor(bottom + 0.5).astype(np.int32))
                    right = min(image.size[0],
                                np.floor(right + 0.5).astype(np.int32))
                    # (x_min, y_min), (x_max, y_max)
                    print(label, (left, top), (right, bottom))

                    output_labels.write(
                        str(left) + ',' + str(top) + ',' + str(right) + ',' +
                        str(bottom) + ',' + str(c) + ',' + str(score) + '\n')

                    # Put the label above the box when there is room,
                    # otherwise just inside its top edge.
                    if top - label_size[1] >= 0:
                        text_origin = np.array([left, top - label_size[1]])
                    else:
                        text_origin = np.array([left, top + 1])

                    # Nested 1-pixel rectangles give the outline its width.
                    for j in range(thickness):
                        draw.rectangle(
                            [left + j, top + j, right - j, bottom - j],
                            outline=colors[c])
                    draw.rectangle(
                        [tuple(text_origin),
                         tuple(text_origin + label_size)],
                        fill=colors[c])
                    draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                    del draw

                image.save(os.path.join(output_dir_images, file_names[x]),
                           compress_level=1)
    finally:
        sess.close()

    # The first run is excluded from the timing (it also carries the
    # start-up cost of the first session call).
    timed_images = len(images_batch) - 1
    total_time = sum(total_time_pred[1:])
    if timed_images > 0 and total_time > 0:
        print('FPS of model with post processing over {} images is {}'.format(
            timed_images, timed_images / total_time))
    else:
        print('At least two images are needed to measure the FPS.')
def train(ckpt_path, log_path, class_path):
    """Build the yolo training graph and run the train/validation loop.

    Gradients are computed per minibatch, summed in Python and then applied
    in one optimizer step through placeholder-fed ``apply_gradients``. After
    every epoch the mean train / validation losses are handed to two
    ``checkmate.BestCheckpointSaver`` instances writing to
    ``ckpt_path + 'train/'`` and ``ckpt_path + 'valid/'``.

    Args:
        ckpt_path: string, path for saving/restoring the model; it is
            concatenated with ``'train/'`` and ``'valid/'``, so it is
            expected to end with a path separator.
        log_path: string, path for saving the training/validation logs.
        class_path: string, path for the classes of the dataset.

    Raises:
        ValueError: if the configured learning-rate scheduler is unknown.
    """
    # Getting the anchors
    anchors = read_anchors(config.anchors_path)
    classes = get_classes(class_path)

    # Six anchors (two scales of three) select the tiny architecture.
    yolo_tiny = anchors.shape[0] // 3 == 2

    # Building the training pipeline
    graph = tf.get_default_graph()

    with graph.as_default():

        # Getting the training data
        with tf.name_scope('data_parser/'):
            train_reader = Parser('train',
                                  config.anchors_path,
                                  config.output_dir,
                                  config.num_classes,
                                  input_shape=config.input_shape,
                                  max_boxes=config.max_boxes)
            train_data = train_reader.build_dataset(config.train_batch_size //
                                                    config.subdivisions)
            train_iterator = train_data.make_one_shot_iterator()

            val_reader = Parser('val',
                                config.anchors_path,
                                config.output_dir,
                                config.num_classes,
                                input_shape=config.input_shape,
                                max_boxes=config.max_boxes)
            val_data = val_reader.build_dataset(config.val_batch_size //
                                                config.subdivisions)
            val_iterator = val_data.make_one_shot_iterator()

        # Used for different behaviour of batch normalization
        is_training = tf.placeholder(dtype=tf.bool,
                                     shape=[],
                                     name='train_flag')
        # 1 selects the training iterator, anything else the validation one
        mode = tf.placeholder(dtype=tf.int16, shape=[], name='mode_flag')

        def _next_train_batch():
            return train_iterator.get_next()

        def _next_val_batch():
            return val_iterator.get_next()

        if yolo_tiny:
            images, bbox, bbox_true_13, bbox_true_26 = tf.cond(
                pred=tf.equal(mode, 1),
                true_fn=_next_train_batch,
                false_fn=_next_val_batch,
                name='train_val__data')
            grid_shapes = [
                config.input_shape // 32, config.input_shape // 16
            ]
        else:
            images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = tf.cond(
                pred=tf.equal(mode, 1),
                true_fn=_next_train_batch,
                false_fn=_next_val_batch,
                name='train_val_data')
            grid_shapes = [
                config.input_shape // 32, config.input_shape // 16,
                config.input_shape // 8
            ]

        images.set_shape([None, config.input_shape, config.input_shape, 3])
        bbox.set_shape([None, config.max_boxes, 5])

        # Extracting the pre-defined yolo graph from the darknet cfg file
        if not os.path.exists(ckpt_path):
            os.mkdir(ckpt_path)
        output = yolo(images, is_training, config.yolov3_cfg_path,
                      config.num_classes)

        # Declaring the parameters for GT
        with tf.name_scope('Targets'):
            if yolo_tiny:
                bbox_true_13.set_shape([
                    None, grid_shapes[0], grid_shapes[0],
                    config.num_anchors_per_scale, 5 + config.num_classes
                ])
                bbox_true_26.set_shape([
                    None, grid_shapes[1], grid_shapes[1],
                    config.num_anchors_per_scale, 5 + config.num_classes
                ])
                y_true = [bbox_true_13, bbox_true_26]
            else:
                bbox_true_13.set_shape([
                    None, grid_shapes[0], grid_shapes[0],
                    config.num_anchors_per_scale, 5 + config.num_classes
                ])
                bbox_true_26.set_shape([
                    None, grid_shapes[1], grid_shapes[1],
                    config.num_anchors_per_scale, 5 + config.num_classes
                ])
                bbox_true_52.set_shape([
                    None, grid_shapes[2], grid_shapes[2],
                    config.num_anchors_per_scale, 5 + config.num_classes
                ])
                y_true = [bbox_true_13, bbox_true_26, bbox_true_52]

        # Compute Loss
        with tf.name_scope('Loss_and_Detect'):
            loss_scale, yolo_loss, xy_loss, wh_loss, obj_loss, noobj_loss, conf_loss, class_loss = compute_loss(
                output,
                y_true,
                anchors,
                config.num_classes,
                config.input_shape,
                ignore_threshold=config.ignore_thresh)
            loss = yolo_loss
            exponential_moving_average_op = tf.train.ExponentialMovingAverage(
                config.weight_decay).apply(
                    var_list=tf.trainable_variables())  # For regularisation
            scale1_loss_summary = tf.summary.scalar('scale_loss_1',
                                                    loss_scale[0],
                                                    family='Loss')
            scale2_loss_summary = tf.summary.scalar('scale_loss_2',
                                                    loss_scale[1],
                                                    family='Loss')
            yolo_loss_summary = tf.summary.scalar('yolo_loss',
                                                  yolo_loss,
                                                  family='Loss')
            xy_loss_summary = tf.summary.scalar('xy_loss',
                                                xy_loss,
                                                family='Loss')
            wh_loss_summary = tf.summary.scalar('wh_loss',
                                                wh_loss,
                                                family='Loss')
            obj_loss_summary = tf.summary.scalar('obj_loss',
                                                 obj_loss,
                                                 family='Loss')
            noobj_loss_summary = tf.summary.scalar('noobj_loss',
                                                   noobj_loss,
                                                   family='Loss')
            conf_loss_summary = tf.summary.scalar('confidence_loss',
                                                  conf_loss,
                                                  family='Loss')
            class_loss_summary = tf.summary.scalar('class_loss',
                                                   class_loss,
                                                   family='Loss')

        # Declaring the parameters for training the model
        with tf.name_scope('train_parameters'):
            global_step = tf.Variable(0, trainable=False, name='global_step')

            def learning_rate_scheduler(learning_rate,
                                        scheduler_name,
                                        global_step,
                                        decay_steps=100):
                """Return the learning-rate tensor for the named schedule."""
                if scheduler_name == 'exponential':
                    # NOTE(review): ``decay_rate`` is not defined inside this
                    # function; it has to exist at module level - confirm.
                    lr = tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps,
                        decay_rate,
                        staircase=True,
                        name='exponential_learning_rate')
                    return tf.maximum(lr, config.learning_rate_lower_bound)
                elif scheduler_name == 'polynomial':
                    lr = tf.train.polynomial_decay(
                        learning_rate,
                        global_step,
                        decay_steps,
                        config.learning_rate_lower_bound,
                        power=0.8,
                        cycle=True,
                        name='polynomial_learning_rate')
                    return tf.maximum(lr, config.learning_rate_lower_bound)
                elif scheduler_name == 'cosine':
                    lr = tf.train.cosine_decay(learning_rate,
                                               global_step,
                                               decay_steps,
                                               alpha=0.5,
                                               name='cosine_learning_rate')
                    return tf.maximum(lr, config.learning_rate_lower_bound)
                elif scheduler_name == 'linear':
                    return tf.convert_to_tensor(learning_rate,
                                                name='linear_learning_rate')
                else:
                    raise ValueError(
                        'Unsupported learning rate scheduler\n[supported types: exponential, polynomial, cosine, linear]'
                    )

            if config.use_warm_up:
                # Warm-up schedule until the burn-in steps are over, the
                # regular schedule afterwards.
                learning_rate = tf.cond(
                    pred=tf.less(
                        global_step, config.burn_in_epochs *
                        (config.train_num // config.train_batch_size)),
                    true_fn=lambda: learning_rate_scheduler(
                        config.init_learning_rate, config.
                        warm_up_lr_scheduler, global_step),
                    false_fn=lambda: learning_rate_scheduler(
                        config.learning_rate,
                        config.lr_scheduler,
                        global_step,
                        decay_steps=500))
            else:
                learning_rate = learning_rate_scheduler(config.learning_rate,
                                                        config.lr_scheduler,
                                                        global_step,
                                                        decay_steps=2000)

            tf.summary.scalar('learning rate', learning_rate)

        # Define optimizer for minimizing the computed loss
        with tf.name_scope('Optimizer'):
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                if config.pre_train:
                    train_vars = tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, scope='yolo')
                else:
                    train_vars = tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES)
                grads = optimizer.compute_gradients(loss=loss,
                                                    var_list=train_vars)
                # One placeholder per variable: the gradients summed in
                # Python are fed back through these.
                gradients = [(tf.placeholder(dtype=tf.float32,
                                             shape=grad[1].get_shape()),
                              grad[1]) for grad in grads]
                optimizing_op = optimizer.apply_gradients(
                    grads_and_vars=gradients, global_step=global_step)
            # Update the moving averages after every optimizer step.
            with tf.control_dependencies([optimizing_op]):
                with tf.control_dependencies([exponential_moving_average_op]):
                    train_op_with_mve = tf.no_op()
            train_op = train_op_with_mve

        # Only the gradient tensors are fetched during training; the
        # variable values are not needed for the accumulation.
        grad_tensors = [grad for grad, _ in grads]

        #################################### Training loop ############################################################
        # Saver objects for saving the model
        best_ckpt_saver_train = checkmate.BestCheckpointSaver(
            save_dir=ckpt_path + 'train/', num_to_keep=5)
        best_ckpt_saver_valid = checkmate.BestCheckpointSaver(
            save_dir=ckpt_path + 'valid/', num_to_keep=5)
        summary_op = tf.summary.merge_all()
        summary_op_valid = tf.summary.merge([
            yolo_loss_summary, xy_loss_summary, wh_loss_summary,
            obj_loss_summary, noobj_loss_summary, conf_loss_summary,
            class_loss_summary, scale1_loss_summary, scale2_loss_summary
        ])

        init_op = tf.global_variables_initializer()

        # Defining some train loop dependencies
        gpu_config = tf.ConfigProto(log_device_placement=False)
        gpu_config.gpu_options.allow_growth = True
        sess = tf.Session(config=gpu_config)
        tf.logging.set_verbosity(tf.logging.ERROR)
        train_summary_writer = tf.summary.FileWriter(
            os.path.join(log_path, 'train'), sess.graph)
        val_summary_writer = tf.summary.FileWriter(
            os.path.join(log_path, 'val'), sess.graph)

        # Restoring the model
        ckpt = tf.train.get_checkpoint_state(ckpt_path + 'valid/')
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Restoring model ',
                  checkmate.get_best_checkpoint(ckpt_path + 'valid/'))
            tf.train.Saver().restore(
                sess, checkmate.get_best_checkpoint(ckpt_path + 'valid/'))
            print('Model Loaded!')
        elif config.pre_train is True:
            sess.run(init_op)
            load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                    config.darknet53_weights_path)
            sess.run(load_ops)
        else:
            sess.run(init_op)

        print('Uninitialized variables: ',
              sess.run(tf.report_uninitialized_variables()))

        # Optimizer steps per epoch; also used for the warm-up length
        steps_per_epoch = config.train_num // config.train_batch_size
        minibatches_per_step = config.train_batch_size // config.subdivisions

        epochbar = tqdm(range(config.Epoch))
        for epoch in epochbar:
            epochbar.set_description('Epoch %s of %s' % (epoch, config.Epoch))
            mean_loss_train = []
            mean_loss_valid = []

            trainbar = tqdm(range(steps_per_epoch))
            for k in trainbar:
                # Sum of the per-variable gradients over the minibatches
                accumulated_grads = None
                for minibatch in range(minibatches_per_step):
                    num_steps, train_summary, loss_train, minibatch_grads = sess.run(
                        [global_step, summary_op, loss, grad_tensors],
                        feed_dict={
                            is_training: True,
                            mode: 1
                        })

                    if accumulated_grads is None:
                        accumulated_grads = list(minibatch_grads)
                    else:
                        accumulated_grads = [
                            total + grad for total, grad in zip(
                                accumulated_grads, minibatch_grads)
                        ]

                    train_summary_writer.add_summary(train_summary, epoch)
                    train_summary_writer.flush()
                    mean_loss_train.append(loss_train)
                    trainbar.set_description('Train loss: %s' %
                                             str(loss_train))

                # Apply the summed gradients in a single optimizer step.
                feed_dict = {is_training: True, mode: 1}
                for (grad_placeholder, _), grad_value in zip(
                        gradients, accumulated_grads):
                    feed_dict[grad_placeholder] = grad_value
                _ = sess.run(train_op, feed_dict=feed_dict)

            print('Validating.....')
            valbar = tqdm(range(config.val_num // config.val_batch_size))
            for k in valbar:
                # NOTE(review): the inner range is derived from
                # train_batch_size although this is the validation loop -
                # confirm that this is intended.
                for minibatch in range(minibatches_per_step):
                    val_summary, loss_valid = sess.run(
                        [summary_op_valid, loss],
                        feed_dict={
                            is_training: False,
                            mode: 0
                        })
                    val_summary_writer.add_summary(val_summary, epoch)
                    val_summary_writer.flush()
                    mean_loss_valid.append(loss_valid)
                    valbar.set_description('Validation loss: %s' %
                                           str(loss_valid))

            mean_loss_train = np.mean(mean_loss_train)
            mean_loss_valid = np.mean(mean_loss_valid)

            print('\n')
            print('Train loss after %d epochs is: %f' %
                  (epoch + 1, mean_loss_train))
            print('Validation loss after %d epochs is: %f' %
                  (epoch + 1, mean_loss_valid))
            print('\n\n')

            # No checkpoints are written while the warm-up is still running.
            if config.use_warm_up and not (
                    num_steps > config.burn_in_epochs * steps_per_epoch):
                continue
            best_ckpt_saver_train.handle(mean_loss_train, sess, global_step)
            best_ckpt_saver_valid.handle(mean_loss_valid, sess, global_step)

        print('Tuning Completed!!')
        train_summary_writer.close()
        val_summary_writer.close()
        sess.close()
def train(ckpt_path, log_path, class_path):
    """Build the training graph and run the train/validation loop.

    The function creates the input pipeline, the model, the loss, the
    learning-rate schedule and the optimizer inside the default graph, then
    opens a session, optionally restores the best checkpoint found under
    ``ckpt_path + 'train/'`` and iterates for ``config.Epoch`` epochs, writing
    summaries and handing the mean losses to the best-checkpoint savers.

    NOTE(review): the formatting of this function was reconstructed from
    whitespace-collapsed text; confirm the block nesting against the
    original file.

    Args:
        ckpt_path: string, directory prefix for saving/restoring the model;
            ``'train/'`` and ``'valid/'`` are appended to it.
        log_path: string, directory for the training/validation summaries.
        class_path: string, path to the classes file of the dataset.
    """

    # Getting the anchors
    # NOTE(review): `anchors` and `classes` are not used again in this
    # function as far as this view shows.
    anchors = read_anchors(config.anchors_path)
    if not os.path.exists(config.data_dir):
        os.mkdir(config.data_dir)

    classes = get_classes(class_path)

    # Building the training pipeline
    graph = tf.get_default_graph()

    with graph.as_default():

        # Getting the training data
        with tf.name_scope('data_parser/'):
            # Training batches are built with train_batch_size // subdivisions
            # samples each.
            train_reader = Parser('train', config.data_dir, config.anchors_path, config.output_dir,
                config.num_classes, input_shape=config.input_shape, max_boxes=config.max_boxes)
            train_data = train_reader.build_dataset(config.train_batch_size//config.subdivisions)
            train_iterator = train_data.make_one_shot_iterator()

            val_reader = Parser('val', config.data_dir, config.anchors_path, config.output_dir,
                config.num_classes, input_shape=config.input_shape, max_boxes=config.max_boxes)
            val_data = val_reader.build_dataset(config.val_batch_size)
            val_iterator = val_data.make_one_shot_iterator()

            is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag') # Used for different behaviour of batch normalization
            # `mode` selects the data source below: 1 -> training iterator,
            # anything else -> validation iterator.
            mode = tf.placeholder(dtype=tf.int16, shape=[], name='mode_flag')

            # NOTE: this nested `train` shadows the enclosing function name
            # inside this scope only.
            def train():
                return train_iterator.get_next()
            def valid():
                return val_iterator.get_next()

            images, labels = tf.cond(pred=tf.equal(mode, 1), true_fn=train, false_fn=valid, name='train_val_data')

            grid_shapes = [config.input_shape // 32, config.input_shape // 16, config.input_shape // 8]

            images.set_shape([None, config.input_shape, config.input_shape, 3])
            # NOTE(review): `required_shape` is not defined anywhere in this
            # function - confirm where it is supposed to come from.
            labels.set_shape([None, required_shape, 5])

            # image_summary = draw_box(images, bbox, file_name)

        if not os.path.exists(ckpt_path):
            os.mkdir(ckpt_path)

        # NOTE(review): assigning to `model` makes it a local name for the
        # whole function, so calling `model(...)` here raises
        # UnboundLocalError unless the class is bound under another name.
        model = model(images, is_training, config.num_classes, config.num_anchors_per_scale, config.weight_decay, config.norm_decay)
        output, model_layers = model.forward()

        print('Summary of the created model.......\n')
        for layer in model_layers:
            print(layer)

        # Declaring the parameters for GT
        # NOTE(review): this scope only holds a placeholder comment; the
        # ground-truth processing (presumably what defines `y_true`) still
        # has to be filled in before the file can run.
        with tf.name_scope('Targets'):
            ### GT PROCESSING ###

        # Compute Loss
        with tf.name_scope('Loss_and_Detect'):
            loss_scale,summaries = compute_loss(output, y_true, config.num_classes, ignore_threshold=config.ignore_thresh)
            # NOTE(review): the moving-average decay is taken from
            # `config.weight_decay` - confirm that this is intended.
            exponential_moving_average_op = tf.train.ExponentialMovingAverage(config.weight_decay).apply(var_list=tf.trainable_variables())
            # NOTE(review): `model_loss` is not defined in this function.
            loss = model_loss
            model_loss_summary = tf.summary.scalar('model_loss', summaries, family='Losses')

        # Declaring the parameters for training the model
        with tf.name_scope('train_parameters'):
            global_step = tf.Variable(0, trainable=False, name='global_step')

        # Declaring the parameters for training the model
        # NOTE(review): this scope repeats the previous one and creates a
        # second `global_step` variable that replaces the first binding.
        with tf.name_scope('train_parameters'):
            global_step = tf.Variable(0, trainable=False, name='global_step')

            def learning_rate_scheduler(learning_rate, scheduler_name, global_step, decay_steps=100):
                # Returns a learning-rate tensor for the requested schedule.
                # Every decaying schedule is floored at
                # config.learning_rate_lower_bound; 'linear' returns the
                # given rate unchanged as a tensor.
                if scheduler_name == 'exponential':
                    # NOTE(review): `decay_rate` is not defined in this function.
                    lr = tf.train.exponential_decay(learning_rate, global_step,
                        decay_steps, decay_rate, staircase=True, name='exponential_learning_rate')
                    return tf.maximum(lr, config.learning_rate_lower_bound)
                elif scheduler_name == 'polynomial':
                    lr = tf.train.polynomial_decay(learning_rate, global_step,
                        decay_steps, config.learning_rate_lower_bound,
                        power=0.8, cycle=True, name='polynomial_learning_rate')
                    return tf.maximum(lr, config.learning_rate_lower_bound)
                elif scheduler_name == 'cosine':
                    lr = tf.train.cosine_decay(learning_rate, global_step,
                        decay_steps, alpha=0.5, name='cosine_learning_rate')
                    return tf.maximum(lr, config.learning_rate_lower_bound)
                elif scheduler_name == 'linear':
                    return tf.convert_to_tensor(learning_rate, name='linear_learning_rate')
                else:
                    # NOTE(review): the message omits 'cosine', which is
                    # handled above.
                    raise ValueError('Unsupported learning rate scheduler\n[supported types: exponential, polynomial, linear]')

            if config.use_warm_up:
                # During the burn-in steps use the warm-up schedule, then
                # switch to the regular schedule with decay_steps=2000.
                learning_rate = tf.cond(pred=tf.less(global_step, config.burn_in_epochs * (config.train_num // config.train_batch_size)),
                    true_fn=lambda: learning_rate_scheduler(config.init_learning_rate, config.warm_up_lr_scheduler, global_step),
                    false_fn=lambda: learning_rate_scheduler(config.learning_rate, config.lr_scheduler, global_step, decay_steps=2000))
            else:
                learning_rate = learning_rate_scheduler(config.learning_rate, config.lr_scheduler, global_step=global_step, decay_steps=2000)

            tf.summary.scalar('learning rate', learning_rate, family='Train_Parameters')

        # Define optimizer for minimizing the computed loss
        with tf.name_scope('Optimizer'):
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=config.momentum)
            # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, momentum=config.momentum)
            # Run the ops collected under UPDATE_OPS before each optimizer step.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # grads = optimizer.compute_gradients(loss=loss)
                # gradients = [(tf.placeholder(dtype=tf.float32, shape=grad[1].get_shape()), grad[1]) for grad in grads]
                # train_step = optimizer.apply_gradients(grads_and_vars=gradients, global_step=global_step)
                optimizing_op = optimizer.minimize(loss=loss, global_step=global_step)

            # `train_op` is a no-op that depends on the optimizer step and
            # on the moving-average update, so running it triggers both.
            with tf.control_dependencies([optimizing_op]):
                with tf.control_dependencies([exponential_moving_average_op]):
                    train_op_with_mve = tf.no_op()
            train_op = train_op_with_mve

    #################################### Training loop ############################################################
    # A saver object for saving the model
    best_ckpt_saver_train = checkmate.BestCheckpointSaver(save_dir=ckpt_path+'train/', num_to_keep=5)
    best_ckpt_saver_valid = checkmate.BestCheckpointSaver(save_dir=ckpt_path+'valid/', num_to_keep=5)

    summary_op = tf.summary.merge_all()
    # NOTE(review): `model_loss_summary_without_learning_rate` is not defined
    # in this function (only `model_loss_summary` is).
    summary_op_valid = tf.summary.merge([model_loss_summary_without_learning_rate])

    init_op = tf.global_variables_initializer()

    # Defining some train loop dependencies
    gpu_config = tf.ConfigProto(log_device_placement=False)
    gpu_config.gpu_options.allow_growth = True
    sess = tf.Session(config=gpu_config)
    tf.logging.set_verbosity(tf.logging.ERROR)
    train_summary_writer = tf.summary.FileWriter(os.path.join(log_path, 'train'), sess.graph)
    val_summary_writer = tf.summary.FileWriter(os.path.join(log_path, 'val'), sess.graph)

    # NOTE(review): `receptive_field` is not defined in this function.
    print(sess.run(receptive_field))

    # Restoring the model
    # Variables are initialised from scratch only when no checkpoint is
    # found under ckpt_path + 'train/'.
    ckpt = tf.train.get_checkpoint_state(ckpt_path+'train/')
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Restoring model ', checkmate.get_best_checkpoint(ckpt_path+'train/'))
        tf.train.Saver().restore(sess, checkmate.get_best_checkpoint(ckpt_path+'train/'))
        print('Model Loaded!')
    else:
        sess.run(init_op)

    print('Uninitialized variables: ', sess.run(tf.report_uninitialized_variables()))

    epochbar = tqdm(range(config.Epoch))
    for epoch in epochbar:
        epochbar.set_description('Epoch %s of %s' % (epoch, config.Epoch))
        mean_loss_train = []
        mean_loss_valid = []

        # One optimizer step per iteration; mode=1 feeds the training data.
        trainbar = tqdm(range(config.train_num//config.train_batch_size))
        for k in trainbar:
            num_steps, train_summary, loss_train, _ = sess.run([global_step, summary_op, loss, train_op],
                feed_dict={is_training: True, mode: 1})

            # Summaries are indexed by epoch, not by global step.
            train_summary_writer.add_summary(train_summary, epoch)
            train_summary_writer.flush()
            mean_loss_train.append(loss_train)
            trainbar.set_description('Train loss: %s' %str(loss_train))

        print('Validating.....')
        # Validation only evaluates the loss (no train_op); mode=0 feeds the
        # validation data.
        valbar = tqdm(range(config.val_num//config.val_batch_size))
        for k in valbar:
            val_summary, loss_valid = sess.run([summary_op_valid, loss], feed_dict={is_training: False, mode: 0})

            val_summary_writer.add_summary(val_summary, epoch)
            val_summary_writer.flush()
            mean_loss_valid.append(loss_valid)
            valbar.set_description('Validation loss: %s' %str(loss_valid))

        # From here on the two names hold the scalar epoch means.
        mean_loss_train = np.mean(mean_loss_train)
        mean_loss_valid = np.mean(mean_loss_valid)

        print('\n')
        print('Train loss after %d epochs is: %f' %(epoch+1, mean_loss_train))
        print('Validation loss after %d epochs is: %f' %(epoch+1, mean_loss_valid))
        print('\n\n')

        # With warm-up enabled, checkpoints are only considered once the
        # global step has passed the burn-in period.
        if (config.use_warm_up):
            if (num_steps > config.burn_in_epochs * (config.train_num // config.train_batch_size)):
                best_ckpt_saver_train.handle(mean_loss_train, sess, global_step)
                best_ckpt_saver_valid.handle(mean_loss_valid, sess, global_step)
            else:
                continue
        else:
            best_ckpt_saver_train.handle(mean_loss_train, sess, global_step)
            best_ckpt_saver_valid.handle(mean_loss_valid, sess, global_step)

    print('Tuning Completed!!')
    train_summary_writer.close()
    val_summary_writer.close()
    sess.close()


def main():
    """Select the GPU from the configuration and start training."""
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_num)
    train(config.model_dir, config.logs_dir, config.classes_path)
    # NOTE(review): this variable is set only after training has finished -
    # confirm whether it was meant to be set before `train` is called.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


if __name__ == '__main__':
    main()
def test_harnn():
    """Test HARNN model.

    Loads and pads the test data, asks on stdin whether the best or the
    latest checkpoint should be restored, exports the restored graph as a
    frozen ``.pb`` file, runs one pass over the test set, dumps the
    first-level attention of each batch's first sample to ``attention.html``,
    logs loss / AUC / AUPRC / precision / recall / F1 and writes the
    predictions to ``SAVE_DIR + "/predictions.json"``.
    """

    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file,
                                        FLAGS.num_classes_list,
                                        FLAGS.total_classes,
                                        FLAGS.embedding_dim,
                                        data_aug_flag=False)

    logger.info("✔︎ Test data padding...")
    x_test, y_test, y_test_tuple = dh.pad_data(test_data, FLAGS.pad_seq_len)
    y_test_labels = test_data.labels

    # Load harnn model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha()
               and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input(
            "✘ The format of your input is illegal, please re-input: ")
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        # Using the session itself as the context manager installs it as the
        # default session AND closes it on exit (`as_default()` alone never
        # closes the session).
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y_first = graph.get_operation_by_name(
                "input_y_first").outputs[0]
            input_y_second = graph.get_operation_by_name(
                "input_y_second").outputs[0]
            input_y_third = graph.get_operation_by_name(
                "input_y_third").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            beta = graph.get_operation_by_name("beta").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            first_attention = graph.get_operation_by_name(
                "first-attention/attention").outputs[0]
            first_visual = graph.get_operation_by_name(
                "first-output/visual").outputs[0]
            second_visual = graph.get_operation_by_name(
                "second-output/visual").outputs[0]
            third_visual = graph.get_operation_by_name(
                "third-output/visual").outputs[0]
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "first-output/scores|second-output/scores|third-output/scores|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def,
                                 "graph",
                                 "graph-harnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(
                zip(x_test, y_test, y_test_tuple, y_test_labels)),
                                    FLAGS.batch_size,
                                    1,
                                    shuffle=False)

            test_counter, test_loss = 0, 0.0

            # Collection
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # Collect for calculating metrics
            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_ts = []
            predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)]

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_tuple, y_batch_test_labels = zip(
                    *batch_test)

                y_batch_test_first = [i[0] for i in y_batch_test_tuple]
                y_batch_test_second = [j[1] for j in y_batch_test_tuple]
                y_batch_test_third = [k[2] for k in y_batch_test_tuple]

                feed_dict = {
                    input_x: x_batch_test,
                    input_y_first: y_batch_test_first,
                    input_y_second: y_batch_test_second,
                    input_y_third: y_batch_test_third,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    beta: FLAGS.beta,
                    is_training: False
                }
                batch_first_attention, batch_first_visual, batch_second_visual, batch_third_visual, batch_scores, cur_loss = \
                    sess.run([first_attention, first_visual, second_visual, third_visual, scores, loss], feed_dict)

                print(batch_first_visual)
                print(batch_first_visual[0])
                print(batch_first_visual[1])
                print(batch_first_visual[0, :15])

                # Dump the attention of the FIRST sample of the batch. The
                # file is opened in 'w' mode for every batch, so it ends up
                # holding the last batch only. The context manager makes
                # sure the handle is released even if a write fails.
                with open('attention.html', 'w') as f:
                    f.write(
                        '<html style="margin:0;padding:0;"><body style="margin:0;padding:0;">\n'
                    )
                    f.write('<div style="margin:25px;">\n')
                    for k in range(len(batch_first_attention[0])):
                        f.write('<p style="margin:10px;">\n')
                        for i in range(len(batch_first_attention[0][0])):
                            alpha = "{:.2f}".format(
                                batch_first_attention[0][k][i])
                            # The weight belongs to sample 0, position i, so
                            # the token must come from sample 0 as well (the
                            # previous `x_batch_test[i]` picked the i-th
                            # SAMPLE of the batch instead).
                            word = x_batch_test[0][i]
                            f.write(
                                f'\t<span style="margin-left:3px;background-color:rgba(255,0,0,{alpha})">{word}</span>\n'
                            )
                        f.write('</p>\n')
                    f.write('</div>\n')
                    f.write('</body></html>')

                # Prepare for calculating metrics
                true_onehot_labels.extend(y_batch_test)
                predicted_onehot_scores.extend(batch_scores)

                # Get the predicted labels by threshold
                batch_predicted_labels_ts, batch_predicted_scores_ts = \
                    dh.get_label_threshold(scores=batch_scores, threshold=FLAGS.threshold)

                # Add results to collection
                true_labels.extend(y_batch_test_labels)
                predicted_labels.extend(batch_predicted_labels_ts)
                predicted_scores.extend(batch_predicted_scores_ts)

                # Get one-hot prediction by threshold
                batch_predicted_onehot_labels_ts = \
                    dh.get_onehot_label_threshold(scores=batch_scores, threshold=FLAGS.threshold)
                predicted_onehot_labels_ts.extend(
                    batch_predicted_onehot_labels_ts)

                # Get one-hot prediction by topK
                for i in range(FLAGS.top_num):
                    batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(
                        scores=batch_scores, top_num=i + 1)
                    predicted_onehot_labels_tk[i].extend(
                        batch_predicted_onehot_labels_tk)

                test_loss = test_loss + cur_loss
                test_counter = test_counter + 1

            # Convert the collected lists once instead of once per metric.
            y_true_onehot = np.array(true_onehot_labels)
            y_pred_onehot_ts = np.array(predicted_onehot_labels_ts)
            y_score_onehot = np.array(predicted_onehot_scores)

            # Calculate Precision & Recall & F1
            test_pre_ts = precision_score(y_true=y_true_onehot,
                                          y_pred=y_pred_onehot_ts,
                                          average='micro')
            test_rec_ts = recall_score(y_true=y_true_onehot,
                                       y_pred=y_pred_onehot_ts,
                                       average='micro')
            test_F_ts = f1_score(y_true=y_true_onehot,
                                 y_pred=y_pred_onehot_ts,
                                 average='micro')

            # Calculate the average AUC
            test_auc = roc_auc_score(y_true=y_true_onehot,
                                     y_score=y_score_onehot,
                                     average='micro')
            # Calculate the average PR
            test_prc = average_precision_score(y_true=y_true_onehot,
                                               y_score=y_score_onehot,
                                               average="micro")
            test_loss = float(test_loss / test_counter)

            logger.info(
                "☛ All Test Dataset: Loss {0:g} | AUC {1:g} | AUPRC {2:g}".
                format(test_loss, test_auc, test_prc))

            # Predict by threshold
            logger.info(
                "☛ Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
                .format(test_pre_ts, test_rec_ts, test_F_ts))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR +
                                      "/predictions.json",
                                      data_id=test_data.patent_id,
                                      all_labels=true_labels,
                                      all_predict_labels=predicted_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("✔︎ Done.")
def test_fasttext():
    """Test FASTTEXT model.

    Loads and pads the test data, restores the best or the latest checkpoint
    (as chosen through ``dh._option``), exports the restored graph as a
    frozen ``.pb`` file, runs one pass over the test set, logs loss / AUC /
    AUPRC and the threshold- and top-K-based precision / recall / F1, and
    writes the predictions to ``SAVE_DIR + "/predictions.json"``.
    """
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file,
                                        args.num_classes,
                                        args.word2vec_file,
                                        data_aug_flag=False)

    logger.info("Data padding...")
    x_test, y_test = dh.pad_data(test_data, args.pad_seq_len)
    y_test_labels = test_data.labels

    # Load fasttext model
    OPTION = dh._option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR,
                                                 select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        # Using the session itself as the context manager installs it as the
        # default session AND closes it on exit (`as_default()` alone never
        # closes the session).
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def,
                                 "graph",
                                 "graph-fasttext-{0}.pb".format(MODEL),
                                 as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test, y_test, y_test_labels)),
                                    args.batch_size,
                                    1,
                                    shuffle=False)

            test_counter, test_loss = 0, 0.0

            test_pre_tk = [0.0] * args.topK
            test_rec_tk = [0.0] * args.topK
            test_F1_tk = [0.0] * args.topK

            # Collect the predictions here
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # Collect for calculating metrics
            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_ts = []
            predicted_onehot_labels_tk = [[] for _ in range(args.topK)]

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_labels = zip(
                    *batch_test)
                feed_dict = {
                    input_x: x_batch_test,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Prepare for calculating metrics
                true_onehot_labels.extend(y_batch_test)
                predicted_onehot_scores.extend(batch_scores)

                # Get the predicted labels by threshold
                batch_predicted_labels_ts, batch_predicted_scores_ts = \
                    dh.get_label_threshold(scores=batch_scores, threshold=args.threshold)

                # Add results to collection
                true_labels.extend(y_batch_test_labels)
                predicted_labels.extend(batch_predicted_labels_ts)
                predicted_scores.extend(batch_predicted_scores_ts)

                # Get onehot predictions by threshold
                batch_predicted_onehot_labels_ts = \
                    dh.get_onehot_label_threshold(scores=batch_scores, threshold=args.threshold)
                predicted_onehot_labels_ts.extend(
                    batch_predicted_onehot_labels_ts)

                # Get onehot predictions by topK
                for top_num in range(args.topK):
                    batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(
                        scores=batch_scores, top_num=top_num + 1)
                    predicted_onehot_labels_tk[top_num].extend(
                        batch_predicted_onehot_labels_tk)

                test_loss = test_loss + cur_loss
                test_counter = test_counter + 1

            # Convert the collected lists once; they are reused by every
            # metric below (including each iteration of the top-K loop).
            y_true_onehot = np.array(true_onehot_labels)
            y_pred_onehot_ts = np.array(predicted_onehot_labels_ts)
            y_score_onehot = np.array(predicted_onehot_scores)

            # Calculate Precision & Recall & F1
            test_pre_ts = precision_score(y_true=y_true_onehot,
                                          y_pred=y_pred_onehot_ts,
                                          average='micro')
            test_rec_ts = recall_score(y_true=y_true_onehot,
                                       y_pred=y_pred_onehot_ts,
                                       average='micro')
            test_F1_ts = f1_score(y_true=y_true_onehot,
                                  y_pred=y_pred_onehot_ts,
                                  average='micro')

            for top_num in range(args.topK):
                y_pred_onehot_tk = np.array(
                    predicted_onehot_labels_tk[top_num])
                test_pre_tk[top_num] = precision_score(
                    y_true=y_true_onehot,
                    y_pred=y_pred_onehot_tk,
                    average='micro')
                test_rec_tk[top_num] = recall_score(
                    y_true=y_true_onehot,
                    y_pred=y_pred_onehot_tk,
                    average='micro')
                test_F1_tk[top_num] = f1_score(
                    y_true=y_true_onehot,
                    y_pred=y_pred_onehot_tk,
                    average='micro')

            # Calculate the average AUC
            test_auc = roc_auc_score(y_true=y_true_onehot,
                                     y_score=y_score_onehot,
                                     average='micro')

            # Calculate the average PR
            test_prc = average_precision_score(y_true=y_true_onehot,
                                               y_score=y_score_onehot,
                                               average="micro")
            test_loss = float(test_loss / test_counter)

            logger.info(
                "All Test Dataset: Loss {0:g} | AUC {1:g} | AUPRC {2:g}".
                format(test_loss, test_auc, test_prc))

            # Predict by threshold
            logger.info(
                "Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
                .format(test_pre_ts, test_rec_ts, test_F1_ts))

            # Predict by topK
            logger.info("Predict by topK:")
            for top_num in range(args.topK):
                logger.info(
                    "Top{0}: Precision {1:g}, Recall {2:g}, F1 {3:g}".format(
                        top_num + 1, test_pre_tk[top_num],
                        test_rec_tk[top_num], test_F1_tk[top_num]))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR +
                                      "/predictions.json",
                                      data_id=test_data.testid,
                                      all_labels=true_labels,
                                      all_predict_labels=predicted_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("All Done.")
def visualize():
    """visualize HARNN model.

    Loads and pads the test data, asks on stdin whether the best or the
    latest checkpoint should be restored, exports the restored graph as a
    frozen ``.pb`` file and, for every test batch, writes the normalized
    per-level "visual" weights of the batch's first sample to
    ``attention.html`` as red-shaded ``<span>`` elements.
    """

    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding Sequence length is: {0}".format(FLAGS.pad_seq_len))

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file, FLAGS.num_classes_list,
                                        FLAGS.total_classes, FLAGS.embedding_dim, data_aug_flag=False)

    logger.info("✔︎ Test data padding...")
    x_test, y_test, y_test_tuple = dh.pad_data(test_data, FLAGS.pad_seq_len)
    x_test_content, y_test_labels = test_data.abstract_content, test_data.labels

    # Load harnn model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha() and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input("✘ The format of your input is illegal, please re-input: ")
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir, select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        # Using the session itself as the context manager installs it as the
        # default session AND closes it on exit (`as_default()` alone never
        # closes the session).
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y_first = graph.get_operation_by_name("input_y_first").outputs[0]
            input_y_second = graph.get_operation_by_name("input_y_second").outputs[0]
            input_y_third = graph.get_operation_by_name("input_y_third").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            beta = graph.get_operation_by_name("beta").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            first_visual = graph.get_operation_by_name("first-output/visual").outputs[0]
            second_visual = graph.get_operation_by_name("second-output/visual").outputs[0]
            third_visual = graph.get_operation_by_name("third-output/visual").outputs[0]
            scores = graph.get_operation_by_name("output/scores").outputs[0]

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "first-output/visual|second-output/visual|third-output/visual|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
                                                                            output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph", "graph-harnn-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test, y_test, y_test_tuple, x_test_content, y_test_labels)),
                                    FLAGS.batch_size, 1, shuffle=False)

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_tuple, \
                    x_batch_test_content, y_batch_test_labels = zip(*batch_test)

                y_batch_test_first = [i[0] for i in y_batch_test_tuple]
                y_batch_test_second = [j[1] for j in y_batch_test_tuple]
                y_batch_test_third = [k[2] for k in y_batch_test_tuple]

                feed_dict = {
                    input_x: x_batch_test,
                    input_y_first: y_batch_test_first,
                    input_y_second: y_batch_test_second,
                    input_y_third: y_batch_test_third,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    beta: FLAGS.beta,
                    is_training: False
                }
                batch_first_visual, batch_second_visual, batch_third_visual, batch_scores = \
                    sess.run([first_visual, second_visual, third_visual, scores], feed_dict)

                # Only the first sample of the batch is visualized. `length`
                # is the number of tokens that have a weight: the raw token
                # count, capped at the padded length.
                seq_len = len(x_batch_test_content[0])
                pad_len = len(batch_first_visual[0])
                length = min(seq_len, pad_len)

                # print(seq_len, pad_len, length)
                final_first_visual = normalization(batch_first_visual[0].tolist(), length)
                final_second_visual = normalization(batch_second_visual[0].tolist(), length)
                final_third_visual = normalization(batch_third_visual[0].tolist(), length)

                visual_list = [final_first_visual, final_second_visual, final_third_visual]
                print(visual_list)

                # The file is opened in 'w' mode for every batch, so it ends
                # up holding the last batch only. The context manager makes
                # sure the handle is released even if a write fails.
                with open('attention.html', 'w') as f:
                    f.write('<html style="margin:0;padding:0;"><body style="margin:0;padding:0;">\n')
                    f.write('<div style="margin:25px;">\n')
                    for level_visual in visual_list:
                        f.write('<p style="margin:10px;">\n')
                        # Iterate over `length`, not `seq_len`: a text longer
                        # than the padded length has no weight beyond
                        # position pad_len - 1 (this assumes `normalization`
                        # returns at most pad_len values), and indexing past
                        # it raised IndexError.
                        for i in range(length):
                            alpha = "{:.2f}".format(level_visual[i])
                            word = x_batch_test_content[0][i]
                            f.write('\t<span style="margin-left:3px;background-color:rgba(255,0,0,{0})">{1}</span>\n'
                                    .format(alpha, word))
                        f.write('</p>\n')
                    f.write('</div>\n')
                    f.write('</body></html>')

    logger.info("✔︎ Done.")
def visualize():
    """Visualize HARNN model.

    Loads and pads the test data, restores the best or the latest checkpoint
    (as chosen through ``dh._option``), exports the restored graph as a
    frozen ``.pb`` file and, for every test batch, writes the normalized
    per-level "visual" weights of the batch's first sample to
    ``attention.html`` as red-shaded ``<span>`` elements.
    """

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file,
                                        args.num_classes_list,
                                        args.total_classes,
                                        args.word2vec_file,
                                        data_aug_flag=False)

    logger.info("Data padding...")
    x_test, y_test, y_test_tuple = dh.pad_data(test_data, args.pad_seq_len)
    x_test_content, y_test_labels = test_data.abstract_content, test_data.labels

    # Load harnn model
    OPTION = dh._option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR,
                                                 select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        # Using the session itself as the context manager installs it as the
        # default session AND closes it on exit (`as_default()` alone never
        # closes the session).
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y_first = graph.get_operation_by_name(
                "input_y_first").outputs[0]
            input_y_second = graph.get_operation_by_name(
                "input_y_second").outputs[0]
            input_y_third = graph.get_operation_by_name(
                "input_y_third").outputs[0]
            input_y_fourth = graph.get_operation_by_name(
                "input_y_fourth").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            alpha = graph.get_operation_by_name("alpha").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            first_visual = graph.get_operation_by_name(
                "first-output/visual").outputs[0]
            second_visual = graph.get_operation_by_name(
                "second-output/visual").outputs[0]
            third_visual = graph.get_operation_by_name(
                "third-output/visual").outputs[0]
            fourth_visual = graph.get_operation_by_name(
                "fourth-output/visual").outputs[0]
            scores = graph.get_operation_by_name("output/scores").outputs[0]

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "first-output/visual|second-output/visual|third-output/visual|fourth-output/visual|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def,
                                 "graph",
                                 "graph-harnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(
                zip(x_test, y_test, y_test_tuple, x_test_content,
                    y_test_labels)),
                                    args.batch_size,
                                    1,
                                    shuffle=False)

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_tuple, \
                    x_batch_test_content, y_batch_test_labels = zip(*batch_test)

                y_batch_test_first = [i[0] for i in y_batch_test_tuple]
                y_batch_test_second = [j[1] for j in y_batch_test_tuple]
                y_batch_test_third = [k[2] for k in y_batch_test_tuple]
                y_batch_test_fourth = [t[3] for t in y_batch_test_tuple]

                feed_dict = {
                    input_x: x_batch_test,
                    input_y_first: y_batch_test_first,
                    input_y_second: y_batch_test_second,
                    input_y_third: y_batch_test_third,
                    input_y_fourth: y_batch_test_fourth,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    alpha: args.alpha,
                    is_training: False
                }
                batch_first_visual, batch_second_visual, batch_third_visual, batch_fourth_visual = \
                    sess.run([first_visual, second_visual, third_visual, fourth_visual], feed_dict)

                # Only the first sample of the batch is visualized. `length`
                # is the number of tokens that have a weight: the raw token
                # count, capped at the padded length.
                seq_len = len(x_batch_test_content[0])
                pad_len = len(batch_first_visual[0])
                length = min(seq_len, pad_len)

                # print(seq_len, pad_len, length)
                final_first_visual = normalization(
                    batch_first_visual[0].tolist(), length)
                final_second_visual = normalization(
                    batch_second_visual[0].tolist(), length)
                final_third_visual = normalization(
                    batch_third_visual[0].tolist(), length)
                final_fourth_visual = normalization(
                    batch_fourth_visual[0].tolist(), length)

                visual_list = [
                    final_first_visual, final_second_visual,
                    final_third_visual, final_fourth_visual
                ]
                print(visual_list)

                # The file is opened in 'w' mode for every batch, so it ends
                # up holding the last batch only. The context manager makes
                # sure the handle is released even if a write fails.
                with open('attention.html', 'w') as f:
                    f.write(
                        '<html style="margin:0;padding:0;"><body style="margin:0;padding:0;">\n'
                    )
                    f.write('<div style="margin:25px;">\n')
                    for level_visual in visual_list:
                        f.write('<p style="margin:10px;">\n')
                        # Iterate over `length`, not `seq_len`: a text longer
                        # than the padded length has no weight beyond
                        # position pad_len - 1 (this assumes `normalization`
                        # returns at most pad_len values), and indexing past
                        # it raised IndexError.
                        for i in range(length):
                            # Must NOT be called `alpha`: that name is the
                            # placeholder tensor used as a feed_dict key, and
                            # rebinding it to a string broke the feed of
                            # every batch after the first one.
                            opacity = "{:.2f}".format(level_visual[i])
                            word = x_batch_test_content[0][i]
                            f.write(
                                '\t<span style="margin-left:3px;background-color:rgba(255,0,0,{0})">{1}</span>\n'
                                .format(opacity, word))
                        f.write('</p>\n')
                    f.write('</div>\n')
                    f.write('</body></html>')

    logger.info("Done.")
def test_rmidp():
    """Evaluate a saved RMIDP checkpoint on the test file.

    Restores either the best or the latest checkpoint (chosen through
    ``dh.option``), writes a frozen copy of the graph to
    ``graph/graph-rmidp-<MODEL>.pb``, runs every batch produced by
    ``dh.batch_iter`` through the model, logs Loss / PCC / DOA / RMSE / R2 and
    stores the predictions in ``SAVE_DIR/predictions.json``.
    """
    # Show the parameters of this run
    dh.tab_printer(args, logger)

    # Read the raw test data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(
        args.test_file, args.word2vec_file, data_aug_flag=False)

    # Pad every input to the configured sequence length
    logger.info("Data padding...")
    x_test_content, x_test_question, x_test_option, y_test = dh.pad_data(
        test_data, args.pad_seq_len)

    # Pick the checkpoint to restore
    if dh.option(pattern=1) == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(
            BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Rebuild the graph from the .meta file and load the weights
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def tensor_of(op_name):
                """Return the first output tensor of the named graph op."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Placeholders
            input_x_content = tensor_of("input_x_content")
            input_x_question = tensor_of("input_x_question")
            input_x_option = tensor_of("input_x_option")
            input_y = tensor_of("input_y")
            dropout_keep_prob = tensor_of("dropout_keep_prob")
            is_training = tensor_of("is_training")

            # Tensors to evaluate
            scores = tensor_of("output/scores")
            loss = tensor_of("loss/loss")

            # Freeze the graph; only "output/scores" is kept as output node
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, ["output/scores"])
            tf.train.write_graph(frozen_graph_def, "graph",
                                 "graph-rmidp-{0}.pb".format(MODEL),
                                 as_text=False)

            # Batches over the test set, in file order
            samples = list(zip(x_test_content, x_test_question,
                               x_test_option, y_test))
            batches = dh.batch_iter(samples, args.batch_size, 1, shuffle=False)

            batch_count, loss_sum = 0, 0.0
            true_labels = []
            predicted_scores = []

            for batch in batches:
                contents, questions, options, targets = zip(*batch)
                batch_scores, batch_loss = sess.run(
                    [scores, loss],
                    {
                        input_x_content: contents,
                        input_x_question: questions,
                        input_x_option: options,
                        input_y: targets,
                        dropout_keep_prob: 1.0,  # no dropout at test time
                        is_training: False
                    })

                # Accumulate ground truth and predictions for the metrics
                true_labels.extend(targets)
                predicted_scores.extend(batch_scores)

                loss_sum = loss_sum + batch_loss
                batch_count = batch_count + 1

            # PCC & DOA come from the project helper
            pcc, doa = dh.evaluation(true_labels, predicted_scores)
            # RMSE and R2
            rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
            r2 = r2_score(true_labels, predicted_scores)

            mean_loss = float(loss_sum / batch_count)

            logger.info("All Test Dataset: Loss {0:g} | PCC {1:g} | DOA {2:g} | RMSE {3:g} | R2 {4:g}"
                        .format(mean_loss, pcc, doa, rmse, r2))

            # Persist the predictions
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(
                output_file=SAVE_DIR + "/predictions.json",
                all_id=test_data.id,
                all_labels=true_labels,
                all_predict_scores=predicted_scores)

    logger.info("All Done.")
def test():
    """Evaluate a saved RNN checkpoint on the test CSV file.

    Asks on stdin whether the best or the latest checkpoint should be
    restored, rebuilds the graph from the checkpoint's ``.meta`` file, feeds
    the test students in batches of ``FLAGS.batch_size`` and reports RMSE,
    AUC and R^2 over all collected next-step predictions (printed and
    logged).

    Side effects: for every batch whose last student has a sequence length
    strictly between 50 and 100, that student's ids, correctness values and
    the fetched ``states`` tensor are appended to ``state.csv``.

    Raises:
        Whatever the data helper, TensorFlow or scikit-learn raise, e.g. when
        the checkpoint is missing or no full batch could be evaluated.
    """
    # Load data
    logger.info("Loading data...")

    logger.info("Training data processing...")
    test_students, test_max_num_problems, test_max_skill_num = \
        dh.read_test_data_from_csv_file(FLAGS.test_data_file)
    max_num_steps = test_max_num_problems
    max_num_skills = test_max_skill_num

    # Load rnn model
    BEST_OR_LATEST = input("Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha()
               and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input(
            "he format of your input is illegal, please re-input: ")

    # The prompt loop accepts lower-case answers, so the choice is compared
    # case-insensitively; if/else guarantees checkpoint_file is always bound.
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("latest")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def tensor_of(op_name):
                """Return the first output tensor of the named graph op."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Get the placeholders from the graph by name
            input_data = tensor_of("input_data")
            input_skill = tensor_of("input_skill")
            l = tensor_of("l")
            next_id = tensor_of("next_id")
            target_id = tensor_of("target_id")
            target_correctness = tensor_of("target_correctness")
            target_id2 = tensor_of("target_id2")
            target_correctness2 = tensor_of("target_correctness2")
            dropout_keep_prob = tensor_of("dropout_keep_prob")
            is_training = tensor_of("is_training")

            # Tensors we want to evaluate
            states = tensor_of("states")
            pred = tensor_of("pred")

            data_size = len(test_students)
            index = 0
            actual_labels = []
            pred_labels = []

            # "<=" so that a full batch ending exactly at the last student is
            # evaluated too; a trailing partial batch is still skipped.
            while index + FLAGS.batch_size <= data_size:
                x = np.zeros((FLAGS.batch_size, max_num_steps))
                xx = np.zeros((FLAGS.batch_size, max_num_steps))
                next_id_b = np.zeros((FLAGS.batch_size, max_num_steps))
                l_b = np.ones(
                    (FLAGS.batch_size, max_num_steps, max_num_skills))
                target_id_b = []
                target_correctness_b = []
                target_id2_b = []
                target_correctness2_b = []

                for i in range(FLAGS.batch_size):
                    student = test_students[index + i]
                    problem_ids = student[1]
                    correctness = student[2]
                    leng = len(problem_ids)
                    # Running per-skill counters for this student;
                    # answer_count starts at one, so the ratio is defined
                    # from the first step.
                    correct_num = np.zeros(max_num_skills)
                    answer_count = np.ones(max_num_skills)

                    # Step j is the input, step j + 1 is the target.
                    for j in range(len(problem_ids) - 1):
                        problem_id = int(problem_ids[j])
                        if int(correctness[j]) == 0:
                            # Wrong answers are offset by the number of skills
                            x[i, j] = problem_id + max_num_skills
                        else:
                            x[i, j] = problem_id
                            correct_num[problem_id] += 1
                        l_b[i, j] = correct_num / answer_count
                        answer_count[problem_id] += 1
                        xx[i, j] = problem_id
                        next_id_b[i, j] = int(problem_ids[j + 1])

                        # Flat (row-major) position of this step in the batch
                        flat_position = i * max_num_steps + j
                        next_correct = int(correctness[j + 1])
                        target_id_b.append(flat_position)
                        target_correctness_b.append(next_correct)
                        actual_labels.append(next_correct)
                        target_id2_b.append(flat_position)
                        target_correctness2_b.append(next_correct)

                index += FLAGS.batch_size

                feed_dict = {
                    input_data: x,
                    input_skill: xx,
                    l: l_b,
                    next_id: next_id_b,
                    target_id: target_id_b,
                    target_correctness: target_correctness_b,
                    target_id2: target_id2_b,
                    target_correctness2: target_correctness2_b,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }

                pred_b, state = sess.run([pred, states], feed_dict)
                print(np.shape(state))
                print(np.shape(pred_b))

                # np.squeeze(axis=0) raises unless the leading dimension is 1.
                # NOTE(review): presumably this script is run with a batch
                # size of 1 -- confirm.
                state = np.squeeze(state, axis=0)
                # `leng`, `student` and `problem_ids` refer to the LAST
                # student of the batch here.
                state = state[:leng]
                if 50 < leng < 100:
                    # Context manager so the handle is flushed and closed
                    # after every dump instead of being leaked.
                    with open('state.csv', 'a', newline='') as state_file:
                        writer = csv.writer(state_file)
                        writer.writerow([len(problem_ids)])
                        writer.writerow(student[1])
                        writer.writerow(student[2])
                        writer.writerow(state)
                        writer.writerow('\n')

                pred_labels.extend(pred_b)

            rmse = sqrt(mean_squared_error(actual_labels, pred_labels))
            fpr, tpr, thresholds = metrics.roc_curve(actual_labels,
                                                     pred_labels,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            # calculate r^2
            r2 = r2_score(actual_labels, pred_labels)

            print("epochs {0}: rmse {1:g}  auc {2:g}  r2 {3:g} ".format(
                1, rmse, auc, r2))
            logger.info("epochs {0}: rmse {1:g}  auc {2:g}  r2 {3:g} ".format(
                1, rmse, auc, r2))

    logger.info("Done.")
def test_cnn():
    """Evaluate a saved CNN checkpoint on the test file.

    Restores either the best or the latest checkpoint (chosen through
    ``dh._option``), writes a frozen copy of the graph to
    ``graph/graph-cnn-<MODEL>.pb``, runs every test batch through the model,
    logs Loss / Acc / Precision / Recall / F1 / AUC and stores the
    predictions in ``SAVE_DIR/predictions.json``.
    """
    # Show the parameters of this run
    dh.tab_printer(args, logger)

    # Word2vec vocabulary (the embedding matrix itself is not needed here)
    word2idx, _embedding_matrix = dh.load_word2vec_matrix(args.word2vec_file)

    # Read the test data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args, args.test_file, word2idx)

    # Pick the checkpoint to restore
    if dh._option(pattern=1) == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(
            BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Rebuild the graph from the .meta file and load the weights
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def tensor_of(op_name, output_index=0):
                """Return one output tensor of the named graph op."""
                return graph.get_operation_by_name(op_name).outputs[output_index]

            # Placeholders
            input_x_front = tensor_of("input_x_front")
            input_x_behind = tensor_of("input_x_behind")
            input_y = tensor_of("input_y")
            dropout_keep_prob = tensor_of("dropout_keep_prob")
            is_training = tensor_of("is_training")

            # Tensors to evaluate: output 0 / 1 of the top-k op, plus the loss
            scores = tensor_of("output/topKPreds", 0)
            predictions = tensor_of("output/topKPreds", 1)
            loss = tensor_of("loss/loss")

            # Freeze the graph with the top-k op as its only output node
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, ["output/topKPreds"])
            tf.train.write_graph(frozen_graph_def, "graph",
                                 "graph-cnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # Batches over the test set, in file order
            batches_test = dh.batch_iter(list(create_input_data(test_data)),
                                         args.batch_size, 1, shuffle=False)

            batch_count, loss_sum = 0, 0.0
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            for batch in batches_test:
                x_f, x_b, y_onehot = zip(*batch)
                batch_top_scores, batch_top_labels, batch_loss = sess.run(
                    [scores, predictions, loss],
                    {
                        input_x_front: x_f,
                        input_x_behind: x_b,
                        input_y: y_onehot,
                        dropout_keep_prob: 1.0,  # no dropout at test time
                        is_training: False
                    })

                # Class index of every one-hot target
                true_labels.extend(np.argmax(row) for row in y_onehot)
                # First entry of every row of the top-k outputs
                predicted_scores.extend(row[0] for row in batch_top_scores)
                predicted_labels.extend(row[0] for row in batch_top_labels)

                loss_sum = loss_sum + batch_loss
                batch_count = batch_count + 1

            mean_loss = float(loss_sum / batch_count)

            y_true = np.array(true_labels)
            y_pred = np.array(predicted_labels)

            # Accuracy, Precision, Recall, F1
            test_acc = accuracy_score(y_true=y_true, y_pred=y_pred)
            test_pre = precision_score(y_true=y_true, y_pred=y_pred,
                                       average='micro')
            test_rec = recall_score(y_true=y_true, y_pred=y_pred,
                                    average='micro')
            test_F1 = f1_score(y_true=y_true, y_pred=y_pred, average='micro')

            # Average AUC
            test_auc = roc_auc_score(y_true=y_true,
                                     y_score=np.array(predicted_scores),
                                     average='micro')

            logger.info("All Test Dataset: Loss {0:g} | Acc {1:g} | Precision {2:g} | "
                        "Recall {3:g} | F1 {4:g} | AUC {5:g}"
                        .format(mean_loss, test_acc, test_pre, test_rec,
                                test_F1, test_auc))

            # Persist the predictions
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(
                output_file=SAVE_DIR + "/predictions.json",
                front_data_id=test_data['f_id'],
                behind_data_id=test_data['b_id'],
                true_labels=true_labels,
                predict_labels=predicted_labels,
                predict_scores=predicted_scores)

    logger.info("All Done.")
def test_harnn():
    """Evaluate a saved HARNN checkpoint on the test file.

    Restores either the best or the latest checkpoint (chosen through
    ``dh._option``), writes a frozen copy of the graph to
    ``graph/graph-harnn-<MODEL>.pb`` and runs every test batch through the
    model.  Micro-averaged Precision / Recall / F1 / AUPRC are logged for the
    global output and for each of the four level outputs (AUC is logged for
    the global output only), and the thresholded global predictions are
    stored in ``SAVE_DIR/predictions.json``.
    """
    # Show the parameters of this run
    dh.tab_printer(args, logger)

    # Word2vec vocabulary (the embedding matrix itself is not needed here)
    word2idx, _embedding_matrix = dh.load_word2vec_matrix(args.word2vec_file)

    # Read the test data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args, args.test_file, word2idx)

    # Pick the checkpoint to restore
    if dh._option(pattern=1) == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(
            BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Rebuild the graph from the .meta file and load the weights
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def tensor_of(op_name):
                """Return the first output tensor of the named graph op."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Placeholders
            input_x = tensor_of("input_x")
            input_y_first = tensor_of("input_y_first")
            input_y_second = tensor_of("input_y_second")
            input_y_third = tensor_of("input_y_third")
            input_y_fourth = tensor_of("input_y_fourth")
            input_y = tensor_of("input_y")
            dropout_keep_prob = tensor_of("dropout_keep_prob")
            alpha = tensor_of("alpha")
            is_training = tensor_of("is_training")

            # Tensors to evaluate
            first_scores = tensor_of("first-output/scores")
            second_scores = tensor_of("second-output/scores")
            third_scores = tensor_of("third-output/scores")
            fourth_scores = tensor_of("fourth-output/scores")
            scores = tensor_of("output/scores")

            # Freeze the graph, keeping all five score ops as output nodes
            frozen_outputs = [
                "first-output/scores",
                "second-output/scores",
                "third-output/scores",
                "fourth-output/scores",
                "output/scores",
            ]
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, frozen_outputs)
            tf.train.write_graph(frozen_graph_def, "graph",
                                 "graph-harnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # Batches over the test set, in file order
            batches = dh.batch_iter(list(create_input_data(test_data)),
                                    args.batch_size, 1, shuffle=False)

            # Thresholded global predictions, saved to disk at the end
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # Per-output collections; slot 0 is the global output, slots 1-4
            # are the first to fourth level outputs
            true_onehot_labels = [[], [], [], [], []]
            predicted_onehot_scores = [[], [], [], [], []]
            predicted_onehot_labels = [[], [], [], [], []]

            # Fetch order matches the slot order above
            fetches = [scores, first_scores, second_scores, third_scores,
                       fourth_scores]

            for batch in batches:
                x, sec, subsec, group, subgroup, y_onehot, y = zip(*batch)
                targets_per_slot = [y_onehot, sec, subsec, group, subgroup]

                batch_scores = list(sess.run(
                    fetches,
                    {
                        input_x: x,
                        input_y_first: sec,
                        input_y_second: subsec,
                        input_y_third: group,
                        input_y_fourth: subgroup,
                        input_y: y_onehot,
                        dropout_keep_prob: 1.0,  # no dropout at test time
                        alpha: args.alpha,
                        is_training: False
                    }))

                # Thresholded labels / scores of the global output
                batch_labels_ts, batch_scores_ts = dh.get_label_threshold(
                    scores=batch_scores[0], threshold=args.threshold)

                true_labels.extend(y)
                predicted_labels.extend(batch_labels_ts)
                predicted_scores.extend(batch_scores_ts)

                for slot in range(len(predicted_onehot_scores)):
                    true_onehot_labels[slot].extend(targets_per_slot[slot])
                    predicted_onehot_scores[slot].extend(batch_scores[slot])
                    # One-hot prediction by threshold for this output
                    predicted_onehot_labels[slot].extend(
                        dh.get_onehot_label_threshold(
                            scores=batch_scores[slot],
                            threshold=args.threshold))

            # Micro-averaged metrics, one set per output
            per_slot = zip(true_onehot_labels, predicted_onehot_labels,
                           predicted_onehot_scores)
            for slot, (slot_truth, slot_pred, slot_scores) in enumerate(per_slot):
                y_true = np.array(slot_truth)
                y_pred = np.array(slot_pred)
                y_score = np.array(slot_scores)

                test_pre = precision_score(y_true=y_true, y_pred=y_pred,
                                           average='micro')
                test_rec = recall_score(y_true=y_true, y_pred=y_pred,
                                        average='micro')
                test_F1 = f1_score(y_true=y_true, y_pred=y_pred,
                                   average='micro')
                test_auc = roc_auc_score(y_true=y_true, y_score=y_score,
                                         average='micro')
                test_prc = average_precision_score(y_true=y_true,
                                                   y_score=y_score,
                                                   average="micro")

                if slot == 0:
                    logger.info(
                        "[Global] Predict by threshold: Precision {0:g}, Recall {1:g}, "
                        "F1 {2:g}, AUC {3:g}, AUPRC {4:g}".format(
                            test_pre, test_rec, test_F1, test_auc, test_prc))
                else:
                    logger.info(
                        "[Local] Predict by threshold in Level-{0}: Precision {1:g}, Recall {2:g}, "
                        "F1 {3:g}, AUPRC {4:g}".format(slot, test_pre,
                                                       test_rec, test_F1,
                                                       test_prc))

            # Persist the predictions
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(
                output_file=SAVE_DIR + "/predictions.json",
                data_id=test_data['uniq_id'],
                true_labels=true_labels,
                predict_labels=predicted_labels,
                predict_scores=predicted_scores)

    logger.info("All Done.")
def test_cnn():
    """Evaluate a saved CNN checkpoint on the padded test set.

    Asks on stdin whether the best or the latest checkpoint should be
    restored, writes a frozen copy of the graph to
    ``graph/graph-cnn-<MODEL>.pb`` and runs every test batch through the
    model.  Loss, AUC and AUPRC are logged together with micro-averaged
    Precision / Recall / F1 for the threshold-based prediction and for every
    top-k prediction with k in 1..``FLAGS.top_num``; the thresholded
    predictions are stored in ``SAVE_DIR/predictions.json``.
    """
    # Read the test data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file,
                                        FLAGS.num_classes,
                                        FLAGS.embedding_dim,
                                        data_aug_flag=False)

    logger.info("✔︎ Test data padding...")
    x_test, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len)
    y_test_labels = test_data.labels

    # Ask which checkpoint to restore until a valid answer is given
    choice = input("☛ Load Best or Latest Model?(B/L): ")
    while not choice.isalpha() or choice.upper() not in ('B', 'L'):
        choice = input("✘ The format of your input is illegal, please re-input: ")

    if choice.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Rebuild the graph from the .meta file and load the weights
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def tensor_of(op_name):
                """Return the first output tensor of the named graph op."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Placeholders
            input_x = tensor_of("input_x")
            input_y = tensor_of("input_y")
            dropout_keep_prob = tensor_of("dropout_keep_prob")
            is_training = tensor_of("is_training")

            # Tensors to evaluate
            scores = tensor_of("output/scores")
            loss = tensor_of("loss/loss")

            # Freeze the graph; only "output/scores" is kept as output node
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, ["output/scores"])
            tf.train.write_graph(frozen_graph_def, "graph",
                                 "graph-cnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # Batches over the test set, in file order
            batches = dh.batch_iter(list(zip(x_test, y_test, y_test_labels)),
                                    FLAGS.batch_size, 1, shuffle=False)

            batch_count, loss_sum = 0, 0.0

            # Thresholded predictions, saved to disk at the end
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # One-hot collections used for the metrics
            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_ts = []
            predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)]

            for batch in batches:
                x_batch, y_batch, y_batch_labels = zip(*batch)
                batch_scores, batch_loss = sess.run(
                    [scores, loss],
                    {
                        input_x: x_batch,
                        input_y: y_batch,
                        dropout_keep_prob: 1.0,  # no dropout at test time
                        is_training: False
                    })

                # Raw targets and scores
                true_onehot_labels.extend(y_batch)
                predicted_onehot_scores.extend(batch_scores)

                # Labels / scores selected by threshold
                batch_labels_ts, batch_scores_ts = dh.get_label_threshold(
                    scores=batch_scores, threshold=FLAGS.threshold)

                true_labels.extend(y_batch_labels)
                predicted_labels.extend(batch_labels_ts)
                predicted_scores.extend(batch_scores_ts)

                # One-hot predictions by threshold
                predicted_onehot_labels_ts.extend(
                    dh.get_onehot_label_threshold(scores=batch_scores,
                                                  threshold=FLAGS.threshold))

                # One-hot predictions by top-k; slot k-1 holds the top-k result
                for slot in range(FLAGS.top_num):
                    predicted_onehot_labels_tk[slot].extend(
                        dh.get_onehot_label_topk(scores=batch_scores,
                                                 top_num=slot + 1))

                loss_sum = loss_sum + batch_loss
                batch_count = batch_count + 1

            y_true = np.array(true_onehot_labels)

            # Precision / Recall / F1 for the threshold-based prediction
            y_pred_ts = np.array(predicted_onehot_labels_ts)
            test_pre_ts = precision_score(y_true=y_true, y_pred=y_pred_ts,
                                          average='micro')
            test_rec_ts = recall_score(y_true=y_true, y_pred=y_pred_ts,
                                       average='micro')
            test_F_ts = f1_score(y_true=y_true, y_pred=y_pred_ts,
                                 average='micro')

            # Precision / Recall / F1 for every top-k prediction
            test_pre_tk = [0.0] * FLAGS.top_num
            test_rec_tk = [0.0] * FLAGS.top_num
            test_F_tk = [0.0] * FLAGS.top_num
            for slot in range(FLAGS.top_num):
                y_pred_tk = np.array(predicted_onehot_labels_tk[slot])
                test_pre_tk[slot] = precision_score(y_true=y_true,
                                                    y_pred=y_pred_tk,
                                                    average='micro')
                test_rec_tk[slot] = recall_score(y_true=y_true,
                                                 y_pred=y_pred_tk,
                                                 average='micro')
                test_F_tk[slot] = f1_score(y_true=y_true, y_pred=y_pred_tk,
                                           average='micro')

            # Average AUC and average precision
            y_score = np.array(predicted_onehot_scores)
            test_auc = roc_auc_score(y_true=y_true, y_score=y_score,
                                     average='micro')
            test_prc = average_precision_score(y_true=y_true, y_score=y_score,
                                               average="micro")

            mean_loss = float(loss_sum / batch_count)

            logger.info("☛ All Test Dataset: Loss {0:g} | AUC {1:g} | AUPRC {2:g}"
                        .format(mean_loss, test_auc, test_prc))

            # Threshold-based result
            logger.info("☛ Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
                        .format(test_pre_ts, test_rec_ts, test_F_ts))

            # Top-k results
            logger.info("☛ Predict by topK:")
            for slot in range(FLAGS.top_num):
                logger.info("Top{0}: Precision {1:g}, Recall {2:g}, F {3:g}"
                            .format(slot + 1, test_pre_tk[slot],
                                    test_rec_tk[slot], test_F_tk[slot]))

            # Persist the predictions
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(
                output_file=SAVE_DIR + "/predictions.json",
                data_id=test_data.testid,
                all_labels=true_labels,
                all_predict_labels=predicted_labels,
                all_predict_scores=predicted_scores)

    logger.info("✔︎ Done.")
def test():
    """Evaluate a saved checkpoint on the test CSV file.

    Reads the test students, obtains the ``same`` / ``differ`` inputs from
    ``eb.embedding("clustered_skill_name.txt")``, asks on stdin whether the
    best or the latest checkpoint should be restored, rebuilds the graph
    from the checkpoint's ``.meta`` file and feeds the students in batches
    of ``FLAGS.batch_size``.  RMSE, AUC, R^2 and accuracy (predictions
    thresholded at 0.5) over all next-step predictions are logged.

    Raises:
        Whatever the data helpers, TensorFlow or scikit-learn raise, e.g.
        when the checkpoint is missing or the test set is empty.
    """
    test_students, test_max_num_problems, test_max_skill_num = \
        dh.read_test_data_from_csv_file(FLAGS.test_data_file)
    max_num_steps = test_max_num_problems
    max_num_skills = test_max_skill_num

    fileName = "clustered_skill_name.txt"
    same_b, differ_b = eb.embedding(fileName)

    BEST_OR_LATEST = input("Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha()
               and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input(
            "he format of your input is illegal, please re-input: ")

    # The prompt loop accepts lower-case answers, so the choice is compared
    # case-insensitively; if/else guarantees checkpoint_file is always bound.
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("latest")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            def tensor_of(op_name):
                """Return the first output tensor of the named graph op."""
                return graph.get_operation_by_name(op_name).outputs[0]

            # Placeholders
            input_data = tensor_of("input_data")
            next_id = tensor_of("next_id")
            target_id = tensor_of("target_id")
            target_correctness = tensor_of("target_correctness")
            dropout_keep_prob = tensor_of("dropout_keep_prob")
            is_training = tensor_of("is_training")
            same = tensor_of("same")
            differ = tensor_of("differ")

            # Only the predictions are evaluated
            pred = tensor_of("pred")

            data_size = len(test_students)
            index = 0
            actual_labels = []
            pred_labels = []

            while index < data_size:
                # Arrays always have the full batch shape; rows that are not
                # filled below stay zero and contribute no targets.
                x = np.zeros((FLAGS.batch_size, max_num_steps))
                next_id_b = np.zeros((FLAGS.batch_size, max_num_steps))
                target_id_b = []
                target_correctness_b = []

                # The final batch may hold fewer students than batch_size;
                # never index past the end of test_students.
                rows_in_batch = min(FLAGS.batch_size, data_size - index)
                for i in range(rows_in_batch):
                    student = test_students[index + i]
                    problem_ids = student[1]
                    correctness = student[2]

                    # Step j is the input, step j + 1 is the target.
                    for j in range(len(problem_ids) - 1):
                        problem_id = int(problem_ids[j])
                        if int(correctness[j]) == 0:
                            # Wrong answers are offset by the number of skills
                            x[i, j] = problem_id + max_num_skills
                        else:
                            x[i, j] = problem_id
                        next_id_b[i, j] = int(problem_ids[j + 1])

                        next_correct = int(correctness[j + 1])
                        # Flat (row-major) position of this step in the batch
                        target_id_b.append(i * max_num_steps + j)
                        target_correctness_b.append(next_correct)
                        actual_labels.append(next_correct)

                index += FLAGS.batch_size

                feed_dict = {
                    input_data: x,
                    next_id: next_id_b,
                    target_id: target_id_b,
                    target_correctness: target_correctness_b,
                    dropout_keep_prob: 1.0,
                    is_training: False,
                    same: same_b,
                    differ: differ_b
                }

                pred_b = sess.run(pred, feed_dict)
                pred_labels.extend(pred_b)

            rmse = sqrt(mean_squared_error(actual_labels, pred_labels))
            fpr, tpr, thresholds = metrics.roc_curve(actual_labels,
                                                     pred_labels,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            r2 = r2_score(actual_labels, pred_labels)

            # Accuracy of the predictions thresholded at 0.5
            hard_predictions = np.greater_equal(pred_labels, 0.5).astype(int)
            hits = np.equal(actual_labels, hard_predictions)
            acc = np.mean(hits.astype(int))

            logger.info(
                "epochs {0}: rmse {1:g}  auc {2:g}  r2 {3:g}  acc {4:g}".
                format(1, rmse, auc, r2, acc))