import os

import cv2
import numpy as np
import tqdm

# DB and load_ctw1500_labels are project-local helpers defined elsewhere in
# the repository; evaluate() appears below, and sketches of the remaining
# helpers follow each function.


def evaluate_all(gt_file_dir, gt_img_dir, ckpt_path, gpuid='0'):
    db = DB(ckpt_path, gpuid)
    img_list = os.listdir(gt_img_dir)
    show = './eva'
    make_dir(show)
    total_TP = 0
    total_gt_care_num = 0
    total_pred_care_num = 0
    for img_name in tqdm.tqdm(img_list):
        img = cv2.imread(os.path.join(gt_img_dir, img_name))
        pred_box_list, pred_score_list, _ = db.detect_img(
            os.path.join(gt_img_dir, img_name), ispoly=True, show_res=False)
        gt_file_name = os.path.splitext(img_name)[0] + '.txt'
        gt_boxes, tags = load_ctw1500_labels(
            os.path.join(gt_file_dir, gt_file_name))
        gt_care_list = []
        gt_dontcare_list = []
        for i, box in enumerate(gt_boxes):
            box = box.reshape((-1, 2)).tolist()
            if not tags[i]:  # a True tag marks a don't-care region
                gt_care_list.append(box)
            else:
                gt_dontcare_list.append(box)
        precision, recall, f1_score, TP, gt_care_num, pred_care_num, pairs_list = evaluate(
            gt_care_list, gt_dontcare_list, pred_box_list, overlap=0.5)
        # Visualize matched pairs: ground truth in green, predictions in blue (BGR).
        # np.int was removed from NumPy; use np.int32 for polyline coordinates.
        for pair in pairs_list:
            cv2.polylines(img, [np.array(pair['gt'], np.int32).reshape([-1, 1, 2])],
                          True, (0, 255, 0))
            cv2.polylines(img, [np.array(pair['pred'], np.int32).reshape([-1, 1, 2])],
                          True, (255, 0, 0))
        cv2.imwrite(os.path.join(show, img_name), img)
        total_TP += TP
        total_gt_care_num += gt_care_num
        total_pred_care_num += pred_care_num
    # Guard against empty denominators (no predictions, or no ground truth at all).
    total_precision = float(total_TP) / total_pred_care_num if total_pred_care_num else 0.0
    total_recall = float(total_TP) / total_gt_care_num if total_gt_care_num else 0.0
    total_f1_score = compute_f1_score(total_precision, total_recall)
    return total_precision, total_recall, total_f1_score
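
# `make_dir` is not shown in this section. A minimal sketch, assuming it
# simply creates the output directory (and any parents) when missing:
def make_dir(path):
    os.makedirs(path, exist_ok=True)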
def testModel(self):
    n_test_samples, max_length = self.data['X_test'].shape
    accuracy_test = []
    preds_test = []
    self.initModel()
    test_bar = ProgressBar('Testing', max=len(self.data['X_test']))
    for batch in minibatches_iter(self.data['X_test'], self.data['Y_test'],
                                  masks=self.data['mask_test'],
                                  char_inputs=self.data['C_test'],
                                  lexicons=self.lexicons['lexicons_test'],
                                  batch_size=self.batch_size):
        inputs, targets, masks, char_inputs, lexicons = batch
        test_bar.next(len(inputs))
        corrects = self.model.eval_fn(inputs, targets, masks, lexicons)
        _, preds = self.model.test_fn(inputs, targets, masks, lexicons)
        preds_test.append(preds)
        accuracy_test.append(corrects)
    this_test_accuracy = np.concatenate(
        accuracy_test)[0:n_test_samples].sum() / float(n_test_samples)
    test_bar.finish()
    print("Test accuracy: " + str(this_test_accuracy * 100) + "%")
    compute_f1_score(self.data['Y_test'], preds_test)
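
# `minibatches_iter` and `ProgressBar` come from the project's utilities and
# are not shown. A minimal sketch of the iterator, assuming all five
# containers are aligned NumPy arrays that can be sliced in parallel:
def minibatches_iter(inputs, targets, masks=None, char_inputs=None,
                     lexicons=None, batch_size=10, shuffle=False):
    # Hypothetical sketch: yield (inputs, targets, masks, char_inputs,
    # lexicons) batches of size `batch_size`, optionally shuffled.
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(indices), batch_size):
        batch = indices[start:start + batch_size]
        yield (inputs[batch], targets[batch], masks[batch],
               char_inputs[batch], lexicons[batch])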
def compute_metrics(self, predicted_names, test_labels, sample_infs,
                    print_labels=False):
    n_correct, f1 = 0, 0
    print("Predictions: ", len(predicted_names))
    print("Test labels: ", len(test_labels))
    for i in range(len(predicted_names)):
        if print_labels:
            print("Predicted: ", [sym.encode('utf-8') for sym in predicted_names[i]])
            print("Actual: ", [sym.encode('utf-8') for sym in test_labels[i]])
            print("")
            print("")
        # Accumulate a per-sample F1; a sample counts as correct only if the
        # predicted token sequence matches the label exactly.
        f1 += compute_f1_score(predicted_names[i], test_labels[i])
        if predicted_names[i] == test_labels[i]:
            n_correct += 1
            sample_infs[i].predicted_correctly = True
        else:
            sample_infs[i].predicted_correctly = False
    accuracy = n_correct / len(test_labels) * 100
    f1 = f1 * 100 / len(predicted_names)
    print("Absolute accuracy: ", accuracy)
    print("F1 score: ", f1)
    return accuracy, f1, predicted_names
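
# Here compute_f1_score is applied to two token sequences per sample, unlike
# the precision/recall variant used by the detection code in this section.
# A minimal sketch, assuming an overlap-based F1 over the two sequences:
def compute_f1_score(predicted, actual):
    # Hypothetical token-level variant: F1 over the overlap between the
    # predicted and actual token sets.
    pred_set, actual_set = set(predicted), set(actual)
    n_overlap = len(pred_set & actual_set)
    if n_overlap == 0:
        return 0.0
    precision = n_overlap / len(pred_set)
    recall = n_overlap / len(actual_set)
    return 2 * precision * recall / (precision + recall)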
def trainingModel(self):
    self.initModel()
    best_acc = 0
    best_validation_accuracy = 0
    stop_count = 0
    lr = self.learning_rate
    patience = self.patience
    n_dev_samples, max_length = self.data['X_dev'].shape
    n_test_samples, max_length = self.data['X_test'].shape
    for epoch in range(1, self.num_epochs + 1):
        print('Epoch %d (learning rate=%.4f, decay rate=%.4f): '
              % (epoch, lr, self.decay_rate))
        train_err = 0.0
        train_batches = 0
        train_bar = ProgressBar('Training', max=len(self.data['X_train']))
        for batch in minibatches_iter(self.data['X_train'], self.data['Y_train'],
                                      masks=self.data['mask_train'],
                                      char_inputs=self.data['C_train'],
                                      lexicons=self.lexicons['lexicons_train'],
                                      batch_size=self.batch_size, shuffle=True):
            inputs, targets, masks, char_inputs, lexicons = batch
            err = self.model.train_fn(inputs, targets, masks, lexicons)
            train_err += err
            train_bar.next(len(inputs))
            # Validate every `valid_freq` training batches.
            if train_batches > 0 and train_batches % self.valid_freq == 0:
                accuracy_valid = []
                for batch in minibatches_iter(self.data['X_dev'], self.data['Y_dev'],
                                              masks=self.data['mask_dev'],
                                              lexicons=self.lexicons['lexicons_dev'],
                                              char_inputs=self.data['C_dev'],
                                              batch_size=self.batch_size):
                    inputs, targets, masks, char_inputs, lexicons = batch
                    accuracy_valid.append(
                        self.model.eval_fn(inputs, targets, masks, lexicons))
                this_validation_accuracy = np.concatenate(accuracy_valid)[
                    0:n_dev_samples].sum() / float(n_dev_samples)
                if this_validation_accuracy > best_validation_accuracy:
                    print("\nTrain loss, " + str(train_err / self.valid_freq)
                          + ", validation accuracy: "
                          + str(this_validation_accuracy * 100) + "%")
                    best_validation_accuracy = this_validation_accuracy
                    # Evaluate on the test set whenever dev accuracy improves.
                    preds_test = []
                    accuracy_test = []
                    for batch in minibatches_iter(self.data['X_test'], self.data['Y_test'],
                                                  masks=self.data['mask_test'],
                                                  char_inputs=self.data['C_test'],
                                                  lexicons=self.lexicons['lexicons_test'],
                                                  batch_size=self.batch_size):
                        inputs, targets, masks, char_inputs, lexicons = batch
                        _, preds = self.model.test_fn(inputs, targets, masks, lexicons)
                        preds_test.append(preds)
                        accuracy_test.append(
                            self.model.eval_fn(inputs, targets, masks, lexicons))
                    this_test_accuracy = np.concatenate(accuracy_test)[
                        0:n_test_samples].sum() / float(n_test_samples)
                    print("F1-score: " + str(
                        compute_f1_score(self.data["Y_test"], preds_test,
                                         self.data['label_alphabet']) * 100))
                    print("Test accuracy: " + str(this_test_accuracy * 100) + "%")
                    if best_acc < this_test_accuracy:
                        best_acc = this_test_accuracy
                        write_model_data(self.model.network,
                                         self.model_path + '/best_model')
                train_err = 0
            train_batches += 1
        train_bar.finish()
        # Early stopping: quit once dev accuracy has failed to improve
        # `patience` validations in a row.
        if stop_count == patience:
            break
        # Re-compile the training function with the decayed learning rate.
        if self.update_algo != 'adadelta':
            lr = self.learning_rate / (1.0 + epoch * self.decay_rate)
            updates = utils.create_updates(self.model.loss_train, self.model.params,
                                           self.update_algo, lr,
                                           momentum=self.momentum)
            self.model.train_fn = theano.function(
                [self.model.input_var, self.model.target_var,
                 self.model.mask_var, self.model.lex_var],
                outputs=self.model.loss_train,
                updates=updates,
                allow_input_downcast=True)
        print("Epoch " + str(epoch) + " finished.")
    print("The final best acc: " + str(best_acc * 100) + "%")
    if self.output_predict:
        f = codecs.open('./results/10-fold.txt', 'a+', 'utf-8')
        f.write(str(best_acc * 100) + '\n')
        f.close()
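
# `write_model_data` is not shown. A sketch assuming the common Lasagne
# checkpoint pattern: pickle the network's parameter values so they can be
# restored later with lasagne.layers.set_all_param_values().
import pickle
import lasagne

def write_model_data(network, filename):
    values = lasagne.layers.get_all_param_values(network)
    with open(filename, 'wb') as f:
        pickle.dump(values, f)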
def evaluate(gt_care_list, gt_dontcare_list, pred_list, overlap=0.5):
    """Match predicted polygons against ground truth at an IoU threshold.

    :param gt_care_list: [-1, M, 2] ground-truth polygons to evaluate
    :param gt_dontcare_list: [-1, M, 2] ground-truth polygons to ignore
    :param pred_list: [-1, M, 2] predicted polygons
    :param overlap: IoU threshold for counting a match
    :return: precision, recall, f1_score, TP, number of cared ground-truth
        boxes, number of cared predictions, and the list of matched pairs
    """
    # Discard predictions that overlap a don't-care region.
    pred_care_list = []
    pred_dontcare_list = []
    if len(gt_dontcare_list) != 0:
        for pred_box in pred_list:
            flag = False
            for gt_box in gt_dontcare_list:
                if quad_iou(gt_box, pred_box) > overlap:
                    flag = True
                    break
            if not flag:
                pred_care_list.append(pred_box)
            else:
                pred_dontcare_list.append(pred_box)
    else:
        pred_care_list = pred_list

    # Greedily pair each ground-truth box with the first unmatched prediction
    # whose IoU exceeds the threshold; the break enforces one-to-one matching
    # so a single ground-truth box cannot inflate TP with multiple predictions.
    gt_care_flag_list = [False] * len(gt_care_list)
    pred_care_flag_list = [False] * len(pred_care_list)
    pairs_list = []
    for gt_i, gt_box in enumerate(gt_care_list):
        for pred_i, pred_box in enumerate(pred_care_list):
            if pred_care_flag_list[pred_i]:
                continue
            iou = quad_iou(gt_box, pred_box)
            if iou > overlap:
                pairs_list.append({'gt': gt_box, 'pred': pred_box, 'iou': iou})
                pred_care_flag_list[pred_i] = True
                gt_care_flag_list[gt_i] = True
                break

    TP = len(pairs_list)
    if len(gt_care_list) == 0:
        recall = 1.0
        precision = 1.0 if len(pred_care_list) == 0 else 0.0
    elif len(pred_care_list) == 0:
        recall = 0.0
        precision = 0.0
    else:
        recall = 1.0 * TP / len(gt_care_list)
        precision = 1.0 * TP / len(pred_care_list)
    f1_score = compute_f1_score(precision, recall)
    return precision, recall, f1_score, TP, len(gt_care_list), len(
        pred_care_list), pairs_list
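
# evaluate() depends on quad_iou and on the precision/recall form of
# compute_f1_score, neither of which appears above. Minimal sketches,
# assuming quad_iou is a polygon IoU (implemented here with shapely):
from shapely.geometry import Polygon

def quad_iou(box_a, box_b):
    # Hypothetical sketch: intersection-over-union of two polygons given as
    # [M, 2] point lists.
    poly_a = Polygon(np.array(box_a).reshape(-1, 2))
    poly_b = Polygon(np.array(box_b).reshape(-1, 2))
    if not poly_a.is_valid or not poly_b.is_valid:
        return 0.0
    union = poly_a.union(poly_b).area
    if union == 0:
        return 0.0
    return poly_a.intersection(poly_b).area / union

def compute_f1_score(precision, recall):
    # Harmonic mean of precision and recall, guarding the all-zero case.
    if precision + recall == 0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)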