def without_segmentation_sequence_test(metadata_path, model_path, maxlen=200):
    """Classify every frame of one skeleton video with a windowed model.

    For each labelled frame of the selected video, the (at most) ``maxlen``
    most recent skeleton columns are pruned with ``skeleton_prune``, padded to
    ``maxlen`` with ``sequence.pad_sequences`` and stacked into one batch. The
    model loaded from ``model_path`` predicts a class per frame; ground truth
    and prediction are printed per frame and handed to
    ``vizutil.plot_segmentation``.

    Args:
        metadata_path: Path prefix (including the trailing separator) under
            which ``aligned_skeletons/*.mat`` and ``sequence_label.json`` live.
        model_path: Path passed to ``load_model``.
        maxlen: Number of frames in each model input window.

    Raises:
        ValueError: If ``sequence_label.json`` has no labelled frames for the
            selected video.
    """
    data_dim = 33
    label_list = [
        'null', 'reaching', 'moving', 'placing', 'opening', 'cleaning',
        'closing', 'pouring', 'eating', 'drinking'
    ]

    with open(metadata_path + 'sequence_label.json', 'r') as f:
        sequence_label = json.load(f)

    # NOTE: glob returns matches in arbitrary order, so the video selected by
    # this fixed index is not guaranteed to be the same on every machine.
    skeleton_list = glob.glob(metadata_path + 'aligned_skeletons/' + '*.mat')
    video_index = 5
    skeleton_file = skeleton_list[video_index]
    sequence_id = skeleton_file.split('/')[-1][:-4]  # file name without '.mat'
    cur_skeleton = np.transpose(scipy.io.loadmat(skeleton_file)['skeleton'])

    # Only the label segments that belong to the selected video matter.
    sequence_cuts = [
        cut for cut in sequence_label if cut['sequence_id'] == sequence_id
    ]
    num_frame = max([cut['end_frame'] for cut in sequence_cuts] + [0])
    if num_frame == 0:
        raise ValueError(
            'no labelled frames found for sequence %s' % sequence_id)

    y_all = np.zeros(num_frame)
    for cut in sequence_cuts:
        y_all[cut['start_frame']:cut['end_frame']] = label_list.index(
            cut['sequence_label'])

    x_all = np.zeros((num_frame, maxlen, data_dim))
    for frame in range(num_frame):
        if frame < maxlen:
            # Not enough history yet: use everything up to the current frame.
            x_temp = skeleton_prune(cur_skeleton[:, :frame + 1])
            x_temp = np.reshape(x_temp, (data_dim, frame + 1))
        else:
            x_temp = skeleton_prune(
                cur_skeleton[:, frame - (maxlen - 1):frame + 1])
        # pad_sequences pads each of the rows to maxlen; transpose so that
        # the window is stored as (maxlen, data_dim).
        x_all[frame, :, :] = np.transpose(
            sequence.pad_sequences(x_temp, dtype=float, maxlen=maxlen))

    model = load_model(model_path)
    prediction = model.predict(x_all)

    # Slice assignment keeps the float dtype and fails loudly if the model
    # returns a different number of rows than there are frames.
    predict_result = np.zeros(num_frame)
    predict_result[:] = np.argmax(prediction, axis=1)

    for truth, guess in zip(y_all, predict_result):
        print('%s %s' % (label_list[int(truth)], label_list[int(guess)]))

    # Pass the frame count (as the other evaluation functions do) rather than
    # the leaked loop variable, which was one short of it.
    vizutil.plot_segmentation(
        [y_all, predict_result, (y_all - predict_result) == 0], num_frame)
def without_segmentation_sequence_test_per_frame(metadata_path, model_path):
    """Classify every frame of one skeleton video independently.

    Each labelled frame of the selected video is pruned with
    ``skeleton_prune`` and fed, one row per frame, to the model loaded from
    ``model_path``. Ground truth and prediction are printed per frame, passed
    to ``vizutil.plot_segmentation``, and the overall accuracy is printed.

    Args:
        metadata_path: Path prefix (including the trailing separator) under
            which ``aligned_skeletons/*.mat`` and ``sequence_label.json`` live.
        model_path: Path passed to ``load_model``.

    Raises:
        ValueError: If ``sequence_label.json`` has no labelled frames for the
            selected video (the accuracy would otherwise divide by zero).
    """
    data_dim = 33
    label_list = [
        'null', 'reaching', 'moving', 'placing', 'opening', 'cleaning',
        'closing', 'pouring', 'eating', 'drinking'
    ]

    with open(metadata_path + 'sequence_label.json', 'r') as f:
        sequence_label = json.load(f)

    # NOTE: glob returns matches in arbitrary order, so the video selected by
    # this fixed index is not guaranteed to be the same on every machine.
    skeleton_list = glob.glob(metadata_path + 'aligned_skeletons/' + '*.mat')
    video_index = 60
    skeleton_file = skeleton_list[video_index]
    sequence_id = skeleton_file.split('/')[-1][:-4]  # file name without '.mat'
    cur_skeleton = np.transpose(scipy.io.loadmat(skeleton_file)['skeleton'])

    # Only the label segments that belong to the selected video matter.
    sequence_cuts = [
        cut for cut in sequence_label if cut['sequence_id'] == sequence_id
    ]
    num_frame = max([cut['end_frame'] for cut in sequence_cuts] + [0])
    if num_frame == 0:
        raise ValueError(
            'no labelled frames found for sequence %s' % sequence_id)

    y_all = np.zeros(num_frame)
    for cut in sequence_cuts:
        y_all[cut['start_frame']:cut['end_frame']] = label_list.index(
            cut['sequence_label'])

    x_all = np.zeros((num_frame, data_dim))
    for frame in range(num_frame):
        x_all[frame, :] = skeleton_prune(cur_skeleton[:, frame])

    model = load_model(model_path)
    prediction = model.predict(x_all)

    # Slice assignment keeps the float dtype and fails loudly if the model
    # returns a different number of rows than there are frames.
    predict_result = np.zeros(num_frame)
    predict_result[:] = np.argmax(prediction, axis=1)
    correct_num = int(np.sum(predict_result == y_all))

    for i in range(num_frame):
        print('%s %s %s' % (i, label_list[int(y_all[i])],
                            label_list[int(predict_result[i])]))

    # Pass the frame count (as the other evaluation functions do) rather than
    # the leaked loop variable, which was one short of it.
    vizutil.plot_segmentation(
        [y_all, predict_result, (y_all - predict_result) == 0], num_frame)
    print('accuracy %s' % (float(correct_num) / num_frame))
def without_segmentation_sequence_test_per_frame_sequential(
        data_root, metadata_path):
    """Evaluate the affordance model per object and per video on the test subjects.

    For every test subject the pre-computed features are run through the
    model once. The flat prediction array is then cut back into per-video,
    per-object tracks using the index ranges stored in
    ``affordance_frame_count.json``. For each track a segmentation plot is
    saved as PNG (accuracy encoded in the file name), and for each video the
    stacked per-object predictions are saved as ``<sequence_id>.npy``.

    Args:
        data_root: Path prefix (including the trailing separator) containing
            ``Subject<k>_rgbd_images/<action>/<sequence_id>`` directories.
        metadata_path: Path prefix (including the trailing separator)
            containing ``models/cnn/``, ``data/subject<k>/`` and, optionally,
            ``flipped/all/activity_corpus.p``.
    """
    num_classes = 12
    model_path = metadata_path + 'models/cnn/'
    model_name = 'affordance_mixed_feature_epoch_30_with_dropout_3_layer_with_weight_1.4_with_initialization.h5'
    result_path = metadata_path + 'data/affordance_result/'

    # The corpus is optional: fall back to an empty one instead of hitting a
    # NameError below when the pickle file is missing.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with corpus files you produced yourself.
    activity_corpus = dict()
    corpus_file = metadata_path + 'flipped/all/activity_corpus.p'
    if os.path.exists(corpus_file):
        with open(corpus_file, 'rb') as f:
            activity_corpus = pickle.load(f)

    # NOTE(review): tpg_id is built but not read anywhere in this function.
    tpg_id = dict()
    for activity, tpgs in activity_corpus.items():
        for tpg in tpgs:
            tpg_id[tpg.id] = tpg.terminals
    print('successful loading the model!')

    if not os.path.exists(result_path):
        os.mkdir(result_path)

    test_set = [1, 3, 4, 5]
    model = sequential_model(weights_path=model_path + model_name)

    for subject_index in test_set:
        subject_result_path = result_path + 'subject' + str(subject_index)
        if not os.path.exists(subject_result_path):
            os.mkdir(subject_result_path)

        subject_path = metadata_path + 'data/subject' + str(subject_index)
        subject = 'Subject' + str(subject_index) + '_rgbd_images/'
        action = os.listdir(data_root + subject)

        gt_all = np.load(subject_path + '/' + 'affordance_gt.npy')
        feature_all = np.load(
            subject_path + '/' + 'affordance_sequential_feature.npy')
        label_all = np.load(
            subject_path + '/' + 'affordance_object_label_feature.npy')
        with open(subject_path + '/' + 'affordance_frame_count.json',
                  'r') as f:
            frame_count = json.load(f)

        prediction = model.predict([feature_all, label_all])

        for action_category in action:
            video = os.listdir(data_root + subject + action_category)
            for sequence_id in video:
                video_info = frame_count[sequence_id]
                frame_length = video_info['length']
                num_obj = len(video_info['object'])
                sequence_prediction = np.zeros(
                    (num_obj, frame_length, num_classes))

                for index, segments in enumerate(video_info['frame_record']):
                    # Each object track is stored as a list of
                    # [start, end) row ranges into the flat arrays.
                    obj_prediction = np.concatenate(
                        [prediction[seg[0]:seg[1], :] for seg in segments],
                        axis=0)
                    obj_gt = np.concatenate(
                        [gt_all[seg[0]:seg[1], :] for seg in segments],
                        axis=0)

                    # Tracks shorter than the video are padded at the front.
                    if frame_length - obj_gt.shape[0] != 0:
                        obj_prediction = np.concatenate(
                            (fill_in(frame_length - obj_prediction.shape[0],
                                     num_classes, 0.8), obj_prediction),
                            axis=0)
                        obj_gt = np.concatenate(
                            (fill_in(frame_length - obj_gt.shape[0],
                                     num_classes, 0.8), obj_gt),
                            axis=0)

                    # Slice assignment keeps the float dtype and raises on a
                    # row-count mismatch instead of silently truncating.
                    predict_result = np.zeros(frame_length)
                    predict_result[:] = np.argmax(
                        obj_prediction[:frame_length], axis=1)
                    y_all = np.zeros(frame_length)
                    y_all[:] = np.argmax(obj_gt[:frame_length], axis=1)
                    correct_num = int(np.sum(y_all == predict_result))

                    vizutil.plot_segmentation(
                        [y_all, predict_result,
                         (y_all - predict_result) == 0], frame_length)
                    plt.savefig(
                        subject_result_path + '/' + sequence_id + '_' +
                        str(video_info['object'][index]) + str(index) + '_' +
                        str(float(correct_num) / frame_length) + '.png')
                    plt.close()
                    sequence_prediction[index, :, :] = obj_prediction

                # Let numpy open the file itself: the previous text-mode
                # handle was never closed and is not valid for binary data.
                np.save(subject_result_path + '/' + sequence_id + '.npy',
                        sequence_prediction)
def validate(val_loader, model, args, test=False):
    """Evaluate ``model`` on ``val_loader`` and write diagnostic plots.

    For every batch the model output is decoded with ``inference``; the
    ground truth, the raw prediction and the Earley-parsed prediction are
    plotted per sequence, and frame-wise detection as well as segment/frame
    prediction scores are accumulated. After the last batch the running
    averages and macro-averaged precision/recall/F-score tuples are printed
    and three confusion matrices are written below
    ``<args.tmp_root>/visualize/confusion/cad``.

    Args:
        val_loader: Iterable yielding ``(features, labels, probs,
            total_lengths, ctc_labels, ctc_lengths, activities,
            sequence_ids)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        args: Options object; ``args.cuda`` and ``args.tmp_root`` are read
            here and ``args`` is forwarded to ``inference``.
        test: Unused in this function; kept for interface compatibility.

    Returns:
        ``1.0`` minus the running average of the per-batch micro-averaged
        detection precision of the parsed labels.
    """

    def compute_accuracy(gt_results, results, metric='micro'):
        # Returns the (precision, recall, f-score, support) tuple.
        return sklearn.metrics.precision_recall_fscore_support(
            gt_results, results, labels=range(10), average=metric)

    def save_confusion_matrix(gt_results, results, pdf_name):
        # Plot the normalized confusion matrix of one result set to a PDF.
        subactivities = datasets.cad_metadata.subactivities
        confusion_matrix = sklearn.metrics.confusion_matrix(
            gt_results, results, labels=range(len(subactivities)))
        vizutil.plot_confusion_matrix(
            confusion_matrix, subactivities[:], normalize=True, title='',
            filename=os.path.join(args.tmp_root, 'visualize', 'confusion',
                                  'cad', pdf_name))

    batch_time = logutil.AverageMeter()
    baseline_acc_ratio = logutil.AverageMeter()
    subactivity_acc_ratio = logutil.AverageMeter()
    seg_pred_acc_ratio = logutil.AverageMeter()
    frame_pred_acc_ratio = logutil.AverageMeter()

    all_baseline_detections = list()
    all_gt_detections = list()
    all_detections = list()
    all_gt_seg_predictions = list()
    all_gt_frame_predictions = list()
    all_seg_predictions = list()
    all_frame_predictions = list()

    # switch to evaluate mode
    model.eval()

    end_time = time.time()
    for (features, labels, probs, total_lengths, ctc_labels, ctc_lengths,
         activities, sequence_ids) in val_loader:
        features = utils.to_variable(features, args.cuda)
        labels = utils.to_variable(labels, args.cuda)
        total_lengths = torch.autograd.Variable(total_lengths)

        # Inference
        model_outputs = model(features)
        (pred_labels, batch_earley_pred_labels, batch_tokens,
         batch_seg_pos) = inference(model_outputs, activities, sequence_ids,
                                    ctc_labels, args)

        # Visualize results
        for batch_i in range(labels.size()[1]):
            vizutil.plot_segmentation(
                [
                    labels[:, batch_i].squeeze(),
                    pred_labels[:, batch_i].squeeze(),
                    batch_earley_pred_labels[batch_i]
                ],
                int(total_lengths[batch_i]),
                filename=os.path.join(
                    args.tmp_root, 'visualize', 'segmentation', 'cad',
                    '{}_{}.pdf'.format(activities[batch_i],
                                       sequence_ids[batch_i])),
                border=False,
                vmax=len(datasets.cad_metadata.subactivities))

        # Evaluation
        # Frame-wise detection
        baseline_detections = pred_labels.cpu().data.numpy().flatten().tolist()
        gt_detections = labels.cpu().data.numpy().flatten().tolist()
        # A distinct loop name avoids clobbering `pred_labels`, which Python 2
        # list comprehensions would otherwise overwrite.
        detections = [
            l for earley_labels in batch_earley_pred_labels
            for l in earley_labels.tolist()
        ]
        all_baseline_detections.extend(baseline_detections)
        all_gt_detections.extend(gt_detections)
        all_detections.extend(detections)
        baseline_micro_result = compute_accuracy(gt_detections,
                                                 baseline_detections)
        subact_micro_result = compute_accuracy(gt_detections, detections)

        # Segment-wise and frame-wise prediction
        (gt_seg_predictions, gt_frame_predictions, seg_predictions,
         frame_predictions) = predict(activities, total_lengths, labels,
                                      ctc_labels, batch_tokens, batch_seg_pos)
        all_gt_seg_predictions.extend(gt_seg_predictions)
        all_gt_frame_predictions.extend(gt_frame_predictions)
        all_seg_predictions.extend(seg_predictions)
        all_frame_predictions.extend(frame_predictions)
        seg_pred_result = compute_accuracy(gt_seg_predictions,
                                           seg_predictions)
        frame_pred_result = compute_accuracy(gt_frame_predictions,
                                             frame_predictions)

        batch_frame_count = torch.sum(total_lengths).data[0]
        baseline_acc_ratio.update(baseline_micro_result[0], batch_frame_count)
        subactivity_acc_ratio.update(subact_micro_result[0], batch_frame_count)
        seg_pred_acc_ratio.update(seg_pred_result[0], batch_frame_count)
        # Weight by this batch's sample count; the cumulative total used
        # before gave later batches an ever-growing weight.
        frame_pred_acc_ratio.update(frame_pred_result[0],
                                    len(gt_frame_predictions))

        # Measure elapsed time
        batch_time.update(time.time() - end_time)
        end_time = time.time()

    print(' * Baseline Accuracy Ratio {base_acc.avg:.3f}; '.format(
        base_acc=baseline_acc_ratio))
    print(
        ' * Detection Accuracy Ratio {act_acc.avg:.3f}; Segment Prediction Accuracy Ratio Batch Avg {seg_pred_acc.avg:.3f}; Frame Prediction Accuracy Ratio Batch Avg {frame_pred_acc.avg:.3f}; Time {b_time.avg:.3f}'
        .format(act_acc=subactivity_acc_ratio,
                seg_pred_acc=seg_pred_acc_ratio,
                frame_pred_acc=frame_pred_acc_ratio,
                b_time=batch_time))
    print(
        compute_accuracy(all_gt_detections, all_baseline_detections,
                         metric='macro'))
    print(compute_accuracy(all_gt_detections, all_detections, metric='macro'))
    print(
        compute_accuracy(all_gt_seg_predictions, all_seg_predictions,
                         metric='macro'))
    print(
        compute_accuracy(all_gt_frame_predictions, all_frame_predictions,
                         metric='macro'))

    save_confusion_matrix(all_gt_detections, all_detections, 'detection.pdf')
    save_confusion_matrix(all_gt_frame_predictions, all_frame_predictions,
                          'prediction_frame.pdf')
    save_confusion_matrix(all_gt_seg_predictions, all_seg_predictions,
                          'prediction_seg.pdf')

    return 1.0 - subactivity_acc_ratio.avg
def without_segmentation_sequence_test_per_frame_sequential(metadata_path):
    """Evaluate the dense sub-activity classifier on the test subjects.

    A three-hidden-layer dense network is rebuilt and its weights are loaded
    from ``models/cnn/``. For every test subject the stored feature matrix
    ``sk_sq.npy`` is classified in one call. For each video listed in
    ``frame_count.json`` a segmentation plot (accuracy encoded in the file
    name), a confusion matrix and the raw predictions are written below
    ``data/subactivity_result/subject<k>/``; one confusion matrix over all
    frames of the subject is written as well.

    Args:
        metadata_path: Path prefix (including the trailing separator)
            containing ``models/cnn/`` and ``data/subject<k>/``.
    """
    label_list = [
        'null', 'reaching', 'moving', 'placing', 'opening', 'cleaning',
        'closing', 'pouring', 'eating', 'drinking'
    ]
    test_path = metadata_path + 'data/test'
    model_path = metadata_path + 'models/cnn/'
    model_name = 'mixed_feature_last_try_epoch_150_layer_3_with_initialization.h5'
    result_path = metadata_path + 'data/subactivity_result/'

    model = Sequential()
    model.add(Dense(512, init=my_init, input_dim=1452, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', init=my_init))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu', init=my_init))
    model.add(Dense(10, init=my_init))
    model.add(Activation('softmax'))
    model.load_weights(model_path + model_name)
    print('successful loading the model!')

    if not os.path.exists(result_path):
        os.mkdir(result_path)
    if not os.path.exists(test_path):
        os.mkdir(test_path)

    test_set = [1, 3, 4, 5]
    for subject_index in test_set:
        subject_path = metadata_path + 'data/subject' + str(subject_index)
        # Every output of this subject goes into its own directory, so make
        # sure it exists before the first savefig/np.save.
        subject_result_path = result_path + 'subject' + str(subject_index)
        if not os.path.exists(subject_result_path):
            os.mkdir(subject_result_path)

        # One image path per line, i.e. one line per frame.
        with open(subject_path + '/' + 'img_path.txt', 'r') as f:
            num_frame = len(f.readlines())

        label_all = np.load(subject_path + '/' + 'subactivity_gt.npy')
        sk_sq = np.load(subject_path + '/' + 'sk_sq.npy')
        prediction = model.predict(sk_sq)
        print('successful predicting the data!')

        # Slice assignment keeps the float dtype and raises on a row-count
        # mismatch instead of silently truncating.
        predict_result = np.zeros(num_frame)
        predict_result[:] = np.argmax(prediction[:num_frame], axis=1)
        y_all = np.zeros(num_frame)
        y_all[:] = np.argmax(label_all[:num_frame], axis=1)

        with open(subject_path + '/' + 'frame_count.json', 'r') as f:
            video_count = json.load(f)

        for video in video_count:
            print(video)
            start_num = video_count[video]['start_num']
            end_num = video_count[video]['end_num']
            video_gt = y_all[start_num:end_num]
            video_result = predict_result[start_num:end_num]
            correct_num = int(np.sum(video_result == video_gt))

            vizutil.plot_segmentation(
                [video_gt, video_result, (video_gt - video_result) == 0],
                end_num - start_num)
            plt.savefig(subject_result_path + '/' + video + '_' +
                        str(float(correct_num) / (end_num - start_num)) +
                        '.png')
            plt.close()

            cm = sklearn.metrics.confusion_matrix(
                video_gt, video_result, labels=range(10))
            vizutil.plot_confusion_matrix(
                cm, classes=label_list, normalize=True,
                filename=subject_result_path + '/' + video +
                '_confusion.png')

            # Let numpy open the file itself: the previous text-mode handle
            # was never closed and is not valid for binary data.
            np.save(subject_result_path + '/' + video + '.npy',
                    prediction[start_num:end_num, :])

        cm = sklearn.metrics.confusion_matrix(
            y_all, predict_result, labels=range(10))
        vizutil.plot_confusion_matrix(
            cm, classes=label_list, normalize=True,
            filename=subject_result_path + '/' + 'a_confusion_all.png')
def without_segmentation_sequence_test_per_frame_vgg16(metadata_path):
    """Evaluate the fine-tuned VGG-16 sub-activity classifier frame by frame.

    For every test subject the images listed in ``img_path.txt`` are fed to
    the network through ``img_from_list_test``. For each video listed in
    ``frame_count.json`` the raw predictions and a segmentation plot
    (accuracy encoded in the file name) are written below
    ``data/subactivity_result/subject<k>/``. Micro- and macro-averaged
    precision/recall/F-score over all frames of the subject are printed.

    Args:
        metadata_path: Path prefix (including the trailing separator)
            containing ``models/cnn/`` and ``data/subject<k>/``.
    """
    nb_classes = 10
    batch_size = 1
    test_path = metadata_path + 'data/test'
    model_path = metadata_path + 'models/cnn/'
    model_name = 'vgg_tune_subactivity_train_134_learning_rate_-5_.h5'
    result_path = metadata_path + 'data/subactivity_result/'

    model = vgg_16(model_path + model_name, nb_classes)
    print('successful loading the model!')

    if not os.path.exists(result_path):
        os.mkdir(result_path)
    if not os.path.exists(test_path):
        os.mkdir(test_path)

    test_set = [5]
    for subject_index in test_set:
        subject_path = metadata_path + 'data/subject' + str(subject_index)
        img_path = subject_path + '/' + 'img_path.txt'
        bdb_path = subject_path + '/' + 'bdb_gt.npy'
        # Every output of this subject goes into its own directory, so make
        # sure it exists before the first np.save/savefig.
        subject_result_path = result_path + 'subject' + str(subject_index)
        if not os.path.exists(subject_result_path):
            os.mkdir(subject_result_path)

        # One image path per line, i.e. one line per frame.
        with open(img_path, 'r') as f:
            num_frame = len(f.readlines())

        # Labels are loaded per subject. The former "concatenate with the
        # previous subjects" branch could never run because its counter was
        # reset on every iteration.
        label_all = np.load(subject_path + '/' + 'subactivity_gt.npy')

        # Frames left over after the last full batch.
        print(num_frame % batch_size)

        test_generator = img_from_list_test(batch_size, img_path, bdb_path)
        prediction = model.predict_generator(test_generator,
                                             val_samples=num_frame)
        print('successful predicting the data!')

        # Slice assignment keeps the float dtype and raises on a row-count
        # mismatch instead of silently truncating.
        predict_result = np.zeros(num_frame)
        predict_result[:] = np.argmax(prediction[:num_frame], axis=1)
        y_all = np.zeros(num_frame)
        y_all[:] = np.argmax(label_all[:num_frame], axis=1)

        with open(subject_path + '/' + 'frame_count.json', 'r') as f:
            video_count = json.load(f)

        for video in video_count:
            print(video)
            start_num = video_count[video]['start_num']
            end_num = video_count[video]['end_num']
            video_gt = y_all[start_num:end_num]
            video_result = predict_result[start_num:end_num]
            correct_num = int(np.sum(video_result == video_gt))

            vizutil.plot_segmentation(
                [video_gt, video_result, (video_gt - video_result) == 0],
                end_num - start_num)
            # Let numpy open the file itself: the previous text-mode handle
            # was never closed and is not valid for binary data.
            np.save(subject_result_path + '/' + video + '.npy',
                    prediction[start_num:end_num, :])
            plt.savefig(subject_result_path + '/' + video + '_' +
                        str(float(correct_num) / (end_num - start_num)) +
                        '.png')
            plt.close()

        for average in ('micro', 'macro'):
            precision, recall, beta_score, support = (
                sklearn.metrics.precision_recall_fscore_support(
                    y_all, predict_result, labels=range(10),
                    average=average))
            print('%s result' % average)
            print('%s %s %s %s' % (precision, recall, beta_score, support))
            print(get_f1_score(precision, recall))