def evaluate(model, dataset, test_keys, use_gpu):
    print("==> Test")
    with torch.no_grad():
        model.eval()
        fms = []
        taus = []
        human_taus = []
        eval_metric = 'avg' if args.metric == 'tvsum' else 'max'

        if args.verbose:
            table = [["No.", "Video", "F-score", "Kendall's Tau", "Avg human score"]]

        if args.save_results:
            h5_res = h5py.File(osp.join(args.save_dir, 'result.h5'), 'w')

        for key_idx, key in enumerate(test_keys):
            seq = dataset[key]['features'][...]
            seq = torch.from_numpy(seq).unsqueeze(0)
            if use_gpu:
                seq = seq.cuda()
            probs = model(seq)
            probs = probs.data.cpu().squeeze().numpy()

            cps = dataset[key]['change_points'][...]
            num_frames = dataset[key]['n_frames'][()]
            nfps = dataset[key]['n_frame_per_seg'][...].tolist()
            positions = dataset[key]['picks'][...]
            user_summary = dataset[key]['user_summary'][...]

            machine_summary = vsum_tools.generate_summary(probs, cps, num_frames, nfps, positions)
            fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary, eval_metric)
            fms.append(fm)

            kendaltau, human_avg_score = vsum_tools.kendaltau(machine_summary, user_summary)
            taus.append(kendaltau)
            human_taus.append(human_avg_score)

            if args.verbose:
                table.append([key_idx + 1, key,
                              "{:.4f}".format(fm),
                              "{:.4f}".format(kendaltau),
                              "{:.4f}".format(human_avg_score)])

            if args.save_results:
                h5_res.create_dataset(key + '/score', data=probs)
                h5_res.create_dataset(key + '/machine_summary', data=machine_summary)
                h5_res.create_dataset(key + '/gtscore', data=dataset[key]['gtscore'][...])
                h5_res.create_dataset(key + '/fm', data=fm)
                h5_res.create_dataset(key + '/tau', data=kendaltau)

        if args.verbose:
            print(tabulate(table))

        if args.save_results:
            h5_res.close()

        mean_fm = np.mean(fms)
        print("Average F-score {:.4%}".format(mean_fm))
        mean_tau = np.mean(taus)
        print("Average Kendall's tau {:.4f}".format(mean_tau))
        human_tau = np.mean(human_taus)
        print("Average Human tau {:.4f}".format(human_tau))

        return mean_fm, mean_tau
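# A minimal usage sketch for evaluate() above. Everything here is an assumption
# for illustration: the split-file layout, the DSN(...) constructor, and the
# checkpoint path are hypothetical. evaluate() also reads a global `args`
# namespace (metric / verbose / save_results / save_dir), which is assumed to
# have been parsed before this call.
import json

import h5py
import torch


def run_eval_sketch(h5_path, split_path, split_id, use_gpu=torch.cuda.is_available()):
    dataset = h5py.File(h5_path, 'r')          # e.g. eccv16_dataset_tvsum_google_pool5.h5
    with open(split_path) as f:
        split = json.load(f)[split_id]          # assumed: a list of {train_keys, test_keys} dicts
    test_keys = split['test_keys']

    model = DSN(in_dim=1024, hid_dim=256)       # hypothetical model constructor
    model.load_state_dict(torch.load('model_best.pth.tar', map_location='cpu'))  # placeholder path
    if use_gpu:
        model = model.cuda()

    mean_fm, mean_tau = evaluate(model, dataset, test_keys, use_gpu)
    dataset.close()
    return mean_fm, mean_tau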
def evaluate(self, epoch_i, step, writer):
    self.summarizer.eval()

    # ======================== testing set test ================================ #
    # ========================================================================== #
    out_dict = {}
    acc_list = []
    loss_list = []

    # for [video_tensor, video_gt, video_name] in self.test_loader:
    for [video_tensor, gtsummary, gtscore, cps, num_frames, nfps,
         positions, user_summary, name] in self.test_loader:
        # video_name = video_name[0]
        video_name = name[0]
        video_gt = gtsummary

        # video_pos = Variable(pos).cuda()
        video_feature = Variable(video_tensor).cuda()
        scores, = self.summarizer(video_feature)
        # scores = self.summarizer(video_pos, video_feature)

        classify_loss = self.classify_loss(
            scores, Variable(video_gt).view(-1, 1, 1).cuda())
        # classify_loss = self.weighted_binary_cross_entropy(scores, Variable(video_gt.view(-1, 1, 1)).cuda())

        scores = scores.cpu().detach().numpy().squeeze()
        cps = cps.numpy().squeeze(0)
        num_frames = num_frames.numpy().squeeze(0)
        nfps = nfps.numpy().squeeze(0).tolist()
        positions = positions.numpy().squeeze(0)
        user_summary = user_summary.numpy().squeeze(0)
        # print(user_summary.shape[0])

        machine_summary = vsum_tools.generate_summary(
            scores, cps, num_frames, nfps, positions)
        fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary, 'avg')

        # out_dict[video_name] = scores.squeeze(1).tolist()
        # P, R, f_score = self.f_score(scores.squeeze(), video_gt.squeeze(), True)
        loss_list.append(classify_loss.item())
        # acc_list.append(f_score)
        acc_list.append(fm)
        # log(f'video_name: {video_name:<9} P: {P:.3f} R:{R:.3f} f_score:{f_score:.3f}')
        log(f'video_name: {video_name:<9} f_score:{fm:.3f}')

    seclog([
        f'testing loss : {np.mean(loss_list):.3f} mean of f_score : {np.mean(acc_list):.3f}',
        'light_red'
    ])
    # seclog([f'testing f_score: {np.mean(acc_list):.3f}', 'blue'])
    # writer.add_scalar('test_loss', np.mean(loss_list), step)
    writer.add_scalar('test_f_score', np.mean(acc_list), step)
    writer.add_scalar('test_loss', np.mean(loss_list), step)

    return np.mean(acc_list)
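# The commented-out self.weighted_binary_cross_entropy(...) above is not defined
# in this snippet. A common formulation is sketched below as an assumption, not
# the author's code: keyframes are sparse, so positive targets are up-weighted
# by a hypothetical `pos_weight` factor.
import torch


def weighted_binary_cross_entropy(scores, targets, pos_weight=5.0, eps=1e-7):
    """BCE over per-frame scores in [0, 1]; positive frames weighted by pos_weight."""
    scores = scores.clamp(eps, 1.0 - eps)
    loss = -(pos_weight * targets * torch.log(scores) +
             (1.0 - targets) * torch.log(1.0 - scores))
    return loss.mean()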
def test(n_episodes=5,
         input_dim=1024,
         hidden_dim=256,
         W_init='normal',
         U_init='normal',
         weight_decay=1e-5,
         regularizer='L2',
         optimizer='adam',
         alpha=0.01,
         model_file='',
         eval_dataset='summe',
         verbose=True):
    assert eval_dataset in ['summe', 'tvsum']
    assert os.path.isfile(model_file)

    if eval_dataset == 'summe':
        eval_metric = 'max'
    elif eval_dataset == 'tvsum':
        eval_metric = 'avg'

    model_options = locals().copy()

    log_dir = 'log-test'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    logging.basicConfig(filename=log_dir + '/log.txt',
                        filemode='w',
                        format='%(asctime)s %(message)s',
                        datefmt='[%d/%m/%Y %I:%M:%S]',
                        level=logging.INFO)
    logger = logging.getLogger()
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(message)s',
                                  datefmt='[%d/%m/%Y %I:%M:%S]')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    logger.info('initializing net model')
    net = reinforceRNN(model_options)

    logger.info('loading %s data' % (eval_dataset))
    h5f_path = 'datasets/eccv16_dataset_' + eval_dataset + '_google_pool5.h5'
    dataset = h5py.File(h5f_path, 'r')
    dataset_keys = list(dataset.keys())  # list() so the keys can be indexed under Python 3
    n_videos = len(dataset_keys)

    logger.info('=> testing')
    start_time = time.time()
    fms = []
    precs = []
    recs = []

    for i_video in range(n_videos):
        key = dataset_keys[i_video]
        data_x = dataset[key]['features'][...].astype(_DTYPE)
        probs = net.model_inference(data_x)

        cps = dataset[key]['change_points'][...]
        n_frames = dataset[key]['n_frames'][()]
        nfps = dataset[key]['n_frame_per_seg'][...].tolist()
        positions = dataset[key]['picks'][...]

        machine_summary = vsum_tools.generate_summary(probs, cps, n_frames, nfps, positions)
        user_summary = dataset[key]['user_summary'][...]
        fm, prec, rec = vsum_tools.evaluate_summary(machine_summary, user_summary, eval_metric)
        fms.append(fm)
        precs.append(prec)
        recs.append(rec)

        if verbose:
            logger.info('video %s. fm=%f' % (key, fm))

    mean_fm = np.mean(fms)
    mean_prec = np.mean(precs)
    mean_rec = np.mean(recs)

    logger.info('========================= conclusion =========================')
    logger.info('-- recap of model options')
    logger.info(str(model_options))
    logger.info('-- final outcome')
    logger.info('f-measure {:.1%}. precision {:.1%}. recall {:.1%}.'.format(
        mean_fm, mean_prec, mean_rec))

    elapsed_time = time.time() - start_time
    logger.info('elapsed time %.2f s' % (elapsed_time))
    logger.info('==============================================================')

    dataset.close()
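# A hedged example of invoking test() above. The checkpoint path is a
# placeholder, and the corresponding eccv16_dataset_*_google_pool5.h5 file must
# already exist under datasets/ as the function expects.
if __name__ == '__main__':
    test(model_file='log/model_epoch60.h5',  # hypothetical path to a trained reinforceRNN checkpoint
         eval_dataset='tvsum',               # selects the 'avg' F-score protocol
         verbose=True)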
def visualize(self):
    # model_path = 'meeting2/tvsum/tvsum_standard_3layer18head/model/score-0.60574.pkl'
    model_path = 'log/tvsum_11.5_atten_only_posffn/model/epoch12_score-0.18391.pkl'
    self.summarizer.load_state_dict(torch.load(model_path))
    self.summarizer.eval()

    # ======================== testing set test ================================ #
    # ========================================================================== #
    out_dict = {}
    acc_list = []
    loss_list = []

    for [video_tensor, gtsummary, gtscore, cps, num_frames, nfps,
         positions, user_summary, name] in self.test_loader:
        video_name = name[0]
        video_feature = Variable(video_tensor).cuda()
        scores, att_map = self.summarizer(video_feature, return_attns=True)

        scores = scores.cpu().detach().numpy().squeeze()
        gtsummary = gtsummary.numpy().squeeze(0)
        gtscore = gtscore.numpy().squeeze(0)
        cps = cps.numpy().squeeze(0)
        num_frames = num_frames.numpy().squeeze(0)
        nfps = nfps.numpy().squeeze(0).tolist()
        positions = positions.numpy().squeeze(0)
        user_summary = user_summary.numpy().squeeze(0)

        save_path = f'log/tvsum_2layer8head_11.1/feature_map/'
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        save_path = save_path + f'{video_name}/'

        machine_summary = vsum_tools.generate_summary(
            scores, cps, num_frames, nfps, positions)
        fm, P, R = vsum_tools.evaluate_summary(machine_summary, user_summary, 'avg')

        user_score = np.zeros(len(user_summary[0]))
        for user in user_summary:
            user_score += user

        # =========================== Encoder attentive Decoder ================================== #
        # attention_map has shape [seq, head, layer, seq]
        attention_map = np.zeros((len(att_map), att_map[0][0][0].shape[0],
                                  len(att_map[0][0]), len(att_map)))
        for i in range(len(att_map)):
            for j in range(len(att_map[0][0])):
                attention_map[i, :, j, :] = att_map[i][0][j].cpu().detach().numpy().squeeze()

        for layer in range(attention_map.shape[2]):
            for h in range(attention_map.shape[1]):
                df_cm = pd.DataFrame(
                    attention_map[60:, h, layer, :],
                    index=[i for i in range(attention_map.shape[0] - 60)],
                    columns=[i for i in range(attention_map.shape[0])])
                # plt.figure(figsize=(10, 7))
                # sn.heatmap(df_cm, annot=True)
                f, ax = plt.subplots(figsize=(14 * 2, 14 * 2))
                sn.heatmap(df_cm, cmap='YlGnBu', linewidths=0.05, ax=ax)
                # sn.heatmap(df_cm, annot=True, ax=ax)
                # set the Axes title
                ax.set_title(f'Accuracy = {fm*100:.2f}')
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                f.savefig(save_path + f'layer{layer}head_{h}.jpg', dpi=100, bbox_inches='tight')
                plt.close()
        # ======================================================================================== #

        # =========================== original =================================================== #
        # att_map = att_map[0]
        # for i in range(3):
        #     att_map0 = att_map[i].cpu().detach().numpy()
        #     for h in range(len(att_map0)):
        #         df_cm = pd.DataFrame(att_map0[h],
        #                              index=[i for i in range(att_map0[h].shape[0])],
        #                              columns=[i for i in range(att_map0[h].shape[1])])
        #         # plt.figure(figsize=(10, 7))
        #         # sn.heatmap(df_cm, annot=True)
        #         f, ax = plt.subplots(figsize=(14 * 2, 14 * 2))
        #         sn.heatmap(df_cm, cmap='YlGnBu', linewidths=0.05, ax=ax)
        #         # sn.heatmap(df_cm, annot=True, ax=ax)
        #         # set the Axes title
        #         ax.set_title(f'Accuracy = {fm*100:.2f}')
        #         if not os.path.exists(save_path):
        #             os.mkdir(save_path)
        #         f.savefig(save_path + f'layer{i}head_{h}.jpg', dpi=100, bbox_inches='tight')
        #         plt.close()
        # ======================================================================================= #

        # plot score vs gtscore
        fig, axs = plt.subplots(3)
        n = len(gtscore)
        limits = int(math.floor(len(scores) * 0.15))
        order = np.argsort(scores)[::-1].tolist()
        picks = []
        total_len = 0
        for i in order:
            if total_len < limits:
                picks.append(i)
                total_len += 1

        y_scores = np.zeros(len(scores))
        y_scores[picks] = gtscore[picks]
        y_summary = np.zeros(len(scores))
        y_summary[picks] = gtsummary[picks]

        # machine_summary = user_score * machine_summary
        # set_trace()
        axs[0].bar(range(n), gtsummary, width=1, color='lightgray')
        axs[0].bar(range(n), y_summary, width=1, color='orange')
        axs[0].set_title("tvsum {} F-score {:.1%}".format(video_name, fm))
        axs[1].bar(range(n), gtscore, width=1, color='lightgray')
        axs[1].bar(range(n), y_scores, width=1, color='orange')
        plt.xticks(np.linspace(0, n, n // 20, endpoint=False, dtype=int))
        axs[2].bar(range(n), scores.tolist(), width=1, color='orange')
        plt.xticks(np.linspace(0, n, n // 20, endpoint=False, dtype=int))
        # axs[2].bar(range(len(user_score)), user_score, width=1, color='lightgray')
        # axs[2].bar(range(len(user_score)), user_score * machine_summary, width=1, color='orange')
        # for i in range(15):
        #     axs[i + 3].bar(range(len(user_score)), user_summary[i], width=1, color='lightgray')
        #     axs[i + 3].bar(range(len(user_score)), user_summary[i] * machine_summary, width=1, color='orange')
        #     print(i)

        # fig = plt.figure(figsize=(10, 60))
        fig.tight_layout()
        fig.savefig(save_path + 'visualization3.png', bbox_inches='tight')
        plt.close()

        acc_list.append(fm)
        log(f'video_name: {video_name:<9} P: {P:.3f} R:{R:.3f} f_score:{fm:.3f}')
        break

    seclog([f'testing f_score: {np.mean(acc_list):.3f}', 'blue'])
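# vsum_tools.generate_summary is called throughout these snippets but not shown.
# The sketch below is a simplified, greedy stand-in for the five-argument form
# used above (the real helper is typically knapsack-based): scores sampled at
# `positions` are spread back to every frame, averaged per shot (change points),
# and shots are selected by score until roughly 15% of the video is filled.
import numpy as np


def generate_summary_sketch(probs, cps, n_frames, nfps, positions, proportion=0.15):
    # upsample the subsampled scores to every frame
    frame_scores = np.zeros(n_frames, dtype=np.float32)
    if positions[-1] != n_frames:
        positions = np.concatenate([positions, [n_frames]])
    for i in range(len(positions) - 1):
        frame_scores[positions[i]:positions[i + 1]] = probs[i] if i < len(probs) else 0

    # average score per shot, where cps rows are inclusive [start, end] boundaries
    shot_scores = [frame_scores[start:end + 1].mean() for start, end in cps]

    # greedy selection under the length budget
    budget = int(n_frames * proportion)
    summary = np.zeros(n_frames, dtype=np.float32)
    used = 0
    for idx in np.argsort(shot_scores)[::-1]:
        if used + nfps[idx] <= budget:
            summary[int(cps[idx][0]):int(cps[idx][1]) + 1] = 1
            used += nfps[idx]
    return summary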
def evaluate(model, dataset, userscoreset, test_keys, use_gpu):
    print("==> Test")
    with torch.no_grad():
        model.eval()
        fms = []
        eval_metric = 'avg' if args.metric == 'tvsum' else 'max'

        if args.verbose:
            table = [["No.", "Video", "F-score"]]

        if args.save_results:
            h5_res = h5py.File(
                osp.join(args.save_dir,
                         'result_ep{}_split_{}_{}.h5'.format(
                             args.max_epoch, args.split_id, args.rnn_cell)), 'w')

        spear_avg_corrs = []
        kendal_avg_corrs = []

        if args.dataset is None:
            for key_idx, _ in enumerate(test_keys):
                key_parts = test_keys[key_idx].split('/')
                name, key = key_parts
                seq = dataset[name][key]['features'][...]
                seq = torch.from_numpy(seq).unsqueeze(0)
                if use_gpu:
                    seq = seq.cuda()
                probs, _, _ = model(seq)
                probs = probs.data.cpu().squeeze().numpy()

                cps = dataset[name][key]['change_points'][...]
                num_frames = dataset[name][key]['n_frames'][()]
                nfps = dataset[name][key]['n_frame_per_seg'][...].tolist()
                positions = dataset[name][key]['picks'][...]
                user_summary = dataset[name][key]['user_summary'][...]
                gtscore = dataset[name][key]['gtscore'][...]

                machine_summary, gt_frame_score = vsum_tools.generate_summary(
                    probs, gtscore, cps, num_frames, nfps, positions)
                fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary,
                                                       eval_metric)
                fms.append(fm)

                #### Calculate correlation metrics ####
                user_scores = userscoreset[key]["user_scores"][...]
                machine_scores = generate_scores(probs, num_frames, positions)
                spear_avg_corr = evaluate_scores(machine_scores, user_scores,
                                                 metric="spearmanr")
                kendal_avg_corr = evaluate_scores(machine_scores, user_scores,
                                                  metric="kendalltau")
                spear_avg_corrs.append(spear_avg_corr)
                kendal_avg_corrs.append(kendal_avg_corr)

                if args.verbose:
                    table.append([key_idx + 1, key, "{:.1%}".format(fm)])

                if args.save_results:
                    h5_res.create_dataset(key + '/gt_frame_score', data=gt_frame_score)
                    h5_res.create_dataset(key + '/score', data=probs)
                    h5_res.create_dataset(key + '/machine_summary', data=machine_summary)
                    h5_res.create_dataset(key + '/gtscore',
                                          data=dataset[name][key]['gtscore'][...])
                    h5_res.create_dataset(key + '/fm', data=fm)
        else:
            for key_idx, key in enumerate(test_keys):
                seq = dataset[key]['features'][...]
                seq = torch.from_numpy(seq).unsqueeze(0)
                if use_gpu:
                    seq = seq.cuda()
                probs, _, _ = model(seq)
                probs = probs.data.cpu().squeeze().numpy()

                cps = dataset[key]['change_points'][...]
                num_frames = dataset[key]['n_frames'][()]
                nfps = dataset[key]['n_frame_per_seg'][...].tolist()
                positions = dataset[key]['picks'][...]
                user_summary = dataset[key]['user_summary'][...]
                gtscore = dataset[key]['gtscore'][...]

                machine_summary, gt_frame_score = vsum_tools.generate_summary(
                    probs, gtscore, cps, num_frames, nfps, positions)
                fm, _, _ = vsum_tools.evaluate_summary(machine_summary, user_summary,
                                                       eval_metric)
                fms.append(fm)

                #### Calculate correlation metrics ####
                user_scores = userscoreset[key]["user_scores"][...]
                machine_scores = generate_scores(probs, num_frames, positions)
                spear_avg_corr = evaluate_scores(machine_scores, user_scores,
                                                 metric="spearmanr")
                kendal_avg_corr = evaluate_scores(machine_scores, user_scores,
                                                  metric="kendalltau")
                spear_avg_corrs.append(spear_avg_corr)
                kendal_avg_corrs.append(kendal_avg_corr)

                if args.verbose:
                    table.append([key_idx + 1, key, "{:.1%}".format(fm)])

                if args.save_results:
                    h5_res.create_dataset(key + '/gt_frame_score', data=gt_frame_score)
                    h5_res.create_dataset(key + '/score', data=probs)
                    h5_res.create_dataset(key + '/machine_summary', data=machine_summary)
                    h5_res.create_dataset(key + '/gtscore', data=dataset[key]['gtscore'][...])
                    h5_res.create_dataset(key + '/fm', data=fm)

        if args.verbose:
            print(tabulate(table))

        if args.save_results:
            h5_res.close()

        mean_fm = np.mean(fms)
        print("Average F1-score {:.1%}".format(mean_fm))
        mean_spear_avg = np.mean(spear_avg_corrs)
        mean_kendal_avg = np.mean(kendal_avg_corrs)
        print("Average Kendall's tau {}".format(mean_kendal_avg))
        print("Average Spearman's rho {}".format(mean_spear_avg))

        return mean_fm
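# generate_scores and evaluate_scores are project-local helpers not shown in
# this snippet. The sketch below is a plausible reading (an assumption) based on
# the rank-correlation protocol of Otani et al.: spread the subsampled
# predictions to every frame, then average Spearman's rho or Kendall's tau
# against each annotator's frame-level scores.
import numpy as np
from scipy import stats


def generate_scores_sketch(probs, n_frames, positions):
    # upsample per-pick scores to a per-frame score vector
    frame_scores = np.zeros(n_frames, dtype=np.float32)
    if positions[-1] != n_frames:
        positions = np.concatenate([positions, [n_frames]])
    for i in range(len(positions) - 1):
        frame_scores[positions[i]:positions[i + 1]] = probs[i] if i < len(probs) else 0
    return frame_scores


def evaluate_scores_sketch(machine_scores, user_scores, metric="spearmanr"):
    # mean correlation between the machine scores and each annotator's scores
    corr_fn = stats.spearmanr if metric == "spearmanr" else stats.kendalltau
    corrs = [corr_fn(machine_scores, annotator)[0] for annotator in user_scores]
    return float(np.mean(corrs))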