import json
import os

import numpy as np
import torch

import utils                              # repo-local helpers: minmax_norm, upgrade_resolution, get_proposal_oic, nms, result2json
from eval_detection import ANETdetection  # ActivityNet-style detection evaluator (module path assumed)


def test(net, config, logger, test_loader, test_info, step, model_file=None):
    with torch.no_grad():
        net.eval()

        if model_file is not None:
            net.load_state_dict(torch.load(model_file))

        final_res = {}
        final_res['version'] = 'VERSION 1.3'
        final_res['results'] = {}
        final_res['external_data'] = {
            'used': True,
            'details': 'Features from I3D Network'
        }

        num_correct = 0.
        num_total = 0.

        load_iter = iter(test_loader)

        for _ in range(len(test_loader.dataset)):
            _data, _label, _, vid_name, vid_num_seg = next(load_iter)

            _data = _data.cuda()
            _label = _label.cuda()

            vid_num_seg = vid_num_seg[0].cpu().item()
            num_segments = _data.shape[1]

            score_act, _, feat_act, feat_bkg, features, cas_softmax = net(_data)

            # Mean L2 magnitude of the pseudo-action and pseudo-background features
            feat_magnitudes_act = torch.mean(torch.norm(feat_act, dim=2), dim=1)
            feat_magnitudes_bkg = torch.mean(torch.norm(feat_bkg, dim=2), dim=1)

            label_np = _label.cpu().data.numpy()
            score_np = score_act[0].cpu().data.numpy()

            # Binarize the video-level scores for classification accuracy
            pred_np = np.zeros_like(score_np)
            pred_np[np.where(score_np < config.class_thresh)] = 0
            pred_np[np.where(score_np >= config.class_thresh)] = 1

            # A video counts as correct only if every class is predicted correctly
            correct_pred = np.sum(label_np == pred_np, axis=1)
            num_correct += np.sum((correct_pred == config.num_classes).astype(np.float32))
            num_total += correct_pred.shape[0]

            # Per-segment feature magnitudes, normalized between background and action statistics
            feat_magnitudes = torch.norm(features, p=2, dim=2)
            feat_magnitudes = utils.minmax_norm(feat_magnitudes,
                                                max_val=feat_magnitudes_act,
                                                min_val=feat_magnitudes_bkg)
            feat_magnitudes = feat_magnitudes.repeat((config.num_classes, 1, 1)).permute(1, 2, 0)

            # Fuse the CAS with the normalized feature magnitudes
            cas = utils.minmax_norm(cas_softmax * feat_magnitudes)

            # Candidate classes; fall back to the top-scoring class if none passes the threshold
            pred = np.where(score_np >= config.class_thresh)[0]
            if len(pred) == 0:
                pred = np.array([np.argmax(score_np)])

            cas_pred = cas[0].cpu().numpy()[:, pred]
            cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
            cas_pred = utils.upgrade_resolution(cas_pred, config.scale)

            proposal_dict = {}

            feat_magnitudes_np = feat_magnitudes[0].cpu().data.numpy()[:, pred]
            feat_magnitudes_np = np.reshape(feat_magnitudes_np, (num_segments, -1, 1))
            feat_magnitudes_np = utils.upgrade_resolution(feat_magnitudes_np, config.scale)

            # Proposals from thresholding the CAS at multiple levels
            for i in range(len(config.act_thresh_cas)):
                cas_temp = cas_pred.copy()

                zero_location = np.where(cas_temp[:, :, 0] < config.act_thresh_cas[i])
                cas_temp[zero_location] = 0

                seg_list = []
                for c in range(len(pred)):
                    pos = np.where(cas_temp[:, c, 0] > 0)
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np, pred,
                                                   config.scale, vid_num_seg,
                                                   config.feature_fps, num_segments)

                for j in range(len(proposals)):
                    class_id = proposals[j][0][0]
                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []
                    proposal_dict[class_id] += proposals[j]

            # Proposals from thresholding the feature magnitudes at multiple levels
            for i in range(len(config.act_thresh_magnitudes)):
                cas_temp = cas_pred.copy()

                feat_magnitudes_np_temp = feat_magnitudes_np.copy()
                zero_location = np.where(feat_magnitudes_np_temp[:, :, 0] < config.act_thresh_magnitudes[i])
                feat_magnitudes_np_temp[zero_location] = 0

                seg_list = []
                for c in range(len(pred)):
                    pos = np.where(feat_magnitudes_np_temp[:, c, 0] > 0)
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np, pred,
                                                   config.scale, vid_num_seg,
                                                   config.feature_fps, num_segments)

                for j in range(len(proposals)):
                    class_id = proposals[j][0][0]
                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []
                    proposal_dict[class_id] += proposals[j]

            # Class-wise non-maximum suppression over the merged proposals
            final_proposals = []
            for class_id in proposal_dict.keys():
                final_proposals.append(utils.nms(proposal_dict[class_id], 0.6))

            final_res['results'][vid_name[0]] = utils.result2json(final_proposals)

        test_acc = num_correct / num_total

        json_path = os.path.join(config.output_path, 'result.json')
        with open(json_path, 'w') as f:
            json.dump(final_res, f)

        # Detection evaluation at tIoU thresholds 0.1 to 0.7
        tIoU_thresh = np.linspace(0.1, 0.7, 7)
        anet_detection = ANETdetection(config.gt_path, json_path,
                                       subset='test',
                                       tiou_thresholds=tIoU_thresh,
                                       verbose=False,
                                       check_status=False)
        mAP, average_mAP = anet_detection.evaluate()

        logger.log_value('Test accuracy', test_acc, step)
        for i in range(tIoU_thresh.shape[0]):
            logger.log_value('mAP@{:.1f}'.format(tIoU_thresh[i]), mAP[i], step)
        logger.log_value('Average mAP', average_mAP, step)

        test_info["step"].append(step)
        test_info["test_acc"].append(test_acc)
        test_info["average_mAP"].append(average_mAP)
        for i in range(tIoU_thresh.shape[0]):
            test_info["mAP@{:.1f}".format(tIoU_thresh[i])].append(mAP[i])
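# Usage sketch (assumption, not part of the original code): how test() above is
# typically driven for a single evaluation pass. build_model() and
# build_test_loader() are hypothetical stand-ins for the repo's own model and
# data-loading code, and the tensorboard_logger package is assumed as the backend
# for the logger.log_value(name, value, step) calls made inside test().
def run_evaluation_sketch(config, step=0):
    from collections import defaultdict
    from tensorboard_logger import Logger     # assumed logger backend

    net = build_model(config).cuda()           # hypothetical model factory
    test_loader = build_test_loader(config)    # hypothetical DataLoader factory

    logger = Logger(config.log_path)           # log_path / model_file attributes assumed on config
    test_info = defaultdict(list)              # collects 'step', 'test_acc', 'average_mAP', 'mAP@0.x'

    test(net, config, logger, test_loader, test_info, step,
         model_file=config.model_file)
    return test_info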
def test(net, config, logger, test_loader, test_info, step, model_file=None):
    with torch.no_grad():
        net.eval()

        if model_file is not None:
            net.load_state_dict(torch.load(model_file))

        final_res = {}
        final_res['version'] = 'VERSION 1.3'
        final_res['results'] = {}
        final_res['external_data'] = {
            'used': True,
            'details': 'Features from I3D Network'
        }

        num_correct = 0.
        num_total = 0.

        load_iter = iter(test_loader)

        for _ in range(len(test_loader.dataset)):
            # _data: video features, _label: video-level label,
            # vid_name: video name, vid_num_seg: true length of the feature sequence
            _data, _label, _, vid_name, vid_num_seg = next(load_iter)

            _data = _data.cuda()
            _label = _label.cuda()

            # cas_base: (1, T, 21), score_supp: (1, 21),
            # cas_supp: (1, T, 21), fore_weights: (1, T, 1)
            _, cas_base, score_supp, cas_supp, fore_weights = net(_data)

            label_np = _label.cpu().numpy()
            # Scores of the action classes only; the background class (last column) is dropped -> (20,)
            score_np = score_supp[0, :-1].cpu().data.numpy()

            # Binarize a copy for classification accuracy (class_thresh = 0.25);
            # the raw scores stay in score_np for proposal scoring below
            pred_np = np.zeros_like(score_np)
            pred_np[np.where(score_np < config.class_thresh)] = 0
            pred_np[np.where(score_np >= config.class_thresh)] = 1

            # A video counts as correct only if all 20 classes are predicted correctly
            correct_pred = np.sum(label_np == pred_np, axis=1)
            num_correct += np.sum((correct_pred == config.num_classes).astype(np.float32))  # correctly classified videos
            num_total += correct_pred.shape[0]  # total number of videos

            # Min-max normalization keeps the CAS values in a stable range
            cas_base = utils.minmax_norm(cas_base)  # (B, T, C+1)
            cas_supp = utils.minmax_norm(cas_supp)  # (B, T, C+1)

            # Indices of the action classes predicted for this video (threshold 0.25)
            pred = np.where(score_np >= config.class_thresh)[0]

            if len(pred) > 0:
                cas_pred = cas_supp[0].cpu().numpy()[:, pred]                  # (T, len(pred))
                cas_pred = np.reshape(cas_pred, (config.num_segments, -1, 1))  # (T, len(pred), 1)

                # Linear interpolation to a finer temporal grid, e.g. (18000, len(pred), 1) with scale = 24
                cas_pred = utils.upgrade_resolution(cas_pred, config.scale)

                proposal_dict = {}

                for i in range(len(config.act_thresh)):  # act_thresh = np.arange(0.0, 0.25, 0.025)
                    cas_temp = cas_pred.copy()  # (18000, len(pred), 1)

                    # Positions below the current threshold, e.g. [0, 1, 2, 3, 1531, 1532, ..., 9910]
                    zero_location = np.where(cas_temp[:, :, 0] < config.act_thresh[i])
                    cas_temp[zero_location] = 0

                    # seg_list[c] holds the positions that stay active for the c-th predicted class
                    seg_list = []
                    for c in range(len(pred)):
                        pos = np.where(cas_temp[:, c, 0] > 0)
                        seg_list.append(pos)

                    # Each proposal is (class, score, start, end), e.g. [[[5, 0.0025, 169.42, 169.6]]]
                    proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np, pred,
                                                       config.scale,
                                                       vid_num_seg[0].cpu().item(),
                                                       config.feature_fps,
                                                       config.num_segments)

                    for j in range(len(proposals)):
                        class_id = proposals[j][0][0]
                        if class_id not in proposal_dict.keys():
                            proposal_dict[class_id] = []
                        proposal_dict[class_id] += proposals[j]

                # Class-wise non-maximum suppression over the merged proposals
                final_proposals = []
                for class_id in proposal_dict.keys():
                    final_proposals.append(utils.nms(proposal_dict[class_id], 0.7))

                final_res['results'][vid_name[0]] = utils.result2json(final_proposals)

        test_acc = num_correct / num_total

        json_path = os.path.join(config.output_path, 'temp_result.json')
        with open(json_path, 'w') as f:
            json.dump(final_res, f)

        # Detection evaluation at tIoU thresholds 0.1 to 0.9
        tIoU_thresh = np.linspace(0.1, 0.9, 9)
        anet_detection = ANETdetection(config.gt_path, json_path,
                                       subset='test',
                                       tiou_thresholds=tIoU_thresh,
                                       verbose=False,
                                       check_status=False)
        mAP, average_mAP = anet_detection.evaluate()

        logger.log_value('Test accuracy', test_acc, step)
        for i in range(tIoU_thresh.shape[0]):
            logger.log_value('mAP@{:.1f}'.format(tIoU_thresh[i]), mAP[i], step)
        logger.log_value('Average mAP', average_mAP, step)

        test_info["step"].append(step)
        test_info["test_acc"].append(test_acc)
        test_info["average_mAP"].append(average_mAP)
        for i in range(tIoU_thresh.shape[0]):
            test_info["mAP@{:.1f}".format(tIoU_thresh[i])].append(mAP[i])
def eval_one_batch(self, data, net, class_dict):
    features, _label, segm, vid_name, vid_num_seg = data

    # Skip videos without any positive label
    if _label.sum() == 0:
        return

    elem, _, _, element_atn = net(features)
    # Fuse class logits with the class-agnostic attention
    element_logits = elem * element_atn

    label_np = _label.squeeze().cpu().numpy()

    pred_vid_score = get_cls_score(element_logits, rat=self.config.rat)
    score_np = pred_vid_score.copy()

    self.class_true.append(label_np)
    self.class_pred.append(pred_vid_score)

    # Binarize a copy of the video-level scores for classification accuracy
    score_np[score_np < self.config.class_thresh] = 0
    score_np[score_np >= self.config.class_thresh] = 1

    correct_pred = np.sum(label_np == score_np)
    self.num_correct += np.sum((correct_pred == self.config.num_class).astype(np.float32))
    self.num_total += 1

    cas_supp = element_logits[..., :-1]   # drop the background column
    cas_supp_atn = element_atn

    logit_atn = cas_supp_atn.expand_as(cas_supp).squeeze().data.cpu().numpy()

    self.dict_pred[vid_name[0]]["logit"] = logit_atn
    self.dict_pred[vid_name[0]]["duration"] = int(vid_num_seg.item() * 16 / 25)

    pred = np.where(pred_vid_score >= self.config.class_thresh)[0]

    # NOTE: threshold
    act_thresh = self.config.act_thresh

    if len(pred) > 0:
        cas_pred = cas_supp[0].cpu().numpy()[:, pred]
        num_segments = cas_pred.shape[0]
        cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))

        cas_pred_atn = cas_supp_atn[0].cpu().numpy()[:, [0]]
        cas_pred_atn = np.reshape(cas_pred_atn, (num_segments, -1, 1))

        proposal_dict = {}

        # Threshold the attention at multiple levels to generate proposals
        for i in range(len(act_thresh)):
            cas_temp = cas_pred.copy()
            cas_temp_atn = cas_pred_atn.copy()

            seg_list = []
            for c in range(len(pred)):
                pos = np.where(cas_temp_atn[:, 0, 0] > act_thresh[i])
                seg_list.append(pos)

            proposals = utils.get_proposal_oic(seg_list, cas_temp, pred_vid_score,
                                               pred, self.config.scale,
                                               vid_num_seg[0].cpu().item(),
                                               self.config.feature_fps,
                                               num_segments,
                                               gamma=self.config.gamma_oic)

            for j in range(len(proposals)):
                try:
                    class_id = proposals[j][0][0]
                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []
                    proposal_dict[class_id] += proposals[j]
                except IndexError:
                    logger.error("Index error while collecting proposals")

        # Class-wise soft-NMS over the merged proposals
        final_proposals = []
        for class_id in proposal_dict.keys():
            final_proposals.append(
                utils.soft_nms(proposal_dict[class_id], 0.7, sigma=0.3))

        self.final_res["results"][vid_name[0]] = utils.result2json(
            final_proposals, class_dict)
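# Sketch (assumption): the video-level scores used in eval_one_batch() come from
# get_cls_score(). It is assumed here to be top-k mean pooling of the fused
# logits over time followed by a class softmax with the background column
# dropped; the k = T // rat heuristic and the exact normalization are
# assumptions for illustration, not a verified copy of the repo's helper.
def get_cls_score_sketch(element_logits, rat=10):
    """element_logits: (1, T, C+1) fused class logits; returns (C,) video-level scores."""
    num_segments = element_logits.shape[-2]
    k = max(1, num_segments // rat)                       # number of top segments to average
    topk_vals, _ = torch.topk(element_logits, k=k, dim=-2)
    video_logits = topk_vals.mean(dim=-2)                 # (1, C+1)
    video_scores = torch.softmax(video_logits, dim=-1)    # class probabilities incl. background
    return video_scores[..., :-1].squeeze(0).cpu().numpy()  # drop background -> (C,)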