Example #1
def test(net, config, logger, test_loader, test_info, step, model_file=None):
    with torch.no_grad():
        net.eval()

        if model_file is not None:
            net.load_state_dict(torch.load(model_file))

        final_res = {}
        final_res['version'] = 'VERSION 1.3'
        final_res['results'] = {}
        final_res['external_data'] = {
            'used': True,
            'details': 'Features from I3D Network'
        }

        num_correct = 0.
        num_total = 0.

        load_iter = iter(test_loader)

        for i in range(len(test_loader.dataset)):
            # _data: video features, _label: video label, vid_name: video name, vid_num_seg: actual length of the feature sequence
            _data, _label, _, vid_name, vid_num_seg = next(load_iter)

            _data = _data.cuda()
            _label = _label.cuda()
            """
            cas_base: (1,T,21)
            score_supp: (1,21) 
            cas_supp: (1,T,21)
            fore_weights: (1,T,1)
            """
            _, cas_base, score_supp, cas_supp, fore_weights = net(_data)

            label_np = _label.cpu().numpy()
            score_np = score_supp[
                0, :-1].cpu().data.numpy()  # action-class scores, background class excluded: (20,)

            score_np[np.where(
                score_np < config.class_thresh)] = 0  # class_thresh = 0.25
            score_np[np.where(score_np >= config.class_thresh)] = 1

            correct_pred = np.sum(
                label_np == score_np,
                axis=1)  # count per-class matches between prediction and label; a video counts as correct only if all 20 classes match

            num_correct += np.sum((correct_pred == config.num_classes).astype(
                np.float32))  # number of correctly predicted videos
            num_total += correct_pred.shape[0]  # number of videos

            # normalize the scores so the values are more stable
            cas_base = utils.minmax_norm(cas_base)  # (B,T,C+1)
            cas_supp = utils.minmax_norm(cas_supp)  # (B,T,C+1)

            pred = np.where(
                score_np > config.class_thresh)[0]  # indices of the predicted action classes (score_np is binary at this point)

            if pred.size > 0:  # pred.any() would wrongly skip a video whose only predicted class is index 0
                cas_pred = cas_supp[0].cpu().numpy()[:, pred]  # (T, C+1) -> (T, len(pred))
                cas_pred = np.reshape(cas_pred,
                                      (config.num_segments, -1, 1))  # (T, len(pred), 1)
                # after upsampling below: e.g. (18000, len(pred), 1) for T = 750, scale = 24
                cas_pred = utils.upgrade_resolution(cas_pred,
                                                    config.scale)  # scale: 24

                proposal_dict = {}

                for t in range(len(config.act_thresh)):
                    # act_thresh = np.arange(0.0, 0.25, 0.025)
                    cas_temp = cas_pred.copy()  # (18000, len(pred), 1)
                    # indices below the current threshold, e.g. [0, 1, 2, 3, 1531, 1532, ..., 9910]
                    zero_location = np.where(
                        cas_temp[:, :, 0] < config.act_thresh[t])
                    cas_temp[zero_location] = 0

                    # cas_temp: (18000, len(pred), 1), where len(pred) is the number of predicted classes
                    seg_list = []  # one sublist per predicted class, holding the indices of its positive positions
                    for c in range(len(pred)):
                        pos = np.where(
                            cas_temp[:, c, 0] > 0)  # e.g. [4, 5, 6, ..., 17999]
                        seg_list.append(pos)
                    # each proposal: (class, score, start, end), e.g. [[[5, 0.0025, 169.42, 169.6]]]
                    proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np, pred, config.scale, \
                                    vid_num_seg[0].cpu().item(), config.feature_fps, config.num_segments)

                    for j in range(len(proposals)):
                        class_id = proposals[j][0][0]

                        if class_id not in proposal_dict.keys():
                            proposal_dict[class_id] = []

                        proposal_dict[class_id] += proposals[j]

                final_proposals = []
                for class_id in proposal_dict.keys():
                    final_proposals.append(
                        utils.nms(proposal_dict[class_id], 0.7))

                final_res['results'][vid_name[0]] = utils.result2json(
                    final_proposals)

        test_acc = num_correct / num_total

        json_path = os.path.join(config.output_path, 'temp_result.json')
        with open(json_path, 'w') as f:
            json.dump(final_res, f)

        tIoU_thresh = np.linspace(0.1, 0.9, 9)
        anet_detection = ANETdetection(config.gt_path,
                                       json_path,
                                       subset='test',
                                       tiou_thresholds=tIoU_thresh,
                                       verbose=False,
                                       check_status=False)
        mAP, average_mAP = anet_detection.evaluate()

        logger.log_value('Test accuracy', test_acc, step)

        for i in range(tIoU_thresh.shape[0]):
            logger.log_value('mAP@{:.1f}'.format(tIoU_thresh[i]), mAP[i], step)

        logger.log_value('Average mAP', average_mAP, step)

        test_info["step"].append(step)
        test_info["test_acc"].append(test_acc)
        test_info["average_mAP"].append(average_mAP)

        for i in range(tIoU_thresh.shape[0]):
            test_info["mAP@{:.1f}".format(tIoU_thresh[i])].append(mAP[i])
Example #2
def test(sess, model, init, input_list, test_iter):
    ckpt = input_list['ckpt']
    scale = input_list['scale']
    class_threshold = input_list['class_threshold']

    rgb_saver = tf.train.Saver()
    flow_saver = tf.train.Saver()
    # file for matching 'video number' and 'video name'
    with open('THUMOS14_test_vid_list.txt', 'r') as test_vid_list:
        lines = test_vid_list.read().splitlines()

    # Define the output JSON file
    final_result = {}
    final_result['version'] = 'VERSION 1.3'
    final_result['results'] = {}
    final_result['external_data'] = {
        'used': True,
        'details': 'Features from I3D Net'
    }

    for i in range(1, TEST_NUM + 1):
        vid_name = lines[i - 1]
        # Load Frames
        rgb_features, flow_features, temp_seg, vid_len = utils.processTestVid(
            i, INPUT_PATHS['test'], NUM_SEGMENTS)
        rgb_features = rgb_features.astype(np.float32)
        flow_features = flow_features.astype(np.float32)

        # RGB Stream
        sess.run(init)
        rgb_saver.restore(sess,
                          os.path.join(ckpt['rgb'], 'rgb_' + str(test_iter)))
        rgb_class_w = tf.get_default_graph().get_tensor_by_name(
            'Classification/class_weight/kernel:0').eval()
        rgb_attention, rgb_raw, rgb_class_result = sess.run(
            [model.attention_weights, model.class_weight, model.class_result],
            feed_dict={model.X: rgb_features})

        # Flow Stream
        sess.run(init)
        flow_saver.restore(
            sess, os.path.join(ckpt['flow'], 'flow_' + str(test_iter)))
        flow_class_w = tf.get_default_graph().get_tensor_by_name(
            'Classification/class_weight/kernel:0').eval()
        flow_attention, flow_raw, flow_class_result = sess.run(
            [model.attention_weights, model.class_weight, model.class_result],
            feed_dict={model.X: flow_features})

        # Gathering Classification Result
        rgb_class_prediction = np.where(rgb_class_result > class_threshold)[1]
        flow_class_prediction = np.where(
            flow_class_result > class_threshold)[1]

        rgb_tCam = utils.get_tCAM(rgb_features, rgb_class_w)
        flow_tCam = utils.get_tCAM(flow_features, flow_class_w)
        r_check = False
        f_check = False

        if rgb_class_prediction.any():
            r_check = True
            # Weighted T-CAM
            rgb_wtCam = utils.get_wtCAM(rgb_tCam, flow_tCam, rgb_attention,
                                        ALPHA, rgb_class_prediction)
            # Interpolate W-TCAM
            rgb_int_wtCam = utils.interpolated_wtCAM(rgb_wtCam, scale)
            # Get segment list of rgb_int_wtCam
            rgb_temp_idx = utils.get_tempseg_list(rgb_int_wtCam,
                                                  len(rgb_class_prediction))
            # Temporal Proposal
            rgb_temp_prop = utils.get_temp_proposal(rgb_temp_idx,
                                                    rgb_int_wtCam,
                                                    rgb_class_prediction,
                                                    scale, vid_len)

        if flow_class_prediction.any():
            f_check = True
            # Weighted T-CAM
            flow_wtCam = utils.get_wtCAM(flow_tCam, rgb_tCam, flow_attention,
                                         1 - ALPHA, flow_class_prediction)
            # Interpolate W-TCAM
            flow_int_wtCam = utils.interpolated_wtCAM(flow_wtCam, scale)
            # Get segment list of flow_int_wtCam
            flow_temp_idx = utils.get_tempseg_list(flow_int_wtCam,
                                                   len(flow_class_prediction))
            # Temporal Proposal
            flow_temp_prop = utils.get_temp_proposal(flow_temp_idx,
                                                     flow_int_wtCam,
                                                     flow_class_prediction,
                                                     scale, vid_len)

        if r_check and f_check:
            # Fuse two stream and perform non-maximum suppression
            temp_prop = utils.integrated_prop(rgb_temp_prop, flow_temp_prop,
                                              list(rgb_class_prediction),
                                              list(flow_class_prediction))
            final_result['results'][vid_name] = utils.result2json([temp_prop])
        elif r_check and not f_check:
            final_result['results'][vid_name] = utils.result2json(
                rgb_temp_prop)
        elif not r_check and f_check:
            final_result['results'][vid_name] = utils.result2json(
                flow_temp_prop)

        utils.inf_progress(i, TEST_NUM, 'Progress', 'Complete', 1, 50)

    # Save Results
    json_path = os.path.join(ckpt['path'], 'results.json')
    with open(json_path, 'w') as fp:
        json.dump(final_result, fp)

    txt_path = os.path.join(ckpt['path'], 'results.txt')
    with open(txt_path, 'w') as tp:
        utils.json2txt(final_result['results'], tp)

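Example #2 derives its localizations from temporal class activation maps (T-CAMs). A plausible minimal form of utils.get_tCAM, assuming it is a plain projection of segment features onto the classifier kernel read from the graph above; the real helper may differ:

import numpy as np

def get_tCAM(features, class_weights):
    # features: (T, D) segment features; class_weights: (D, C) kernel of
    # the classification layer. One activation per segment and class.
    return np.matmul(features, class_weights)  # (T, C)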
Example #3
def test(net, config, logger, test_loader, test_info, step, model_file=None):
    with torch.no_grad():
        net.eval()

        if model_file is not None:
            net.load_state_dict(torch.load(model_file))

        final_res = {}
        final_res['version'] = 'VERSION 1.3'
        final_res['results'] = {}
        final_res['external_data'] = {
            'used': True,
            'details': 'Features from I3D Network'
        }

        num_correct = 0.
        num_total = 0.

        load_iter = iter(test_loader)

        for i in range(len(test_loader.dataset)):

            _data, _label, _, vid_name, vid_num_seg = next(load_iter)

            _data = _data.cuda()
            _label = _label.cuda()

            vid_num_seg = vid_num_seg[0].cpu().item()
            num_segments = _data.shape[1]

            score_act, _, feat_act, feat_bkg, features, cas_softmax = net(
                _data)

            feat_magnitudes_act = torch.mean(torch.norm(feat_act, dim=2),
                                             dim=1)
            feat_magnitudes_bkg = torch.mean(torch.norm(feat_bkg, dim=2),
                                             dim=1)

            label_np = _label.cpu().data.numpy()
            score_np = score_act[0].cpu().data.numpy()

            pred_np = np.zeros_like(score_np)
            pred_np[np.where(score_np < config.class_thresh)] = 0
            pred_np[np.where(score_np >= config.class_thresh)] = 1

            correct_pred = np.sum(label_np == pred_np, axis=1)

            num_correct += np.sum(
                (correct_pred == config.num_classes).astype(np.float32))
            num_total += correct_pred.shape[0]

            feat_magnitudes = torch.norm(features, p=2, dim=2)

            feat_magnitudes = utils.minmax_norm(feat_magnitudes,
                                                max_val=feat_magnitudes_act,
                                                min_val=feat_magnitudes_bkg)
            feat_magnitudes = feat_magnitudes.repeat(
                (config.num_classes, 1, 1)).permute(1, 2, 0)

            cas = utils.minmax_norm(cas_softmax * feat_magnitudes)

            pred = np.where(score_np >= config.class_thresh)[0]

            if len(pred) == 0:
                pred = np.array([np.argmax(score_np)])

            cas_pred = cas[0].cpu().numpy()[:, pred]
            cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))

            cas_pred = utils.upgrade_resolution(cas_pred, config.scale)

            proposal_dict = {}

            feat_magnitudes_np = feat_magnitudes[0].cpu().data.numpy()[:, pred]
            feat_magnitudes_np = np.reshape(feat_magnitudes_np,
                                            (num_segments, -1, 1))
            feat_magnitudes_np = utils.upgrade_resolution(
                feat_magnitudes_np, config.scale)

            for t in range(len(config.act_thresh_cas)):
                cas_temp = cas_pred.copy()

                zero_location = np.where(
                    cas_temp[:, :, 0] < config.act_thresh_cas[t])
                cas_temp[zero_location] = 0

                seg_list = []
                for c in range(len(pred)):
                    pos = np.where(cas_temp[:, c, 0] > 0)
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np, pred, config.scale, \
                                vid_num_seg, config.feature_fps, num_segments)

                for j in range(len(proposals)):
                    class_id = proposals[j][0][0]

                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []

                    proposal_dict[class_id] += proposals[j]

            for t in range(len(config.act_thresh_magnitudes)):
                cas_temp = cas_pred.copy()

                feat_magnitudes_np_temp = feat_magnitudes_np.copy()

                zero_location = np.where(feat_magnitudes_np_temp[:, :, 0] <
                                         config.act_thresh_magnitudes[t])
                feat_magnitudes_np_temp[zero_location] = 0

                seg_list = []
                for c in range(len(pred)):
                    pos = np.where(feat_magnitudes_np_temp[:, c, 0] > 0)
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np, pred, config.scale, \
                                vid_num_seg, config.feature_fps, num_segments)

                for j in range(len(proposals)):
                    class_id = proposals[j][0][0]

                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []

                    proposal_dict[class_id] += proposals[j]

            final_proposals = []
            for class_id in proposal_dict.keys():
                final_proposals.append(utils.nms(proposal_dict[class_id], 0.6))

            final_res['results'][vid_name[0]] = utils.result2json(
                final_proposals)

        test_acc = num_correct / num_total

        json_path = os.path.join(config.output_path, 'result.json')
        with open(json_path, 'w') as f:
            json.dump(final_res, f)

        tIoU_thresh = np.linspace(0.1, 0.7, 7)
        anet_detection = ANETdetection(config.gt_path,
                                       json_path,
                                       subset='test',
                                       tiou_thresholds=tIoU_thresh,
                                       verbose=False,
                                       check_status=False)
        mAP, average_mAP = anet_detection.evaluate()

        logger.log_value('Test accuracy', test_acc, step)

        for i in range(tIoU_thresh.shape[0]):
            logger.log_value('mAP@{:.1f}'.format(tIoU_thresh[i]), mAP[i], step)

        logger.log_value('Average mAP', average_mAP, step)

        test_info["step"].append(step)
        test_info["test_acc"].append(test_acc)
        test_info["average_mAP"].append(average_mAP)

        for i in range(tIoU_thresh.shape[0]):
            test_info["mAP@{:.1f}".format(tIoU_thresh[i])].append(mAP[i])
Example #4
    def eval_one_batch(self, data, net, class_dict):
        features, _label, segm, vid_name, vid_num_seg = data

        if _label.sum() == 0:
            return

        elem, _, _, element_atn = net(features)

        element_logits = elem * element_atn

        label_np = _label.squeeze().cpu().numpy()

        pred_vid_score = get_cls_score(element_logits, rat=self.config.rat)
        score_np = pred_vid_score.copy()

        self.class_true.append(label_np)
        self.class_pred.append(pred_vid_score)

        score_np[score_np < self.config.class_thresh] = 0
        score_np[score_np >= self.config.class_thresh] = 1

        correct_pred = np.sum(label_np == score_np)

        self.num_correct += np.sum(
            (correct_pred == self.config.num_class).astype(np.float32))
        self.num_total += 1

        cas_supp = element_logits[..., :-1]
        cas_supp_atn = element_atn

        logit_atn = cas_supp_atn.expand_as(
            cas_supp).squeeze().data.cpu().numpy()

        self.dict_pred[vid_name[0]]["logit"] = logit_atn
        self.dict_pred[vid_name[0]]["duration"] = int(vid_num_seg.item() * 16 /
                                                      25)

        pred = np.where(pred_vid_score >= self.config.class_thresh)[0]

        # NOTE: thresholds applied to the class-agnostic attention below
        act_thresh = self.config.act_thresh

        if len(pred) > 0:
            cas_pred = cas_supp[0].cpu().numpy()[:, pred]
            num_segments = cas_pred.shape[0]
            cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))

            cas_pred_atn = cas_supp_atn[0].cpu().numpy()[:, [0]]
            cas_pred_atn = np.reshape(cas_pred_atn, (num_segments, -1, 1))

            proposal_dict = {}

            for i in range(len(act_thresh)):
                cas_temp = cas_pred.copy()
                cas_temp_atn = cas_pred_atn.copy()

                seg_list = []

                for c in range(len(pred)):
                    # the attention is class-agnostic (single column), so the
                    # same positions are appended for every predicted class
                    pos = np.where(cas_temp_atn[:, 0, 0] > act_thresh[i])
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list,
                                                   cas_temp,
                                                   pred_vid_score,
                                                   pred,
                                                   self.config.scale,
                                                   vid_num_seg[0].cpu().item(),
                                                   self.config.feature_fps,
                                                   num_segments,
                                                   gamma=self.config.gamma_oic)

                for j in range(len(proposals)):
                    try:
                        class_id = proposals[j][0][0]

                        if class_id not in proposal_dict.keys():
                            proposal_dict[class_id] = []

                        proposal_dict[class_id] += proposals[j]
                    except IndexError:
                        logger.error(f"Index error")

            final_proposals = []
            for class_id in proposal_dict.keys():
                final_proposals.append(
                    utils.soft_nms(proposal_dict[class_id], 0.7, sigma=0.3))
            self.final_res["results"][vid_name[0]] = utils.result2json(
                final_proposals, class_dict)
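All four examples score candidate segments with utils.get_proposal_oic. The underlying idea is the outer-inner-contrastive (OIC) score: the mean activation inside a segment minus the mean activation in an inflated outer margin, with gamma (gamma_oic above) controlling the margin size. A minimal sketch of that scoring under those assumptions; the real helper also converts indices to timestamps and attaches class scores:

import numpy as np

def oic_score(cas, start, end, gamma=0.25):
    # cas: (T,) activations for one class; assumes end > start.
    # Inner region: [start, end); outer region: a margin of
    # int(gamma * length) positions on each side, clipped to bounds.
    length = end - start
    margin = int(gamma * length)
    inner = cas[start:end].mean()
    outer_idx = np.r_[max(0, start - margin):start,
                      end:min(len(cas), end + margin)]
    outer = cas[outer_idx].mean() if outer_idx.size > 0 else 0.0
    return inner - outer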