示例#1
0
    def do_for_one_vid_seg(self,
                           props,
                           prop_feats,
                           gt_boxs,
                           gt_frms,
                           out_file,
                           save=True):
        """
        For every groundtruth box, pick the same-frame proposal with the
        highest IoU and optionally save the features of those proposals.
        Also used to calculate recall.

        props: all the proposal boxes; columns 0-3 are handed to box_iou
            as the box and column 4 is compared against gt_frms.
        prop_feats: features indexed in parallel with props (row i belongs
            to proposal i). Only read when save is True.
        gt_boxs: all the groundtruth boxes
        gt_frms: frame index of each groundtruth box
        out_file: target passed to np.save when save is True
        save: whether to write the selected features to out_file

        Returns a dict with
            out_props: props with highest IoU with each gt box
                (one-to-one correspondence with gt_boxs)
            recall: number of gt boxes whose best IoU exceeds 0.5
                (float tensor), or 0 in the fallback case
            num_prop / num_gt: number of selected proposals / gt boxes

        Fallback: when there is no groundtruth box (or no proposal),
        self.cfg.no_gt_count is incremented, num_gt is reported as 1 and
        proposal 0 (and its feature row) is used as a placeholder.
        An empty props still raises IndexError in that fallback because
        there is no proposal 0 to fall back to.
        """
        props = torch.tensor(props).float()
        gt_boxs = torch.tensor(gt_boxs).float()
        gt_frms = torch.tensor(gt_frms).float()

        ngt = len(gt_boxs)

        if ngt > 0 and len(props) > 0:
            # The frame mask is built only here: indexing column 4 of an
            # empty, 1-D props would fail before the check above.
            prop_frms = props[:, 4]
            frm_msk = prop_frms[:, None] == gt_frms
            # Zero out IoUs between boxes living in different frames.
            ious = box_iou(props[:, :4], gt_boxs) * frm_msk.float()
            ious_max, ious_arg_max = ious.max(dim=0)
            recall = (ious_max > 0.5).sum().float()
            out_props = props[ious_arg_max]
        else:
            self.cfg.no_gt_count += 1
            ngt = 1
            recall = 0
            # Keep the feature selection below consistent with
            # out_props = props[0]: a single placeholder index.
            ious_arg_max = torch.zeros(1, dtype=torch.long)
            out_props = props[0]

        nprop = ngt
        if save:
            # Convert first, then read the feature width: ndarray/list
            # inputs have no callable .size(-1).
            prop_feats = torch.tensor(prop_feats).float()
            prop_dim = prop_feats.size(-1)
            out_prop_feats = prop_feats[ious_arg_max].view(
                1, ngt, prop_dim).detach().cpu().numpy()
            assert list(out_prop_feats.shape[:2]) == [1, ngt]
            np.save(out_file, out_prop_feats)

        return {
            'out_props': out_props,
            'recall': recall,
            'num_prop': nprop,
            'num_gt': ngt
        }
示例#2
0
    def compute_one_srl(self, pred_cmp, pred_boxes_for_srl,
                        pred_scores_for_srl,
                        targ_cmp, gt_boxes_with_frames,
                        gt_frames_all, cmp_msk):
        """
        For sep.

        Checks whether the chosen video is the target video and, if so,
        whether any annotated frame has a predicted box that overlaps its
        groundtruth box (IoU > 0.5) with a score above self.prob_thresh.

        pred_cmp: is the chosen video (1)
        targ_cmp: is the target video (1)
        pred_boxes_for_srl: predicted boxes for the given srl, indexed as
            [video][frame] -> box (first 4 entries are the coordinates)
        pred_scores_for_srl: prediction scores, indexed as [video][frame]
        gt_boxes_with_frames: groundtruth rows; entries 0-3 are the box,
            entry 4 (also the last column) is the frame index
        gt_frames_all, cmp_msk: not used by this variant

        Returns the details of the first matching frame, or a dict with
        only targ_cmp / pred_cmp / a zero iou when nothing matches.
        """
        if pred_cmp != targ_cmp:
            # Wrong video chosen: no box is compared at all.
            return {
                'targ_cmp': targ_cmp,
                'pred_cmp': pred_cmp,
                'iou': torch.tensor(0)
            }

        annotated_frames = gt_boxes_with_frames[:, -1].long().tolist()
        vid_boxes = pred_boxes_for_srl[pred_cmp]
        vid_scores = pred_scores_for_srl[pred_cmp]

        for gt_row, frm_idx in zip(gt_boxes_with_frames, annotated_frames):
            raw_box = vid_boxes[frm_idx]
            predicted_box = torch.tensor(raw_box[:4])
            pbox = torch.tensor(raw_box)
            groundtruth_box = gt_row[:4]
            prediction_score = vid_scores[frm_idx]
            # The frame column must agree with the frame being visited.
            assert gt_row[4] == frm_idx
            iou = box_iou(predicted_box.float(), groundtruth_box.float())
            # TODO: Check why prediction scores
            # are ridiculously low!!
            if not (iou > 0.5 and prediction_score > self.prob_thresh):
                continue
            return {
                'targ_cmp': targ_cmp,
                'pred_cmp': pred_cmp,
                'predicted_box': predicted_box,
                'pbox': pbox,
                'gt_box': groundtruth_box,
                'frm_idx': frm_idx,
                'iou': iou
            }

        # Right video, but no annotated frame passed both thresholds.
        return {
            'targ_cmp': targ_cmp,
            'pred_cmp': pred_cmp,
            'iou': torch.tensor(0)
        }
示例#3
0
    def compute_one_srl(
            self,
            pred_cmp_for_srl,
            pred_boxes_for_srl,
            pred_scores_for_srl,
            targ_cmp,
            gt_boxes_with_frames,
            gt_frames_all,
            cmp_msk
    ):
        """
        For spatial.

        Every frame index nf has its own chosen video
        pred_cmp_for_srl[nf]. A frame is marked consistent when:
          * it carries a groundtruth box, the chosen video is targ_cmp,
            and the predicted box has IoU > 0.5 with at least one
            groundtruth box of that frame together with a score above
            self.prob_thresh;
          * it carries no groundtruth box and it is NOT the case that a
            non-target video was chosen with a score above
            self.prob_thresh.

        pred_cmp_for_srl: chosen video per frame (indexed by frame)
        pred_boxes_for_srl: predicted boxes for the given srl, accessed
            here as [video][frame] -> box
        pred_scores_for_srl: prediction scores, accessed as
            [video][frame]
        targ_cmp: is the target video (1)
        gt_boxes_with_frames: groundtruth rows; entry 4 (also the last
            column) is the frame index
        gt_frames_all: per-video frame indices; only the target video's
            entry is read, for a sanity check
        cmp_msk: per-video mask; every chosen video must have mask 1

        Returns a dict. If all frames are consistent it describes the
        last matching (frame, groundtruth box) pair. Otherwise 'pred_cmp'
        is the negated index of the highest-scoring frame without
        groundtruth (or -5 if there is none), 'gt_box' is the per-frame
        dict of groundtruth boxes and 'iou' is a zero tensor.

        NOTE(review): if every frame is consistent but no groundtruth box
        was ever matched (e.g. gt_boxes_with_frames has no rows),
        con_box / con_box_full / con_gt / con_frm / con_iou are never
        assigned and the success return raises UnboundLocalError.
        """
        # Number of frames is taken from the first video's predictions.
        nfrms = len(pred_boxes_for_srl[0])
        con_vid = -1
        con_vid_score = 0
        # Score / box of the prediction on each frame without groundtruth.
        con_vid_scores = {}
        con_vid_boxes = {}
        # req_frms = list(set([frm.tolist() for frm1 in gt_frames_all
        # for frm in frm1]))

        req_frms = [i for i in range(nfrms)]

        # Per-frame consistency flag; frames start out as inconsistent.
        con_outs = {nv: False for nv in req_frms}

        gt_frms = set(gt_boxes_with_frames[:, -1].long().tolist())
        # Sanity check: annotated frames are a subset of the target
        # video's frame list.
        assert gt_frms.intersection(
            set(gt_frames_all[targ_cmp].tolist())
        ) == gt_frms

        # Offset added to columns 0 and 2 of each groundtruth row, scaled
        # by the target video index. Presumably x1/x2 of videos tiled
        # side by side, 720 px each -- TODO confirm.
        delta = torch.zeros(gt_boxes_with_frames.size(1)).long()
        delta[[0, 2]] = 720

        # Group the (shifted) groundtruth rows by frame index.
        gt_box_for_frms = {}
        for g in gt_boxes_with_frames:
            gfrm = g[4].item()
            if gfrm not in gt_box_for_frms:
                gt_box_for_frms[gfrm] = []
            gt_box_for_frms[gfrm].append(g + delta * targ_cmp)

        for nf in req_frms:
            nv = pred_cmp_for_srl[nf]
            assert cmp_msk[nv] == 1
            prediction_score = pred_scores_for_srl[nv][nf]
            pred_boxes = pred_boxes_for_srl[nv][nf]
            if nf in gt_frms:
                if nv == targ_cmp:
                    predicted_box = torch.tensor(
                        pred_boxes[:4]
                    )
                    pbox = torch.tensor(
                        pred_boxes
                    )

                    assert nf in gt_box_for_frms

                    groundtruth_boxes = gt_box_for_frms[nf]
                    for groundtruth_box in groundtruth_boxes:
                        assert groundtruth_box[4] == nf
                        iou = box_iou(
                            predicted_box.float(),
                            groundtruth_box[:4].float()
                        )
                        # TODO: Check why prediction scores
                        # are ridiculously low!!
                        if iou > 0.5 and prediction_score > self.prob_thresh:
                            # Last match wins: these are overwritten by
                            # any later matching box / frame.
                            con_iou = iou
                            con_box = predicted_box
                            con_box_full = pbox
                            con_gt = groundtruth_box
                            con_frm = nf
                            con_vid_score = prediction_score
                            con_outs[nf] = True
                            con_vid = nv
                else:
                    # Annotated frame but a non-target video was chosen.
                    corr = False
                    con_outs[nf] = corr
            else:
                # Frame without groundtruth: only a confident choice of a
                # non-target video counts as an error.
                corr = True
                if nv != targ_cmp and prediction_score > self.prob_thresh:
                    corr = False
                con_outs[nf] = corr
                # if not corr:
                con_vid_scores[nf] = prediction_score
                con_vid_boxes[nf] = pred_boxes

        if all(list(con_outs.values())):
            return {
                'targ_cmp': targ_cmp,
                'pred_cmp': con_vid,
                'pred_score': con_vid_score,
                'predicted_box': con_box,
                'pbox': con_box_full,
                'gt_box': con_gt,
                'frm_idx': con_frm,
                'iou': con_iou
            }

        # Failure: report the highest-scoring frame without groundtruth.
        con_vid_list = sorted(
            [(k, v, con_vid_boxes[k]) for k, v in con_vid_scores.items()],
            key=lambda x: x[1], reverse=True
        )
        if len(con_vid_list) > 0:
            # Note the negation: 'pred_cmp' carries minus the frame index.
            con_vid = -con_vid_list[0][0]
            con_vid_score = con_vid_list[0][1]
            con_vid_box = torch.tensor(con_vid_list[0][2])
        else:
            con_vid = -5
            con_vid_score = 0
            con_vid_box = torch.tensor([0, 0, 0, 0, 0])
        return {
            'targ_cmp': targ_cmp,
            'pred_cmp': con_vid,
            'pred_score': con_vid_score,
            'predicted_box': con_vid_box,
            'gt_box': gt_box_for_frms,
            'iou': torch.tensor(0)
        }
示例#4
0
    def compute_one_srl(
            self,
            pred_cmp,
            pred_boxes_for_srl,
            pred_scores_for_srl,
            targ_cmp,
            gt_boxes_with_frames,
            gt_frames_all,
            cmp_msk
    ):
        """
        For sep.

        Every video is checked:
          * masked-out videos (cmp_msk[nv] != 1) count as consistent and
            must have all-zero scores;
          * the target video is consistent when some annotated frame has
            a predicted box with IoU > 0.5 against its groundtruth box
            and a score above self.prob_thresh;
          * any other video is consistent when none of its frames listed
            in gt_frames_all[nv] scores above self.prob_thresh.

        pred_cmp: is the chosen video (1); not read by this variant
        targ_cmp: is the target video (1)
        pred_boxes_for_srl: predicted boxes for
        given srl (#nvids x #nframes x #1-prop(4))
        pred_scores_for_srl: prediction scores, indexed [video][frame]
        gt_boxes_with_frames: groundtruth rows; entries 0-3 are the box,
            entry 4 (also the last column) is the frame index
        gt_frames_all: per-video frame indices
        cmp_msk: per-video mask, one entry per video

        Returns the details of the last matching target frame when every
        video is consistent. Otherwise returns the non-target video with
        the highest offending score (pred_cmp -1 and score 0 if there is
        none) and a zero iou. The same "no match" result is returned when
        all videos are consistent but the target was never matched (e.g.
        the target itself is masked out).
        """
        nvids = len(pred_boxes_for_srl)
        assert len(cmp_msk) == nvids
        con_outs = {nv: False for nv in range(nvids)}
        # Details of the last target frame that passed both thresholds.
        match = None
        # Offending score of each inconsistent non-target video.
        con_vid_scores = {}

        for nv in range(nvids):
            if not cmp_msk[nv] == 1:
                con_outs[nv] = True
                # all(...) is required: asserting on the bare list is
                # always true for a non-empty list.
                assert all(ps0 == 0. for ps0 in pred_scores_for_srl[nv])
                continue
            pred_boxes = pred_boxes_for_srl[nv]
            pred_scores = pred_scores_for_srl[nv]
            if nv == targ_cmp:
                gt_frms = gt_boxes_with_frames[:, -1].long().tolist()
                # Sanity check: annotated frames are a subset of this
                # video's frame list.
                assert set(gt_frms).intersection(
                    set(gt_frames_all[nv].tolist())
                ) == set(gt_frms)
                for frm_idx_ind, frm_idx in enumerate(gt_frms):
                    predicted_box = torch.tensor(pred_boxes[frm_idx][:4])
                    pbox = torch.tensor(pred_boxes[frm_idx])
                    groundtruth_box = gt_boxes_with_frames[frm_idx_ind][:4]
                    prediction_score = pred_scores[frm_idx]
                    assert gt_boxes_with_frames[frm_idx_ind][4] == frm_idx
                    iou = box_iou(
                        predicted_box.float(),
                        groundtruth_box.float()
                    )
                    # TODO: Check why prediction scores
                    # are ridiculously low!!
                    if iou > 0.5 and prediction_score > self.prob_thresh:
                        # Last match wins, as later frames overwrite it.
                        match = {
                            'targ_cmp': targ_cmp,
                            'pred_cmp': nv,
                            'pred_score': prediction_score,
                            'predicted_box': predicted_box,
                            'pbox': pbox,
                            'gt_box': groundtruth_box,
                            'frm_idx': frm_idx,
                            'iou': iou
                        }
                        con_outs[nv] = True
            else:
                corr = True
                for frm_idx in gt_frames_all[nv]:
                    prediction_score = pred_scores[frm_idx]
                    if prediction_score > self.prob_thresh:
                        corr = False
                        break
                con_outs[nv] = corr
                if not corr:
                    # Score of the first frame that crossed the threshold.
                    con_vid_scores[nv] = prediction_score

        # Requiring a recorded match avoids returning unbound values when
        # every video is flagged consistent without a target match.
        if match is not None and all(con_outs.values()):
            return match

        if con_vid_scores:
            # First video with the highest offending score.
            con_vid, con_vid_score = max(
                con_vid_scores.items(), key=lambda kv: kv[1]
            )
        else:
            con_vid = -1
            con_vid_score = 0
        return {
            'targ_cmp': targ_cmp,
            'pred_cmp': con_vid,
            'pred_score': con_vid_score,
            'iou': torch.tensor(0)
        }
示例#5
0
    def eval_one_sent_idx(self, pred_row, gt_rows):
        """
        Evaluate the predicted boxes of one sentence against its single
        groundtruth row.

        pred_row: prediction record, forwarded to
            self.get_req_pred_from_row
        gt_rows: exactly one groundtruth row (accessed via .iloc[0]); its
            vid_seg has the form '<vid>_segment_<n>' and its
            req_cls_pats_mask yields (srl_arg, box_indicator, box_inds)
            triples

        Returns -1 when self.get_req_pred_from_row returns -1. Otherwise
        a dict with
            res_dict: {row index: number of boxes with IoU > 0.5}
            tot_dict: {row index: number of arguments flagged as having
                boxes}
            considered_boxes: one record per compared box pair
        """
        assert len(gt_rows) == 1
        gt_row = gt_rows.iloc[0]
        row_key = gt_row.name

        # '<vid>_segment_<n>' -> video id and segment id without leading
        # zeros.
        vid, seg_raw = gt_row.vid_seg.split('_segment_')
        seg = str(int(seg_raw))

        seg_annot = self.anet_annots[vid]['segments'][seg]
        all_gt_boxes = torch.tensor(seg_annot['bbox'])
        all_gt_frames = torch.tensor(seg_annot['frm_idx'])
        assert len(all_gt_boxes) == len(all_gt_frames)

        pred_boxes_for_verb = self.get_req_pred_from_row(
            pred_row, gt_row, row_key
        )
        if pred_boxes_for_verb == -1:
            return -1

        results_dict = {}
        tot_dict = {}
        considered_boxes = []

        for srl_ind, srl_entry in enumerate(gt_row.req_cls_pats_mask):
            _srl_arg, has_boxes, box_ind_list = srl_entry
            if has_boxes != 1:
                continue

            results_dict.setdefault(row_key, 0)
            tot_dict[row_key] = tot_dict.get(row_key, 0) + 1

            # The argument is counted in the total even when there is no
            # prediction for it.
            if srl_ind >= len(pred_boxes_for_verb):
                continue

            selector = torch.tensor(box_ind_list)
            gt_boxes = all_gt_boxes.index_select(0, selector)
            frm_idxs = all_gt_frames.index_select(0, selector)
            pred_boxes = pred_boxes_for_verb[srl_ind]

            for frm_idx, groundtruth_box in zip(frm_idxs, gt_boxes):
                predicted_box = torch.tensor(pred_boxes[frm_idx][:4])
                iou = box_iou(
                    predicted_box.float(), groundtruth_box.float()
                )
                considered_boxes.append({
                    'predicted_box': predicted_box,
                    'gt_box': groundtruth_box,
                    'frm_idx': frm_idx,
                    'srl_ind': srl_ind,
                    'iou': iou
                })
                if iou > 0.5:
                    results_dict[row_key] += 1

        return {
            'res_dict': results_dict,
            'tot_dict': tot_dict,
            'considered_boxes': considered_boxes
        }
示例#6
0
    def no_gt_prop10_one_vid_seg(self,
                                 props,
                                 prop_feats,
                                 gt_boxs,
                                 gt_frms,
                                 out_file,
                                 save=True):
        """
        Reorder the proposals of each frame by descending score, without
        using the groundtruth for the selection, and optionally save the
        features in the same order.

        The input is viewed as 10 frames x 100 proposals x 7 columns;
        column 6 is the value sorted on, column 4 is compared against
        gt_frms and columns 0-3 are handed to box_iou.

        props: all the proposal boxes (10 * 100 rows of 7 values)
        prop_feats: features indexed in parallel with props
        gt_boxs: all the groundtruth boxes (only used for recall)
        gt_frms: frame index of each groundtruth box
        out_file: target passed to np.save when save is True
        save: whether to write the reordered features, shaped
            (10, 100, feature dim), to out_file

        Returns a dict with
            out_props: reordered proposals as a (10 * 100, 7) ndarray
            recall: number of gt boxes whose best same-frame IoU exceeds
                0.5 (tensor), or 0 when there is no groundtruth
            num_prop: proposals kept per frame
            num_gt: number of gt boxes (reported as 1 when there is none,
                in which case self.cfg.no_gt_count is incremented)
        """
        nfrms = 10
        # Proposals per frame in the input, and how many are kept.
        nppf_in = 100
        nppf = 100
        prop_width = 7

        props = torch.tensor(props).float()
        prop_feats = torch.tensor(prop_feats).float()
        gt_boxs = torch.tensor(gt_boxs).float()
        gt_frms = torch.tensor(gt_frms).float()

        ngt = len(gt_boxs)

        prop_dim = prop_feats.size(-1)
        props1 = props.view(nfrms, nppf_in, prop_width)
        prop_feats1 = prop_feats.view(nfrms, nppf_in, prop_dim)

        props_output = torch.zeros(nfrms, nppf, prop_width)
        prop_feats_output = torch.zeros(nfrms, nppf, prop_dim)

        for frm in range(nfrms):
            # argsort yields a permutation, so no de-duplication is
            # needed before truncating to the nppf best entries.
            order = props1[frm, :, 6].argsort(descending=True)[:nppf]
            props_output[frm] = props1[frm][order]
            prop_feats_output[frm] = prop_feats1[frm][order]

        props_output = props_output.view(nfrms * nppf, prop_width)
        prop_feats_output = prop_feats_output.detach().cpu().numpy()

        if ngt > 0 and len(props_output) > 0:
            prop_frms = props_output[:, 4]
            frm_msk = prop_frms[:, None] == gt_frms
            # Zero out IoUs between boxes living in different frames.
            ious = box_iou(props_output[:, :4], gt_boxs) * frm_msk.float()
            ious_max, _ = ious.max(dim=0)
            recall = (ious_max > 0.5).sum()
        else:
            self.cfg.no_gt_count += 1
            ngt = 1
            recall = 0

        props_output = props_output.detach().cpu().numpy()

        if save:
            np.save(out_file, prop_feats_output)

        return {
            'out_props': props_output,
            'recall': recall,
            'num_prop': nppf,
            'num_gt': ngt
        }