Пример #1
0
    def _get_train_label(self, index, anchor_xmin, anchor_xmax):
        """Build the training targets for the video at position ``index``.

        Annotated segments (given in seconds) are converted to fractions of
        the corrected video duration and clamped to [0, 1].  From these the
        method derives a dense IoU map and per-anchor start/end scores.

        Args:
            index: Position of the video in ``self.video_list``.
            anchor_xmin: Sequence of anchor left edges.
            anchor_xmax: Sequence of anchor right edges, indexed in parallel
                with ``anchor_xmin``.

        Returns:
            A tuple ``(match_score_start, match_score_end, gt_iou_map)`` of
            ``torch.Tensor`` objects.  The two score tensors hold one value
            per anchor; ``gt_iou_map`` has shape
            ``(temporal_scale, temporal_scale)`` and only entries with
            column >= row are filled, the rest stay zero.
        """
        info = self.video_dict[self.video_list[index]]
        total_frames = info['duration_frame']
        total_seconds = info['duration_second']
        used_frames = info['feature_frame']
        # Some frames are not used by the features, so scale the duration by
        # the fraction of frames that are.
        corrected_second = float(used_frames) / total_frames * total_seconds
        annotations = info['annotations']  # segments are in seconds, not frames

        # Seconds -> fraction of the corrected duration, clamped to [0, 1].
        gt_bbox = np.array([
            [max(min(1, ann['segment'][0] / corrected_second), 0),
             max(min(1, ann['segment'][1] / corrected_second), 0)]
            for ann in annotations
        ])
        gt_xmins = gt_bbox[:, 0]
        gt_xmaxs = gt_bbox[:, 1]

        # Generate R_s and R_e: fixed-width windows (3 * temporal_gap wide)
        # centred on every ground-truth start and end.
        half_width = 3 * self.temporal_gap / 2
        gt_start_bboxs = np.stack(
            (gt_xmins - half_width, gt_xmins + half_width), axis=1)
        gt_end_bboxs = np.stack(
            (gt_xmaxs - half_width, gt_xmaxs + half_width), axis=1)

        # Best IoU against any ground-truth segment for every candidate
        # [row * gap, (col + 1) * gap] with col >= row.
        scale = self.temporal_scale
        gap = self.temporal_gap
        iou_grid = np.zeros([scale, scale])
        for row in range(scale):
            for col in range(row, scale):
                overlaps = iou_with_anchors(
                    row * gap, (col + 1) * gap, gt_xmins, gt_xmaxs)
                iou_grid[row, col] = np.max(overlaps)
        gt_iou_map = torch.Tensor(iou_grid)

        # Per-anchor best score against the start windows, then the end
        # windows (ioa_with_anchors is defined elsewhere in the project).
        start_lo, start_hi = gt_start_bboxs[:, 0], gt_start_bboxs[:, 1]
        end_lo, end_hi = gt_end_bboxs[:, 0], gt_end_bboxs[:, 1]
        start_scores = [
            np.max(ioa_with_anchors(lo, anchor_xmax[k], start_lo, start_hi))
            for k, lo in enumerate(anchor_xmin)
        ]
        end_scores = [
            np.max(ioa_with_anchors(lo, anchor_xmax[k], end_lo, end_hi))
            for k, lo in enumerate(anchor_xmin)
        ]
        match_score_start = torch.Tensor(start_scores)
        match_score_end = torch.Tensor(end_scores)

        return match_score_start, match_score_end, gt_iou_map
Пример #2
0
def soft_nms(df, alpha, t1, t2):
    """Apply soft non-maximum suppression to a table of proposals.

    Args:
        df: DataFrame of proposals with ``score``, ``xmin`` and ``xmax``
            columns.
        alpha: Denominator of the Gaussian decay ``exp(-iou**2 / alpha)``.
        t1: Lower IoU threshold of the soft NMS.
        t2: Upper IoU threshold of the soft NMS.  The threshold actually
            applied is ``t1 + (t2 - t1) * width``, where ``width`` is the
            length of the currently selected proposal.

    Returns:
        A new DataFrame with ``score``, ``xmin`` and ``xmax`` columns holding
        the selected proposals in selection order.  Selection stops once 101
        proposals have been picked or only one candidate remains; that last
        candidate is never emitted.
    """
    ranked = df.sort_values(by="score", ascending=False)  # highest score first
    tstart = list(ranked["xmin"].values)
    tend = list(ranked["xmax"].values)
    tscore = list(ranked["score"].values)

    rstart, rend, rscore = [], [], []

    while len(tscore) > 1 and len(rscore) < 101:
        max_index = tscore.index(max(tscore))
        best_start = tstart[max_index]
        best_end = tend[max_index]
        tmp_iou_list = iou_with_anchors(np.array(tstart), np.array(tend),
                                        best_start, best_end)
        # The threshold depends only on the selected proposal, so compute it
        # once per round.
        threshold = t1 + (t2 - t1) * (best_end - best_start)

        for idx in range(len(tscore)):
            if idx == max_index:
                continue
            overlap = tmp_iou_list[idx]
            if overlap > threshold:
                # Gaussian decay of heavily overlapping candidates.
                tscore[idx] = tscore[idx] * np.exp(-np.square(overlap) / alpha)

        # Move the selected proposal from the candidates to the results.
        rstart.append(tstart.pop(max_index))
        rend.append(tend.pop(max_index))
        rscore.append(tscore.pop(max_index))

    kept = pd.DataFrame()
    kept['score'] = rscore
    kept['xmin'] = rstart
    kept['xmax'] = rend
    return kept
    def _get_train_label(self, index):
        """Build the training targets for the video at position ``index``.

        Event segments (given in seconds) are converted to fractions of the
        video duration and clamped to [0, 1].  From these the method derives
        an IoU map over the proposals listed in ``self.match_map`` and
        per-anchor start/end scores for ``self.anchor_xmin`` /
        ``self.anchor_xmax``.

        Args:
            index: Position of the video in ``self.video_ids``.

        Returns:
            A tuple ``(match_score_start, match_score_end, gt_iou_map)``.
            ``gt_iou_map`` is a ``torch.Tensor`` of shape
            ``(max_duration, temporal_dim)`` holding, per proposal, the
            maximum IoU over all events; the two score tensors hold one value
            per anchor.
        """
        video_info = self.event_dict[self.video_ids[index]]
        events = video_info['events']  # segments are in seconds, not frames
        duration = video_info['duration']

        # Seconds -> fraction of the duration, plus one IoU map per event.
        gt_bbox = []
        per_event_maps = []
        for event in events:
            segment = event['segment']
            rel_start = max(min(1, segment[0] / duration), 0)
            rel_end = max(min(1, segment[1] / duration), 0)
            gt_bbox.append([rel_start, rel_end])
            overlaps = iou_with_anchors(self.match_map[:, 0],
                                        self.match_map[:, 1],
                                        rel_start, rel_end)
            per_event_maps.append(
                np.reshape(overlaps, [self.max_duration, self.temporal_dim]))
        # Keep, for every proposal, its best IoU across all events.
        gt_iou_map = torch.Tensor(np.max(np.array(per_event_maps), axis=0))

        # Generate R_s and R_e: fixed-width windows (3 * temporal_gap wide)
        # centred on every ground-truth start and end.
        gt_bbox = np.array(gt_bbox)
        gt_xmins = gt_bbox[:, 0]
        gt_xmaxs = gt_bbox[:, 1]
        half_width = 3 * self.temporal_gap / 2
        gt_start_bboxs = np.stack(
            (gt_xmins - half_width, gt_xmins + half_width), axis=1)
        gt_end_bboxs = np.stack(
            (gt_xmaxs - half_width, gt_xmaxs + half_width), axis=1)

        # Per-anchor best score against the start windows, then the end
        # windows (ioa_with_anchors is defined elsewhere in the project).
        start_lo, start_hi = gt_start_bboxs[:, 0], gt_start_bboxs[:, 1]
        end_lo, end_hi = gt_end_bboxs[:, 0], gt_end_bboxs[:, 1]
        start_scores = [
            np.max(ioa_with_anchors(lo, self.anchor_xmax[k],
                                    start_lo, start_hi))
            for k, lo in enumerate(self.anchor_xmin)
        ]
        end_scores = [
            np.max(ioa_with_anchors(lo, self.anchor_xmax[k], end_lo, end_hi))
            for k, lo in enumerate(self.anchor_xmin)
        ]
        match_score_start = torch.tensor(start_scores)
        match_score_end = torch.tensor(end_scores)

        return match_score_start, match_score_end, gt_iou_map
Пример #4
0
    def _get_label(self, duration, timestamps, video_length):
        """Build start/end label vectors and a confidence map for one video.

        Args:
            duration: Video duration; every timestamp is divided by it.
            timestamps: Iterable of ``(start, end)`` pairs in the same unit as
                ``duration``.
            video_length: Not used by this method.

        Returns:
            A tuple ``(start_label_map, end_label_map, confidence_map)`` of
            tensors built with ``torch.tensor``.  The label maps hold one
            value per anchor in ``self.anchor_xmin`` / ``self.anchor_xmax``;
            ``confidence_map`` has shape ``(temporal_scale, temporal_scale)``
            and only entries with column >= row are filled.
        """
        per_unit = 1 / float(duration)
        # Rescale to fractions of the duration, clamped to [0, 1].
        gt_bbox = np.array([
            [max(min(float(stamp[0]) * per_unit, 1), 0),
             min(max(float(stamp[1]) * per_unit, 0), 1)]
            for stamp in timestamps
        ])
        gt_xmins = gt_bbox[:, 0]
        gt_xmaxs = gt_bbox[:, 1]

        # Fixed-width windows (3 / temporal_scale wide) centred on every
        # ground-truth start and end.
        half_width = 3. / self.temporal_scale / 2
        gt_start_bboxs = np.stack(
            (gt_xmins - half_width, gt_xmins + half_width), axis=1)
        gt_end_bboxs = np.stack(
            (gt_xmaxs - half_width, gt_xmaxs + half_width), axis=1)

        # Best IoU against any ground-truth segment for every candidate
        # [row / scale, (col + 1) / scale] with col >= row.
        grid = np.zeros((self.temporal_scale, self.temporal_scale))
        for row in range(self.temporal_scale):
            for col in range(row, self.temporal_scale):
                overlaps = iou_with_anchors(row / self.temporal_scale,
                                            (col + 1) / self.temporal_scale,
                                            gt_xmins, gt_xmaxs)
                grid[row, col] = np.max(overlaps)
        confidence_map = torch.tensor(grid)

        # For each anchor: best score against the start windows and against
        # the end windows (ioa_with_anchors is defined elsewhere).
        start_lo, start_hi = gt_start_bboxs[:, 0], gt_start_bboxs[:, 1]
        end_lo, end_hi = gt_end_bboxs[:, 0], gt_end_bboxs[:, 1]
        scored = [
            (np.max(ioa_with_anchors(lo, hi, start_lo, start_hi)),
             np.max(ioa_with_anchors(lo, hi, end_lo, end_hi)))
            for lo, hi in zip(self.anchor_xmin, self.anchor_xmax)
        ]
        start_label_map = torch.tensor([start for start, _ in scored])
        end_label_map = torch.tensor([end for _, end in scored])

        return start_label_map, end_label_map, confidence_map
Пример #5
0
    def _get_train_label(self, index, anchor_xmin, anchor_xmax):
        """Build the training targets for the video at position ``index``.

        Annotated segments (given in seconds) are converted to fractions of
        the corrected video duration and clamped to [0, 1].  From these the
        method derives an IoU map over the proposals listed in
        ``self.match_map`` and per-anchor start/end scores.

        Args:
            index: Position of the video in ``self.video_list``.
            anchor_xmin: Sequence of anchor left edges.
            anchor_xmax: Sequence of anchor right edges, indexed in parallel
                with ``anchor_xmin``.

        Returns:
            A tuple ``(match_score_start, match_score_end, gt_iou_map)`` of
            ``torch.Tensor`` objects.  ``gt_iou_map`` has shape
            ``(temporal_scale, temporal_scale)`` and holds, per proposal, the
            maximum IoU over all annotations; the two score tensors hold one
            value per anchor.
        """
        info = self.video_dict[self.video_list[index]]
        total_frames = info['duration_frame']  # e.g. 1128
        total_seconds = info['duration_second']  # e.g. 47.114
        used_frames = info['feature_frame']  # e.g. 1120
        # Some frames are not used by the features, so scale the duration by
        # the fraction of frames that are (e.g. 46.77).
        corrected_second = float(used_frames) / total_frames * total_seconds
        # Segments are in seconds, not frames, e.g.
        # [{'segment': [0.01, 37.11], 'labels': 'waxing skis'}]
        annotations = info['annotations']

        # Express start/end times as a fraction of the video duration and
        # compute one IoU map per annotation.
        gt_bbox = []
        per_annotation_maps = []
        for ann in annotations:
            segment = ann['segment']
            rel_start = max(min(1, segment[0] / corrected_second), 0)
            rel_end = max(min(1, segment[1] / corrected_second), 0)
            gt_bbox.append([rel_start, rel_end])
            # IoU between this ground-truth box and every preset anchor.
            overlaps = iou_with_anchors(self.match_map[:, 0],
                                        self.match_map[:, 1],
                                        rel_start, rel_end)
            # Fold the flat vector into a square grid, e.g. [1, 2, 3, 4]
            # becomes [[1, 2], [3, 4]].  Per the original author's note, rows
            # correspond to start times and columns to durations; that
            # depends on how self.match_map is laid out (not visible here).
            per_annotation_maps.append(
                np.reshape(overlaps,
                           [self.temporal_scale, self.temporal_scale]))

        # Acts as a lookup of every possible proposal's IoU; with several
        # ground-truth boxes the maximum IoU is kept for each entry.
        gt_iou_map = torch.Tensor(
            np.max(np.array(per_annotation_maps), axis=0))

        # Generate R_s and R_e: widen each ground-truth start and end into a
        # window that is 3 * temporal_gap wide.
        gt_bbox = np.array(gt_bbox)
        gt_xmins = gt_bbox[:, 0]
        gt_xmaxs = gt_bbox[:, 1]
        half_width = 3 * self.temporal_gap / 2
        gt_start_bboxs = np.stack(
            (gt_xmins - half_width, gt_xmins + half_width), axis=1)
        gt_end_bboxs = np.stack(
            (gt_xmaxs - half_width, gt_xmaxs + half_width), axis=1)

        # NOTE(original author): unclear why the labels can be built this way.
        # Overlap of every anchor interval with the start windows, then with
        # the end windows (ioa_with_anchors is defined elsewhere).
        start_lo, start_hi = gt_start_bboxs[:, 0], gt_start_bboxs[:, 1]
        end_lo, end_hi = gt_end_bboxs[:, 0], gt_end_bboxs[:, 1]
        start_scores = [
            np.max(ioa_with_anchors(lo, anchor_xmax[k], start_lo, start_hi))
            for k, lo in enumerate(anchor_xmin)
        ]
        end_scores = [
            np.max(ioa_with_anchors(lo, anchor_xmax[k], end_lo, end_hi))
            for k, lo in enumerate(anchor_xmin)
        ]
        match_score_start = torch.Tensor(start_scores)
        match_score_end = torch.Tensor(end_scores)

        return match_score_start, match_score_end, gt_iou_map