示例#1
0
def worker(output_dir, video_dir):
    """Crop an instance patch for every annotated frame of one video.

    Frames are read from ``<video_dir>/img/*.jpg`` (ordered by the integer
    file stem) and boxes from ``<video_dir>/groundtruth_rect.txt``, one
    ``x y w h`` box per line with ``x, y`` the top-left corner.  Each frame is
    cropped with ``get_instance_image`` and written to
    ``<output_dir>/<video name>/``.

    Args:
        output_dir: Directory under which the per-video folder is created
            (missing parent directories are created as well).
        video_dir: Directory of a single video; a trailing path separator is
            tolerated.

    Returns:
        ``(video_name, trajs)`` where ``trajs`` maps the single track id ``0``
        to the list of frame stems that were cropped.

    Raises:
        AssertionError: If the number of annotation lines differs from the
            number of images.
    """
    # os.path handles both '/' and '\\' on Windows, so no per-platform
    # branching is needed to extract file and directory names.
    image_names = sorted(
        glob(os.path.join(video_dir, 'img/*.jpg')),
        key=lambda path: int(os.path.basename(path).split('.')[0]))
    video_name = os.path.basename(os.path.normpath(video_dir))
    save_folder = os.path.join(output_dir, video_name)
    os.makedirs(save_folder, exist_ok=True)

    trkid = 0
    trajs = {trkid: []}
    # TODO: the "Jogging" video annotates two people (e.g.
    # "groundtruth_rect.1.txt"); only the default file is handled for now.
    anno_str = "groundtruth_rect.txt"
    vid_anno_path = os.path.join(video_dir, anno_str)
    bboxs = []
    with open(vid_anno_path, 'r') as f:
        for line in f:
            # Some files separate fields with commas, others with spaces or
            # tabs; normalising commas to spaces accepts all of them.
            fields = line.replace(',', ' ').split()
            if not fields:
                continue  # blank line (e.g. trailing newline at end of file)
            # The ground-truth x, y are decremented by 1
            # (presumably 1-based -> 0-based; confirm against the dataset).
            bboxs.append(np.array(list(map(int, fields))) - [1, 1, 0, 0])
    assert len(bboxs) == len(image_names), 'bboxs的数量必须要和image_names的一致'

    for image_name, bbox in zip(image_names, bboxs):
        # bbox is x, y, w, h with (x, y) the top-left corner.
        if (bbox == [-1, -1, 0, 0]).all():
            # TODO: some videos (e.g. Board) end with an all-zero annotation
            # line, which becomes [-1, -1, 0, 0] after the shift; skip it.
            continue
        img = cv2.imread(image_name)
        img_mean = tuple(map(int, img.mean(axis=(0, 1))))
        filename = os.path.basename(image_name).split('.')[0]
        trajs[trkid].append(filename)
        instance_crop_size = int(
            np.ceil((config.instance_size + config.max_translate * 2) *
                    (1 + config.scale_resize)))
        # Convert to cx, cy, w, h, e.g. (213.5, 253.0, 34, 81).
        bbox = np.array([
            bbox[0] + bbox[2] / 2 - 1 / 2, bbox[1] + bbox[3] / 2 - 1 / 2,
            bbox[2], bbox[3]
        ])
        instance_img, w, h, _ = get_instance_image(img, bbox,
                                                   config.exemplar_size,
                                                   instance_crop_size,
                                                   config.context_amount,
                                                   img_mean)
        instance_img_name = os.path.join(
            save_folder,
            filename + ".{:02d}.x_{:.2f}_{:.2f}.jpg".format(trkid, w, h))
        cv2.imwrite(instance_img_name, instance_img)
    return video_name, trajs
示例#2
0
    def update(self, frame):
        """Locate the target in ``frame`` starting from the previous state.

        Args:
            frame: Image to search (described as an RGB image by the original
                author); its first two shape entries are used as the clipping
                bounds for y and x respectively.

        Returns:
            ``(bbox, score)``.  ``bbox`` is a 4-tuple of ``np.float64`` built
            from ``[x, y, w, h]``: the previous ``self.pos`` plus the predicted
            offset, and the smoothed target size, each clipped to the frame.
            ``score`` is the foreground probability of the selected
            prediction.  ``self.pos``, ``self.target_sz`` and ``self.bbox``
            are updated as a side effect.
        """
        search_img, _, _, scale_x = get_instance_image(
            frame, self.bbox, config.exemplar_size, config.instance_size,
            config.context_amount, self.img_mean)
        batch = self.transforms(search_img)[None, :, :, :]
        raw_score, raw_regression = self.model.track(batch.cuda())

        # Flatten the network outputs to one row per prediction.
        num_pred = config.anchor_num * config.score_size * config.score_size
        conf_logits = raw_score.reshape(-1, 2, num_pred).permute(0, 2, 1)
        offsets = raw_regression.reshape(-1, 4, num_pred).permute(0, 2, 1)

        box_pred = box_transform_inv(self.anchors,
                                     offsets[0].cpu().detach().numpy())
        score_pred = F.softmax(conf_logits,
                               dim=2)[0, :, 1].cpu().detach().numpy()

        def symmetric(r):
            # max(r, 1/r): the same value whether r grew or shrank.
            return np.maximum(r, 1. / r)

        def padded_size(w, h):
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        prev_scaled = self.target_sz * scale_x
        # Scale-change penalty term.
        s_c = symmetric(padded_size(box_pred[:, 2], box_pred[:, 3]) /
                        (padded_size(prev_scaled[0], prev_scaled[1])))
        # Aspect-ratio-change penalty term.
        r_c = symmetric((self.target_sz[0] / self.target_sz[1]) /
                        (box_pred[:, 2] / box_pred[:, 3]))
        penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)

        # Blend the penalised score with the stored window prior.
        pscore = penalty * score_pred
        pscore = pscore * (1 - config.window_influence) + \
            self.window * config.window_influence
        best = np.argmax(pscore)
        target = box_pred[best, :] / scale_x

        # Size update rate of the selected prediction.
        lr = penalty[best] * score_pred[best] * config.lr_box

        frame_h, frame_w = frame.shape[0], frame.shape[1]
        res_x = np.clip(target[0] + self.pos[0], 0, frame_w)
        res_y = np.clip(target[1] + self.pos[1], 0, frame_h)
        res_w, res_h = (
            np.clip(self.target_sz[k] * (1 - lr) + target[2 + k] * lr,
                    config.min_scale * self.origin_target_sz[k],
                    config.max_scale * self.origin_target_sz[k])
            for k in (0, 1))

        self.pos = np.array([res_x, res_y])
        self.target_sz = np.array([res_w, res_h])

        bbox = np.array([res_x, res_y, res_w, res_h])
        lower = (0, 0, 10, 10)
        upper = (frame_w, frame_h, frame_w, frame_h)
        self.bbox = tuple(
            np.clip(value, lo, hi).astype(np.float64)
            for value, lo, hi in zip(bbox, lower, upper))
        return self.bbox, score_pred[best]
示例#3
0
def worker(output_dir, video_dir):
    """Crop instance patches for every annotated object of one video.

    Two layouts are supported, selected by whether ``'YT-BB'`` occurs in
    ``video_dir``:

    * YT-BB: ``*.jpg`` frames, annotations loaded from a pickle under
      ``/dataset_ssd/std_xml_ytb/`` holding boxes normalised to the image
      size as ``xmin, xmax, ymin, ymax``.
    * otherwise: ``*.JPEG`` frames with one XML annotation per frame, found
      by replacing ``Data`` with ``Annotations`` and ``JPEG`` with ``xml`` in
      the image path.

    Args:
        output_dir: Directory under which the per-video folder is created
            (missing parent directories are created as well).
        video_dir: Directory holding the frames of a single video.

    Returns:
        ``(video_name, trajs)`` where ``trajs`` maps each track id to the list
        of frame names in which that track was cropped.
    """
    instance_crop_size = 500
    video_base = os.path.basename(os.path.normpath(video_dir))
    if 'YT-BB' in video_dir:
        image_names = sorted(
            glob(os.path.join(video_dir, '*.jpg')),
            key=lambda x: int(os.path.basename(x).split('_')[1]))
        video_name = '_'.join(video_base.split('_')[:-1])

        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe because the file comes from a fixed, locally prepared path.
        with open('/dataset_ssd/std_xml_ytb/' + video_name + '.pkl',
                  'rb') as f:
            std_xml_dict = pickle.load(f)

        save_folder = os.path.join(output_dir, video_name)
        os.makedirs(save_folder, exist_ok=True)

        trajs = {}
        for image_name in image_names:
            img = cv2.imread(image_name)
            # Keep the image size under its own names: the crop call below
            # also returns a (w, h) pair that must not overwrite it, because
            # the size is needed again for every further track in this frame.
            img_h, img_w, _ = img.shape
            img_mean = tuple(map(int, img.mean(axis=(0, 1))))
            frame = image_name.split('_')[-2]
            # Frame zero is keyed as '0' regardless of zero padding.
            anno = std_xml_dict['0' if int(frame) == 0 else frame]

            filename = '_'.join(os.path.basename(image_name).split('_')[:-1])
            for class_id, tracks in anno.items():
                for track_id, record in tracks.items():
                    (_, present, xmin_scale, xmax_scale, ymin_scale,
                     ymax_scale) = record
                    if present != 'present':
                        continue  # object absent in this frame
                    new_track_id = class_id.zfill(3) + track_id.zfill(3)
                    trajs.setdefault(new_track_id, []).append(filename)
                    # Normalised xmin, xmax, ymin, ymax -> pixels.
                    bbox = np.array(
                        list(
                            map(float, [
                                xmin_scale, xmax_scale, ymin_scale, ymax_scale
                            ]))) * np.array([img_w, img_w, img_h, img_h])
                    # -> cx, cy, w, h
                    bbox = np.array([(bbox[1] + bbox[0]) / 2,
                                     (bbox[3] + bbox[2]) / 2,
                                     bbox[1] - bbox[0] + 1,
                                     bbox[3] - bbox[2] + 1])
                    instance_img, crop_w, crop_h, _ = get_instance_image(
                        img, bbox, config.exemplar_size,
                        instance_crop_size, config.context_amount,
                        img_mean)
                    instance_img_name = os.path.join(
                        save_folder,
                        filename + ".{}.x_{:.2f}_{:.2f}.jpg".format(
                            new_track_id, crop_w, crop_h))
                    cv2.imwrite(instance_img_name, instance_img)

    else:
        image_names = sorted(
            glob(os.path.join(video_dir, '*.JPEG')),
            key=lambda x: int(os.path.basename(x).split('.')[0]))
        video_name = video_base
        save_folder = os.path.join(output_dir, video_name)
        os.makedirs(save_folder, exist_ok=True)

        trajs = {}
        for image_name in image_names:
            img = cv2.imread(image_name)
            img_mean = tuple(map(int, img.mean(axis=(0, 1))))
            anno_name = image_name.replace('Data', 'Annotations')
            anno_name = anno_name.replace('JPEG', 'xml')
            root = ET.parse(anno_name).getroot()
            filename = root.find('filename').text
            for obj in root.iter('object'):
                bndbox = obj.find('bndbox')
                xmin, ymin, xmax, ymax = map(int, [
                    bndbox.find('xmin').text,
                    bndbox.find('ymin').text,
                    bndbox.find('xmax').text,
                    bndbox.find('ymax').text
                ])
                trkid = int(obj.find('trackid').text)
                trajs.setdefault(trkid, []).append(filename)
                # Corner box -> cx, cy, w, h
                bbox = np.array([(xmax + xmin) / 2, (ymax + ymin) / 2,
                                 xmax - xmin + 1, ymax - ymin + 1])

                instance_img, crop_w, crop_h, _ = get_instance_image(
                    img, bbox, config.exemplar_size, instance_crop_size,
                    config.context_amount, img_mean)
                instance_img_name = os.path.join(
                    save_folder, filename +
                    ".{:02d}.x_{:.2f}_{:.2f}.jpg".format(
                        trkid, crop_w, crop_h))
                cv2.imwrite(instance_img_name, instance_img)
    return video_name, trajs
示例#4
0
    def update(self, frame):
        """Track the target in ``frame`` starting from the previous state.

        Predictions are made per output level of ``self.model.track``; the
        best candidate of each evaluated level is recorded and the one with
        the highest penalised score wins.  At present only the level at
        index 1 is evaluated.

        Args:
            frame: Image to search (described as an RGB image by the original
                author); ``frame.shape[0]`` / ``frame.shape[1]`` bound the y /
                x coordinates respectively.

        Returns:
            ``(bbox, score)``.  ``bbox`` is a 4-tuple of ``np.float64`` built
            from ``[x, y, w, h]``: the previous ``self.pos`` plus the predicted
            offset, and the smoothed target size, each clipped to the frame.
            ``score`` is the penalised, window-blended score of the winning
            prediction.  ``self.pos``, ``self.target_sz`` and ``self.bbox``
            are updated as a side effect.

        Raises:
            ValueError: From ``np.argmax`` when no level was evaluated (the
                model returned fewer than two levels).
        """
        # TODO: document what the values returned by get_instance_image are.
        instance_img, _, _, scale_x = get_instance_image(
            frame, self.bbox, config.exemplar_size, config.instance_size,
            config.context_amount, self.img_mean)
        instance_img = self.transforms(instance_img)[None, :, :, :]
        if config.CUDA:
            instance_img = instance_img.cuda()
        pred_scores, pred_regressions = self.model.track(instance_img)

        def change(r):
            # Element-wise max(r, 1/r): symmetric for growth and shrinkage.
            return np.maximum(r, 1. / r)

        def sz(w, h):
            # Size of the box after padding both sides by the mean of w and h.
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        frame_h, frame_w = frame.shape[0], frame.shape[1]

        # Predict per level, keep the best match of each level together with
        # its score, then return the overall best.
        # NOTE: the original author also wanted to count how often a level
        # other than the 19x19 one wins, to analyse the effect of the FPN.
        results_bboxs = []
        results_scores = []
        for i, pred_score in enumerate(pred_scores):
            if i != 1:
                continue  # only the level at index 1 is evaluated for now
            # Author's example shapes: score [1, 6, 37, 37],
            # regression [1, 12, 37, 37].
            pred_regression = pred_regressions[i]
            score_size = config.FEATURE_MAP_SIZE[i]  # e.g. 37
            anchor_num = 3  # fixed for now, i.e. ratios [0.5, 1, 2]
            num_pred = anchor_num * score_size * score_size  # e.g. 4107

            # Flatten to one row per prediction.
            pred_conf = pred_score.reshape(-1, 2, num_pred).permute(0, 2, 1)
            pred_offset = pred_regression.reshape(-1, 4,
                                                  num_pred).permute(0, 2, 1)

            delta = pred_offset[0].cpu().detach().numpy()
            box_pred = box_transform_inv(self.anchors[i], delta)
            score_pred = F.softmax(pred_conf,
                                   dim=2)[0, :, 1].cpu().detach().numpy()

            # Scale / ratio penalties; the author notes that the related
            # hyper-parameters have not been tuned.
            prev_scaled = self.target_sz * scale_x
            s_c = change(sz(box_pred[:, 2], box_pred[:, 3]) /
                         (sz(prev_scaled[0], prev_scaled[1])))  # scale
            r_c = change((self.target_sz[0] / self.target_sz[1]) /
                         (box_pred[:, 2] / box_pred[:, 3]))  # ratio
            penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)
            pscore = penalty * score_pred
            pscore = pscore * (1 - config.window_influence) + \
                self.windows[i] * config.window_influence
            best_pscore_id = np.argmax(pscore)
            target = box_pred[best_pscore_id, :] / scale_x

            lr = penalty[best_pscore_id] * \
                score_pred[best_pscore_id] * config.lr_box

            # x is bounded by the frame width (shape[1]) and y by the frame
            # height (shape[0]); the bounds were previously swapped here.
            res_x = np.clip(target[0] + self.pos[0], 0, frame_w)
            res_y = np.clip(target[1] + self.pos[1], 0, frame_h)

            # Smooth the size and keep it within [min_scale, max_scale]
            # times the original target size.
            res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr,
                            config.min_scale * self.origin_target_sz[0],
                            config.max_scale * self.origin_target_sz[0])
            res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr,
                            config.min_scale * self.origin_target_sz[1],
                            config.max_scale * self.origin_target_sz[1])

            results_bboxs.append(np.array([res_x, res_y, res_w, res_h]))
            results_scores.append(pscore[best_pscore_id])

        max_score_id = np.argmax(results_scores)
        best_box = results_bboxs[max_score_id]
        best_score = results_scores[max_score_id]

        x, y, w, h = best_box
        self.pos = np.array([x, y])
        self.target_sz = np.array([w, h])
        self.bbox = (
            np.clip(best_box[0], 0, frame_w).astype(np.float64),
            np.clip(best_box[1], 0, frame_h).astype(np.float64),
            np.clip(best_box[2], 10, frame_w).astype(np.float64),
            np.clip(best_box[3], 10, frame_h).astype(np.float64))

        return self.bbox, best_score
示例#5
0
def worker(output_dir, video_dir):
    """Crop instance patches for every annotated object of one video.

    Two layouts are supported, selected by whether ``'YT-BB'`` occurs in
    ``video_dir``:

    * YT-BB: ``*.jpg`` frames ordered by the second ``_``-separated field of
      the file name; annotations live under a fixed XML root, in a folder
      named after the video.
    * otherwise: ``*.JPEG`` frames ordered by the integer file stem;
      annotations are found by replacing ``Data`` with ``Annotations`` and
      ``JPEG`` with ``xml`` in the image path.

    Args:
        output_dir: Directory under which the per-video folder is created
            (missing parent directories are created as well).
        video_dir: Directory holding the frames of a single video; a trailing
            path separator is tolerated.

    Returns:
        ``(video_name, trajs)`` where ``trajs`` maps each integer track id to
        the list of frame names in which that track was cropped.
    """
    video_name = os.path.basename(os.path.normpath(video_dir))
    save_folder = os.path.join(output_dir, video_name)
    os.makedirs(save_folder, exist_ok=True)

    if 'YT-BB' in video_dir:
        anno_path = '/mnt/diska1/YT-BB/xml/youtube_dection_frame_xml_temp'
        image_names = sorted(
            glob(os.path.join(video_dir, '*.jpg')),
            key=lambda x: int(os.path.basename(x).split('_')[1]))

        def anno_name_for(image_name):
            return os.path.join(
                anno_path, video_name,
                os.path.basename(image_name)).replace('.jpg', '.xml')
    else:
        image_names = sorted(
            glob(os.path.join(video_dir, '*.JPEG')),
            key=lambda x: int(os.path.basename(x).split('.')[0]))

        def anno_name_for(image_name):
            return image_name.replace('Data', 'Annotations').replace(
                'JPEG', 'xml')

    trajs = _crop_annotated_frames(image_names, save_folder, anno_name_for)
    return video_name, trajs


def _crop_annotated_frames(image_names, save_folder, anno_name_for):
    """Crop every XML-annotated object of the given frames into save_folder.

    Args:
        image_names: Frame image paths, already in processing order.
        save_folder: Existing directory that receives the cropped patches.
        anno_name_for: Callable mapping an image path to its XML annotation
            path.

    Returns:
        Dict mapping each integer track id to the list of frame names (taken
        from the XML ``filename`` element) in which it was cropped.
    """
    trajs = {}
    for image_name in image_names:
        img = cv2.imread(image_name)
        img_mean = tuple(map(int, img.mean(axis=(0, 1))))
        # The crop size does not depend on the object, so compute it once
        # per frame instead of once per object.
        instance_crop_size = int(
            np.ceil((config.instance_size + config.max_translate * 2) *
                    (1 + config.scale_resize)))
        root = ET.parse(anno_name_for(image_name)).getroot()
        filename = root.find('filename').text
        for obj in root.iter('object'):
            bndbox = obj.find('bndbox')
            xmin, ymin, xmax, ymax = map(int, [bndbox.find('xmin').text,
                                               bndbox.find('ymin').text,
                                               bndbox.find('xmax').text,
                                               bndbox.find('ymax').text])
            trkid = int(obj.find('trackid').text)
            trajs.setdefault(trkid, []).append(filename)
            # Corner box -> cx, cy, w, h
            bbox = np.array(
                [(xmax + xmin) / 2, (ymax + ymin) / 2, xmax - xmin + 1,
                 ymax - ymin + 1])

            instance_img, w, h, _ = get_instance_image(
                img, bbox, config.exemplar_size, instance_crop_size,
                config.context_amount, img_mean)
            instance_img_name = os.path.join(
                save_folder,
                filename + ".{:02d}.x_{:.2f}_{:.2f}.jpg".format(trkid, w, h))
            cv2.imwrite(instance_img_name, instance_img)
    return trajs