Python gaussian_radius примеры использования

Язык программирования: Python

Пространство имен/Пакет: lib.utils.image

Метод/Функция: gaussian_radius

Примеров на hotexamples.com: 9

Python gaussian_radius - 9 примеров найдено. Это лучшие примеры Python кода для lib.utils.image.gaussian_radius, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: ctdet.py Проект: Biismarck/CanUFindMe-

    def __getitem__(self, index):
        """Build one detection sample from an image and its ``_A`` companion.

        The image registered at ``self.images[index]`` is loaded together
        with a second image whose file name is the same with ``_A`` inserted
        before the extension. Both images receive the same crop/scale/flip
        and the same affine warp, are colour-augmented (training only) and
        normalised with ``self.mean``/``self.std``, and are finally stacked
        along the first axis to form the network input. Ground-truth targets
        are rendered at ``1 / self.opt.down_ratio`` of the input resolution.

        Args:
            index: Position in ``self.images``.

        Returns:
            dict: always contains ``'input'``, ``'hm'``, ``'reg_mask'`` and
            ``'ind'``. ``'wh'`` is present unless ``opt.dense_wh`` (replaced
            by ``'dense_wh'``/``'dense_wh_mask'``) or ``opt.cat_spec_wh``
            (replaced by ``'cat_spec_wh'``/``'cat_spec_mask'``) is set.
            ``'reg'`` is added when ``opt.reg_offset`` is set and ``'meta'``
            when ``opt.debug > 0`` or the split is not ``'train'``.

        Raises:
            OSError: If either image cannot be read.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        # Companion image: "<stem>_A<ext>". splitext only inspects the last
        # path component, so names without an extension or with a dot in a
        # directory part are handled (a raw rfind('.') would misplace "_A").
        stem, ext = os.path.splitext(file_name)
        newpath = os.path.join(self.img_dir, stem + '_A' + ext)

        img1 = cv2.imread(img_path)
        img2 = cv2.imread(newpath)
        # cv2.imread reports failure by returning None instead of raising.
        if img1 is None:
            raise OSError('failed to read image: {}'.format(img_path))
        if img2 is None:
            raise OSError('failed to read image: {}'.format(newpath))

        height, width = img1.shape[0], img1.shape[1]

        # c: crop centre (x, y); s: crop scale. Both feed get_affine_transform.
        c = np.array([img1.shape[1] / 2., img1.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img1.shape[0], img1.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random crop: random scale factor plus a random centre that
                # stays at least one border width away from the image edge.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img1.shape[1])
                h_border = self._get_border(128, img1.shape[0])
                c[0] = np.random.randint(low=w_border, high=img1.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img1.shape[0] - h_border)
            else:
                # No random crop: clipped Gaussian jitter of centre and scale.
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                # Horizontal flip is applied to both images and to the centre.
                flipped = True
                img1 = img1[:, ::-1, :]
                img2 = img2[:, ::-1, :]
                c[0] = width - c[0] - 1

        # The same input transform is used for both images so they stay aligned.
        trans_input = get_affine_transform(
            c, s, 0, [input_w, input_h])
        inp1 = cv2.warpAffine(img1, trans_input,
                              (input_w, input_h),
                              flags=cv2.INTER_LINEAR)
        inp2 = cv2.warpAffine(img2, trans_input,
                              (input_w, input_h),
                              flags=cv2.INTER_LINEAR)
        inp1 = (inp1.astype(np.float32) / 255.)
        inp2 = (inp2.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp1, self._eig_val, self._eig_vec)
            color_aug(self._data_rng, inp2, self._eig_val, self._eig_vec)
        inp1 = (inp1 - self.mean) / self.std
        inp1 = inp1.transpose(2, 0, 1)  # HWC -> CHW
        inp2 = (inp2 - self.mean) / self.std
        inp2 = inp2.transpose(2, 0, 1)  # HWC -> CHW

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        # Target buffers; per-object arrays have one row per object slot.
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # bbox is used below as [x1, y1, x2, y2].
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Map both corners into output-map coordinates and clip to the map.
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                # With MSE loss the configured radius replaces the computed one.
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                # Index of the integer centre in the flattened output map.
                ind[k] = ct_int[1] * output_w + ct_int[0]
                # Sub-pixel offset lost by truncating the centre to an int.
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
        # Stack the two processed images along the channel axis (six channels
        # when each image has three).
        inp = np.vstack((inp1, inp2))
        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            # A single all-zero row stands in when no object survived.
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret

Пример #2

Показать файл

    def __getitem__(self, index):
        """Build one detection sample (network input plus ground-truth targets).

        Loads the image registered at ``self.images[index]`` and its
        annotations, applies random crop/scale/flip and colour augmentation
        when ``self.split == 'train'``, warps the image to the network input
        size and renders the targets at ``1 / self.opt.down_ratio`` of that
        size.

        Args:
            index: Position in ``self.images``.

        Returns:
            dict: contains ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
            ``'ori_wh'``, ``'cxcy'``, ``'ori_cxcy'`` and ``'cls_idx'``.
            ``'wh'`` is present unless ``opt.dense_wh`` or ``opt.cat_spec_wh``
            replaces it with the corresponding dense / category-specific
            arrays. ``'reg'`` is added when ``opt.reg_offset`` is set and
            ``'meta'`` when ``opt.debug > 0`` or the split is not ``'train'``.
            ``ori_wh`` / ``ori_cxcy`` are taken from the box before flipping
            and before the affine transform; ``wh`` / ``cxcy`` are taken after.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(
            imgIds=[img_id])  # getAnnIds: look up the annotation ids for this image id
        anns = self.coco.loadAnns(
            ids=ann_ids)  # loadAnns: fetch the full annotation records for those ids
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        # c: crop centre (x, y); s: crop scale. Both feed get_affine_transform.
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:  # original note: this condition is True in the author's setup
                s = s * np.random.choice(np.arange(
                    0.6, 1.4, 0.1))  # random scale factor drawn from np.arange(0.6, 1.4, 0.1)
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                # Random centre kept at least one border width from each edge.
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale  # original note: 0.4; scale augmentation used when random crop is off
                cf = self.opt.shift  # original note: 0.1; shift augmentation used when random crop is off
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                # Horizontal flip of the image; the centre is mirrored to match.
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0,
                                           [input_w, input_h])  # original note: affine transform computed from three point pairs (helper not shown here)
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)  # warp the image with the affine transform
        inp = (inp.astype(np.float32) / 255.)  # convert to float and divide by 255
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std  # NOTE: line modified by zy
        inp = inp.transpose(2, 0, 1)  # HWC -> CHW

        # Added by zy: disabled debug preview of the input.
        # inp = np.transpose(inp, [1, 2, 0])
        # cv2.imshow('input', inp)
        # cv2.waitKey(0)

        output_h = input_h // self.opt.down_ratio  # targets live at the down-sampled output resolution (original note: 128x128), so GT boxes are mapped to it for the loss
        output_w = input_w // self.opt.down_ratio  # same down-sampling for the width
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        # Target buffers; per-object arrays have one row per object slot.
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        ori_wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        cxcy = np.zeros((self.max_objs, 2), dtype=np.float32)
        ori_cxcy = np.zeros((self.max_objs, 2), dtype=np.float32)
        cls_idx = np.zeros((self.max_objs), dtype=np.int64)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # bbox is used below as [x1, y1, x2, y2].
            bbox = self._coco_box_to_bbox(ann['bbox'])
            # Size and centre before flipping and before the output transform.
            ori_h, ori_w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            ori_cx, ori_cy = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Map both corners into output-map coordinates and clip to the map.
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            cx, cy = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                # With MSE loss the configured radius replaces the computed one.
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ori_wh[k] = 1. * ori_w, 1. * ori_h
                cxcy[k] = 1. * cx, 1. * cy
                ori_cxcy[k] = 1. * ori_cx, 1. * ori_cy
                cls_idx[k] = cls_id
                ind[k] = ct_int[1] * output_w + ct_int[
                    0]  # index of the integer centre in the flattened map: int_cy * output_w + int_cx
                reg[k] = ct - ct_int  # sub-pixel offset between the float centre and its integer cell
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])  # [tlx, tly, brx, bry, 1, cls_id]
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'ori_wh': ori_wh,
            'cxcy': cxcy,
            'ori_cxcy': ori_cxcy,
            'cls_idx': cls_idx
        }
        if self.opt.dense_wh:  # original note: False; 'apply weighted regression near center or just apply regression on center point.'
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:  # original note: False; 'category specific bounding box size.'
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:  # original note: True in the author's setup
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            # A single all-zero row stands in when no object survived.
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta

        # Disabled debug visualisation of the first object's heat-map crop:
        # cx, cy = int(ret['cxcy'][0][0]), int(ret['cxcy'][0][1])
        # w, h = int(ret['wh'][0][0]), int(ret['wh'][0][1])
        # input_img = np.transpose([ret['hm'][cls_id]], [1,2,0])
        # crop_img = input_img[(cy - h // 2):(cy + h // 2), (cx - w // 2):(cx + w // 2)]
        # cv2.rectangle(input_img, (cx - w // 2, cy - h // 2),
        #               (cx + w // 2, cy + h // 2), (255, 250, 250),  2)
        # cv2.imshow('input', crop_img)
        # cv2.waitKey(0)
        return ret

Пример #3

Показать файл

Файл: multi_pose.py Проект: huyalvchuan/CenterNet

    def __getitem__(self, index):
        """Build one multi-person pose sample (input plus box/keypoint targets).

        Loads the image registered at ``self.images[index]`` and its
        annotations, applies random crop/scale/rotation/flip and colour
        augmentation when ``self.split == 'train'``, warps the image to
        ``opt.input_res`` and renders box-centre and keypoint targets at
        ``opt.output_res``.

        Args:
            index: Position in ``self.images``.

        Returns:
            dict: contains ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``
            and ``'wh'``, plus ``'hps'``/``'hps_mask'`` unless
            ``opt.dense_hp`` replaces them with ``'dense_hps'``/
            ``'dense_hps_mask'``. ``'reg'``, ``'hm_hp'`` and the
            ``'hp_offset'``/``'hp_ind'``/``'hp_mask'`` triple are added when
            the matching option is set; ``'meta'`` is added when
            ``opt.debug > 0`` or the split is not ``'train'``.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        width = img.shape[1]
        # c: crop centre (x, y); s: crop scale; rot: rotation passed to
        # get_affine_transform.
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random crop: random scale factor plus a random centre that
                # stays at least one border width away from the image edge.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                # No random crop: clipped Gaussian jitter of centre and scale.
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opt.aug_rot:
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])

        # Resize/warp the image to the square network input.
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)  # HWC -> CHW

        output_res = self.opt.output_res
        num_joints = self.num_joints
        # Same transform for the coordinates: keypoints use the rotated
        # version, box corners the unrotated one.
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        # Box-centre heat map, one channel per class.
        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)

        # Keypoint heat map (one channel per joint) and dense keypoint targets.
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)

        # Per-object targets, one row per object slot.
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        # Per-keypoint targets, one row per (object slot, joint) pair.
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # bbox is used below as two corner points [x1, y1, x2, y2].
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1  # category ids are shifted to start at 0
            pts = np.array(ann['keypoints'],
                           np.float32).reshape(num_joints, 3)  # per joint: x, y, visibility flag
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                # Swap the joint pairs listed in self.flip_idx after mirroring.
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            # Box size in output-map coordinates.
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                # Original note: the radius that still reaches a given IoU,
                # obtained by solving an equation (see gaussian_radius).
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                # With MSE loss the fixed radius opt.hm_gauss is used instead.
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, int(radius))
                # Box centre (float) and its integer cell.
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                # Box size at output resolution.
                wh[k] = 1. * w, 1. * h
                # Index of the centre in the flattened output map.
                ind[k] = ct_int[1] * output_res + ct_int[0]
                # Sub-pixel offset lost by truncating the centre.
                reg[k] = ct - ct_int
                # Mark this object slot as used.
                reg_mask[k] = 1

                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    # No joint has a non-zero visibility flag: put 0.9999 at
                    # the centre but do not count the object in the
                    # regression mask.
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                # Radius used for the keypoint heat map.
                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                            if self.opt.mse_loss else max(0, int(hp_radius))

                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        # Map the joint into output-map coordinates.
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:

                            # kps stores the joint position relative to the
                            # integer box centre.
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)

                            # Sub-pixel offset of the joint itself.
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            # Flattened joint index and its validity flag.
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            # Rotated samples: fill the whole centre heat map with 0.9999 and
            # clear the box and keypoint regression masks.
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0

        # hm: box-centre heat map; reg_mask: object slot used; ind: flattened
        # centre index; wh: box size on the output map.
        # hps (kps): joint offsets from the centre; hps_mask (kps_mask): which
        # joint coordinates are valid; reg: centre sub-pixel offset.
        # hm_hp: keypoint heat map; hp_offset: joint sub-pixel offset;
        # hp_ind: flattened joint index; hp_mask: which joints are valid.
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            # Duplicate each joint's mask so it lines up with the (x, y)
            # channel pair of dense_kps.
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            # Each gt_det row is 4 box coords + score + 2 * num_joints
            # keypoint coords + class id; the empty fallback must use the same
            # width (40 for 17 joints) rather than a hard-coded constant.
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, num_joints * 2 + 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret

Пример #4

Показать файл

    def __getitem__(self, f_idx):
        """Build one multi-class detection (and optional ReID) sample.

        ``f_idx`` is a global index across all sub-datasets. It is resolved
        to a sub-dataset and a local index, the image and labels are loaded
        through ``self.get_data``, and ground-truth targets are rendered at
        ``1 / self.opt.down_ratio`` of the loaded image size.

        Args:
            f_idx: Global sample index.

        Returns:
            dict: ``'input'``, ``'hm'``, ``'reg'``, ``'wh'``, ``'ind'`` and
            ``'reg_mask'``; when ``opt.id_weight > 0`` also ``'ids'``,
            ``'cls_id_map'`` and ``'cls_tr_ids'``.

        Raises:
            IndexError: If ``f_idx`` is smaller than every start index in
                ``self.cds``.
        """
        # Resolve the owning sub-dataset: self.cds holds the global start
        # index of each sub-dataset, and the last entry that does not exceed
        # f_idx wins.
        ds_names = list(self.label_files.keys())
        ds = None
        start_index = None
        for i, c in enumerate(self.cds):
            if f_idx >= c:
                ds = ds_names[i]
                start_index = c
        if start_index is None:
            raise IndexError(
                'sample index {} precedes every sub-dataset start'.format(f_idx))

        img_path = self.img_files[ds][f_idx - start_index]
        label_path = self.label_files[ds][f_idx - start_index]

        # Get image data and label
        imgs, labels, img_path, (input_h, input_w) = self.get_data(img_path, label_path)

        use_reid = self.opt.id_weight > 0

        # With several sub-datasets (video sequences), shift each valid track
        # id by the per-dataset, per-class start offset (MCMOT training).
        if use_reid:
            for i, _ in enumerate(labels):
                if labels[i, 1] > -1:
                    cls_id = int(labels[i][0])
                    start_idx = self.tid_start_idx_of_cls_ids[ds][cls_id]
                    labels[i, 1] += start_idx

        output_h = imgs.shape[1] // self.opt.down_ratio  # floor division
        output_w = imgs.shape[2] // self.opt.down_ratio

        # Number of labelled objects, capped at the number of target slots so
        # that wh[k] / reg[k] / ind[k] below can never index out of range.
        num_objs = min(labels.shape[0], self.max_objs)

        # --- GT of detection
        hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)  # C x H x W: one heat-map channel per class
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs,), dtype=np.int64)  # one entry per object slot
        reg_mask = np.zeros((self.max_objs,), dtype=np.uint8)  # 1 only for slots that hold an object

        if use_reid:
            # --- GT of ReID
            ids = np.zeros((self.max_objs,), dtype=np.int64)  # track id per object slot, initialised to 0

            # Track id at each object centre, one plane per class.
            cls_tr_ids = np.zeros((self.num_classes, output_h, output_w), dtype=np.int64)

            # Class id at each (y, x) of the output map, initialised to -1.
            cls_id_map = np.full((1, output_h, output_w), -1, dtype=np.int64)  # 1 x H x W

        # Gauss function definition
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        # Render every ground-truth object.
        for k in range(num_objs):
            label = labels[k]

            # Box in the order: center_x, center_y, bbox_w, bbox_h.
            # NOTE(review): if labels is a NumPy array this slice is a view,
            # so the scaling below also rewrites labels in place.
            bbox = label[2:]

            # Original note: class index starts at 0.
            cls_id = int(label[0])

            # The multiplication maps the box onto the output map; the centre
            # is then clipped to stay inside it.
            bbox[[0, 2]] = bbox[[0, 2]] * output_w
            bbox[[1, 3]] = bbox[[1, 3]] * output_h
            bbox[0] = np.clip(bbox[0], 0, output_w - 1)
            bbox[1] = np.clip(bbox[1], 0, output_h - 1)

            w, h = bbox[2], bbox[3]

            if h > 0 and w > 0:
                # heat-map radius
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))  # radius >= 0
                # With MSE loss the configured radius replaces the computed one.
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius

                # bbox center coordinate
                ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                ct_int = ct.astype(np.int32)  # truncated to int

                # draw gauss weight for heat-map
                draw_gaussian(hm[cls_id], ct_int, radius)

                # --- GT of detection
                wh[k] = float(w), float(h)

                # Index of the centre in the flattened feature map: y * w + x
                ind[k] = ct_int[1] * output_w + ct_int[0]

                # offset regression
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                # --- GT of ReID
                if use_reid:
                    # Record the class at the centre cell of the output map.
                    cls_id_map[0][ct_int[1], ct_int[0]] = cls_id  # 1 x H x W

                    # Track ids start at 1 in the labels; store them 0-based.
                    cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = label[1] - 1

                    ids[k] = label[1] - 1  # classification index: track id - 1

        ret = {'input': imgs,
               'hm': hm,
               'reg': reg,
               'wh': wh,
               'ind': ind,
               'reg_mask': reg_mask}
        if use_reid:
            ret.update({'ids': ids,
                        'cls_id_map': cls_id_map,  # class id at each (x, y) of the feature map
                        'cls_tr_ids': cls_tr_ids})

        return ret

Пример #5

Показать файл

Файл: exdet.py Проект: ZY-Russell/CenterNet-FSAF-NMS-single-img

    def __getitem__(self, index):
        """Build one extreme-point training sample for the image at ``index``.

        The image is read from disk, optionally augmented (random scale,
        shift and horizontal flip when ``self.split == 'train'``), warped to
        a square of side ``opt.input_res`` and normalized.  For each
        annotation the four extreme points (used below in the order top,
        left, bottom, right) are projected onto the ``opt.output_res`` grid
        and rendered as Gaussians into per-side heatmaps plus a center
        heatmap.

        Args:
            index: Position in ``self.images`` of the image id to load.

        Returns:
            dict with ``'input'`` (channel-first float image) and the
            heatmaps ``'hm_t'``, ``'hm_l'``, ``'hm_b'``, ``'hm_r'``,
            ``'hm_c'``.  When ``opt.reg_offset`` is truthy it additionally
            holds ``'reg_mask'`` and, per side, the sub-pixel offsets
            ``'reg_*'`` and flattened grid indices ``'ind_*'``.
        """
        img_id = self.images[index]
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        img = cv2.imread(img_path)

        # NOTE(review): `height` is never read below; only `width` is used
        # (to mirror x coordinates when the image is flipped).
        height, width = img.shape[0], img.shape[1]
        # c: crop center as (x, y); s: crop size, the longer image side.
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
        s = max(img.shape[0], img.shape[1]) * 1.0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random crop: pick a scale factor from
                # np.arange(0.6, 1.4, 0.1) and a random center that stays
                # at least one border width away from the image edges.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                # Jitter instead of crop: normally distributed scale and
                # shift, clipped to [1 - sf, 1 + sf] and [-2 * cf, 2 * cf].
                sf = self.opt.scale
                cf = self.opt.shift
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                c[0] += img.shape[1] * np.clip(np.random.randn() * cf, -2 * cf,
                                               2 * cf)
                c[1] += img.shape[0] * np.clip(np.random.randn() * cf, -2 * cf,
                                               2 * cf)
            if np.random.random() < self.opt.flip:
                # Mirror the image horizontally; the extreme points are
                # mirrored to match inside the annotation loop below.
                flipped = True
                img = img[:, ::-1, :]

        # Warp the (possibly flipped) image into the square network input.
        trans_input = get_affine_transform(
            c, s, 0, [self.opt.input_res, self.opt.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            # The return value is discarded, so color_aug presumably edits
            # `inp` in place -- TODO confirm against its definition.
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        # Move the channel axis first: (H, W, C) -> (C, H, W).
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_classes = self.opt.num_classes
        # Same crop as the input transform, but mapped onto the output grid.
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        # With opt.agnostic_ex the four extreme-point heatmaps share a single
        # channel; the center heatmap always has one channel per class.
        num_hm = 1 if self.opt.agnostic_ex else num_classes

        # Heatmaps for the top / left / bottom / right extreme points and for
        # the object center.
        hm_t = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_l = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_b = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_r = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_c = np.zeros((num_classes, output_res, output_res),
                        dtype=np.float32)
        # Per-object sub-pixel offsets (float position minus integer position).
        reg_t = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg_l = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg_b = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg_r = np.zeros((self.max_objs, 2), dtype=np.float32)
        # Per-object flattened heatmap index (y * output_res + x).
        ind_t = np.zeros((self.max_objs), dtype=np.int64)
        ind_l = np.zeros((self.max_objs), dtype=np.int64)
        ind_b = np.zeros((self.max_objs), dtype=np.int64)
        ind_r = np.zeros((self.max_objs), dtype=np.int64)
        # 1 for object slots that received targets, 0 otherwise.
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        # Annotations beyond max_objs are ignored.
        num_objs = min(len(anns), self.max_objs)
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        for k in range(num_objs):
            ann = anns[k]
            # The bounding box is not used here; targets come from the
            # annotated extreme points:
            # bbox = self._coco_box_to_bbox(ann['bbox'])
            # Row order of pts: top, left, bottom, right.
            pts = np.array(ann['extreme_points'],
                           dtype=np.float32).reshape(4, 2)
            # An earlier variant subtracted 1 here and was marked as a bug:
            # cls_id = int(self.cat_ids[ann['category_id']] - 1) # bug
            cls_id = int(self.cat_ids[ann['category_id']])
            hm_id = 0 if self.opt.agnostic_ex else cls_id
            if flipped:
                # Mirror x and swap the left and right extreme points.
                pts[:, 0] = width - pts[:, 0] - 1
                pts[1], pts[3] = pts[3].copy(), pts[1].copy()
            for j in range(4):
                pts[j] = affine_transform(pts[j], trans_output)
            pts = np.clip(pts, 0, self.opt.output_res - 1)
            # Extent on the output grid: bottom.y - top.y, right.x - left.x.
            h, w = pts[2, 1] - pts[0, 1], pts[3, 0] - pts[1, 0]
            if h > 0 and w > 0:
                # Objects with a degenerate extent are skipped, which leaves
                # slot k zeroed (reg_mask[k] == 0).
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                pt_int = pts.astype(np.int32)
                draw_gaussian(hm_t[hm_id], pt_int[0], radius)
                draw_gaussian(hm_l[hm_id], pt_int[1], radius)
                draw_gaussian(hm_b[hm_id], pt_int[2], radius)
                draw_gaussian(hm_r[hm_id], pt_int[3], radius)
                reg_t[k] = pts[0] - pt_int[0]
                reg_l[k] = pts[1] - pt_int[1]
                reg_b[k] = pts[2] - pt_int[2]
                reg_r[k] = pts[3] - pt_int[3]
                ind_t[k] = pt_int[0, 1] * output_res + pt_int[0, 0]
                ind_l[k] = pt_int[1, 1] * output_res + pt_int[1, 0]
                ind_b[k] = pt_int[2, 1] * output_res + pt_int[2, 0]
                ind_r[k] = pt_int[3, 1] * output_res + pt_int[3, 0]

                # Center: midpoint of left/right in x and top/bottom in y.
                ct = [
                    int((pts[3, 0] + pts[1, 0]) / 2),
                    int((pts[0, 1] + pts[2, 1]) / 2)
                ]
                draw_gaussian(hm_c[cls_id], ct, radius)
                reg_mask[k] = 1
        ret = {
            'input': inp,
            'hm_t': hm_t,
            'hm_l': hm_l,
            'hm_b': hm_b,
            'hm_r': hm_r,
            'hm_c': hm_c
        }
        if self.opt.reg_offset:
            # Offset-regression targets are only exposed when requested.
            ret.update({
                'reg_mask': reg_mask,
                'reg_t': reg_t,
                'reg_l': reg_l,
                'reg_b': reg_b,
                'reg_r': reg_r,
                'ind_t': ind_t,
                'ind_l': ind_l,
                'ind_b': ind_b,
                'ind_r': ind_r
            })

        return ret

Пример #6

Показать файл

Файл: jde.py Проект: zoeyyu37/FairMOTVehicle

    def __getitem__(self, files_index):
        """Build one detection + re-ID training sample for ``files_index``.

        ``files_index`` indexes the concatenation of all sub-datasets.  It is
        mapped to a sub-dataset key and a local index, the image and labels
        are loaded through ``self.get_data``, and the labels are converted
        into center-heatmap, size, offset and identity targets on the
        down-sampled output grid.

        Args:
            files_index: Global sample index across all sub-datasets.

        Returns:
            dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
            ``'wh'``, ``'reg'`` and ``'ids'``.

        Raises:
            UnboundLocalError: if no entry of ``self.cds`` is
                ``<= files_index`` (the sub-dataset cannot be resolved).
        """
        # Resolve the sub-dataset: the last entry of self.cds that does not
        # exceed files_index wins; it is used as that sub-dataset's start.
        for i, c in enumerate(self.cds):
            if files_index >= c:
                ds = list(self.label_files.keys())[i]
                start_index = c

        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]

        imgs, labels, img_path, (input_h, input_w) = self.get_data(
            img_path, label_path)

        # With several sub-datasets (video sequences), shift every valid
        # track id (> -1) by the sub-dataset's id offset.
        for i, _ in enumerate(labels):
            if labels[i, 1] > -1:
                labels[i, 1] += self.tid_start_index[ds]

        # Output grid size (floor division by the network stride).
        # Assumes imgs is laid out as (C, H, W) -- TODO confirm in get_data.
        output_h = imgs.shape[1] // self.opt.down_ratio
        output_w = imgs.shape[2] // self.opt.down_ratio

        num_classes = self.num_classes
        # Cap at max_objs: the per-object target arrays below have exactly
        # max_objs rows, so indexing past that would raise IndexError.
        # Labels beyond the cap are ignored.
        num_objs = min(labels.shape[0], self.max_objs)
        # Heatmap has one channel per class: (C, H, W).
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs, ), dtype=np.int64)
        # 1 for slots holding a real object, so losses can skip empty slots.
        reg_mask = np.zeros((self.max_objs, ), dtype=np.uint8)
        # Identity target per slot, initialised to 0.
        ids = np.zeros((self.max_objs, ), dtype=np.int64)

        # Gaussian renderer used to paint object centers onto the heatmap.
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        for k in range(num_objs):
            label = labels[k]

            # label[2:] is (center_x, center_y, bbox_w, bbox_h).  The slice
            # is scaled in place below, so when `labels` is a NumPy array
            # this also rewrites the corresponding row of `labels`.
            bbox = label[2:]

            # Scale to output-grid units (the multiplication implies the
            # stored values are fractions of the image size) and keep the
            # center inside the grid.
            bbox[[0, 2]] = bbox[[0, 2]] * output_w
            bbox[[1, 3]] = bbox[[1, 3]] * output_h
            bbox[0] = np.clip(bbox[0], 0, output_w - 1)
            bbox[1] = np.clip(bbox[1], 0, output_h - 1)

            w, h = bbox[2], bbox[3]

            if h > 0 and w > 0:
                # Heatmap radius; a fixed opt.hm_gauss replaces it when the
                # MSE loss is selected.
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius

                # Float center and its integer grid cell (truncated).
                ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                ct_int = ct.astype(np.int32)

                # Every object is drawn into channel 0; the class label in
                # label[0] is deliberately not used to select the channel.
                draw_gaussian(hm[0], ct_int, radius)

                wh[k] = 1. * w, 1. * h

                # Flattened index of the center cell: y * output_w + x.
                ind[k] = ct_int[1] * output_w + ct_int[0]

                reg[k] = ct - ct_int
                reg_mask[k] = 1
                # Identity class index = track id - 1 (this treats track ids
                # as 1-based; an unlabeled id of -1 becomes -2).
                ids[k] = label[1] - 1

        ret = {
            'input': imgs,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'ids': ids
        }

        return ret

Пример #7

Показать файл

    def __getitem__(self, index):
        """Build one detection + instance-mask training sample for ``index``.

        Loads the image and its annotations, keeps only non-crowd
        annotations whose category is in ``self._valid_ids``, applies the
        training augmentations (random crop or scale/shift jitter, horizontal
        flip, color augmentation) and produces center-heatmap, size, offset
        and per-object mask targets on the down-sampled output grid.

        Args:
            index: Position in ``self.images`` of the image id to load.

        Returns:
            dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
            ``'wh'``, ``'reg'`` and ``'seg'``.  When ``opt.debug > 0`` or the
            split is not ``'train'`` it also holds ``'meta'`` with the crop
            center/scale, ground-truth boxes and the image id.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        # Drop annotations of other categories and crowd regions.
        anns = [
            ann for ann in anns
            if ann['category_id'] in self._valid_ids and ann['iscrowd'] != 1
        ]

        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        # c: crop center as (x, y); s: crop size.
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            # Keep the native resolution, padded via bitwise OR with opt.pad
            # (rounds up to a multiple of pad + 1 if pad is 2**k - 1 --
            # TODO confirm the value of opt.pad).
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random crop: random scale factor and a random center kept
                # away from the image borders.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                # Clipped Gaussian jitter; the shift uses the scale from
                # before it is jittered.
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                # Mirror the image and the crop center; boxes and masks are
                # mirrored inside the annotation loop.
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - self.mean) / self.std
        # (H, W, C) -> (C, H, W)
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
        trans_seg_output = get_affine_transform(c, s, 0, [output_w, output_h])
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        seg = np.zeros((self.max_objs, output_h, output_w), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        # Relative enlargement of the box that delimits the valid mask area.
        pad_rate = 0.3

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            segment = self.coco.annToMask(ann)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                segment = segment[:, ::-1]

            # Project both box corners onto the output grid and clip them.
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            segment = cv2.warpAffine(segment,
                                     trans_seg_output, (output_w, output_h),
                                     flags=cv2.INTER_LINEAR)
            segment = segment.astype(np.float32)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                # Everything outside the enlarged box is set to 255 in the
                # mask target; inside it the warped mask values are kept.
                # `astype(int)` replaces the removed `np.int` alias (which
                # was the builtin int), so the resulting dtype is unchanged.
                # NOTE(review): the clipped bounds are doubled before being
                # used to index an (output_h, output_w) array -- confirm the
                # factor of 2 is intended.
                segment_mask = np.ones_like(segment)
                x = (np.clip([ct[0] - (1 + pad_rate) * w / 2,
                              ct[0] + (1 + pad_rate) * w / 2],
                             0, output_w - 1) * 2).astype(int)
                y = (np.clip([ct[1] - (1 + pad_rate) * h / 2,
                              ct[1] + (1 + pad_rate) * h / 2],
                             0, output_h - 1) * 2).astype(int)
                segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                segment[segment_mask == 1] = 255
                seg[k] = segment

                # Ground-truth box as (x1, y1, x2, y2, score=1, class).
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'seg': seg
        }

        if self.opt.debug > 0 or self.split != 'train':
            # A single all-zero row is used when there are no boxes.
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret

Пример #8

Показать файл

Файл: jde.py Проект: wss321/CenterPerson

    def __getitem__(self, files_index):
        """Build one detection training sample for ``files_index``.

        ``files_index`` indexes the concatenation of all sub-datasets.  It is
        mapped to a sub-dataset key and a local index, the image and labels
        are loaded through ``self.get_data``, and the labels are converted
        into center-heatmap, size and offset targets on the down-sampled
        output grid.

        Args:
            files_index: Global sample index across all sub-datasets.

        Returns:
            dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
            ``'wh'`` and ``'reg'``.

        Raises:
            UnboundLocalError: if no entry of ``self.cds`` is
                ``<= files_index`` (the sub-dataset cannot be resolved).
        """
        # Resolve the sub-dataset: the last entry of self.cds that does not
        # exceed files_index wins; it is used as that sub-dataset's start.
        for i, c in enumerate(self.cds):
            if files_index >= c:
                ds = list(self.label_files.keys())[i]
                start_index = c

        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]

        # The fifth value returned by get_data is not needed for the targets.
        imgs, labels, img_path, (input_h, input_w), _ = self.get_data(
            img_path, label_path)
        # Shift every valid track id (> -1) by the sub-dataset's id offset.
        for i, _ in enumerate(labels):
            if labels[i, 1] > -1:
                labels[i, 1] += self.tid_start_index[ds]

        # Assumes imgs is laid out as (C, H, W) -- TODO confirm in get_data.
        output_h = imgs.shape[1] // self.opt.down_ratio
        output_w = imgs.shape[2] // self.opt.down_ratio
        num_classes = self.num_classes
        # Cap at max_objs: the per-object target arrays below have exactly
        # max_objs rows, so indexing past that would raise IndexError.
        # Labels beyond the cap are ignored.
        num_objs = min(labels.shape[0], self.max_objs)
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs, ), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs, ), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian
        for k in range(num_objs):
            label = labels[k]
            # label[2:] is used as (center_x, center_y, width, height); the
            # slice is scaled in place, so when `labels` is a NumPy array
            # this also rewrites the corresponding row of `labels`.
            bbox = label[2:]
            cls_id = int(label[0])
            bbox[[0, 2]] = bbox[[0, 2]] * output_w
            bbox[[1, 3]] = bbox[[1, 3]] * output_h
            bbox[0] = np.clip(bbox[0], 0, output_w - 1)
            bbox[1] = np.clip(bbox[1], 0, output_h - 1)
            h = bbox[3]
            w = bbox[2]

            if h > 0 and w > 0:
                # Heatmap radius; a fixed opt.hm_gauss replaces it when the
                # MSE loss is selected.
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                # Flattened index of the center cell: y * output_w + x.
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

        ret = {
            'input': imgs,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg
        }
        return ret

Пример #9

Показать файл

  def __getitem__(self, index):
    """Build one 3D-detection training sample for the image at ``index``.

    Loads the image and its calibration, optionally applies scale/shift
    jitter (train split only, with probability ``opt.aug_ddd``), warps the
    image to ``opt.input_w`` x ``opt.input_h`` and normalizes it.  For each
    annotation it fills a center heatmap plus depth, dimension, rotation
    (two bins with residuals), 2D size and offset targets.

    Args:
      index: Position in ``self.images`` of the image id to load.

    Returns:
      dict with keys ``'input'``, ``'hm'``, ``'dep'``, ``'dim'``, ``'ind'``,
      ``'rotbin'``, ``'rotres'``, ``'reg_mask'`` and ``'rot_mask'``; plus
      ``'wh'`` when ``opt.reg_bbox`` is truthy, ``'reg'`` when
      ``opt.reg_offset`` is truthy, and ``'meta'`` when ``opt.debug > 0`` or
      the split name does not contain ``'train'``.
    """
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    img = cv2.imread(img_path)
    # Prefer the per-image calibration; fall back to the dataset default.
    if 'calib' in img_info:
      calib = np.array(img_info['calib'], dtype=np.float32)
    else:
      calib = self.calib

    height, width = img.shape[0], img.shape[1]
    # c: crop center as (x, y); s: crop size as (width, height).
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    if self.opt.keep_res:
      s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
    else:
      s = np.array([width, height], dtype=np.int32)
    
    aug = False
    if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
      # Clipped Gaussian jitter of scale and center.  No flip is applied in
      # this method.
      aug = True
      sf = self.opt.scale
      cf = self.opt.shift
      s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
      c[0] += img.shape[1] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
      c[1] += img.shape[0] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)

    trans_input = get_affine_transform(
      c, s, 0, [self.opt.input_w, self.opt.input_h])
    inp = cv2.warpAffine(img, trans_input, 
                         (self.opt.input_w, self.opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    # Color augmentation is disabled in this method:
    # if self.split == 'train' and not self.opt.no_color_aug:
    #   color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    # (H, W, C) -> (C, H, W)
    inp = inp.transpose(2, 0, 1)

    num_classes = self.opt.num_classes
    # Same crop as the input transform, mapped onto the output grid.
    trans_output = get_affine_transform(
      c, s, 0, [self.opt.output_w, self.opt.output_h])

    # Center heatmap, one channel per class.
    hm = np.zeros(
      (num_classes, self.opt.output_h, self.opt.output_w), dtype=np.float32)
    # Per-object targets; slot k belongs to annotation k.
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    dep = np.zeros((self.max_objs, 1), dtype=np.float32)
    rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
    rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
    dim = np.zeros((self.max_objs, 3), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    rot_mask = np.zeros((self.max_objs), dtype=np.uint8)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # Annotations beyond max_objs are ignored.
    num_objs = min(len(anns), self.max_objs)
    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian
    gt_det = []
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])
      cls_id = int(self.cat_ids[ann['category_id']])
      # Class ids of -99 or below are dropped entirely.
      if cls_id <= -99:
        continue
      # Flipping is not implemented in this method:
      # if flipped:
      #   bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      # Project both box corners onto the output grid and clip them.
      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      if h > 0 and w > 0:
        radius = gaussian_radius((h, w))
        radius = max(0, int(radius))
        ct = np.array(
          [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        ct_int = ct.astype(np.int32)
        if cls_id < 0:
          # Negative class id marks an ignore region: -1 applies to every
          # class, any other value to the single class index -cls_id - 2.
          # The region is painted with 0.9999 and no regression targets are
          # written for this annotation.
          ignore_id = [_ for _ in range(num_classes)] \
                      if cls_id == - 1 else  [- cls_id - 2]
          if self.opt.rect_mask:
            # Paint the whole box rectangle.
            hm[ignore_id, int(bbox[1]): int(bbox[3]) + 1, 
              int(bbox[0]): int(bbox[2]) + 1] = 0.9999
          else:
            # Paint a Gaussian, then set the center cell to 0.9999.
            for cc in ignore_id:
              draw_gaussian(hm[cc], ct, radius)
            hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
          continue
        draw_gaussian(hm[cls_id], ct, radius)

        wh[k] = 1. * w, 1. * h
        # Ground-truth row: center x, center y, score 1, the encoded alpha,
        # depth, the 3D dimensions and finally the class id.
        gt_det.append([ct[0], ct[1], 1] + \
                      self._alpha_to_8(self._convert_alpha(ann['alpha'])) + \
                      [ann['depth']] + (np.array(ann['dim']) / 1).tolist() + [cls_id])
        if self.opt.reg_bbox:
          # Insert the 2D size just before the trailing class id.
          gt_det[-1] = gt_det[-1][:-1] + [w, h] + [gt_det[-1][-1]]
        # The class restriction below is disabled, so the rotation/depth
        # targets are written for every class:
        # if (not self.opt.car_only) or cls_id == 1: # Only estimate ADD for cars !!!
        if 1:
          alpha = self._convert_alpha(ann['alpha'])
          # print('img_id cls_id alpha rot_y', img_path, cls_id, alpha, ann['rotation_y'])
          # Two overlapping orientation bins; an angle may activate both.
          # Bin 0 stores the residual relative to -pi/2, bin 1 relative to
          # +pi/2.
          if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
            rotbin[k, 0] = 1
            rotres[k, 0] = alpha - (-0.5 * np.pi)    
          if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
            rotbin[k, 1] = 1
            rotres[k, 1] = alpha - (0.5 * np.pi)
          dep[k] = ann['depth']
          dim[k] = ann['dim']
          # print('        cat dim', cls_id, dim[k])
          # Flattened index of the center cell: y * output_w + x.
          ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
          reg[k] = ct - ct_int
          # reg_mask is left at 0 for augmented samples, while rot_mask is
          # always set.
          reg_mask[k] = 1 if not aug else 0
          rot_mask[k] = 1
    # print('gt_det', gt_det)
    # print('')
    ret = {'input': inp, 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind, 
           'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask,
           'rot_mask': rot_mask}
    if self.opt.reg_bbox:
      ret.update({'wh': wh})
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not ('train' in self.split):
      # NOTE(review): the empty fallback is 18 columns wide; real rows only
      # gain the two extra (w, h) columns when opt.reg_bbox is set -- confirm
      # the widths agree in both configurations.
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 18), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'calib': calib,
              'image_path': img_path, 'img_id': img_id}
      ret['meta'] = meta
    
    return ret