示例#1
0
  def _get_additional_inputs(self, dets, meta, with_hm=True):
    """Build the prior heatmap and flat output indices from detections.

    Args:
      dets: iterable of dicts; each is read for its 'score' and 'bbox'.
      meta: dict providing 'trans_input', 'trans_output', 'inp_width',
        'inp_height', 'out_width' and 'out_height'.
      with_hm: when True a Gaussian is drawn for every kept detection and
        the heatmap is returned as a tensor on ``self.opt.device``.

    Returns:
      ``(input_hm, output_inds)``. ``output_inds`` is an int64 tensor of
      shape (1, N). With ``with_hm`` True, ``input_hm`` is a tensor with an
      extra leading axis (a horizontally flipped copy is appended along
      that axis when ``self.opt.flip_test`` is set). With ``with_hm``
      False it is the untouched all-zero NumPy array of shape
      (1, inp_height, inp_width).
    """
    to_input, to_output = meta['trans_input'], meta['trans_output']
    in_w, in_h = meta['inp_width'], meta['inp_height']
    out_w, out_h = meta['out_width'], meta['out_height']
    input_hm = np.zeros((1, in_h, in_w), dtype=np.float32)

    flat_inds = []
    for det in dets:
      # Detections scoring below the configured threshold are ignored.
      if det['score'] < self.opt.pre_thresh:
        continue
      box_in = self._trans_bbox(det['bbox'], to_input, in_w, in_h)
      box_out = self._trans_bbox(det['bbox'], to_output, out_w, out_h)
      box_h = box_in[3] - box_in[1]
      box_w = box_in[2] - box_in[0]
      if not (box_h > 0 and box_w > 0):
        continue

      radius = max(
        0, int(gaussian_radius((math.ceil(box_h), math.ceil(box_w)))))
      center = np.array(
        [(box_in[0] + box_in[2]) / 2, (box_in[1] + box_in[3]) / 2],
        dtype=np.float32)
      center_int = center.astype(np.int32)
      if with_hm:
        draw_umich_gaussian(input_hm[0], center_int, radius)

      # Output-resolution centre, truncated to int32 before flattening.
      out_x, out_y = np.array(
        [(box_out[0] + box_out[2]) / 2, (box_out[1] + box_out[3]) / 2],
        dtype=np.int32)
      flat_inds.append(out_y * out_w + out_x)

    if with_hm:
      batched = input_hm[np.newaxis]
      if self.opt.flip_test:
        batched = np.concatenate((batched, batched[:, :, :, ::-1]), axis=0)
      input_hm = torch.from_numpy(batched).to(self.opt.device)

    inds_np = np.array(flat_inds, np.int64).reshape(1, -1)
    output_inds = torch.from_numpy(inds_np).to(self.opt.device)
    return input_hm, output_inds
示例#2
0
def get_additional_inputs(dets, meta, with_hm=True, score_thresh=0.2,
                          device='cuda'):
    '''
    Render input heatmap from previous trackings.

    Args:
        dets: iterable of dicts; each is read for 'score', 'active' and
            (when kept) 'bbox'.
        meta: dict providing 'trans_input', 'trans_output', 'inp_width',
            'inp_height', 'out_width' and 'out_height'.
        with_hm: when True, draw one Gaussian per kept detection and return
            the heatmap as a tensor on ``device``.
        score_thresh: detections with a score below this value are skipped.
            Defaults to 0.2, the value that used to be hard-coded.
        device: target device for the returned tensors (string or
            ``torch.device``). Defaults to 'cuda', the previous hard-coded
            behaviour; pass 'cpu' to run on hosts without a GPU.

    Returns:
        ``(input_hm, output_inds)``. ``output_inds`` is an int64 tensor of
        shape (1, N). ``input_hm`` is a float32 tensor of shape
        (1, 1, inp_height, inp_width) when ``with_hm`` is True; otherwise
        it is the all-zero NumPy array of shape (1, inp_height, inp_width).
    '''
    device = torch.device(device)
    trans_input, trans_output = meta['trans_input'], meta['trans_output']
    inp_width, inp_height = meta['inp_width'], meta['inp_height']
    out_width, out_height = meta['out_width'], meta['out_height']
    input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

    output_inds = []
    for det in dets:
        # Skip low-confidence detections and tracks flagged as inactive.
        if det['score'] < score_thresh or det['active'] == 0:
            continue
        bbox = trans_bbox(det['bbox'], trans_input, inp_width, inp_height)
        bbox_out = trans_bbox(det['bbox'], trans_output, out_width, out_height)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            if with_hm:
                draw_umich_gaussian(input_hm[0], ct_int, radius)
            # Output-resolution centre, truncated to int32, then flattened
            # row-major into a single index.
            ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2,
                               (bbox_out[1] + bbox_out[3]) / 2],
                              dtype=np.int32)
            output_inds.append(ct_out[1] * out_width + ct_out[0])
    if with_hm:
        input_hm = input_hm[np.newaxis]
        input_hm = torch.from_numpy(input_hm).to(device)
    output_inds = np.array(output_inds, np.int64).reshape(1, -1)
    output_inds = torch.from_numpy(output_inds).to(device)
    return input_hm, output_inds
    def _get_pre_dets(self, anns, trans_input, trans_output):
        """Collect jittered previous-frame centres and optionally a heatmap.

        Each annotation box is mapped with ``trans_input`` and clipped to
        the input resolution. Its centre is perturbed with Gaussian noise
        scaled by ``opt.hm_disturb``. A uniform draw compared against
        ``opt.lost_disturb`` sets the peak value to 0 or 1, and another
        draw compared against ``opt.fp_disturb`` adds a second, lightly
        jittered Gaussian to the heatmap.

        Args:
            anns: iterable of annotation dicts ('category_id', 'bbox',
                optional 'iscrowd' and 'track_id').
            trans_input: affine transform applied to the box corners.
            trans_output: unused by this method.

        Returns:
            ``(pre_hm, pre_cts, track_ids)``. ``pre_hm`` is a
            (1, input_h, input_w) float32 array, or None when
            ``opt.pre_hm`` is falsy. ``pre_cts`` holds centres divided by
            ``opt.down_ratio``. ``track_ids`` holds the matching ids
            (-1 when an annotation has no 'track_id').
        """
        hm_h, hm_w = self.opt.input_h, self.opt.input_w
        down_ratio = self.opt.down_ratio
        return_hm = self.opt.pre_hm
        pre_hm = None
        if return_hm:
            pre_hm = np.zeros((1, hm_h, hm_w), dtype=np.float32)

        pre_cts = []
        track_ids = []
        for ann in anns:
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id > self.opt.num_classes or cls_id <= -99 or \
               ('iscrowd' in ann and ann['iscrowd'] > 0):
                continue

            bbox = self._coco_box_to_bbox(ann['bbox'])
            bbox[:2] = affine_transform(bbox[:2], trans_input)
            bbox[2:] = affine_transform(bbox[2:], trans_input)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h = bbox[3] - bbox[1]
            w = bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue

            radius = max(
                0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            clean_ct = ct.copy()

            # The order of the random draws is kept: x jitter, y jitter,
            # then the "lost" draw.
            ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
            ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
            conf = 1 if np.random.random() > self.opt.lost_disturb else 0
            ct_int = ct.astype(np.int32)

            # The jittered centre is stored when conf is 1, the clean
            # centre otherwise.
            chosen = ct if conf == 1 else clean_ct
            pre_cts.append(chosen / down_ratio)
            track_ids.append(ann['track_id'] if 'track_id' in ann else -1)

            if return_hm:
                draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

            # The false-positive draw happens whether or not a heatmap is
            # being produced.
            if np.random.random() < self.opt.fp_disturb and return_hm:
                fp_ct = clean_ct.copy()
                # Hard-coded 0.05 disturb ratio for the extra peak.
                fp_ct[0] = fp_ct[0] + np.random.randn() * 0.05 * w
                fp_ct[1] = fp_ct[1] + np.random.randn() * 0.05 * h
                draw_umich_gaussian(
                    pre_hm[0], fp_ct.astype(np.int32), radius, k=conf)

        return pre_hm, pre_cts, track_ids
示例#4
0
    def visualize_centers(self, im_blob, keep, node0_neighbor_idx, attn, output, p_img):
        """Dump two debug images and then terminate the process.

        Writes "heatmap.png" (the current image overlaid with Gaussians at
        the kept neighbour positions) and "p_img_vis.png" (``p_img`` with
        the box from ``output['node0_box']`` outlined). The method ends
        with ``exit()``, so it never returns to the caller.

        Args:
            im_blob: image tensor; squeezed and transposed (1, 2, 0) below.
                Presumably (1, 3, H, W) with values in [0, 1] -- TODO confirm.
            keep: index/mask applied to ``node0_neighbor_idx`` and ``attn``.
            node0_neighbor_idx: tensor of flat position indices.
            attn: tensor indexed with ``keep``; the result is not used.
            output: dict read for 'node0_box' (a tensor).
            p_img: image tensor, handled like ``im_blob``.
        """
        from utils.image import draw_umich_gaussian, gaussian_radius, draw_msra_gaussian

        node0_neighbor_idx_keep = node0_neighbor_idx[keep].cpu().numpy()
        # NOTE(review): attn_keep is computed but never read afterwards.
        attn_keep = attn[keep].cpu().numpy()
        # Boxes in ltwh. Flat indices are decoded with a row width of 272
        # and scaled by 4 (presumably a 272-wide feature map with stride 4,
        # matching the 1088-wide canvas below -- TODO confirm).
        node0_neighbor_idx_keep_box = np.array([[idx % 272, idx // 272 + 1, 1, 1] for idx in node0_neighbor_idx_keep])
        node0_neighbor_idx_keep_box *= 4
        im_blob_np = im_blob * 255.
        im_blob_np = im_blob_np.squeeze().cpu().numpy()
        # Channels-last with reversed channel order, for the OpenCV drawing.
        im_blob_np = im_blob_np.transpose(1, 2, 0)[:, :, ::-1]
        im_blob_np = cv2.UMat(im_blob_np)
        # Hard-coded 608 x 1088 heatmap canvas.
        hm = np.zeros((608, 1088))
        for i in range(0, node0_neighbor_idx_keep_box.shape[0]):
            bbox = node0_neighbor_idx_keep_box[i][0:4]
            # NOTE(review): rectangles are drawn on im_blob_np, but that
            # image is never saved or shown in this method.
            cv2.rectangle(im_blob_np, (bbox[0], bbox[1]),
                          (bbox[2] + bbox[0], bbox[3] + bbox[1]),
                          (0, 255, 0), 2)

            radius = gaussian_radius((self.opt.viz_heatmap_radius, self.opt.viz_heatmap_radius))
            radius = max(0, int(radius))
            # The Gaussian is centred on the box's left/top corner.
            ct = bbox[:2].astype(np.int32)
            draw_umich_gaussian(hm, ct, radius)

        import matplotlib.pyplot as plt
        from matplotlib import patches
        import matplotlib
        # NOTE(review): the backend is selected after pyplot was imported.
        matplotlib.use("Agg")
        fig, ax = plt.subplots()

        im_blob_plt = im_blob.squeeze().cpu().numpy()
        im_blob_plt = im_blob_plt.transpose(1, 2, 0)
        ax.imshow(im_blob_plt)
        # Semi-transparent heatmap overlay on top of the image.
        ax.imshow(hm, cmap='plasma', alpha=0.3)
        plt.axis('off')
        plt.savefig("heatmap.png")

        # Draw the previous box
        fig, ax = plt.subplots()
        node0_box = output['node0_box'].cpu().numpy()
        # Scale by (1088, 608, 1088, 608); presumably the box is stored
        # normalised to [0, 1] -- TODO confirm.
        node0_box *= np.array([1088, 608, 1088, 608])
        p_img_np = p_img.squeeze().cpu().numpy()
        p_img_np = p_img_np.transpose(1, 2, 0)[:, :, ::-1]
        ax.imshow(p_img_np)
        rect = patches.Rectangle((node0_box[0], node0_box[1]), node0_box[2]-node0_box[0],
                                 node0_box[3]-node0_box[1], linewidth=1, edgecolor='g', facecolor='none')
        ax.add_patch(rect)
        plt.axis('off')
        plt.savefig("p_img_vis.png")
        # Debug-only: stop the whole program after the first visualisation.
        exit()
def process_data(line, use_aug):
    """Parse one annotation line and build CenterNet-style training targets.

    The line is whitespace-separated: ``id path width height box box ...``,
    where each box is a comma-separated list whose first four numbers are
    used as ``x1, y1, x2, y2`` and whose fifth is the class index.

    Args:
        line: the annotation line as ``str``, or an object with
            ``.decode()`` (e.g. ``bytes``).
        use_aug: when truthy, apply random flip, crop and translate.

    Returns:
        ``(image, hm, wh, reg, reg_mask, ind, ori_w, ori_h, line_id)``.
        ``ori_w``, ``ori_h`` and ``line_id`` are returned as the raw string
        tokens from the line.

    Raises:
        KeyError: if the image path does not exist (exception type kept
            for callers that already catch it).
    """
    # Anything that is not already text is decoded (bytes, numpy bytes, ...).
    if not isinstance(line, str):
        line = line.decode()
    s = line.split()
    line_id = s[0]
    image_path = s[1]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)
    image = np.array(cv2.imread(image_path))
    ori_w = s[2]
    ori_h = s[3]
    labels = np.array(
        [[int(float(x)) for x in box.split(',')] for box in s[4:]])

    if use_aug:
        image, labels = random_horizontal_flip(image, labels)
        image, labels = random_crop(image, labels)
        image, labels = random_translate(image, labels)

    image, labels = image_preporcess(np.copy(image),
                                     [cfg.input_image_h, cfg.input_image_w],
                                     np.copy(labels))

    output_h = cfg.input_image_h // cfg.down_ratio
    output_w = cfg.input_image_w // cfg.down_ratio
    hm = np.zeros((output_h, output_w, cfg.num_classes), dtype=np.float32)
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    ind = np.zeros((cfg.max_objs), dtype=np.float32)
    reg_mask = np.zeros((cfg.max_objs), dtype=np.float32)

    # The per-object arrays have exactly cfg.max_objs slots. Extra labels
    # are dropped so they cannot index past the end of the arrays.
    for idx, label in enumerate(labels[:cfg.max_objs]):
        bbox = label[:4] / cfg.down_ratio
        class_id = label[4]
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_umich_gaussian(hm[:, :, class_id], ct_int, radius)
        wh[idx] = 1. * w, 1. * h
        # Row-major flat index of the integer centre on the output grid.
        ind[idx] = ct_int[1] * output_w + ct_int[0]
        # Sub-pixel offset lost when the centre was truncated to int.
        reg[idx] = ct - ct_int
        reg_mask[idx] = 1

    return image, hm, wh, reg, reg_mask, ind, ori_w, ori_h, line_id
示例#6
0
    def __getitem__(self, idx):
        """Return one face sample with landmarks and per-landmark heatmaps.

        The face box is cropped out, resized so its shorter side equals
        ``CROP_SIZE``, then centre-cropped to a square. Landmarks are
        scaled and shifted with the same transform. One Gaussian heatmap is
        drawn per landmark, and ``M`` is a 0/1 mask obtained by 3x3 grey
        dilation of each heatmap thresholded at 0.5.

        Returns:
            dict with keys 'file_name', 'image', 'landmarks',
            'crop_margin_x', 'crop_margin_y', 'scale_coef', 'top_x',
            'top_y', 'hmap' and 'M', passed through ``self.transforms``
            when that is set.
        """
        box_row = self.df.loc[idx]
        landmarks = np.array(self.df_landmarks.loc[idx].tolist())

        img = Image.open(os.path.join(self.root, box_row['filename']))

        # Crop the face.
        face_box = [box_row['top_x'], box_row['top_y'],
                    box_row['bottom_x'], box_row['bottom_y']]
        img = img.crop(face_box)

        # Resize the crop so its shorter side is CROP_SIZE, keeping the
        # aspect ratio.
        w, h = img.size
        shorter_side = w if h > w else h
        f = self.CROP_SIZE / shorter_side
        img = img.resize((int(w * f), int(h * f)))
        landmarks = landmarks * f

        # Centre crop to CROP_SIZE x CROP_SIZE.
        w, h = img.size
        margin_h = (h - self.CROP_SIZE) // 2
        margin_w = (w - self.CROP_SIZE) // 2
        img = img.crop([margin_w, margin_h,
                        self.CROP_SIZE + margin_w,
                        self.CROP_SIZE + margin_h])
        landmarks = landmarks.astype(np.int16).reshape(-1, 2)
        landmarks -= np.array((margin_w, margin_h), dtype=np.int16)[None, :]

        heat_shape = (self.NUM_PTS, self.hmap_size, self.hmap_size)
        hmap = np.zeros(heat_shape, dtype=np.float32)
        M = np.zeros(heat_shape, dtype=np.float32)

        # One fixed-radius (7) Gaussian per landmark, in heatmap coordinates.
        for ind, xy in enumerate(landmarks):
            hmap[ind] = draw_umich_gaussian(
                hmap[ind], xy / self.CROP_SIZE * self.hmap_size, 7)

        for i, channel in enumerate(hmap):
            M[i] = grey_dilation(channel, size=(3, 3))
        M = np.where(M >= 0.5, 1, 0)

        sample = {
            "file_name": box_row['filename'],
            "image": img,
            "landmarks": torch.from_numpy(landmarks.astype(np.float32)),
            "crop_margin_x": margin_w,
            "crop_margin_y": margin_h,
            "scale_coef": f,
            "top_x": box_row['top_x'],
            "top_y": box_row['top_y'],
            "hmap": hmap,
            "M": M,
        }

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample
示例#7
0
    def _add_hps(self, ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w):
        """Fill the keypoint targets of object ``k`` from its annotation.

        Every joint is mapped with ``trans_output``. A joint whose third
        value is > 0 and which lands inside the output map gets offset,
        index and mask entries plus a Gaussian on ``ret["hm_hp"][j]``.
        When that third value equals 1, the peak pixel is overwritten with
        ``self.ignore_val`` and the two heatmap-related masks are cleared.
        A joint whose third value is not > 0 is zeroed, and the object's
        box region on its heatmap channel is passed to
        ``self._ignore_region``. A joint that is > 0 but lands outside the
        map is only zeroed.

        Args:
            ret: dict of target arrays, written in place.
            k: object slot index.
            ann: annotation dict; 'keypoints' is optional.
            gt_det: dict whose 'hps' list gets the flattened joint xy.
            trans_output: affine transform to output coordinates.
            ct_int: integer object centre, used for relative offsets.
            bbox: object box; its int-cast extent defines the ignore region.
            h, w: object height and width, used for the Gaussian radius.
        """
        num_joints = self.num_joints
        if "keypoints" in ann:
            pts = np.array(ann["keypoints"], np.float32).reshape(
                num_joints, 3)
        else:
            pts = np.zeros((self.num_joints, 3), np.float32)

        if self.opt.simple_radius > 0:
            hp_radius = int(
                simple_radius(h, w, min_overlap=self.opt.simple_radius))
        else:
            hp_radius = max(
                0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))

        for j in range(num_joints):
            pts[j, :2] = affine_transform(pts[j, :2], trans_output)
            slot = k * num_joints + j
            xy_cols = slice(j * 2, j * 2 + 2)

            if not (pts[j, 2] > 0):
                # Joint not annotated: zero it and ignore the box region.
                pts[j, :2] *= 0
                self._ignore_region(
                    ret["hm_hp"][j,
                                 int(bbox[1]):int(bbox[3]) + 1,
                                 int(bbox[0]):int(bbox[2]) + 1])
                continue

            in_bounds = (0 <= pts[j, 0] < self.opt.output_w
                         and 0 <= pts[j, 1] < self.opt.output_h)
            if not in_bounds:
                pts[j, :2] *= 0
                continue

            ret["hps"][k, xy_cols] = pts[j, :2] - ct_int
            ret["hps_mask"][k, xy_cols] = 1
            pt_int = pts[j, :2].astype(np.int32)
            ret["hp_offset"][slot] = pts[j, :2] - pt_int
            ret["hp_ind"][slot] = pt_int[1] * self.opt.output_w + pt_int[0]
            ret["hp_offset_mask"][slot] = 1
            ret["hm_hp_mask"][slot] = 1
            ret["joint"][slot] = j
            draw_umich_gaussian(ret["hm_hp"][j], pt_int, hp_radius)

            if pts[j, 2] == 1:
                # Flag value 1: mark the peak as ignored and drop the
                # heatmap/offset supervision for this joint.
                ret["hm_hp"][j, pt_int[1], pt_int[0]] = self.ignore_val
                ret["hp_offset_mask"][slot] = 0
                ret["hm_hp_mask"][slot] = 0

        gt_det["hps"].append(pts[:, :2].reshape(num_joints * 2))
示例#8
0
# Scratch script: build CenterNet-style targets for a single hard-coded box
# and print every intermediate value. It relies on output_h, output_w,
# num_classes and max_objs being defined earlier in the file.
hm = np.zeros((output_h, output_w, num_classes), dtype=np.float32)
wh = np.zeros((max_objs, 2), dtype=np.float32)
reg = np.zeros((max_objs, 2), dtype=np.float32)
ind = np.zeros((max_objs), dtype=np.float32)
reg_mask = np.zeros((max_objs), dtype=np.float32)

down_ratio = 4
# The label is x1, y1, x2, y2, class_id in input-image pixels.
label = np.array([10, 30, 50, 100, 1])
idx = 0
bbox = label[:4] / down_ratio
class_id = label[4]
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
print("w h is", w, h)
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
print("radius is", radius)
radius = max(0, int(radius))
print(radius)
ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
print("ct is", ct)
draw_umich_gaussian(hm[:, :, class_id], ct_int, radius)
print(hm)
# hm is laid out (row, column, class). Save the heatmap plane that was just
# drawn, not hm[0], which is only the first image row across all classes.
cv2.imwrite("/home/pcl/tf_work/TF_CenterNet/single_heatmap.jpg",
            hm[:, :, class_id] * 255)
wh[idx] = 1. * w, 1. * h
# Row-major flat index of the integer centre on the output grid.
ind[idx] = ct_int[1] * output_w + ct_int[0]
# Sub-pixel offset lost when the centre was truncated to int.
reg[idx] = ct - ct_int
reg_mask[idx] = 1
print("ind is", ind)
print("wh is", wh)
print("reg is", reg)
示例#9
0
    def _get_additional_inputs(self,
                               tracks,
                               meta,
                               age_images,
                               with_hm=True,
                               with_kmf=False,
                               with_sch=False):
        '''
        Render input heatmap from previous trackings.

        Args:
            tracks: iterable of track dicts ('score', 'class', 'bbox',
                'tracking_id'; plus 'sch_weight', 'active' and 'kmf' when
                the matching flag is set).
            meta: dict providing 'trans_input', 'trans_output',
                'inp_width', 'inp_height', 'out_width' and 'out_height'.
            age_images: not used by this method.
            with_hm: draw a Gaussian per kept track and return the heatmap
                as a tensor on ``self.opt.device``.
            with_kmf: also collect indices of the centre predicted by each
                track's 'kmf' object (the current centre is used for
                tracks younger than ``opt.kmf_confirm_age``).
            with_sch: also collect each track's 'sch_weight'.

        Returns:
            ``(input_hm, output_inds, None, track_ids, kmf_inds,
            sch_weights)``. ``kmf_inds`` is None unless ``with_kmf`` is
            True. ``track_ids`` is a NumPy array; the other index and
            weight outputs are tensors on ``self.opt.device``. With
            ``with_hm`` False, ``input_hm`` stays the all-zero NumPy array
            of shape (1, inp_height, inp_width).
        '''
        to_input, to_output = meta['trans_input'], meta['trans_output']
        inp_width, inp_height = meta['inp_width'], meta['inp_height']
        out_width, out_height = meta['out_width'], meta['out_height']
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

        output_inds, track_ids = [], []
        kmf_inds, sch_weights = [], []
        for track in tracks:
            # The score threshold is per class; classes are 1-based.
            if track['score'] < self.opt.pre_thresh[track['class'] - 1]:
                continue
            bbox = self._trans_bbox(track['bbox'], to_input, inp_width,
                                    inp_height)
            bbox_out = self._trans_bbox(track['bbox'], to_output, out_width,
                                        out_height)
            h = bbox[3] - bbox[1]
            w = bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue

            if 'seg' in self.opt.task and self.opt.seg_center:
                # Centre = mean (column, row) of mask pixels that are >= 0.5.
                seg_mask = self.get_masks_as_input(track, to_input)
                fg = np.where(seg_mask >= 0.5)
                ct = np.array([np.mean(fg[1]), np.mean(fg[0])],
                              dtype=np.float32)
            else:
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
            ct_int = ct.astype(np.int32)

            ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2,
                               (bbox_out[1] + bbox_out[3]) / 2],
                              dtype=np.int32)
            flat_out = ct_out[1] * out_width + ct_out[0]
            output_inds.append(flat_out)
            track_ids.append(track['tracking_id'])

            if with_sch:
                sch_weights.append(track['sch_weight'])

            if with_hm:
                radius = max(
                    0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
                draw_umich_gaussian(input_hm[0], ct_int, radius)

            if with_kmf:
                if track['active'] >= self.opt.kmf_confirm_age:
                    predicted = track['kmf'].predict()[0]
                    # Input-space box is computed as before; it is not read.
                    self._trans_bbox(predicted, to_input, inp_width,
                                     inp_height)
                    # kmf_ind: transform to output coordinates.
                    pred_out = self._trans_bbox(predicted, to_output,
                                                out_width, out_height)
                    pred_ct = np.array(
                        [(pred_out[0] + pred_out[2]) / 2,
                         (pred_out[1] + pred_out[3]) / 2],
                        dtype=np.int32)
                    kmf_inds.append(pred_ct[1] * out_width + pred_ct[0])
                else:
                    # Unconfirmed track: fall back to the current centre.
                    kmf_inds.append(flat_out)

        if with_hm:
            batched = input_hm[np.newaxis]
            if self.opt.flip_test:
                batched = np.concatenate((batched, batched[:, :, :, ::-1]),
                                         axis=0)
            input_hm = torch.from_numpy(batched).to(self.opt.device)

        if with_kmf:
            assert (len(output_inds) == len(kmf_inds))

        output_inds = torch.from_numpy(
            np.array(output_inds, np.int64).reshape(1, -1)).to(
                self.opt.device)
        if with_kmf:
            kmf_inds = torch.from_numpy(
                np.array(kmf_inds, np.int64).reshape(1, -1)).to(
                    self.opt.device)
        else:
            kmf_inds = None
        track_ids = np.array(track_ids, np.int64).reshape(1, -1)
        sch_weights = torch.from_numpy(
            np.array(sch_weights)[None, :]).to(self.opt.device)
        return input_hm, output_inds, None, track_ids, kmf_inds, sch_weights
示例#10
0
    def __getitem__(self, index):
        """Load one image and build its CenterNet training targets.

        Returns a dict with 'input' (normalised CHW image), 'hm' (per-class
        heatmaps), 'reg' (sub-pixel centre offsets), 'reg_mask', 'ind'
        (flat centre indices) and 'wh' (box sizes). When
        ``self.opt.debug > 0`` or the split is not 'train', a 'meta' entry
        with the centre ``c``, scale ``s``, ground-truth boxes and image id
        is added.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        # Only the first cfg.max_objs annotations are used.
        num_objs = min(len(anns), cfg.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        # Transform centre starts at the image centre (x, y).
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

        if self.split == 'train':
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = cfg.train_resolution[0], cfg.train_resolution[1]
        else:
            # (x | pad) + 1 rounds the size up; presumably pad is 2**k - 1
            # so the result is a multiple of 2**k -- TODO confirm.
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)

        flipped = False
        if self.split == 'train':
            # Random scale factor drawn from np.arange(0.6, 1.4, 0.1).
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = get_border(128, img.shape[1])
            h_border = get_border(128, img.shape[0])

            # Random crop centre, kept at least one border from each edge.
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)

            if np.random.random() < self.opt.flip:
                flipped = True
                # Mirror the image horizontally and mirror the centre too.
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_matrix = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_matrix, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = inp.astype(np.float32) / 255.

        # TODO: inp can contain values below 0 after color_aug (author's note).
        if self.split == 'train':
            # The return value is not used; presumably color_aug modifies
            # inp in place -- TODO confirm.
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - cfg.mean) / cfg.std
        # HWC -> CHW.
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // cfg.down_ratio
        output_w = input_w // cfg.down_ratio
        # Same centre/scale, but mapping onto the down-sampled output grid.
        trans_matrix = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        ind = np.zeros(cfg.max_objs, dtype=np.int64)
        reg_mask = np.zeros(cfg.max_objs, dtype=np.uint8)

        gt_box = []
        for i in range(num_objs):
            ann = anns[i]
            bbox = coco2x1y1x2y2(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                # Mirror x1/x2 and swap them so x1 <= x2 still holds.
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_matrix)
            bbox[2:] = affine_transform(bbox[2:], trans_matrix)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Boxes that collapse after clipping are skipped; their slot i
            # stays zero with reg_mask[i] == 0.
            if h > 0 and w > 0:
                # get an object size-adaptive radius
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)

                draw_umich_gaussian(hm[cls_id], ct_int, radius)

                wh[i] = 1. * w, 1. * h
                # Row-major flat index of the integer centre.
                ind[i] = ct_int[1] * output_w + ct_int[0]
                # Sub-pixel offset lost by truncating the centre to int.
                reg[i] = ct - ct_int
                reg_mask[i] = 1

                # x1, y1, x2, y2, score (always 1), class id.
                gt_box.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg': reg,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }

        if self.opt.debug > 0 or not self.split == 'train':
            # A single all-zero row stands in when no box survived.
            gt_box = np.array(
                gt_box, dtype=np.float32) if len(gt_box) > 0 else np.zeros(
                    (1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_box, 'img_id': img_id}
            ret['meta'] = meta
        return ret
    def _add_instance_tl(self,
                         ret,
                         gt_det,
                         k,
                         cls_id,
                         bbox,
                         bbox_amodal,
                         ann,
                         trans_output,
                         aug_s,
                         calib,
                         pre_cts=None,
                         track_ids=None):
        """Write the '*_tl' targets of object ``k`` into ``ret``.

        Nothing is written when the box has non-positive height or width.
        Otherwise the category, masks, flat centre index, sub-pixel offset
        and a Gaussian on ``ret['hm_tl'][cls_id - 1]`` are filled in, and
        the box, score, class and centre are appended to ``gt_det``.
        Further targets ('ltrb', 'ltrb_amodal', 'nuscenes_att', 'velocity')
        are written only when the matching head is in ``self.opt.heads``.

        ``trans_output``, ``aug_s``, ``calib``, ``pre_cts`` and
        ``track_ids`` are accepted but not read by this method.
        """
        h = bbox[3] - bbox[1]
        w = bbox[2] - bbox[0]
        if h <= 0 or w <= 0:
            return

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        # Class ids arrive 1-based; targets are stored 0-based.
        cat = cls_id - 1

        ret['cat_tl'][k] = cat
        ret['mask_tl'][k] = 1
        if 'wh_tl' in ret:
            ret['wh_tl'][k] = 1. * w, 1. * h
            ret['wh_tl_mask'][k] = 1
        ret['ind_tl'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        ret['reg_tl'][k] = ct - ct_int
        ret['reg_tl_mask'][k] = 1
        draw_umich_gaussian(ret['hm_tl'][cat], ct_int, radius)

        centered_box = np.array(
            [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
            dtype=np.float32)
        gt_det['bboxes'].append(centered_box)
        gt_det['scores'].append(1)
        gt_det['clses'].append(cat)
        gt_det['cts'].append(ct)

        heads = self.opt.heads

        if 'ltrb' in heads:
            # Box edges relative to the integer centre.
            ret['ltrb'][k] = (bbox[0] - ct_int[0], bbox[1] - ct_int[1],
                              bbox[2] - ct_int[0], bbox[3] - ct_int[1])
            ret['ltrb_mask'][k] = 1

        if 'ltrb_amodal' in heads:
            ret['ltrb_amodal'][k] = (bbox_amodal[0] - ct_int[0],
                                     bbox_amodal[1] - ct_int[1],
                                     bbox_amodal[2] - ct_int[0],
                                     bbox_amodal[3] - ct_int[1])
            ret['ltrb_amodal_mask'][k] = 1
            gt_det['ltrb_amodal'].append(bbox_amodal)

        if 'nuscenes_att' in heads:
            if ('attributes' in ann) and ann['attributes'] > 0:
                # Attribute ids are 1-based in the annotation.
                att = int(ann['attributes'] - 1)
                ret['nuscenes_att'][k][att] = 1
                ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
            gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

        if 'velocity' in heads:
            # A velocity with any component <= -1000 is not used.
            if ('velocity' in ann) and min(ann['velocity']) > -1000:
                ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
                ret['velocity_mask'][k] = 1
示例#12
0
    def __getitem__(self, index):
        """Load an image with its single box and build detection targets.

        The box is read from ``<box_root>/<image stem>.txt`` as
        ``cx cy w h`` and converted to corner form. When the file does not
        exist, the box ``[0, 0, 1, 1]`` is used. Box coordinates are
        multiplied by ``configs.grid_wh`` to reach grid units.

        Returns:
            ``(image, ret)``, where ``ret`` holds 'hm', 'wh', 'xy',
            'ind', 'dense_xy', 'dense_wh', 'dense_mask' and 'boxes'.
        """
        image_fn = self.flist[index]
        image = cv2.cvtColor(cv2.imread(image_fn), cv2.COLOR_BGR2RGB)

        box_fn = str(Path(self.box_root) / (Path(image_fn).stem + '.txt'))
        if osp.exists(box_fn):
            xx, yy, ww, hh = np.loadtxt(box_fn)
            corners = [xx - ww / 2, yy - hh / 2, xx + ww / 2, yy + hh / 2]
        else:
            corners = [0.0, 0.0, 1.0, 1.0]
        boxes = np.array([corners]).astype('float32')

        if self.transform:
            image, boxes = self.transform(image, boxes)

        # Generate the ground-truth targets for the loss.
        # Boxes are x1, y1, x2, y2 (the original comment gives the range
        # as [0, 1]).
        cfg = self.configs
        output_h, output_w, grid_wh = cfg.hh, cfg.ww, cfg.grid_wh
        hin, win = cfg.image_size

        hm = np.zeros((cfg.num_classes, output_h, output_w),
                      dtype=np.float32)
        wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        dense_xy = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        ind = np.zeros((cfg.max_objs), dtype=np.int64)
        # reg_mask is filled in below but is not part of the returned dict.
        reg_mask = np.zeros((cfg.max_objs), dtype=np.uint8)

        num_objs = min(boxes.shape[0], cfg.max_objs)
        for k in range(num_objs):
            bbox = boxes[k]
            h = bbox[3] - bbox[1]
            w = bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue

            radius = max(0, int(gaussian_radius(
                (math.ceil(h * grid_wh), math.ceil(w * grid_wh)))))
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2.0 * grid_wh,
                 (bbox[1] + bbox[3]) / 2.0 * grid_wh],
                dtype=np.float32)
            ct_int = np.clip(ct.astype(np.int32), 0, grid_wh - 1)

            # The heatmap channel is chosen by the object index k.
            draw_umich_gaussian(hm[k], ct_int, radius)

            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            draw_dense_reg(dense_xy, hm.max(axis=0), ct_int, reg[k], radius)

        # Channel-wise maximum heatmap, duplicated to two channels to match
        # the two-channel dense_wh and dense_xy arrays.
        hm_a = hm.max(axis=0, keepdims=True)
        dense_mask = np.concatenate([hm_a, hm_a], axis=0)

        ret = {
            'hm': hm,
            'wh': wh,
            'xy': reg,
            'ind': ind,
            'dense_xy': dense_xy,
            'dense_wh': dense_wh,
            'dense_mask': dense_mask,
            'boxes': boxes,
        }
        return image, ret
示例#13
0
    def _add_kmf_att(self,
                     ret,
                     trans_input,
                     ann=None,
                     bbox=None,
                     init=False,
                     conf=1,
                     draw=True):
        """Render one randomly disturbed attention blob into ``ret['kmf_att'][0]``.

        The box is taken from ``bbox`` when given; otherwise it is derived from
        ``ann`` (computing ``ann['bbox']`` from the segmentation if missing),
        warped with ``trans_input`` and clipped to the network input size.

        Args:
            ret: sample dict; ``ret['kmf_att'][0]`` is drawn into in place.
            trans_input: affine transform passed to ``affine_transform``.
            ann: annotation dict, used only when ``bbox`` is None. May be
                mutated: a missing ``'bbox'`` key is filled in.
            bbox: box indexed as [x1, y1, x2, y2]; used as-is (no warp, no
                clipping) when provided.
            init: selects ``self.opt.init_conf`` as peak value and keeps the
                extra oval radius (only consulted when ``opt.guss_rad`` is set).
            conf: peak value of the blob. Overwritten when ``opt.guss_rad`` is
                set, and zeroed at random (see below).
            draw: when False nothing is drawn, but all random numbers are
                still consumed and the centre is still returned.

        Returns:
            The disturbed integer centre (``np.int32`` array of length 2), or
            None when the box has non-positive height or width.

        NOTE(review): if both ``bbox`` and ``ann`` are None, ``bbox[3]`` below
        raises ``TypeError`` - callers must supply one of them.
        """
        trans = trans_input
        hm_h, hm_w = self.opt.input_h, self.opt.input_w
        if bbox is None and ann is not None:
            if 'bbox' not in ann.keys():
                # Cache the box derived from the mask on the annotation itself.
                ann['bbox'] = mask_utils.toBbox(ann['segmentation'])
            bbox = self._coco_box_to_bbox(ann['bbox'])
            # Warp both corners into input-image space, then clip to its bounds.
            bbox[:2] = affine_transform(bbox[:2], trans)
            bbox[2:] = affine_transform(bbox[2:], trans)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        if (h > 0 and w > 0):
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            if self.opt.guss_rad:
                # NOTE(review): `min_overlap` is computed but never used; the
                # call below hard-codes 0.2 regardless of `init`. Confirm
                # whether `min_overlap=min_overlap` was intended.
                min_overlap = 0.2 if init else 0.6
                # The caller-supplied `conf` is discarded in this branch.
                conf = self.opt.init_conf if init else 1
                radius = gaussian_radius_center((math.ceil(h), math.ceil(w)),
                                                min_overlap=0.2)
            else:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))

            # Keep the undisturbed centre for the false-positive blob below.
            ct0 = ct.copy()

            # Jitter the centre by Gaussian noise proportional to the box size.
            ct[0] = ct[0] + np.random.randn() * self.opt.att_hm_disturb * w
            ct[1] = ct[1] + np.random.randn() * self.opt.att_hm_disturb * h
            # With probability ~att_lost_disturb the blob is drawn with peak 0.
            conf = conf if np.random.random(
            ) > self.opt.att_lost_disturb else 0
            ct_int = ct.astype(np.int32)
            if self.opt.guss_oval and draw:
                # The Gaussian radius only pads the oval when guss_rad is on
                # and either this is an init call or guss_rad_always is set.
                radius = radius if (self.opt.guss_rad and init) or (
                    self.opt.guss_rad and self.opt.guss_rad_always) else 0
                draw_umich_gaussian_oval(ret['kmf_att'][0],
                                         ct_int,
                                         radius_h=h // 2 + radius,
                                         radius_w=w // 2 + radius,
                                         k=conf)
            elif draw:
                draw_umich_gaussian(ret['kmf_att'][0], ct_int, radius, k=conf)

            if np.random.random(
            ) < self.opt.att_fp_disturb:  # generate false positive
                ct2 = ct0.copy()
                # Offset from the *undisturbed* centre; the spread is taken
                # from opt.att_disturb_dist (scaled by the box size).
                ct2[0] = ct2[0] + np.random.randn(
                ) * self.opt.att_disturb_dist * w
                ct2[1] = ct2[1] + np.random.randn(
                ) * self.opt.att_disturb_dist * h
                ct2_int = ct2.astype(np.int32)
                if self.opt.guss_oval and draw:
                    draw_umich_gaussian_oval(ret['kmf_att'][0],
                                             ct2_int,
                                             radius_h=h // 2,
                                             radius_w=w // 2,
                                             k=conf)
                elif draw:
                    draw_umich_gaussian(ret['kmf_att'][0],
                                        ct2_int,
                                        radius,
                                        k=conf)
        else:
            # Degenerate box: nothing drawn, no random numbers consumed.
            return None
        return ct_int
示例#14
0
    def __getitem__(self, index):
        """Build one sample with heatmap, box and contour-code targets.

        Each non-crowd annotation is converted to a contour with exactly
        ``self.n_vertices`` points, clipped to the annotated box; the tight
        box around that contour is used as the object box. The image is
        warped to ``self.img_size`` (random scale, crop centre and horizontal
        flip when ``self.split == 'train'``), normalised with ``self.mean`` /
        ``self.std``, and the targets are rendered at ``self.fmap_size``.

        Args:
            index: position in ``self.images``.

        Returns:
            dict with keys ``image`` (channel-first float32 image),
            ``shapes`` (normalised contours, ``max_objs x 2*n_vertices``),
            ``codes`` (sparse codes from ``fast_ista``), ``offsets`` (contour
            mass centre minus box centre), ``std`` (contour normalisation
            scale), ``hmap``, ``w_h_``, ``regs``, ``inds``, ``ind_masks``,
            and the augmentation parameters ``c``, ``s`` plus ``img_id``.
        """
        img_id = self.images[index]
        # One lookup of the image record serves both the path and the size.
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            polygons = get_connected_polygon_using_mask(
                anno['segmentation'], (h_img, w_img),
                n_vertices=self.n_vertices,
                closing_max_kernel=50)

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Bring the contour to a fixed number of vertices.
            if len(contour) > self.n_vertices:
                fixed_contour = resample(contour, num=self.n_vertices)
            else:
                fixed_contour = turning_angle_resample(contour,
                                                       self.n_vertices)

            # Resampling may overshoot; keep vertices inside the gt box.
            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            # NaN never compares equal to anything (including np.nan), so the
            # validity test must use isfinite rather than `== np.nan`.
            if contour_std < 1e-6 or not np.isfinite(contour_std):
                continue  # invalid shape

            updated_bbox = [
                np.min(fixed_contour[:, 0]),
                np.min(fixed_contour[:, 1]),
                np.max(fixed_contour[:, 0]),
                np.max(fixed_contour[:, 1])
            ]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # Placeholder object; its zero-size box is skipped further down.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        # bboxes are already in xyxy form (built from the contour extremes).

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(160, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height of bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # normalised gt contours
        center_offsets = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # contour mass center minus bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
        contour_std_ = np.zeros(
            (self.max_objs, 1),
            dtype=np.float32)  # scale used to normalise each contour
        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # sub-pixel offset of the bbox center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Apply the same scale/crop transform to every contour vertex.
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))

            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                          self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                          self.fmap_size['h'] - 1)

            # Canonical vertex order: clockwise ...
            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # ... starting from the left-most vertex (argmin over x).
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            contour_std = np.std(indexed_shape, axis=0) + 1e-4
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            norm_shape = (indexed_shape - mass_center) / np.sqrt(
                np.sum(contour_std**2))

            # Still needed after the guard above: it rejects NaN sizes.
            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = norm_shape.reshape((1, -1))
                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                         self.dictionary,
                                         lmbda=self.sparse_alpha,
                                         max_iter=60)
                contour_std_[k] = np.sqrt(np.sum(contour_std**2))

                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'std': contour_std_,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
示例#15
0
    def _add_instance(
        self,
        ret,
        gt_det,
        k,
        cls_id,
        bbox,
        bbox_amodal,
        ann,
        trans_output,
        aug_s,
        calib,
        pre_cts=None,
        track_ids=None,
    ):
        """Write the training targets of one object into slot ``k`` of ``ret``.

        Objects whose box has non-positive height or width are ignored
        entirely. Otherwise the class heatmap gets a Gaussian at the box
        centre, the common targets (``cat``, ``mask``, ``ind``, ``reg`` and,
        if present, ``wh``) are filled, and one extra target is written per
        head enabled in ``self.opt.heads``. ``gt_det`` collects a parallel,
        list-based description of the same object.

        Args:
            ret: dict of pre-allocated target arrays, modified in place.
            gt_det: dict of lists, appended to in place.
            k: slot index of this object.
            cls_id: 1-based class id (stored as ``cls_id - 1``).
            bbox: box indexed as [x1, y1, x2, y2].
            bbox_amodal: box used for the ``ltrb_amodal`` head.
            ann: annotation dict (optional keys are read per head).
            trans_output: affine transform for annotation-space points.
            aug_s: factor applied to ``ann['depth']``.
            calib: unused here.
            pre_cts: previous-frame centres, indexed in step with ``track_ids``.
            track_ids: previous-frame track ids.
        """
        box_h = bbox[3] - bbox[1]
        box_w = bbox[2] - bbox[0]
        if box_h <= 0 or box_w <= 0:
            return

        radius = max(
            0, int(gaussian_radius((math.ceil(box_h), math.ceil(box_w)))))
        center = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
        center_int = center.astype(np.int32)
        cat = cls_id - 1

        # Targets shared by every configuration.
        ret["cat"][k] = cat
        ret["mask"][k] = 1
        if "wh" in ret:
            ret["wh"][k] = (1.0 * box_w, 1.0 * box_h)
            ret["wh_mask"][k] = 1
        ret["ind"][k] = center_int[1] * self.opt.output_w + center_int[0]
        ret["reg"][k] = center - center_int
        ret["reg_mask"][k] = 1
        draw_umich_gaussian(ret["hm"][cat], center_int, radius)

        # Ground-truth "detection" mirroring the targets above.
        half_w = box_w / 2
        half_h = box_h / 2
        gt_box = np.array(
            [
                center[0] - half_w,
                center[1] - half_h,
                center[0] + half_w,
                center[1] + half_h,
            ],
            dtype=np.float32,
        )
        gt_det["bboxes"].append(gt_box)
        gt_det["scores"].append(1)
        gt_det["clses"].append(cat)
        gt_det["cts"].append(center)

        heads = self.opt.heads

        if "tracking" in heads:
            track_id = ann["track_id"]
            if track_id not in track_ids:
                gt_det["tracking"].append(np.zeros(2, np.float32))
            else:
                prev_center = pre_cts[track_ids.index(track_id)]
                ret["tracking_mask"][k] = 1
                # The offset is multiplied by zero, so the stored target is
                # always zero for matched tracks.
                ret["tracking"][k] = 0 * (prev_center - center_int)
                gt_det["tracking"].append(ret["tracking"][k])

        if "ltrb" in heads:
            # Box edges relative to the integer centre.
            left = bbox[0] - center_int[0]
            top = bbox[1] - center_int[1]
            right = bbox[2] - center_int[0]
            bottom = bbox[3] - center_int[1]
            ret["ltrb"][k] = (left, top, right, bottom)
            ret["ltrb_mask"][k] = 1

        if "ltrb_amodal" in heads:
            left = bbox_amodal[0] - center_int[0]
            top = bbox_amodal[1] - center_int[1]
            right = bbox_amodal[2] - center_int[0]
            bottom = bbox_amodal[3] - center_int[1]
            ret["ltrb_amodal"][k] = (left, top, right, bottom)
            ret["ltrb_amodal_mask"][k] = 1
            gt_det["ltrb_amodal"].append(bbox_amodal)

        if "nuscenes_att" in heads:
            if ("attributes" in ann) and ann["attributes"] > 0:
                att = int(ann["attributes"] - 1)
                ret["nuscenes_att"][k][att] = 1
                ret["nuscenes_att_mask"][k][self.nuscenes_att_range[att]] = 1
            gt_det["nuscenes_att"].append(ret["nuscenes_att"][k])

        if "velocity" in heads:
            # Velocities with any component <= -1000 are left unset.
            if ("velocity" in ann) and min(ann["velocity"]) > -1000:
                ret["velocity"][k] = np.array(ann["velocity"], np.float32)[:3]
                ret["velocity_mask"][k] = 1
            gt_det["velocity"].append(ret["velocity"][k])

        if "hps" in heads:
            self._add_hps(ret, k, ann, gt_det, trans_output, center_int, bbox,
                          box_h, box_w)

        if "rot" in heads:
            self._add_rot(ret, ann, k, gt_det)

        if "dep" in heads:
            if "depth" not in ann:
                gt_det["dep"].append(2)
            else:
                ret["dep_mask"][k] = 1
                ret["dep"][k] = ann["depth"] * aug_s
                gt_det["dep"].append(ret["dep"][k])

        if "dim" in heads:
            if "dim" not in ann:
                gt_det["dim"].append([1, 1, 1])
            else:
                ret["dim_mask"][k] = 1
                ret["dim"][k] = ann["dim"]
                gt_det["dim"].append(ret["dim"][k])

        if "amodel_offset" in heads:
            if "amodel_center" not in ann:
                gt_det["amodel_offset"].append([0, 0])
            else:
                amodel_center = affine_transform(ann["amodel_center"],
                                                 trans_output)
                ret["amodel_offset_mask"][k] = 1
                ret["amodel_offset"][k] = amodel_center - center_int
                gt_det["amodel_offset"].append(ret["amodel_offset"][k])
示例#16
0
    def _get_pre_dets(self, anns, trans_input, trans_output):
        """Collect previous-frame detections and optionally render their heatmap.

        Annotations that are crowd, have a non-positive class id, or a class
        id above ``opt.num_classes`` define ignore regions; any remaining
        object whose centre falls inside such a region is dropped. Kept
        centres are randomly jittered, randomly "lost" (peak 0), and random
        false-positive blobs may be added when the heatmap is rendered.

        Args:
            anns: annotation dicts of the previous frame.
            trans_input: affine transform into network-input space.
            trans_output: transform handed to ``self._get_bbox_output``.

        Returns:
            Tuple ``(pre_hm, pre_cts, track_ids, pre_whs, pre_bboxes,
            pre_bbox_amodals)``; ``pre_hm`` is None unless ``opt.pre_hm`` is
            truthy, the rest are lists with one entry per kept object.
        """
        opt = self.opt
        in_h, in_w = opt.input_h, opt.input_w
        down_ratio = opt.down_ratio
        with_hm = opt.pre_hm

        pre_hm = None
        if with_hm:
            pre_hm = np.zeros((1, in_h, in_w), dtype=np.float32)
        pre_cts, pre_whs, track_ids = [], [], []
        pre_bboxes, pre_bbox_amodals = [], []

        def is_crowd(a):
            return 'iscrowd' in a and a['iscrowd'] > 0

        # Pass 1: boxes of ignorable annotations.
        ignore_regions = []
        for ann in anns:
            cls_id = int(ann['category_id'])
            if cls_id > opt.num_classes or cls_id <= 0 or is_crowd(ann):
                region, _ = self._get_bbox_output(
                    ann['bbox'], trans_output, in_h, in_w)
                ignore_regions.append(region)

        # Pass 2: regular objects.
        for ann in anns:
            cls_id = int(ann['category_id'])
            if cls_id > opt.num_classes or cls_id <= -99 or is_crowd(ann):
                continue

            # Box in network-input coordinates.
            box = self._coco_box_to_bbox(ann['bbox'])
            box[:2] = affine_transform(box[:2], trans_input)
            box[2:] = affine_transform(box[2:], trans_input)
            box[[0, 2]] = np.clip(box[[0, 2]], 0, in_w - 1)
            box[[1, 3]] = np.clip(box[[1, 3]], 0, in_h - 1)
            h, w = box[3] - box[1], box[2] - box[0]
            if not (h > 0 and w > 0):
                continue

            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
            ct = np.array(
                [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
                dtype=np.float32)
            ct0 = ct.copy()  # undisturbed centre

            if any(r[0] <= ct[0] <= r[2] and r[1] <= ct[1] <= r[3]
                   for r in ignore_regions):
                continue

            # Jitter the centre, then decide whether the detection is "lost".
            ct[0] += np.random.randn() * opt.hm_disturb * w
            ct[1] += np.random.randn() * opt.hm_disturb * h
            conf = 1 if np.random.random() > opt.lost_disturb else 0
            ct_int = ct.astype(np.int32)

            # Lost detections report the jittered centre, others the clean
            # one; both are expressed in output-map coordinates.
            reported = ct if conf == 0 else ct0
            pre_cts.append(reported / down_ratio)
            track_ids.append(ann['track_id'] if 'track_id' in ann else -1)

            if with_hm:
                draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

            # The random draw happens whether or not a heatmap is rendered.
            if np.random.random() < opt.fp_disturb and with_hm:
                fp_ct = ct0.copy()
                # Fixed 0.05 spread for the false-positive blob.
                fp_ct[0] += np.random.randn() * 0.05 * w
                fp_ct[1] += np.random.randn() * 0.05 * h
                draw_umich_gaussian(
                    pre_hm[0], fp_ct.astype(np.int32), radius, k=conf)

            # Box in output coordinates.
            bbox_out, bbox_amodal = self._get_bbox_output(
                ann['bbox'], trans_output)
            pre_bboxes.append(np.array(bbox_out))
            pre_bbox_amodals.append(np.array(bbox_amodal))
            w_out = bbox_out[2] - bbox_out[0]
            h_out = bbox_out[3] - bbox_out[1]
            pre_whs.append(np.array([w_out, h_out], dtype=np.float32))

        return pre_hm, pre_cts, track_ids, pre_whs, pre_bboxes, pre_bbox_amodals
示例#17
0
    def __getitem__(self, index):
        """Build one sample with centre heatmap and 3D box regression targets.

        Keypoint 0 of each annotation's ``keypoints_2d`` (normalised image
        coordinates) is used as the object centre; objects whose centre is
        not strictly inside the unit square are dropped. The image and
        centres go through ``self.augs`` and, for a random subset of training
        samples, an additional random re-scale that is mirrored on the depth
        component of the translation.

        Args:
            index: position in ``self.images``.

        Returns:
            dict with ``input`` (channel-first normalised image), ``hm``,
            ``reg_mask``, ``ind``, ``dim`` (box scales), ``rot`` and ``loc``
            (translations); ``reg`` is added when ``opt.reg_offset`` is set.

        NOTE(review): an image with no annotations makes the keypoint slicing
        below raise ``IndexError`` - presumably such images are filtered out
        upstream; confirm.
        """
        img_id = self.images[index]
        video_info = self.coco.loadImgs(ids=[img_id])[0]
        file_name = video_info['file_name']
        image_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        input_h, input_w = self.opt.input_h, self.opt.input_w

        # (x, y) of every 9th keypoint starting at index 0.
        centers = np.array([ann['keypoints_2d'] for ann in anns])[:, 0::9, :2]
        centers = centers.reshape(-1, 2)
        # Keep only objects whose centre lies strictly inside the image.
        keep = np.where(np.all((0 < centers) & (1 > centers), axis=1))
        centers = centers[keep]
        anns = [anns[i] for i in keep[0]]

        img = cv2.imread(image_path)

        # resize, pad, and color augs (centres are converted to pixels first)
        centers[:, 0], centers[:, 1] = centers[:, 0]*img.shape[1], centers[:, 1]*img.shape[0]
        augmented = self.augs(image=img, keypoints=centers)
        inp, centers = augmented['image'], np.array(augmented['keypoints'])
        num_objs = min(len(centers), self.max_objs)
        wh_ratio = img.shape[1] / img.shape[0]
        c = np.array([inp.shape[1] / 2., inp.shape[0] / 2.], dtype=np.float32)
        s = max(inp.shape[0], inp.shape[1]) * 1.0

        aug = False
        scale_factor = None
        if self.split == 'train' and np.random.random() < self.opt.aug_ddd and num_objs > 0:
            aug = True
            sf = self.opt.scale
            scale_rand = np.random.random()
            # Same factor is applied to the crop scale and, below, to depth.
            scale_factor = np.clip(scale_rand * sf + 1, 1 - sf, 1 + sf)
            s = s * scale_factor

            trans_input = get_affine_transform(
                c, s, 0, [input_w, input_h])
            inp = cv2.warpAffine(inp, trans_input,
                                 (input_w, input_h),
                                 flags=cv2.INTER_LINEAR)

            # Apply the same affine map to the centres (homogeneous coords).
            centers = np.concatenate([centers, np.ones((centers.shape[0], 1))], axis=1)
            centers = np.matmul(trans_input, centers.T).T

        if num_objs > 0:
            # Back to normalised coordinates: x by width, y by height.
            centers[:, 0], centers[:, 1] = centers[:, 0] / inp.shape[1], centers[:, 1] / inp.shape[0]

        inp = (inp.astype(np.float32) / 255.)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio

        # empty targets
        heat_map = np.zeros([self.num_classes, output_h, output_w], dtype=np.float32)
        scales = np.zeros([self.max_objs, 3], dtype=np.float32)
        translations = np.zeros([self.max_objs, 3], dtype=np.float32)
        rotvecs = np.zeros([self.max_objs, 3], dtype=np.float32)
        reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)

        for k in range(num_objs):
            ann = anns[k]
            bbox = np.array(ann['bbox'])
            scale = np.array(ann['scale'])
            rot_angles = np.array(ann['rot'])
            translation = np.array(ann['translation'])

            if aug:
                translation[2] *= scale_factor

            ct = centers[k][:2]

            # ct[0] is x and ct[1] is y (see the normalisation above and the
            # `ind` formula below), so x scales with the map width and y with
            # its height. The original had the two swapped and clipped y
            # against the width, which is only harmless for square maps.
            ct[0], ct[1] = ct[0] * output_w, ct[1] * output_h
            ct[0], ct[1] = np.clip(ct[0], 0, output_w - 1), np.clip(ct[1], 0, output_h - 1)

            cls_id = int(self.cat_ids[ann['category_id']])

            bbox[[0, 2]] *= output_w
            bbox[[1, 3]] *= output_h

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius/2))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct_int = ct.astype(np.int32)
                draw_umich_gaussian(heat_map[cls_id], ct_int, radius)
                scales[k] = scale
                translations[k] = translation
                rotvecs[k] = rot_angles

                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                if DEBUG:
                    lines = (
                        [1, 5], [2, 6], [3, 7], [4, 8],  # lines along x-axis
                        [1, 3], [5, 7], [2, 4], [6, 8],  # lines along y-axis
                        [1, 2], [3, 4], [5, 6], [7, 8]  # lines along z-axis
                    )

                    plt.scatter(ct_int[0], ct_int[1])
                    r = R.from_euler('zyx', rot_angles).as_matrix()

                    box_3d = Box.from_transformation(r, translation, scale).vertices
                    points_2d = project_points(box_3d, np.array(video_info['projection_matrix']))
                    points_2d[:, 0] = points_2d[:, 0] * (128*wh_ratio) + 128*(1-wh_ratio)/2
                    points_2d[:, 1] *= 128
                    points_2d = points_2d.astype(int)
                    for ids in lines:
                        plt.plot(
                            (points_2d[ids[0]][0], points_2d[ids[1]][0]),
                            (points_2d[ids[0]][1], points_2d[ids[1]][1]),
                            color='r',
                        )

        ret = {
            'input': inp,
            'hm': heat_map,
            'reg_mask': reg_mask,
            'ind': ind,
            'dim': scales,
            'rot': rotvecs,
            'loc': translations
        }

        if self.opt.reg_offset:
            ret.update({'reg': reg})

        if DEBUG:
            if inp.shape[0] == 3:
                plot_img = inp.transpose(1, 2, 0)
                plot_img = (plot_img * self.std) + self.mean
            else:
                plot_img = inp.copy()

            plot_img = cv2.resize(plot_img, (output_w, output_h))
            plot_img = cv2.cvtColor(plot_img, cv2.COLOR_BGR2RGB)
            plt.imshow(plot_img)
            plt.show()
            plt.imshow(heat_map[0])
            plt.show()

        return ret
示例#18
0
    def __getitem__(self, index):
        """Return the detection sample (image plus targets) for one image.

        Boxes are read as COCO xywh and converted to xyxy. During training
        the crop scale, crop centre and a horizontal flip are randomised and
        colour augmentation is applied. Targets are rendered on a map of
        size ``self.fmap_size``.

        Args:
            index: position in ``self.images``.

        Returns:
            dict with ``image`` (channel-first float32), ``hmap``, ``w_h_``,
            ``regs``, ``inds``, ``ind_masks``, the crop parameters ``c`` and
            ``s``, and ``img_id``.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        annotations = self.coco.loadAnns(
            ids=self.coco.getAnnIds(imgIds=[img_id]))

        labels = np.array(
            [self.cat_ids[a['category_id']] for a in annotations])
        bboxes = np.array([a['bbox'] for a in annotations], dtype=np.float32)
        if not len(bboxes):
            # Placeholder object; its zero-size box is skipped in the loop.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
        # xywh -> xyxy
        bboxes[:, 2:] = bboxes[:, 2:] + bboxes[:, :2]

        img = cv2.imread(img_path)
        height, width = img.shape[:2]
        # Default crop: centred on the image, covering its longer side.
        center = np.array([width / 2., height / 2.], dtype=np.float32)
        scale = max(height, width) * 1.0

        is_train = self.split == 'train'
        flipped = False
        if is_train:
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            flipped = np.random.random() < 0.5
            if flipped:
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        img_w, img_h = self.img_size['w'], self.img_size['h']
        trans_img = get_affine_transform(center, scale, 0, [img_w, img_h])
        img = cv2.warpAffine(img, trans_img, (img_w, img_h))

        img = img.astype(np.float32) / 255.
        if is_train:
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        # In-place normalisation keeps the float32 dtype.
        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # HWC -> CHW

        fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
        trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

        max_objs = self.max_objs
        hmap = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
        w_h_ = np.zeros((max_objs, 2), dtype=np.float32)   # box width/height
        regs = np.zeros((max_objs, 2), dtype=np.float32)   # sub-pixel offset
        inds = np.zeros((max_objs, ), dtype=np.int64)      # flat centre index
        ind_masks = np.zeros((max_objs, ), dtype=np.uint8)  # slot is valid

        for slot, (box, cls) in enumerate(zip(bboxes, labels)):
            if flipped:
                # Mirror x and swap left/right so x1 <= x2 still holds.
                box[[0, 2]] = width - box[[2, 0]] - 1
            box[:2] = affine_transform(box[:2], trans_fmap)
            box[2:] = affine_transform(box[2:], trans_fmap)
            box[0::2] = np.clip(box[0::2], 0, fmap_w - 1)
            box[1::2] = np.clip(box[1::2], 0, fmap_h - 1)

            h = box[3] - box[1]
            w = box[2] - box[0]
            if not (h > 0 and w > 0):
                continue

            obj_c = np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)

            raw_radius = gaussian_radius((math.ceil(h), math.ceil(w)),
                                         self.gaussian_iou)
            radius = max(0, int(raw_radius))
            draw_umich_gaussian(hmap[cls], obj_c_int, radius)

            w_h_[slot] = (1. * w, 1. * h)
            regs[slot] = obj_c - obj_c_int  # discretization error
            inds[slot] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[slot] = 1

        sample = {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
        return sample
    def __getitem__(self, index):
        """Load one image and build its heatmap / shape / vote training targets.

        Annotations are fetched through ``self.coco``.  An annotation is dropped
        when it is a crowd object, its segmentation is not a list of polygons,
        its box is narrower or shorter than 5, the integer-rounded contour area
        is below 35, or the resampled contour is degenerate (near-zero or
        non-finite spread).  The image is warped to ``self.img_size``; when
        ``self.split == 'train'`` a random scale, random centre, random
        horizontal flip and colour augmentation are applied first.  Boxes and
        contours are mapped into feature-map coordinates with the transform
        built for ``self.fmap_size``.

        Args:
            index: Position in ``self.images`` of the image to load.

        Returns:
            dict with the keys

            * ``image``: float32 image, mean/std normalized, laid out [C, H, W].
            * ``shapes``: (max_objs, 2 * n_vertices) contour vertices relative
              to the contour mass centre, divided by half the box width/height.
            * ``codes``: (max_objs, n_codes) first output of ``fast_ista`` for
              the normalized shape.
            * ``offsets``: (max_objs, 2) contour mass centre minus box centre.
            * ``votes``: (max_objs, vote_length) binarized, resized object mask.
            * ``hmap``: (num_classes, fmap_h, fmap_w) centre heatmap.
            * ``w_h_``: (max_objs, 2) box width and height on the feature map.
            * ``regs``: (max_objs, 2) sub-pixel offset of the box centre.
            * ``inds``: (max_objs,) flattened feature-map index of the centre.
            * ``ind_masks``: (max_objs,) 1 where the slot holds a valid object.
            * ``c``, ``s``: centre and scale used for the affine transforms.
            * ``img_id``: id of the loaded image.
        """
        img_id = self.images[index]
        img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            # Excludes crowd objects and segmentations that are not polygon lists
            if anno['iscrowd'] == 1 or not isinstance(anno['segmentation'], list):
                continue

            if len(anno['segmentation']) > 1:
                # Several polygons: keep only the one with the largest area.
                obj_contours = [np.array(s).reshape((-1, 2)).astype(np.int32) for s in anno['segmentation']]
                obj_contours = sorted(obj_contours, key=cv2.contourArea)
                polygons = obj_contours[-1]
            else:
                polygons = anno['segmentation'][0]

            _, _, gt_w, gt_h = anno['bbox']
            if gt_w < 5 or gt_h < 5:
                continue
            contour = np.array(polygons).reshape((-1, 2))

            # Skip objects whose contour area is too small to be useful
            if cv2.contourArea(contour.astype(np.int32)) < 35:
                continue

            # Downsample the contour to a fixed number of vertices
            fixed_contour = uniformsample(contour, self.n_vertices)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            # ``x == np.nan`` is always False, so non-finite values must be
            # detected with np.isfinite (covers both inf and NaN).
            if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid shapes
                continue

            # The box is recomputed from the resampled contour (xyxy format)
            updated_bbox = [np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]),
                            np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # Placeholder object; its zero-sized box is rejected in the loop below.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(150, width)
            h_border = get_border(150, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
        votes_ = np.zeros((self.max_objs, self.vote_length), dtype=np.float32)  # votes for hmap and code
        w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2), dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros((self.max_objs, 2), dtype=np.float32)  # gt mass centers to bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs,), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

        # The target buffers hold at most ``max_objs`` rows; objects beyond that
        # are dropped instead of raising an IndexError on ``buffer[k]``.
        objects = zip(bboxes[:self.max_objs], labels[:self.max_objs], shapes[:self.max_objs])
        for k, (bbox, label, shape) in enumerate(objects):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Apply the same scale and crop transform to every contour vertex
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))

            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1)

            # Reverse the vertex order when check_clockwise_polygon reports False
            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            # Centre on the mass centre and scale by half the box width/height
            norm_shape = (indexed_shape - mass_center) / np.array([w / 2., h / 2.])

            # Always true for finite h/w at this point; kept so that NaN sizes
            # (which pass the ``<`` test above) are still rejected.
            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = norm_shape.reshape((1, -1))
                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)), self.dictionary,
                                         lmbda=self.sparse_alpha, max_iter=80)
                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

                # getting the gt votes
                shifted_poly = indexed_shape - np.array([bbox[0], bbox[1]]) + 1  # crop to the bbox, add padding 1
                obj_mask = np.zeros((int(h) + 3, int(w) + 3), dtype=np.uint8)
                cv2.drawContours(obj_mask, shifted_poly[None, :, :].astype(np.int32), color=255, contourIdx=-1,
                                 thickness=-1)

                # Resize the filled mask to the fixed vote grid, then binarize
                # at 20% of full intensity.
                obj_mask = cv2.resize(obj_mask.astype(np.uint8), dsize=(self.vote_vec_dim, self.vote_vec_dim),
                                      interpolation=cv2.INTER_LINEAR)
                votes_[k] = (obj_mask.reshape((1, -1)) > 0.2 * 255) * 1.0

        return {'image': img, 'shapes': shapes_, 'codes': codes_, 'offsets': center_offsets, 'votes': votes_,
                'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
                'c': center, 's': scale, 'img_id': img_id}
示例#20
0
    def __getitem__(self, index):
        """Load one image and build its heatmap / shape-code training targets.

        Annotations are fetched through ``self.coco``; crowd objects are
        skipped.  When an annotation has several polygons they are rasterized
        into one mask and morphologically closed with a growing kernel until a
        single outer contour remains.  Each contour is resampled to
        ``self.n_vertices`` points, clipped to the annotated box, and dropped if
        its spread is near zero or non-finite.  The image is warped to
        ``self.img_size``; when ``self.split == 'train'`` a random scale, random
        centre, random horizontal flip and colour augmentation are applied
        first.  Boxes and contours are mapped into feature-map coordinates with
        the transform built for ``self.fmap_size``.

        Args:
            index: Position in ``self.images`` of the image to load.

        Returns:
            dict with the keys

            * ``image``: float32 image, mean/std normalized, laid out [C, H, W].
            * ``codes``: (max_objs, n_codes) ``np.exp`` of the first output of
              ``fast_ista`` for the normalized contour.
            * ``hmap``: (num_classes, fmap_h, fmap_w) heatmap drawn at the
              contour mean.
            * ``w_h_std``: (max_objs, 2) per-axis standard deviation of the
              contour vertices.
            * ``regs``: (max_objs, 2) sub-pixel offset of the contour mean.
            * ``inds``: (max_objs,) flattened feature-map index of the centre.
            * ``ind_masks``: (max_objs,) 1 where the slot holds a valid object.
            * ``c``, ``s``: centre and scale used for the affine transforms.
            * ``img_id``: id of the loaded image.
        """
        img_id = self.images[index]
        # Look the image record up once and reuse it for path and size.
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            polygons = anno['segmentation']
            if len(polygons) > 1:
                # Rasterize every part of the object into one binary mask.
                bg = np.zeros((h_img, w_img, 1), dtype=np.uint8)
                for poly in polygons:
                    n_pts = len(poly) // 2
                    # Truncate coordinates to int32 (same as int() per value);
                    # a trailing unpaired coordinate is ignored.
                    vertices = np.asarray(poly[:2 * n_pts]).astype(np.int32).reshape((1, n_pts, 2))
                    cv2.drawContours(bg,
                                     vertices,
                                     color=(255),
                                     contourIdx=-1,
                                     thickness=-1)

                # Close the mask with an ever larger kernel until the parts
                # merge into a single contour.
                # NOTE(review): if nothing was rasterized the contour list is
                # empty and ``obj_contours[0]`` raises IndexError - confirm
                # whether such annotations can occur.
                pads = 5
                while True:
                    kernel = np.ones((pads, pads), np.uint8)
                    bg_closed = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel)
                    # [-2] selects the contour list for both the 2-value and
                    # the 3-value return signatures of cv2.findContours.
                    obj_contours = cv2.findContours(bg_closed,
                                                    cv2.RETR_TREE,
                                                    cv2.CHAIN_APPROX_SIMPLE)[-2]
                    if len(obj_contours) > 1:
                        pads += 5
                    else:
                        polygons = obj_contours[0]
                        break
            else:
                polygons = anno['segmentation'][0]

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            fixed_contour = resample(contour, num=self.n_vertices)

            # Keep the resampled vertices inside the annotated box
            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)
            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            # ``x == np.nan`` is always False, so non-finite values must be
            # detected with np.isfinite (covers both inf and NaN).
            if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid shapes
                continue

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # Placeholder object; its constant contour is rejected in the loop below.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_std = np.zeros((self.max_objs, 2),
                           dtype=np.float32)  # per-axis std of the shape
        codes_ = np.zeros((self.max_objs, self.n_codes),
                          dtype=np.float32)  # gt coefficients/codes for shapes
        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # The target buffers hold at most ``max_objs`` rows; objects beyond that
        # are dropped instead of raising an IndexError on ``buffer[k]``.
        objects = zip(bboxes[:self.max_objs], labels[:self.max_objs],
                      shapes[:self.max_objs])
        for k, (bbox, label, shape) in enumerate(objects):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Apply the same scale and crop transform to every contour vertex
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            contour = np.reshape(shape, (self.n_vertices, 2))
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(contour[:, 0])
            indexed_shape = np.concatenate(
                (contour[idx:, :], contour[:idx, :]), axis=0)

            # Reverse the vertex order when check_clockwise_polygon reports False
            clockwise_flag = check_clockwise_polygon(indexed_shape)
            if not clockwise_flag:
                fixed_contour = np.flip(indexed_shape, axis=0)
            else:
                fixed_contour = indexed_shape.copy()

            contour[:, 0] = np.clip(fixed_contour[:, 0], 0,
                                    self.fmap_size['w'] - 1)
            contour[:, 1] = np.clip(fixed_contour[:, 1], 0,
                                    self.fmap_size['h'] - 1)

            contour_mean = np.mean(contour, axis=0)
            contour_std = np.std(contour, axis=0)
            std_norm = np.sqrt(np.sum(contour_std**2))
            # ``not (x > eps)`` rejects tiny and NaN spreads alike, matching
            # the objects the original pair of checks let through.
            if not std_norm > 1e-6:
                continue
            norm_shape = (contour - contour_mean) / std_norm

            if h > 0 and w > 0:
                # The object centre is the contour mean, not the box centre.
                obj_c = contour_mean
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                w_h_std[k] = contour_std
                temp_codes, _ = fast_ista(norm_shape.reshape((1, -1)),
                                          self.dictionary,
                                          lmbda=self.sparse_alpha,
                                          max_iter=80)
                codes_[k] = np.exp(temp_codes)
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'codes': codes_,
            'hmap': hmap,
            'w_h_std': w_h_std,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
    def __getitem__(self, index):
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        img = self.coco.loadImgs(ids=[img_id])[0]
        w_img = int(img['width'])
        h_img = int(img['height'])

        labels = []
        bboxes = []
        a_bboxes = []
        shapes = []
        a_shapes = []

        for anno in annotations:
            if anno['category_id'] not in KINS_IDS:
                continue  # excludes 3: person-sitting class for evaluation

            a_polygons = anno['segmentation'][
                0]  # only one mask for each instance
            polygons = anno['i_segm'][0]

            # gt_x1, gt_y1, gt_w, gt_h = anno['a_bbox']  # this is used to clip resampled polygons
            a_contour = np.array(a_polygons).reshape((-1, 2))
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            if cv2.contourArea(contour.astype(
                    np.int32)) < 5:  # remove tiny objects
                continue
            fixed_contour = uniformsample(a_contour, self.n_vertices)
            i_contour = uniformsample(contour, self.n_vertices)

            # fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            # fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            # contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            # if contour_std < 1e-6 or contour_std == np.inf or contour_std == np.nan:  # invalid shapes
            #     continue

            shapes.append(np.ndarray.flatten(i_contour).tolist())
            a_shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])
            a_bboxes.append(anno['a_bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        a_bboxes = np.array(a_bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)
        a_shapes = np.array(a_shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            a_bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
            a_shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
        a_bboxes[:, 2:] += a_bboxes[:, :2]

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(360, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        # -----------------------------------debug---------------------------------
        # image_show = img.copy()

        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])
        # -----------------------------------debug---------------------------------
        # image_show = cv2.warpAffine(image_show, trans_fmap, (self.fmap_size['w'], self.fmap_size['h']))

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap of centers
        occ_map = np.zeros(
            (1, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # grayscale map for occlusion levels
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height of inmodal bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # gt amodal mass centers to inmodal bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes),
                          dtype=np.float32)  # gt amodal coefficients
        regs = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # regression for quantization error
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
        votes_ = np.zeros((self.max_objs, self.vote_length),
                          dtype=np.float32)  # voting for heatmaps

        for k, (bbox, a_bbox, label, shape, a_shape) in enumerate(
                zip(bboxes, a_bboxes, labels, shapes, a_shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                a_bbox[[0, 2]] = width - a_bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    a_shape[2 * m] = width - a_shape[2 * m] - 1
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[
                0]  # This box is the inmodal boxes

            a_bbox[:2] = affine_transform(a_bbox[:2], trans_fmap)
            a_bbox[2:] = affine_transform(a_bbox[2:], trans_fmap)
            a_bbox[[0, 2]] = np.clip(a_bbox[[0, 2]], 0,
                                     self.fmap_size['w'] - 1)
            a_bbox[[1, 3]] = np.clip(a_bbox[[1, 3]], 0,
                                     self.fmap_size['h'] - 1)

            # generate gt shape mean and std from contours
            for m in range(self.n_vertices
                           ):  # apply scale and crop transform to shapes
                a_shape[2 * m:2 * m + 2] = affine_transform(
                    a_shape[2 * m:2 * m + 2], trans_fmap)
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(a_shape, (self.n_vertices, 2))
            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                          self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                          self.fmap_size['h'] - 1)

            i_shape_clipped = np.reshape(shape, (self.n_vertices, 2))
            i_shape_clipped[:, 0] = np.clip(i_shape_clipped[:, 0], 0,
                                            self.fmap_size['w'] - 1)
            i_shape_clipped[:, 1] = np.clip(i_shape_clipped[:, 1], 0,
                                            self.fmap_size['h'] - 1)

            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            centered_shape = indexed_shape - mass_center  # these are amodal mask shapes

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = centered_shape.reshape((1, -1))

                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)),
                                         self.dictionary,
                                         lmbda=self.sparse_alpha,
                                         max_iter=60)

                a_shifted_poly = indexed_shape - np.array([
                    a_bbox[0], a_bbox[1]
                ])  # crop amodal shapes to the amodal bboxes
                amodal_obj_mask = self.polys_to_mask(
                    [np.ndarray.flatten(a_shifted_poly, order='C').tolist()],
                    a_bbox[3], a_bbox[2])

                i_shifted_poly = i_shape_clipped - np.array([
                    a_bbox[0], a_bbox[1]
                ])  # crop inmodal shapes to the same amodal bboxes
                inmodal_obj_mask = self.polys_to_mask(
                    [np.ndarray.flatten(i_shifted_poly, order='C').tolist()],
                    a_bbox[3], a_bbox[2])

                obj_mask = (
                    amodal_obj_mask + inmodal_obj_mask
                ) * 255. / 2  # convert to float type in image scale
                obj_mask = cv2.resize(
                    obj_mask.astype(np.uint8),
                    dsize=(self.vote_vec_dim, self.vote_vec_dim),
                    interpolation=cv2.INTER_LINEAR) * 1.
                votes_[k] = obj_mask.reshape((1, -1)) / 255.

                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

                # occlusion level map gt
                occ_map[0] += self.polys_to_mask(
                    [np.ndarray.flatten(indexed_shape).tolist()],
                    self.fmap_size['h'], self.fmap_size['w']) * 1.

        occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ

        # -----------------------------------debug---------------------------------
        # for bbox, label, shape in zip(bboxes, labels, shapes_):
        #     # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)
        #     cv2.putText(image_show, str(self.reverse_labels[label]), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        #     # print(shape, shape.shape)
        #     cv2.polylines(image_show, [shape.reshape(self.n_vertices, 2).astype(np.int32)], True, (0, 0, 255),
        #                   thickness=1)
        # # cv2.imshow('img', image_show)
        # # cv2.imshow('occ', occ_map.astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]) * 255)
        # m_img = cv2.cvtColor((occ_map * 255).astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]),
        #                      code=cv2.COLOR_GRAY2BGR)
        # cat_img = np.concatenate([m_img, image_show], axis=0)
        # cv2.imshow('segm', cat_img)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'occ_map': occ_map,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'votes': votes_,
            'c': center,
            's': scale,
            'img_id': img_id
        }
示例#22
0
  def _add_instance(
    self, ret, gt_det, k, cls_id, bbox, bbox_amodal, ann, trans_output,
    aug_s, calib, pre_cts=None, track_ids=None):
    """Fill slot ``k`` of the target arrays in ``ret`` for one object.

    The object's centre is taken from ``bbox`` (x1, y1, x2, y2). A Gaussian
    is drawn on ``ret['hm'][cls_id - 1]`` and the per-object regression
    targets for every head listed in ``self.opt.heads`` are written at
    index ``k``. The same values are appended to the lists in ``gt_det``.

    Boxes with non-positive width or height are ignored: the method
    returns before touching ``ret`` or ``gt_det``.

    ``bbox_amodal`` is only read for the 'ltrb_amodal' head, ``trans_output``
    for the 'hps' and 'amodel_offset' heads, ``aug_s`` for the 'dep' head.
    ``calib`` is not used here. When the 'tracking' head is enabled,
    ``pre_cts`` and ``track_ids`` must be sequences (the ``None`` defaults
    are not handled).
    """
    box_h = bbox[3] - bbox[1]
    box_w = bbox[2] - bbox[0]
    if box_h <= 0 or box_w <= 0:
      return

    # Gaussian radius from the (rounded-up) box size, never negative.
    radius = int(gaussian_radius((math.ceil(box_h), math.ceil(box_w))))
    radius = max(0, radius)

    center = np.array(
      [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
    cell = center.astype(np.int32)  # integer cell that holds the centre
    class_idx = cls_id - 1

    ret['cat'][k] = class_idx
    ret['mask'][k] = 1
    if 'wh' in ret:
      ret['wh'][k] = 1. * box_w, 1. * box_h
      ret['wh_mask'][k] = 1
    # Flattened (row-major) index of the centre cell.
    ret['ind'][k] = cell[1] * self.opt.output_w + cell[0]
    # Sub-cell offset lost by the integer truncation above.
    ret['reg'][k] = center - cell
    ret['reg_mask'][k] = 1
    draw_umich_gaussian(ret['hm'][class_idx], cell, radius)

    half_w, half_h = box_w / 2, box_h / 2
    gt_box = np.array(
      [center[0] - half_w, center[1] - half_h,
       center[0] + half_w, center[1] + half_h], dtype=np.float32)
    gt_det['bboxes'].append(gt_box)
    gt_det['scores'].append(1)
    gt_det['clses'].append(class_idx)
    gt_det['cts'].append(center)

    heads = self.opt.heads

    if 'tracking' in heads:
      track_id = ann['track_id']
      if track_id not in track_ids:
        gt_det['tracking'].append(np.zeros(2, np.float32))
      else:
        # Displacement from this centre cell to the matched previous centre.
        prev_center = pre_cts[track_ids.index(track_id)]
        ret['tracking_mask'][k] = 1
        ret['tracking'][k] = prev_center - cell
        gt_det['tracking'].append(ret['tracking'][k])

    if 'ltrb' in heads:
      ret['ltrb'][k] = (bbox[0] - cell[0], bbox[1] - cell[1],
                        bbox[2] - cell[0], bbox[3] - cell[1])
      ret['ltrb_mask'][k] = 1

    if 'ltrb_amodal' in heads:
      ret['ltrb_amodal'][k] = (
        bbox_amodal[0] - cell[0], bbox_amodal[1] - cell[1],
        bbox_amodal[2] - cell[0], bbox_amodal[3] - cell[1])
      ret['ltrb_amodal_mask'][k] = 1
      gt_det['ltrb_amodal'].append(bbox_amodal)

    if 'nuscenes_att' in heads:
      if 'attributes' in ann and ann['attributes'] > 0:
        att_idx = int(ann['attributes'] - 1)
        ret['nuscenes_att'][k][att_idx] = 1
        ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att_idx]] = 1
      gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

    if 'velocity' in heads:
      # Values <= -1000 are treated as "no velocity available".
      if 'velocity' in ann and min(ann['velocity']) > -1000:
        ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
        ret['velocity_mask'][k] = 1
      gt_det['velocity'].append(ret['velocity'][k])

    if 'hps' in heads:
      self._add_hps(
        ret, k, ann, gt_det, trans_output, cell, bbox, box_h, box_w)

    if 'rot' in heads:
      self._add_rot(ret, ann, k, gt_det)

    if 'dep' in heads:
      if 'depth' not in ann:
        gt_det['dep'].append(2)
      else:
        ret['dep_mask'][k] = 1
        ret['dep'][k] = ann['depth'] * aug_s
        gt_det['dep'].append(ret['dep'][k])

    if 'dim' in heads:
      if 'dim' not in ann:
        gt_det['dim'].append([1,1,1])
      else:
        ret['dim_mask'][k] = 1
        ret['dim'][k] = ann['dim']
        gt_det['dim'].append(ret['dim'][k])

    if 'amodel_offset' in heads:
      if 'amodel_center' not in ann:
        gt_det['amodel_offset'].append([0, 0])
      else:
        amodel_center = affine_transform(ann['amodel_center'], trans_output)
        ret['amodel_offset_mask'][k] = 1
        ret['amodel_offset'][k] = amodel_center - cell
        gt_det['amodel_offset'].append(ret['amodel_offset'][k])
示例#23
0
    def _add_instance(self,
                      ret,
                      gt_det,
                      k,
                      cls_id,
                      bbox,
                      bbox_amodal,
                      ann,
                      trans_output,
                      aug_s,
                      calib,
                      pre_cts=None,
                      track_ids=None,
                      flipped=False):
        """Fill slot ``k`` of the target arrays in ``ret`` for one object.

        The centre is derived from ``bbox`` (x1, y1, x2, y2). A Gaussian is
        drawn on ``ret['hm'][cls_id - 1]`` and the regression targets of
        every head in ``self.opt.heads`` are written at index ``k``; the
        matching ground-truth values are appended to the lists in ``gt_det``.

        Boxes with non-positive width or height are skipped without touching
        ``ret`` or ``gt_det``.

        ``flipped`` tells the 'seg' head that the image was mirrored, so the
        instance mask is mirrored before being warped with ``trans_output``.
        ``calib`` is not used here. When the 'tracking' head is enabled,
        ``pre_cts`` and ``track_ids`` must be sequences (the ``None``
        defaults are not handled).
        """
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h <= 0 or w <= 0:
            return
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        ret['cat'][k] = cls_id - 1
        ret['mask'][k] = 1
        if 'wh' in ret:
            ret['wh'][k] = 1. * w, 1. * h
            ret['wh_mask'][k] = 1
        # Flattened (row-major) index of the centre cell.
        ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        # Sub-cell offset lost by the integer truncation.
        ret['reg'][k] = ct - ct_int
        ret['reg_mask'][k] = 1
        draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

        gt_det['bboxes'].append(
            np.array(
                [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
                dtype=np.float32))
        gt_det['scores'].append(1)
        gt_det['clses'].append(cls_id - 1)
        gt_det['cts'].append(ct)

        if 'tracking' in self.opt.heads:
            if ann['track_id'] in track_ids:
                pre_ct = pre_cts[track_ids.index(ann['track_id'])]
                ret['tracking_mask'][k] = 1
                ret['tracking'][k] = pre_ct - ct_int
                gt_det['tracking'].append(ret['tracking'][k])
            else:
                gt_det['tracking'].append(np.zeros(2, np.float32))

        if 'ltrb' in self.opt.heads:
            ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
              bbox[2] - ct_int[0], bbox[3] - ct_int[1]
            ret['ltrb_mask'][k] = 1

        if 'ltrb_amodal' in self.opt.heads:
            ret['ltrb_amodal'][k] = \
              bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
              bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
            ret['ltrb_amodal_mask'][k] = 1
            gt_det['ltrb_amodal'].append(bbox_amodal)

        if 'nuscenes_att' in self.opt.heads:
            if ('attributes' in ann) and ann['attributes'] > 0:
                att = int(ann['attributes'] - 1)
                ret['nuscenes_att'][k][att] = 1
                ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
            gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

        if 'velocity' in self.opt.heads:
            # Values <= -1000 are treated as "no velocity available".
            if ('velocity' in ann) and min(ann['velocity']) > -1000:
                ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
                ret['velocity_mask'][k] = 1
            gt_det['velocity'].append(ret['velocity'][k])

        if 'hps' in self.opt.heads:
            self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h,
                          w)

        if 'rot' in self.opt.heads:
            self._add_rot(ret, ann, k, gt_det)

        if 'dep' in self.opt.heads:
            if 'depth' in ann:
                ret['dep_mask'][k] = 1
                ret['dep'][k] = ann['depth'] * aug_s
                gt_det['dep'].append(ret['dep'][k])
            else:
                gt_det['dep'].append(2)

        if 'dim' in self.opt.heads:
            if 'dim' in ann:
                ret['dim_mask'][k] = 1
                ret['dim'][k] = ann['dim']
                gt_det['dim'].append(ret['dim'][k])
            else:
                gt_det['dim'].append([1, 1, 1])

        if 'amodel_offset' in self.opt.heads:
            if 'amodel_center' in ann:
                amodel_center = affine_transform(ann['amodel_center'],
                                                 trans_output)
                ret['amodel_offset_mask'][k] = 1
                ret['amodel_offset'][k] = amodel_center - ct_int
                gt_det['amodel_offset'].append(ret['amodel_offset'][k])
            else:
                gt_det['amodel_offset'].append([0, 0])

        # Per-instance segmentation target ('seg' head). Nothing is written
        # when the annotation carries no segmentation.
        if 'seg' in self.opt.heads and ann['segmentation'] is not None:
            segment = self.coco.annToMask(ann)
            if flipped:
                # Mirror the mask the same way the image was mirrored.
                segment = segment[:, ::-1]
            segment = cv2.warpAffine(
                segment,
                trans_output, (self.opt.output_w, self.opt.output_h),
                flags=cv2.INTER_LINEAR)
            segment = segment.astype(np.float32)

            # Pixels outside the box (enlarged by pad_rate) are marked 255;
            # inside it the mask is binarised to 0/1.
            segment_mask = np.ones_like(segment)
            pad_rate = 0.1
            half_w = (1 + pad_rate) * w / 2
            half_h = (1 + pad_rate) * h / 2
            # Builtin int replaces the np.int alias removed in NumPy 1.24.
            x = np.clip([ct[0] - half_w, ct[0] + half_w], 0,
                        self.opt.output_w - 1).astype(int)
            y = np.clip([ct[1] - half_h, ct[1] + half_h], 0,
                        self.opt.output_h - 1).astype(int)
            segment_mask[y[0]:y[1], x[0]:x[1]] = 0
            segment[segment > 0] = 1
            segment[segment_mask == 1] = 255
            ret['seg'][k] = segment
示例#24
0
    def __getitem__(self, index):
        """Load one image and build its line-detection training targets.

        The image is read with ``cv2.imread``, optionally augmented (random
        scale / crop or shift, horizontal flip, colour jitter) when
        ``self.split == 'train'``, and warped to the network input size.
        Each annotation's ``'cord'`` (converted by
        ``self._semline_cord_to_box``) is handled as a segment
        (x0, y0, x1, y1): it is mapped to output-map coordinates, clipped to
        the map, and encoded as a centre heatmap plus per-object targets.

        Returns a dict with:
          'input'    - normalised image, channels first
          'hm'       - (num_classes, output_h, output_w) centre heatmap
          'reg_mask' - 1 for every filled object slot
          'ind'      - flattened centre index per slot
          'wh'       - absolute x/y extent of the segment per slot
          'direct'   - 1 for 'lt2rb' segments, 0 for 'lb2rt'
          'reg'            - only if ``self.opt.reg_offset``
          'grad_magnitude' - only if ``self.opt.loss_hm_magnitude``
          'meta'           - only if ``self.opt.debug > 0`` or split is 'test'
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']

        # to fit for any given global data path:
        # NOTE: this rewrites self.img_dir on the instance; every occurrence
        # of the last path component is replaced by "images/<component>".
        if 'images' not in self.img_dir:
            img_folder = self.img_dir.split('/')[-1]
            self.img_dir = self.img_dir.replace(img_folder,
                                                "images/" + img_folder)

        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        # Annotations beyond max_objs are ignored.
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        # NOTE(review): when the image cannot be read (img is None) this only
        # prints the path; the img.shape access right below still raises
        # AttributeError, so the handler does not recover.
        try:
            height, width = img.shape[0], img.shape[1]
        except AttributeError:
            print("None type image! path: {}".format(img_path))

        # c: centre of the crop in image coordinates; s: crop scale.
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            # Bitwise-OR with pad then +1; presumably rounds the size up to a
            # multiple of (pad + 1) when pad is 2**n - 1 -- TODO confirm.
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random scale in [0.6, 1.4) and a random crop centre kept
                # away from the image border.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                # Gaussian shift / scale jitter, clipped to +-2*cf and +-sf.
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                # Horizontal flip; the fallback handles 2-D (single-channel)
                # arrays, for which the 3-index slice raises IndexError.
                try:
                    img = img[:, ::-1, :]
                except IndexError:
                    img = img[:, ::-1]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec,
                      self.opt.color_aug_var)
        # Normalise with the scalar averages of self.mean / self.std (one
        # value for all channels), then move channels first.
        inp = (inp - np.mean(self.mean)) / np.mean(self.std)
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        # Same crop as trans_input, but targeting the down-sampled output map.
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        # Get ground truth gradient magnitude
        if self.opt.loss_hm_magnitude:
            img_mag_path = os.path.join(self.img_dir + '_mag',
                                        file_name.replace('.png', '_mag.png'))
            inp_grad_magnitude = cv2.imread(img_mag_path, 0)
            inp_grad_magnitude = cv2.warpAffine(inp_grad_magnitude,
                                                trans_output,
                                                (output_w, output_h),
                                                flags=cv2.INTER_LINEAR)
            inp_grad_magnitude = (inp_grad_magnitude.astype(np.float32) / 255.)

        # Target buffers; per-object arrays have max_objs slots.
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        direct = np.zeros((self.max_objs, 1), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        gt_line = []
        for k in range(num_objs):
            ann = anns[k]
            cord = self._semline_cord_to_box(ann['cord'])
            try:
                cls_id = int(self.cat_ids[ann['category_id']])
            except KeyError:
                # Unknown category: report the file and skip the annotation.
                print("Wrong label!!! ", file_name)
                continue

            # Order the endpoints so that (cord[0], cord[1]) is the one with
            # the smaller x.
            if cord[0] <= cord[2]:
                x_left = cord[0]
                x_right = cord[2]
                y_left = cord[1]
                y_right = cord[3]
            else:  # cord[0] > cord[2]:
                x_left = cord[2]
                x_right = cord[0]
                y_left = cord[3]
                y_right = cord[1]
            cord[0] = x_left
            cord[1] = y_left
            cord[2] = x_right
            cord[3] = y_right

            if flipped:
                # Mirror x and swap the endpoints so the left one stays first.
                cord[[0, 2]] = width - cord[[2, 0]] - 1
                cord[[1, 3]] = cord[[3, 1]]

            # Map both endpoints into output-map coordinates.
            cord[:2] = affine_transform(cord[:2], trans_output)
            cord[2:] = affine_transform(cord[2:], trans_output)

            # 'lt2rb': x and y both increase from the first to the second
            # endpoint; everything else is labelled 'lb2rt'.
            direct_str = 'lt2rb' if cord[0] < cord[2] and cord[1] < cord[
                3] else 'lb2rt'

            if 0 < cord[0] < output_w and 0 < cord[2] < output_w \
                and 0 < cord[1] < output_h and 0 < cord[3] < output_h:
                # Both endpoints are strictly inside the map: no clipping.
                # NOTE(review): `angle` (and `a` in this branch) is computed
                # but never read afterwards.
                if cord[0] == cord[2]:  # vertical line
                    angle = 90
                else:
                    a = (cord[1] - cord[3]) / (cord[0] - cord[2])
                    angle = np.arctan(a) * 180 / 3.14159265359
                pass
            else:
                # At least one endpoint is on or outside the border: clip the
                # segment to the map, dropping it if nothing is left.
                if cord[0] == cord[2]:  # vertical line
                    if cord[0] < 0 or cord[0] >= output_w:
                        continue
                    cord[[1, 3]] = np.clip(cord[[1, 3]], 0, output_h - 1)
                    if cord[1] == cord[3]:
                        continue
                elif cord[1] == cord[3]:  # horizontal line
                    if cord[1] < 0 or cord[1] >= output_h:
                        continue
                    cord[[0, 2]] = np.clip(cord[[0, 2]], 0, output_w - 1)
                    if cord[0] == cord[2]:
                        continue
                else:
                    # General case: the segment lies on y = a * x + b.
                    a = (cord[1] - cord[3]) / (cord[0] - cord[2])
                    b = (cord[0] * cord[3] - cord[2] * cord[1]) / (cord[0] -
                                                                   cord[2])

                    # Clip y first, then update x.
                    x0, y0, x1, y1 = cord[[0, 1, 2, 3]]
                    (y0, y1) = np.clip((y0, y1), 0, output_h - 1)
                    if y0 == y1:
                        continue
                    # Only endpoints whose y was actually clipped get a new x.
                    if y0 != cord[1]:
                        x0 = (y0 - b) / a
                    if y1 != cord[3]:
                        x1 = (y1 - b) / a
                    # Then clip x, then update y:
                    (x0, x1) = np.clip((x0, x1), 0, output_w - 1)
                    if x0 == x1:
                        continue
                    if x0 != cord[0]:
                        y0 = a * x0 + b
                    if x1 != cord[2]:
                        y1 = a * x1 + b

                    # Copy back to cord:
                    # keep the smaller x first and pair the y values so the
                    # direction label computed above still holds.
                    if direct_str == 'lt2rb':
                        cord[[0, 1, 2,
                              3]] = min(x0, x1), min(y0,
                                                     y1), max(x0,
                                                              x1), max(y0, y1)
                    else:
                        cord[[0, 1, 2,
                              3]] = min(x0, x1), max(y0,
                                                     y1), max(x0,
                                                              x1), min(y0, y1)

            h, w = abs(cord[3] - cord[1]), abs(cord[2] - cord[0])
            # Axis-aligned segments get a nominal extent of 0.25 so that the
            # radius computation below receives a non-zero size.
            w = 0.25 if w == 0 else w
            h = 0.25 if h == 0 else h
            # h and w are absolute values with zero replaced above, so this
            # condition holds for every finite segment.
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))

                # Segment midpoint and the integer cell that contains it.
                ct = np.array([(cord[0] + cord[2]) / 2,
                               (cord[1] + cord[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)

                radius = max(0, int(radius))
                # The heatmap channel is indexed with cls_id directly.
                hm[cls_id] = draw_umich_gaussian(hm[cls_id], ct_int, radius)

                wh[k] = 1. * w, 1. * h
                direct[k] = 1 if direct_str == 'lt2rb' else 0
                direct2append = direct[k]

                # Flattened (row-major) centre index and sub-cell offset.
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_line.append([
                    cord[0], cord[1], cord[2], cord[3], 1, cls_id,
                    direct2append
                ])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind}
        ret.update({'wh': wh})
        ret.update({'direct': direct})

        if self.opt.reg_offset:
            ret.update({'reg': reg})

        if self.opt.loss_hm_magnitude:
            ret.update({'grad_magnitude': inp_grad_magnitude})
        if self.opt.debug > 0 or self.split == 'test':
            # A single all-zero row stands in when no segment survived.
            gt_line = np.array(gt_line, dtype=np.float32) if len(gt_line) > 0 else \
                             np.zeros((1, 7), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_line': gt_line, 'img_id': img_id}
            ret['meta'] = meta

        return ret
示例#25
0
    def _get_pre_dets(self, anns, trans_input, trans_output, ret):
        """Collect object centres of the previous frame, with random noise.

        Each usable annotation's box is warped with ``trans_input`` and
        clipped to the input size. Its centre (box centre, or the centroid of
        the instance mask when ``self.opt.seg_center`` is set for a 'seg'
        task) is jittered, randomly "lost", and optionally duplicated as a
        false positive, controlled by ``self.opt.hm_disturb``,
        ``lost_disturb`` and ``fp_disturb``.

        ``trans_output`` and ``ret`` are accepted for interface
        compatibility but are not used.

        Returns:
            pre_hm: (1, input_h, input_w) float32 heatmap of the (noisy)
                centres, or None when ``self.opt.pre_hm`` is false.
            pre_cts: list of centres divided by ``self.opt.down_ratio``.
            track_ids: list of the matching ``ann['track_id']`` (-1 when the
                annotation has none).
        """
        hm_h, hm_w = self.opt.input_h, self.opt.input_w
        down_ratio = self.opt.down_ratio
        return_hm = self.opt.pre_hm
        pre_hm = np.zeros(
            (1, hm_h, hm_w), dtype=np.float32) if return_hm else None
        pre_cts, track_ids = [], []

        for ann in anns:
            cls_id = int(self.cat_ids[ann['category_id']])
            # Skip out-of-range / ignored classes and crowd regions.
            if cls_id > self.opt.num_classes or cls_id <= -99 or \
              ('iscrowd' in ann and ann['iscrowd'] > 0) or cls_id == 0: # cls_id add by vtsai01
                continue
            if 'bbox' not in ann:
                # Derive (and cache on the annotation) a box from the mask.
                ann['bbox'] = mask_utils.toBbox(ann['segmentation'])
            bbox = self._coco_box_to_bbox(ann['bbox'])
            bbox[:2] = affine_transform(bbox[:2], trans_input)
            bbox[2:] = affine_transform(bbox[2:], trans_input)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue

            if 'seg' in self.opt.task and self.opt.seg_center:
                seg_mask = self.get_masks_as_input(ann, trans_input)
                if np.sum(seg_mask) <= 0:
                    continue
                foreground = np.where(seg_mask >= 0.5)
                if foreground[0].size == 0:
                    # No pixel reaches 0.5: the centroid would be the mean of
                    # an empty array (NaN), so the object is skipped.
                    continue
                ct = np.array(
                    [np.mean(foreground[1]),
                     np.mean(foreground[0])],
                    dtype=np.float32)
            else:
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)

            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))

            ct0 = ct.copy()  # clean centre, before jitter

            # The order of the random draws below is kept fixed so seeded
            # runs stay reproducible.
            ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
            ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
            # conf == 0 marks the detection as "lost": it is drawn with
            # weight 0 on the heatmap.
            conf = 1 if np.random.random() > self.opt.lost_disturb else 0

            ct_int = ct.astype(np.int32)
            if conf == 0:
                pre_cts.append(ct / down_ratio)
            else:
                pre_cts.append(ct0 / down_ratio)

            track_ids.append(ann['track_id'] if 'track_id' in ann else -1)
            if return_hm:
                draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

            if np.random.random() < self.opt.fp_disturb and return_hm:
                # Extra false-positive blob near the clean centre.
                ct2 = ct0.copy()
                # Hard code heatmap disturb ratio, haven't tried other numbers.
                ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
                ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
                ct2_int = ct2.astype(np.int32)
                draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)

        return pre_hm, pre_cts, track_ids
示例#26
0
    def _add_instance(self,
                      ret,
                      gt_det,
                      k,
                      cls_id,
                      bbox,
                      bbox_amodal,
                      ann,
                      trans_output,
                      aug_s,
                      calib,
                      pre_cts=None,
                      track_ids=None):
        """Fill slot ``k`` of the target arrays in ``ret`` for one object.

        The centre is derived from ``bbox`` (x1, y1, x2, y2). A Gaussian is
        drawn on ``ret['hm'][cls_id - 1]`` and the regression targets of
        every head in ``self.opt.heads`` are written at index ``k``; the
        matching ground-truth values are appended to the lists in ``gt_det``.
        When ``self.opt.pointcloud`` is set, the point-cloud heatmap
        ``ret['pc_hm']`` is updated as well.

        Boxes with non-positive width or height are skipped without touching
        ``ret`` or ``gt_det``.

        ``calib`` is only used for the point-cloud frustum threshold. When
        the 'tracking' head is enabled, ``pre_cts`` and ``track_ids`` must be
        sequences (the ``None`` defaults are not handled).
        """
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h <= 0 or w <= 0:
            return
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        ret['cat'][k] = cls_id - 1
        ret['mask'][k] = 1
        if 'wh' in ret:
            ret['wh'][k] = 1. * w, 1. * h
            ret['wh_mask'][k] = 1
        # Flattened (row-major) index of the centre cell.
        ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        # Sub-cell offset lost by the integer truncation.
        ret['reg'][k] = ct - ct_int
        ret['reg_mask'][k] = 1
        draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

        gt_det['bboxes'].append(
            np.array(
                [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
                dtype=np.float32))
        gt_det['scores'].append(1)
        gt_det['clses'].append(cls_id - 1)
        gt_det['cts'].append(ct)

        if 'tracking' in self.opt.heads:
            if ann['track_id'] in track_ids:
                pre_ct = pre_cts[track_ids.index(ann['track_id'])]
                ret['tracking_mask'][k] = 1
                ret['tracking'][k] = pre_ct - ct_int
                gt_det['tracking'].append(ret['tracking'][k])
            else:
                gt_det['tracking'].append(np.zeros(2, np.float32))

        if 'ltrb' in self.opt.heads:
            ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
                             bbox[2] - ct_int[0], bbox[3] - ct_int[1]
            ret['ltrb_mask'][k] = 1

        ## ltrb_amodal is to use the left, top, right, bottom bounding box representation
        # to enable detecting out-of-image bounding box (important for MOT datasets)
        if 'ltrb_amodal' in self.opt.heads:
            ret['ltrb_amodal'][k] = \
                bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
                bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
            ret['ltrb_amodal_mask'][k] = 1
            gt_det['ltrb_amodal'].append(bbox_amodal)

        if 'nuscenes_att' in self.opt.heads:
            if ('attributes' in ann) and ann['attributes'] > 0:
                att = int(ann['attributes'] - 1)
                ret['nuscenes_att'][k][att] = 1
                ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
            gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

        if 'velocity' in self.opt.heads:
            # Values <= -1000 are treated as "no velocity available".
            if ('velocity_cam' in ann) and min(ann['velocity_cam']) > -1000:
                ret['velocity'][k] = np.array(ann['velocity_cam'],
                                              np.float32)[:3]
                ret['velocity_mask'][k] = 1
            gt_det['velocity'].append(ret['velocity'][k])

        if 'hps' in self.opt.heads:
            self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h,
                          w)

        if 'rot' in self.opt.heads:
            self._add_rot(ret, ann, k, gt_det)

        if 'dep' in self.opt.heads:
            if 'depth' in ann:
                ret['dep_mask'][k] = 1
                ret['dep'][k] = ann['depth'] * aug_s
                gt_det['dep'].append(ret['dep'][k])
            else:
                gt_det['dep'].append(2)

        if 'dim' in self.opt.heads:
            if 'dim' in ann:
                ret['dim_mask'][k] = 1
                ret['dim'][k] = ann['dim']
                gt_det['dim'].append(ret['dim'][k])
            else:
                gt_det['dim'].append([1, 1, 1])

        if 'amodel_offset' in self.opt.heads:
            if 'amodel_center' in ann:
                amodel_center = affine_transform(ann['amodel_center'],
                                                 trans_output)
                ret['amodel_offset_mask'][k] = 1
                ret['amodel_offset'][k] = amodel_center - ct_int
                gt_det['amodel_offset'].append(ret['amodel_offset'][k])
            else:
                gt_det['amodel_offset'].append([0, 0])

        if self.opt.pointcloud:
            ## get pointcloud heatmap
            if self.opt.disable_frustum:
                # Without frustum association the point-cloud depth map is
                # used directly as the heatmap (same array object, no copy).
                ret['pc_hm'] = ret['pc_dep']
                # Options are read from self.opt like everywhere else in this
                # method; a bare `opt` is not a name this method defines.
                if self.opt.normalize_depth:
                    # NOTE(review): this in-place division runs on every call
                    # and also modifies ret['pc_dep'] through the alias above;
                    # confirm the depth channel is not scaled once per object.
                    ret['pc_hm'][self.opt.pc_feat_channels[
                        'pc_dep']] /= self.opt.max_pc_dist
            else:
                dist_thresh = get_dist_thresh(calib, ct, ann['dim'],
                                              ann['alpha'])
                pc_dep_to_hm(ret['pc_hm'], ret['pc_dep'], ann['depth'], bbox,
                             dist_thresh, self.opt)
    def __getitem__(self, idx):
        """Load one face image with its 68 landmarks and build heatmaps.

        The image is cropped around the landmarks, optionally pasted onto a
        square zero canvas (``self.frame``) and resized to ``self.resize``.
        Landmark coordinates are shifted / scaled along with the image.

        Returns:
            ``None`` if the .pts file does not yield exactly 68 points,
            otherwise a tuple ``(frame, hmap, M, rows)``:
              frame - the processed image (the plain crop when ``self.frame``
                      is falsy)
              hmap  - (69, 64, 64) float32; one Gaussian map per landmark
                      plus a final boundary map from ``draw_boundary``
              M     - (69, 64, 64) 0/1 mask: ``hmap`` after 3x3 grey dilation,
                      thresholded at 0.5
              rows  - (68, 2) landmark x/y coordinates in ``frame``
        """
        imgPath = self.root + "/" + self.imgPath[idx]
        ptsPath = self.root + "/" + self.ptsPath[idx]
        img = plt.imread(imgPath)
        if img.ndim == 2:
            # gray to rgb
            img = img.reshape(img.shape[0], img.shape[1], 1)
            img = np.repeat(img, 3, axis=2)
        n_rows, n_cols = img.shape[0], img.shape[1]

        with open(ptsPath) as ptsf:
            # Drop the first three lines and the last one; what remains is
            # expected to be one "x y" pair per line.
            lines = [line.strip() for line in ptsf][3:-1]
        if len(lines) != 68:
            print("points are not 68")
            return None
        rows = np.array(
            [[float(value) for value in line.split(' ')] for line in lines])

        minx, maxx = rows[:, 0].min(), rows[:, 0].max()
        miny, maxy = rows[:, 1].min(), rows[:, 1].max()
        # The horizontal landmark extent is used as the padding above the
        # face; the other three sides use self.crop_pad.
        face_h = maxx - minx
        top = max(0, miny - face_h)
        left = max(0, minx - self.crop_pad)

        img = img[int(top):int(min(maxy + self.crop_pad, n_rows)),
                  int(left):int(min(maxx + self.crop_pad, n_cols)), :]

        rows[:, 1] -= top
        rows[:, 0] -= left

        # Without framing the cropped image itself is returned; previously
        # `frame` was left unbound in that case and the return statement
        # raised UnboundLocalError.
        frame = img
        if self.frame:
            csh = img.shape
            side = max(csh[0], csh[1])
            frame = np.zeros((side, side, 3))
            ctr = side // 2

            # Paste the crop centred on the square canvas.
            frame[math.ceil(ctr - csh[0] / 2.):math.ceil(ctr + csh[0] / 2.),
                  math.ceil(ctr - csh[1] / 2.):math.ceil(ctr + csh[1] / 2.),
                  :] = img

            if csh[1] != frame.shape[1]:
                # horizontal padding
                rows[:, 0] += (frame.shape[0] - csh[1]) / 2.
            else:
                # vertical padding
                rows[:, 1] += (frame.shape[0] - csh[0]) / 2.

            if self.resize is not None:
                rows /= frame.shape[0]
                frame = cv2.resize(frame,
                                   dsize=(self.resize, self.resize),
                                   interpolation=cv2.INTER_LINEAR)
                rows *= float(self.resize)

        # Landmarks are scaled by 64/256 onto the 64x64 maps; this presumably
        # assumes self.resize == 256 -- TODO confirm.
        hmap = np.zeros((68 + 1, 64, 64), dtype=np.float32)
        M = np.zeros((68 + 1, 64, 64), dtype=np.float32)
        for ind, xy in enumerate(rows):
            hmap[ind] = draw_umich_gaussian(hmap[ind], xy / 256. * 64, 7)
        # Builtin int replaces the np.int alias removed in NumPy 1.24.
        hmap[-1] = draw_boundary(
            hmap[-1], np.clip((rows / 256. * 64).astype(int), 0, 63))

        for i in range(len(M)):
            M[i] = grey_dilation(hmap[i], size=(3, 3))
        M = np.where(M >= 0.5, 1, 0)

        return frame, hmap, M, rows
示例#28
0
    def __getitem__(self, indices):
        """Assemble a mosaic sample (current and "previous" image) with targets.

        Args:
            indices: Either a sequence of image indices to combine, or a single
                ``int``; in the latter case three additional indices are drawn
                at random and appended to it.

        Returns:
            The ``ret`` dict prepared by ``self._init_ret`` and filled here
            with ``'image'``, ``'pre_img'`` and the per-object targets
            (``cat``, ``mask``, ``ind``, ``reg``, ``reg_mask``, ``hm``,
            optionally ``wh``/``wh_mask``, and ``tracking``/``tracking_mask``).
            When ``self.opt.debug > 0`` it also carries ``ret['meta']`` with
            the formatted ground-truth detections.
        """
        if isinstance(indices, int):
            # NOTE(review): randint's upper bound is exclusive, so the last
            # image index is never drawn here - confirm this is intended.
            partners = [
                np.random.randint(0, len(self.images) - 1) for _ in range(3)
            ]
            indices = [indices] + partners

        img_list, anns_list = [], []
        for index in indices:
            img, anns, _, _ = self._load_data(index)
            _, ori_w, _ = img.shape
            # Random horizontal flip, applied independently per source image.
            if np.random.random() < self.opt.flip:
                img = img[:, ::-1, :]
                anns = self._flip_anns(anns, ori_w)
            img_list.append(img)
            anns_list.append(anns)

        # The same images are mosaicked twice: the first result supplies the
        # current image and its annotations, the second the "previous" image
        # together with the track ids and centres used as tracking targets.
        cur_norm, _, cur_anns, _, _ = self._mosaic(img_list, anns_list)
        pre_norm, _, _, track_ids, pre_cts = self._mosaic(img_list, anns_list)

        ret = {'image': cur_norm, 'pre_img': pre_norm}
        gt_det = {'bboxes': [], 'scores': [], 'clses': [], 'cts': []}
        self._init_ret(ret, gt_det)

        down_ratio = self.opt.down_ratio
        for k in range(min(len(cur_anns), self.max_objs)):
            ann = cur_anns[k]
            # Dividing by down_ratio gives coordinates on the output layer.
            bbox = ann['bbox'] / down_ratio
            box_h = bbox[3] - bbox[1]
            box_w = bbox[2] - bbox[0]
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id > self.opt.num_classes or cls_id <= -999:
                continue
            cls_idx = cls_id - 1

            radius = max(
                0, int(gaussian_radius((math.ceil(box_h), math.ceil(box_w)))))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)

            ret['cat'][k] = cls_idx
            ret['mask'][k] = 1
            if 'wh' in ret:
                ret['wh'][k] = 1. * box_w, 1. * box_h
                ret['wh_mask'][k] = 1
            # Flattened index of the integer centre on the output map.
            ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
            # Sub-pixel remainder lost by truncating the centre.
            ret['reg'][k] = ct - ct_int
            ret['reg_mask'][k] = 1
            draw_umich_gaussian(ret['hm'][cls_idx], ct_int, radius)

            gt_box = np.array([
                ct[0] - box_w / 2, ct[1] - box_h / 2,
                ct[0] + box_w / 2, ct[1] + box_h / 2
            ], dtype=np.float32)
            gt_det['bboxes'].append(gt_box)
            gt_det['scores'].append(1)
            gt_det['clses'].append(cls_idx)
            gt_det['cts'].append(ct)

            track_id = ann['track_id']
            if track_id in track_ids:
                # Offset from the current integer centre to the centre of the
                # same track in the "previous" mosaic.
                pre_ct = pre_cts[track_ids.index(track_id)] / down_ratio
                ret['tracking_mask'][k] = 1
                ret['tracking'][k] = pre_ct - ct_int
                gt_det['tracking'].append(ret['tracking'][k])
            else:
                gt_det['tracking'].append(np.zeros(2, np.float32))

        if self.opt.debug > 0:
            ret['meta'] = {'gt_det': self._format_gt_det(gt_det)}

        return ret
示例#29
0
  def __getitem__(self, index):
    """Build one detection sample (image tensor plus heatmap targets).

    Args:
      index: Position in ``self.images`` of the image id to load.

    Returns:
      Dict with keys ``'image'`` (normalised, channel-first float32 array),
      ``'hmap'`` (per-class centre heatmaps), ``'w_h_'`` (box width/height on
      the feature map), ``'regs'`` (sub-pixel centre offsets), ``'inds'``
      (flattened centre indices), ``'ind_masks'`` (1 for filled slots),
      ``'c'`` / ``'s'`` (centre and scale used for the affine warp) and
      ``'img_id'``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    annotations = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))

    labels = np.array([self.cat_ids[a['category_id']] for a in annotations])
    bboxes = np.array([a['bbox'] for a in annotations], dtype=np.float32)
    if not len(bboxes):
      # No annotations: use one zero-sized placeholder box so the array
      # operations below still work; it is skipped by the size check later.
      bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
      labels = np.array([[0]])

    # Convert boxes from (x, y, w, h) to (x1, y1, x2, y2).
    bboxes[:, 2:] += bboxes[:, :2]

    img = cv2.imread(img_path)
    height, width = img.shape[:2]
    # Default warp: centred on the image, scaled by its longer side.
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    is_train = self.split == 'train'
    flipped = False
    if is_train:
      # Random scale and random crop centre kept away from the image borders.
      scale = scale * np.random.choice(self.rand_scales)
      w_border = get_border(128, width)
      h_border = get_border(128, height)
      center[0] = np.random.randint(low=w_border, high=width - w_border)
      center[1] = np.random.randint(low=h_border, high=height - h_border)

      # Random horizontal flip; the crop centre is mirrored accordingly.
      if np.random.random() < 0.5:
        flipped = True
        img = img[:, ::-1, :]
        center[0] = width - center[0] - 1

    # Warp the image to the network input size.
    in_w, in_h = self.img_size['w'], self.img_size['h']
    trans_img = get_affine_transform(center, scale, 0, [in_w, in_h])
    img = cv2.warpAffine(img, trans_img, (in_w, in_h))

    img = img.astype(np.float32) / 255.
    if is_train:
      color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # [H, W, C] -> [C, H, W]

    # Same centre/scale, but mapping onto the feature-map resolution.
    fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

    # Training targets.
    hmap = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    for k, (bbox, label) in enumerate(zip(bboxes, labels)):
      if flipped:
        # Mirror the x coordinates, swapping left and right edges.
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      # Project both corners onto the feature map and clamp to its bounds.
      bbox[:2] = affine_transform(bbox[:2], trans_fmap)
      bbox[2:] = affine_transform(bbox[2:], trans_fmap)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
      box_h, box_w = bbox[3] - bbox[1], bbox[2] - bbox[0]

      if not (box_h > 0 and box_w > 0):
        continue

      obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                       dtype=np.float32)
      obj_c_int = obj_c.astype(np.int32)
      # Gaussian radius derived from the box size and the IoU setting.
      radius = gaussian_radius((math.ceil(box_h), math.ceil(box_w)),
                               self.gaussian_iou)
      radius = max(0, int(radius))
      # Render the gaussian peak on this object's class channel.
      draw_umich_gaussian(hmap[label], obj_c_int, radius)

      w_h_[k] = 1. * box_w, 1. * box_h
      # Discretisation error of the truncated centre.
      regs[k] = obj_c - obj_c_int
      # Flattened feature-map position of the centre: fmap_w * cy + cx.
      inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
      # Mark slot k as holding a valid object.
      ind_masks[k] = 1

    return {
      'image': img,
      'hmap': hmap,
      'w_h_': w_h_,
      'regs': regs,
      'inds': inds,
      'ind_masks': ind_masks,
      'c': center,
      's': scale,
      'img_id': img_id,
    }
    def __getitem__(self, index):
        """Build one rotated-box sample from an image and its XML annotation.

        The image is read from ``<data_dir>/images/<id>.jpeg`` and the boxes
        from the ``robndbox`` element of every ``object`` in
        ``<data_dir>/annotations/<id>.xml`` (fields ``cx, cy, w, h, angle``).

        Args:
            index: Position in ``self.ids`` of the sample id to load.

        Returns:
            Dict with ``'image'`` (channel-first float32 array scaled to
            [0, 1]), ``'hmap'``, ``'w_h_'`` (box size divided by
            ``self.img_size``), ``'regs'``, ``'inds'``, ``'ind_masks'``,
            ``'theta'`` (box angles), ``'center'`` (box centres divided by
            ``self.down_ratio``), plus ``'c'``, ``'s'`` and ``'img_id'``.
        """
        img_id = self.ids[index]
        img_path = self.data_dir + "/images/" + img_id + ".jpeg"
        annot_path = self.data_dir + "/annotations/" + img_id + ".xml"

        # One [cx, cy, w, h, angle] row per annotated object.
        rbox_tags = ('cx', 'cy', 'w', 'h', 'angle')
        tree = elemTree.parse(annot_path)
        annotations = []
        for obj in tree.findall('./object'):
            rbox_node = obj.find('robndbox')
            annotations.append(
                [float(rbox_node.find(tag).text) for tag in rbox_tags])

        # Every annotated object gets label 1.
        labels = np.ones(len(annotations))
        bboxes = np.array(annotations, dtype=np.float32)
        if not len(bboxes):
            # No objects: one zero-sized placeholder that the size check in
            # the loop below skips.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])

        img = cv2.imread(img_path)
        # Swap the first and third colour channels.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        height, width = img.shape[:2]
        center = np.array([width / 2., height / 2.], dtype=np.float32)
        scale = max(height, width) * 1.0

        if self.split == 'train':
            # A random centre and scale are drawn and returned as 'c' / 's';
            # this method itself applies no warp to the image.
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(self.img_size['w'], width)
            h_border = get_border(self.img_size['h'], height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

        img = img.astype(np.float32) / 255.
        img = img.transpose(2, 0, 1)  # [H, W, C] -> [C, H, W]

        fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
        n_slots = self.max_objs
        hmap = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
        w_h_ = np.zeros((n_slots, 2), dtype=np.float32)
        thetas = np.zeros((n_slots, 1), dtype=np.float32)
        regs = np.zeros((n_slots, 2), dtype=np.float32)
        inds = np.zeros((n_slots, ), dtype=np.int64)
        ind_masks = np.zeros((n_slots, ), dtype=np.uint8)
        objCnt = np.zeros((n_slots, 2), dtype=np.float32)

        for k, (rbox, label) in enumerate(zip(bboxes, labels)):
            box_w, box_h, angle = rbox[2], rbox[3], rbox[-1]
            if not (box_h > 0 and box_w > 0):
                continue

            # Box centre expressed on the down-sampled feature map.
            obj_c = rbox[:2].astype(np.float32) / float(self.down_ratio)
            objCnt[k] = obj_c
            obj_c_int = obj_c.astype(np.int32)

            radius = gaussian_radius((math.ceil(box_h), math.ceil(box_w)),
                                     self.gaussian_iou)
            radius = max(0, int(radius))
            # Labels are 1-based, heatmap channels 0-based.
            draw_umich_gaussian(hmap[int(label) - 1], obj_c_int, radius)

            w_h_[k] = box_w / self.img_size['w'], box_h / self.img_size['h']
            thetas[k] = angle
            # Discretisation error of the truncated centre.
            regs[k] = obj_c - obj_c_int
            # Flattened feature-map position of the centre.
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[k] = 1

        sample = {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id,
            'theta': thetas,
            'center': objCnt,
        }
        return sample