def __getitem__(self, index):
    """Load one COCO sample and build CenterNet training targets.

    Returns a dict with the normalized CHW image, per-class center heatmap,
    per-object width/height and sub-pixel offset regressions, flattened
    feature-map indices, and a validity mask over the object slots.
    """
    img_id = self.images[index]
    img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    labels = np.array([self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations], dtype=np.float32)
    if len(bboxes) == 0:
        # dummy placeholder so the vectorized ops below still work on
        # images without annotations (h == w == 0, so nothing is drawn)
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
    # print("===============", img_path)
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    # crop/scale reference point; randomized below for train-time augmentation
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0
    # affine-transform augmentation: random rescale, random crop center,
    # random horizontal flip (train split only)
    flipped = False
    if self.split == 'train':
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1
    # apply the affine transform to warp the image to the network input size
    trans_img = get_affine_transform(center, scale, 0,
                                     [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))
    img = (img.astype(np.float32) / 255.)
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]
    # a second transform maps annotation coordinates onto the (smaller)
    # output feature map rather than the input image
    trans_fmap = get_affine_transform(center, scale, 0,
                                      [self.fmap_size['w'], self.fmap_size['h']])
    # the three main regression targets
    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
                    dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)
    for k, (bbox, label) in enumerate(zip(bboxes, labels)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:  # skip boxes degenerate after clipping
            obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            # object-size-adaptive gaussian radius
            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)),
                                                self.gaussian_iou)))
            # splat a gaussian peak on this object's class heatmap
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_[k] = 1. * w, 1. * h
            # sub-pixel offset lost by rounding the center to integer coords
            regs[k] = obj_c - obj_c_int  # discretization error
            # flattened feature-map index: fmap_w * cy + cx
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            # mark this object slot as valid for the loss
            ind_masks[k] = 1
    return {'image': img, 'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds,
            'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id}
def __getitem__(self, index):
    """Load one KINS sample: image plus amodal/inmodal contour targets.

    Builds, per object: a center heatmap, inmodal box sizes, mass-center
    offsets, sparse shape codes (via ISTA against `self.dictionary`), a
    low-resolution amodal+inmodal mask "vote" vector, and an image-level
    occlusion-count map.
    """
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir,
        self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    img = self.coco.loadImgs(ids=[img_id])[0]
    w_img = int(img['width'])
    h_img = int(img['height'])
    labels = []
    bboxes = []
    a_bboxes = []
    shapes = []
    a_shapes = []
    for anno in annotations:
        if anno['category_id'] not in KINS_IDS:
            continue  # excludes 3: person-sitting class for evaluation
        a_polygons = anno['segmentation'][0]  # only one (amodal) mask for each instance
        polygons = anno['i_segm'][0]  # inmodal (visible-part) polygon
        # gt_x1, gt_y1, gt_w, gt_h = anno['a_bbox']  # this is used to clip resampled polygons
        a_contour = np.array(a_polygons).reshape((-1, 2))
        contour = np.array(polygons).reshape((-1, 2))
        # Downsample the contour to fix number of vertices
        if cv2.contourArea(contour.astype(np.int32)) < 5:  # remove tiny objects
            continue
        fixed_contour = uniformsample(a_contour, self.n_vertices)
        i_contour = uniformsample(contour, self.n_vertices)
        shapes.append(np.ndarray.flatten(i_contour).tolist())
        a_shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(anno['bbox'])
        a_bboxes.append(anno['a_bbox'])
    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    a_bboxes = np.array(a_bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    a_shapes = np.array(a_shapes, dtype=np.float32)
    if len(bboxes) == 0:
        # dummy placeholders so the array ops below still work on empty images
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        a_bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        a_shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
    a_bboxes[:, 2:] += a_bboxes[:, :2]
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0
    flipped = False
    if self.split == 'train':
        # random rescale + random crop center + random horizontal flip;
        # asymmetric borders (360/160) match KINS's wide driving images
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(360, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1
    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))
    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]
    # second transform maps annotations onto the output feature map
    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])
    hmap = np.zeros(
        (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # heatmap of centers
    occ_map = np.zeros(
        (1, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # grayscale map for occlusion levels
    w_h_ = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # width and height of inmodal bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # gt amodal segmentation polygons
    center_offsets = np.zeros(
        (self.max_objs, 2),
        dtype=np.float32)  # gt amodal mass centers to inmodal bbox center
    codes_ = np.zeros((self.max_objs, self.n_codes),
                      dtype=np.float32)  # gt amodal coefficients
    regs = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # regression for quantization error
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    votes_ = np.zeros((self.max_objs, self.vote_length),
                      dtype=np.float32)  # voting for heatmaps
    for k, (bbox, a_bbox, label, shape, a_shape) in enumerate(
            zip(bboxes, a_bboxes, labels, shapes, a_shapes)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            a_bbox[[0, 2]] = width - a_bbox[[2, 0]] - 1
            # Flip the contour x-axis
            for m in range(self.n_vertices):
                a_shape[2 * m] = width - a_shape[2 * m] - 1
                shape[2 * m] = width - shape[2 * m] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        # h/w come from the inmodal (visible) box — these drive the heatmap
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]  # This box is the inmodal boxes
        a_bbox[:2] = affine_transform(a_bbox[:2], trans_fmap)
        a_bbox[2:] = affine_transform(a_bbox[2:], trans_fmap)
        a_bbox[[0, 2]] = np.clip(a_bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        a_bbox[[1, 3]] = np.clip(a_bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        # generate gt shape mean and std from contours
        for m in range(self.n_vertices):  # apply scale and crop transform to shapes
            a_shape[2 * m:2 * m + 2] = affine_transform(
                a_shape[2 * m:2 * m + 2], trans_fmap)
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)
        shape_clipped = np.reshape(a_shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                      self.fmap_size['w'] - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                      self.fmap_size['h'] - 1)
        i_shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        i_shape_clipped[:, 0] = np.clip(i_shape_clipped[:, 0], 0,
                                        self.fmap_size['w'] - 1)
        i_shape_clipped[:, 1] = np.clip(i_shape_clipped[:, 1], 0,
                                        self.fmap_size['h'] - 1)
        # canonicalize vertex order: counter-clockwise contours are reversed
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()
        # Indexing from the left-most vertex, argmin x-axis
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)
        mass_center = np.mean(indexed_shape, axis=0)
        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue
        centered_shape = indexed_shape - mass_center  # these are amodal mask shapes
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = centered_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            # sparse-code the centered amodal contour against the dictionary
            codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)),
                                     self.dictionary,
                                     lmbda=self.sparse_alpha,
                                     max_iter=60)
            a_shifted_poly = indexed_shape - np.array([
                a_bbox[0], a_bbox[1]
            ])  # crop amodal shapes to the amodal bboxes
            amodal_obj_mask = self.polys_to_mask(
                [np.ndarray.flatten(a_shifted_poly, order='C').tolist()],
                a_bbox[3], a_bbox[2])
            i_shifted_poly = i_shape_clipped - np.array([
                a_bbox[0], a_bbox[1]
            ])  # crop inmodal shapes to the same amodal bboxes
            inmodal_obj_mask = self.polys_to_mask(
                [np.ndarray.flatten(i_shifted_poly, order='C').tolist()],
                a_bbox[3], a_bbox[2])
            # amodal + inmodal masks averaged into one grayscale patch
            obj_mask = (
                amodal_obj_mask + inmodal_obj_mask
            ) * 255. / 2  # convert to float type in image scale
            obj_mask = cv2.resize(
                obj_mask.astype(np.uint8),
                dsize=(self.vote_vec_dim, self.vote_vec_dim),
                interpolation=cv2.INTER_LINEAR) * 1.
            votes_[k] = obj_mask.reshape((1, -1)) / 255.
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1
            # occlusion level map gt: count overlapping amodal masks per pixel
            occ_map[0] += self.polys_to_mask(
                [np.ndarray.flatten(indexed_shape).tolist()],
                self.fmap_size['h'], self.fmap_size['w']) * 1.
    # normalize overlap counts into [0, 1]
    occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ
    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'occ_map': occ_map,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'votes': votes_,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Load one sample: image plus sparse-coded contour regression targets.

    Per object this builds a center heatmap, box sizes, mass-center offsets,
    a normalized fixed-vertex contour, its sparse code (ISTA against
    `self.dictionary`), and the contour's scalar spread ('std') used to
    de-normalize predicted shapes.
    """
    img_id = self.images[index]
    img_path = os.path.join(
        self.img_dir,
        self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)
    img = self.coco.loadImgs(ids=[img_id])[0]
    w_img = int(img['width'])
    h_img = int(img['height'])
    labels = []
    bboxes = []
    shapes = []
    for anno in annotations:
        if anno['iscrowd'] == 1:  # Excludes crowd objects
            continue
        polygons = get_connected_polygon_using_mask(
            anno['segmentation'], (h_img, w_img),
            n_vertices=self.n_vertices,
            closing_max_kernel=50)
        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        contour = np.array(polygons).reshape((-1, 2))
        # Downsample the contour to fix number of vertices
        if len(contour) > self.n_vertices:
            fixed_contour = resample(contour, num=self.n_vertices)
        else:
            fixed_contour = turning_angle_resample(contour, self.n_vertices)
        fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
        fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)
        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # FIX: the original check `contour_std == np.nan` can never be True
        # (NaN compares unequal to everything, including itself), so contours
        # with NaN coordinates slipped through and poisoned the targets.
        # np.isfinite() correctly rejects both inf and NaN.
        if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid shapes
            continue
        # tight box recomputed from the resampled contour (already xyxy)
        updated_bbox = [
            np.min(fixed_contour[:, 0]),
            np.min(fixed_contour[:, 1]),
            np.max(fixed_contour[:, 0]),
            np.max(fixed_contour[:, 1])
        ]
        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(updated_bbox)
    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)
    if len(bboxes) == 0:
        # dummy placeholders so the array ops below still work on empty images
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    # no xywh->xyxy conversion: updated_bbox is already corner-form
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0
    flipped = False
    if self.split == 'train':
        # random rescale + random crop center + random horizontal flip
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(160, width)
        h_border = get_border(160, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1
    trans_img = get_affine_transform(
        center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))
    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]
    # second transform maps annotations onto the output feature map
    trans_fmap = get_affine_transform(
        center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])
    hmap = np.zeros(
        (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2),
                    dtype=np.float32)  # width and height of bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                       dtype=np.float32)  # gt amodal segmentation polygons
    center_offsets = np.zeros(
        (self.max_objs, 2),
        dtype=np.float32)  # gt mass centers to bbox center
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
    contour_std_ = np.zeros(
        (self.max_objs, 1),
        dtype=np.float32)  # keep track of codes that is activated
    regs = np.zeros(
        (self.max_objs, 2),
        dtype=np.float32)  # regression for offsets of shape center
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-axis
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # generate gt shape mean and std from contours
        for m in range(self.n_vertices):  # apply scale and crop transform to shapes
            shape[2 * m:2 * m + 2] = affine_transform(
                shape[2 * m:2 * m + 2], trans_fmap)
        shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                      self.fmap_size['w'] - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                      self.fmap_size['h'] - 1)
        # canonicalize vertex order: counter-clockwise contours are reversed
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()
        # Indexing from the left-most vertex, argmin x-axis
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate(
            (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)
        mass_center = np.mean(indexed_shape, axis=0)
        contour_std = np.std(indexed_shape, axis=0) + 1e-4
        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue
        # hoisted: the scalar spread normalizes the shape AND is stored as the
        # 'std' target, so compute it once instead of twice
        std_scalar = np.sqrt(np.sum(contour_std ** 2))
        norm_shape = (indexed_shape - mass_center) / std_scalar
        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = norm_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            # sparse-code the normalized contour against the dictionary
            codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                     self.dictionary,
                                     lmbda=self.sparse_alpha,
                                     max_iter=60)
            contour_std_[k] = std_scalar
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1
    return {
        'image': img,
        'shapes': shapes_,
        'codes': codes_,
        'offsets': center_offsets,
        'std': contour_std_,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Load one CenterNet sample; returns input tensor plus hm/wh/reg targets.

    On the eval split the input is padded via `(size | self.opt.pad) + 1` and
    a 'meta' entry (center, scale, gt boxes, image id) is attached so the
    detector output can be mapped back to original image coordinates.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), cfg.max_objs)
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.split == 'train':
        s = max(img.shape[0], img.shape[1]) * 1.0  # scalar scale at train time
        input_h, input_w = cfg.train_resolution[0], cfg.train_resolution[1]
    else:
        # NOTE(review): bitwise-or rounding assumes self.opt.pad is 2**k - 1
        # (e.g. 31 or 127) — confirm against the option defaults.
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)  # per-axis scale at eval
    flipped = False
    if self.split == 'train':
        # random rescale in [0.6, 1.3], random crop center, random flip
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = get_border(128, img.shape[1])
        h_border = get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1
    trans_matrix = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_matrix, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    # TODO: inp appears numbers below 0 after color_aug (myself)
    if self.split == 'train':
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - cfg.mean) / cfg.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW
    # annotation coordinates are mapped onto the down-sampled output map
    output_h = input_h // cfg.down_ratio
    output_w = input_w // cfg.down_ratio
    trans_matrix = get_affine_transform(c, s, 0, [output_w, output_h])
    hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
    ind = np.zeros(cfg.max_objs, dtype=np.int64)
    reg_mask = np.zeros(cfg.max_objs, dtype=np.uint8)
    gt_box = []
    for i in range(num_objs):
        ann = anns[i]
        bbox = coco2x1y1x2y2(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_matrix)
        bbox[2:] = affine_transform(bbox[2:], trans_matrix)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:  # skip boxes degenerate after clipping
            # get an object size-adaptive radius
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[i] = 1. * w, 1. * h
            # flattened output-map index: output_w * cy + cx
            ind[i] = ct_int[1] * output_w + ct_int[0]
            reg[i] = ct - ct_int  # sub-pixel offset lost by int rounding
            reg_mask[i] = 1
            gt_box.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
                cls_id
            ])
    ret = {
        'input': inp,
        'hm': hm,
        'reg': reg,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh
    }
    if self.opt.debug > 0 or not self.split == 'train':
        gt_box = np.array(
            gt_box, dtype=np.float32) if len(gt_box) > 0 else np.zeros(
                (1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_box, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Load one rotated-box sample from roLabelImg-style XML annotations.

    Each <robndbox> supplies (cx, cy, w, h, angle); targets are the center
    heatmap, normalized width/height, rotation angle, and center offsets on
    the down-sampled feature map.
    """
    img_id = self.ids[index]
    img_path = self.data_dir + "/images/" + img_id + ".jpeg"
    annot_path = self.data_dir + "/annotations/" + img_id + ".xml"
    tree = elemTree.parse(annot_path)
    annotations = [
        [
            float(obj.find('robndbox').find('cx').text),  # ctrX
            float(obj.find('robndbox').find('cy').text),  # ctrY
            float(obj.find('robndbox').find('w').text),  # W
            float(obj.find('robndbox').find('h').text),  # H
            float(obj.find('robndbox').find('angle').text)
        ]  # angle
        for obj in tree.findall('./object')
    ]
    # single foreground class: every annotated object is labeled 1
    labels = np.array([1. for anno in annotations])
    bboxes = np.array([anno for anno in annotations], dtype=np.float32)
    if len(bboxes) == 0:
        # dummy placeholder (w == h == 0, so the target loop draws nothing)
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    img = cv2.imread(img_path)
    # NOTE(review): cv2.imread returns BGR, so this swap actually produces
    # RGB — confirm downstream consumers expect RGB channel order.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.],
                      dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0
    flipped = False
    if self.split == 'train':
        # NOTE(review): center/scale are randomized here but no affine warp
        # (and no flip) is applied to the image below, so this only changes
        # the 'c'/'s' values returned — confirm this is intended.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(self.img_size['w'], width)
        h_border = get_border(self.img_size['h'], height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)
    img = img.astype(np.float32) / 255.
    # color augmentation and mean/std normalization are disabled here:
    # if self.split == 'train':
    #     color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    # img -= self.mean
    # img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]
    hmap = np.zeros(
        (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
        dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    thetas = np.zeros((self.max_objs, 1), dtype=np.float32)  # rotation angles
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    objCnt = np.zeros((self.max_objs, 2), dtype=np.float32)  # fmap-space centers
    # detections = []
    for k, (rbox, label) in enumerate(zip(bboxes, labels)):
        w, h, angle = rbox[2], rbox[3], rbox[-1]
        if h > 0 and w > 0:
            # box center scaled down to feature-map coordinates
            obj_c = np.array([rbox[0], rbox[1]], dtype=np.float32) / float(
                self.down_ratio)
            objCnt[k] = obj_c
            obj_c_int = obj_c.astype(np.int32)
            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            # label is 1-based here, heatmap channels are 0-based
            draw_umich_gaussian(hmap[int(label) - 1], obj_c_int, radius)
            # sizes normalized by the network input resolution
            w_h_[k] = w / self.img_size['w'], h / self.img_size['h']
            thetas[k] = angle
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1
            # groundtruth bounding box coordinate with class
            # detections.append([obj_c[0] - w / 2, obj_c[1] - h / 2,
            #                    obj_c[0] + w / 2, obj_c[1] + h / 2, 1, label])
    # detections = np.array(detections, dtype=np.float32) \
    #     if len(detections) > 0 else np.zeros((1, 6), dtype=np.float32)
    return {
        'image': img,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id,
        'theta': thetas,
        'center': objCnt
    }
def __getitem__(self, index):
    """Fetch one COCO sample and assemble CenterNet regression targets.

    Returns the normalized CHW image together with the class-center heatmap,
    per-object width/height and sub-pixel offset targets, flattened
    feature-map indices, and the valid-slot mask.
    """
    img_id = self.images[index]
    info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, info['file_name'])
    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))

    labels = np.array([self.cat_ids[a['category_id']] for a in anns])
    boxes = np.array([a['bbox'] for a in anns], dtype=np.float32)
    if len(boxes) == 0:
        # placeholder entry keeps the vectorized ops below valid; its zero
        # size means no target is ever written for it
        boxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
    boxes[:, 2:] += boxes[:, :2]  # [x, y, w, h] -> [x1, y1, x2, y2]

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # random rescale, random crop center, random horizontal flip
        scale = scale * np.random.choice(self.rand_scales)
        border_x = get_border(128, width)
        border_y = get_border(128, height)
        center[0] = np.random.randint(low=border_x, high=width - border_x)
        center[1] = np.random.randint(low=border_y, high=height - border_y)
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    # warp to the network input resolution, then normalize
    trans_img = get_affine_transform(center, scale, 0,
                                     [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img,
                         (self.img_size['w'], self.img_size['h']))
    img = img.astype(np.float32) / 255.
    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)
    img = (img - self.mean) / self.std
    img = img.transpose(2, 0, 1)  # HWC -> CHW

    # annotations are mapped onto the (down-sampled) output feature map
    fw, fh = self.fmap_size['w'], self.fmap_size['h']
    trans_fmap = get_affine_transform(center, scale, 0, [fw, fh])
    hmap = np.zeros((self.num_classes, fh, fw), dtype=np.float32)
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)
    inds = np.zeros((self.max_objs, ), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

    for slot, (box, cls) in enumerate(zip(boxes, labels)):
        if flipped:
            box[[0, 2]] = width - box[[2, 0]] - 1
        box[:2] = affine_transform(box[:2], trans_fmap)
        box[2:] = affine_transform(box[2:], trans_fmap)
        box[[0, 2]] = np.clip(box[[0, 2]], 0, fw - 1)
        box[[1, 3]] = np.clip(box[[1, 3]], 0, fh - 1)
        box_h = box[3] - box[1]
        box_w = box[2] - box[0]
        if box_h <= 0 or box_w <= 0:
            continue  # degenerate after clipping
        ct = np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        # object-size-adaptive gaussian splat on the class heatmap
        radius = gaussian_radius((math.ceil(box_h), math.ceil(box_w)),
                                 self.gaussian_iou)
        draw_umich_gaussian(hmap[cls], ct_int, max(0, int(radius)))
        w_h_[slot] = box_w, box_h
        regs[slot] = ct - ct_int  # sub-pixel offset lost by int rounding
        inds[slot] = ct_int[1] * fw + ct_int[0]  # flattened fmap position
        ind_masks[slot] = 1

    return {
        'image': img,
        'hmap': hmap,
        'w_h_': w_h_,
        'regs': regs,
        'inds': inds,
        'ind_masks': ind_masks,
        'c': center,
        's': scale,
        'img_id': img_id
    }
def __getitem__(self, index):
    """Load one image and build detection + contour-based segmentation targets.

    In addition to the CenterNet heatmap/size/offset targets, this variant
    produces per-object resampled contours (`shapes`), sparse-code
    coefficients from ISTA (`codes`), mass-center offsets (`offsets`),
    and low-resolution binary mask votes (`votes`).

    Fix over the original: the invalid-contour guard used
    `contour_std == np.nan`, which is always False (NaN never compares
    equal to itself), so NaN contours were never filtered. The guard now
    uses `np.isfinite`, which also covers the +/-inf cases.
    """
    img_id = self.images[index]
    img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = []
    bboxes = []
    shapes = []

    for anno in annotations:
        if anno['iscrowd'] == 1 or type(anno['segmentation']) != list:  # Excludes crowd objects
            continue
        if len(anno['segmentation']) > 1:
            # Multi-part segmentation: keep only the largest component.
            obj_contours = [np.array(s).reshape((-1, 2)).astype(np.int32) for s in anno['segmentation']]
            obj_contours = sorted(obj_contours, key=cv2.contourArea)
            polygons = obj_contours[-1]
        else:
            polygons = anno['segmentation'][0]

        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        if gt_w < 5 or gt_h < 5:  # skip tiny boxes
            continue

        contour = np.array(polygons).reshape((-1, 2))
        # Skip degenerate polygons with negligible area.
        if cv2.contourArea(contour.astype(np.int32)) < 35:
            continue
        # Downsample the contour to a fixed number of vertices.
        fixed_contour = uniformsample(contour, self.n_vertices)

        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # BUGFIX: `contour_std == np.nan` was always False; use isfinite to
        # reject NaN/inf spreads as well as near-zero ones.
        if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid shapes
            continue

        # Tight bbox recomputed from the resampled contour (xyxy).
        updated_bbox = [np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]),
                        np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])]

        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(updated_bbox)

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)

    if len(bboxes) == 0:
        # Dummy zero-area entries; skipped by the `h < 1e-6 or w < 1e-6`
        # test in the target-generation loop below.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    # NOTE: bboxes are already xyxy here (recomputed from contours above),
    # so no xywh->xyxy conversion is applied.

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # Random scale + random crop center + random horizontal flip.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(150, width)
        h_border = get_border(150, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))
    img = img.astype(np.float32) / 255.

    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    # Transform mapping original coordinates onto the output feature map.
    trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
    votes_ = np.zeros((self.max_objs, self.vote_length), dtype=np.float32)  # votes for hmap and code
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of bboxes
    shapes_ = np.zeros((self.max_objs, self.n_vertices * 2), dtype=np.float32)  # gt amodal segmentation polygons
    center_offsets = np.zeros((self.max_objs, 2), dtype=np.float32)  # gt mass centers to bbox center
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression for offsets of shape center
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour x-axis
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Apply scale and crop transform to every contour vertex.
        for m in range(self.n_vertices):
            shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap)
        shape_clipped = np.reshape(shape, (self.n_vertices, 2))
        shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1)
        shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1)

        # Canonicalize orientation (clockwise) and start vertex (left-most).
        clockwise_flag = check_clockwise_polygon(shape_clipped)
        if not clockwise_flag:
            fixed_contour = np.flip(shape_clipped, axis=0)
        else:
            fixed_contour = shape_clipped.copy()
        idx = np.argmin(fixed_contour[:, 0])
        indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

        mass_center = np.mean(indexed_shape, axis=0)
        if h < 1e-6 or w < 1e-6:  # remove small bboxes
            continue

        # Center the contour on its mass center, normalized by half box size.
        norm_shape = (indexed_shape - mass_center) / np.array([w / 2., h / 2.])

        if h > 0 and w > 0:
            obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)
            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = norm_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            # Sparse-code the normalized contour against the learned dictionary.
            codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)), self.dictionary,
                                     lmbda=self.sparse_alpha, max_iter=80)
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

            # Ground-truth mask votes: rasterize the contour in a bbox-local
            # canvas (1px padding), resize to the vote grid, then binarize.
            shifted_poly = indexed_shape - np.array([bbox[0], bbox[1]]) + 1  # crop to the bbox, add padding 1
            obj_mask = np.zeros((int(h) + 3, int(w) + 3), dtype=np.uint8)
            cv2.drawContours(obj_mask, shifted_poly[None, :, :].astype(np.int32),
                             color=255, contourIdx=-1, thickness=-1)
            obj_mask = cv2.resize(obj_mask.astype(np.uint8),
                                  dsize=(self.vote_vec_dim, self.vote_vec_dim),
                                  interpolation=cv2.INTER_LINEAR)
            votes_[k] = (obj_mask.reshape((1, -1)) > 0.2 * 255) * 1.0

    return {'image': img, 'shapes': shapes_, 'codes': codes_, 'offsets': center_offsets,
            'votes': votes_, 'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds,
            'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id}
def __getitem__(self, index):
    """Load one image and build heatmap + sparse shape-code targets.

    This variant regresses contour statistics: per-object contour std
    (`w_h_std`) and exponentiated ISTA sparse codes (`codes`) computed
    from the std-normalized, canonicalized contour.

    Fixes over the original:
      * `contour_std == np.nan` was always False (NaN never compares equal
        to itself), so NaN contours slipped through; replaced by
        `np.isfinite`, which also covers the +/-inf case.
      * the COCO image-metadata dict no longer shadows the `img` name that
        later holds the pixel array (renamed to `img_info`).
    """
    img_id = self.images[index]
    img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    img_info = self.coco.loadImgs(ids=[img_id])[0]
    w_img = int(img_info['width'])
    h_img = int(img_info['height'])

    labels = []
    bboxes = []
    shapes = []

    for anno in annotations:
        if anno['iscrowd'] == 1:  # Excludes crowd objects
            continue
        polygons = anno['segmentation']
        if len(polygons) > 1:
            # Multi-part segmentation: rasterize all parts, then morphologically
            # close with a growing kernel until a single contour remains.
            # NOTE(review): this loop assumes closing eventually merges all
            # parts; extremely distant parts would keep growing the kernel.
            bg = np.zeros((h_img, w_img, 1), dtype=np.uint8)
            for poly in polygons:
                len_poly = len(poly)
                vertices = np.zeros((1, len_poly // 2, 2), dtype=np.int32)
                for i in range(len_poly // 2):
                    vertices[0, i, 0] = int(poly[2 * i])
                    vertices[0, i, 1] = int(poly[2 * i + 1])
                cv2.drawContours(bg, vertices, color=(255), contourIdx=-1, thickness=-1)
            pads = 5
            while True:
                kernel = np.ones((pads, pads), np.uint8)
                bg_closed = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel)
                obj_contours, _ = cv2.findContours(bg_closed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                if len(obj_contours) > 1:
                    pads += 5
                else:
                    polygons = obj_contours[0]
                    break
        else:
            polygons = anno['segmentation'][0]

        gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
        contour = np.array(polygons).reshape((-1, 2))
        # Downsample the contour to a fixed number of vertices and keep it
        # inside the annotated bbox.
        fixed_contour = resample(contour, num=self.n_vertices)
        fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
        fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

        contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
        # BUGFIX: `contour_std == np.nan` was always False; use isfinite to
        # reject NaN/inf spreads as well as near-zero ones.
        if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid shapes
            continue

        shapes.append(np.ndarray.flatten(fixed_contour).tolist())
        labels.append(self.cat_ids[anno['category_id']])
        bboxes.append(anno['bbox'])

    labels = np.array(labels)
    bboxes = np.array(bboxes, dtype=np.float32)
    shapes = np.array(shapes, dtype=np.float32)

    if len(bboxes) == 0:
        # Dummy zero-area entries; skipped by the area/std guards below.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([[0]])
        shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0

    flipped = False
    if self.split == 'train':
        # Random scale + random crop center + random horizontal flip.
        scale = scale * np.random.choice(self.rand_scales)
        w_border = get_border(128, width)
        h_border = get_border(128, height)
        center[0] = np.random.randint(low=w_border, high=width - w_border)
        center[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))
    img = img.astype(np.float32) / 255.

    if self.split == 'train':
        color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    # Transform mapping original coordinates onto the output feature map.
    trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
    w_h_std = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height (std) of the shape
    codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)  # gt coefficients/codes for shapes
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression for offsets of shape center
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            # Flip the contour
            for m in range(self.n_vertices):
                shape[2 * m] = width - shape[2 * m] - 1

        bbox[:2] = affine_transform(bbox[:2], trans_fmap)
        bbox[2:] = affine_transform(bbox[2:], trans_fmap)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Apply scale and crop transform to every contour vertex.
        for m in range(self.n_vertices):
            shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap)

        contour = np.reshape(shape, (self.n_vertices, 2))
        # Canonicalize: index from the left-most vertex, clockwise orientation.
        idx = np.argmin(contour[:, 0])
        indexed_shape = np.concatenate((contour[idx:, :], contour[:idx, :]), axis=0)
        clockwise_flag = check_clockwise_polygon(indexed_shape)
        if not clockwise_flag:
            fixed_contour = np.flip(indexed_shape, axis=0)
        else:
            fixed_contour = indexed_shape.copy()
        contour[:, 0] = np.clip(fixed_contour[:, 0], 0, self.fmap_size['w'] - 1)
        contour[:, 1] = np.clip(fixed_contour[:, 1], 0, self.fmap_size['h'] - 1)

        contour_mean = np.mean(contour, axis=0)
        contour_std = np.std(contour, axis=0)
        if np.sqrt(np.sum(contour_std ** 2)) <= 1e-6:  # degenerate after clipping
            continue
        else:
            # Normalize by total std so codes are scale-invariant.
            norm_shape = (contour - contour_mean) / np.sqrt(np.sum(contour_std ** 2))

        if h > 0 and w > 0 and np.sqrt(np.sum(contour_std ** 2)) > 1e-6:
            obj_c = contour_mean
            obj_c_int = obj_c.astype(np.int32)
            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            w_h_std[k] = contour_std
            # Sparse-code the normalized contour; stored exponentiated
            # (the decoder presumably applies log — confirm against the model).
            temp_codes, _ = fast_ista(norm_shape.reshape((1, -1)), self.dictionary,
                                      lmbda=self.sparse_alpha, max_iter=80)
            codes_[k] = np.exp(temp_codes)
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

    return {'image': img, 'codes': codes_, 'hmap': hmap, 'w_h_std': w_h_std, 'regs': regs,
            'inds': inds, 'ind_masks': ind_masks, 'c': center, 's': scale, 'img_id': img_id}