import imgaug.augmenters as iaa


def get_transform(image):
    """Build a resize+pad pipeline that letterboxes an image into a 480x640 canvas."""
    seq = []
    if image.shape[0] >= image.shape[1]:  # portrait (or square): fit the height first
        seq.append(iaa.Resize({"height": 480, "width": "keep-aspect-ratio"}))
        # if the rescaled width (width / height * 480) still exceeds 640, fit the width instead
        if image.shape[1] / image.shape[0] * 480. > 640.:
            seq.append(iaa.Resize({"width": 640, "height": "keep-aspect-ratio"}))
    else:  # landscape: fit the width first
        seq.append(iaa.Resize({"width": 640, "height": "keep-aspect-ratio"}))
        # if the rescaled height (height / width * 640) still exceeds 480, fit the height instead
        if image.shape[0] / image.shape[1] * 640. > 480.:
            seq.append(iaa.Resize({"height": 480, "width": "keep-aspect-ratio"}))
    # pad the remaining border so every output is exactly 480x640
    seq.append(iaa.CenterPadToFixedSize(height=480, width=640))
    return iaa.Sequential(seq)
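# A minimal usage sketch of get_transform above, assuming a NumPy HxWxC image;
# the input shape is illustrative only.
import numpy as np

image = np.zeros((1080, 720, 3), dtype=np.uint8)  # portrait example
aug = get_transform(image)
out = aug(image=image)
print(out.shape)  # (480, 640, 3): height fit to 480, then center-padded to width 640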
def __augment_img(self, img):
    """Scale, pad, and augment the image."""
    h = img.shape[0]  # NumPy arrays are (height, width, channels)
    w = img.shape[1]
    # self.dim is (width, height); choose whichever factor scales down the most
    scale = min(self.dim[0] / w, self.dim[1] / h)
    seq = iaa.Sequential([
        iaa.Resize(float(scale)),
        iaa.CenterPadToFixedSize(width=self.dim[0], height=self.dim[1]),
        iaa.Fliplr(0.5),
        iaa.Affine(rotate=[0, 90, 180, 270]),
    ])
    img = seq(image=img) / 255  # rescale to the 0..1 range
    return img
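# A standalone sketch of the same scale-then-letterbox pipeline, assuming
# dim = (640, 480) as (width, height); the shapes are illustrative only.
import imgaug.augmenters as iaa
import numpy as np

dim = (640, 480)  # (width, height)
img = np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8)
scale = min(dim[0] / img.shape[1], dim[1] / img.shape[0])  # 0.5 here
seq = iaa.Sequential([
    iaa.Resize(float(scale)),
    iaa.CenterPadToFixedSize(width=dim[0], height=dim[1]),
])
print(seq(image=img).shape)  # (480, 640, 3): scaled to 640x360, padded to 640x480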
if args.path_depth != '':
    # depth = cv2.imread(args.path_depth, cv2.IMREAD_UNCHANGED).astype(np.float32) / 100
    depth = read_depth(args.path_depth, args.dataset, args.max_depth)
else:
    depth = np.ones((img.shape[0], img.shape[1]), dtype=np.float32)
if args.path_lidar != '':
    # lidar = cv2.imread(args.path_lidar, cv2.IMREAD_UNCHANGED).astype(np.float32) / 100
    lidar = read_depth(args.path_lidar, args.dataset, args.max_depth)
else:
    lidar = to_sparse(image=depth)

# pad to a size the network can take as input (multiples of 32)
mul_times = 32
h_ori, w_ori = img.shape[0], img.shape[1]
h_pad = int(np.ceil(h_ori / mul_times) * mul_times)
w_pad = int(np.ceil(w_ori / mul_times) * mul_times)
img = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=img)
lidar = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=lidar)
depth = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=depth)

# normalize and convert to tensors
img = img_process(img.copy()).unsqueeze(0)
depth = to_tensor(depth.copy()).unsqueeze(0)
lidar = to_tensor(lidar.copy()).unsqueeze(0)

# set up the device, network, etc.
device_str = "cpu" if args.gpu is None else "cuda:{}".format(args.gpu)
device = torch.device(device_str if torch.cuda.is_available() else "cpu")
if args.gpu is not None and torch.cuda.is_available():
    torch.cuda.set_device(args.gpu)
    torch.backends.cudnn.benchmark = True
# model = BtsModel(args.max_depth, args.encoder).to(device)
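# The prediction comes back at the padded size, so the center padding usually
# has to be cropped off again. A minimal sketch of that inverse step; it
# assumes the pad split puts floor(diff / 2) pixels on the top/left, which is
# worth verifying against the imgaug version in use.
def unpad_center(pred, h_ori, w_ori):
    top = (pred.shape[0] - h_ori) // 2
    left = (pred.shape[1] - w_ori) // 2
    return pred[top:top + h_ori, left:left + w_ori]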
def __getitem__(self, index):
    sample_path = self.data_list[index].split()
    img = Image.open(os.path.join(
        self.dir_imgs, sample_path[self.idx_img])).convert("RGB")
    lidar = None
    depth = None
    item = []
    if self.mode == 'train':
        depth = read_depth(
            os.path.join(self.dir_imgs, sample_path[self.idx_depth]),
            self.dataset_name, self.max_depth)
        if self.lidar_exist:
            lidar = read_depth(
                os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                self.dataset_name, self.max_depth)
        else:
            if self.gen_sparse_online:
                lidar = self.to_sparse(image=depth)
            else:
                lidar = self.lidar_persudo
        # show(depth), show(lidar), show(img)

        # augmentation (a sketch of augment_3 follows this method)
        rsz_size = img.size[::-1] if self.resize_size is None else self.resize_size  # h*w
        crp_size = img.size[::-1] if self.crop_size is None else self.crop_size  # h*w
        depth_rsz = transforms.Compose(
            [transforms.ToPILImage(), transforms.Resize(rsz_size, 0)])  # nearest: no interpolation for depth
        img = transforms.Resize(rsz_size)(img)  # Resize defaults to bilinear interpolation
        depth = depth_rsz(depth)
        lidar = depth_rsz(lidar)
        # the top of KITTI frames has no depth values, so crop it off first
        if self.dataset_name == 'kitti':
            img = F.crop(img, rsz_size[0] - crp_size[0], 0, crp_size[0], rsz_size[1])
            depth = F.crop(depth, rsz_size[0] - crp_size[0], 0, crp_size[0], rsz_size[1])
            lidar = F.crop(lidar, rsz_size[0] - crp_size[0], 0, crp_size[0], rsz_size[1])
        img = np.asarray(img, dtype=np.float32) / 255.0
        depth = np.asarray(depth)
        lidar = np.asarray(lidar)
        # li = cv2.resize(lidar.astype(np.uint16), rsz_size[::-1], 0)  # OpenCV's resize would inflate the fraction of sparse points
        if self.aug:
            img, depth, lidar = self.augment_3(img, depth, lidar, crp_size, self.degree)
        # normalize and convert to tensors
        img = self.img_process(img.copy())
        depth = self.to_tensor(depth.copy())
        lidar = self.to_tensor(lidar.copy())
        item = [img, lidar, depth]
    elif self.mode == 'val':
        depth = read_depth(
            os.path.join(self.dir_imgs, sample_path[self.idx_depth]),
            self.dataset_name, self.max_depth)
        if self.lidar_exist:
            lidar = read_depth(
                os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                self.dataset_name, self.max_depth)
        else:
            if self.gen_sparse_online:
                lidar = self.to_sparse(image=depth)
            else:
                lidar = self.lidar_persudo
        img = np.asarray(img, dtype=np.float32) / 255.0
        # pad to a size the network can take as input
        h_ori, w_ori = img.shape[0], img.shape[1]
        h_pad = int(np.ceil(h_ori / self.mul_times) * self.mul_times)
        w_pad = int(np.ceil(w_ori / self.mul_times) * self.mul_times)
        img = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=img)
        lidar = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=lidar)
        depth = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=depth)
        lidar = lidar.astype(np.float32)
        depth = depth.astype(np.float32)
        # normalize and convert to tensors
        img = self.img_process(img.copy())
        depth = self.to_tensor(depth.copy())
        lidar = self.to_tensor(lidar.copy())
        item = [img, lidar, depth]
    elif self.mode == 'test':
        if self.lidar_exist:
            lidar = read_depth(
                os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                self.dataset_name, self.max_depth)
        else:
            lidar = self.lidar_persudo
        img = np.asarray(img, dtype=np.float32) / 255.0
        # pad to a size the network can take as input
        h_ori, w_ori = img.shape[0], img.shape[1]
        h_pad = int(np.ceil(h_ori / self.mul_times) * self.mul_times)
        w_pad = int(np.ceil(w_ori / self.mul_times) * self.mul_times)
        img = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=img)
        lidar = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=lidar)
        lidar = lidar.astype(np.float32)
        # normalize and convert to tensors
        img = self.img_process(img.copy())
        lidar = self.to_tensor(lidar.copy())
        item = [img, lidar, lidar]
    return item
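# The augment_3 helper called above is not shown in this excerpt. A minimal
# hypothetical reconstruction, not the original implementation: one random
# sampling applied identically to the three aligned maps via to_deterministic().
import imgaug.augmenters as iaa

def augment_3(img, depth, lidar, crp_size, degree):
    seq = iaa.Sequential([
        iaa.CropToFixedSize(height=crp_size[0], width=crp_size[1]),
        iaa.Fliplr(0.5),
        iaa.Rotate((-degree, degree), order=0),  # nearest keeps depth values valid
    ]).to_deterministic()  # same random parameters for all three calls
    return seq(image=img), seq(image=depth), seq(image=lidar)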
elif augmentation == 'pad_to_powers_of':
    transform = iaa.PadToPowersOf(height_base=2, width_base=2)
    transformed_image = transform(image=image)
elif augmentation == 'pad_to_aspect_ratio':
    transform = iaa.PadToAspectRatio(2.0)
    transformed_image = transform(image=image)
elif augmentation == 'pad_to_square':
    # resize to a non-square shape first (albumentations), then pad to square
    transform = Resize(always_apply=True, height=200, width=400)
    transformed_image = transform(image=image)['image']
    transform = iaa.PadToSquare()
    transformed_image = transform(image=transformed_image)
elif augmentation == 'center_pad_to_fixed_size':
    transform = iaa.CenterPadToFixedSize(width=1000, height=1000)
    transformed_image = transform(image=image)
elif augmentation == 'center_pad_to_multiples_of':
    transform = iaa.CenterPadToMultiplesOf(height_multiple=32, width_multiple=32)
    transformed_image = transform(image=image)
elif augmentation == 'center_pad_to_powers_of':
    transform = iaa.CenterPadToPowersOf(height_base=3, width_base=2)
    transformed_image = transform(image=image)
elif augmentation == 'center_pad_to_aspect_ratio':
    transform = iaa.CenterPadToAspectRatio(2.0)
    transformed_image = transform(image=image)
elif augmentation == 'center_pad_to_square':
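# A quick shape check for the two center-pad variants above, with an
# illustrative 100x200 input.
import imgaug.augmenters as iaa
import numpy as np

image = np.zeros((100, 200, 3), dtype=np.uint8)
print(iaa.CenterPadToMultiplesOf(height_multiple=32, width_multiple=32)(image=image).shape)
# (128, 224, 3): each side rounded up to the next multiple of 32
print(iaa.CenterPadToPowersOf(height_base=3, width_base=2)(image=image).shape)
# (243, 256, 3): height up to 3**5, width up to 2**8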
# NYU
depth_proj = depth.astype(np.float32) / 1000 * 256
depth_proj = depth_proj.astype(np.uint16)
# OB
# depth[depth > 5000] = 5000
# depth_proj = depth.astype(np.float32) / 100 * 256
# depth_proj = depth_proj.astype(np.uint16)
# KITTI
# depth_proj = depth

crop_h = 352
crop_w = 1216
seq1 = iaa.Sequential([
    iaa.CenterPadToFixedSize(height=crop_h, width=crop_w),  # guarantee the image is large enough to crop
    iaa.CenterCropToFixedSize(height=crop_h, width=crop_w),
], random_order=True)
seq2 = iaa.Sequential([
    # iaa.CoarseDropout(0.19, size_px=200),
    iaa.Dropout(1 - 0.05),  # drop 95% of the pixels, keeping a ~5% sparse sample
], random_order=True)
img_aug = seq1(image=img)
# depth_aug = seq1(image=depth_proj)
depth_aug = seq1(image=seq2(image=depth_proj))
show(img_aug)
show(depth)
show(depth_aug)
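# To sanity-check the sparsification rate, compare valid-pixel density before
# and after the dropout; the random depth map here is illustrative only.
import imgaug.augmenters as iaa
import numpy as np

depth_proj = np.random.randint(1, 10000, (352, 1216), dtype=np.uint16)
sparse = iaa.Dropout(1 - 0.05)(image=depth_proj)
print(np.count_nonzero(sparse) / sparse.size)  # roughly 0.05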
def main(args):
    print("Hello world!")
    dataset_name = args.dataset
    dataset = os.path.join('/home/ubuntu/workspace_aihub/data/raw/', dataset_name)
    class_wise = glob(os.path.join(dataset, '*'))
    for per_class_dir in tqdm(class_wise):
        datas = glob(os.path.join(per_class_dir, '*'))
        for file in tqdm(datas):
            if file.split('.')[-1] != 'json':
                continue
            base_name = os.path.splitext(file)[0]  # path without the .json suffix
            image_file = None
            if os.path.isfile(base_name + '.jpg'):
                image_file = base_name + '.jpg'
            if os.path.isfile(base_name + '.JPG'):
                image_file = base_name + '.JPG'
            if image_file is None:
                print('no image')
                continue
            conf = OmegaConf.create()
            image = cv2.imread(image_file)
            if image is None:
                print('no image!')
                continue
            if len(image.shape) != 3:
                print('this image does not have 3 channels')
            with open(file) as json_file:
                json_data = json.load(json_file)
            if len(json_data['regions']) != 1:
                print('this region has more than 1 bbox')
                print(json_data['regions'])
            conf['regions'] = json_data['regions']
            x1 = json_data['regions'][0]['boxcorners'][0]
            y1 = json_data['regions'][0]['boxcorners'][1]
            x2 = json_data['regions'][0]['boxcorners'][2]
            y2 = json_data['regions'][0]['boxcorners'][3]
            bbs = BoundingBoxesOnImage(
                [BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2)], shape=image.shape)
            # letterbox every image into a 480x640 canvas, keeping the bbox aligned
            if image.shape[0] > image.shape[1]:
                seq = iaa.Sequential([
                    iaa.Resize({"height": 480, "width": "keep-aspect-ratio"}),
                    iaa.CenterPadToFixedSize(height=480, width=640),
                ])
            else:
                seq = iaa.Sequential([
                    iaa.Resize({"width": 640, "height": "keep-aspect-ratio"}),
                    iaa.CenterPadToFixedSize(height=480, width=640),
                ])
            # image = np.transpose(image, (1, 0, 2))
            image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)
            x1 = int(bbs_aug[0].x1)
            y1 = int(bbs_aug[0].y1)
            x2 = int(bbs_aug[0].x2)
            y2 = int(bbs_aug[0].y2)
            conf['bbox'] = {}
            conf['bbox']['x1'] = x1
            conf['bbox']['y1'] = y1
            conf['bbox']['x2'] = x2
            conf['bbox']['y2'] = y2
            # 80/10/10 train/val/test split
            rand_number = random.randint(0, 9)
            if rand_number == 0:
                mode = 'test2'
            elif rand_number == 1:
                mode = 'val2'
            else:
                mode = 'train2'
            base_name = (os.path.basename(base_name)).replace('-', '_')
            label = base_name.split('_')[0]
            dst_path = os.path.join('/home/ubuntu/workspace_aihub/data/refined', mode, label)
            os.makedirs(dst_path, exist_ok=True)  # also creates the mode directory if missing
            OmegaConf.save(
                conf, os.path.join(dst_path, f'{os.path.basename(base_name)}.yaml'))
            cv2.imwrite(
                os.path.join(dst_path, f'{os.path.basename(base_name)}.jpg'), image_aug)
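# To spot-check that the bounding box stays aligned through the resize + pad,
# imgaug can draw it back onto the augmented image. A minimal sketch with
# illustrative data; 'check.jpg' is a throwaway output path.
import cv2
import imgaug.augmenters as iaa
import numpy as np
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

image = np.zeros((960, 540, 3), dtype=np.uint8)  # portrait frame
bbs = BoundingBoxesOnImage([BoundingBox(x1=100, y1=200, x2=400, y2=800)],
                           shape=image.shape)
seq = iaa.Sequential([
    iaa.Resize({"height": 480, "width": "keep-aspect-ratio"}),
    iaa.CenterPadToFixedSize(height=480, width=640),
])
image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)
cv2.imwrite('check.jpg', bbs_aug.draw_on_image(image_aug, size=2))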