def __init__(self, root, isTrain=True):
    self.images_root = os.path.join(root, 'img')
    self.labels_root = os.path.join(root, 'gt')
    self.list_root = os.path.join(root, 'list')
    if isTrain:
        list_path = os.path.join(self.list_root, 'train_aug.txt')
        # note: RandomRotation/RandomHorizontalFlip are sampled independently
        # of target_transform, so these geometric augmentations are not
        # applied to the labels; joint transforms would be needed to keep
        # image and ground truth aligned
        self.input_transform = transforms.Compose([
            transforms.RandomRotation(10),  # random rotation
            transforms.CenterCrop(256),
            transforms.RandomHorizontalFlip(),  # random horizontal flip
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        self.target_transform = transforms.Compose([
            transforms.CenterCrop(256),
            transform.ToLabel()  # project-local `transform` module
        ])
    else:
        list_path = os.path.join(self.list_root, 'val.txt')
        self.input_transform = transforms.Compose([
            transforms.CenterCrop(256),
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        self.target_transform = transforms.Compose([
            transforms.CenterCrop(256),
            transform.ToLabel()
        ])
    # use a context manager so the list file handle is closed
    with open(list_path) as f:
        self.filenames = [i_id.strip() for i_id in f]
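# The constructor above stores parallel image/label transforms and a filename
# list, but the matching __getitem__ is not shown in this excerpt. A minimal
# hedged sketch of how these pieces are typically wired together (the file
# extensions and `from PIL import Image` are assumptions, not from the source):
def __getitem__(self, index):
    filename = self.filenames[index]
    image = Image.open(os.path.join(self.images_root, filename + '.jpg')).convert('RGB')
    label = Image.open(os.path.join(self.labels_root, filename + '.png'))
    return self.input_transform(image), self.target_transform(label)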
def __getitem__(self, index):
    rgb, depth = self.__getraw__(index)
    # flag malformed samples; the original used `and`, which only triggered
    # when both arrays were malformed
    if rgb.ndim < 2 or depth.ndim != 2:
        print("Wrong DEPTH ", depth)
        return None
    if self.transform is not None:
        rgb_np, depth_np = self.transform(rgb, depth)
    else:
        raise RuntimeError("transform not defined")
    if self.modality == 'rgb':
        input_np = rgb_np
    to_tensor = transforms.ToTensor()
    input_tensor = to_tensor(input_np)
    while input_tensor.dim() < 3:
        input_tensor = input_tensor.unsqueeze(0)
    depth_tensor = to_tensor(depth_np)
    depth_tensor = depth_tensor.unsqueeze(0)
    return input_tensor, depth_tensor
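# Because __getitem__ above can return None for malformed samples, the default
# DataLoader collate would raise on such a batch. A minimal sketch of a
# filtering collate function (the name `skip_none_collate` is ours, not from
# this codebase):
from torch.utils.data.dataloader import default_collate

def skip_none_collate(batch):
    # drop None entries before delegating to the default collate
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)

# usage: DataLoader(dataset, batch_size=8, collate_fn=skip_none_collate)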
def __init__(self, opt, set_name='train', train=True):
    """
    Args:
        opt: options with root_dir, input_size and norm_cfg attributes.
        set_name (str): COCO annotation set name.
        train (bool): build the augmented training pipeline if True.
    """
    if train:
        self.root_dir = osp.join(opt.root_dir, 'VisDrone2019-DET-train')
    else:
        self.root_dir = osp.join(opt.root_dir, 'VisDrone2019-DET-val')
    self.anno_dir = osp.join(self.root_dir, 'annotations_json')
    self.img_dir = osp.join(self.root_dir, 'images')
    self.set_name = set_name
    self.train = train
    self.coco = COCO(osp.join(self.anno_dir, INSTANCES_SET.format(self.set_name)))
    self.image_ids = self.coco.getImgIds()
    self.load_classes()
    self.input_size = opt.input_size
    if self.train:
        self.transform = transforms.Compose([
            # tsf.RandomColorJeter(0.3, 0.3, 0.3, 0.3),
            # tsf.RandomGaussianBlur(),
            tsf.RandomHorizontalFlip(),
            tsf.Resizer(self.input_size),
            tsf.Normalizer(**opt.norm_cfg),
            tsf.ToTensor()
        ])
    else:
        self.transform = transforms.Compose([
            tsf.Resizer(self.input_size),
            tsf.Normalizer(**opt.norm_cfg),
            tsf.ToTensor()
        ])
def __init__(self, opt, set_name='train', train=True):
    """
    Args:
        opt: options with root_dir, min_size, max_size, resize_type and
            norm_cfg attributes.
        set_name (str): COCO annotation set name.
        train (bool): build the augmented training pipeline if True.
    """
    self.root_dir = opt.root_dir
    self.anno_dir = osp.join(self.root_dir, ANNO_ROOT)
    self.img_dir = osp.join(self.root_dir, IMG_ROOT)
    self.set_name = set_name
    self.train = train
    self.coco = COCO(
        osp.join(self.anno_dir, INSTANCES_SET.format(self.set_name)))
    self.image_ids = self.coco.getImgIds()
    self.load_classes()
    self.min_size = opt.min_size
    self.max_size = opt.max_size
    self.input_size = (self.min_size, self.max_size)
    self.resize = self.resizes(opt.resize_type)
    if self.train:
        self.transform = transforms.Compose([
            tsf.RandomColorJeter(0.3, 0.3, 0.3, 0.3),
            tsf.RandomGaussianBlur(),
            self.resize,
            tsf.Normalizer(**opt.norm_cfg),
            tsf.ToTensor()
        ])
    else:
        self.transform = transforms.Compose([
            self.resize,
            tsf.Normalizer(**opt.norm_cfg),
            tsf.ToTensor()
        ])
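# A hedged usage sketch for the two dataset constructors above. The option
# attributes mirror what they read (root_dir, input_size, norm_cfg); the
# SimpleNamespace, the norm_cfg keys, and the VisDroneDataset class name are
# all assumptions — the real project presumably builds `opt` from its own
# config system.
from types import SimpleNamespace
from torch.utils.data import DataLoader

opt = SimpleNamespace(
    root_dir='data/VisDrone',
    input_size=(512, 512),
    norm_cfg=dict(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
)
train_set = VisDroneDataset(opt, set_name='train', train=True)  # class name assumed
train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=4)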
def create_loader(args):
    if args.dataset == 'vocaug':
        composed_transforms_tr = transforms.Compose([
            tr.RandomSized(512),
            tr.RandomRotate(15),
            tr.RandomHorizontalFlip(),
            tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            tr.ToTensor()
        ])
        composed_transforms_ts = transforms.Compose([
            tr.FixedResize(size=(512, 512)),
            tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            tr.ToTensor()
        ])
        train_set = VOCAug(split='train', transform=composed_transforms_tr)
        val_set = VOCAug(split='val', transform=composed_transforms_ts)
    else:
        print('Database {} not available.'.format(args.dataset))
        raise NotImplementedError
    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True,
                              num_workers=args.workers, pin_memory=True)
    val_loader = DataLoader(val_set, batch_size=16, shuffle=False,
                            num_workers=args.workers, pin_memory=True)
    return train_loader, val_loader
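# Hedged usage sketch for create_loader: the argparse flags below match the
# attributes it reads (dataset, batch_size, workers); the defaults are ours,
# not from the source.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='vocaug')
parser.add_argument('--batch-size', type=int, default=8)
parser.add_argument('--workers', type=int, default=4)
args = parser.parse_args()

train_loader, val_loader = create_loader(args)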
def __getitem__(self, index):
    rgb, depth, pose = self.__getraw__(index)
    if self.transform is not None:
        rgb_np, depth_np, pose = self.transform(rgb, depth, pose)
    else:
        raise RuntimeError("transform not defined")
    if self.modality == 'rgb':
        input_np = rgb_np
    to_tensor = transforms.ToTensor()
    input_tensor = to_tensor(input_np)
    while input_tensor.dim() < 3:
        input_tensor = input_tensor.unsqueeze(0)
    depth_tensor = to_tensor(depth_np).unsqueeze(0)
    pose_tensor = to_tensor(pose)
    return input_tensor, depth_tensor, pose_tensor
    if rgb is not None:
        rgb = transform(rgb)
    if sparse is not None:
        sparse = transform(sparse)
    if target is not None:
        target = transform(target)
    if rgb_near is not None:
        rgb_near = transform(rgb_near)
    return rgb, sparse, target, rgb_near


def no_transform(rgb, sparse, target, rgb_near, args):
    return rgb, sparse, target, rgb_near


to_tensor = transforms.ToTensor()
to_float_tensor = lambda x: to_tensor(x).float()


def handle_gray(rgb, args):
    if rgb is None:
        return None, None
    if not args.use_g:
        return rgb, None
    else:
        img = np.array(Image.fromarray(rgb).convert('L'))
        img = np.expand_dims(img, -1)
        if not args.use_rgb:
            rgb_ret = None
        else:
            rgb_ret = rgb
        # the excerpt was truncated here; the branch presumably returns the
        # (possibly dropped) rgb plus the grayscale image, mirroring the
        # (rgb, gray) pairs returned above
        return rgb_ret, img
    return 'VOCAug(split=' + str(self.split) + ')'


if __name__ == '__main__':
    from dataloaders import transforms as tr
    from libs.utils import decode_segmap
    from torch.utils.data import DataLoader
    from torchvision import transforms
    import matplotlib.pyplot as plt
    import numpy as np

    composed_transforms_tr = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.RandomSized(512),
        tr.RandomRotate(15),
        tr.ToTensor()
    ])

    voc_train = VOCAug(split='train', transform=composed_transforms_tr)
    dataloader = DataLoader(voc_train, batch_size=5, shuffle=True, num_workers=1)
    print(len(dataloader))

    for ii, sample in enumerate(dataloader):
        print(sample['image'].size())
        img = sample['image'].numpy()
        gt = sample['label'].numpy()
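        # Hedged continuation of the demo loop above, which the excerpt cuts
        # short: decode the first label map of the batch with the imported
        # decode_segmap and show it next to its image. The 'pascal' dataset
        # key and the exact decode_segmap signature are assumptions.
        segmap = decode_segmap(gt[0].squeeze().astype(np.uint8), dataset='pascal')
        plt.subplot(1, 2, 1)
        plt.imshow(np.transpose(img[0], (1, 2, 0)).astype(np.uint8))
        plt.subplot(1, 2, 2)
        plt.imshow(segmap)
        plt.show()
        break  # one batch is enough for a smoke test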
    with open(file_path, 'r') as f:
        lines = f.readlines()
    for line in lines:
        im_path = os.path.join(root_dir, line.split()[0])
        gt_path = os.path.join(root_dir, line.split()[1])
        im_gt_paths.append((im_path, gt_path))
    return im_gt_paths


# array to tensor
from dataloaders import transforms as my_transforms

to_tensor = my_transforms.ToTensor()


class KittiFolder(Dataset):
    """
    RGB:
        kitti_raw_data/2011-xx-xx/2011_xx_xx_drive_xxxx_sync/image_02/data/xxxxxxxx01.png
    Depth:
        train: train_gt16bit/xxxxx.png
        val:   val_gt16bit/xxxxx.png
        test:  test_gt16bit/xxxxx.png
    """

    def __init__(self,
                 root_dir='/home/data/UnsupervisedDepth/wangixn/KITTI',
                 mode='train',
                 loader=pil_loader,
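# KittiFolder defaults its `loader` argument to pil_loader, which is not shown
# in this excerpt. A minimal sketch of the conventional torchvision-style
# implementation (assumption: the project uses the same idiom):
from PIL import Image

def pil_loader(path):
    # open inside a context manager to avoid leaking file handles
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')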
def main():
    print('Testing data on ' + args.camera + '!')
    assert args.data == 'nyudepthv2', \
        '=> only nyudepthv2 available at this point'
    to_tensor = transforms.ToTensor()
    assert not (args.camera == 'webcam' and not args.modality == 'rgb'), \
        '=> webcam only accepts an RGB model'

    output_directory = utils.get_output_directory(args)
    best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
    assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
    print("=> loading best model '{}'".format(best_model_filename))
    checkpoint = torch.load(best_model_filename)
    args.start_epoch = checkpoint['epoch']
    model = checkpoint['model']
    model.eval()

    switch = True
    if args.camera == 'kinect':
        kinect = PyKinectRuntime.PyKinectRuntime(
            PyKinectV2.FrameSourceTypes_Color | PyKinectV2.FrameSourceTypes_Depth)
        counter = 0
        assert kinect._sensor is not None, '=> No Kinect device detected!'
        while True:
            if kinect.has_new_color_frame() and kinect.has_new_depth_frame():
                bgra_frame = kinect.get_last_color_frame()
                bgra_frame = bgra_frame.reshape(
                    (kinect.color_frame_desc.Height,
                     kinect.color_frame_desc.Width, 4),
                    order='C')
                rgb_frame = cv2.cvtColor(bgra_frame, cv2.COLOR_BGRA2RGB)
                depth_frame = kinect.get_last_depth_frame()
                # pass `switch` to depth_estimate so the sample count prints
                # once; the original passed it as cv2.cvtColor's third (dst)
                # argument, which was a bug
                merged_image, rmse = depth_estimate(model, rgb_frame, depth_frame,
                                                    save=False, switch=switch)
                merged_image_bgr = cv2.cvtColor(merged_image.astype('uint8'),
                                                cv2.COLOR_RGB2BGR)
                switch = False
                cv2.imshow('my webcam', merged_image_bgr.astype('uint8'))
                if counter == 15:
                    print('RMSE = ' + str(rmse))
                counter = counter + 1
                if counter == 16:
                    counter = 0
            if cv2.waitKey(1) == 27:
                break
    elif args.camera == 'webcam':
        cam = cv2.VideoCapture(0)
        while True:
            ret_val, img = cam.read()
            img = cv2.flip(img, 1)
            rgb = cv2.cvtColor(np.array(img), cv2.COLOR_BGRA2RGB)
            transform = transforms.Compose([
                transforms.Resize([228, 304]),
            ])
            rgb_image = transform(rgb)
            if args.modality == 'rgbd':
                # a bare string assert is always true; raise instead
                raise RuntimeError("=> can't test webcam with depth information!")
            rgb_np = np.asfarray(rgb_image, dtype='float') / 255
            input_tensor = to_tensor(rgb_np)
            while input_tensor.dim() < 4:
                input_tensor = input_tensor.unsqueeze(0)
            input_tensor = input_tensor.cuda()
            torch.cuda.synchronize()
            end = time.time()
            with torch.no_grad():
                pred = model(input_tensor)
            torch.cuda.synchronize()
            gpu_time = time.time() - end
            pred_depth = np.squeeze(pred.cpu().numpy())
            d_min = np.min(pred_depth)
            d_max = np.max(pred_depth)
            pred_color_map = color_map(pred_depth, d_min, d_max, plt.cm.viridis)
            merged_image = np.hstack([rgb_image, pred_color_map])
            merged_image_bgr = cv2.cvtColor(merged_image.astype('uint8'),
                                            cv2.COLOR_RGB2BGR)
            cv2.imshow('my webcam', merged_image_bgr.astype('uint8'))
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    else:
        file_name = args.kinectdata + '.p'
        pickle_path = os.path.join('CameraData', file_name)
        print(pickle_path)
        if not os.path.exists('CameraData'):
            # the original bare string assert never fired
            raise FileNotFoundError('=> no data found at ' + pickle_path)
        with open(pickle_path, 'rb') as f:
            pickle_file = pickle.load(f)
        bgr_frame = pickle_file['rgb']
        depth = pickle_file['depth']
        rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        merged_image, rmse = depth_estimate(model, rgb_frame, depth,
                                            save=True, switch=True)
        plt.figure('Merged Image')
        plt.imshow(merged_image.astype('uint8'))
        plt.show()
        print('RMSE = ' + str(rmse))
    cv2.destroyAllWindows()
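# color_map is used above and in depth_estimate below but is not defined in
# this excerpt. A minimal sketch of the usual implementation (assumption: it
# maps a depth array to an RGB image in the 0-255 range via a matplotlib
# colormap):
def color_map(depth, d_min, d_max, cmap):
    # normalize to [0, 1], apply the colormap, drop the alpha channel
    depth_rel = (depth - d_min) / max(d_max - d_min, 1e-8)
    return 255 * cmap(depth_rel)[:, :, :3]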
def depth_estimate(model, rgb_frame, depth, save=False, switch=False):
    to_tensor = transforms.ToTensor()
    cmap_depth = plt.cm.viridis
    cmap_error = plt.cm.inferno
    rgb_np, depth_np = image_transform(rgb_frame, depth)

    # create the sparse depth input
    mask_keep = sampler(depth_np, args.sample_spacing)
    sample_number = np.count_nonzero(mask_keep.astype('int'))
    if args.modality == 'rgb':
        sample_number = 0
    if switch:
        print('Total samples = ' + str(sample_number))
    sparse_depth = np.zeros(depth_np.shape)
    sparse_depth[mask_keep] = depth_np[mask_keep]
    sparse_depth_np = sparse_depth

    # choose the network input
    if args.modality == 'rgb':
        input_np = rgb_np
    elif args.modality == 'rgbd':
        rgbd = np.append(rgb_np, np.expand_dims(sparse_depth_np, axis=2), axis=2)
        input_np = np.asfarray(rgbd, dtype='float')
    elif args.modality == 'd':
        input_np = sparse_depth_np
    input_tensor = to_tensor(input_np)
    while input_tensor.dim() < 4:
        input_tensor = input_tensor.unsqueeze(0)
    input_tensor = input_tensor.cuda()
    torch.cuda.synchronize()

    # run the prediction
    end = time.time()
    with torch.no_grad():
        pred = model(input_tensor)
    torch.cuda.synchronize()
    gpu_time = time.time() - end

    # evaluate against the captured depth
    target_tensor = to_tensor(depth_np)
    while target_tensor.dim() < 4:
        target_tensor = target_tensor.unsqueeze(0)
    target_tensor = target_tensor.cuda()
    torch.cuda.synchronize()
    result = Result()
    result.evaluate(pred.data, target_tensor.data)
    pred_depth = np.squeeze(pred.data.cpu().numpy())

    # convert depths to colour maps on a shared scale
    d_min = min(np.min(pred_depth), np.min(depth_np))
    d_max = max(np.max(pred_depth), np.max(depth_np))
    pred_depth_rgb_map = color_map(pred_depth, d_min, d_max, cmap_depth)
    input_depth_color_map = color_map(depth_np, d_min, d_max, cmap_depth)
    sparse_depth_color_map = color_map(sparse_depth, d_min, d_max, cmap_depth)

    # build the error map; copy so the captured depth is not mutated in place
    mask = depth_np <= 0
    combined_map = depth_np.copy()
    combined_map[mask] = pred_depth[mask]
    abs_error_map = np.absolute(combined_map - pred_depth)
    error_min = np.min(abs_error_map)
    error_max = np.max(abs_error_map)
    error_map_color = color_map(abs_error_map, error_min, error_max, cmap_error)

    # assemble the display image
    rgb_image = rgb_np * 255
    merged_image = np.hstack([rgb_image, pred_depth_rgb_map,
                              input_depth_color_map, sparse_depth_color_map,
                              error_map_color])

    # optionally save all intermediate images
    if args.write and save:
        images_save(sample_number, pred_depth_rgb_map, sparse_depth_color_map,
                    error_map_color, rgb_image, input_depth_color_map, result)
    return merged_image, result.rmse
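# sampler(depth_np, args.sample_spacing) above returns a boolean keep-mask but
# is not defined in this excerpt. A hedged sketch of a regular-grid sampler
# that keeps one valid depth pixel every `spacing` rows and columns (the real
# project may sample differently, e.g. uniformly at random):
def sampler(depth, spacing):
    mask = np.zeros(depth.shape, dtype=bool)
    mask[::spacing, ::spacing] = True
    # only keep locations where the captured depth is valid
    return np.logical_and(mask, depth > 0)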
def __init__(self, directory):
    self.directory = directory
    self.data = fetch_data(self.directory)
    self.transform = None
    self.to_tensor = transforms.ToTensor()
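# The constructor above only gathers samples; a PyTorch Dataset also needs
# __len__ and __getitem__ on the same class. A minimal hedged sketch, assuming
# fetch_data returns an indexable sequence of PIL images (its real return type
# is not shown in this excerpt):
def __len__(self):
    return len(self.data)

def __getitem__(self, index):
    sample = self.data[index]
    if self.transform is not None:
        sample = self.transform(sample)
    return self.to_tensor(sample)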