def load_data(args):
    # init transforms
    transform = {
        'train': get_transform(args.dataset, augment=True),
        'eval': get_transform(args.dataset, augment=False)
    }
    train_data = get_dataset(args.dataset, train=True, transform=transform['train'], datasets_path=args.data)
    valid_data = get_dataset(args.dataset, train=False, transform=transform['eval'], datasets_path=args.data)

    num_train = len(train_data)
    indices = list(range(num_train))
    # split = int(floor(args.train_portion * num_train))

    train_queue = DataLoader(train_data, batch_size=args.batch_size,
                             sampler=SubsetRandomSampler(indices),
                             pin_memory=True, num_workers=args.workers)
    valid_queue = DataLoader(valid_data, batch_size=args.batch_size, shuffle=True,
                             pin_memory=True, num_workers=args.workers)

    # init create DataLoader function
    createDataLoader = lambda data, _indices: DataLoader(data, batch_size=args.batch_size,
                                                         sampler=SubsetRandomSampler(_indices),
                                                         pin_memory=True, num_workers=args.workers)
    # build search_queue as list of DataLoaders
    create_search_queue = lambda: splitDataToParts(train_data, indices, args.alphas_data_parts, createDataLoader)

    return train_queue, valid_queue, create_search_queue
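# The helper splitDataToParts is referenced above but not defined in this excerpt. A minimal
# sketch of what such a splitter might do, assuming it partitions the index list into `parts`
# roughly equal chunks and builds one DataLoader per chunk via the supplied factory; the name,
# signature, and behavior here are assumptions for illustration, not the original implementation.
def splitDataToParts(data, indices, parts, create_loader):
    """Split `indices` into `parts` contiguous chunks and build one DataLoader per chunk."""
    chunk = len(indices) // parts
    loaders = []
    for i in range(parts):
        start = i * chunk
        end = len(indices) if i == parts - 1 else (i + 1) * chunk
        loaders.append(create_loader(data, indices[start:end]))
    return loaders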
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        imgL_o = Image.open(test_left_img[inx]).convert('RGB')
        imgR_o = Image.open(test_right_img[inx]).convert('RGB')
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (384, 1248)
        top_pad = 384 - imgL.shape[2]
        left_pad = 1248 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_seg = val(imgL, imgR)  # (384, 1248), (1, 21, 384, 1248)
        print('time = %.2f' % (time.time() - start_time))

        # FCN segmentation output
        pred_seg = pred_seg[:, :, top_pad:, :-left_pad]
        N, _, h, w = pred_seg.shape  # e.g. 4, 12, 192, 704 (numpy)
        pred_segmap = pred_seg.transpose(0, 2, 3, 1).reshape(-1, args.n_class).argmax(axis=1).reshape(N, h, w)
        img = drawseg.direct_render(pred_segmap, args.n_class, imgL_o)
        skimage.io.imsave(args.saveseg + (test_left_img[inx].split('/')[-1]), img[0])
def main():
    processed = preprocess.get_transform(augment=False)

    if not os.path.isdir(args.output):
        os.mkdir(args.output)

    for inx in range(len(dataloader)):
        imgL_o, imgR_o, disp, sparse_disp_L = dataloader[inx]
        print('sparse depth:', len(np.nonzero(sparse_disp_L)[0]))
        if args.datatype == 'kitti_object':
            frame_id = dataloader.frame_ids[inx]
            sparse_disp_L = disp  # disp is still sparse in kitti_object
        else:
            frame_id = str(inx)

        imgL = imgL_o.numpy()
        imgR = imgR_o.numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])
        sparse_disp_L = np.reshape(sparse_disp_L, [1, sparse_disp_L.shape[0], sparse_disp_L.shape[1]])
        # print('mean of gt:', np.mean(disp))

        start_time = time.time()
        pred_disp = test(imgL, imgR, sparse_disp_L, refine=True)
        print('%s: time = %.2f' % (frame_id, time.time() - start_time))
        print(pred_disp.shape)

        # TODO: crop output to original size
        np.save(os.path.join(args.output, frame_id + '.npy'), pred_disp)
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (384, 1248)
        top_pad = 384 - imgL.shape[2]
        left_pad = 1248 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 384 - imgL_o.shape[0]
        left_pad = 1248 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        skimage.io.imsave(test_left_img[inx].split('/')[-1], (img * 256).astype('uint16'))
def __init__(self, left, right, left_disparity, right_disparity=None, left_entropy=None,
             is_validation=False, loader=default_loader, dploader=disparity_loader,
             rand_scale=[0.225, 0.6], rand_bright=[0.5, 2.], order=0,
             entropy_threshold=None, testres=None, use_pseudoGT=False, no_aug=False):
    self.left = left
    self.right = right
    self.disp_L = left_disparity
    self.disp_R = right_disparity
    self.loader = loader
    self.dploader = dploader
    self.rand_scale = rand_scale
    self.rand_bright = rand_bright
    self.order = order
    self.left_entropy = left_entropy
    self.entropy_threshold = entropy_threshold
    self.is_validation = is_validation
    self.processed = get_transform()
    self.testres = testres
    self.no_aug = no_aug
    self.use_pseudoGT = use_pseudoGT

    if self.is_validation and self.testres is None:
        raise ValueError("testres argument is required for validation")
    if self.use_pseudoGT:
        if entropy_threshold is None or left_entropy is None:
            raise ValueError("when using pseudo GT, entropy_threshold and left_entropy must be provided")
def __init__(self, params):
    self.args = params
    self.args.cuda = not self.args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(self.args.seed)
    if self.args.cuda:
        torch.cuda.manual_seed(self.args.seed)

    # load model
    if self.args.model == 'stackhourglass':
        self.model = stackhourglass(self.args.maxdisp)
    elif self.args.model == 'basic':
        self.model = basic(self.args.maxdisp)
    else:
        print('no model')

    self.model = nn.DataParallel(self.model, device_ids=[0])
    self.model.cuda()

    if self.args.loadmodel is not None:
        state_dict = torch.load(self.args.loadmodel)
        self.model.load_state_dict(state_dict['state_dict'])

    print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in self.model.parameters()])))

    # process operations
    self.processed = preprocess.get_transform(augment=False)
def main():
    processed = preprocess.get_transform(augment=False)

    if args.isgray:
        imgL_o = cv2.cvtColor(cv2.imread(args.leftimg, 0), cv2.COLOR_GRAY2RGB)
        imgR_o = cv2.cvtColor(cv2.imread(args.rightimg, 0), cv2.COLOR_GRAY2RGB)
    else:
        imgL_o = (skimage.io.imread(args.leftimg).astype('float32'))
        imgR_o = (skimage.io.imread(args.rightimg).astype('float32'))

    imgL = processed(imgL_o).numpy()
    imgR = processed(imgR_o).numpy()
    imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
    imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

    # pad height and width up to multiples of 16
    if imgL.shape[2] % 16 != 0:
        times = imgL.shape[2] // 16
        top_pad = (times + 1) * 16 - imgL.shape[2]
    else:
        top_pad = 0
    if imgL.shape[3] % 16 != 0:
        times = imgL.shape[3] // 16
        left_pad = (times + 1) * 16 - imgL.shape[3]
    else:
        left_pad = 0

    # top_pad rows are added above the image and left_pad columns are appended on the right
    imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
    imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
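# The "pad up to a multiple of 16" arithmetic above recurs verbatim in several of these scripts.
# A small helper capturing the same computation could look like the sketch below; the name
# pad_to_multiple and its interface are assumptions for illustration, not part of the original code.
import numpy as np

def pad_to_multiple(img, multiple=16):
    """Zero-pad an NCHW array on the top and right so H and W become multiples of `multiple`.

    Returns the padded array plus the pad sizes, so the prediction can be cropped back later,
    e.g.:  imgL, top_pad, right_pad = pad_to_multiple(imgL)
    """
    h, w = img.shape[2], img.shape[3]
    top_pad = (-h) % multiple     # 0 when h is already a multiple of `multiple`
    right_pad = (-w) % multiple
    padded = np.pad(img, ((0, 0), (0, 0), (top_pad, 0), (0, right_pad)),
                    mode='constant', constant_values=0)
    return padded, top_pad, right_pad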
def main():
    processed = preprocess.get_transform(augment=False)  # normalization
    t = 0

    for inx in range(len(test_left_img)):
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        top_pad = 384 - imgL.shape[2]
        right_pad = 1248 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, right_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, right_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        t += time.time() - start_time
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 384 - imgL_o.shape[0]
        right_pad = 1248 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-right_pad]
        # first argument: output path and file name; second argument: the array to save
        skimage.io.imsave('./disp_image_' + str(args.KITTI) + '/' + test_left_img[inx].split('/')[-1],
                          (img * 256).astype('uint16'))
        # skimage.io.imsave('./disp_image_' + str(args.KITTI) + '/' + test_left_img[inx].split('/')[-1],
        #                   (img).astype('uint8'))

    mean_time = t / (len(test_left_img))
    print(mean_time)
def __getitem__(self, index):
    img_left = img_read(self.left_imgs[index])
    img_right = img_read(self.right_imgs[index])
    file_name = os.path.basename(self.left_imgs[index])

    w, h = img_left.size
    processed = preprocess.get_transform(augment=False)
    img_left = processed(img_left).numpy()
    img_right = processed(img_right).numpy()
    img_left = np.reshape(img_left, [1, 3, h, w])
    img_right = np.reshape(img_right, [1, 3, h, w])

    pad_w = 32 * (math.floor(w / 32) + 1)
    pad_h = 32 * (math.floor(h / 32) + 1)
    top_pad = int(pad_h - h)
    left_pad = int(pad_w - w)
    img_left = np.lib.pad(img_left, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant', constant_values=0)
    img_right = np.lib.pad(img_right, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                           mode='constant', constant_values=0)

    return top_pad, left_pad, img_left, img_right, file_name
def __getitem__(self, index):
    left = self.left[index]
    right = self.right[index]
    disp_L = self.disp_L[index]

    left_img = self.loader(left)
    right_img = self.loader(right)
    dataL = self.dploader(disp_L)

    if self.training:
        w, h = left_img.size
        th, tw = 256, 512

        x1 = random.randint(0, w - tw)
        y1 = random.randint(0, h - th)

        left_img = left_img.crop((x1, y1, x1 + tw, y1 + th))  # (left, upper, right, lower)
        right_img = right_img.crop((x1, y1, x1 + tw, y1 + th))

        dataL = np.ascontiguousarray(dataL, dtype=np.float32) / 256
        dataL = dataL[y1:y1 + th, x1:x1 + tw]

        processed = preprocess.get_transform(augment=False)
        left_img = processed(left_img)
        right_img = processed(right_img)

        return left_img, right_img, dataL
    else:
        w, h = left_img.size

        left_img = left_img.crop((w - 1232, h - 368, w, h))
        right_img = right_img.crop((w - 1232, h - 368, w, h))
        w1, h1 = left_img.size

        dataL = dataL.crop((w - 1232, h - 368, w, h))
        dataL = np.ascontiguousarray(dataL, dtype=np.float32) / 256
        # dataL = dataL[h - 368:h, w - 1232:w]

        processed = preprocess.get_transform(augment=False)
        left_img = processed(left_img)
        right_img = processed(right_img)

        return left_img, right_img, dataL
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        # imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        # imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        imgL_o = np.array(default_loader(test_left_img[inx]))
        imgR_o = np.array(default_loader(test_right_img[inx]))
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (384, 1248)
        top_pad = 512 - imgL.shape[2]
        # top_pad = 384 - imgL.shape[2]
        # left_pad = 1248 - imgL.shape[3]
        # imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        # imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, 0)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, 0)), mode='constant', constant_values=0)

        start_time = time.time()
        with torch.no_grad():
            pred_disp, pred_R_disp = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 512 - imgL_o.shape[0]
        # top_pad = 384 - imgL_o.shape[0]
        # left_pad = 1248 - imgL_o.shape[1]
        disparity_dir = '/home/yotamg/data/sintel_depth/training/disparities_viz/'
        # file_splits = test_left_img[inx].split('/')[-1].split("_frame_")
        # a = plt.imread(os.path.join(disparity_dir, file_splits[0], 'frame_' + file_splits[1]))

        img = pred_disp[top_pad:, :]
        imgR = pred_R_disp[top_pad:, :]
        img = 1 / img
        imgR = 1 / imgR

        # plt.figure(1)
        # plt.subplot(1, 2, 1)
        # plt.imshow(img)
        # plt.subplot(1, 2, 2)
        # plt.imshow(a)
        # plt.show()

        outdir = os.path.join('./outputs', args.outdir)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        plt.imsave(os.path.join(outdir, test_left_img[inx].split('/')[-1]),
                   (img * 256).astype('uint16'), cmap='jet')
        plt.imsave(os.path.join(outdir, 'R_' + test_left_img[inx].split('/')[-1]),
                   (imgR * 256).astype('uint16'), cmap='jet')
def main():
    processed = preprocess.get_transform(augment=False)
    is_result_dir = False

    for leftimg, rightimg in zip(leftimages_path, rightimages_path):
        print("Left Image : {}, Right Image : {}".format(leftimg, rightimg))
        if args.isgray:
            imgL_o = cv2.cvtColor(cv2.imread(leftimg, 0), cv2.COLOR_GRAY2RGB)
            imgR_o = cv2.cvtColor(cv2.imread(rightimg, 0), cv2.COLOR_GRAY2RGB)
        else:
            imgL_o = (skimage.io.imread(leftimg).astype('float32'))
            imgR_o = (skimage.io.imread(rightimg).astype('float32'))

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad height and width up to multiples of 16
        if imgL.shape[2] % 16 != 0:
            times = imgL.shape[2] // 16
            top_pad = (times + 1) * 16 - imgL.shape[2]
        else:
            top_pad = 0
        if imgL.shape[3] % 16 != 0:
            times = imgL.shape[3] // 16
            left_pad = (times + 1) * 16 - imgL.shape[3]
        else:
            left_pad = 0
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        # Generating the depth map here
        pred_disp = test(imgL, imgR)

        # Removing the padded pixels (explicit end index so left_pad == 0 does not yield an empty slice)
        if top_pad != 0 or left_pad != 0:
            img = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]
        else:
            img = pred_disp

        img = (img * 256).astype('uint16')
        imgname = leftimg.split('/')[-1].split('.')[0] + "_disparity.png"
        if not is_result_dir:
            os.makedirs(args.savedir, exist_ok=True)
        skimage.io.imsave(os.path.join(args.savedir, imgname), img)
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):  # len(test_left_img)
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))

        sizex = 640  # 1024  # = 2048 / 6.4
        sizey = 384  # 512   # = 1024 / 3.2
        logger.debug("Before resize {}:{}:{}".format(imgL_o.shape[0], imgL_o.shape[1], imgL_o.shape[2]))
        imgL_o = skimage.transform.resize(imgL_o, (sizey, sizex), anti_aliasing=True)
        imgR_o = skimage.transform.resize(imgR_o, (sizey, sizex), anti_aliasing=True)
        logger.debug("After resize {}:{}:{}".format(imgL_o.shape[0], imgL_o.shape[1], imgL_o.shape[2]))

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()

        # crop image Cityscapes:
        # logger.debug("Before crop {}:{}:{}".format(imgL.shape[0], imgL.shape[1], imgL.shape[2]))
        # imgL = crop_center(imgL, sizex, sizey)
        # imgR = crop_center(imgR, sizex, sizey)
        # logger.debug("After crop {}:{}:{}".format(imgL.shape[0], imgL.shape[1], imgL.shape[2]))

        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (384, 1248)
        # This block was used for KITTI, whose images are smaller than (384, 1248);
        # padding had to be added to reach the expected input size.
        # top_pad = 384 - imgL.shape[2]
        # left_pad = 1248 - imgL.shape[3]
        # imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        # imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        logger.info('time = {}'.format(time.time() - start_time))

        # top_pad = 384 - imgL_o.shape[0]
        # left_pad = 1248 - imgL_o.shape[1]
        # img = pred_disp[top_pad:, :-left_pad]
        img = pred_disp
        dispmap = "disparity/" + test_left_img[inx].split('/')[-1]
        skimage.io.imsave(dispmap, (img * 256).astype('uint16'))
        logger.info('disparity map was saved at {}'.format(dispmap))
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        if args.colormode == 1:
            asgray = False
            imgL_o = (skimage.io.imread(test_left_img[inx], as_gray=asgray).astype('float32'))
            imgR_o = (skimage.io.imread(test_right_img[inx], as_gray=asgray).astype('float32'))
            imgL = processed(imgL_o).numpy()
            imgR = processed(imgR_o).numpy()
            imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
            imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])
        else:
            asgray = True
            imgL_o = (skimage.io.imread(test_left_img[inx], as_gray=asgray).astype('float32'))
            imgR_o = (skimage.io.imread(test_right_img[inx], as_gray=asgray).astype('float32'))
            imgL = np.reshape(imgL_o, [1, 1, imgL_o.shape[0], imgL_o.shape[1]])
            imgR = np.reshape(imgR_o, [1, 1, imgR_o.shape[0], imgR_o.shape[1]])

        # pad to (576, 960) for SceneFlow, (384, 1280) otherwise
        if args.KITTI == 'sf':
            top_pad = 576 - imgL.shape[2]
            left_pad = 960 - imgL.shape[3]
        else:
            top_pad = 384 - imgL.shape[2]
            left_pad = 1280 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))

        if args.KITTI == 'sf':
            # imgL_o is H x W (x C), so index height/width, not shape[2]/shape[3]
            top_pad = 576 - imgL_o.shape[0]
            left_pad = 960 - imgL_o.shape[1]
        else:
            top_pad = 384 - imgL_o.shape[0]
            left_pad = 1280 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        skimage.io.imsave(os.path.join(args.savepath, test_left_img[inx].split('/')[-1]),
                          (img * 256).astype('uint16'))
def main():
    processed = preprocess.get_transform(augment=False)
    min_disp = 80
    sample_frame_id = 365

    for inx in range(sample_frame_id, sample_frame_id + 1):
        # for inx in range(3244):
        imgLfile = os.path.join(args.datapath, 'left/%07d.png' % (inx + 1))
        imgRfile = os.path.join(args.datapath, 'right/%07d.png' % (inx + 1))
        imgL_o = (skimage.io.imread(imgLfile).astype('float32'))
        imgR_o = (skimage.io.imread(imgRfile).astype('float32'))
        imgL_o = skimage.transform.resize(imgL_o, (384, 1248), preserve_range=True)
        imgR_o = skimage.transform.resize(imgR_o, (384, 1248), preserve_range=True)
        imgL_o = imgL_o[:, :-min_disp, :]
        imgR_o = imgR_o[:, min_disp:, :]
        imgL_o = skimage.transform.resize(imgL_o, (384, 1248), preserve_range=True)
        imgR_o = skimage.transform.resize(imgR_o, (384, 1248), preserve_range=True)

        plt.subplot(4, 1, 1)
        plt.imshow(imgL_o.astype('uint8'))
        plt.subplot(4, 1, 2)
        plt.imshow(imgR_o.astype('uint8'))

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])
        # print(imgL.shape)

        # pad to (384, 1248)
        # top_pad = 384 - imgL.shape[2]
        # left_pad = 1248 - imgL.shape[3]
        # imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        # imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp, pred_cost = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))
        print(pred_disp.max())
        print(pred_disp.min())

        # top_pad = 384 - imgL_o.shape[0]
        # left_pad = 1248 - imgL_o.shape[1]
        # img = pred_disp[top_pad:, :-left_pad]
        img = pred_disp

        plt.subplot(4, 1, 3)
        plt.imshow(img)
        plt.colorbar()
        plt.subplot(4, 1, 4)
        plt.plot(pred_cost[:, 200, 600].flatten())
        plt.show()

        skimage.io.imsave(os.path.join(args.outpath, '%07d.png' % (inx)), (img * 256).astype('uint16'))
def main():
    processed = preprocess.get_transform(augment=False)

    if args.isgray.lower() == 'true':
        print('reading gray images')
        imgL_o = cv2.cvtColor(cv2.imread(args.leftimg, 0), cv2.COLOR_GRAY2RGB)
        imgR_o = cv2.cvtColor(cv2.imread(args.rightimg, 0), cv2.COLOR_GRAY2RGB)
    else:
        # imgL_o = (skimage.io.imread(args.leftimg).astype('float32'))
        # imgR_o = (skimage.io.imread(args.rightimg).astype('float32'))
        # updated by CCJ:
        imgL_o = Image.open(args.leftimg)
        # imgL_o = np.asarray(imgL_o)
        imgR_o = Image.open(args.rightimg)
        # imgR_o = np.asarray(imgR_o)

    imgL = processed(imgL_o).numpy()
    imgR = processed(imgR_o).numpy()
    imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
    imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

    # pad height and width up to multiples of 16
    if imgL.shape[2] % 16 != 0:
        times = imgL.shape[2] // 16
        top_pad = (times + 1) * 16 - imgL.shape[2]
    else:
        top_pad = 0
    if imgL.shape[3] % 16 != 0:
        times = imgL.shape[3] // 16
        left_pad = (times + 1) * 16 - imgL.shape[3]
    else:
        left_pad = 0
    imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
    imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

    start_time = time.time()
    pred_disp = test(imgL, imgR)
    print('time = %.2f' % (time.time() - start_time))

    if top_pad != 0 or left_pad != 0:
        # crop the padding (explicit end index so left_pad == 0 does not yield an empty slice)
        img = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]
    else:
        img = pred_disp

    img = (img * 256).astype('uint16')
    img = Image.fromarray(img)
    img.save('disparity.png')
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        '''imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL,[1,3,imgL.shape[1],imgL.shape[2]])
        imgR = np.reshape(imgR,[1,3,imgR.shape[1],imgR.shape[2]])'''
        imgL_o = Image.open(test_left_img[inx]).convert('RGB')
        imgR_o = Image.open(test_right_img[inx]).convert('RGB')
        imgL = processed(imgL_o)
        imgR = processed(imgR_o)
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (512, 512)
        top_pad = 512 - imgL.shape[2]
        left_pad = 512 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        '''img_raw = get_normalize_invert(pred_disp)  # undo the image normalization
        img_raw = np.array(img_raw * 255).clip(0, 255).squeeze().astype('uint8')'''
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 512 - imgL.shape[2]
        left_pad = 512 - imgL.shape[3]
        print(top_pad, left_pad)
        if top_pad == 0 and left_pad == 0:
            img = pred_disp
        else:
            img = pred_disp[top_pad:, :-left_pad]

        img = np.around(img, 1)
        skimage.io.imsave(test_left_img[inx].split('/')[-1], (img * 255).astype('uint8'))
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        imgL_o = skimage.io.imread(test_left_img[inx]).astype('float32')
        print(imgL_o.shape)
        ###
        # imgL_o = np.stack([imgL_o, imgL_o, imgL_o], 2)
        ###
        imgR_o = skimage.io.imread(test_right_img[inx]).astype('float32')
        ###
        # imgR_o = np.stack([imgR_o, imgR_o, imgR_o], 2)
        ###
        raw_h, raw_w = imgL_o.shape[0], imgL_o.shape[1]
        imgL_o = cv2.resize(imgL_o, (1242, 376))
        imgR_o = cv2.resize(imgR_o, (1242, 376))
        # imgL_o = imgL_o.resize((1242, 376), Image.BILINEAR)
        # imgR_o = imgR_o.resize((1242, 376), Image.BILINEAR)

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (384, 1248)
        top_pad = 384 - imgL.shape[2]
        left_pad = 1248 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 384 - imgL_o.shape[0]
        left_pad = 1248 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        # writePFM(str(inx) + "result_syn.pfm", cv2.resize(img, (384, 512)).astype(np.float32))
        writePFM(args.output, cv2.resize(img, (raw_w, raw_h)).astype(np.float32))
def main():
    processed = preprocess.get_transform(augment=False)
    padsize = 64

    if args.isgray:
        imgL_o = cv2.cvtColor(cv2.imread(args.leftimg, 0), cv2.COLOR_GRAY2RGB)
        imgR_o = cv2.cvtColor(cv2.imread(args.rightimg, 0), cv2.COLOR_GRAY2RGB)
    else:
        # imgL_o = cv2.cvtColor(cv2.imread(args.leftimg), cv2.COLOR_BGR2RGB)
        # imgR_o = cv2.cvtColor(cv2.imread(args.rightimg), cv2.COLOR_BGR2RGB)
        imgL_o = skimage.io.imread(args.leftimg)
        imgR_o = skimage.io.imread(args.rightimg)
        # imgL_o = Image.open(args.leftimg)
        # imgR_o = Image.open(args.rightimg)

    if real:
        imgL_o = np.pad(imgL_o, ((0, 0), (padsize, 0), (0, 0)))
        imgR_o = np.pad(imgR_o, ((0, 0), (0, padsize), (0, 0)))

    imgL = processed(imgL_o).numpy()
    imgR = processed(imgR_o).numpy()
    imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
    imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

    # pad height and width up to multiples of 16
    if imgL.shape[2] % 16 != 0:
        times = imgL.shape[2] // 16
        top_pad = (times + 1) * 16 - imgL.shape[2]
    else:
        top_pad = 0
    if imgL.shape[3] % 16 != 0:
        times = imgL.shape[3] // 16
        left_pad = (times + 1) * 16 - imgL.shape[3]
    else:
        left_pad = 0
    imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
    imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

    start_time = time.time()
    pred_disp = test(imgL, imgR)
    print('time = %.2f' % (time.time() - start_time))

    if top_pad != 0 or left_pad != 0:
        img = pred_disp[top_pad:, :]
    else:
        img = pred_disp
    img = img[:, padsize:]
    writePFM(args.output, img.astype(np.float32))
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        if not os.path.exists(test_left_img[inx]):
            continue
        imgL_o = cv2.imread(test_left_img[inx])
        shp = imgL_o.shape
        imgL_o = cv2.resize(imgL_o, (shp[1] // 2, shp[0] // 2), interpolation=cv2.INTER_LINEAR)
        imgL_o = imgL_o.astype('float32')

        if not os.path.exists(test_right_img[inx]):
            continue
        imgR_o = cv2.imread(test_right_img[inx])
        imgR_o = cv2.resize(imgR_o, (shp[1] // 2, shp[0] // 2), interpolation=cv2.INTER_LINEAR)
        imgR_o = imgR_o.astype('float32')

        # imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        # imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        top_pad = padding_size_x - imgL.shape[2]
        left_pad = padding_size_y - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = padding_size_x - imgL_o.shape[0]
        left_pad = padding_size_y - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]

        if not os.path.exists(args.result_path):
            os.makedirs(args.result_path)
        skimage.io.imsave(args.result_path + test_left_img[inx].split('/')[-1][:-4] + '.png',
                          (img * 256).astype('uint16'))
def __init__(self, args):
    self.folders = []
    self.stereo_pair_subfolders = []
    self.kitti_testres = 1.8
    self.middlebury_testres = 1
    self.eth_testres = args.eth_testres
    self.transform = get_transform()
    self.variable_testres = (args.testres == -1)
    self.testres = args.testres
    self.debug = args.debug

    self.folders.append(os.path.join(args.datapath, 'training'))
    if not args.eval_train_only:
        self.folders.append(os.path.join(args.datapath, 'test'))

    self.stereo_pair_subfolders = {}
    self.total_len = 0
    for folder in self.folders:
        self.stereo_pair_subfolders[folder] = []
        datasets = [dataset for dataset in os.listdir(folder)
                    if os.path.isdir(os.path.join(folder, dataset))]
        for dataset_name in datasets:
            skip_this_image = False
            if not args.all_data:
                skip_this_image = True
            if args.debug_image is not None and args.debug_image not in dataset_name:
                skip_this_image = True
            if args.kitti and "kitti2015" in dataset_name.lower():
                skip_this_image = False
            if args.eth and "eth3d" in dataset_name.lower():
                skip_this_image = False
            if args.mb and "middlebury" in dataset_name.lower():
                skip_this_image = False
            if not skip_this_image:
                self.stereo_pair_subfolders[folder].append(dataset_name)
        self.total_len += len(self.stereo_pair_subfolders[folder])

    self.args = args
def main():
    processed = preprocess.get_transform(augment=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)

    for inx in range(len(test_left_img)):
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])
        calib = test_calib[inx]

        # pad to (384, 1248)
        top_pad = 384 - imgL.shape[2]
        left_pad = 1248 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR, calib)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 384 - imgL_o.shape[0]
        left_pad = 1248 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        print(test_left_img[inx].split('/')[-1])

        if args.save_figure:
            skimage.io.imsave(args.save_path + '/' + test_left_img[inx].split('/')[-1],
                              (img * 256).astype('uint16'))
        else:
            np.save(args.save_path + '/' + test_left_img[inx].split('/')[-1][:-4], img)
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        print('imgL_o.shape', imgL_o.shape)
        print('imgR_o.shape', imgR_o.shape)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (544, 416)
        print('imgL.shape', imgL.shape)
        top_pad = 544 - imgL.shape[2]
        left_pad = 416 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        print('imgL.shape', imgL.shape)
        print('imgR.shape', imgR.shape)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('pred_disp.shape', pred_disp.shape)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 544 - imgL_o.shape[0]
        left_pad = 416 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        # img = pred_disp[:]
        # print(img)
        # skimage.io.imsave(test_left_img[inx].split('/')[-1], (img * 256).astype('uint16'))
        plt.imsave(test_left_img[inx].split('/')[-1], img, cmap='jet')
        util.writePFM(str(inx) + '.pfm', img)
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (384, 1248)
        # you can not pad to original resolution
        height = 384
        width = 1248
        # height = 1110
        # width = 1282
        top_pad = height - imgL.shape[2]
        left_pad = width - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 384 - imgL_o.shape[0]
        left_pad = 1248 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        print(test_left_img[inx].split('/')[-1])

        # predict_folder = "MB_test/"
        predict_folder = "KITTI2012_test_pretrained_2015/"
        skimage.io.imsave(predict_folder + test_left_img[inx].split('/')[-1], (img * 256).astype('uint16'))
def main():
    processed = preprocess.get_transform(augment=False)

    for inx in range(len(test_left_img)):
        imgL_o = (cv2.imread(test_left_img[inx]).astype('float32'))
        imgR_o = (cv2.imread(test_right_img[inx]).astype('float32'))
        print('imgL_o.shape', imgL_o.shape)
        print('imgR_o.shape', imgR_o.shape)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        # pad to (480, 352)
        print('imgL.shape', imgL.shape)
        top_pad = 480 - imgL.shape[2]
        left_pad = 352 - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        print('imgL.shape', imgL.shape)
        print('imgR.shape', imgR.shape)

        start_time = time.time()
        pred_disp = test(imgL, imgR)
        print('pred_disp.shape', pred_disp.shape)
        print('time = %.2f' % (time.time() - start_time))

        top_pad = 480 - imgL_o.shape[0]
        left_pad = 352 - imgL_o.shape[1]
        img = pred_disp[top_pad:, :-left_pad]
        plt.imsave('r' + str(inx) + '.png', img, cmap='jet')
        util.writePFM(str(inx) + '.pfm', img)
def main_worker(args, ml_logger):
    global best_acc1

    if args.gpu_ids is not None:
        print("Use GPU: {} for training".format(args.gpu_ids))

    if args.log_stats:
        from utils.stats_trucker import StatsTrucker as ST
        ST("W{}A{}".format(args.bit_weights, args.bit_act))

    if 'resnet' in args.arch and args.custom_resnet:
        model = custom_resnet(arch=args.arch, pretrained=args.pretrained,
                              depth=arch2depth(args.arch), dataset=args.dataset)
    elif 'inception_v3' in args.arch and args.custom_inception:
        model = custom_inception(pretrained=args.pretrained)
    else:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=args.pretrained)

    device = torch.device('cuda:{}'.format(args.gpu_ids[0]))
    cudnn.benchmark = True
    torch.cuda.set_device(args.gpu_ids[0])
    model = model.to(device)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, device)
            args.start_epoch = checkpoint['epoch']
            # best_acc1 = checkpoint['best_acc1']
            # best_acc1 may be from a checkpoint from a different GPU
            # best_acc1 = best_acc1.to(device)
            checkpoint['state_dict'] = {normalize_module_name(k): v for k, v in checkpoint['state_dict'].items()}
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if len(args.gpu_ids) > 1:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features, args.gpu_ids)
        else:
            model = torch.nn.DataParallel(model, args.gpu_ids)

    default_transform = {
        'train': get_transform(args.dataset, augment=True),
        'eval': get_transform(args.dataset, augment=False)
    }

    val_data = get_dataset(args.dataset, 'val', default_transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size, shuffle=True,
                                             num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    train_data = get_dataset(args.dataset, 'train', default_transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                                               num_workers=args.workers, pin_memory=True, drop_last=True)

    # TODO: replace this call by initialization on small subset of training data
    # TODO: enable for activations
    # validate(val_loader, model, criterion, args, device)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    lr_scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    mq = None
    if args.quantize:
        if args.bn_folding:
            print("Applying batch-norm folding ahead of post-training quantization")
            from utils.absorb_bn import search_absorbe_bn
            search_absorbe_bn(model)

        all_convs = [n for n, m in model.named_modules() if isinstance(m, nn.Conv2d)]
        # all_convs = [l for l in all_convs if 'downsample' not in l]
        all_relu = [n for n, m in model.named_modules() if isinstance(m, nn.ReLU)]
        all_relu6 = [n for n, m in model.named_modules() if isinstance(m, nn.ReLU6)]
        layers = all_relu[1:-1] + all_relu6[1:-1] + all_convs[1:]
        replacement_factory = {nn.ReLU: ActivationModuleWrapper,
                               nn.ReLU6: ActivationModuleWrapper,
                               nn.Conv2d: ParameterModuleWrapper}
        mq = ModelQuantizer(model, args, layers, replacement_factory,
                            OptimizerBridge(optimizer, settings={'algo': 'SGD', 'dataset': args.dataset}))

        if args.resume:
            # Load quantization parameters from state dict
            mq.load_state_dict(checkpoint['state_dict'])
        mq.log_quantizer_state(ml_logger, -1)

        if args.model_freeze:
            mq.freeze()

    if args.evaluate:
        if args.log_stats:
            mean = []
            var = []
            skew = []
            kurt = []
            for n, p in model.named_parameters():
                if n.replace('.weight', '') in all_convs[1:]:
                    mu = p.mean()
                    std = p.std()
                    mean.append((n, mu.item()))
                    var.append((n, (std ** 2).item()))
                    skew.append((n, torch.mean(((p - mu) / std) ** 3).item()))
                    kurt.append((n, torch.mean(((p - mu) / std) ** 4).item()))
            for i in range(len(mean)):
                ml_logger.log_metric(mean[i][0] + '.mean', mean[i][1])
                ml_logger.log_metric(var[i][0] + '.var', var[i][1])
                ml_logger.log_metric(skew[i][0] + '.skewness', skew[i][1])
                ml_logger.log_metric(kurt[i][0] + '.kurtosis', kurt[i][1])
            ml_logger.log_metric('weight_mean', np.mean([s[1] for s in mean]))
            ml_logger.log_metric('weight_var', np.mean([s[1] for s in var]))
            ml_logger.log_metric('weight_skewness', np.mean([s[1] for s in skew]))
            ml_logger.log_metric('weight_kurtosis', np.mean([s[1] for s in kurt]))

        acc = validate(val_loader, model, criterion, args, device)
        ml_logger.log_metric('Val Acc1', acc)
        if args.log_stats:
            stats = ST().get_stats()
            for s in stats:
                ml_logger.log_metric(s, np.mean(stats[s]))
        return

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion, args, device)
    ml_logger.log_metric('Val Acc1', acc1, -1)
    # evaluate with k-means quantization
    # if args.model_freeze:
    #     with mq.disable():
    #         acc1_nq = validate(val_loader, model, criterion, args, device)
    #         ml_logger.log_metric('Val Acc1 fp32', acc1_nq, -1)

    for epoch in range(0, args.epochs):
        # train for one epoch
        print('Timestamp Start epoch: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))
        train(train_loader, model, criterion, optimizer, epoch, args, device, ml_logger, val_loader, mq)
        print('Timestamp End epoch: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))

        if not args.lr_freeze:
            lr_scheduler.step()

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args, device)
        ml_logger.log_metric('Val Acc1', acc1, step='auto')
        # evaluate with k-means quantization
        # if args.model_freeze:
        #     with mq.quantization_method('kmeans'):
        #         acc1_kmeans = validate(val_loader, model, criterion, args, device)
        #         ml_logger.log_metric('Val Acc1 kmeans', acc1_kmeans, epoch)
        #     with mq.disable():
        #         acc1_nq = validate(val_loader, model, criterion, args, device)
        #         ml_logger.log_metric('Val Acc1 fp32', acc1_nq, step='auto')

        if args.quantize:
            mq.log_quantizer_state(ml_logger, epoch)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict() if len(args.gpu_ids) == 1 else model.module.state_dict(),
            'best_acc1': best_acc1,
            'optimizer': optimizer.state_dict(),
        }, is_best)
def __getitem__(self, index):
    left = self.left[index]
    right = self.right[index]
    left_img = self.loader(left)
    right_img = self.loader(right)

    disp_L = self.disp_L[index]
    dataL = self.dploader(disp_L)
    dataL[dataL == np.inf] = 0

    if not (self.disp_R is None):
        disp_R = self.disp_R[index]
        dataR = self.dploader(disp_R)
        dataR[dataR == np.inf] = 0

    max_h = 2048 // 4
    max_w = 3072 // 4

    # photometric unsymmetric-augmentation
    random_brightness = np.random.uniform(self.rand_bright[0], self.rand_bright[1], 2)
    random_gamma = np.random.uniform(0.8, 1.2, 2)
    random_contrast = np.random.uniform(0.8, 1.2, 2)
    left_img = torchvision.transforms.functional.adjust_brightness(left_img, random_brightness[0])
    left_img = torchvision.transforms.functional.adjust_gamma(left_img, random_gamma[0])
    left_img = torchvision.transforms.functional.adjust_contrast(left_img, random_contrast[0])
    right_img = torchvision.transforms.functional.adjust_brightness(right_img, random_brightness[1])
    right_img = torchvision.transforms.functional.adjust_gamma(right_img, random_gamma[1])
    right_img = torchvision.transforms.functional.adjust_contrast(right_img, random_contrast[1])
    right_img = np.asarray(right_img)
    left_img = np.asarray(left_img)

    # horizontal flip
    if not (self.disp_R is None):
        if np.random.binomial(1, 0.5):
            tmp = right_img
            right_img = left_img[:, ::-1]
            left_img = tmp[:, ::-1]
            tmp = dataR
            dataR = dataL[:, ::-1]
            dataL = tmp[:, ::-1]

    # geometric unsymmetric-augmentation
    angle = 0
    px = 0
    if np.random.binomial(1, 0.5):
        angle = 0.1
        px = 2
    co_transform = flow_transforms.Compose([
        flow_transforms.RandomVdisp(angle, px),
        flow_transforms.Scale(np.random.uniform(self.rand_scale[0], self.rand_scale[1]), order=self.order),
        flow_transforms.RandomCrop((max_h, max_w)),
    ])
    augmented, dataL = co_transform([left_img, right_img], dataL)
    left_img = augmented[0]
    right_img = augmented[1]

    # randomly occlude a region
    if np.random.binomial(1, 0.5):
        sx = int(np.random.uniform(50, 150))
        sy = int(np.random.uniform(50, 150))
        cx = int(np.random.uniform(sx, right_img.shape[0] - sx))
        cy = int(np.random.uniform(sy, right_img.shape[1] - sy))
        right_img[cx - sx:cx + sx, cy - sy:cy + sy] = np.mean(np.mean(right_img, 0), 0)[np.newaxis, np.newaxis]

    h, w, _ = left_img.shape
    top_pad = max_h - h
    left_pad = max_w - w
    left_img = np.lib.pad(left_img, ((top_pad, 0), (0, left_pad), (0, 0)), mode='constant', constant_values=0)
    right_img = np.lib.pad(right_img, ((top_pad, 0), (0, left_pad), (0, 0)), mode='constant', constant_values=0)
    dataL = np.expand_dims(np.expand_dims(dataL, 0), 0)
    dataL = np.lib.pad(dataL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)[0, 0]
    dataL = np.ascontiguousarray(dataL, dtype=np.float32)

    processed = preprocess.get_transform()
    left_img = processed(left_img)
    right_img = processed(right_img)

    return left_img, right_img, dataL
    model.eval()

    imgL = torch.FloatTensor(imgL).cuda()
    imgR = torch.FloatTensor(imgR).cuda()
    imgL, imgR = Variable(imgL), Variable(imgR)

    with torch.no_grad():
        disp = model(imgL, imgR)

    disp = torch.squeeze(disp)
    pred_disp = disp.data.cpu().numpy()

    return pred_disp


processed = preprocess.get_transform(augment=False)


def cal_disp(imgL_o, imgR_o):
    imgL = processed(imgL_o).numpy()
    imgR = processed(imgR_o).numpy()
    imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
    imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

    # pad height and width up to multiples of 16
    if imgL.shape[2] % 16 != 0:
        times = imgL.shape[2] // 16
        top_pad = (times + 1) * 16 - imgL.shape[2]
    else:
        top_pad = 0
    if imgL.shape[3] % 16 != 0:
def main():
    processed = get_transform()
    model.eval()

    for inx in range(len(test_left_img)):
        print(test_left_img[inx])
        imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32'))[:, :, :3]
        imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32'))[:, :, :3]
        imgsize = imgL_o.shape[:2]

        if args.max_disp > 0:
            if args.max_disp % 16 != 0:
                args.max_disp = 16 * math.floor(args.max_disp / 16)
            max_disp = int(args.max_disp)
        else:
            with open(test_left_img[inx].replace('im0.png', 'calib.txt')) as f:
                lines = f.readlines()
                max_disp = int(int(lines[6].split('=')[-1]))

        ## change max disp
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()
        print(model.module.maxdisp)

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ## fast pad
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64
        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        # test
        imgL = Variable(torch.FloatTensor(imgL).cuda())
        imgR = Variable(torch.FloatTensor(imgR).cuda())
        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            print('time = %.2f' % (ttime * 1000))
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = test_left_img[inx].split('/')[-2]
        if not os.path.exists('%s/%s' % (args.outdir, idxname)):
            os.makedirs('%s/%s' % (args.outdir, idxname))
        idxname = '%s/disp0HSM' % (idxname)

        # resize to highres
        pred_disp = cv2.resize(pred_disp / args.testres, (imgsize[1], imgsize[0]), interpolation=cv2.INTER_LINEAR)

        # clip while keep inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        np.save('%s/%s-disp.npy' % (args.outdir, idxname.split('/')[0]), (pred_disp))
        np.save('%s/%s-ent.npy' % (args.outdir, idxname.split('/')[0]), (entropy))
        cv2.imwrite('%s/%s-disp.png' % (args.outdir, idxname.split('/')[0]),
                    pred_disp / pred_disp[~invalid].max() * 255)
        cv2.imwrite('%s/%s-ent.png' % (args.outdir, idxname.split('/')[0]), entropy / entropy.max() * 255)
        with open('%s/%s.pfm' % (args.outdir, idxname), 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/%s/timeHSM.txt' % (args.outdir, idxname.split('/')[0]), 'w') as f:
            f.write(str(ttime))

        torch.cuda.empty_cache()
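# The "change max disp" block above rounds the scaled disparity search range up to the next
# multiple of 64, bumping an exact 64 up to 128. The sketch below expresses the same arithmetic
# more directly; it is an illustrative reformulation under that reading, not the original code.
import math

def round_maxdisp(max_disp, testres, base=64):
    """Round max_disp * testres up to the next multiple of `base`; a result of `base` becomes 2 * base."""
    maxdisp = int(math.ceil((max_disp * testres) / base) * base)
    if maxdisp == base:
        maxdisp = 2 * base
    return maxdisp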
def main():
    processed = get_transform()
    model.eval()

    # save predictions
    out_path = os.path.join("./kitti_submission_output", args.name)
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    out_dir = os.path.join(out_path, "disp_0")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    for (left_img_path, right_img_path, disp_path) in zip(left_val, right_val, disp_val_L):
        # print(test_left_img[inx])
        print(left_img_path)
        imgL_o = (skimage.io.imread(left_img_path).astype('float32'))[:, :, :3]
        imgR_o = (skimage.io.imread(right_img_path).astype('float32'))[:, :, :3]
        imgsize = imgL_o.shape[:2]
        # torch.save(imgL_o, "/home/isaac/high-res-stereo/debug/my_submission/img0.pt")

        if args.max_disp > 0:
            max_disp = int(args.max_disp)
        else:
            with open('/home/isaac/rvc_devkit/stereo/datasets_middlebury2014/training/Kitti2015_000028_10/calib.txt') as f:
                lines = f.readlines()
                max_disp = int(int(lines[6].split('=')[-1]))

        ## change max disp
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        # torch.save(imgL_o, "/home/isaac/high-res-stereo/debug/my_submission/img1.pt")
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        # torch.save(imgL, "/home/isaac/high-res-stereo/debug/my_submission/img2.pt")
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])
        # torch.save(imgL, "/home/isaac/high-res-stereo/debug/my_submission/img3.pt")

        ## fast pad
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64
        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        # torch.save(imgL, "/home/isaac/high-res-stereo/debug/my_submission/img4.pt")

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)
        wandb.log({
            "imgL": wandb.Image(imgL, caption=str(imgL.shape)),
            "imgR": wandb.Image(imgR, caption=str(imgR.shape))
        })
        imgL = imgL.cuda()
        imgR = imgR.cuda()

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()
            # torch.save(imgL, "/home/isaac/high-res-stereo/debug/my_submission/img_final.pt")
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        img_name = os.path.basename(os.path.normpath(left_img_path))

        # resize to highres
        pred_disp = cv2.resize(pred_disp / args.testres, (imgsize[1], imgsize[0]), interpolation=cv2.INTER_LINEAR)

        # clip while keep inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # np.save('%s/%s' % (out_dir, img_name), (pred_disp))
        # np.save('%s/%s-ent.npy' % (out_dir, idxname.split('/')[0]), (entropy))
        pred_disp_png = (pred_disp * 256).astype('uint16')
        cv2.imwrite(os.path.join(out_dir, img_name), pred_disp_png)
        entropy_png = (entropy * 256).astype('uint16')
        # cv2.imwrite(os.path.join(out_dir, img_name), entropy_png)
        wandb.log({
            "disp": wandb.Image(pred_disp_png, caption=str(pred_disp_png.shape)),
            "entropy": wandb.Image(entropy_png, caption=str(entropy_png.shape))
        })
        # with open('%s/%s.pfm' % (out_dir, idxname), 'w') as f:
        #     save_pfm(f, pred_disp[::-1, :])
        # with open('%s/%s/timeHSM.txt' % (out_dir, idxname.split('/')[0]), 'w') as f:
        #     f.write(str(ttime))

        torch.cuda.empty_cache()

    subprocess.run(["/home/isaac/KITTI2015_devkit/cpp/eval_scene_flow", out_path + "/"])