# Imports (the module paths for VCN, VCN_small, flow_to_image and point_vec assume
# the VCN repository layout; adjust them to the local setup)
import time

import cv2
import imageio
import numpy as np
import torch
import torch.nn as nn

from models.VCN import VCN
from models.VCN_small import VCN_small
from utils.flowlib import flow_to_image, point_vec

MODEL_OPTION = 'base'  # assumed module-level switch: 'base' -> full VCN, otherwise VCN_small


# calOpt, video variant: estimate optical flow between consecutive frames of 'video.mp4'
def calOpt(height=240, width=320, maxdisp=256, fac=1, modelpath='finetune_67999.tar'):
    # Calculate the model input size: round height/width up to the nearest multiple of 64
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)  # floor division: largest multiple of 64 that is <= maxh
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:  # if the size was not already a multiple of 64, round up to the next one
        max_h += 64
    if max_w < maxw:
        max_w += 64

    # Load model
    if MODEL_OPTION == 'base':
        model = VCN([1, max_w, max_h], md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4], fac=fac)
    else:
        model = VCN_small([1, max_w, max_h], md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4], fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # Load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    cap = cv2.VideoCapture('video.mp4')
    ret, old_frame = cap.read()
    while True:
        ret, frame = cap.read()
        if not ret:  # stop at the end of the video
            break
        input_size = old_frame.shape
        imgL = cv2.resize(old_frame, (max_w, max_h))
        imgR = cv2.resize(frame, (max_w, max_h))

        # For gray input images:
        # the model expects RGB images, i.e. 3 channels, so repeat the
        # H*W spatial values over the channel axis, [H,W] -> [H,W,3]
        if len(old_frame.shape) == 2:
            old_frame = np.tile(old_frame[:, :, np.newaxis], (1, 1, 3))
            frame = np.tile(frame[:, :, np.newaxis], (1, 1, 3))

        # Flip the channel order, scale to [0,1], subtract the per-channel mean
        imgL = imgL[:, :, ::-1].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis, np.newaxis, :]
        imgR = imgR[:, :, ::-1].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis, np.newaxis, :]
        # The model expects inputs in [C,H,W] format instead of [H,W,C]
        imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
        imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

        # Numpy array to Torch tensor
        imgL = torch.FloatTensor(imgL).cuda()
        imgR = torch.FloatTensor(imgR).cuda()

        # Forward pass
        with torch.no_grad():
            imgLR = torch.cat([imgL, imgR], 0)
            time1 = time.time()
            rts = model(imgLR)
            pred_disp, entropy = rts
            print(time.time() - time1)

        k = cv2.waitKey(25)
        if k == 27:  # ESC quits
            break
        old_frame = frame.copy()

        # Upsampling
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()  # remove the batch dimension, torch tensor -> numpy ndarray
        pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)),
                               (input_size[1], input_size[0]))  # transpose [C,H,W] -> [H,W,C], resize back to the original size
        pred_disp[:, :, 0] *= input_size[1] / max_w  # rescale the horizontal flow component
        pred_disp[:, :, 1] *= input_size[0] / max_h  # rescale the vertical flow component
        flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
        flow[:, :, :2] = pred_disp
        # entropy = torch.squeeze(entropy).data.cpu().numpy()
        # entropy = cv2.resize(entropy, (input_size[1], input_size[0]))
        cv2.imshow('frame', flow_to_image(flow))

    cap.release()
    cv2.destroyAllWindows()
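
# A minimal sketch (not part of the original code) of the "round up to a multiple
# of 64" computation used above, so the resize target can be checked without
# building the model. The helper name round_up_to_64 is hypothetical.
def round_up_to_64(x):
    """Return the smallest multiple of 64 that is >= x."""
    return ((x + 63) // 64) * 64

assert round_up_to_64(240) == 256  # the default height 240 is padded up to 256
assert round_up_to_64(320) == 320  # the default width 320 is already a multiple of 64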
# Training-script fragment: dataset mixing, model construction, and checkpoint loading
##iml0, iml1, flowl0, _, _, _, _, _ = lsfs.dataloader('%s/sceneflow/' % args.database)
##loader_stereo_sf = dr.myImageFloder(iml0, iml1, flowl0, shape=datashape, scale=1, order=1, dploader=disparity_loader_sf)

# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*75 + [loader_stereo_12]*75 + [loader_stereo_mb]*600 + [loader_stereo_sf])
# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*50 + [loader_stereo_12]*50 + [loader_stereo_mb]*600 + [loader_chairs])
# data_inuse = torch.utils.data.ConcatDataset([loader_chairs]*2 + [loader_things] + [loader_stereo_15]*300 + [loader_stereo_12]*300)  # stereo transfer
# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*20 + [loader_stereo_12]*20)  # stereo transfer
# data_inuse = torch.utils.data.ConcatDataset([loader_kitti15]*20 + [loader_kitti12]*20 + [loader_stereo_15]*20 + [loader_stereo_12]*20)
print('%d batches per epoch' % (len(data_inuse) // batch_size))

# TODO
model = VCN([batch_size // ngpus] + data_inuse.datasets[0].shape[::-1],
            md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4], fac=args.fac)
model = nn.DataParallel(model)
model.cuda()

total_iters = 0
mean_L = [[1.]]
mean_R = [[1.]]
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    pretrained_dict['state_dict'] = {k: v for k, v in pretrained_dict['state_dict'].items()}
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    if args.retrain == 'true':
        print('re-training')
        with open('./iter_counts-%d.txt' % int(args.logname.split('-')[-1]), 'r') as f:
            total_iters = int(f.readline())
    else:
        with open('./iter_counts-%d.txt' % int(args.logname.split('-')[-1]), 'r') as f:
            total_iters = int(f.readline())
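
# A minimal, self-contained sketch (not part of the original code) of how the
# ConcatDataset repetition above weights the dataset mix: repeating a loader k
# times in the list makes its samples appear k times per epoch. The tiny
# TensorDatasets below are stand-ins for the real loaders.
import torch
from torch.utils.data import ConcatDataset, TensorDataset

ds_small = TensorDataset(torch.zeros(10, 3))    # e.g. a small real-image split
ds_large = TensorDataset(torch.zeros(100, 3))   # e.g. a large synthetic split
mix = ConcatDataset([ds_small] * 20 + [ds_large])
print(len(mix))  # 20*10 + 100 = 300 samples per epoch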
# calOpt, image-pair variant: estimate optical flow between 'image1.png' and 'image2.png'
def calOpt(height=240, width=320, maxdisp=256, fac=1, modelpath='finetune_67999.tar'):
    # Calculate the model input size: round height/width up to the nearest multiple of 64
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)  # floor division: largest multiple of 64 that is <= maxh
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:  # if the size was not already a multiple of 64, round up to the next one
        max_h += 64
    if max_w < maxw:
        max_w += 64

    # Load model
    model = VCN([1, max_w, max_h], md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4], fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # Load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    start_time = time.time()

    # Load images and resize
    # Note that the images are loaded as [H,W,C], i.e. [H,W,3]
    imgL_o = imageio.imread('image1.png')[:, :, :3]  # some image files include an alpha channel (a 4th channel)
    imgR_o = imageio.imread('image2.png')[:, :, :3]  # keep only the first 3 (RGB) channels
    input_size = imgL_o.shape
    imgL = cv2.resize(imgL_o, (max_w, max_h))
    imgR = cv2.resize(imgR_o, (max_w, max_h))
    read_time = time.time()

    # For gray input images:
    # the model expects RGB images, i.e. 3 channels, so repeat the
    # H*W spatial values over the channel axis, [H,W] -> [H,W,3]
    if len(imgL_o.shape) == 2:
        imgL_o = np.tile(imgL_o[:, :, np.newaxis], (1, 1, 3))
        imgR_o = np.tile(imgR_o[:, :, np.newaxis], (1, 1, 3))

    # Flip the channel order, scale to [0,1], subtract the per-channel mean
    imgL = imgL[:, :, ::-1].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis, np.newaxis, :]
    imgR = imgR[:, :, ::-1].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis, np.newaxis, :]
    # The model expects inputs in [C,H,W] format instead of [H,W,C]
    imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
    imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

    # Numpy array to Torch tensor
    imgL = torch.FloatTensor(imgL).cuda()
    imgR = torch.FloatTensor(imgR).cuda()

    # Forward pass
    with torch.no_grad():
        imgLR = torch.cat([imgL, imgR], 0)
        time1 = time.time()
        rts = model(imgLR)
        pred_disp, entropy = rts
        time2 = time.time()
        print(time2 - time1)
    forward_time = time.time()

    # Upsampling
    pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()  # remove the batch dimension, torch tensor -> numpy ndarray
    pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)),
                           (input_size[1], input_size[0]))  # transpose [C,H,W] -> [H,W,C], resize back to the original size
    pred_disp[:, :, 0] *= input_size[1] / max_w  # rescale the horizontal flow component
    pred_disp[:, :, 1] *= input_size[0] / max_h  # rescale the vertical flow component
    flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
    flow[:, :, :2] = pred_disp
    entropy = torch.squeeze(entropy).data.cpu().numpy()
    entropy = cv2.resize(entropy, (input_size[1], input_size[0]))
    upsample_time = time.time()

    print("Read: {}s".format(read_time - start_time))
    print("Forward: {}s".format(forward_time - read_time))
    print("Upsample: {}s".format(upsample_time - forward_time))

    # Show results (showImage: display/save helper assumed to be defined elsewhere in the script)
    showImage(flow_to_image(flow), "flow_to_image.png")
    showImage(point_vec(imgL_o, flow)[:, :, ::-1], "vector_on_image.png")
    showImage(entropy, "entropy.png")
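
# A standalone sketch (not part of the original code) of the per-image
# preprocessing used in both calOpt variants above: reverse the channel order
# (the [:, :, ::-1] step), scale to [0,1], subtract the per-channel mean stored
# in the checkpoint, and move channels to the front. prep_image is a
# hypothetical helper name.
import numpy as np

def prep_image(img, mean):
    """img: HxWx3 uint8 image; mean: iterable of 3 per-channel means in [0,1]."""
    x = img[:, :, ::-1].astype(np.float64) / 255.         # reverse channel order, scale to [0,1]
    x = x - np.asarray(mean)[np.newaxis, np.newaxis, :]   # subtract the per-channel mean
    return np.transpose(x, (2, 0, 1))[np.newaxis]         # [H,W,C] -> [1,C,H,W]

dummy = (np.random.rand(64, 96, 3) * 255).astype(np.uint8)
print(prep_image(dummy, [0.4, 0.4, 0.4]).shape)  # (1, 3, 64, 96)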