def __init__(self, args, is_cropped, root='/path/to/flyingthings3d', dstype='frames_cleanpass', replicates=1):
    """Index FlyingThings3D TRAIN image pairs with their forward .flo flows.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: dataset root containing `dstype` and the flo-format flow tree.
        dstype: image pass subdirectory (e.g. 'frames_cleanpass').
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    # Every scene contributes both its left and right camera streams.
    image_dirs = sorted(glob(join(root, dstype, 'TRAIN/*/*')))
    image_dirs = sorted([join(f, 'left') for f in image_dirs] + [join(f, 'right') for f in image_dirs])
    flow_dirs = sorted(
        glob(join(root, 'optical_flow_flo_format/TRAIN/*/*')))
    flow_dirs = sorted([join(f, 'into_future/left') for f in flow_dirs] + [join(f, 'into_future/right') for f in flow_dirs])
    assert (len(image_dirs) == len(flow_dirs))
    self.image_list = []
    self.flow_list = []
    for idir, fdir in zip(image_dirs, flow_dirs):
        images = sorted(glob(join(idir, '*.png')))
        flows = sorted(glob(join(fdir, '*.flo')))
        # Flow i maps frame i -> i+1; assumes each directory holds one
        # more image than flows -- TODO confirm against the dataset layout.
        for i in range(len(flows)):
            self.image_list += [[images[i], images[i + 1]]]
            self.flow_list += [flows[i]]
    assert len(self.image_list) == len(self.flow_list)
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # The network needs dimensions divisible by 64; snap down to the
    # nearest multiple when the frames (or a negative request) don't comply.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
def __getitem__(self, index):
    """Load, resize and convert one ground-truth image to CIELAB.

    Returns:
        A (3, 224, 224) float32 tensor in Lab channel order, or None
        when the RGB->Lab conversion rejects the image (the caller's
        collate step is expected to filter out None samples).
    """
    index = index % self.size
    img = frame_utils.read_gen(self.gt_images[index])
    # Fixed: the train and eval branches performed the identical
    # resize, so the redundant if/else was collapsed.
    img = resize(img, (224, 224))
    try:
        img = rgb2lab(img)
    except ValueError:
        # Conversion can fail (e.g. unexpected channel count); flag the
        # sample as unusable instead of crashing the loader.
        return None
    img = np.array(img).transpose(2, 0, 1)
    img = torch.from_numpy(img.astype(np.float32))
    return img
def __init__(self, args, is_cropped = False, root = '', dstype = 'clean', replicates = 1):
    """Index Sintel-style image pairs against their .flo ground-truth flows.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: dataset root containing 'flow' and `dstype` subtrees.
        dstype: image pass subdirectory ('clean' or 'final').
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    flow_root = join(root, 'flow')
    image_root = join(root, dstype)
    file_list = sorted(glob(join(flow_root, '*/*.flo')))
    self.flow_list = []
    self.image_list = []
    for file in file_list:
        # Skip anything from a test split mixed into the flow tree.
        if 'test' in file:
            continue
        # Flow names end in a 4-digit frame number: <prefix>NNNN.flo.
        # The flow for frame N pairs image N with image N+1.
        fbase = file[len(flow_root)+1:]
        fprefix = fbase[:-8]
        fnum = int(fbase[-8:-4])
        img1 = join(image_root, fprefix + "%04d"%(fnum+0) + '.png')
        img2 = join(image_root, fprefix + "%04d"%(fnum+1) + '.png')
        # Keep only triples whose files all exist on disk.
        if not isfile(img1) or not isfile(img2) or not isfile(file):
            continue
        self.image_list += [[img1, img2]]
        self.flow_list += [file]
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when needed.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0]%64) or (self.frame_size[1]%64):
        self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
        self.render_size[1] = ( (self.frame_size[1])//64 ) * 64
    args.inference_size = self.render_size
    assert (len(self.image_list) == len(self.flow_list))
def __init__(self, args, is_cropped=False, root='', img1_dirname='2k_mesh_rasterized', img2_dirname='2k_mesh_rasterized_noised_camera_sigma_5', dstype='train', replicates=1):
    """Index rendered mesh frame pairs listed in `<root>/<dstype>.txt`.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: directory holding the `<dstype>.txt` frame-name listing.
        img1_dirname: directory of the clean renders.
        img2_dirname: directory of the camera-noised renders.
        dstype: listing-file stem ('train', 'val', ...).
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    # read 'dstype' list of names
    with open(os.path.join(root, dstype + '.txt')) as f:
        frame_names = f.read().splitlines()
    self.flow_list = []
    self.image_list = []
    for frame_name in frame_names:
        # NOTE(review): the .flo.npy target lives beside img2 --
        # presumably flows were rendered with the noised cameras;
        # confirm against the data-generation pipeline.
        flow = os.path.join(img2_dirname, frame_name + '.flo.npy')
        img1 = os.path.join(img1_dirname, frame_name + '.png')
        img2 = os.path.join(img2_dirname, frame_name + '.png')
        # Keep only triples whose files all exist on disk.
        if not isfile(img1) or not isfile(img2) or not isfile(flow):
            continue
        self.image_list += [[img1, img2]]
        self.flow_list += [flow]
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when needed.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
    assert (len(self.image_list) == len(self.flow_list))
    print('There are {} frames in the dataset'.format(self.size))
def __init__(self, args):
    """Build (frame, ground-truth image) path lists from a listing file.

    Args:
        args: dict with keys 'render_size', 'replicates', 'dstype'
            (path to a whitespace-separated "frame gt_image" listing
            file) and 'train'.
    """
    super().__init__()
    self.render_size = args['render_size']
    self.replicates = args['replicates']
    self.frame_1 = []
    self.gt_images = []
    # Each listing line holds "frame_path gt_image_path".
    # Fixed: the file handle was previously opened and never closed.
    with open(args['dstype'], 'r') as listing:
        for line in listing.readlines():
            frame_, gt_img = line.split()
            self.frame_1.append(frame_)
            self.gt_images.append(gt_img)
    self.train = args['train']
    assert len(self.frame_1) == len(self.gt_images)
    self.size = len(self.frame_1)
    self.frame_size = frame_utils.read_gen(self.frame_1[0]).shape
    # NOTE(review): the divisibility test uses % 32 but the rounding
    # below snaps to multiples of 64 -- this looks inconsistent with
    # the sibling datasets; confirm whether % 64 was intended before
    # changing it.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 32) or (self.frame_size[1] % 32):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
def __init__(self, args, is_cropped=False, root='~/data/data_scene_flow', dstype='training', replicates=1):
    """Index KITTI scene-flow left/right image pairs with disparity maps.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: KITTI scene-flow root directory.
        dstype: split directory ('training' or 'testing').
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    l_image_dir = join(root, dstype, 'image_2')
    r_image_dir = join(root, dstype, 'image_3')
    disp_dir = join(root, dstype, 'disp_occ_0')
    self.image_list = []
    self.disp_list = []
    # Only the *_10 frames carry ground-truth disparity in KITTI.
    images_1 = sorted(glob(join(l_image_dir, '*_10.png')))
    images_2 = sorted(glob(join(r_image_dir, '*_10.png')))
    disps = sorted(glob(join(disp_dir, '*.png')))
    assert len(images_1) == len(images_2)
    assert len(disps) == len(images_1)
    for i in range(len(disps)):
        self.image_list += [[images_1[i], images_2[i]]]
        self.disp_list += [disps[i]]
    assert len(self.image_list) == len(self.disp_list)
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Fixed: '/' is true division in Python 3 and produced float render
    # sizes; floor division keeps integer multiples of 64.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
def __init__(self, args, is_cropped, root='/path/to/folder/of/frames/only/folders', iext='png', replicates=1):
    """Pair consecutive frames from IntPhys-style scene folders.

    In '2d' mode, frames are taken from `NNNN/frames` folders 0001-3000;
    otherwise a single `15000_block_O1_train/scene` folder is scanned.

    Args:
        args: namespace providing crop_size, inference_size and intphys;
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: root folder containing the scene subfolders.
        iext: image file extension.
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    self.image_list = []
    if self.args.intphys == '2d':
        for f in [join(root, '%04d/frames' % i) for i in range(1, 3001)]:
            images = sorted(glob(join(f, '*.' + iext)))
            for i in range(len(images) - 1):
                im1 = images[i]
                im2 = images[i + 1]
                self.image_list += [[im1, im2]]
    else:
        for f in [
                join(root, '%05d_block_O1_train/scene' % i)
                for i in range(15000, 15001)
        ]:
            images = sorted(glob(join(f, '*.' + iext)))
            for i in range(len(images) - 1):
                im1 = images[i]
                im2 = images[i + 1]
                self.image_list += [[im1, im2]]
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Fixed: '/' is true division in Python 3 and produced float render
    # sizes; floor division keeps integer multiples of 64.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
def __init__(self, args, is_cropped=False, root='', dstype='frames', replicates=1):
    """Pair consecutive .png frames from `<root>/<dstype>` for inference.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: parent directory of the frames folder.
        dstype: frames subdirectory name.
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    image_root = join(root, dstype)
    file_list = sorted(glob(join(image_root, '*.png')))
    self.image_list = []
    for i in range(len(file_list) - 1):
        # join() with a single argument just normalizes the path here.
        img1 = join(file_list[i])
        img2 = join(file_list[i + 1])
        # Keep only pairs whose files both exist on disk.
        if not isfile(img1) or not isfile(img2):
            continue
        self.image_list += [[img1, img2]]
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # NOTE(review): this variant rounds *up* with math.ceil, producing a
    # render size >= the frame size (padding), unlike sibling datasets
    # that floor-divide -- confirm downstream expects padding here.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = (math.ceil(self.frame_size[0] / 64)) * 64
        self.render_size[1] = (math.ceil(self.frame_size[1] / 64)) * 64
    args.inference_size = self.render_size
def __init__(self, args, is_cropped, root='/path/to/chairssdhom/data', dstype='train', replicates=1, transforms=None):
    """Index ChairsSDHom t0/t1 frame pairs with their .flo flow targets.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: ChairsSDHom data root.
        dstype: split subdirectory ('train' or 'test').
        replicates: dataset-length multiplier.
        transforms: optional transforms applied by consumers.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    self.transforms = transforms
    first_frames = sorted(glob(join(root, dstype, 't0/*.png')))
    second_frames = sorted(glob(join(root, dstype, 't1/*.png')))
    self.flow_list = sorted(glob(join(root, dstype, 'flow/*.flo')))
    assert (len(first_frames) == len(self.flow_list))
    # One flow per (t0, t1) frame pair, matched by sorted position.
    self.image_list = [[first_frames[k], second_frames[k]]
                       for k in range(len(self.flow_list))]
    assert len(self.image_list) == len(self.flow_list)
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when the frames (or
    # a negative request) are not already compliant.
    needs_adjust = (self.render_size[0] < 0 or self.render_size[1] < 0
                    or self.frame_size[0] % 64 or self.frame_size[1] % 64)
    if needs_adjust:
        self.render_size[0] = (self.frame_size[0] // 64) * 64
        self.render_size[1] = (self.frame_size[1] // 64) * 64
    args.inference_size = self.render_size
def __init__(self, args, is_cropped, root = '/path/to/frames/only/folder', iext = 'png', replicates = 1):
    """Pair consecutive frames from a flat folder of images.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: folder containing the frames.
        iext: image file extension.
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    images = sorted(glob(join(root, '*.' + iext)))
    self.image_list = []
    for i in range(len(images) - 1):
        im1 = images[i]
        im2 = images[i + 1]
        self.image_list += [[im1, im2]]
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Fixed: '/' is true division in Python 3 and produced float render
    # sizes; floor division keeps integer multiples of 64.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
def __init__(self, image_folder, inference_size, extension='jpg'):
    """Index consecutive image pairs from a flat folder for inference.

    Args:
        image_folder: folder containing the frames.
        inference_size: [height, width] list, adjusted in place to
            multiples of 64 when needed.
        extension: image file extension.
    """
    self.image_folder = image_folder
    self.inference_size = inference_size
    # List all the images
    images = sorted(glob(join(image_folder, '*.' + extension)))
    # Fixed: the assert message referenced an undefined name `root`,
    # which turned an empty folder into a NameError instead of this
    # assertion failure.
    assert len(images) > 0, 'no images found in directory: {}'.format(image_folder)
    self.image_list = []
    for i in range(len(images) - 1):
        im1 = images[i]
        im2 = images[i + 1]
        self.image_list += [[im1, im2]]
    self.size = len(self.image_list)
    # Get frame size from first image
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the inference size down to multiples of 64 when the frames
    # (or a negative request) are not already compliant.
    if (self.inference_size[0] < 0) or (self.inference_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.inference_size[0] = ((self.frame_size[0]) // 64) * 64
        self.inference_size[1] = ((self.frame_size[1]) // 64) * 64
def __init__(self, args, is_cropped, root='/path/to/FlyingChairs_release/data', replicates=1, transforms=None):
    """Index FlyingChairs *.ppm frame pairs with their *.flo flows.

    Frames come as consecutive (img0, img1) pairs in sorted order, one
    flow file per pair.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: FlyingChairs data directory.
        replicates: dataset-length multiplier.
        transforms: optional transforms applied by consumers.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    self.transforms = transforms
    frames = sorted(glob(join(root, '*.ppm')))
    self.flow_list = sorted(glob(join(root, '*.flo')))
    assert (len(frames) // 2 == len(self.flow_list))
    self.image_list = [[frames[2 * k], frames[2 * k + 1]]
                       for k in range(len(self.flow_list))]
    assert len(self.image_list) == len(self.flow_list)
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when the frames (or
    # a negative request) are not already compliant.
    needs_adjust = (self.render_size[0] < 0 or self.render_size[1] < 0
                    or self.frame_size[0] % 64 or self.frame_size[1] % 64)
    if needs_adjust:
        self.render_size[0] = (self.frame_size[0] // 64) * 64
        self.render_size[1] = (self.frame_size[1] // 64) * 64
    args.inference_size = self.render_size
def __init__(self, args):
    """Collect ground-truth images matching a glob pattern.

    Args:
        args: dict with keys 'replicates', 'train' and 'file' (a glob
            pattern selecting the ground-truth images).
    """
    super().__init__()
    self.replicates = args['replicates']
    self.render_size = []
    self.gt_images = []
    # Random rotation augmenter (bound of 9 -- units not shown here).
    self.rr = Random_Rotate(9)
    self.train = args['train']
    self.gt_images = glob(args['file'])
    if not self.train:
        # Keep a fixed-size random subset for validation.
        # NOTE(review): samples 30 images although the original comment
        # said 300 -- confirm which count is intended.
        self.gt_images = random.sample(self.gt_images, 30)
    self.size = len(self.gt_images)
    self.frame_size = frame_utils.read_gen(self.gt_images[0]).shape
    # Use the frame size as-is when already divisible by 64; otherwise
    # snap down to the nearest multiples of 64.
    if (self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size.append(((self.frame_size[0]) // 64) * 64)
        self.render_size.append(((self.frame_size[1]) // 64) * 64)
    else:
        self.render_size.append(self.frame_size[0])
        self.render_size.append(self.frame_size[1])
def __init__(self, args, is_cropped, root='/path/to/flyingthings3d', dstype='frames_cleanpass', replicates=1, validation=False):
    """Index FlyingThings3D left-camera frame pairs with .pfm flows.

    Splits the TRAIN/A, TRAIN/B and TRAIN/C scene directories into a
    ~10% validation slice and a ~90% training slice.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: dataset root containing `dstype` and 'optical_flow'.
        dstype: image pass subdirectory (e.g. 'frames_cleanpass').
        replicates: dataset-length multiplier.
        validation: select the held-out slice instead of training data.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    image_dirs_A = sorted(glob(join(root, dstype, 'TRAIN/A/*')))
    image_dirs_A_L = sorted([join(f, 'left') for f in image_dirs_A])
    image_dirs_B = sorted(glob(join(root, dstype, 'TRAIN/B/*')))
    image_dirs_B_L = sorted([join(f, 'left') for f in image_dirs_B])
    image_dirs_C = sorted(glob(join(root, dstype, 'TRAIN/C/*')))
    image_dirs_C_L = sorted([join(f, 'left') for f in image_dirs_C])
    flow_dirs_A = sorted(glob(join(root, 'optical_flow/TRAIN/A/*')))
    flow_dirs_A_L = sorted(
        [join(f, 'into_future/left') for f in flow_dirs_A])
    flow_dirs_B = sorted(glob(join(root, 'optical_flow/TRAIN/B/*')))
    flow_dirs_B_L = sorted(
        [join(f, 'into_future/left') for f in flow_dirs_B])
    flow_dirs_C = sorted(glob(join(root, 'optical_flow/TRAIN/C/*')))
    flow_dirs_C_L = sorted(
        [join(f, 'into_future/left') for f in flow_dirs_C])
    len_A_L = len(image_dirs_A_L)
    len_B_L = len(image_dirs_B_L)
    len_C_L = len(image_dirs_C_L)
    assert (len_A_L == len(flow_dirs_A_L))
    assert (len_B_L == len(flow_dirs_B_L))
    assert (len_C_L == len(flow_dirs_C_L))
    # First ~10% of each letter split is reserved for validation.
    num_A = int(0.1 * len_A_L)
    num_B = int(0.1 * len_B_L)
    num_C = int(0.1 * len_C_L)
    if (validation):
        # NOTE(review): [:num - 1] drops one directory from each
        # validation slice relative to the [num:] training slice --
        # confirm whether [:num] was intended.
        image_dirs_L = image_dirs_A_L[:num_A - 1] + image_dirs_B_L[:num_B - 1] + image_dirs_C_L[:num_C - 1]
        flow_dirs = flow_dirs_A_L[:num_A - 1] + flow_dirs_B_L[:num_B - 1] + flow_dirs_C_L[:num_C - 1]
    else:
        image_dirs_L = image_dirs_A_L[num_A:] + image_dirs_B_L[
            num_B:] + image_dirs_C_L[num_C:]
        flow_dirs = flow_dirs_A_L[num_A:] + flow_dirs_B_L[
            num_B:] + flow_dirs_C_L[num_C:]
    self.image_list = []
    self.flow_list = []
    for ldir, fdir in zip(image_dirs_L, flow_dirs):
        Limages = sorted(glob(join(ldir, '*.png')))
        flows = sorted(glob(join(fdir, '*.pfm')))
        # Flow i maps frame i -> i+1. NOTE(review): the last flow is
        # skipped here (range(len(flows) - 1)); confirm intentional.
        for i in range(len(flows) - 1):
            self.image_list += [[Limages[i], Limages[i + 1]]]
            self.flow_list += [flows[i]]
    assert len(self.image_list) == len(self.flow_list)
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when needed.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
'--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).') parser.add_argument("--rgb_max", type=float, default=255.) args = parser.parse_args() # initial a Net net = FlowNet2(args).cuda() # load the state_dict state_dict = torch.load("./FlowNet2_checkpoint.pth.tar") net.load_state_dict(state_dict["state_dict"]) # load the image pair, you can find this operation in dataset.py img1_fn = "./flownet2-docker/data/0000000-imgL.png" img2_fn = "./flownet2-docker/data/0000001-imgL.png" pim1 = read_gen(img1_fn) pim2 = read_gen(img2_fn) # return numpy array with shape h,w,3 img1 = Image.open(img1_fn) img2 = Image.open(img2_fn) assert (img1.size == img2.size) width, height = img1.size divisor = 64. adapted_width = int(ceil(width / divisor) * divisor) adapted_height = int(ceil(height / divisor) * divisor) img1 = img1.resize((adapted_width, adapted_height), Image.BICUBIC) img2 = img1.resize((adapted_width, adapted_height), Image.BICUBIC) pim1 = np.array(img1) pim2 = np.array(img2)
img_path2 = '/disk1/fanyi-data/DAVIS/DAVIS/JPEGImages/480p/tennis-vest/00023.jpg' flow_file_path = '/home/fanyix/code/flownet2-pytorch/work/flow.flo' flow_img_path = '/home/fanyix/code/flownet2-pytorch/work/flow.png' #initial a Net net = FlowNet2(args).cuda() pretrained_dict = torch.load(args.model_path)['state_dict'] model_dict = net.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} model_dict.update(pretrained_dict) net.load_state_dict(model_dict) net.cuda() net.eval() #load the image pair, you can find this operation in dataset.py pim1 = read_gen(img_path1) pim2 = read_gen(img_path2) orig_h, orig_w = pim1.shape[0], pim1.shape[1] h, w = compute_size(orig_h, orig_w, divisible=64) pim1 = resize(pim1, (h, w), anti_aliasing=True) * 255 pim2 = resize(pim2, (h, w), anti_aliasing=True) * 255 images = [pim1, pim2] images = np.array(images).transpose(3, 0, 1, 2) im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda() #process the image pair to obtian the flow result = net(im).squeeze() #save flow, I reference the code in scripts/run-flownet.py in flownet2-caffe project def writeFlow(name, flow):
def main():
    """Run flow-based visual servoing in a Habitat environment.

    argv: <folder> x y z w p q r -- an output folder followed by the
    initial state (position plus quaternion components). Iterates
    CEM-optimized 6-DoF velocity steps until the photometric error
    drops below 500 or 1500 steps elapse, logging progress and saving
    per-step renders plus an output GIF.
    """
    folder = sys.argv[1]
    # Fixed: np.float was removed in NumPy 1.24; the builtin float is
    # the documented replacement and behaves identically here.
    x = float(sys.argv[2])
    y = float(sys.argv[3])
    z = float(sys.argv[4])
    w = float(sys.argv[5])
    p = float(sys.argv[6])
    q = float(sys.argv[7])
    r = float(sys.argv[8])
    # Create folder for results
    if not os.path.exists(folder + '/results'):
        os.makedirs(folder + '/results')
    flow_utils = FlowNet2Utils()
    intermat = InteractionMatrix()
    init_state = [x, y, z, w, p, q, r]
    env = hs.HabitatEnv(folder, init_state, 'FLOW')
    cem = CEM(MSE, 6, sampleMethod='Gaussian',
              v_min=[-0.5, -0.5, -0.5, -0.5, -0.5, -0.5],
              v_max=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
    f = open(folder + "/log.txt", "w+")
    f_pe = open(folder + "/photo_error.txt", "w+")
    f_pose = open(folder + "/pose.txt", "w+")
    img_source_path = folder + "/results/" + "test.rgba.00000.00000.png"
    img_goal_path = folder + "/des.png"
    img_src = read_gen(img_source_path)
    img_goal = read_gen(img_goal_path)
    d1 = plt.imread(folder + "/results/" + "test.depth.00000.00000.png")
    photo_error_val = mse_(img_src, img_goal)
    print("Initial Photometric Error: ")
    print(photo_error_val)
    f.write("Photometric error = " + str(photo_error_val) + "\n")
    f_pe.write(str(photo_error_val) + "\n")
    start_time = time.time()
    step = 0
    while photo_error_val > 500 and step < 1500:
        f12 = flow_utils.flow_calculate(img_src, img_goal)
        if step == 0:
            # First step: use the rendered depth map directly.
            vel, Lsx, Lsy = intermat.getData(f12, d1)
        else:
            # Later steps: approximate inverse depth from the flow
            # magnitude between consecutive source frames.
            flow_depth_proxy = flow_utils.flow_calculate(img_src, pre_img_src)
            flow_depth = np.linalg.norm(flow_depth_proxy, axis=2)
            flow_depth = flow_depth.astype('float64')
            vel, Lsx, Lsy = intermat.getData(f12, 1 / flow_depth)
        gtf = np.array(f12)
        cem.Lsx = Lsx
        cem.Lsy = Lsy
        v = cem.eval(gtf)
        f.write("Processing Optimization Step: " + str(step) + "\n")
        f.write("Predicted Velocities: \n")
        f.write(str(v))
        f.write("\n")
        img_src, pre_img_src, d1 = env.example(v.reshape(1, 6), step + 1, folder)
        photo_error_val = mse_(img_src, img_goal)
        f.write("Photometric error = " + str(photo_error_val) + "\n")
        print(photo_error_val)
        f.write("Step Number: " + str(step) + "\n")
        f_pe.write(str(photo_error_val) + "\n")
        f_pose.write("Step : " + str(step) + "\n")
        f_pose.write("Pose : " + str(env.get_agent_pose()) + '\n')
        step = step + 1
    time_taken = time.time() - start_time
    f.write("Time Taken: " + str(time_taken) + "secs \n")
    # Cleanup
    f.close()
    f_pe.close()
    # Fixed: the pose log was previously never closed.
    f_pose.close()
    env.end_sim()
    del flow_utils
    del intermat
    del env
    # save indvidial image and gif
    onlyfiles = [f for f in listdir(folder + "/results") if f.endswith(".png")]
    onlyfiles.sort()
    images = []
    for filename in onlyfiles:
        images.append(imageio.imread(folder + '/results/' + filename))
    imageio.mimsave(folder + '/results/output.gif', images, fps=4)
def __init__(self, args, is_cropped, root='/path/to/frames/only/folder', iext='png', replicates=1):
    """Pair temporally consecutive frames from a Cityscapes-style folder.

    File names are assumed to encode city/video/frame fields separated
    by underscores with fixed-width positions -- TODO confirm against
    the actual naming scheme. Frames are re-sorted by (suffix, sequence)
    fields and only pairs with matching suffixes and consecutive frame
    numbers are kept.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: folder containing the frames.
        iext: image file extension.
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    images = sorted(glob(join(root, '*.' + iext)))
    self.name = []
    tmp_str = images[0].split('/')[-1]
    tmp_file_header = images[0][0:-1 * len(tmp_str)]
    for i in images:
        self.name.append(i.split('/')[-1])
    self.city_name = images[0].split('/')[-3]
    self.video_name = images[0].split('/')[-2]
    # Order by the trailing field first, then the sequence characters.
    self.name = sorted(self.name, key=lambda x: (x[15:-4], x[6:10]))
    # Rebuild full paths in the new order.
    images.clear()
    for i in self.name:
        images.append(tmp_file_header + i)
    self.image_list = []
    for i in range(len(images) - 1):
        im1 = images[i]
        im2 = images[i + 1]
        flag1 = im1.split('_')[-1]
        flag2 = im2.split('_')[-1]
        # Fixed: frame ids were parsed with eval() on a string stripped
        # of leading zeros, which is unsafe and raises SyntaxError for
        # all-zero ids such as '000000'; int() parses leading zeros
        # directly and yields the same value otherwise.
        frame1 = int(im1.split('_')[-3])
        frame2 = int(im2.split('_')[-3])
        if flag1 == flag2 and frame2 == frame1 + 1:
            self.image_list += [[im1, im2]]
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when needed.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
# parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).') # parser.add_argument("--rgb_max", type=float, default=255.) # args = parser.parse_args() args = argparse.Namespace(fp16=False, rgb_max=255.0) #initial a Net net = FlowNet2C(args).cuda() #load the state_dict dict = torch.load("../works/FlowNet2C_model_best.pth.tar") # dict = torch.load("../data/FlowNet2_checkpoint.pth.tar") # print(dict["state_dict"]) net.load_state_dict(dict["state_dict"]) #load the image pair, you can find this operation in dataset.py pim1 = read_gen(imAddr1) pim2 = read_gen(imAddr2) print(pim1.shape) images = [pim1, pim2] images = np.array(images).transpose(3, 0, 1, 2) im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda() #process the image pair to obtian the flow result = net(im)[0].squeeze() #save flow, I reference the code in scripts/run-flownet.py in flownet2-caffe project def writeFlow(name, flow): f = open(name, 'wb') f.write('PIEH'.encode('utf-8'))
def __getitem__(self, index):
    """Return a resized image pair (plus resized originals in train mode).

    Test mode: returns (img1, img2, extra_info[index]) resized to
    960x528. Otherwise returns (img1, img2, img1_orig, img2_orig), all
    resized to 960x528, and seeds each dataloader worker once.
    All tensors are (3, H, W) float.
    """
    # Fixed: removed unused `scaledown` locals and duplicated literal
    # size assignments; both paths use one shared target resolution.
    # NOTE(review): 528 rather than 540 -- presumably chosen for
    # network divisibility; confirm.
    width = 960
    height = 528
    if self.is_test:
        img1 = frame_utils.read_gen(self.image_list[index][0])
        img2 = frame_utils.read_gen(self.image_list[index][1])
        img1 = img1.resize((width, height))
        img2 = img2.resize((width, height))
        img1 = np.array(img1).astype(np.uint8)[..., :3]
        img2 = np.array(img2).astype(np.uint8)[..., :3]
        img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
        img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
        return img1, img2, self.extra_info[index]
    if not self.init_seed:
        # Seed torch/numpy/random once per dataloader worker so random
        # streams differ across workers but stay deterministic within one.
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is not None:
            torch.manual_seed(worker_info.id)
            np.random.seed(worker_info.id)
            random.seed(worker_info.id)
        self.init_seed = True
    index = index % len(self.image_list)
    img1 = frame_utils.read_gen(self.image_list[index][0])
    img2 = frame_utils.read_gen(self.image_list[index][1])
    img1_orig = img1
    img2_orig = img2
    img1 = img1.resize((width, height))
    img2 = img2.resize((width, height))
    # The "original" copies are downscaled to the same size as the
    # working pair.
    img1_orig = img1_orig.resize((width, height))
    img2_orig = img2_orig.resize((width, height))
    img1_orig = np.array(img1_orig).astype(np.uint8)
    img2_orig = np.array(img2_orig).astype(np.uint8)
    img1 = np.array(img1).astype(np.uint8)
    img2 = np.array(img2).astype(np.uint8)
    # Grayscale: replicate the single channel to RGB; otherwise drop
    # any alpha channel.
    if len(img1.shape) == 2:
        img1 = np.tile(img1[..., None], (1, 1, 3))
        img2 = np.tile(img2[..., None], (1, 1, 3))
        img1_orig = np.tile(img1_orig[..., None], (1, 1, 3))
        img2_orig = np.tile(img2_orig[..., None], (1, 1, 3))
    else:
        img1 = img1[..., :3]
        img2 = img2[..., :3]
        img1_orig = img1_orig[..., :3]
        img2_orig = img2_orig[..., :3]
    img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
    img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
    img1_orig = torch.from_numpy(img1_orig).permute(2, 0, 1).float()
    img2_orig = torch.from_numpy(img2_orig).permute(2, 0, 1).float()
    return img1, img2, img1_orig, img2_orig
parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).') parser.add_argument("--rgb_max", type=float, default=255.) args = parser.parse_args() #initial a Net net = FlowNet2(args).cuda() #load the state_dict #PATH = "/mnt/data/flownet2-pytorch/FlowNet2_checkpoint.pth.tar" #PATH = "work/FlowNet2_checkpoint.pth.tar" dict = torch.load("/mnt/data/flownet2-pytorch/FlowNet2_checkpoint.pth.tar") #dict = torch.load("work/FlowNet2_checkpoint.pth.tar") net.load_state_dict(dict["state_dict"]) #load the image pair, you can find this operation in dataset.py pim1 = read_gen("/mnt/data/FlyingChairs_examples/0000007-img0.ppm") pim2 = read_gen("/mnt/data/FlyingChairs_examples/0000007-img1.ppm") images = [pim1, pim2] images = np.array(images).transpose(3, 0, 1, 2) im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda() #process the image pair to obtian the flow result = net(im).squeeze() #save flow, I reference the code in scripts/run-flownet.py in flownet2-caffe project def writeFlow(name, flow): f = open(name, 'wb') f.write('PIEH'.encode('utf-8')) np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f) flow = flow.astype(np.float32)
def __init__(self, args, is_cropped=False, scanSubdir=False, annotation_file='', root='/path/to/frames/only/folder', iext='png', replicates=1):
    """Index frame-interpolation triplets: (frame_i, frame_i+2) inputs
    with frame_i+1 as the reference target.

    Args:
        args: namespace providing crop_size; its inference_size is
            overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        scanSubdir: scan every immediate subdirectory of `root`.
        annotation_file: Vimeo90k-style listing of relative sequence
            directories; takes effect when scanSubdir is False.
        root: data root (scanned directly when neither mode applies).
        iext: image file extension.
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.render_size = [-1, -1]
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.replicates = replicates
    self.rgb_max = 255
    self.in_imgs = []
    self.ref_imgs = []

    def parseTrainData(path):
        # Even-indexed frames are the input pair; the odd frame between
        # them is the interpolation reference.
        images = sorted(glob(join(path, '*.' + iext)))
        for i in range(0, len(images) - 2, 2):
            im1 = images[i]
            ref = images[i + 1]
            im2 = images[i + 2]
            self.in_imgs += [[im1, im2]]
            self.ref_imgs += [[ref]]

    if scanSubdir:
        print(
            f"[WARNING]: assuming that all samples have the same or higher resolution than {self.crop_size}"
        )
        subdir_paths = [f.path for f in os.scandir(root) if f.is_dir()]
        for subdir in subdir_paths:
            parseTrainData(subdir)
    elif annotation_file != '':  # Vimeo90k
        print("[LOG] Loading Vimeo90k from .txt description")
        # Fixed: the annotation file was opened twice and never closed;
        # read it once and derive both lists from the same entries.
        with open(annotation_file) as ann:
            entries = [x.strip() for x in ann]
        subdir_paths = [f"{root}/{x}/" for x in entries]
        self.ref_names = [f"{x.replace('/', '_')}.png" for x in entries]
        for subdir in subdir_paths:
            parseTrainData(subdir)
        # Fixed: removed a stray no-op `self.ref_names` expression here.
    else:
        parseTrainData(root)
        self.ref_names = [x[0].split('/')[-1] for x in self.ref_imgs]
    self.size = len(self.in_imgs)
    print(f"Total samples: {self.size}")
    self.frame_size = frame_utils.read_gen(self.in_imgs[0][0]).shape
    # Snap the render size down to multiples of 64 when needed.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
    assert (len(self.in_imgs) == len(self.ref_imgs))
def __init__(self, args, is_cropped, root='/path/to/flyingchairs', root2='/path/to/flyingthings3d', dstype='frames_cleanpass', replicates=1):
    """Combine FlyingChairs flow pairs with FlyingThings3D disparity pairs.

    FlyingChairs contributes (img0, img1) pairs with .flo flow targets;
    FlyingThings3D contributes (left, right) stereo pairs with .pfm
    disparity targets appended to the same lists.

    Args:
        args: namespace providing crop_size and inference_size; its
            inference_size is overwritten with the adjusted render size.
        is_cropped: whether consumers should random-crop to crop_size.
        root: FlyingChairs data directory.
        root2: FlyingThings3D root containing `dstype` and 'disparity'.
        dstype: FlyingThings3D image pass subdirectory.
        replicates: dataset-length multiplier.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates
    # --- FlyingChairs: consecutive ppm frames form one pair per flow.
    images = sorted(glob(join(root, '*.ppm')))
    self.flow_list = sorted(glob(join(root, '*.flo')))
    assert (len(images) // 2 == len(self.flow_list))
    self.image_list = []
    for i in range(len(self.flow_list)):
        im1 = images[2 * i]
        im2 = images[2 * i + 1]
        self.image_list += [[im1, im2]]
    assert len(self.image_list) == len(self.flow_list)
    # --- FlyingThings3D: left/right image dirs with left disparity.
    image_dirs_A = sorted(glob(join(root2, dstype, 'TRAIN/A/*')))
    image_dirs_A_L = sorted([join(f, 'left') for f in image_dirs_A])
    image_dirs_A_R = sorted([join(f, 'right') for f in image_dirs_A])
    image_dirs_B = sorted(glob(join(root2, dstype, 'TRAIN/B/*')))
    image_dirs_B_L = sorted([join(f, 'left') for f in image_dirs_B])
    image_dirs_B_R = sorted([join(f, 'right') for f in image_dirs_B])
    image_dirs_C = sorted(glob(join(root2, dstype, 'TRAIN/C/*')))
    image_dirs_C_L = sorted([join(f, 'left') for f in image_dirs_C])
    image_dirs_C_R = sorted([join(f, 'right') for f in image_dirs_C])
    disp_dirs_A = sorted(glob(join(root2, 'disparity/TRAIN/A/*')))
    disp_dirs_A_L = sorted([join(f, 'left') for f in disp_dirs_A])
    disp_dirs_B = sorted(glob(join(root2, 'disparity/TRAIN/B/*')))
    disp_dirs_B_L = sorted([join(f, 'left') for f in disp_dirs_B])
    disp_dirs_C = sorted(glob(join(root2, 'disparity/TRAIN/C/*')))
    disp_dirs_C_L = sorted([join(f, 'left') for f in disp_dirs_C])
    len_A_L = len(image_dirs_A_L)
    len_B_L = len(image_dirs_B_L)
    len_C_L = len(image_dirs_C_L)
    assert (len_A_L == len(image_dirs_A_R))
    assert (len_B_L == len(image_dirs_B_R))
    assert (len_C_L == len(image_dirs_C_R))
    assert (len_A_L == len(disp_dirs_A_L))
    assert (len_B_L == len(disp_dirs_B_L))
    assert (len_C_L == len(disp_dirs_C_L))
    # First ~10% of each letter split is excluded (held out elsewhere).
    num_A = int(0.1 * len_A_L)
    num_B = int(0.1 * len_B_L)
    num_C = int(0.1 * len_C_L)
    image_dirs_L = image_dirs_A_L[num_A:] + image_dirs_B_L[
        num_B:] + image_dirs_C_L[num_C:]
    image_dirs_R = image_dirs_A_R[num_A:] + image_dirs_B_R[
        num_B:] + image_dirs_C_R[num_C:]
    disp_dirs = disp_dirs_A_L[num_A:] + disp_dirs_B_L[
        num_B:] + disp_dirs_C_L[num_C:]
    for ldir, rdir, ddir in zip(image_dirs_L, image_dirs_R, disp_dirs):
        Limages = sorted(glob(join(ldir, '*.png')))
        Rimages = sorted(glob(join(rdir, '*.png')))
        disps = sorted(glob(join(ddir, '*.pfm')))
        for i in range(len(disps)):
            self.image_list += [[Limages[i], Rimages[i]]]
            self.flow_list += [disps[i]]
    assert len(self.image_list) == len(self.flow_list)
    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the render size down to multiples of 64 when needed.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (
            self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
def __init__(self, args, is_cropped, root='/path/to/flyingthings3d', dstype='frames_cleanpass', replicates=1, validation=False):
    """FlyingThings3D stereo pairs with left-view object-contour targets.

    Scans TRAIN/{A,B,C} of *root*, pairing each left/right image with the
    left-camera object-contour map (.pgm).  Within each subset the first
    ~10% of scenes form the validation split (``validation=True``); the
    remainder is the training split.

    Args:
        args: namespace providing ``crop_size`` and ``inference_size``.
        is_cropped: whether __getitem__ should random-crop (stored only).
        root: FlyingThings3D root directory.
        dstype: image pass subdirectory, e.g. 'frames_cleanpass'.
        replicates: dataset replication factor (stored only).
        validation: select the held-out ~10% instead of the training 90%.
    """
    self.args = args
    self.is_cropped = is_cropped
    self.crop_size = args.crop_size
    self.render_size = args.inference_size
    self.replicates = replicates

    # The A/B/C subsets are handled identically, so gather their directory
    # lists in one loop instead of triplicated code.
    image_dirs_L, image_dirs_R, cont_dirs = [], [], []
    for subset in ('A', 'B', 'C'):
        scene_dirs = sorted(glob(join(root, dstype, 'TRAIN/' + subset + '/*')))
        left_dirs = sorted([join(f, 'left') for f in scene_dirs])
        right_dirs = sorted([join(f, 'right') for f in scene_dirs])
        contour_scene_dirs = sorted(glob(join(root, 'object_contour/TRAIN/' + subset + '/*')))
        contour_left_dirs = sorted([join(f, 'left') for f in contour_scene_dirs])
        assert (len(left_dirs) == len(right_dirs))
        assert (len(left_dirs) == len(contour_left_dirs))
        num_val = int(0.1 * len(left_dirs))
        if validation:
            # NOTE(review): [:num_val - 1] drops the last validation scene,
            # and selects all-but-one scene when num_val == 0; preserved
            # as-is so the original split stays reproducible.
            image_dirs_L += left_dirs[:num_val - 1]
            image_dirs_R += right_dirs[:num_val - 1]
            cont_dirs += contour_left_dirs[:num_val - 1]
        else:
            image_dirs_L += left_dirs[num_val:]
            image_dirs_R += right_dirs[num_val:]
            cont_dirs += contour_left_dirs[num_val:]

    self.image_list = []
    self.cont_list = []
    for ldir, rdir, cdir in zip(image_dirs_L, image_dirs_R, cont_dirs):
        Limages = sorted(glob(join(ldir, '*.png')))
        Rimages = sorted(glob(join(rdir, '*.png')))
        conts = sorted(glob(join(cdir, '*.pgm')))
        for i in range(len(conts)):
            self.image_list += [[Limages[i], Rimages[i]]]
            self.cont_list += [conts[i]]
    assert len(self.image_list) == len(self.cont_list)

    self.size = len(self.image_list)
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape
    # Snap the inference size down to a multiple of 64 (network stride)
    # when unset or when frames are not 64-aligned.
    if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0] % 64) or (self.frame_size[1] % 64):
        self.render_size[0] = ((self.frame_size[0]) // 64) * 64
        self.render_size[1] = ((self.frame_size[1]) // 64) * 64
    args.inference_size = self.render_size
def __init__(self, root, is_cropped=True, crop_size=None, dstype='clean', replicates=1, train=True, sequence_list=None, transforms=None):
    """Sintel-style flow dataset split into train/test by scene sequence.

    In training mode, the first (up to) 19 distinct sequences encountered
    while scanning the .flo files become ``self.sequence_list`` and are the
    training set.  In test mode the caller passes that same list and those
    sequences are excluded.

    Args:
        root: dataset root containing 'flow' and *dstype* subdirectories.
        is_cropped: whether __getitem__ should crop (stored only).
        crop_size: [h, w] crop; defaults to [4, 4].  Also used as the
            render size.
        dstype: image pass subdirectory, e.g. 'clean'.
        replicates: dataset replication factor (stored only).
        train: select the training (True) or held-out (False) sequences.
        sequence_list: training sequences to exclude when ``train`` is False.
        transforms: stored for later use by the caller.
    """
    # Avoid the shared mutable default-argument pitfall: the former
    # ``crop_size=[4, 4]`` default list would be aliased across instances
    # through ``self.crop_size`` / ``self.render_size``.
    if crop_size is None:
        crop_size = [4, 4]
    self.is_cropped = is_cropped
    self.crop_size = crop_size
    #self.crop_size_im2 = crop_size[1]
    self.render_size = crop_size
    self.replicates = replicates
    flow_root = join(root, 'flow')
    image_root = join(root, dstype)
    self.train = train
    self.transforms = transforms
    file_list = sorted(glob(join(flow_root, '*/*.flo')))
    self.flow_list = []
    self.image_list = []
    if train:
        self.sequence_list = []
    else:
        self.sequence_list = sequence_list
    for file in file_list:
        if 'test' in file:
            # print file
            continue
        # The sequence name is the parent directory of the .flo file.
        sequence = file.split('/')[-2]
        if train and sequence not in self.sequence_list:
            if len(self.sequence_list) <= 18:
                # Still room: adopt this sequence for training (max 19).
                self.sequence_list.append(sequence)
            else:
                continue
        if not train and sequence in self.sequence_list:
            continue
        # Frame number is the last 4 digits before '.flo'; pair frame n
        # with frame n+1.
        fbase = file[len(flow_root) + 1:]
        fprefix = fbase[:-8]
        fnum = int(fbase[-8:-4])
        img1 = join(image_root, fprefix + "%04d" % (fnum + 0) + '.png')
        img2 = join(image_root, fprefix + "%04d" % (fnum + 1) + '.png')
        if not isfile(img1) or not isfile(img2) or not isfile(file):
            continue
        self.image_list += [[img1, img2]]
        self.flow_list += [file]
    self.size = len(self.image_list)
    # NOTE(review): uses ``.size`` (not ``.shape``) -- presumably read_gen
    # returns a PIL image here, so this is (W, H).  Confirm against callers.
    self.frame_size = frame_utils.read_gen(self.image_list[0][0]).size
    #if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0]%64) or (self.frame_size[1]%64):
    #    self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
    #    self.render_size[1] = ( (self.frame_size[1])//64 ) * 64
    assert (len(self.image_list) == len(self.flow_list))
parser = argparse.ArgumentParser()
parser.add_argument(
    '--fp16',
    action='store_true',
    help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
parser.add_argument("--rgb_max", type=float, default=255.)
args = parser.parse_args()

# Build the network.
net = FlowNet2(args).cuda()

# Load the pretrained weights.  Renamed the checkpoint variable -- the
# original ``dict`` shadowed the builtin ``dict`` type.
checkpoint = torch.load("model/FlowNet2_checkpoint.pth.tar")
net.load_state_dict(checkpoint["state_dict"])

# Load the image pair; the same read path is used in dataset.py.
pim1 = read_gen("dataset/my_photo-2cropped.png")
pim2 = read_gen("dataset/my_photo-3cropped.png")

# Pad each frame up to the next multiple of 64 (e.g. (384, 1248)); the
# network stride requires 64-aligned spatial dims.  Note this always adds a
# full extra 64 when a dimension is already aligned ((h // 64 + 1) * 64).
height = pim1.shape[0]  # 384 #736 #384 #384
width = pim1.shape[1]  # 1248 #1504 #704 #1248
pad_h = (height // 64 + 1) * 64
pad_w = (width // 64 + 1) * 64
top_pad = pad_h - pim1.shape[0]
left_pad = pad_w - pim1.shape[1]
# NOTE(review): despite its name, ``left_pad`` is applied AFTER the columns
# (right side); rows are padded on top.  Preserved as-is.
pim1 = np.lib.pad(pim1, ((top_pad, 0), (0, left_pad), (0, 0)), mode='constant', constant_values=0)
pim2 = np.lib.pad(pim2, ((top_pad, 0), (0, left_pad), (0, 0)), mode='constant', constant_values=0)
def __getitem__(self, index):
    """Return one sample (KITTI-style sparse-flow loader).

    Test mode: (img1, img2, frame_id).
    Train mode: (img1, img2, flow, valid) -- float tensors in CHW layout
    (flow is 2xHxW, valid is HxW after optional padding), optionally
    augmented and padded so both spatial dims are multiples of 8.
    """
    if self.is_test:
        frame_id = self.image_list[index][0]
        frame_id = frame_id.split('/')[-1]
        img1 = frame_utils.read_gen(self.image_list[index][0])
        img2 = frame_utils.read_gen(self.image_list[index][1])
        # Force RGB uint8, dropping any alpha channel.
        img1 = np.array(img1).astype(np.uint8)[..., :3]
        img2 = np.array(img2).astype(np.uint8)[..., :3]
        img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
        img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
        return img1, img2, frame_id

    # Seed numpy/random once per dataloader worker so augmentation is
    # decorrelated across workers but deterministic within one.
    if not self.init_seed:
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is not None:
            np.random.seed(worker_info.id)
            random.seed(worker_info.id)
            self.init_seed = True
    index = index % len(self.image_list)
    frame_id = self.image_list[index][0]
    frame_id = frame_id.split('/')[-1]
    img1 = frame_utils.read_gen(self.image_list[index][0])
    img2 = frame_utils.read_gen(self.image_list[index][1])
    # KITTI ground truth is sparse: readFlowKITTI also yields a validity mask.
    flow, valid = frame_utils.readFlowKITTI(self.flow_list[index])
    img1 = np.array(img1).astype(np.uint8)[..., :3]
    img2 = np.array(img2).astype(np.uint8)[..., :3]
    if self.do_augument:
        img1, img2, flow, valid = self.augumentor(img1, img2, flow, valid)
    img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
    img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
    flow = torch.from_numpy(flow).permute(2, 0, 1).float()
    valid = torch.from_numpy(valid).float()
    if self.do_pad:
        ht, wd = img1.shape[1:]
        pad_ht = (((ht // 8) + 1) * 8 - ht) % 8
        pad_wd = (((wd // 8) + 1) * 8 - wd) % 8
        pad_ht1 = [0, pad_ht]
        pad_wd1 = [pad_wd // 2, pad_wd - pad_wd // 2]
        # F.pad order for 4-D input: (left, right, top, bottom).
        pad = pad_wd1 + pad_ht1
        # Temporarily add a batch dim: replicate-padding needs 4-D input.
        img1 = img1.view(1, 3, ht, wd)
        img2 = img2.view(1, 3, ht, wd)
        flow = flow.view(1, 2, ht, wd)
        valid = valid.view(1, 1, ht, wd)
        img1 = torch.nn.functional.pad(img1, pad, mode='replicate')
        img2 = torch.nn.functional.pad(img2, pad, mode='replicate')
        # Padded flow is zero (no motion information there).
        flow = torch.nn.functional.pad(flow, pad, mode='constant', value=0)
        valid = torch.nn.functional.pad(valid, pad, mode='replicate', value=0)
        img1 = img1.view(3, ht + pad_ht, wd + pad_wd)
        img2 = img2.view(3, ht + pad_ht, wd + pad_wd)
        flow = flow.view(2, ht + pad_ht, wd + pad_wd)
        valid = valid.view(ht + pad_ht, wd + pad_wd)
    # Removed an unreachable ``if self.is_test: return ...`` here: the
    # test-mode path already returned at the top of the method.
    return img1, img2, flow, valid
# NOTE(review): whitespace-mangled fragment of a second copy of the FlowNet2
# demo script.  It is truncated at BOTH ends: it opens mid-way through a
# ``parser.add_argument(`` call whose first line is missing, and it ends
# inside ``writeFlow`` (only the 'PIEH' magic header is written; the
# width/height/data writes are cut off).  Left byte-identical -- recover the
# missing pieces from the complete copy of this script elsewhere in the file
# before attempting to run or refactor it.
help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).') parser.add_argument("--rgb_max", type=float, default=255.) args = parser.parse_args() # initial a Net net = FlowNet2(args).cuda() # load the state_dict dict = torch.load( "/home/hjj/PycharmProjects/flownet2_pytorch/FlowNet2_checkpoint.pth.tar" ) net.load_state_dict(dict["state_dict"]) # load the image pair, you can find this operation in dataset.py pim1 = read_gen( "/home/hjj/flownet2-master/data/FlyingChairs_examples/0000007-img0.ppm" ) pim2 = read_gen( "/home/hjj/flownet2-master/data/FlyingChairs_examples/0000007-img1.ppm" ) images = [pim1, pim2] images = np.array(images).transpose(3, 0, 1, 2) im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda() # process the image pair to obtian the flow result = net(im).squeeze() # save flow, I reference the code in scripts/run-flownet.py in flownet2-caffe project def writeFlow(name, flow): f = open(name, 'wb') f.write('PIEH'.encode('utf-8'))
def __getitem__(self, index):
    """Return one sample (RAFT-style generic flow loader).

    Test mode: (img1, img2, extra_info).
    Otherwise: (img1, img2, flow, valid) as float tensors in CHW layout,
    where ``valid`` marks pixels with usable ground-truth flow.
    """
    if self.is_test:
        img1 = frame_utils.read_gen(self.image_list[index][0])
        img2 = frame_utils.read_gen(self.image_list[index][1])
        # Force RGB uint8, dropping any alpha channel.
        img1 = np.array(img1).astype(np.uint8)[..., :3]
        img2 = np.array(img2).astype(np.uint8)[..., :3]
        img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
        img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
        return img1, img2, self.extra_info[index]

    # Seed all RNGs once per dataloader worker so augmentation is
    # decorrelated across workers but deterministic within one.
    if not self.init_seed:
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is not None:
            torch.manual_seed(worker_info.id)
            np.random.seed(worker_info.id)
            random.seed(worker_info.id)
            self.init_seed = True

    index = index % len(self.image_list)
    valid = None
    # Sparse ground truth (e.g. KITTI) carries its own validity mask;
    # dense formats derive one below.
    if self.sparse:
        flow, valid = frame_utils.readFlowKITTI(self.flow_list[index])
    else:
        flow = frame_utils.read_gen(self.flow_list[index])

    img1 = frame_utils.read_gen(self.image_list[index][0])
    img2 = frame_utils.read_gen(self.image_list[index][1])

    flow = np.array(flow).astype(np.float32)
    img1 = np.array(img1).astype(np.uint8)
    img2 = np.array(img2).astype(np.uint8)

    # grayscale images: replicate the single channel to RGB.
    if len(img1.shape) == 2:
        img1 = np.tile(img1[..., None], (1, 1, 3))
        img2 = np.tile(img2[..., None], (1, 1, 3))
    else:
        # Drop any alpha channel.
        img1 = img1[..., :3]
        img2 = img2[..., :3]

    # Sparse augmentation must also transform the validity mask.
    if self.augmentor is not None:
        if self.sparse:
            img1, img2, flow, valid = self.augmentor(
                img1, img2, flow, valid)
        else:
            img1, img2, flow = self.augmentor(img1, img2, flow)

    # cv2.imshow(f'img1{index}', cv2.cvtColor(img1, cv2.COLOR_RGB2BGR))
    # cv2.imshow(f'img2{index}', cv2.cvtColor(img2, cv2.COLOR_RGB2BGR))
    # cv2.imshow(f'flow{index}', cv2.cvtColor(flow_viz.flow_to_image(flow), cv2.COLOR_RGB2BGR))
    # cv2.imshow(f'flowx{index}', flow[:, :, 0] / flow.max())
    # cv2.imshow(f'flowy{index}', flow[:, :, 1] / flow.max())

    img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
    img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
    flow = torch.from_numpy(flow).permute(2, 0, 1).float()

    if valid is not None:
        valid = torch.from_numpy(valid)
    else:
        # Dense GT: treat implausibly large flow (>= 1000 px in either
        # component) as invalid.
        valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000)
    # cv2.imshow(f'valid{index}', valid.float().numpy())
    # cv2.waitKey(0)
    return img1, img2, flow, valid.float()
def __getitem__(self, index, debug=False):
    """Return ([images], [flow12], [mask]) for change-detection training.

    Loads an image pair, its forward flow, and a change-detection mask,
    resizes images and flow to 1220x370, applies the same (random or
    center) crop to all of them, and converts to float32 tensors:
    ``images`` is C x 2 x H x W, ``flow12`` is 2 x H x W, ``mask`` is
    1 x H x W.

    Args:
        index: sample index, taken modulo the dataset size.
        debug: print intermediate shapes/types when True.
    """
    index = index % self.size
    img1 = frame_utils.read_gen(self.image_list[index][0])
    img2 = frame_utils.read_gen(self.image_list[index][1])
    flow12 = frame_utils.read_gen_flow(
        self.flow_list[index])  # open-cv dependent !!!!!!!!!!!!
    mask = frame_utils.read_gen_mask(self.cdm_list[index])
    # Resize images and flow to a fixed size.  NOTE(review): the flow
    # *vectors* are not rescaled to the new resolution here -- confirm
    # downstream compensates.
    img1 = cv2.resize(img1, dsize=(1220, 370), interpolation=cv2.INTER_LINEAR)
    img2 = cv2.resize(img2, dsize=(1220, 370), interpolation=cv2.INTER_LINEAR)
    flow12 = cv2.resize(flow12, dsize=(1220, 370), interpolation=cv2.INTER_LINEAR)
    if debug:
        print('img1', img1.shape)
        print('img2', img2.shape)
        print('flow12', flow12.shape)
        print('mask', mask.shape)
    if debug:
        print(type(img1))
        print(type(img2))
        print(type(flow12))
        print('img1 shape')
        print((img1.shape))
        print('img2 shape')
        print((img2.shape))
        print('flow shape')
        print((flow12.shape))
        print('********************************')
        print('********************************')
        print('change detection MASK !!!!!!!!!!')
        print(mask.shape)
        print(np.max(mask))
        print('********************************')
        print('********************************')
    images = [img1, img2]
    image_size = img1.shape[:2]  # width X height
    if self.is_cropped:
        cropper = StaticRandomCrop(image_size, self.crop_size)
    else:
        cropper = StaticCenterCrop(image_size, self.render_size)
    # BUG FIX: in Python 3 ``map`` returns a lazy iterator, and
    # ``np.array(<map object>)`` below produced a useless 0-d object array
    # (breaking the later ``transpose``).  Materialize the cropped frames.
    images = list(map(cropper, images))
    flow12 = cropper(flow12)
    # Give the mask a channel axis so the same cropper applies to it.
    mask = np.expand_dims(mask, axis=2)
    mask = cropper(mask)
    images = np.array(images)
    flow12 = np.array(flow12)
    mask = np.array(mask)
    if debug:
        print('croped images numpy ')
        print(type(images))
        print((images.shape))
        print(' end ------------------------')
    # (2, H, W, C) -> (C, 2, H, W); flow/mask HWC -> CHW.
    images = images.transpose(3, 0, 1, 2)
    flow12 = flow12.transpose(2, 0, 1)  # to be tested !!!!!!!!!!!
    mask = mask.transpose(2, 0, 1)
    if debug:
        print('images shape')
        print((images.shape))
        print('flow shape')
        print((flow12.shape))
        print('********************************')
        print('********************************')
        print('change detection MASK CROP TESTING !!!!!!!!!!')
        print(mask.shape)
        print('********************************')
        print('********************************')
    flow12 = torch.from_numpy(flow12.astype(np.float32))
    images = torch.from_numpy(images.astype(np.float32))
    mask = torch.from_numpy(mask.astype(np.float32))
    return [images], [flow12], [mask]