def test(self, restore_model, save_dir):
    """Run flow inference over the configured dataset and save results.

    Builds the TF1 inference graph once, restores weights from
    `restore_model`, then for every sample writes forward/backward flow
    visualizations (.png) and raw flows (.flo) into `save_dir`.

    Args:
        restore_model: checkpoint path passed to `tf.train.Saver.restore`.
        save_dir: output directory; created if it does not exist.
    """
    dataset = BasicDataset(data_list_file=self.dataset_config['data_list_file'],
                           img_dir=self.dataset_config['img_dir'])
    # Column 2 of the data list holds the per-sample save name.
    save_name_list = dataset.data_list[:, 2]
    iterator = dataset.create_one_shot_iterator(dataset.data_list,
                                                num_parallel_calls=self.num_input_threads)
    batch_img0, batch_img1, batch_img2 = iterator.get_next()
    flow_fw, flow_bw = pyramid_processing(batch_img0, batch_img1, batch_img2,
                                          train=False, trainable=False, is_scale=True)
    # For KITTI we set max_flow=256, while for Sintel we set max_flow=128
    flow_fw_color = flow_to_color(flow_fw['full_res'], mask=None, max_flow=256)
    flow_bw_color = flow_to_color(flow_bw['full_res'], mask=None, max_flow=256)

    restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    saver = tf.train.Saver(var_list=restore_vars)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    sess.run(iterator.initializer)
    saver.restore(sess, restore_model)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for i in range(dataset.data_num):
        # BUGFIX: the original unpacked the 4th fetch into `np_flow_fw_color`
        # twice, leaving `np_flow_bw_color` undefined (NameError below).
        np_flow_fw, np_flow_bw, np_flow_fw_color, np_flow_bw_color = sess.run(
            [flow_fw['full_res'], flow_bw['full_res'], flow_fw_color, flow_bw_color])
        misc.imsave('%s/flow_fw_color_%s.png' % (save_dir, save_name_list[i]), np_flow_fw_color[0])
        misc.imsave('%s/flow_bw_color_%s.png' % (save_dir, save_name_list[i]), np_flow_bw_color[0])
        write_flo('%s/flow_fw_%s.flo' % (save_dir, save_name_list[i]), np_flow_fw[0])
        write_flo('%s/flow_bw_%s.flo' % (save_dir, save_name_list[i]), np_flow_bw[0])
        # i+1 so progress reads 1..N, consistent with the sibling test()/eval().
        print('Finish %d/%d' % (i + 1, dataset.data_num))
def test(self, restore_model, save_dir, is_normalize_img=True, prefix=''):
    """Run flow inference at arbitrary resolution and save color flow PNGs.

    Unlike the fixed-size variant, this pads each image up to the next
    multiple of 64 in both dimensions (the pyramid network's stride
    requirement), runs the network, then resizes the predicted flows back
    to the original resolution.

    Args:
        restore_model: checkpoint path for `tf.train.Saver.restore`.
        save_dir: output directory; created if missing.
        is_normalize_img: forwarded to BasicDataset.
        prefix: appended to the dataset image directory path.
    """
    dataset = BasicDataset(data_list_file=self.dataset_config['data_list_file'],
                           img_dir=self.dataset_config['img_dir'] + prefix,
                           is_normalize_img=is_normalize_img)
    # Last column of the data list holds the per-sample save name.
    save_name_list = dataset.data_list[:, -1]
    iterator = dataset.create_one_shot_iterator(dataset.data_list,
                                                num_parallel_calls=self.num_input_threads)
    batch_img0, batch_img1, batch_img2 = iterator.get_next()
    img_shape = tf.shape(batch_img0)
    h = img_shape[1]
    w = img_shape[2]
    # Round height/width up to the next multiple of 64 (no-op if already aligned).
    new_h = tf.where(tf.equal(tf.mod(h, 64), 0), h, (tf.to_int32(tf.floor(h / 64) + 1)) * 64)
    new_w = tf.where(tf.equal(tf.mod(w, 64), 0), w, (tf.to_int32(tf.floor(w / 64) + 1)) * 64)
    # method=1 selects nearest-neighbor in TF1's ResizeMethod enum.
    batch_img0 = tf.image.resize_images(batch_img0, [new_h, new_w], method=1, align_corners=True)
    batch_img1 = tf.image.resize_images(batch_img1, [new_h, new_w], method=1, align_corners=True)
    batch_img2 = tf.image.resize_images(batch_img2, [new_h, new_w], method=1, align_corners=True)
    flow_fw, flow_bw = pyramid_processing(batch_img0, batch_img1, batch_img2,
                                          train=False, trainable=False, is_scale=True)
    # Map predicted flows back to the original (pre-padding) resolution.
    flow_fw['full_res'] = flow_resize(flow_fw['full_res'], [h, w], method=1)
    flow_bw['full_res'] = flow_resize(flow_bw['full_res'], [h, w], method=1)
    flow_fw_color = flow_to_color(flow_fw['full_res'], mask=None, max_flow=256)
    flow_bw_color = flow_to_color(flow_bw['full_res'], mask=None, max_flow=256)

    restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    saver = tf.train.Saver(var_list=restore_vars)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    sess.run(iterator.initializer)
    saver.restore(sess, restore_model)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for i in range(dataset.data_num):
        # Raw flows are fetched too, but only the forward color image is
        # saved below (the .flo / backward writes are commented out).
        np_flow_fw, np_flow_bw, np_flow_fw_color, np_flow_bw_color = sess.run(
            [flow_fw['full_res'], flow_bw['full_res'], flow_fw_color, flow_bw_color])
        misc.imsave(('%s/' + '%s.png') % (save_dir, save_name_list[i]), np_flow_fw_color[0])
        # misc.imsave(('%s/' + prefix + '_%s.png') % (save_dir, save_name_list[i]), np_flow_fw_color[0])
        # misc.imsave(('%s/' + prefix + '_flow_fw_color_%s.png') % (save_dir, save_name_list[i]), np_flow_fw_color[0])
        # misc.imsave(('%s/' + prefix + '_flow_bw_color_%s.png') % (save_dir, save_name_list[i]), np_flow_bw_color[0])
        # write_flo('%s/flow_fw_%s.flo' % (save_dir, save_name_list[i]), np_flow_fw[0])
        # write_flo('%s/flow_bw_%s.flo' % (save_dir, save_name_list[i]), np_flow_bw[0])
        print('Finish %d/%d' % (i+1, dataset.data_num))
# NOTE(review): truncated fragment — this span begins mid-expression (its
# enclosing `def` line is missing from this chunk) and its inner helper
# `fb_check` is cut off after `norm_w`. It cannot be reviewed or restyled as a
# unit; preserved verbatim. Also note it never runs `sess.run(iterator.initializer)`
# before `saver.restore`, unlike the sibling test()/eval() methods — confirm
# against the original file whether that line was lost in the same truncation.
method=1, align_corners=True) batch_img2 = tf.image.resize_images(b_img2, [new_h, new_w], method=1, align_corners=True) flow_fw, flow_bw = pyramid_processing(batch_img0, batch_img1, batch_img2, train=False, trainable=False, is_scale=True) flow_fw['full_res'] = flow_resize(flow_fw['full_res'], [h, w], method=1) flow_bw['full_res'] = flow_resize(flow_bw['full_res'], [h, w], method=1) flow_fw_color = flow_to_color(flow_fw['full_res'], mask=None, max_flow=256) flow_bw_color = flow_to_color(flow_bw['full_res'], mask=None, max_flow=256) restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) saver = tf.train.Saver(var_list=restore_vars) sess = tf.Session() sess.run(tf.global_variables_initializer()) saver.restore(sess, restore_model) def fb_check(w_warp, w_back): #optimizable? cuda? weights = np.ones(w_warp.shape[:2]) norm_wb = np.linalg.norm(w_warp + w_back, axis=2)**2.0 norm_w = np.linalg.norm(w_warp, axis=2)**2.0
def sintel_raw_prediction(dataset_name, restore_model_dir, model_name,
                          sample_step=100000, dataset_config={}, is_scale=True,
                          num_parallel_calls=4, network_mode='v1'):
    """Predict flows + occlusion maps over 5-frame Sintel raw samples and save them.

    For each 5-frame window the network produces six flows; the four around
    the center frame (1->2, 2->1, 2->3, 3->2) are written as uint16 PNGs
    (KITTI-style encoding: flow * 64 + 2^15, third channel = valid mask of
    ones) plus their occlusion maps as grayscale PNGs.

    Args:
        dataset_name: first path component of the output directory.
        restore_model_dir / model_name / sample_step: locate the checkpoint
            '%s/%s/model-%d'.
        dataset_config: dict with 'data_list_file' and 'img_dir'.
            NOTE(review): mutable default argument; harmless here since it is
            only read, but callers should always pass it explicitly.
        is_scale, network_mode: forwarded to pyramid_processing_five_frame.
        num_parallel_calls: dataset input pipeline parallelism.
    """
    dataset = SintelRawDataset(data_list_file=dataset_config['data_list_file'],
                               img_dir=dataset_config['img_dir'])
    data_num = dataset.data_num
    iterator = dataset.create_prediction_iterator(
        dataset.data_list, num_parallel_calls=num_parallel_calls)
    batch_img0, batch_img1, batch_img2, batch_img3, batch_img4 = iterator.get_next()
    flow_fw_12, flow_bw_10, flow_fw_23, flow_bw_21, flow_fw_34, flow_bw_32 = pyramid_processing_five_frame(
        batch_img0, batch_img1, batch_img2, batch_img3, batch_img4,
        train=False, trainable=False, regularizer=None, is_scale=is_scale,
        network_mode=network_mode)
    # Forward/backward consistency occlusion estimation around the center frame.
    occlusion_12, occlusion_21 = occlusion(flow_fw_12['full_res'], flow_bw_21['full_res'])
    occlusion_23, occlusion_32 = occlusion(flow_fw_23['full_res'], flow_bw_32['full_res'])
    # Occlusion of the center frame itself: fw/bw flows should cancel; pixels
    # whose sum exceeds a magnitude-dependent threshold are marked occluded.
    mag_sq = length_sq(flow_bw_21['full_res']) + length_sq(flow_fw_23['full_res'])
    occ_thresh = 0.01 * mag_sq + 0.5
    occlusion_2 = tf.cast(
        length_sq(flow_bw_21['full_res'] + flow_fw_23['full_res']) > occ_thresh,
        tf.float32)
    flow_fw_12_color = flow_to_color(flow_fw_12['full_res'], mask=None, max_flow=256)
    flow_bw_21_color = flow_to_color(flow_bw_21['full_res'], mask=None, max_flow=256)
    flow_fw_23_color = flow_to_color(flow_fw_23['full_res'], mask=None, max_flow=256)
    flow_bw_32_color = flow_to_color(flow_bw_32['full_res'], mask=None, max_flow=256)
    # KITTI-style uint16 flow encoding: value = flow * 64 + 32768.
    flow_fw_12_uint16 = flow_fw_12['full_res'] * 64. + 32768
    flow_fw_12_uint16 = tf.cast(flow_fw_12_uint16, tf.uint16)
    flow_bw_21_uint16 = flow_bw_21['full_res'] * 64. + 32768
    flow_bw_21_uint16 = tf.cast(flow_bw_21_uint16, tf.uint16)
    flow_fw_23_uint16 = flow_fw_23['full_res'] * 64. + 32768
    flow_fw_23_uint16 = tf.cast(flow_fw_23_uint16, tf.uint16)
    flow_bw_32_uint16 = flow_bw_32['full_res'] * 64. + 32768
    flow_bw_32_uint16 = tf.cast(flow_bw_32_uint16, tf.uint16)

    restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    saver = tf.train.Saver(var_list=restore_vars)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    sess.run(iterator.initializer)
    save_dir = '/'.join([dataset_name, 'sample', model_name])
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver.restore(sess, '%s/%s/model-%d' % (restore_model_dir, model_name, sample_step))
    for i in range(data_num):
        [np_flow_fw_12_color, np_flow_bw_21_color, np_flow_fw_23_color,
         np_flow_bw_32_color, np_flow_fw_12, np_flow_bw_21, np_flow_fw_23,
         np_flow_bw_32, np_occlusion_12, np_occlusion_21, np_occlusion_23,
         np_occlusion_32, np_occlusion_2] = sess.run([
             flow_fw_12_color, flow_bw_21_color, flow_fw_23_color,
             flow_bw_32_color, flow_fw_12_uint16, flow_bw_21_uint16,
             flow_fw_23_uint16, flow_bw_32_uint16, occlusion_12, occlusion_21,
             occlusion_23, occlusion_32, occlusion_2])
        #h, w = np_flow_fw_12.shape[1:3]
        #flow_compare = np.zeros([2*h, 2*w, 3])
        #flow_compare[:h, :w, :] = np_flow_fw_12_color
        #flow_compare[h:2*h, :w, :] = np_flow_bw_21_color
        #flow_compare[:h, w:2*w, :] = np_flow_bw_32_color
        #flow_compare[h:2*h, w:2*w, :] = np_flow_fw_23_color
        #flow_compare = flow_compare * 255
        #flow_compare = flow_compare.astype('uint8')
        #misc.imsave('%s/flow_compare_%05d.png' % (save_dir, i), flow_compare)
        h, w = np_flow_fw_12.shape[1:3]
        # Third channel: constant-ones validity mask (KITTI flow PNG layout).
        ones_channel = np.ones([h, w, 1])
        np_flow_fw_12 = np_flow_fw_12[0]
        np_flow_bw_21 = np_flow_bw_21[0]
        np_flow_fw_23 = np_flow_fw_23[0]
        np_flow_bw_32 = np_flow_bw_32[0]
        np_flow_fw_12 = np.concatenate([np_flow_fw_12, ones_channel], -1)
        np_flow_bw_21 = np.concatenate([np_flow_bw_21, ones_channel], -1)
        np_flow_fw_23 = np.concatenate([np_flow_fw_23, ones_channel], -1)
        np_flow_bw_32 = np.concatenate([np_flow_bw_32, ones_channel], -1)
        np_flow_fw_12 = np_flow_fw_12.astype(np.uint16)
        np_flow_bw_21 = np_flow_bw_21.astype(np.uint16)
        np_flow_fw_23 = np_flow_fw_23.astype(np.uint16)
        np_flow_bw_32 = np_flow_bw_32.astype(np.uint16)
        # cv2.imwrite expects BGR channel order.
        np_flow_fw_12 = rgb_bgr(np_flow_fw_12)
        np_flow_bw_21 = rgb_bgr(np_flow_bw_21)
        np_flow_fw_23 = rgb_bgr(np_flow_fw_23)
        np_flow_bw_32 = rgb_bgr(np_flow_bw_32)
        cv2.imwrite('%s/flow_fw_12_%05d.png' % (save_dir, i), np_flow_fw_12)
        cv2.imwrite('%s/flow_bw_21_%05d.png' % (save_dir, i), np_flow_bw_21)
        cv2.imwrite('%s/flow_fw_23_%05d.png' % (save_dir, i), np_flow_fw_23)
        cv2.imwrite('%s/flow_bw_32_%05d.png' % (save_dir, i), np_flow_bw_32)
        #io.savemat('%s/flow_%04d.mat' % (save_dir, i), mdict={'flow_fw_12': np_flow_fw_12[0], 'flow_bw_21': np_flow_bw_21[0], 'flow_fw_23': np_flow_fw_23[0], 'flow_bw_32': np_flow_bw_32[0]})
        misc.imsave('%s/occlusion12_%05d.png' % (save_dir, i), np_occlusion_12[0, :, :, 0])
        misc.imsave('%s/occlusion21_%05d.png' % (save_dir, i), np_occlusion_21[0, :, :, 0])
        misc.imsave('%s/occlusion23_%05d.png' % (save_dir, i), np_occlusion_23[0, :, :, 0])
        misc.imsave('%s/occlusion32_%05d.png' % (save_dir, i), np_occlusion_32[0, :, :, 0])
        #misc.imsave('%s/occlusion2_%05d.png' %(save_dir, i), np_occlusion_2[0, :, :, 0])
        print('Finish %d/%d' % (i, data_num))
def sintel_prediction(dataset_name, restore_model_dir, model_name,
                      sample_step=100000, dataset_config={}, is_scale=True,
                      num_parallel_calls=4, network_mode='v1'):
    """Predict forward flow on Sintel triplets and save color visualizations.

    Builds the 3-frame inference graph, restores the checkpoint
    '%s/%s/model-%d' % (restore_model_dir, model_name, sample_step), and
    writes one forward-flow color PNG per sample to
    '<dataset_name>/sample/<model_name>'.

    Args:
        dataset_name: first path component of the output directory.
        restore_model_dir / model_name / sample_step: checkpoint location.
        dataset_config: dict with 'data_list_file', 'img_dir', 'dataset_type'.
            NOTE(review): mutable default argument; only read here, but
            callers should pass it explicitly.
        is_scale, network_mode: forwarded to pyramid_processing.
        num_parallel_calls: dataset input pipeline parallelism.
    """
    dataset = SintelDataset(data_list_file=dataset_config['data_list_file'],
                            img_dir=dataset_config['img_dir'],
                            dataset_type=dataset_config['dataset_type'])
    iterator = dataset.create_prediction_iterator(
        dataset.data_list, num_parallel_calls=num_parallel_calls)
    data_num = dataset.data_num
    batch_img0_raw, batch_img1_raw, batch_img2_raw, batch_img0, batch_img1, batch_img2 = iterator.get_next()
    flow_fw, flow_bw = pyramid_processing(batch_img0, batch_img1, batch_img2,
                                          train=False, trainable=False,
                                          is_scale=is_scale, network_mode=network_mode)
    img_shape = tf.shape(batch_img0)
    # All-ones mask: the fw/bw error image is computed over every pixel.
    mask = tf.ones([1, img_shape[1], img_shape[2], 1])
    fb_err_img = flow_error_image(flow_fw['full_res'], -flow_bw['full_res'], mask_occ=mask)
    flow_fw_color = flow_to_color(flow_fw['full_res'], mask=None, max_flow=256)
    flow_bw_color = flow_to_color(flow_bw['full_res'], mask=None, max_flow=256)
    flow_bw_color_minus = flow_to_color(-flow_bw['full_res'], mask=None, max_flow=256)

    restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    saver = tf.train.Saver(var_list=restore_vars)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    sess.run(iterator.initializer)
    save_dir = '/'.join([dataset_name, 'sample', model_name])
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver.restore(sess, '%s/%s/model-%d' % (restore_model_dir, model_name, sample_step))
    for i in range(data_num):
        # Many fetches feed only the commented-out comparison montage below;
        # currently only np_flow_fw_color is written to disk.
        np_batch_img0, np_batch_img1, np_batch_img2, np_flow_fw_color, \
            np_flow_bw_color, np_flow_fw, np_flow_bw, np_fb_err_img, \
            np_flow_bw_color_minus = sess.run([
                batch_img0_raw, batch_img1_raw, batch_img2_raw, flow_fw_color,
                flow_bw_color, flow_fw['full_res'], flow_bw['full_res'],
                fb_err_img, flow_bw_color_minus])
        #h, w = np_batch_img1.shape[1:3]
        #result_compare = np.zeros([3*h, 3*w, 3])
        #result_compare[:h, :w, :] = (np_batch_img0 + np_batch_img1) / 2
        #result_compare[h:2*h, :w, :] = np_flow_bw_color
        #result_compare[2*h:3*h, :w, :] = np_fb_err_img
        #result_compare[:h, w:2*w, :] = (np_batch_img1 + np_batch_img2) / 2
        #result_compare[h:2*h, w:2*w, :] = np_flow_fw_color
        #result_compare[2*h:3*h, w:2*w, :] = np_flow_bw_color_minus
        #result_compare[:h, 2*w:3*w, :] = np_batch_img0
        #result_compare[h:2*h, 2*w:3*w, :] = np_batch_img1
        #result_compare[2*h:3*h, 2*w:3*w, :] = np_batch_img2
        #result_compare = result_compare * 255
        #result_compare = result_compare.astype('uint8')
        #misc.imsave('%s/result_%04d.jpg' % (save_dir, i), result_compare)
        misc.imsave('%s/flow_%04d.png' % (save_dir, i), np_flow_fw_color[0])
        #io.savemat('%s/result_%04d.mat' % (save_dir, i), mdict={'flow_fw': np_flow_fw[0]})
        print('Finish %d/%d' % (i, data_num))
def eval(self, restore_model, save_dir, is_normalize_img=True):
    """Evaluate forward flow against KITTI-style ground truth (noc/occ).

    Pads images to multiples of 64, predicts flow, resizes it back, and
    accumulates EPE / outlier-percentage metrics over the dataset while
    saving per-sample flow and error visualizations to `save_dir`.

    Args:
        restore_model: checkpoint path for `tf.train.Saver.restore`.
        save_dir: output directory; created if missing.
        is_normalize_img: forwarded to the eval BasicDataset.
    """
    from test_datasets_eval import BasicDataset
    from error_metrics import flow_error_avg, outlier_pct, merge_dictionaries
    dataset = BasicDataset(data_list_file=self.dataset_config['data_list_file'],
                           img_dir=self.dataset_config['img_dir'],
                           is_normalize_img=is_normalize_img)
    save_name_list = dataset.data_list[:, -1]
    iterator = dataset.create_one_shot_iterator(dataset.data_list,
                                                num_parallel_calls=self.num_input_threads)
    batch_img0, batch_img1, batch_img2, flow_noc, flow_occ, mask_noc, mask_occ = iterator.get_next()
    img_shape = tf.shape(batch_img0)
    h = img_shape[1]
    w = img_shape[2]
    # Round height/width up to the next multiple of 64 (network stride requirement).
    new_h = tf.where(tf.equal(tf.mod(h, 64), 0), h, (tf.to_int32(tf.floor(h / 64) + 1)) * 64)
    new_w = tf.where(tf.equal(tf.mod(w, 64), 0), w, (tf.to_int32(tf.floor(w / 64) + 1)) * 64)
    batch_img0 = tf.image.resize_images(batch_img0, [new_h, new_w], method=1, align_corners=True)
    batch_img1 = tf.image.resize_images(batch_img1, [new_h, new_w], method=1, align_corners=True)
    batch_img2 = tf.image.resize_images(batch_img2, [new_h, new_w], method=1, align_corners=True)
    flow_fw, _ = pyramid_processing(batch_img0, batch_img1, batch_img2,
                                    train=False, trainable=False, is_scale=True)
    flow_fw['full_res'] = flow_resize(flow_fw['full_res'], [h, w], method=1)
    flow_fw_color = flow_to_color(flow_fw['full_res'], mask=None, max_flow=256)
    error_fw_color = flow_error_image(flow_fw['full_res'], flow_occ, mask_occ)
    errors = {}
    errors['EPE_noc'] = flow_error_avg(flow_noc, flow_fw['full_res'], mask_noc)
    errors['EPE_all'] = flow_error_avg(flow_occ, flow_fw['full_res'], mask_occ)
    errors['outliers_noc'] = outlier_pct(flow_noc, flow_fw['full_res'], mask_noc)
    errors['outliers_all'] = outlier_pct(flow_occ, flow_fw['full_res'], mask_occ)

    restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    saver = tf.train.Saver(var_list=restore_vars)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    sess.run(iterator.initializer)
    saver.restore(sess, restore_model)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    sum_EPE_noc = 0.
    sum_EPE_all = 0.
    sum_outliers_noc = 0.
    sum_outliers_all = 0.
    for i in range(dataset.data_num):
        # BUGFIX: images and metrics must be fetched in ONE sess.run. The
        # original used two calls; each call pulls a fresh element from the
        # one-shot iterator, so the metrics were computed on the *next*
        # sample, and the loop consumed 2*data_num elements (OutOfRangeError
        # halfway through the dataset).
        np_flow_fw, np_flow_fw_color, np_error_fw_color, EPE_noc, EPE_all, \
            outliers_noc, outliers_all = sess.run(
                [flow_fw['full_res'], flow_fw_color, error_fw_color,
                 errors['EPE_noc'], errors['EPE_all'],
                 errors['outliers_noc'], errors['outliers_all']])
        sum_EPE_noc += EPE_noc
        sum_EPE_all += EPE_all
        sum_outliers_noc += outliers_noc
        sum_outliers_all += outliers_all
        misc.imsave('%s/%s_10.png' % (save_dir, save_name_list[i]), np_flow_fw_color[0])
        misc.imsave('%s/error_%s.png' % (save_dir, save_name_list[i]), np_error_fw_color[0])
        #write_flo('%s/flow_fw_%s.flo' % (save_dir, save_name_list[i]), np_flow_fw[0])
        print('Finish %d/%d' % (i+1, dataset.data_num))
    print("EPE_noc: %f, EPE_all: %f" % (sum_EPE_noc/dataset.data_num, sum_EPE_all/dataset.data_num))
    print("F1_noc: %f, F1_all: %f" % (sum_outliers_noc/dataset.data_num, sum_outliers_all/dataset.data_num))
def build_dp_loss(self):
    """ For each pair, we predict two camera poses (1->2, 2->1) """
    # Builds the depth+pose (rigid reprojection) training loss over all
    # scales, combining occlusion-masked photometric reconstruction, SSIM,
    # and edge-aware disparity smoothness, then registers TF summaries and
    # sets self.losses / self.summ_op.
    smooth_loss = 0
    reconstructed_loss = 0
    cross_reconstructed_loss = 0
    ssim_loss = 0
    cross_ssim_loss = 0
    proj_error_depth_all = []          # unused accumulator (kept as-is)
    flyout_map_all_tgt = []
    flyout_map_all_src0 = []
    flyout_map_all_src1 = []
    curr_tgt_image_all = []
    curr_src_image_stack_all = []
    proj_error_src0 = []               # only referenced by commented-out summaries below
    proj_error_src0_1 = []
    proj_error_src1 = []
    proj_error_src1_1 = []
    proj_error_tgt = []
    proj_error_tgt1 = []
    upsampled_tgt_depth_all = []
    summaries = []
    """ Generating occlusion map from FlowNet Calculate different scale occulsion maps described in 'Occlusion Aware Unsupervised Learning of Optical Flow by Yang Wang et al' """
    occu_masks_bw = []
    occu_masks_bw_avg = []
    occu_masks_fw = []
    occu_masks_fw_avg = []
    for i in range(len(self.pred_bw_flows)):
        temp_occu_masks_bw = []
        temp_occu_masks_bw_avg = []
        temp_occu_masks_fw = []
        temp_occu_masks_fw_avg = []
        for s in range(self.num_scales):
            H = int(self.img_height / (2**s))
            W = int(self.img_width / (2**s))
            mask, mask_avg = self.occulsion(self.pred_bw_flows[i][s], H, W)
            temp_occu_masks_bw.append(mask)
            temp_occu_masks_bw_avg.append(mask_avg)  # [src0, tgt, src0_1]
            mask, mask_avg = self.occulsion(self.pred_fw_flows[i][s], H, W)
            temp_occu_masks_fw.append(mask)
            temp_occu_masks_fw_avg.append(mask_avg)  # [tgt, src1, src1_1]
        occu_masks_bw.append(temp_occu_masks_bw)
        occu_masks_bw_avg.append(temp_occu_masks_bw_avg)
        occu_masks_fw.append(temp_occu_masks_fw)
        occu_masks_fw_avg.append(temp_occu_masks_fw_avg)

    self.scaled_tgt_images = [None for _ in range(self.num_scales)]
    self.scaled_src_images_stack = [None for _ in range(self.num_scales)]
    for s in range(self.num_scales):
        if s == 0:
            # Just as a precaution. TF often has interpolation bugs.
            self.scaled_tgt_images[s] = self.tgt_image
            self.scaled_src_images_stack[s] = self.src_image_stack
        else:
            self.scaled_tgt_images[s] = tf.image.resize_bilinear(
                self.tgt_image,
                [int(self.img_height / (2**s)), int(self.img_width / (2**s))],
                align_corners=True)
            self.scaled_src_images_stack[s] = tf.image.resize_bilinear(
                self.src_image_stack,
                [int(self.img_height / (2**s)), int(self.img_width / (2**s))],
                align_corners=True)
        # With depth upsampling, reprojection is always done at full
        # resolution (scale 0) regardless of the pyramid scale s.
        selected_scale = 0 if self.is_depth_upsampling else s
        H = int(self.img_height / (2**selected_scale))
        W = int(self.img_width / (2**selected_scale))
        curr_tgt_image = self.scaled_tgt_images[selected_scale]
        curr_src_image_stack = self.scaled_src_images_stack[selected_scale]
        curr_tgt_image_all.append(curr_tgt_image)
        curr_src_image_stack_all.append(curr_src_image_stack)
        if self.is_depth_upsampling:
            tgt_depth = self.depth_upsampled['tgt'][s]
            src0_depth = self.depth_upsampled['src0'][s]
            src1_depth = self.depth_upsampled['src1'][s]
        else:
            tgt_depth = self.depth['tgt'][s]
            src0_depth = self.depth['src0'][s]
            src1_depth = self.depth['src1'][s]
        # src0
        depth_flow_src02tgt, _ = inverse_warp(
            src0_depth,
            self.pred_poses[:, 0, 0:6],  # src0 -> tgt (fw0)
            self.proj_cam2pix[:, selected_scale, :, :],
            self.proj_pix2cam[:, selected_scale, :, :])
        curr_proj_image_tgt2src0 = transformer_old(curr_tgt_image, depth_flow_src02tgt, [H, W])
        curr_proj_error_src0 = tf.abs(curr_proj_image_tgt2src0 - curr_src_image_stack[:, :, :, 0:3])
        depth_flow_src02src1, _ = inverse_warp(
            src0_depth,
            self.pred_poses[:, 0, 6:12],  # src0 -> src1 (fw2)
            self.proj_cam2pix[:, selected_scale, :, :],
            self.proj_pix2cam[:, selected_scale, :, :])
        curr_proj_image_src12src0 = transformer_old(
            curr_src_image_stack[:, :, :, 3:6], depth_flow_src02src1, [H, W])
        curr_proj_error_src0_1 = tf.abs(curr_proj_image_src12src0 - curr_src_image_stack[:, :, :, 0:3])
        # tgt
        depth_flow_tgt2src1, _ = inverse_warp(
            tgt_depth,
            self.pred_poses[:, 0, 12:18],  # tgt -> src1 (fw1)
            self.proj_cam2pix[:, selected_scale, :, :],
            self.proj_pix2cam[:, selected_scale, :, :])
        curr_proj_image_src12tgt = transformer_old(
            curr_src_image_stack[:, :, :, 3:6], depth_flow_tgt2src1, [H, W])
        curr_proj_error_tgt = tf.abs(curr_proj_image_src12tgt - curr_tgt_image)
        depth_flow_tgt2src0, _ = inverse_warp(
            tgt_depth,
            self.pred_poses[:, 0, 18:24],  # tgt -> src0 (bw0)
            self.proj_cam2pix[:, selected_scale, :, :],
            self.proj_pix2cam[:, selected_scale, :, :])
        curr_proj_image_src02tgt = transformer_old(
            curr_src_image_stack[:, :, :, 0:3], depth_flow_tgt2src0, [H, W])
        curr_proj_error_tgt_1 = tf.abs(curr_proj_image_src02tgt - curr_tgt_image)
        # src1
        depth_flow_src12src0, _ = inverse_warp(
            src1_depth,
            self.pred_poses[:, 0, 24:30],  # src1 -> src0 (bw2)
            self.proj_cam2pix[:, selected_scale, :, :],
            self.proj_pix2cam[:, selected_scale, :, :])
        curr_proj_image_src02src1 = transformer_old(
            curr_src_image_stack[:, :, :, 0:3], depth_flow_src12src0, [H, W])
        curr_proj_error_src1 = tf.abs(curr_proj_image_src02src1 - curr_src_image_stack[:, :, :, 3:6])
        depth_flow_src12tgt, _ = inverse_warp(
            src1_depth,
            self.pred_poses[:, 0, 30:36],  # src1 -> tgt (bw1)
            self.proj_cam2pix[:, selected_scale, :, :],
            self.proj_pix2cam[:, selected_scale, :, :])
        curr_proj_image_tgt2src1 = transformer_old(curr_tgt_image, depth_flow_src12tgt, [H, W])
        curr_proj_error_src1_1 = tf.abs(curr_proj_image_tgt2src1 - curr_src_image_stack[:, :, :, 3:6])
        if not self.compute_minimum_loss:
            # Occlusion-masked L1 reconstruction; each term is normalized by
            # the mean of its mask so masked-out area does not shrink the loss.
            # src0
            reconstructed_loss += tf.reduce_mean(
                curr_proj_error_src0 * occu_masks_bw[0][selected_scale]
            ) / occu_masks_bw_avg[0][selected_scale]
            cross_reconstructed_loss += tf.reduce_mean(
                curr_proj_error_src0_1 * occu_masks_bw[2][selected_scale]
            ) / occu_masks_bw_avg[2][selected_scale]
            # tgt
            reconstructed_loss += tf.reduce_mean(
                curr_proj_error_tgt * occu_masks_bw[1][selected_scale]
            ) / occu_masks_bw_avg[1][selected_scale]
            reconstructed_loss += tf.reduce_mean(
                curr_proj_error_tgt_1 * occu_masks_fw[0][selected_scale]
            ) / occu_masks_fw_avg[0][selected_scale]
            # src1
            cross_reconstructed_loss += tf.reduce_mean(
                curr_proj_error_src1 * occu_masks_fw[2][selected_scale]
            ) / occu_masks_fw_avg[2][selected_scale]
            reconstructed_loss += tf.reduce_mean(
                curr_proj_error_src1_1 * occu_masks_fw[1][selected_scale]
            ) / occu_masks_fw_avg[1][selected_scale]
        if self.ssim_weight > 0:
            # src0
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_tgt2src0 * occu_masks_bw[0][selected_scale],
                     curr_src_image_stack[:, :, :, 0:3] * occu_masks_bw[0][selected_scale])
            ) / occu_masks_bw_avg[0][selected_scale]
            cross_ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_src12src0 * occu_masks_bw[2][selected_scale],
                     curr_src_image_stack[:, :, :, 0:3] * occu_masks_bw[2][selected_scale])
            ) / occu_masks_bw_avg[2][selected_scale]
            # tgt
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_src12tgt * occu_masks_bw[1][selected_scale],
                     curr_tgt_image * occu_masks_bw[1][selected_scale])
            ) / occu_masks_bw_avg[1][selected_scale]
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_src02tgt * occu_masks_fw[0][selected_scale],
                     curr_tgt_image * occu_masks_fw[0][selected_scale])
            ) / occu_masks_fw_avg[0][selected_scale]
            # src1
            # NOTE(review): this term is masked with occu_masks_fw[2] but
            # divided by occu_masks_bw_avg[2]; every other term divides by the
            # avg of the SAME mask family — likely should be
            # occu_masks_fw_avg[2]. Confirm before changing.
            cross_ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_src02src1 * occu_masks_fw[2][selected_scale],
                     curr_src_image_stack[:, :, :, 3:6] * occu_masks_fw[2][selected_scale])
            ) / occu_masks_bw_avg[2][selected_scale]
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_tgt2src1 * occu_masks_fw[1][selected_scale],
                     curr_src_image_stack[:, :, :, 3:6] * occu_masks_fw[1][selected_scale])
            ) / occu_masks_fw_avg[1][selected_scale]
        if self.dp_smooth_weight > 0:
            if self.depth_normalization:
                # Perform depth normalization, dividing by the mean.
                mean_tgt_disp = tf.reduce_mean(self.disp['tgt'][s], axis=[1, 2, 3], keepdims=True)
                tgt_disp_input = self.disp['tgt'][s] / mean_tgt_disp
                mean_src0_disp = tf.reduce_mean(self.disp['src0'][s], axis=[1, 2, 3], keepdims=True)
                src0_disp_input = self.disp['src0'][s] / mean_src0_disp
                mean_src1_disp = tf.reduce_mean(self.disp['src1'][s], axis=[1, 2, 3], keepdims=True)
                src1_disp_input = self.disp['src1'][s] / mean_src1_disp
            else:
                tgt_disp_input = self.disp['tgt'][s]
                src0_disp_input = self.disp['src0'][s]
                src1_disp_input = self.disp['src1'][s]
            # Coarser scales get down-weighted unless equal_weighting is set.
            scaling_f = (1.0 if self.equal_weighting else 1.0 / (2**s))
            # Edge-aware first-order
            smooth_loss += scaling_f * depth_smoothness(
                tgt_disp_input, self.scaled_tgt_images[s])
            smooth_loss += scaling_f * depth_smoothness(
                src0_disp_input, self.scaled_src_images_stack[s][:, :, :, 0:3])
            smooth_loss += scaling_f * depth_smoothness(
                src1_disp_input, self.scaled_src_images_stack[s][:, :, :, 3:6])
        if s == 0:
            if self.compute_minimum_loss:
                # NOTE(review): min_mask_tgt / min_mask_src0 / min_mask_src1
                # are never defined in this method — this branch raises
                # NameError if compute_minimum_loss is enabled. Confirm
                # whether the minimum-loss mask computation was removed.
                flyout_map_all_tgt.append(min_mask_tgt)
                flyout_map_all_src0.append(min_mask_src0)
                flyout_map_all_src1.append(min_mask_src1)
            else:
                flyout_map_all_tgt.append(occu_masks_fw[0][selected_scale])
                flyout_map_all_src0.append(occu_masks_bw[0][selected_scale])
                flyout_map_all_src1.append(occu_masks_fw[1][selected_scale])
                # proj_error_tgt = curr_proj_error_tgt
                # proj_error_tgt1 = curr_proj_error_tgt1
                # proj_error_src0 = curr_proj_error_src0
                # proj_error_src0_1 = curr_proj_error_src0_1
                # proj_error_src1 = curr_proj_error_src1
                # proj_error_src1_1 = curr_proj_error_src1_1
        upsampled_tgt_depth_all.append(tgt_depth)

    # self.losses = (self.pixel_loss_weight * pixel_loss_depth + self.smooth_weight * dp_smooth_loss)
    self.losses = self.dp_reconstruction_weight*((1.0 - self.ssim_weight)*(reconstructed_loss + self.dp_cross_geometry_weight*cross_reconstructed_loss) + self.ssim_weight*(ssim_loss+self.dp_cross_geometry_weight*cross_ssim_loss)) + \
        self.dp_smooth_weight * smooth_loss
    summaries.append(tf.summary.scalar("total_loss", self.losses))
    summaries.append(tf.summary.scalar("reconstruction_loss", reconstructed_loss))
    summaries.append(tf.summary.scalar("cross_reconstruction_loss", cross_reconstructed_loss))
    summaries.append(tf.summary.scalar("ssim_loss", ssim_loss))
    summaries.append(tf.summary.scalar("cross_ssim_loss", cross_ssim_loss))
    summaries.append(tf.summary.scalar("dp_smooth_loss", smooth_loss))
    # Image summaries below go to the default collection, not `summaries`,
    # so they are not part of self.summ_op.
    s = 0
    tf.summary.image(
        'scale%d_target_image' % s,
        tf.image.convert_image_dtype(curr_tgt_image_all[0], dtype=tf.uint8))
    for i in range(self.num_source):
        tf.summary.image(
            'scale%d_src_image_%d' % (s, i),
            tf.image.convert_image_dtype(
                curr_src_image_stack_all[0][:, :, :, i * 3:(i + 1) * 3],
                dtype=tf.uint8))
    tf.summary.image('scale%d_src0_pred_disp' % s, self.disp['src0'][s])
    # for k in range(self.num_scales):
    tf.summary.image('scale%d_tgt_pred_disp' % s, self.disp['tgt'][s])
    tf.summary.image('scale%d_src1_pred_disp' % s, self.disp['src1'][s])
    # tf.summary.image('scale_proj_error_src0', proj_error_src0)
    # tf.summary.image('scale_proj_error_src0_1', proj_error_src0_1)
    # tf.summary.image('scale_proj_error_src1', proj_error_src1)
    # tf.summary.image('scale_proj_error_src1_1', proj_error_src1_1)
    # tf.summary.image('scale_proj_error_tgt', proj_error_tgt)
    # tf.summary.image('scale_proj_error_tgt1', proj_error_tgt1)
    tf.summary.image(
        'scale%d_flow_src02tgt' % s,
        fl.flow_to_color(self.pred_fw_flows[0][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_tgt2src1' % s,
        fl.flow_to_color(self.pred_fw_flows[1][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_src02src1' % s,
        fl.flow_to_color(self.pred_fw_flows[2][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_tgt2src0' % s,
        fl.flow_to_color(self.pred_bw_flows[0][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_src12tgt' % s,
        fl.flow_to_color(self.pred_bw_flows[1][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_src12src0' % s,
        fl.flow_to_color(self.pred_bw_flows[2][s], max_flow=256))
    if self.is_depth_upsampling:
        for k in range(self.num_scales):
            tf.summary.image('scale%d_tgt_upsampled_pred_depth' % k, upsampled_tgt_depth_all[k])
    tf.summary.image('occlusion_src02tgt', flyout_map_all_tgt[0])
    tf.summary.image('occlusion_tgt2src0', flyout_map_all_src0[0])
    tf.summary.image('occlusion_tgt2src1', flyout_map_all_src1[0])
    self.summ_op = tf.summary.merge(summaries)
def build_flow_loss(self):
    """Build the optical-flow training loss over all pyramid scales.

    Combines occlusion-masked photometric reconstruction, SSIM, and
    second-order flow smoothness for all six predicted flows (three forward,
    three backward), then registers TF summaries and sets
    self.losses / self.summ_op.
    """
    reconstructed_loss = 0
    cross_reconstructed_loss = 0
    flow_smooth_loss = 0
    cross_flow_smooth_loss = 0
    ssim_loss = 0
    cross_ssim_loss = 0
    curr_tgt_image_all = []
    curr_src_image_stack_all = []
    # Initialized as lists but reassigned to the scale-0 mask tensors below.
    occlusion_map_0_all = []
    occlusion_map_1_all = []
    occlusion_map_2_all = []
    occlusion_map_3_all = []
    occlusion_map_4_all = []
    occlusion_map_5_all = []
    # Calculate different scale occulsion maps described in 'Occlusion Aware Unsupervised
    # Learning of Optical Flow by Yang Wang et al'
    occu_masks_bw = []
    occu_masks_bw_avg = []
    occu_masks_fw = []
    occu_masks_fw_avg = []
    for i in range(len(self.pred_bw_flows)):
        temp_occu_masks_bw = []
        temp_occu_masks_bw_avg = []
        temp_occu_masks_fw = []
        temp_occu_masks_fw_avg = []
        for s in range(self.num_scales):
            H = int(self.img_height / (2**s))
            W = int(self.img_width / (2**s))
            mask, mask_avg = self.occulsion(self.pred_bw_flows[i][s], H, W)
            temp_occu_masks_bw.append(mask)
            temp_occu_masks_bw_avg.append(mask_avg)  # [src0, tgt, src0_1]
            mask, mask_avg = self.occulsion(self.pred_fw_flows[i][s], H, W)
            temp_occu_masks_fw.append(mask)
            temp_occu_masks_fw_avg.append(mask_avg)  # [tgt, src1, src1_1]
        occu_masks_bw.append(temp_occu_masks_bw)
        occu_masks_bw_avg.append(temp_occu_masks_bw_avg)
        occu_masks_fw.append(temp_occu_masks_fw)
        occu_masks_fw_avg.append(temp_occu_masks_fw_avg)

    for s in range(self.num_scales):
        H = int(self.img_height / (2**s))
        W = int(self.img_width / (2**s))
        curr_tgt_image = tf.image.resize_area(self.tgt_image, [H, W])
        curr_src_image_stack = tf.image.resize_area(self.src_image_stack, [H, W])
        curr_tgt_image_all.append(curr_tgt_image)
        curr_src_image_stack_all.append(curr_src_image_stack)
        # Each reconstruction term is an occlusion-masked L1 error normalized
        # by the mean of its mask.
        # src0
        curr_proj_image_optical_src0 = transformer_old(
            curr_tgt_image, self.pred_fw_flows[0][s], [H, W])
        curr_proj_error_optical_src0 = tf.abs(
            curr_proj_image_optical_src0 - curr_src_image_stack[:, :, :, 0:3])
        reconstructed_loss += tf.reduce_mean(
            curr_proj_error_optical_src0 * occu_masks_bw[0][s]) / occu_masks_bw_avg[0][s]
        curr_proj_image_optical_src0_1 = transformer_old(
            curr_src_image_stack[:, :, :, 3:6], self.pred_fw_flows[2][s], [H, W])
        curr_proj_error_optical_src0_1 = tf.abs(
            curr_proj_image_optical_src0_1 - curr_src_image_stack[:, :, :, 0:3])
        cross_reconstructed_loss += tf.reduce_mean(
            curr_proj_error_optical_src0_1 * occu_masks_bw[2][s]) / occu_masks_bw_avg[2][s]
        # tgt
        curr_proj_image_optical_tgt = transformer_old(
            curr_src_image_stack[:, :, :, 3:6], self.pred_fw_flows[1][s], [H, W])
        curr_proj_error_optical_tgt = tf.abs(curr_proj_image_optical_tgt - curr_tgt_image)
        reconstructed_loss += tf.reduce_mean(
            curr_proj_error_optical_tgt * occu_masks_bw[1][s]) / occu_masks_bw_avg[1][s]
        curr_proj_image_optical_tgt_1 = transformer_old(
            curr_src_image_stack[:, :, :, 0:3], self.pred_bw_flows[0][s], [H, W])
        curr_proj_error_optical_tgt_1 = tf.abs(
            curr_proj_image_optical_tgt_1 - curr_tgt_image)
        reconstructed_loss += tf.reduce_mean(
            curr_proj_error_optical_tgt_1 * occu_masks_fw[0][s]) / occu_masks_fw_avg[0][s]
        # src1
        curr_proj_image_optical_src1 = transformer_old(
            curr_tgt_image, self.pred_bw_flows[1][s], [H, W])
        curr_proj_error_optical_src1 = tf.abs(
            curr_proj_image_optical_src1 - curr_src_image_stack[:, :, :, 3:6])
        reconstructed_loss += tf.reduce_mean(
            curr_proj_error_optical_src1 * occu_masks_fw[1][s]) / occu_masks_fw_avg[1][s]
        curr_proj_image_optical_src1_1 = transformer_old(
            curr_src_image_stack[:, :, :, 0:3], self.pred_bw_flows[2][s], [H, W])
        curr_proj_error_optical_src1_1 = tf.abs(
            curr_proj_image_optical_src1_1 - curr_src_image_stack[:, :, :, 3:6])
        cross_reconstructed_loss += tf.reduce_mean(
            curr_proj_error_optical_src1_1 * occu_masks_fw[2][s]) / occu_masks_fw_avg[2][s]
        if self.ssim_weight > 0:
            # src0
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_optical_src0 * occu_masks_bw[0][s],
                     curr_src_image_stack[:, :, :, 0:3] * occu_masks_bw[0][s])) / occu_masks_bw_avg[0][s]
            cross_ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_optical_src0_1 * occu_masks_bw[2][s],
                     curr_src_image_stack[:, :, :, 0:3] * occu_masks_bw[2][s])) / occu_masks_bw_avg[2][s]
            # tgt
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_optical_tgt * occu_masks_bw[1][s],
                     curr_tgt_image * occu_masks_bw[1][s])) / occu_masks_bw_avg[1][s]
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_optical_tgt_1 * occu_masks_fw[0][s],
                     curr_tgt_image * occu_masks_fw[0][s])) / occu_masks_fw_avg[0][s]
            # src1
            ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_optical_src1 * occu_masks_fw[1][s],
                     curr_src_image_stack[:, :, :, 3:6] * occu_masks_fw[1][s])) / occu_masks_fw_avg[1][s]
            cross_ssim_loss += tf.reduce_mean(
                SSIM(curr_proj_image_optical_src1_1 * occu_masks_fw[2][s],
                     curr_src_image_stack[:, :, :, 3:6] * occu_masks_fw[2][s])) / occu_masks_fw_avg[2][s]
        # Compute second-order derivatives for flow smoothness loss
        # Flows are divided by 20.0 before the gradient penalty — presumably a
        # fixed normalization constant; confirm against cal_grad2_error's contract.
        flow_smooth_loss += cal_grad2_error(
            self.pred_fw_flows[0][s] / 20.0, curr_src_image_stack[:, :, :, 0:3], 1.0)
        flow_smooth_loss += cal_grad2_error(
            self.pred_fw_flows[1][s] / 20.0, curr_tgt_image, 1.0)
        cross_flow_smooth_loss += cal_grad2_error(
            self.pred_fw_flows[2][s] / 20.0, curr_src_image_stack[:, :, :, 0:3], 1.0)
        flow_smooth_loss += cal_grad2_error(
            self.pred_bw_flows[0][s] / 20.0, curr_tgt_image, 1.0)
        flow_smooth_loss += cal_grad2_error(
            self.pred_bw_flows[1][s] / 20.0, curr_src_image_stack[:, :, :, 3:6], 1.0)
        cross_flow_smooth_loss += cal_grad2_error(
            self.pred_bw_flows[2][s] / 20.0, curr_src_image_stack[:, :, :, 3:6], 1.0)
        # [TODO] Add first-order derivatives for flow smoothness loss
        # [TODO] use robust Charbonnier penalty?
        if s == 0:
            # Keep the full-resolution occlusion masks for image summaries.
            occlusion_map_0_all = occu_masks_bw[0][s]
            occlusion_map_1_all = occu_masks_bw[1][s]
            occlusion_map_2_all = occu_masks_bw[2][s]
            occlusion_map_3_all = occu_masks_fw[0][s]
            occlusion_map_4_all = occu_masks_fw[1][s]
            occlusion_map_5_all = occu_masks_fw[2][s]

    self.losses = self.flow_reconstruction_weight * ((1.0 - self.ssim_weight) * \
        (reconstructed_loss + self.flow_cross_geometry_weight*cross_reconstructed_loss) + \
        self.ssim_weight*(ssim_loss+self.flow_cross_geometry_weight*cross_ssim_loss)) + \
        self.flow_smooth_weight * (flow_smooth_loss + self.flow_cross_geometry_weight*cross_flow_smooth_loss)
    summaries = []
    summaries.append(tf.summary.scalar("total_loss", self.losses))
    summaries.append(tf.summary.scalar("reconstructed_loss", reconstructed_loss))
    summaries.append(tf.summary.scalar("cross_reconstructed_loss", cross_reconstructed_loss))
    summaries.append(tf.summary.scalar("ssim_loss", ssim_loss))
    summaries.append(tf.summary.scalar("cross_ssim_loss", cross_ssim_loss))
    summaries.append(tf.summary.scalar("flow_smooth_loss", flow_smooth_loss))
    summaries.append(tf.summary.scalar("cross_flow_smooth_loss", cross_flow_smooth_loss))
    # Image summaries go to the default collection, not `summaries`,
    # so they are not part of self.summ_op.
    s = 0
    tf.summary.image(
        'scale%d_target_image' % s,
        tf.image.convert_image_dtype(curr_tgt_image_all[0], dtype=tf.uint8))
    for i in range(self.num_source):
        tf.summary.image('scale%d_src_image_%d' % (s, i), \
            tf.image.convert_image_dtype(curr_src_image_stack_all[0][:, :, :, i*3:(i+1)*3], dtype=tf.uint8))
    tf.summary.image(
        'scale%d_flow_src02tgt' % s,
        fl.flow_to_color(self.pred_fw_flows[0][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_tgt2src1' % s,
        fl.flow_to_color(self.pred_fw_flows[1][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_src02src1' % s,
        fl.flow_to_color(self.pred_fw_flows[2][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_tgt2src0' % s,
        fl.flow_to_color(self.pred_bw_flows[0][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_src12tgt' % s,
        fl.flow_to_color(self.pred_bw_flows[1][s], max_flow=256))
    tf.summary.image(
        'scale%d_flow_src12src0' % s,
        fl.flow_to_color(self.pred_bw_flows[2][s], max_flow=256))
    tf.summary.image('scale_flyout_mask_src0', occlusion_map_0_all)
    tf.summary.image('scale_flyout_mask_tgt', occlusion_map_1_all)
    tf.summary.image('scale_flyout_mask_src0_1', occlusion_map_2_all)
    tf.summary.image('scale_flyout_mask_tgt1', occlusion_map_3_all)
    tf.summary.image('scale_flyout_mask_src1', occlusion_map_4_all)
    tf.summary.image('scale_flyout_mask_src1_1', occlusion_map_5_all)
    self.summ_op = tf.summary.merge(summaries)