def gan_loss(fake_flow_d, real_flow_d, conv_real, conv_fake, weight=1):
    EPS = 1e-12
    with tf.variable_scope('generator_loss'):
        # Non-saturating generator loss: -log(D(G(x))).
        g_total_loss = sops.replace_nonfinite(
            tf.reduce_mean(-tf.log(fake_flow_d + EPS)))
        # Alternative formulation on the raw logits (disabled):
        # g_total_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        #     logits=conv_fake, labels=tf.ones_like(conv_fake)))
        g_total_loss = tf.losses.compute_weighted_loss(g_total_loss, weights=1)

    with tf.variable_scope('discriminator_loss'):
        # Standard discriminator loss: -(log D(x) + log(1 - D(G(x)))).
        d_total_loss = sops.replace_nonfinite(
            tf.reduce_mean(-(tf.log(real_flow_d + EPS) +
                             tf.log(1 - fake_flow_d + EPS))))
        # Alternative formulation on the raw logits (disabled):
        # d_total_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        #     logits=conv_real, labels=tf.ones_like(conv_real)))
        # d_total_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        #     logits=conv_fake, labels=tf.zeros_like(conv_fake)))
        # d_total_loss = sops.replace_nonfinite(d_total_loss_fake + d_total_loss_real)
        # Optional feature matching term (disabled):
        # feature_matching_loss = endpoint_loss(conv_real, conv_fake,
        #                                       weight=weight + 10,
        #                                       scope='feature_matching_loss')
        d_total_loss = tf.losses.compute_weighted_loss(d_total_loss, weights=1)

    return g_total_loss, d_total_loss
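# Example: a minimal sketch of wiring up gan_loss. It assumes the module-level
# TF1.x imports used throughout this file and a hypothetical
# `make_discriminator` that returns sigmoid probabilities in (0, 1) plus a
# pre-sigmoid feature map; that name is illustrative, not part of this code.
def _example_gan_loss_usage(real_flow, fake_flow, make_discriminator):
    with tf.variable_scope('discriminator'):
        real_prob, conv_real = make_discriminator(real_flow)
    with tf.variable_scope('discriminator', reuse=True):
        fake_prob, conv_fake = make_discriminator(fake_flow)
    g_loss, d_loss = gan_loss(fake_prob, real_prob, conv_real, conv_fake)
    # Optimize generator and discriminator variables separately.
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope='discriminator')
    g_vars = [v for v in tf.trainable_variables() if v not in d_vars]
    g_train = tf.train.AdamOptimizer(1e-4).minimize(g_loss, var_list=g_vars)
    d_train = tf.train.AdamOptimizer(1e-4).minimize(d_loss, var_list=d_vars)
    return g_train, d_train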
def pointwise_l2_loss(inp, gt, epsilon=0.00001, mask=None):
    """Computes the pointwise unsquared l2 loss.

    With a single channel this is equivalent to an l1 loss.
    The input tensors must use the format NHWC (the norm is taken over the
    last axis). This loss ignores nan values. The loss is normalized by the
    number of pixels.

    inp: Tensor
        This is the prediction.

    gt: Tensor
        The ground truth with the same shape as 'inp'.

    epsilon: float
        The epsilon value to avoid division by zero in the gradient computation.

    mask: Tensor, optional
        Per-pixel weighting; expanded with trailing singleton dims so it
        broadcasts against the difference tensor.
    """
    with tf.name_scope('pointwise_l2_loss'):
        gt_ = tf.stop_gradient(gt)
        diff = sops.replace_nonfinite(inp - gt_)
        if mask is not None:
            while len(mask.shape) < len(diff.shape):
                mask = tf.expand_dims(mask, -1)
            diff = mask * diff
        return tf.reduce_mean(tf.sqrt(tf.reduce_sum(diff**2, axis=3) + epsilon))
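# Example: a minimal sketch of calling the masked pointwise loss above.
# Shapes are illustrative; the [N, H, W] mask is broadcast over the channel
# axis by the expand_dims loop inside the function.
def _example_pointwise_l2_loss_usage():
    pred = tf.random_normal([4, 48, 64, 2])  # e.g. predicted flow, NHWC
    gt = tf.random_normal([4, 48, 64, 2])    # ground truth (NaNs are ignored)
    valid = tf.cast(tf.random_uniform([4, 48, 64]) > 0.1, tf.float32)
    loss = pointwise_l2_loss(pred, gt, mask=valid)
    return loss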
def train_for_sceneflow(image1, image2, depth1, depth2, depth_chng,
                        optical_flow):
    # Normalize both depth maps by the maximum of the first one so the
    # forward and backward samples share the same scale.
    max_depth1 = tf.reduce_max(depth1)
    depth1 = depth1 / max_depth1
    depth2 = depth2 / max_depth1

    depth1 = sops.replace_nonfinite(depth1)
    depth2 = sops.replace_nonfinite(depth2)

    # Append depth as an extra channel -> RGBD.
    image1 = combine_depth_values(image1, depth1, 2)
    image2 = combine_depth_values(image2, depth2, 2)

    img_pair_rgbd = tf.concat([image1, image2], axis=-1)
    img_pair_rgbd_swapped = tf.concat([image2, image1], axis=-1)

    # optical_flow = optical_flow / 50

    # For pure optical flow, comment out the depth-change channel below;
    # keep it for scene flow.
    optical_flow_with_depth_change = combine_depth_values(
        optical_flow, depth_chng, 2)
    # The backward label is a zero placeholder.
    optical_flow_with_depth_change_swapped = tf.zeros(
        optical_flow_with_depth_change.get_shape())

    # Disabled alternatives: patch-wise inputs and constant padding.
    # inputt = divide_inputs_to_patches(img_pair, 8)
    # label = divide_inputs_to_patches(label_pair, 3)
    # x_dimension_padding = tf.constant([[4, 4], [0, 0], [0, 0]])
    # padded_img_pair_rgbd = tf.pad(img_pair_rgbd, x_dimension_padding, 'CONSTANT')
    # padded_optical_flow_with_depth_change = tf.pad(
    #     optical_flow_with_depth_change, x_dimension_padding, 'CONSTANT')

    # Stack the forward and backward samples along a new leading axis.
    fb_rgbd_img_pair = tf.stack([img_pair_rgbd, img_pair_rgbd_swapped])
    fb_rgbd_optflow_with_depth_change = tf.stack([
        optical_flow_with_depth_change,
        optical_flow_with_depth_change_swapped
    ])

    return {
        'input_n': fb_rgbd_img_pair,
        'label_n': fb_rgbd_optflow_with_depth_change
    }
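# Example: a sketch of the expected inputs for train_for_sceneflow. The HWC
# shapes below are assumptions inferred from the axis arguments used above
# (channel axis 2 for combine_depth_values, a new leading axis from tf.stack);
# the actual loader may use different sizes or rank conventions.
def _example_train_for_sceneflow_usage():
    image1 = tf.zeros([192, 256, 3])
    image2 = tf.zeros([192, 256, 3])
    depth1 = tf.zeros([192, 256, 1])
    depth2 = tf.zeros([192, 256, 1])
    depth_chng = tf.zeros([192, 256, 1])
    optical_flow = tf.zeros([192, 256, 2])
    sample = train_for_sceneflow(image1, image2, depth1, depth2,
                                 depth_chng, optical_flow)
    # sample['input_n']: (2, H, W, 8)  forward/backward RGBD pairs
    # sample['label_n']: (2, H, W, 3)  flow + depth change (backward is zeros)
    return sample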
def KL_divergence_loss(z_mu, z_log_sigma_sq):
    with tf.variable_scope('kl_loss'):
        # Closed-form KL divergence between N(mu, sigma^2) and N(0, 1):
        # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), averaged over the batch.
        latent_loss = -tf.reduce_mean(0.5 * tf.reduce_sum(
            1 + z_log_sigma_sq - z_mu**2 - tf.exp(z_log_sigma_sq), axis=1))
        latent_loss = sops.replace_nonfinite(latent_loss)
        latent_loss = tf.losses.compute_weighted_loss(latent_loss, weights=1)
    return latent_loss
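# Example: a minimal VAE-style sketch. The encoder outputs are assumed to be
# (batch, latent_dim) tensors, and z_log_sigma_sq holds log(sigma^2), matching
# the closed-form KL term used above.
def _example_kl_divergence_usage():
    z_mu = tf.random_normal([8, 32])
    z_log_sigma_sq = tf.random_normal([8, 32])
    kl = KL_divergence_loss(z_mu, z_log_sigma_sq)
    # Reparameterization trick for sampling the latent code:
    eps = tf.random_normal(tf.shape(z_mu))
    z = z_mu + tf.exp(0.5 * z_log_sigma_sq) * eps
    return kl, z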
def reconstruction_loss_l2(prediction, gt):
    with tf.variable_scope('reconstruction_loss'):
        # Squared l2 distance per sample, averaged over the batch.
        rec_loss = tf.reduce_mean(
            tf.reduce_sum((gt - prediction)**2, axis=[1, 2, 3]))
        # Bernoulli cross-entropy alternative (disabled):
        # rec_loss = -tf.reduce_sum(gt * tf.log(1e-8 + prediction) +
        #                           (1 - gt) * tf.log(1e-8 + 1 - prediction),
        #                           axis=[1, 2, 3])
        rec_loss = sops.replace_nonfinite(rec_loss)
        rec_loss = tf.losses.compute_weighted_loss(rec_loss, weights=1)
    return rec_loss
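# Example: combining the two VAE terms above into a single objective. This is
# a sketch; the relative weighting of the KL term is a free hyperparameter,
# not something fixed by this code.
def _example_vae_objective(prediction, gt, z_mu, z_log_sigma_sq,
                           kl_weight=1.0):
    rec = reconstruction_loss_l2(prediction, gt)
    kl = KL_divergence_loss(z_mu, z_log_sigma_sq)
    return rec + kl_weight * kl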
def test_shape(self):
    with self.test_session(use_gpu=False, force_gpu=False):
        input1 = np.empty((8, 40, 31))
        input2 = np.empty((8, 1, 40, 31))
        input3 = np.empty((2, 2, 2, 40, 31))
        inputs = (input1, input2, input3)
        for i in inputs:
            output_tensor = ops.replace_nonfinite(input=i)
            out_shape = output_tensor.get_shape().as_list()
            self.assertAllEqual(out_shape, i.shape)
def l2_loss(inp, gt, epsilon, mask=None):
    """L2 loss over all dimensions per sample.

    Returns a scalar tensor with the loss.
    """
    with tf.name_scope('l2_loss'):
        gt_ = tf.stop_gradient(gt)
        diff = sops.replace_nonfinite(inp - gt_)
        if mask is not None:
            while len(mask.shape) < len(diff.shape):
                mask = tf.expand_dims(mask, -1)
            diff = mask * diff
        return tf.reduce_mean(
            tf.sqrt(tf.reduce_sum(diff**2, axis=[1, 2, 3]) + epsilon))
def pointwise_l2_loss(inp, gt, epsilon, data_format='NCHW'):
    """Computes the pointwise unsquared l2 loss.

    The input tensors must use the format given by 'data_format'.
    This loss ignores nan values. The loss is normalized by the number of
    pixels.

    inp: Tensor
        This is the prediction.

    gt: Tensor
        The ground truth with the same shape as 'inp'.

    epsilon: float
        The epsilon value to avoid division by zero in the gradient computation.

    data_format: str
        'NCHW' (default) or 'NHWC'; selects the channel axis for the norm.
    """
    with tf.name_scope('pointwise_l2_loss'):
        gt_ = tf.stop_gradient(gt)
        diff = sops.replace_nonfinite(inp - gt_)
        if data_format == 'NCHW':
            return tf.reduce_mean(
                tf.sqrt(tf.reduce_sum(diff**2, axis=1) + epsilon))
        else:  # NHWC
            return tf.reduce_mean(
                tf.sqrt(tf.reduce_sum(diff**2, axis=3) + epsilon))
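# Example: the two data_format branches compute the same value on transposed
# inputs. A quick self-check sketch, assuming a TF1.x session and that
# sops.replace_nonfinite is available (lmbspecialops).
def _example_pointwise_l2_loss_formats():
    pred_nchw = tf.random_normal([2, 3, 16, 16])
    gt_nchw = tf.random_normal([2, 3, 16, 16])
    loss_nchw = pointwise_l2_loss(pred_nchw, gt_nchw, epsilon=1e-5)
    loss_nhwc = pointwise_l2_loss(tf.transpose(pred_nchw, [0, 2, 3, 1]),
                                  tf.transpose(gt_nchw, [0, 2, 3, 1]),
                                  epsilon=1e-5, data_format='NHWC')
    with tf.Session() as sess:
        # Both losses are evaluated in one run, so they see the same samples.
        a, b = sess.run([loss_nchw, loss_nhwc])
        assert abs(a - b) < 1e-5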
def _test_grad(self, dtype):
    A = np.random.rand(9).astype(dtype)
    A[2] = np.nan
    shape = A.shape
    data = tf.constant(A)
    output = ops.replace_nonfinite(input=data, value=123)
    err = tf.test.compute_gradient_error(data,
                                         shape,
                                         output,
                                         output.get_shape().as_list(),
                                         x_init_value=A)
    print('error', err, flush=True)
    self.assertLess(err, 1e-3)
    grad = tf.test.compute_gradient(data,
                                    shape,
                                    output,
                                    output.get_shape().as_list(),
                                    x_init_value=A,
                                    delta=0.1)
    for g in grad:
        print(g)
        print(g.shape)
def main(_):
    if not tf.gfile.Exists(FLAGS.checkpoint_dir):
        tf.gfile.MakeDirs(FLAGS.checkpoint_dir)

    with tf.Graph().as_default():
        #============================================
        # Load images and labels
        #============================================
        with tf.name_scope("data_loading"):
            # Alternative loader (disabled):
            # imageloader = DataLoader(FLAGS.dataset_dir, FLAGS.batch_size,
            #                          FLAGS.image_height, FLAGS.image_width,
            #                          FLAGS.num_sources, FLAGS.num_scales,
            #                          'train')
            # image_left, image_right, label, intrinsics = imageloader.load_train_batch()
            data_dict, ground_truth, intrinsics = Demon_Dataloader()
            image_left, image_right = tf.split(value=data_dict['IMAGE_PAIR'],
                                               num_or_size_splits=2,
                                               axis=3)
            label = ground_truth['depth0']
            gt_right_cam = tf.concat(
                [ground_truth['translation'], ground_truth['rotation']],
                axis=1)

        #============================================
        # Define the model
        #============================================
        with tf.variable_scope("model") as scope:
            with tf.name_scope("depth_prediction"):
                # Predict from the left-right pair.
                inputdata = tf.concat([image_left, image_right], axis=3)
                pred_depth_left, pred_poses_right, pred_exp_logits_left, \
                    depth_net_endpoints = depth_net(inputdata,
                                                    is_training=True)
                # Predict from the right-left pair with shared weights.
                scope.reuse_variables()
                inputdata = tf.concat([image_right, image_left], axis=3)
                pred_depth_right, pred_poses_left, pred_exp_logits_right, \
                    depth_net_endpoints_right = depth_net(inputdata,
                                                          is_training=True)

        #============================================
        # Specify the loss function
        #============================================
        with tf.name_scope("compute_loss"):
            depth_loss = 0
            optflow_loss = 0
            pixel_loss = 0
            smooth_loss = 0
            exp_loss = 0
            consist_loss = 0
            cam_consist_loss = 0
            cam_loss = 0
            sig_depth_loss = 0
            epsilon = 0.00001

            left_image_all = []
            right_image_all = []
            proj_image_left_all = []
            proj_image_right_all = []
            proj_error_stack_all = []
            optflow_x_all = []
            optflow_y_all = []
            exp_mask_all = []

            #=========
            # Left-right camera consistency loss (disabled)
            #=========
            # cam_consist_loss = tf.reduce_mean(
            #     (pred_poses_right + pred_poses_left)**2) * FLAGS.cam_consist_weight

            #=========
            # Camera pose loss
            #=========
            gt_proj_l2r = pose_vec2mat(gt_right_cam, 'angleaxis')
            pose_left2right = pose_vec2mat(pred_poses_right[:, 0, :],
                                           'angleaxis')
            pose_right2left = pose_vec2mat(pred_poses_left[:, 0, :],
                                           'angleaxis')
            cam_loss += tf.reduce_mean(
                (gt_proj_l2r[:, 0:3, 0:3] -
                 pose_left2right[:, 0:3, 0:3])**2) * FLAGS.cam_weight_rot
            cam_loss += tf.reduce_mean(
                (tf.matrix_inverse(gt_proj_l2r)[:, 0:3, 3] -
                 pose_right2left[:, 0:3, 3])**2) * FLAGS.cam_weight_tran

            #=========
            # Scale-invariant gradient loss
            #=========
            sig_params = {
                'deltas': [1, 2, 4, 8, 16],
                'weights': [1, 1, 1, 1, 1],
                'epsilon': 0.001
            }
            pr_depth_sig = scale_invariant_gradient(
                tf.transpose(pred_depth_left[0], perm=[0, 3, 1, 2]),
                **sig_params)
            gt_depth_sig = scale_invariant_gradient(
                tf.transpose(label, perm=[0, 3, 1, 2]), **sig_params)
            sig_depth_loss += FLAGS.sig_depth_weight * pointwise_l2_loss(
                pr_depth_sig, gt_depth_sig, epsilon=epsilon)

            for s in range(FLAGS.num_scales):
                #=======
                # Smooth loss (disabled)
                #=======
                # smooth_loss += FLAGS.smooth_weight / (2**s) * \
                #     compute_smooth_loss(1.0 / pred_depth_left[s])
                # smooth_loss += FLAGS.smooth_weight / (2**s) * \
                #     compute_smooth_loss(1.0 / pred_depth_right[s])

                curr_label = tf.image.resize_area(label, [
                    int(FLAGS.resizedheight / (2**s)),
                    int(FLAGS.resizedwidth / (2**s))
                ])
                curr_image_left = tf.image.resize_area(image_left, [
                    int(FLAGS.resizedheight / (2**s)),
                    int(FLAGS.resizedwidth / (2**s))
                ])
                curr_image_right = tf.image.resize_area(image_right, [
                    int(FLAGS.resizedheight / (2**s)),
                    int(FLAGS.resizedwidth / (2**s))
                ])

                #=======
                # Depth loss
                #=======
                diff = sops.replace_nonfinite(curr_label -
                                              pred_depth_left[s])
                curr_depth_error = tf.abs(diff)
                depth_loss += tf.reduce_mean(
                    curr_depth_error) * FLAGS.depth_weight / (2**s)

                #=======
                # Pixel loss
                #=======
                curr_proj_image_left, src_pixel_coords_right, wmask_left, \
                    warp_depth_right, _ = projective_inverse_warp(
                        curr_image_right,
                        tf.squeeze(1.0 / pred_depth_left[s], axis=3),
                        pose_left2right,
                        intrinsics[:, s, :, :],
                        format='matrix')
                curr_proj_error_left = tf.abs(curr_proj_image_left -
                                              curr_image_left)

                curr_proj_image_right, src_pixel_coords_left, wmask_right, \
                    warp_depth_left, _ = projective_inverse_warp(
                        curr_image_left,
                        tf.squeeze(1.0 / pred_depth_right[s], axis=3),
                        pose_right2left,
                        intrinsics[:, s, :, :],
                        format='matrix')
                curr_proj_error_right = tf.abs(curr_proj_image_right -
                                               curr_image_right)

                # Left-right camera consistency loss (disabled):
                # cam_consist_loss = tf.reduce_mean(
                #     (tf.matrix_inverse(pose_left2right) -
                #      pose_right2left)**2) * FLAGS.cam_consist_weight

                #===============
                # Explainability masks
                #===============
                ref_exp_mask = get_reference_explain_mask(s, FLAGS)
                if FLAGS.explain_reg_weight > 0:
                    curr_exp_logits_left = tf.slice(pred_exp_logits_left[s],
                                                    [0, 0, 0, 0],
                                                    [-1, -1, -1, 2])
                    exp_loss += FLAGS.explain_reg_weight * \
                        compute_exp_reg_loss(curr_exp_logits_left,
                                             ref_exp_mask)
                    curr_exp_left = tf.nn.softmax(curr_exp_logits_left)

                # Photo-consistency loss weighted by explainability.
                if FLAGS.explain_reg_weight > 0:
                    pixel_loss += tf.reduce_mean(
                        curr_proj_error_left *
                        tf.expand_dims(curr_exp_left[:, :, :, 1], -1)
                    ) * FLAGS.data_weight / (2**s)
                    exp_mask = tf.expand_dims(curr_exp_left[:, :, :, 1], -1)
                    exp_mask_all.append(exp_mask)

                if FLAGS.explain_reg_weight > 0:
                    curr_exp_logits_right = tf.slice(
                        pred_exp_logits_right[s], [0, 0, 0, 0],
                        [-1, -1, -1, 2])
                    exp_loss += FLAGS.explain_reg_weight * \
                        compute_exp_reg_loss(curr_exp_logits_right,
                                             ref_exp_mask)
                    curr_exp_right = tf.nn.softmax(curr_exp_logits_right)

                # Photo-consistency loss weighted by explainability.
                if FLAGS.explain_reg_weight > 0:
                    pixel_loss += tf.reduce_mean(
                        curr_proj_error_right *
                        tf.expand_dims(curr_exp_right[:, :, :, 1], -1)
                    ) * FLAGS.data_weight / (2**s)

                #=======
                # Left-right depth consistency loss
                #=======
                right_depth_proj_error = consistent_depth_loss(
                    1.0 / pred_depth_right[s], warp_depth_right,
                    src_pixel_coords_right)
                left_depth_proj_error = consistent_depth_loss(
                    1.0 / pred_depth_left[s], warp_depth_left,
                    src_pixel_coords_left)
                consist_loss += tf.reduce_mean(
                    right_depth_proj_error *
                    tf.expand_dims(curr_exp_left[:, :, :, 1], -1)
                ) * FLAGS.consist_weight / (2**s)
                consist_loss += tf.reduce_mean(
                    left_depth_proj_error *
                    tf.expand_dims(curr_exp_right[:, :, :, 1], -1)
                ) * FLAGS.consist_weight / (2**s)

                #========
                # Collect tensors for tensorboard visualization.
                #========
                left_image_all.append(curr_image_left)
                right_image_all.append(curr_image_right)
                proj_image_left_all.append(curr_proj_image_left)
                proj_image_right_all.append(curr_proj_image_right)
                proj_error_stack_all.append(curr_proj_error_right)

            total_loss = pixel_loss + smooth_loss + exp_loss + cam_loss + \
                consist_loss + cam_consist_loss + depth_loss + sig_depth_loss

        #============================================
        # Start training
        #============================================
        with tf.name_scope("train_op"):
            tf.summary.scalar('losses/total_loss', total_loss)
            tf.summary.scalar('losses/smooth_loss', smooth_loss)
            tf.summary.scalar('losses/depth_loss', depth_loss)
            tf.summary.scalar('losses/pixel_loss', pixel_loss)
            tf.summary.scalar('losses/cam_loss', cam_loss)
            tf.summary.scalar('losses/exp_loss', exp_loss)
            tf.summary.scalar('losses/consist_loss', consist_loss)
            tf.summary.scalar('losses/cam_consist_loss', cam_consist_loss)
            tf.summary.scalar('losses/sig_depth_loss', sig_depth_loss)
            # These histograms show the full-resolution (scale 0) predictions.
            tf.summary.histogram('pred_depth_left', pred_depth_left[0])
            tf.summary.histogram('pred_depth_right', pred_depth_right[0])
            tf.summary.histogram('GT_left_depth',
                                 sops.replace_nonfinite(label))

            for s in range(FLAGS.num_scales):
                tf.summary.image('scale%d_left_image' % s,
                                 left_image_all[s])
                tf.summary.image('scale%d_right_image' % s,
                                 right_image_all[s])
                tf.summary.image('scale%d_projected_image_left' % s,
                                 proj_image_left_all[s])
                tf.summary.image('scale%d_projected_image_right' % s,
                                 proj_image_right_all[s])
                tf.summary.image('scale%d_projected_error_left' % s,
                                 proj_error_stack_all[s])
                tf.summary.image('scale%d_pred_depth_left' % s,
                                 1.0 / pred_depth_left[s])
                tf.summary.image('scale%d_pred_depth_right' % s,
                                 1.0 / pred_depth_right[s])
                tf.summary.image('scale%d_exp_mask' % s, exp_mask_all[s])

            # Specify the optimization scheme.
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate,
                                               FLAGS.beta1)
            # create_train_op ensures that when we evaluate it to get the
            # loss, the update_ops are done and the gradient updates are
            # computed.
            train_op = slim.learning.create_train_op(total_loss, optimizer)

        global_step = tf.Variable(0, name='global_step', trainable=False)
        incr_global_step = tf.assign(global_step, global_step + 1)
        saver = tf.train.Saver([var for var in tf.model_variables()])

        with tf.Session() as sess:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                FLAGS.checkpoint_dir + '/sum', sess.graph)
            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            if FLAGS.continue_train:
                if FLAGS.init_checkpoint_file is None:
                    checkpoint = tf.train.latest_checkpoint(
                        FLAGS.checkpoint_dir)
                else:
                    checkpoint = FLAGS.init_checkpoint_file
                print("Resume training from previous checkpoint: %s" %
                      checkpoint)
                saver.restore(sess, checkpoint)

            for step in range(1, FLAGS.max_steps):
                fetches = {
                    "train": train_op,
                    "global_step": global_step,
                    "incr_global_step": incr_global_step
                }
                if step % FLAGS.summary_freq == 0:
                    fetches["loss"] = total_loss
                    fetches["summary"] = merged
                    fetches["GT_cam"] = gt_right_cam
                    fetches["est_cam"] = pred_poses_right
                    fetches["est_cam_left"] = pred_poses_left

                results = sess.run(fetches)
                gs = results["global_step"]

                if step % FLAGS.summary_freq == 0:
                    train_writer.add_summary(results["summary"], gs)
                    print("steps: %d === loss: %.3f" %
                          (gs, results["loss"]))
                    translation_rotation = results["GT_cam"]
                    print(translation_rotation[0])
                    print(results["est_cam"][0])
                    print(results["est_cam_left"][0])

                if step % FLAGS.save_latest_freq == 0:
                    saver.save(sess,
                               FLAGS.checkpoint_dir + '/model',
                               global_step=step)

            coord.request_stop()
            coord.join(threads)
def _test_nonfinite(self, dtype):
    value = 123
    A = np.array([np.nan, np.inf, -np.inf, 100], dtype=dtype)
    result = ops.replace_nonfinite(A, value=value).eval()
    self.assertAllEqual(result, [value] * 3 + [100])
def compute_sad_volume_for_sequence(img0,
                                    images,
                                    rotations,
                                    translations,
                                    intrinsics,
                                    depth_values,
                                    channel_weights=None,
                                    patch_size=3,
                                    sad_shift=None,
                                    name=None):
    """Computes the confidence weighted sum of SAD cost volumes between img0
    and the given images.

    img0: Tensor
        image in NCHW format

    images: list of Tensor
        List of images in NCHW format

    rotations: list of Tensor
        rotations in 3d angle axis format for each image in 'images'

    translations: list of Tensor
        translations for each image in 'images'

    intrinsics: Tensor
        Intrinsic parameters valid for all images and img0

    depth_values: list of float or Tensor
        Either a list of inverse depth values or a tensor with shape NCHW

    channel_weights: list of float
        Individual weighting factors for the image channels. Defaults to
        [5/32, 16/32, 11/32] for 3 channel images and [1,..]/num_channels
        for channels != 3.

    patch_size: int
        The spatial patch size

    sad_shift: float
        Shift the valid sad values by this value
    """
    with tf.name_scope(name, "computeSADVolumeForSequence",
                       [img0, intrinsics] + images + rotations +
                       translations):
        img0 = tf.convert_to_tensor(img0, name='img0', dtype=tf.float32)
        images = [
            tf.convert_to_tensor(v, name='images{0}'.format(i),
                                 dtype=np.float32)
            for i, v in enumerate(images)
        ]
        rotations = [
            tf.convert_to_tensor(v, name='rotations{0}'.format(i),
                                 dtype=np.float32)
            for i, v in enumerate(rotations)
        ]
        translations = [
            tf.convert_to_tensor(v, name='translations{0}'.format(i),
                                 dtype=np.float32)
            for i, v in enumerate(translations)
        ]
        intrinsics = tf.convert_to_tensor(intrinsics, name='intrinsics',
                                          dtype=tf.float32)
        assert len(images) == len(rotations)
        assert len(images) == len(translations)
        assert not isinstance(intrinsics, (list, tuple))

        border_radius = patch_size // 2 + 1

        cv_list = []
        conf_list = []
        depths = depth_values
        for i in range(len(images)):
            image = images[i]
            rotation = rotations[i]
            translation = translations[i]
            warped, mask, depths = create_depthsweep_images_tensor(
                image=image,
                rotation=rotation,
                translation=translation,
                intrinsics=intrinsics,
                depth_values=depths,
                border_radius=border_radius,
            )
            cv, conf = compute_sad_volume_with_confidence(
                img0,
                warped,
                mask,
                channel_weights=channel_weights,
                patch_size=patch_size)
            cv_list.append(cv)
            conf_list.append(conf)

        if sad_shift is None:
            multiplied_cv = [
                cv_list[i] * conf_list[i] for i in range(len(cv_list))
            ]
        else:
            multiplied_cv = [(cv_list[i] + sad_shift) * conf_list[i]
                             for i in range(len(cv_list))]

        conf_sum = tf.add_n(conf_list)
        # Normalize by the summed confidences; replace_nonfinite cleans up
        # pixels where the confidence sum is zero.
        cv = sops.replace_nonfinite(tf.add_n(multiplied_cv) / conf_sum)
        return cv, conf_sum
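# Example: a minimal sketch of calling compute_sad_volume_for_sequence for a
# two-frame sequence. The shapes, the normalized-intrinsics layout and the
# inverse-depth spacing below are assumptions for illustration, not values
# prescribed by this code.
def _example_sad_volume_usage():
    img0 = tf.zeros([1, 3, 96, 128])  # keyframe, NCHW
    img1 = tf.zeros([1, 3, 96, 128])  # neighbouring frame
    rotation = tf.zeros([1, 3])       # angle-axis rotation
    translation = tf.constant([[0.1, 0.0, 0.0]])
    intrinsics = tf.constant([[0.89, 1.19, 0.5, 0.5]])  # assumed fx, fy, cx, cy
    depth_values = [0.01 + 0.025 * i for i in range(32)]  # inverse depths
    cv, conf = compute_sad_volume_for_sequence(img0, [img1], [rotation],
                                               [translation], intrinsics,
                                               depth_values)
    return cv, conf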