def compute_photometric_loss(self, pred_depth, rgb_src, rgb_tgt, intrinsic, RT, mask=None):
    """Sum a photometric loss over every scale of a multi-scale pyramid.

    Each input is expanded with ``helpers.multiscale``. At every scale the
    source image is inverse-warped with the predicted depth and ``RT``,
    compared with the target through ``helpers.photometric_Loss``, and the
    result is added to the total with weight ``2 ** (scale - num_scales)``.

    Args:
        pred_depth: Predicted depth. Every pyramid level must be rank 4 with
            a static shape; the last axis is assumed to be a single depth
            channel -- TODO confirm against ``helpers.multiscale``.
        rgb_src: Image that gets warped.
        rgb_tgt: Image the warped result is compared against.
        intrinsic: Camera intrinsics, rescaled per level with
            ``helpers.scale_intrinsics``.
        RT: Pose handed unchanged to ``projective_inverse_warp``.
        mask: Optional mask. When given it is expanded with
            ``helpers.multiscale`` and the matching level is passed to the
            loss; otherwise ``None`` is passed.

    Returns:
        The weighted sum of the per-scale losses (the plain integer ``0``
        if the pyramid is empty).
    """
    pred_array = helpers.multiscale(pred_depth)
    rgb_src_array = helpers.multiscale(rgb_src)
    rgb_tgt_array = helpers.multiscale(rgb_tgt)
    mask_array = helpers.multiscale(mask) if mask is not None else None

    num_scales = len(pred_array)
    photometric_loss = 0
    for scale, pred_ in enumerate(pred_array):
        rgb_src_ = rgb_src_array[scale]
        rgb_tgt_ = rgb_tgt_array[scale]
        mask_ = mask_array[scale] if mask_array is not None else None

        # Rescale the intrinsics to this level. 256 is presumably the
        # full-resolution side length -- TODO confirm. The float literal
        # keeps the ratio from truncating to 0 under Python 2 division.
        _, h_, w_, _ = pred_.get_shape().as_list()
        intrinsic_ = helpers.scale_intrinsics(intrinsic, h_ / 256.0, w_ / 256.0)

        # Drop only the trailing channel axis. An unqualified squeeze would
        # also remove the batch axis whenever the batch size is 1.
        depth_ = tf.squeeze(pred_, axis=-1)

        # Inverse warp from a nearby frame to the current frame.
        warped_ = projective_inverse_warp(
            rgb_src_, depth_, RT, intrinsic_, ret_flows=False)
        photometric_loss += helpers.photometric_Loss(
            warped_, rgb_tgt_, mask_) * (2 ** (scale - num_scales))
    return photometric_loss
def infer_tgt_views(self, raw_src_image, RT, intrinsic):
    """Build the graph that predicts a target view from one source image.

    The source image is encoded, the leading 600 latent channels are
    multiplied by the inverse pose, and three decoders produce depth, a
    mask and pixels. The source image is also inverse-warped with the
    predicted depth, and the final target image blends the decoded pixels
    and the warped image using the mask.

    Args:
        raw_src_image: Rank-4 source image tensor with a static batch size.
        RT: Pose; stored on ``self.manual_check`` and then converted with
            ``self.reshape_posematrix``.
        intrinsic: Camera intrinsics passed to ``projective_inverse_warp``.

    Returns:
        dict with keys ``'out_depth'``, ``'out_mask'``, ``'out_pixel'``,
        ``'warped_image'`` and ``'tgt_image'``.

    Raises:
        ValueError: If ``self.data`` is not ``'car'``, the only dataset for
            which depth scale and bias are defined here.
    """
    b, _, _, _ = raw_src_image.get_shape().as_list()
    z_size = 856
    geo_size = 600  # leading latent channels that are transformed by the pose

    with tf.name_scope('preprocessing'):
        src_image = self.image2tensor(raw_src_image)
        # Keeps the untouched pose on the instance (looks like a debugging
        # hook); preserved in case something else reads it.
        self.manual_check = RT
        RT, inv_RT = self.reshape_posematrix(RT)

    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(src_image, num_outputs=z_size)

        # Transform the latent vector: view the first geo_size channels as
        # rows of 4 values and right-multiply them by the inverse pose.
        z_geo = tf.reshape(z_enc_out[:, :, :, :geo_size], [b, -1, 4])
        z_app = z_enc_out[:, :, :, geo_size:]
        z_geo_tf = tf.matmul(z_geo, inv_RT)
        # TODO: the 1x1 spatial size is hard-coded; derive it from the
        # encoder output height/width instead.
        z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, geo_size])
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)

    with tf.name_scope('Depth'):
        if self.data == 'car':
            depth_bias = 2
            depth_scale = 1.0
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on depth_scale.
            raise ValueError(
                'No depth scale/bias defined for dataset %r' % (self.data,))
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

    with tf.name_scope('Mask'):
        mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
        mask_pred = tf.nn.sigmoid(mask_dec_out)

    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
        pixel_pred = tf.nn.tanh(pixel_dec_out)

    with tf.name_scope('prediction'):
        # Drop only the trailing channel axis so a batch of one keeps its
        # batch dimension (a bare tf.squeeze would remove it as well).
        warped_pred = projective_inverse_warp(
            src_image, tf.squeeze(depth_pred, axis=-1), RT, intrinsic,
            ret_flows=False)
        # Mask-weighted blend of the decoded pixels and the warped source.
        fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
            warped_pred, 1 - mask_pred)

    # Collect output tensors
    pred = {
        'out_depth': depth_pred,
        'out_mask': mask_pred,
        'out_pixel': pixel_pred,
        'warped_image': warped_pred,
        'tgt_image': fake_tgt,
    }
    return pred
def build_train_graph(self, inputs):
    """Assemble the source -> target -> source training graph.

    The source view is pushed through ``self.into_depth_and_rgb_block``
    with the pose, and the predicted target view is pushed back through
    the same block with the inverse pose. Masked L1 losses on the
    reconstructed source plus two photometric losses form the total loss,
    which is minimised with Adam. Image and scalar summaries are
    registered along the way.

    Args:
        inputs: Mapping read with the keys ``'A'``, ``'B'``, ``'A_depth'``,
            ``'B_depth'``, ``'intrinsic'`` and ``'RT'``. ``'B'``/``'B_depth'``
            are used as the source view, ``'A'``/``'A_depth'`` as the target.

    Returns:
        The op returned by ``AdamOptimizer.apply_gradients``.
    """
    # NOTE(review): this file defines a second
    # build_train_graph(self, is_train=True) further down. If both live in
    # the same class, the later definition replaces this one -- confirm.
    with tf.name_scope('input_data'):
        src_rgb_raw = inputs['B']
        tgt_rgb_raw = inputs['A']
        src_depth_raw = inputs['B_depth']
        tgt_depth_raw = inputs['A_depth']
        intrinsic = inputs['intrinsic']
        RT, inv_RT = self.reshape_posematrix(inputs['RT'])

    with tf.name_scope('inference'):
        with tf.name_scope('src2tgt'):
            forward_out = self.into_depth_and_rgb_block(
                src_rgb_raw, src_depth_raw, RT)
            pred_tgt_image = forward_out['out_pixel']
            pred_tgt_depth = forward_out['out_depth']

        with tf.name_scope('tgt2src'):
            # Feed the predicted target back in, sharing weights with the
            # forward pass.
            cycle_input = self.tensor2image(pred_tgt_image)
            backward_out = self.into_depth_and_rgb_block(
                cycle_input, pred_tgt_depth, inv_RT,
                reuse_weights=tf.AUTO_REUSE)
            pred_src_image = backward_out['out_pixel']
            pred_src_depth = backward_out['out_depth']

    with tf.name_scope('loss'):
        # NOTE(review): fg_src_mask is only bound for the 'car' dataset;
        # any other value of self.data fails below with a NameError.
        if self.data == 'car':
            # 1.0 where the source depth is positive (foreground), else 0.0.
            fg_src_mask = tf.cast(src_depth_raw > 0, dtype=tf.float32)

        src_image = self.image2tensor(src_rgb_raw)
        src_depth = tf.expand_dims(src_depth_raw, axis=-1)

        # Loss 1: masked L1 on the cycle-reconstructed source depth/image.
        depth_loss = helpers.masked_L1_Loss(
            pred_src_depth, src_depth, mask=fg_src_mask)
        pixel_loss = helpers.masked_L1_Loss(
            pred_src_image, src_image, mask=fg_src_mask)

        # Loss 2: multi-scale photometric consistency in both directions.
        photometric_loss1 = self.compute_photometric_loss(
            pred_tgt_depth, src_image, pred_tgt_image, intrinsic, RT,
            mask=None)
        photometric_loss2 = self.compute_photometric_loss(
            pred_src_depth, pred_tgt_image, src_image, intrinsic, inv_RT,
            mask=fg_src_mask)

        total_loss = (depth_loss + pixel_loss + photometric_loss1 * 0.1
                      + photometric_loss2)

    with tf.name_scope('train_op'):
        trainable = list(tf.trainable_variables())
        adam = tf.train.AdamOptimizer(self.learning_rate, self.beta1)
        gradient_pairs = adam.compute_gradients(total_loss, var_list=trainable)
        train_op = adam.apply_gradients(gradient_pairs)

    # Summaries
    # Reference warps built from the ground-truth depths. Both are logged
    # under the same 'sample_image' name, as in the original.
    tgt_img_sample = projective_inverse_warp(
        self.image2tensor(src_rgb_raw), tgt_depth_raw, RT, intrinsic,
        ret_flows=False)
    tgt_img_sample2 = projective_inverse_warp(
        self.image2tensor(tgt_rgb_raw), src_depth_raw, inv_RT, intrinsic,
        ret_flows=False)
    for sample in (tgt_img_sample, tgt_img_sample2):
        tf.summary.image('sample_image', sample)

    scalar_summaries = (
        ('total_loss', total_loss),
        ('depth_loss', depth_loss),
        ('pixel_loss', pixel_loss),
        ('photometric_loss1', photometric_loss1),
        ('photometric_loss2', photometric_loss2),
    )
    for tag, value in scalar_summaries:
        tf.summary.scalar(tag, value)

    tf.summary.image('raw_src_image', src_rgb_raw)
    tf.summary.image('raw_tgt_image', tgt_rgb_raw)
    tf.summary.image('raw_src_depth', tf.expand_dims(src_depth_raw, axis=-1))
    tf.summary.image('raw_tgt_depth', tf.expand_dims(tgt_depth_raw, axis=-1))
    tf.summary.image('pred_tgt_image', self.tensor2image(pred_tgt_image))
    tf.summary.image('pred_src_image', self.tensor2image(pred_src_image))
    tf.summary.image('pred_src_depth', pred_src_depth)
    tf.summary.image('pred_tgt_depth', pred_tgt_depth)

    return train_op
def build_train_graph(self, is_train=True):
    """Build the encoder/decoder training graph from tensors held on ``self``.

    Reads ``self.src_image``, ``self.tgt_image``, ``self.RT``,
    ``self.inv_RT``, ``self.intrinsic``, ``self.batch_size``, ``self.data``,
    ``self.loss_weight``, ``self.learning_rate`` and ``self.beta1``.

    Sets ``self.depth_scale_vis``, ``self.depth_bias_vis``,
    ``self.eval_loss`` (keys ``'depth_loss'``, ``'pixel_loss'``,
    ``'mask_loss'``, ``'total_loss'``), ``self.total_loss``,
    ``self.optimizer`` and ``self.train_op``. Intermediate tensors are
    printed to stdout while the graph is built.

    Args:
        is_train: Not used by this method.

    Returns:
        None; results are exposed through the attributes listed above.

    Raises:
        ValueError: If ``self.data`` is not ``'car'``, the only dataset for
            which depth scale and bias are defined here.
    """
    z_size = 856
    geo_size = 600  # leading latent channels that are transformed by the pose

    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(self.src_image, num_outputs=z_size)
        print('encoder out', z_enc_out)

        # Transform the latent vector: view the first geo_size channels as
        # rows of 4 values and right-multiply them by the inverse pose.
        z_geo = tf.reshape(
            z_enc_out[:, :, :, :geo_size], [self.batch_size, -1, 4])
        z_app = z_enc_out[:, :, :, geo_size:]
        print('z geo', z_geo)
        print('z app', z_app)

        z_geo_tf = tf.matmul(z_geo, self.inv_RT)
        print('z geo tf', z_geo_tf)
        print('inv_RT', self.inv_RT)

        # TODO: the 1x1 spatial size is hard-coded; derive it from the
        # encoder output height/width instead.
        z_geo_tf = tf.reshape(z_geo_tf, [self.batch_size, 1, 1, geo_size])
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)
        print('z tf', z_tf)

    with tf.name_scope('Depth'):
        if self.data == 'car':
            depth_bias = 2
            depth_scale = 1.0
            self.depth_scale_vis = 125. / depth_scale
            self.depth_bias_vis = depth_bias - depth_scale
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on depth_scale.
            raise ValueError(
                'No depth scale/bias defined for dataset %r' % (self.data,))
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

    with tf.name_scope('Mask'):
        mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
        mask_pred = tf.nn.sigmoid(mask_dec_out)
        print('mask pred', mask_pred)

    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
        pixel_pred = tf.nn.tanh(pixel_dec_out)
        print('pixel pred', pixel_pred)

    with tf.name_scope('prediction'):
        # Drop only the trailing channel axis so a batch of one keeps its
        # batch dimension (a bare tf.squeeze would remove it as well).
        warped_pred = projective_inverse_warp(
            self.src_image, tf.squeeze(depth_pred, axis=-1), self.RT,
            self.intrinsic, ret_flows=False)
        print('warped pred', warped_pred)
        # Mask-weighted blend of the decoded pixels and the warped source.
        fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
            warped_pred, 1 - mask_pred)

    with tf.name_scope('loss'):
        self.eval_loss = {}
        # Each term is a mean absolute error against the target image,
        # scaled by the shared loss weight.
        depth_loss = tf.reduce_mean(
            tf.abs(self.tgt_image - warped_pred)) * self.loss_weight
        pixel_loss = tf.reduce_mean(
            tf.abs(self.tgt_image - pixel_pred)) * self.loss_weight
        mask_loss = tf.reduce_mean(
            tf.abs(self.tgt_image - fake_tgt)) * self.loss_weight
        self.total_loss = depth_loss + pixel_loss + mask_loss

        self.eval_loss['depth_loss'] = depth_loss
        self.eval_loss['pixel_loss'] = pixel_loss
        self.eval_loss['mask_loss'] = mask_loss
        self.eval_loss['total_loss'] = self.total_loss

    # Summaries
    tf.summary.image('src_image', self.deprocess_image(self.src_image))
    tf.summary.image('tgt_image', self.deprocess_image(self.tgt_image))
    tf.summary.image('fake_tgt_image', self.deprocess_image(fake_tgt))
    tf.summary.image('pixel_pred_image', self.deprocess_image(pixel_pred))
    # NOTE(review): unlike the images above, warped_pred is logged without
    # deprocess_image -- confirm that is intentional.
    tf.summary.image('warped_pred_image', warped_pred)
    tf.summary.scalar('total_loss', self.total_loss)

    # Define optimizers
    with tf.name_scope('train_optimizers'):
        self.optimizer = tf.train.AdamOptimizer(
            self.learning_rate, self.beta1)
        train_vars = list(tf.trainable_variables())
        grads_and_vars = self.optimizer.compute_gradients(
            self.total_loss, var_list=train_vars)
        self.train_op = self.optimizer.apply_gradients(grads_and_vars)