示例#1
0
文件: test_model1.py 项目: shgold/sh
    def compute_photometric_loss(self,
                                 pred_depth,
                                 rgb_src,
                                 rgb_tgt,
                                 intrinsic,
                                 RT,
                                 mask=None):
        """Sum a weighted photometric loss over a multi-scale pyramid.

        For every pyramid level the source image is inverse-warped with the
        predicted depth and pose, compared against the target image via
        ``helpers.photometric_Loss`` and weighted by
        ``2 ** (scale - num_scales)``.

        Args:
            pred_depth: predicted depth tensor; each pyramid level must be
                rank 4 with a trailing channel axis of size 1.
            rgb_src: image that gets warped.
            rgb_tgt: image the warped result is compared against.
            intrinsic: camera intrinsics, rescaled per level with
                ``helpers.scale_intrinsics``.
            RT: pose handed to ``projective_inverse_warp``.
            mask: optional mask forwarded (per level) to the loss helper;
                ``None`` disables masking.

        Returns:
            The accumulated weighted loss (``0`` if the pyramid is empty).
        """
        # Resolution the intrinsics refer to; the per-level ratio is relative
        # to it. A float keeps the ratio fractional under Python 2 as well.
        base_size = 256.0

        depth_pyramid = helpers.multiscale(pred_depth)
        src_pyramid = helpers.multiscale(rgb_src)
        tgt_pyramid = helpers.multiscale(rgb_tgt)
        mask_pyramid = helpers.multiscale(mask) if mask is not None else None

        num_scales = len(depth_pyramid)
        photometric_loss = 0

        for scale, depth_ in enumerate(depth_pyramid):
            mask_ = None if mask_pyramid is None else mask_pyramid[scale]

            # compute the corresponding intrinsic parameters
            _, h_, w_, _ = depth_.get_shape().as_list()
            intrinsic_ = helpers.scale_intrinsics(intrinsic,
                                                  h_ / base_size,
                                                  w_ / base_size)

            # inverse warp from a nearby frame to the current frame.
            # Only the channel axis is squeezed: an unrestricted tf.squeeze
            # would also remove the batch axis when the batch size is 1.
            warped_ = projective_inverse_warp(src_pyramid[scale],
                                              tf.squeeze(depth_, axis=-1),
                                              RT,
                                              intrinsic_,
                                              ret_flows=False)

            level_weight = 2.0 ** (scale - num_scales)
            photometric_loss += helpers.photometric_Loss(
                warped_, tgt_pyramid[scale], mask_) * level_weight

        return photometric_loss
示例#2
0
文件: test_model3.py 项目: shgold/sh
    def infer_tgt_views(self, raw_src_image, RT, intrinsic):
        """Build the inference graph that predicts the target view.

        The source image is encoded into a latent code whose first 600
        channels are reshaped to rows of 4 and multiplied by the inverse pose;
        the remaining channels are passed through untouched. Three decoders
        then produce depth, a blending mask and pixels, and the final target
        image blends the decoded pixels with the depth-warped source image.

        Args:
            raw_src_image: rank-4 source image tensor (converted with
                ``self.image2tensor``).
            RT: pose input; stored on ``self.manual_check`` and converted with
                ``self.reshape_posematrix``.
            intrinsic: camera intrinsics for ``projective_inverse_warp``.

        Returns:
            dict with keys ``out_depth``, ``out_mask``, ``out_pixel``,
            ``warped_image`` and ``tgt_image``.

        Raises:
            ValueError: if ``self.data`` is not ``'car'`` (no depth range is
                defined for other datasets).
        """
        # Depth range parameters exist only for the 'car' dataset. Fail early
        # with a clear message instead of an UnboundLocalError mid-graph.
        if self.data != 'car':
            raise ValueError(
                "infer_tgt_views: no depth scale/bias defined for data=%r "
                "(only 'car' is supported)" % (self.data,))
        depth_bias = 2
        depth_scale = 1.0

        z_size = 856  # total latent channels produced by the encoder
        z_geo_size = 600  # leading channels that get pose-transformed

        b, _, _, _ = raw_src_image.get_shape().as_list()

        with tf.name_scope('preprocessing'):
            src_image = self.image2tensor(raw_src_image)
            self.manual_check = RT
            RT, inv_RT = self.reshape_posematrix(RT)

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(src_image, num_outputs=z_size)

            # transform latent vector
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]

            z_geo_tf = tf.matmul(z_geo, inv_RT)
            # TODO: solving z_h and z_w values (this reshape hard-codes a
            # 1 x 1 spatial extent for the encoder output)
            z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)

        with tf.name_scope('Depth'):
            depth_dec_out = Decoder(z_geo_tf,
                                    1,
                                    variable_scope='Depth_Decoder')
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Mask'):
            mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
            mask_pred = tf.nn.sigmoid(mask_dec_out)

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
            pixel_pred = tf.nn.tanh(pixel_dec_out)

        with tf.name_scope('prediction'):
            # Squeeze only the channel axis so a batch of size 1 keeps its
            # batch dimension.
            warped_pred = projective_inverse_warp(src_image,
                                                  tf.squeeze(depth_pred,
                                                             axis=-1),
                                                  RT,
                                                  intrinsic,
                                                  ret_flows=False)

            # Blend: decoded pixels where the mask is high, warped source
            # elsewhere.
            fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
                warped_pred, 1 - mask_pred)

        # Collect output tensors
        return {
            'out_depth': depth_pred,
            'out_mask': mask_pred,
            'out_pixel': pixel_pred,
            'warped_image': warped_pred,
            'tgt_image': fake_tgt,
        }
示例#3
0
文件: test_model2.py 项目: shgold/sh
    def build_train_graph(self, inputs):
        """Build the cycle (src -> tgt -> src) training graph and summaries.

        The source image/depth is pushed through
        ``self.into_depth_and_rgb_block`` with the pose to predict the target
        view, and the prediction is pushed back with the inverse pose to
        reconstruct the source. The total loss combines masked L1 losses on
        the reconstructed source depth/image with two multi-scale photometric
        losses.

        Args:
            inputs: mapping providing ``'A'`` (target image), ``'B'`` (source
                image), ``'A_depth'``, ``'B_depth'``, ``'intrinsic'`` and
                ``'RT'``.

        Returns:
            The Adam training op minimising the total loss.

        Raises:
            ValueError: if ``self.data`` is not ``'car'`` (the foreground
                mask is only defined for that dataset).
        """
        # The foreground mask below is only defined for the 'car' dataset.
        # Fail before building any graph rather than with a NameError later.
        if self.data != 'car':
            raise ValueError(
                "build_train_graph: no foreground mask defined for data=%r "
                "(only 'car' is supported)" % (self.data,))

        with tf.name_scope('input_data'):
            raw_src_image = inputs['B']
            raw_tgt_image = inputs['A']
            raw_src_depth = inputs['B_depth']
            raw_tgt_depth = inputs['A_depth']
            intrinsic = inputs['intrinsic']
            RT, inv_RT = self.reshape_posematrix(inputs['RT'])

        with tf.name_scope('inference'):
            with tf.name_scope('src2tgt'):
                predictions1 = self.into_depth_and_rgb_block(
                    raw_src_image, raw_src_depth, RT)
                pred_tgt_image = predictions1['out_pixel']
                pred_tgt_depth = predictions1['out_depth']
            with tf.name_scope('tgt2src'):
                # Feed the predicted target back through the same block
                # (weights shared via AUTO_REUSE) with the inverse pose.
                input_pred_tgt_image = self.tensor2image(pred_tgt_image)
                predictions2 = self.into_depth_and_rgb_block(
                    input_pred_tgt_image,
                    pred_tgt_depth,
                    inv_RT,
                    reuse_weights=tf.AUTO_REUSE)
                pred_src_image = predictions2['out_pixel']
                pred_src_depth = predictions2['out_depth']

        with tf.name_scope('loss'):
            # pixel value=1 if foreground otherwise pixel value=0
            fg_src_mask = tf.cast(raw_src_depth > 0,
                                  dtype=tf.float32)  # b x 256 x 256
            src_image = self.image2tensor(raw_src_image)
            src_depth = tf.expand_dims(raw_src_depth, axis=-1)

            # loss 1: masked L1 on the reconstructed source depth and image
            depth_loss = helpers.masked_L1_Loss(pred_src_depth,
                                                src_depth,
                                                mask=fg_src_mask)
            pixel_loss = helpers.masked_L1_Loss(pred_src_image,
                                                src_image,
                                                mask=fg_src_mask)

            # loss 2: multi-scale photometric consistency in both directions
            photometric_loss1 = self.compute_photometric_loss(pred_tgt_depth,
                                                              src_image,
                                                              pred_tgt_image,
                                                              intrinsic,
                                                              RT,
                                                              mask=None)
            photometric_loss2 = self.compute_photometric_loss(pred_src_depth,
                                                              pred_tgt_image,
                                                              src_image,
                                                              intrinsic,
                                                              inv_RT,
                                                              mask=fg_src_mask)

            total_loss = (depth_loss + pixel_loss +
                          photometric_loss1 * 0.1 + photometric_loss2)

        with tf.name_scope('train_op'):
            optim = tf.train.AdamOptimizer(self.learning_rate, self.beta1)
            grads_and_vars = optim.compute_gradients(
                total_loss, var_list=tf.trainable_variables())
            train_op = optim.apply_gradients(grads_and_vars)

        # Summaries
        # Reference warps using the ground-truth depths, for visual checking.
        tgt_img_sample = projective_inverse_warp(
            self.image2tensor(raw_src_image),
            raw_tgt_depth,
            RT,
            intrinsic,
            ret_flows=False)
        tgt_img_sample2 = projective_inverse_warp(
            self.image2tensor(raw_tgt_image),
            raw_src_depth,
            inv_RT,
            intrinsic,
            ret_flows=False)
        # NOTE(review): both summaries are registered under the same name, so
        # the second one presumably gets an auto-uniquified tag - confirm
        # whether distinct names were intended.
        tf.summary.image('sample_image', tgt_img_sample)
        tf.summary.image('sample_image', tgt_img_sample2)

        tf.summary.scalar('total_loss', total_loss)
        tf.summary.scalar('depth_loss', depth_loss)
        tf.summary.scalar('pixel_loss', pixel_loss)
        tf.summary.scalar('photometric_loss1', photometric_loss1)
        tf.summary.scalar('photometric_loss2', photometric_loss2)

        tf.summary.image('raw_src_image', raw_src_image)
        tf.summary.image('raw_tgt_image', raw_tgt_image)
        tf.summary.image('raw_src_depth', tf.expand_dims(raw_src_depth,
                                                         axis=-1))
        tf.summary.image('raw_tgt_depth', tf.expand_dims(raw_tgt_depth,
                                                         axis=-1))
        tf.summary.image('pred_tgt_image', self.tensor2image(pred_tgt_image))
        tf.summary.image('pred_src_image', self.tensor2image(pred_src_image))
        tf.summary.image('pred_src_depth', pred_src_depth)
        tf.summary.image('pred_tgt_depth', pred_tgt_depth)

        return train_op
示例#4
0
文件: test_model2.py 项目: shgold/sh
    def build_train_graph(self, is_train=True):
        """Build the encoder/decoder training graph, losses and optimizer.

        Reads ``self.src_image``, ``self.tgt_image``, ``self.RT``,
        ``self.inv_RT``, ``self.intrinsic``, ``self.batch_size``,
        ``self.loss_weight``, ``self.learning_rate`` and ``self.beta1``.
        Sets ``self.depth_scale_vis``, ``self.depth_bias_vis``,
        ``self.eval_loss``, ``self.total_loss``, ``self.optimizer`` and
        ``self.train_op``.

        Args:
            is_train: accepted for interface compatibility; not used by the
                body.

        Raises:
            ValueError: if ``self.data`` is not ``'car'`` (no depth range is
                defined for other datasets).
        """
        # Depth range parameters exist only for the 'car' dataset. Fail early
        # with a clear message instead of an UnboundLocalError mid-graph.
        if self.data != 'car':
            raise ValueError(
                "build_train_graph: no depth scale/bias defined for data=%r "
                "(only 'car' is supported)" % (self.data,))
        depth_bias = 2
        depth_scale = 1.0

        z_size = 856  # total latent channels produced by the encoder
        z_geo_size = 600  # leading channels that get pose-transformed

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(self.src_image, num_outputs=z_size)
            print('encoder out', z_enc_out)

            # transform latent vector
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size],
                               [self.batch_size, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]
            print('z geo', z_geo)
            print('z app', z_app)

            z_geo_tf = tf.matmul(z_geo, self.inv_RT)
            print('z geo tf', z_geo_tf)
            print('inv_RT', self.inv_RT)

            # TODO: solving z_h and z_w values (this reshape hard-codes a
            # 1 x 1 spatial extent for the encoder output)
            z_geo_tf = tf.reshape(z_geo_tf,
                                  [self.batch_size, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)
            print('z tf', z_tf)

        with tf.name_scope('Depth'):
            self.depth_scale_vis = 125. / depth_scale
            self.depth_bias_vis = depth_bias - depth_scale

            depth_dec_out = Decoder(z_geo_tf, 1,
                                    variable_scope='Depth_Decoder')
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Mask'):
            mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
            mask_pred = tf.nn.sigmoid(mask_dec_out)
            print('mask pred', mask_pred)

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
            pixel_pred = tf.nn.tanh(pixel_dec_out)
            print('pixel pred', pixel_pred)

        with tf.name_scope('prediction'):
            # Squeeze only the channel axis so a batch of size 1 keeps its
            # batch dimension.
            warped_pred = projective_inverse_warp(
                self.src_image,
                tf.squeeze(depth_pred, axis=-1),
                self.RT,
                self.intrinsic,
                ret_flows=False)
            print('warped pred', warped_pred)

            # Blend: decoded pixels where the mask is high, warped source
            # elsewhere.
            fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
                warped_pred, 1 - mask_pred)

        with tf.name_scope('loss'):
            # Each term is a mean absolute difference against the target
            # image; the names say which branch the term supervises.
            depth_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - warped_pred)) * self.loss_weight
            pixel_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - pixel_pred)) * self.loss_weight
            mask_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - fake_tgt)) * self.loss_weight

            self.total_loss = depth_loss + pixel_loss + mask_loss

            self.eval_loss = {
                'depth_loss': depth_loss,
                'pixel_loss': pixel_loss,
                'mask_loss': mask_loss,
                'total_loss': self.total_loss,
            }

        # Summaries
        tf.summary.image('src_image', self.deprocess_image(self.src_image))
        tf.summary.image('tgt_image', self.deprocess_image(self.tgt_image))

        tf.summary.image('fake_tgt_image', self.deprocess_image(fake_tgt))
        tf.summary.image('pixel_pred_image', self.deprocess_image(pixel_pred))
        # NOTE(review): unlike the other images this one is logged without
        # deprocess_image - confirm that is intended.
        tf.summary.image('warped_pred_image', warped_pred)
        tf.summary.scalar('total_loss', self.total_loss)

        # Define optimizers
        with tf.name_scope('train_optimizers'):
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                    self.beta1)
            grads_and_vars = self.optimizer.compute_gradients(
                self.total_loss, var_list=tf.trainable_variables())
            self.train_op = self.optimizer.apply_gradients(grads_and_vars)