示例#1
0
文件: test_model3.py 项目: shgold/sh
    def infer_tgt_views(self, raw_src_image, RT, intrinsic):
        """Build the graph that predicts a target view from one source image.

        The source image is encoded into a latent code. Its first
        ``z_geo_size`` channels are regrouped into 4-vectors and multiplied by
        the ``inv_RT`` matrix returned by ``self.reshape_posematrix``; the
        remaining channels are passed through untouched. The transformed code
        feeds a depth decoder, a mask decoder and a pixel decoder. The
        predicted depth drives ``projective_inverse_warp`` on the source
        image, and the final image is
        ``pixel_pred * mask_pred + warped_pred * (1 - mask_pred)``.

        Args:
            raw_src_image: Rank-4 tensor. Its first (batch) dimension must be
                statically known because it is used in ``tf.reshape``.
            RT: Pose input handed to ``self.reshape_posematrix``. The
                unmodified value is also stored on ``self.manual_check``.
            intrinsic: Forwarded unchanged to ``projective_inverse_warp``.

        Returns:
            dict with the keys ``'out_depth'``, ``'out_mask'``,
            ``'out_pixel'``, ``'warped_image'`` and ``'tgt_image'``.

        Raises:
            ValueError: If ``self.data`` is not ``'car'``; no depth range is
                defined for any other value.
        """
        # Only 'car' has a depth range defined. Fail before any graph op is
        # created rather than hitting an unbound local half-way through.
        if self.data != 'car':
            raise ValueError(
                "No depth range configured for data=%r; only 'car' is "
                "supported." % (self.data,))
        depth_bias = 2
        depth_scale = 1.0

        # The 4-way unpack doubles as a rank check; only the batch size is used.
        b, _, _, _ = raw_src_image.get_shape().as_list()
        z_size = 856
        # Number of leading latent channels that are transformed by the pose.
        z_geo_size = 600

        with tf.name_scope('preprocessing'):
            src_image = self.image2tensor(raw_src_image)
            self.manual_check = RT
            RT, inv_RT = self.reshape_posematrix(RT)

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(src_image, num_outputs=z_size)

            # Split the latent code: the leading channels are regrouped into
            # rows of 4 so they can be multiplied by inv_RT; the rest is kept.
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]

            z_geo_tf = tf.matmul(z_geo, inv_RT)

            # TODO: solving z_h and z_w values
            # The reshape below hard-codes a 1x1 spatial extent for the
            # encoder output.
            z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)

        with tf.name_scope('Depth'):
            depth_dec_out = Decoder(z_geo_tf,
                                    1,
                                    variable_scope='Depth_Decoder')
            # tanh is in (-1, 1), so depth lies in
            # (depth_bias - depth_scale, depth_bias + depth_scale).
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Mask'):
            mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
            mask_pred = tf.nn.sigmoid(mask_dec_out)

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
            pixel_pred = tf.nn.tanh(pixel_dec_out)

        with tf.name_scope('prediction'):
            warped_pred = projective_inverse_warp(src_image,
                                                  tf.squeeze(depth_pred),
                                                  RT,
                                                  intrinsic,
                                                  ret_flows=False)

            # Blend: the mask selects the pixel decoder output, its complement
            # selects the warped source image.
            fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
                warped_pred, 1 - mask_pred)

        # Collect output tensors
        pred = {
            'out_depth': depth_pred,
            'out_mask': mask_pred,
            'out_pixel': pixel_pred,
            'warped_image': warped_pred,
            'tgt_image': fake_tgt,
        }

        return pred
示例#2
0
文件: test_model1.py 项目: shgold/sh
    def into_depth_and_rgb_block(self, raw_src_image, raw_src_depth, pose):
        """Build a graph predicting depth and RGB from a source RGB-D input.

        The source image and depth are concatenated along the channel axis
        and encoded into a latent code. The first ``z_geo_size`` channels are
        regrouped into 4-vectors and multiplied by ``pose``; the remaining
        channels are passed through untouched. The transformed code feeds a
        depth decoder and a pixel decoder. Encoder and decoders are called
        with ``reuse_weights=tf.AUTO_REUSE``.

        Args:
            raw_src_image: Rank-4 tensor. Its first (batch) dimension must be
                statically known because it is used in ``tf.reshape``.
            raw_src_depth: Depth tensor. If it is not rank 4, a trailing
                axis is added at position 3 before concatenation.
            pose: Right-hand operand of ``tf.matmul`` with the regrouped
                geometry code (last dimension 4).

        Returns:
            dict with the keys ``'out_depth'`` and ``'out_pixel'``.

        Raises:
            ValueError: If ``self.data`` is not ``'car'``; no depth range is
                defined for any other value.
        """
        # Only 'car' has a depth range defined. Fail before any graph op is
        # created rather than hitting an unbound local half-way through.
        if self.data != 'car':
            raise ValueError(
                "No depth range configured for data=%r; only 'car' is "
                "supported." % (self.data,))
        depth_bias = 2
        depth_scale = 1.0

        # The 4-way unpack doubles as a rank check; only the batch size is used.
        b, _, _, _ = raw_src_image.get_shape().as_list()
        z_size = 856
        # Number of leading latent channels that are transformed by the pose.
        z_geo_size = 600

        with tf.name_scope('preprocessing'):
            src_image = self.image2tensor(raw_src_image)
            # Make sure depth has a channel axis so it can be concatenated.
            if len(raw_src_depth.get_shape()) != 4:
                src_depth = tf.expand_dims(raw_src_depth, axis=3)
            else:
                src_depth = raw_src_depth

        with tf.name_scope('concat_rgbd'):
            input_rgbd = tf.concat([src_image, src_depth], axis=3)

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(input_rgbd,
                                num_outputs=z_size,
                                reuse_weights=tf.AUTO_REUSE)

            # Split the latent code: the leading channels are regrouped into
            # rows of 4 so they can be multiplied by pose; the rest is kept.
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]

            z_geo_tf = tf.matmul(z_geo, pose)

            # TODO: solving z_h and z_w values
            # The reshape below hard-codes a 1x1 spatial extent for the
            # encoder output.
            z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)

        with tf.name_scope('Depth'):
            depth_dec_out = Decoder(z_geo_tf,
                                    1,
                                    variable_scope='Depth_Decoder',
                                    reuse_weights=tf.AUTO_REUSE)
            # tanh is in (-1, 1), so depth lies in
            # (depth_bias - depth_scale, depth_bias + depth_scale).
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf,
                                    3,
                                    variable_scope='Pixel_Decoder',
                                    reuse_weights=tf.AUTO_REUSE)
            pixel_pred = tf.nn.tanh(pixel_dec_out)

        # Collect output tensors
        pred = {
            'out_depth': depth_pred,
            'out_pixel': pixel_pred,
        }

        return pred
示例#3
0
文件: test_model2.py 项目: shgold/sh
    def build_train_graph(self, is_train=True):
        """Build the training graph: forward pass, losses, summaries, optimizer.

        Reads ``self.src_image``, ``self.tgt_image``, ``self.inv_RT``,
        ``self.RT``, ``self.intrinsic``, ``self.batch_size``, ``self.data``,
        ``self.loss_weight``, ``self.learning_rate`` and ``self.beta1``.

        Sets ``self.depth_scale_vis``, ``self.depth_bias_vis``,
        ``self.eval_loss`` (dict of the three loss terms plus their sum),
        ``self.total_loss``, ``self.optimizer`` and ``self.train_op``.
        Also prints the intermediate tensors and registers image/scalar
        summaries.

        Args:
            is_train: Accepted for interface compatibility; not used by this
                method.

        Raises:
            ValueError: If ``self.data`` is not ``'car'``; no depth range is
                defined for any other value.
        """
        # Only 'car' has a depth range defined. Fail before any graph op is
        # created rather than hitting an unbound local half-way through.
        if self.data != 'car':
            raise ValueError(
                "No depth range configured for data=%r; only 'car' is "
                "supported." % (self.data,))
        depth_bias = 2
        depth_scale = 1.0

        z_size = 856
        # Number of leading latent channels that are transformed by the pose.
        z_geo_size = 600

        with tf.name_scope('Encoder'):
            z_enc_out = Encoder(self.src_image, num_outputs=z_size)
            print('encoder out', z_enc_out)

            # Split the latent code: the leading channels are regrouped into
            # rows of 4 so they can be multiplied by inv_RT; the rest is kept.
            z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size],
                               [self.batch_size, -1, 4])
            z_app = z_enc_out[:, :, :, z_geo_size:]
            print('z geo', z_geo)
            print('z app', z_app)

            z_geo_tf = tf.matmul(z_geo, self.inv_RT)
            print('z geo tf', z_geo_tf)
            print('inv_RT', self.inv_RT)

            # TODO: solving z_h and z_w values
            # The reshape below hard-codes a 1x1 spatial extent for the
            # encoder output.
            z_geo_tf = tf.reshape(z_geo_tf,
                                  [self.batch_size, 1, 1, z_geo_size])
            z_tf = tf.concat([z_geo_tf, z_app], axis=3)
            print('z tf', z_tf)

        with tf.name_scope('Depth'):
            self.depth_scale_vis = 125. / depth_scale
            self.depth_bias_vis = depth_bias - depth_scale

            depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
            # tanh is in (-1, 1), so depth lies in
            # (depth_bias - depth_scale, depth_bias + depth_scale).
            depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

        with tf.name_scope('Mask'):
            mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
            mask_pred = tf.nn.sigmoid(mask_dec_out)
            print('mask pred', mask_pred)

        with tf.name_scope('Pixel'):
            pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
            pixel_pred = tf.nn.tanh(pixel_dec_out)
            print('pixel pred', pixel_pred)

        with tf.name_scope('prediction'):
            warped_pred = projective_inverse_warp(self.src_image,
                                                  tf.squeeze(depth_pred),
                                                  self.RT,
                                                  self.intrinsic,
                                                  ret_flows=False)
            print('warped pred', warped_pred)

            # Blend: the mask selects the pixel decoder output, its complement
            # selects the warped source image.
            fake_tgt = tf.multiply(pixel_pred, mask_pred) + tf.multiply(
                warped_pred, 1 - mask_pred)

        with tf.name_scope('loss'):
            # Each term is a mean absolute error against the target image:
            # warped source (depth), pixel decoder output (pixel) and the
            # mask-blended image (mask).
            depth_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - warped_pred)) * self.loss_weight
            pixel_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - pixel_pred)) * self.loss_weight
            mask_loss = tf.reduce_mean(
                tf.abs(self.tgt_image - fake_tgt)) * self.loss_weight

            self.total_loss = depth_loss + pixel_loss + mask_loss

            self.eval_loss = {
                'depth_loss': depth_loss,
                'pixel_loss': pixel_loss,
                'mask_loss': mask_loss,
                'total_loss': self.total_loss,
            }

        # Summaries
        tf.summary.image('src_image', self.deprocess_image(self.src_image))
        tf.summary.image('tgt_image', self.deprocess_image(self.tgt_image))

        tf.summary.image('fake_tgt_image', self.deprocess_image(fake_tgt))
        tf.summary.image('pixel_pred_image', self.deprocess_image(pixel_pred))
        # NOTE(review): unlike the other image summaries this one is not
        # passed through deprocess_image -- confirm whether that is intended.
        tf.summary.image('warped_pred_image', warped_pred)
        tf.summary.scalar('total_loss', self.total_loss)

        # Define optimizers
        with tf.name_scope('train_optimizers'):
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                    self.beta1)
            train_vars = tf.trainable_variables()
            grads_and_vars = self.optimizer.compute_gradients(
                self.total_loss, var_list=train_vars)
            self.train_op = self.optimizer.apply_gradients(grads_and_vars)