Example #1
 def _gauss_log_pi(self, mu, log_sig):
     sigma = tf.exp(log_sig)
     normal = Normal(mu, sigma)
     z = normal.sample()
     actions = self._squash_actions(z)
     gauss_log_prob = normal.log_prob(z)
     log_pi = gauss_log_prob - self._squash_correction(z)
     return log_pi[:, None], actions
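Example #1 calls two helpers that are not shown, `_squash_actions` and `_squash_correction`. Below is a minimal sketch of the usual tanh-squashing pattern they refer to; the names, the epsilon, and the reduction axis are assumptions, and the exact shapes depend on how `normal.log_prob` is reduced in the surrounding code.

import tensorflow as tf

def squash_actions(z):
    # map the raw Gaussian sample into (-1, 1)
    return tf.tanh(z)

def squash_correction(z, eps=1e-6):
    # change-of-variables term: sum over action dims of log|d tanh(z_i)/dz_i| = log(1 - tanh(z_i)^2)
    return tf.reduce_sum(tf.log(1.0 - tf.tanh(z) ** 2 + eps), axis=1)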
Example #2
    def gen_one_step(self, z, u):

        p_mean, p_var = self.p_transition(z, u)

        p = MultivariateNormalDiag(p_mean, tf.sqrt(p_var))

        z_step = p.sample()

        return z_step
Example #3
    def one_step(self, a, x):

        z = a[0]
        u, enc = x

        q_mean, q_var = self.q_transition(z, enc, u)
        p_mean, p_var = self.p_transition(z, u)

        q = MultivariateNormalDiag(q_mean, tf.sqrt(q_var))
        p = MultivariateNormalDiag(p_mean, tf.sqrt(p_var))

        z_step = q.sample()

        kl = kl_divergence(q, p)

        return z_step, kl
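A sketch of how a step function like `one_step` is typically driven with `tf.scan`; `model`, `u_seq`, `enc_seq`, and `z0` are hypothetical names, with `u_seq`/`enc_seq` time-major ([T, batch, ...]) and `z0` of shape [batch, z_dim].

z_seq, kl_seq = tf.scan(model.one_step,
                        elems=(u_seq, enc_seq),
                        initializer=(z0, tf.zeros(tf.shape(z0)[:1], dtype=z0.dtype)))
kl_total = tf.reduce_sum(kl_seq, axis=0)  # accumulate the per-step KL terms for the ELBO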
Example #4
File: mmd_evaluator.py  Project: gmum/MoW
class MMDEvaluator:
    def __init__(self,
                 z_dim,
                 repeats_count: int = 3,
                 samples_limit: int = 2000):
        self.__z_dim = z_dim
        self.__repeats_count = repeats_count
        self.__samples_limit = samples_limit

    def build(self):
        self.__tensor_z_encoded = tf.placeholder(shape=np.append([None],
                                                                 self.__z_dim),
                                                 dtype=tf.float32)
        self.__distr = MultivariateNormalDiag(loc=tf.zeros(self.__z_dim),
                                              scale_diag=tf.ones(self.__z_dim))
        self.__tensor_z_sampled = self.__distr.sample(
            tf.shape(self.__tensor_z_encoded)[0])
        self.__tensor_mmd_penalty = mmd_penalty(self.__tensor_z_encoded,
                                                self.__tensor_z_sampled)

    def __compute_wae_distance(self, session, latent):
        mmd_penalty_sum = 0
        feed_dict = {
            self.__tensor_z_encoded: latent,
        }

        for _ in range(self.__repeats_count):
            mmd_penalty_sum += session.run(self.__tensor_mmd_penalty,
                                           feed_dict)

        avg_mmd_penalty = mmd_penalty_sum / self.__repeats_count
        return avg_mmd_penalty

    def evaluate(self, session, z):
        print('Computing MMD')
        if z.shape[0] > self.__samples_limit:
            index = np.random.choice(z.shape[0],
                                     self.__samples_limit,
                                     replace=False)
            wae_distance = self.__compute_wae_distance(session, z[index])
        else:
            wae_distance = self.__compute_wae_distance(session, z)

        return [('wae_distance', wae_distance)]
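A hypothetical usage sketch for the evaluator, assuming a TF1 session and an eight-dimensional latent space; the random `latent` array below is only a stand-in for encoder output.

import numpy as np
import tensorflow as tf

evaluator = MMDEvaluator(z_dim=8)
evaluator.build()  # creates the placeholder, the N(0, I) prior and the MMD penalty op

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    latent = np.random.randn(5000, 8).astype(np.float32)  # stand-in for encoded samples
    print(evaluator.evaluate(sess, latent))                # -> [('wae_distance', ...)]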
Example #5
    def gmm_log_pi(self, log_weights, mu, log_std):

        sigma = tf.exp(log_std)
        normal = Normal(mu, sigma)

        # sample from GMM
        sample_w = tf.stop_gradient(
            tf.multinomial(logits=log_weights, num_samples=1))
        sample_z = tf.stop_gradient(normal.sample())
        mask = tf.one_hot(sample_w[:, 0], depth=self._actor.K)
        z = tf.reduce_sum(sample_z * mask[:, :, None], axis=1)
        action = self.squash_action(z)

        # calculate log policy
        gauss_log_pi = normal.log_prob(z[:, None, :])
        log_pi = tf.reduce_logsumexp(gauss_log_pi + log_weights, axis=-1)
        log_pi -= tf.reduce_logsumexp(log_weights, axis=-1)
        log_pi -= self.get_squash_correction(z)
        log_pi *= self._temp

        return log_pi[:, None], action
Example #6
class Network(object):
    def __init__(self, state_dim, act_dim):
        self.input = tfl.input_data([None, state_dim])

        self.variables_v = tf.trainable_variables()
        net = self.input
        for h in [64, 64]:
            net = tfl.fully_connected(net, h, activation='tanh')
        net = tfl.fully_connected(net, 1, activation='linear')
        self.vpred = tf.squeeze(net, axis=[1])
        self.variables_v = tf.trainable_variables()[len(self.variables_v):]

        self.variables_p = tf.trainable_variables()
        net = self.input
        for h in [64, 64]:
            net = tfl.fully_connected(net, h, activation='tanh')
        mean = tfl.fully_connected(net, act_dim, activation='linear')
        logstd = tf.Variable(
            initial_value=np.zeros(act_dim).astype(np.float32))
        self.variables_p = tf.trainable_variables()[len(self.variables_p):]

        self.mvn = MultivariateNormalDiag(mean, tf.exp(logstd))
        self.sample = self.mvn.sample()
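A hypothetical usage sketch for the network above: sample an action from the diagonal-Gaussian policy head and read the value head (assumes `tflearn` is imported as `tfl`, as in the snippet, with illustrative state/action sizes).

import numpy as np
import tensorflow as tf

net = Network(state_dim=4, act_dim=2)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    state = np.random.randn(1, 4).astype(np.float32)
    action, value = sess.run([net.sample, net.vpred], feed_dict={net.input: state})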
Example #7
    def one_step_IAF(self, a, x):
        z = a[0]
        #log_q = a[1]
        u, enc = x

        input_h = tf.concat([z, enc, u], 1)                          #input should have enc(x), u and previous z
        h = self.q_henc(input_h)                                     #h encoding for iaf
        q_mean, q_var = self.q_transition(z, enc, u)
        p_mean, p_var = self.p_transition(z, u)

        q = MultivariateNormalDiag(q_mean, tf.sqrt(q_var))
        p = MultivariateNormalDiag(p_mean, tf.sqrt(p_var))

        z_step = q.sample()

        log_q = q.log_prob(z_step)                                  #before performing the iaf step

        z_step_iaf, q_var = self.q_transition_IAF(z_step, h)
        log_q = log_q - tf.reduce_sum(tf.log(q_var + 1e-5), axis=1) #after performing the iaf step

        log_p = p.log_prob(z_step_iaf)  #TODO: check if this is correct? Should we be getting the probability of z_step or z_step_iaf?

        return z_step_iaf, log_q, log_p
Example #8
class Layer(object):
    def __init__(self,
                 layer_index,
                 kern,
                 output_dim,
                 n_inducing,
                 X,
                 n_sample=100,
                 fixed_mean=True):
        eps_dim = int(n_inducing * output_dim)
        self.layer_index = layer_index
        self.kernel = kern
        self.input_dim = kern.input_dim
        self.output_dim = output_dim
        self.eps_dim = eps_dim
        self.n_sample = n_sample
        self.n_inducing = n_inducing
        self.fixed_mean = fixed_mean  # bool, Default = True for all layers before the last layer.
        print("========= Layer {} summary =========".format(layer_index))
        print("::::: LAYOUT")
        print("----- [Input dimension]       : ", self.input_dim)
        print("----- [Output dimension]      : ", self.output_dim)
        """ 
        The prior distribution is set to be i.i.d Gaussian distributions.
        """
        """================== Initialization of the inducing point =================="""
        with tf.variable_scope('theta'):  # scope [theta]
            self.Z = tf.Variable(kmeans2(X, self.n_inducing,
                                         minit='points')[0],
                                 dtype=tf.float64,
                                 name='Z')
        """================== Initialization of the GAN and noise sampler =================="""
        self.gan = GAN(self.n_inducing, self.output_dim, self.input_dim,
                       self.layer_index)
        _prior_mean = 0.0
        _prior_var = 1.0
        self.prior_mean = [_prior_mean] * int(n_inducing * output_dim)
        self.prior_var = [_prior_var] * int(n_inducing * output_dim)
        self.mu, self.scale = [0.] * eps_dim, [1.0] * eps_dim
        # In the paper we use a single global eps while in this implementation we disentangle them.
        self.eps_sampler = MultiNormal(self.mu, self.scale)
        print("----- [Prior mean]            : ", _prior_mean)
        print("----- [Prior var]             : ", _prior_var)
        """================== Initialization of the skip layer connection =================="""
        if self.input_dim == self.output_dim:
            self.W_skiplayer = np.eye(self.input_dim)
        elif self.input_dim < self.output_dim:
            self.W_skiplayer = np.concatenate([
                np.eye(self.input_dim),
                np.zeros((self.input_dim, self.output_dim - self.input_dim))
            ],
                                              axis=1)
        else:
            _, _, V = np.linalg.svd(X, full_matrices=False)
            self.W_skiplayer = V[:self.output_dim, :].T

    """ return the mean & cov in X given inducing points&values"""

    def gan_base_conditional(self,
                             Kmn,
                             Kmm,
                             Knn,
                             f,
                             full_cov=False,
                             q_sqrt=None,
                             white=False):
        if full_cov:
            raise NotImplementedError("full_cov=True is not implemented")
        num_func = f.shape[2]  # R
        Lm = tf.cholesky(Kmm)
        # Compute the projection matrix A
        A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)
        # Compute the covariance due to the conditioning
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(fvar[None, :], [num_func, 1])  # R x N
        # Another backsubstitution in the unwhitened case
        if not white:
            A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)
        fmean = tf.einsum("zx,nzo->nxo", A, f)

        if q_sqrt is not None:
            if q_sqrt.get_shape().ndims == 2:
                LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x M x N
            elif q_sqrt.get_shape().ndims == 3:
                L = q_sqrt
                A_tiled = tf.tile(tf.expand_dims(A, 0),
                                  tf.stack([num_func, 1, 1]))
                LTA = tf.matmul(L, A_tiled, transpose_a=True)  # R x M x N
            else:  # pragma: no cover
                raise ValueError("Bad dimension for q_sqrt: %s" %
                                 str(q_sqrt.get_shape().ndims))
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

        fvar = tf.transpose(fvar)
        return fmean, fvar  # n_sample x N x R, N x R

    def gan_conditional(self, X):
        """
        Given f, representing the GP at the points X, produce the mean and
        (co-)variance of the GP at the points Xnew.

        Additionally, there may be Gaussian uncertainty about f as represented by
        q_sqrt. In this case `f` represents the mean of the distribution and
        q_sqrt the square-root of the covariance.

        :: [params] :: white
        Additionally, the GP may have been centered (whitened) so that
            p(v) = N(0, I)
            f = L v
        thus
            p(f) = N(0, LL^T) = N(0, K).
        In this case `f` represents the values taken by v.

        The method can either return the diagonals of the covariance matrix for
        each output (default) or the full covariance matrix (full_cov=True).
        Let R = output_dim, N = N_x, M = n_inducing;
        We assume R independent GPs, represented by the columns of f (and the
        first dimension of q_sqrt).
        :param Xnew: data matrix, size N x D. Evaluate the GP at these new points
        :param X: data points, size M x D.
        :param kern: GPflow kernel.
        :param f: data matrix, M x R, representing the function values at X,
            for K functions.
        :param q_sqrt: matrix of standard-deviations or Cholesky matrices,
            size M x R or R x M x M.
        :param white: boolean of whether to use the whitened representation as
            described above.
        :return:
            - mean:     N x R
            - variance: N x R (full_cov = False), R x N x N (full_cov = True)
        """

        self.eps = tf.reshape(
            self.eps_sampler.sample(self.n_sample),  # n_sample * self.eps_dim
            [self.n_sample, self.n_inducing, self.output_dim])
        self.Z_repeat = tf.cast(
            tf.tile(tf.reshape(self.Z, [1, self.n_inducing, self.input_dim]),
                    [self.n_sample, 1, 1]), tf.float32)
        self.eps_with_z = tf.concat([self.eps, self.Z_repeat], axis=2)
        self.post = tf.cast(self.gan.generator(self.eps_with_z),
                            tf.float64)  # n_sample * n_inducing * output_dim
        Kxz = self.kernel.K(X, self.Z)
        Kzx = self.kernel.K(self.Z, X)
        Kzz = self.kernel.K(
            self.Z) + tf.eye(self.n_inducing, dtype=tf.float64) * 1e-7
        self.Kzz = Kzz
        self.determinant = tf.matrix_determinant(Kzz)
        Kxx = self.kernel.Kdiag(X)  # Just the diagonal part.
        mu, _var1 = self.gan_base_conditional(Kzx,
                                              Kzz,
                                              Kxx,
                                              self.post,
                                              full_cov=False,
                                              q_sqrt=None,
                                              white=True)
        mean = tf.reduce_mean(mu, axis=0)  # n_X * output_dim
        _var2 = tf.einsum("nxi,nxi->xi", mu, mu) / self.n_sample
        _var3 = -tf.einsum("xi,xi->xi", mean, mean)
        var = _var1 + _var2 + _var3  # Use moment matching for mixtures of Gaussians to estimate the posterior variance.
        return mean, var

    def prior_sampler(self, prior_batch_size):
        self.prob_prior = MultiNormal(self.prior_mean, self.prior_var)
        samples = tf.reshape(
            self.prob_prior.sample(prior_batch_size),
            [prior_batch_size, self.n_inducing, self.output_dim])
        return samples
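The `_var1 + _var2 + _var3` combination in `gan_conditional` is moment matching for a mixture of Gaussians: Var[f] = E[component variance] + E[component mean^2] - (E[component mean])^2. A small NumPy illustration of the same computation with made-up shapes:

import numpy as np

mu = np.random.randn(100, 50, 3)   # n_sample x N x R component means
base_var = np.ones((50, 3))        # N x R conditional variance shared by all components
mean = mu.mean(axis=0)                               # mixture mean,     N x R
var = base_var + (mu ** 2).mean(axis=0) - mean ** 2  # mixture variance, N x R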
Example #9
    def network(self, inputs, pi_raw_action, q_action, phase, num_samples):
        # TODO: Remove alpha (not using multimodal)
        # shared net
        shared_net = tf.contrib.layers.fully_connected(
            inputs,
            self.shared_layer_dim,
            activation_fn=None,
            weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode="FAN_IN", uniform=True),
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.01),
            biases_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode="FAN_IN", uniform=True))

        shared_net = self.apply_norm(shared_net,
                                     activation_fn=tf.nn.relu,
                                     phase=phase,
                                     layer_num=1)

        # action branch
        pi_net = tf.contrib.layers.fully_connected(
            shared_net,
            self.actor_layer_dim,
            activation_fn=None,
            weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode="FAN_IN", uniform=True),
            weights_regularizer=None,
            biases_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode="FAN_IN", uniform=True))

        pi_net = self.apply_norm(pi_net,
                                 activation_fn=tf.nn.relu,
                                 phase=phase,
                                 layer_num=2)

        # no activation
        pi_mu = tf.contrib.layers.fully_connected(
            pi_net,
            self.num_modal * self.action_dim,
            activation_fn=None,
            weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode="FAN_IN", uniform=True),
            # weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3),
            weights_regularizer=None,
            # tf.contrib.layers.l2_regularizer(0.001),
            biases_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode="FAN_IN", uniform=True))
        # biases_initializer=tf.random_uniform_initializer(-3e-3, 3e-3))

        pi_logstd = tf.contrib.layers.fully_connected(
            pi_net,
            self.num_modal * self.action_dim,
            activation_fn=tf.tanh,
            weights_initializer=tf.random_uniform_initializer(0, 1),
            weights_regularizer=None,
            # tf.contrib.layers.l2_regularizer(0.001),
            biases_initializer=tf.random_uniform_initializer(-3e-3, 3e-3))

        pi_alpha = tf.contrib.layers.fully_connected(
            pi_net,
            self.num_modal,
            activation_fn=tf.tanh,
            weights_initializer=tf.random_uniform_initializer(-3e-3, 3e-3),
            weights_regularizer=None,
            # tf.contrib.layers.l2_regularizer(0.001),
            biases_initializer=tf.random_uniform_initializer(-3e-3, 3e-3))

        # reshape output
        assert (self.num_modal == 1)

        # pi_mu = tf.reshape(pi_mu, [-1, self.num_modal, self.action_dim])
        # pi_logstd = tf.reshape(pi_logstd, [-1, self.num_modal, self.action_dim])
        # pi_alpha = tf.reshape(pi_alpha, [-1, self.num_modal, 1])

        pi_mu = tf.reshape(pi_mu, [-1, self.action_dim])
        pi_logstd = tf.reshape(pi_logstd, [-1, self.action_dim])
        pi_alpha = tf.reshape(pi_alpha, [-1, 1])

        # exponentiate logstd
        # pi_std = tf.exp(tf.scalar_mul(self.sigma_scale, pi_logstd))
        pi_std = tf.exp(self.LOG_STD_MIN + 0.5 *
                        (self.LOG_STD_MAX - self.LOG_STD_MIN) *
                        (pi_logstd + 1))

        # construct MultivariateNormalDiag dist.
        mvn = MultivariateNormalDiag(loc=pi_mu, scale_diag=pi_std)

        if self.actor_update == "reparam":
            # pi = mu + tf.random_normal(tf.shape(mu)) * std
            # logp_pi = self.gaussian_likelihood(pi, mu, log_std)

            # pi_mu: (batch_size, action_dim)

            # (batch_size x num_samples, action_dim)
            # If updating multiple samples
            stacked_pi_mu = tf.expand_dims(pi_mu, 1)
            stacked_pi_mu = tf.tile(stacked_pi_mu, [1, num_samples, 1])
            stacked_pi_mu = tf.reshape(
                stacked_pi_mu,
                (-1,
                 self.action_dim))  # (batch_size * num_samples, action_dim)

            stacked_pi_std = tf.expand_dims(pi_std, 1)
            stacked_pi_std = tf.tile(stacked_pi_std, [1, num_samples, 1])
            stacked_pi_std = tf.reshape(
                stacked_pi_std,
                (-1,
                 self.action_dim))  # (batch_size * num_samples, action_dim)

            noise = tf.random_normal(tf.shape(stacked_pi_mu))

            # (batch_size * num_samples, action_dim)
            pi_raw_samples = stacked_pi_mu + noise * stacked_pi_std
            pi_raw_samples_logprob = self.gaussian_loglikelihood(
                pi_raw_samples, stacked_pi_mu, stacked_pi_std)

            pi_raw_samples = tf.reshape(pi_raw_samples,
                                        (-1, num_samples, self.action_dim))
            pi_raw_samples_logprob = tf.reshape(
                pi_raw_samples_logprob, (-1, num_samples, self.action_dim))

        else:
            pi_raw_samples_og = mvn.sample(num_samples)

            # dim: (batch_size, num_samples, action_dim)
            pi_raw_samples = tf.transpose(pi_raw_samples_og, [1, 0, 2])

            # get raw logprob
            pi_raw_samples_logprob_og = mvn.log_prob(pi_raw_samples_og)
            pi_raw_samples_logprob = tf.transpose(pi_raw_samples_logprob_og,
                                                  [1, 0, 2])

        # apply tanh
        pi_mu = tf.tanh(pi_mu)
        pi_samples = tf.tanh(pi_raw_samples)

        pi_samples_logprob = pi_raw_samples_logprob - tf.reduce_sum(tf.log(
            self.clip_but_pass_gradient(1 - pi_samples**2, l=0, u=1) + 1e-6),
                                                                    axis=-1)

        pi_mu = tf.multiply(pi_mu, self.action_max)
        pi_samples = tf.multiply(pi_samples, self.action_max)

        # compute logprob for input action
        pi_raw_actions_logprob = mvn.log_prob(pi_raw_action)
        pi_action = tf.tanh(pi_raw_action)
        pi_actions_logprob = pi_raw_actions_logprob - tf.reduce_sum(tf.log(
            self.clip_but_pass_gradient(1 - pi_action**2, l=0, u=1) + 1e-6),
                                                                    axis=-1)

        # TODO: Remove alpha
        # compute softmax prob. of alpha
        max_alpha = tf.reduce_max(pi_alpha, axis=1, keepdims=True)
        pi_alpha = tf.subtract(pi_alpha, max_alpha)
        pi_alpha = tf.exp(pi_alpha)

        normalize_alpha = tf.reciprocal(
            tf.reduce_sum(pi_alpha, axis=1, keepdims=True))
        pi_alpha = tf.multiply(normalize_alpha, pi_alpha)

        # Q branch
        with tf.variable_scope('qf'):
            q_actions_prediction = self.q_network(shared_net, q_action, phase)
        with tf.variable_scope('qf', reuse=True):
            # if len(tf.shape(pi_samples)) == 3:
            pi_samples_reshaped = tf.reshape(
                pi_samples, (self.batch_size * num_samples, self.action_dim))
            # else:
            #     assert(len(tf.shape(pi_samples)) == 2)
            #     pi_samples_reshaped = pi_samples
            q_samples_prediction = self.q_network(shared_net,
                                                  pi_samples_reshaped, phase)

        # print(pi_raw_action, pi_action)
        # print(pi_raw_actions_logprob, pi_raw_actions_logprob)
        # print(pi_action, pi_actions_logprob)

        return pi_alpha, pi_mu, pi_std, pi_raw_samples, pi_samples, pi_samples_logprob, pi_actions_logprob, q_samples_prediction, q_actions_prediction
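The `clip_but_pass_gradient` helper used for the squash correction above is not shown; a minimal sketch of the usual straight-through clipping trick it refers to (an assumption about this project's definition):

def clip_but_pass_gradient(x, l=-1.0, u=1.0):
    # clip the forward value to [l, u] while letting gradients flow through unchanged
    clip_up = tf.cast(x > u, tf.float32)
    clip_low = tf.cast(x < l, tf.float32)
    return x + tf.stop_gradient((u - x) * clip_up + (l - x) * clip_low)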
Example #10
class Network:
    def inference_value(self, observation_space):
        """
         Creates a neural-network value function approximator
        Args:
         observation_space: observation space of the environment
        Returns:
         Nothing, the network is usable only after calling this method
        """
        self.variables = tf.trainable_variables()
        self.input_pl = tf.placeholder(tf.float32, [None, observation_space],
                                       name='Input_PL')
        # 2 hidden layers with 100 neurons each
        net = tf.layers.dense(
            self.input_pl,
            100,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(stddev=.1))
        net = tf.layers.dense(
            net,
            100,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(stddev=.1))
        net = tf.layers.dense(
            net,
            1,
            kernel_initializer=tf.random_normal_initializer(stddev=.01))
        self.predict = tf.squeeze(net, axis=[1])
        self.variables = tf.trainable_variables()[len(self.variables):]

    def inference_policy(self, observation_space, action_space):
        """
         Creates a neural-network policy approximator
        Args:
         observation_space: observation space of the environment
         action_space: action space of the environment
        Returns:
         Nothing, the network is usable only after calling this method
        """
        self.variables = tf.trainable_variables()
        self.input_pl = tf.placeholder(tf.float32, [None, observation_space],
                                       name='Input_PL')
        #2 hidden layers with 100 neurons each
        net = tf.layers.dense(
            self.input_pl,
            100,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(stddev=.1))
        net = tf.layers.dense(
            net,
            100,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(stddev=.1))
        mean = tf.layers.dense(
            net,
            action_space,
            kernel_initializer=tf.random_normal_initializer(stddev=.01))
        self.std = tf.Variable(np.ones(action_space).astype(np.float32))
        self.mvn = MultivariateNormalDiag(mean, self.std)
        self.sample = self.mvn.sample()
        self.variables = tf.trainable_variables()[len(self.variables):]

    def copy_to(self, target_network):
        """
         Operations to copy from self to target
        Args:
         target_network: network to be copied into
        Returns:
         copy_ops: tf-operations (have to be run inside a tf.Session)
        """
        v1 = self.variables
        v2 = target_network.variables
        copy_ops = [v2[i].assign(v1[i]) for i in range(len(v1))]
        return copy_ops
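A hypothetical usage sketch for `copy_to`, syncing an online policy network into a target copy (the observation/action sizes are placeholders):

import tensorflow as tf

online, target = Network(), Network()
online.inference_policy(observation_space=8, action_space=2)
target.inference_policy(observation_space=8, action_space=2)
sync_ops = online.copy_to(target)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(sync_ops)  # target now holds the online network's weights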
Example #11
class Layer(object):
    """ has skip layer connection;
        define self.U to be the non-trainable variables (self.M, self.outputs)"""
    def __init__(self, layer_index, kern, output_dim, n_inducing,  X, 
                 fixed_mean=True,n_sample=100, eps_dim=32):
        print("=== Layer {} summary ===".format(layer_index))
        print("--- Input dimension: ",kern.input_dim)
        print("--- Output dimension: ",output_dim)
        eps_dim=int(n_inducing*output_dim)
        
        self.layer_index = layer_index
        self.kernel = kern
        self.input_dim, self.output_dim, self.eps_dim = kern.input_dim, output_dim, eps_dim
        self.n_sample = n_sample
        self.n_inducing = n_inducing
        self.fixed_mean = fixed_mean # bool
        
        
        self.prior_mean = [0.0] * int(n_inducing*output_dim)
        self.prior_var = [1.0] * int(n_inducing*output_dim)
        
        
        self.mu, self.scale = [0.] * eps_dim, [1.0] * eps_dim
        self.eps_sampler = MultiNormal(self.mu, self.scale)
        ###################################################################################
        with tf.variable_scope('theta'):
            self.Z = tf.Variable(kmeans2(X, self.n_inducing, minit='points')[0], dtype=tf.float64, name='Z')
        ###################################################################################
        self.gan = GAN(self.n_inducing, self.output_dim, self.layer_index, self.input_dim)
        ###################################################################################
        if self.input_dim == self.output_dim:
            self.W_skiplayer = np.eye(self.input_dim)
        elif self.input_dim < self.output_dim:
            self.W_skiplayer = np.concatenate([np.eye(self.input_dim), 
                                               np.zeros((self.input_dim, self.output_dim - self.input_dim))], axis=1)
        else:
            _, _, V = np.linalg.svd(X, full_matrices=False)
            self.W_skiplayer = V[:self.output_dim, :].T
        ###################################################################################
        """ 1. trainable=False because this is the prior, and it is not a variable to be learn from SGD;
            2. exist because ??? """
        self.U = tf.Variable(np.zeros((self.n_inducing, self.output_dim)), dtype=tf.float64, trainable=False, name='U')

    """ return the mean & cov in X given inducing points&values"""
    def gan_base_conditional(self, Kmn, Kmm, Knn, f, 
                             full_cov=False, q_sqrt=None, white=False):
        if full_cov:
            raise NotImplementedError("full_cov=True is not implemented")

        # compute kernel stuff
        num_func = f.shape[2]  # R
        Lm = tf.cholesky(Kmm)
        # Compute the projection matrix A
        A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)
        # compute the covariance due to the conditioning
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(fvar[None, :], [num_func, 1])  # R x N

        # another backsubstitution in the unwhitened case
        if not white:
            A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

        fmean = tf.einsum("zx,nzo->nxo",A,f)

        if q_sqrt is not None:
            if q_sqrt.get_shape().ndims == 2:
                LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x M x N
            elif q_sqrt.get_shape().ndims == 3:
                L = q_sqrt
                A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
                LTA = tf.matmul(L, A_tiled, transpose_a=True)  # R x M x N
            else:  # pragma: no cover
                raise ValueError("Bad dimension for q_sqrt: %s" %
                                 str(q_sqrt.get_shape().ndims))
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

        fvar = tf.transpose(fvar)  # N x R
        return fmean, fvar # n_sample x N x R, N x R

    """# return the mean and cov in Xnew
       # this discribe the distribution of p( f in Xnew | X, f )"""
    def gan_conditional(self, X):
        """
        Given f, representing the GP at the points X, produce the mean and
        (co-)variance of the GP at the points Xnew.

        Additionally, there may be Gaussian uncertainty about f as represented by
        q_sqrt. In this case `f` represents the mean of the distribution and
        q_sqrt the square-root of the covariance.

        :: [params] :: white
        Additionally, the GP may have been centered (whitened) so that
            p(v) = N(0, I)
            f = L v
        thus
            p(f) = N(0, LL^T) = N(0, K).
        In this case `f` represents the values taken by v.

        The method can either return the diagonals of the covariance matrix for
        each output (default) or the full covariance matrix (full_cov=True).
        We assume R independent GPs, represented by the columns of f (and the
        first dimension of q_sqrt).
        :param Xnew: data matrix, size N x D. Evaluate the GP at these new points
        :param X: data points, size M x D.
        :param kern: GPflow kernel.
        :param f: data matrix, M x R, representing the function values at X,
            for K functions.
        :param q_sqrt: matrix of standard-deviations or Cholesky matrices,
            size M x R or R x M x M.
        :param white: boolean of whether to use the whitened representation as
            described above.
        :return:
            - mean:     N x R
            - variance: N x R (full_cov = False), R x N x N (full_cov = True)
        """

        self.eps = tf.reshape(self.eps_sampler.sample(self.n_sample), # n_sample * self.eps_dim
                              [self.n_sample, self.n_inducing, self.output_dim])
        self.Z_repeat = tf.cast(tf.tile(tf.reshape(self.Z,
                                           [1, self.n_inducing, self.input_dim]),
                                    [self.n_sample, 1, 1]),
                                tf.float32)
        self.eps_with_z = tf.concat([self.eps,self.Z_repeat], axis = 2)
        self.post = tf.cast(self.gan.generator(self.eps_with_z,reuse = True), tf.float64) # self.n_sample * n_Z * self.output_dim
        Kxz = self.kernel.K(X, self.Z) # n_X * n_Z
        Kzx = self.kernel.K(self.Z, X)
        Kzz = self.kernel.K(self.Z) + tf.eye(self.n_inducing, dtype=tf.float64) * 1e-7
        self.Kzz = Kzz
        self.determinant = tf.matrix_determinant(Kzz)
        
        Kxx = self.kernel.Kdiag(X)
        mu, _var1 = self.gan_base_conditional(Kzx, Kzz, Kxx, self.post, full_cov=False, q_sqrt=None, white=True)
        mean = tf.reduce_mean(mu, axis=0)  # n_X * self.output_dim
        _var2 = tf.einsum("nxi,nyi->ixy", mu, mu) / self.n_sample
        _var3 = -tf.einsum("xi,yi->ixy", mean, mean)
        var = _var1 + tf.transpose(tf.linalg.diag_part(_var2) + tf.linalg.diag_part(_var3))

        return mean, var
    """ log density of prior"""
    def prior_sampler(self, prior_batch_size):
        self.prob_prior = MultiNormal(self.prior_mean, self.prior_var)
        samples = tf.reshape(self.prob_prior.sample(prior_batch_size), [prior_batch_size, self.n_inducing, self.output_dim])
        return samples
    def prior(self):
        return -tf.reduce_sum(tf.square(self.U)) / 2.0 * 1.0