Example #1 (score: 0)
File: nn.py — Project: yuchen8807/parasol
 def map_fn(data):
     """Apply `network` to `data` that may carry arbitrary leading axes.

     Collapses all leading (batch/time) axes of `data` into a single
     batch axis, runs `network` on the flattened 2-D tensor, then
     reshapes the resulting distribution parameters back to the
     original leading shape.

     Args:
         data: tensor whose last axis is the feature dimension; any
             number of leading axes is allowed.

     Returns:
         A `stats.GaussianScaleDiag`, `stats.Gaussian`, or
         `stats.Bernoulli` whose parameters carry the original leading
         axes of `data`.

     Raises:
         NotImplementedError: if `network` emits a distribution type
             not handled here.  (Subclasses `Exception`, so existing
             `except Exception` callers still catch it.)
     """
     data_shape = T.shape(data)
     leading = data_shape[:-1]
     dim_in = data_shape[-1]
     flattened = T.reshape(data, [-1, dim_in])
     net_out = network(flattened)

     def unflatten(param, trailing):
         # Restore the leading axes that were collapsed above.
         return T.reshape(param, T.concatenate([leading, trailing]))

     if isinstance(net_out, stats.GaussianScaleDiag):
         scale_diag, mu = net_out.get_parameters('regular')
         dim_out = T.shape(mu)[-1]
         return stats.GaussianScaleDiag([
             unflatten(scale_diag, [dim_out]),
             unflatten(mu, [dim_out]),
         ])
     elif isinstance(net_out, stats.Gaussian):
         sigma, mu = net_out.get_parameters('regular')
         dim_out = T.shape(mu)[-1]
         return stats.Gaussian([
             # Full covariance: trailing shape is [dim_out, dim_out].
             unflatten(sigma, [dim_out, dim_out]),
             unflatten(mu, [dim_out]),
         ])
     elif isinstance(net_out, stats.Bernoulli):
         params = net_out.get_parameters('natural')
         dim_out = T.shape(params)[-1]
         return stats.Bernoulli(
             unflatten(params, [dim_out]),
             'natural')
     else:
         raise NotImplementedError("Unimplemented distribution")
Example #2 (score: 0)
File: lds.py — Project: yuchen8807/parasol
 def kl_divergence(self, q_X, q_A, _):
     """Return `(kl, info)` for the state posterior against the LDS prior.

     q_X -- variational state distribution; regular parameters are
            shaped [N, H, ds]
     q_A -- variational action distribution; regular parameters are
            shaped [N, H, da]
     _   -- unused; kept for interface compatibility with siblings
            that take `num_data`

     Results are memoized in `self.cache`, keyed on `(q_X, q_A)`.
     """
     key = (q_X, q_A)
     if key in self.cache:
         return self.cache[key]
     info = {}
     if self.smooth:
         # Standard-normal prior over the initial state.
         prior_x0 = stats.GaussianScaleDiag([
             T.ones(self.ds),
             T.zeros(self.ds),
         ])
         p_X = stats.LDS(
             (self.sufficient_statistics(), prior_x0, None,
              q_A.expected_value(), self.horizon),
             'internal')
         kl = T.mean(stats.kl_divergence(q_X, p_X), axis=0)
         Q = self.get_dynamics()[1]
         info['model-stdev'] = T.sqrt(T.matrix_diag_part(Q))
     else:
         def drop_last_step(dist):
             # Same distribution restricted to time steps 0..H-2.
             p0, p1 = dist.get_parameters('regular')
             return dist.__class__([p0[:, :-1], p1[:, :-1]])

         q_Xt = drop_last_step(q_X)
         q_At = drop_last_step(q_A)
         p_Xt1 = self.forward(q_Xt, q_At)
         # Posterior restricted to time steps 1..H-1.
         x0, x1 = q_X.get_parameters('regular')
         q_Xt1 = q_X.__class__([x0[:, 1:], x1[:, 1:]])
         delta = (q_Xt1.get_parameters('regular')[1]
                  - p_Xt1.get_parameters('regular')[1])
         info['rmse'] = T.sqrt(T.sum(T.square(delta), axis=-1))
         kl = T.mean(T.sum(stats.kl_divergence(q_Xt1, p_Xt1), axis=-1), axis=0)
         Q = self.get_dynamics()[1]
         info['model-stdev'] = T.sqrt(T.matrix_diag_part(Q))
     self.cache[key] = kl, info
     return self.cache[key]
Example #3 (score: 0)
 def kl_divergence(self, q_X, q_A, num_data):
     """Return `(prior_kl, info)` for the posterior against the LDS prior.

     q_X      -- variational state distribution; regular parameters
                 presumably shaped [N, H, ds] (matches the sibling
                 variant's slicing) — TODO confirm
     q_A      -- variational action distribution, sliced the same way
     num_data -- dataset size; scales down the global (dynamics) KL so
                 it is amortized per data point

     Results are memoized in `self.cache`, keyed on `(q_X, q_A)`.

     Fixes over the original: `num_data` was passed through
     `T.to_float` twice in the non-smooth branch (once at assignment,
     again at the division); the global-KL computation was duplicated
     verbatim in both branches.  Both are now done exactly once.
     """
     if (q_X, q_A) not in self.cache:
         # KL over the dynamics parameters; identical in both branches.
         if self.time_varying:
             global_kl = T.sum(
                 stats.kl_divergence(self.A_variational, self.A_prior))
         else:
             global_kl = stats.kl_divergence(self.A_variational,
                                             self.A_prior)
         num_data = T.to_float(num_data)  # convert exactly once
         if self.smooth:
             # Standard-normal prior over the initial state.
             state_prior = stats.GaussianScaleDiag(
                 [T.ones(self.ds), T.zeros(self.ds)])
             self.p_X = stats.LDS(
                 (self.sufficient_statistics(), state_prior, None,
                  q_A.expected_value(), self.horizon), 'internal')
             local_kl = stats.kl_divergence(q_X, self.p_X)
             prior_kl = T.mean(local_kl, axis=0) + global_kl / num_data
             A, Q = self.get_dynamics()
             model_stdev = T.sqrt(T.matrix_diag_part(Q))
             self.cache[(q_X, q_A)] = prior_kl, {
                 'local-kl': local_kl,
                 'global-kl': global_kl,
                 'model-stdev': model_stdev,
             }
         else:
             # One-step prediction: feed steps 0..H-2 through the
             # dynamics and compare against posterior steps 1..H-1.
             q_Xt = q_X.__class__([
                 q_X.get_parameters('regular')[0][:, :-1],
                 q_X.get_parameters('regular')[1][:, :-1],
             ])
             q_At = q_A.__class__([
                 q_A.get_parameters('regular')[0][:, :-1],
                 q_A.get_parameters('regular')[1][:, :-1],
             ])
             p_Xt1 = self.forward(q_Xt, q_At)
             q_Xt1 = q_X.__class__([
                 q_X.get_parameters('regular')[0][:, 1:],
                 q_X.get_parameters('regular')[1][:, 1:],
             ])
             rmse = T.sqrt(
                 T.sum(T.square(
                     q_Xt1.get_parameters('regular')[1] -
                     p_Xt1.get_parameters('regular')[1]),
                       axis=-1))
             A, Q = self.get_dynamics()
             model_stdev = T.sqrt(T.matrix_diag_part(Q))
             local_kl = T.sum(stats.kl_divergence(q_Xt1, p_Xt1), axis=1)
             self.cache[(q_X, q_A)] = (
                 T.mean(local_kl, axis=0) + global_kl / num_data,
                 {
                     'rmse': rmse,
                     'model-stdev': model_stdev,
                     'local-kl': local_kl,
                     'global-kl': global_kl,
                 })
     return self.cache[(q_X, q_A)]
Example #4 (score: 0)
 def log_likelihood(self, states, costs):
     """Log-likelihood of `costs` under a fixed-stdev diagonal Gaussian
     centered at the evaluated cost of each state."""
     mu = self.evaluate(states)
     scale = T.ones_like(mu) * self.stdev
     cost_dist = stats.GaussianScaleDiag([scale, mu])
     return cost_dist.log_likelihood(costs)
Example #5 (score: 0)
 def kl_divergence(self, q_X, q_A, num_data):
     """KL of `q_X` against a standard-normal prior of matching shape.

     `q_A` and `num_data` are ignored; kept for interface parity with
     the other priors.  Returns `(mean KL, {})`.
     """
     shape = T.shape(q_X.get_parameters('regular')[1])
     standard_normal = stats.GaussianScaleDiag([T.ones(shape), T.zeros(shape)])
     kl = stats.kl_divergence(q_X, standard_normal)
     return T.mean(T.sum(kl, -1), 0), {}
Example #6 (score: 0)
File: vae.py — Project: yuchen8807/parasol
    def initialize(self):
        """Build the model's TensorFlow graph and open a session.

        Inside a fresh graph this constructs: the prior and cost modules
        from their config dicts; trajectory placeholders; encoder
        potentials; (optionally) dynamics-posterior and filtering
        machinery; the ELBO and its beta-weighted training objective;
        summaries; and the neural / cost / dynamics training ops.

        Fix over the original: the gradient-clipping call used a bare
        `tf` name while every other TensorFlow touchpoint goes through
        the `T.core` alias; it now uses `T.core.clip_by_global_norm`
        (the same function) for consistency and to avoid depending on a
        `tf` import.
        """
        self.graph = T.core.Graph()
        with self.graph.as_default():
            # Instantiate the prior and cost modules; the config dicts
            # are copied so pop() does not mutate the originals.
            prior_params = self.prior_params.copy()
            prior_type = prior_params.pop('prior_type')
            self.prior = PRIOR_MAP[prior_type](self.ds, self.da, self.horizon, **prior_params)

            cost_params = self.cost_params.copy()
            cost_type = cost_params.pop('cost_type')
            self.cost = COST_MAP[cost_type](self.ds, self.da, **cost_params)

            # Batched trajectory placeholders ([batch, time, dim]):
            # observations, controls, costs, states, actions.
            self.O = T.placeholder(T.floatx(), [None, None, self.do])
            self.U = T.placeholder(T.floatx(), [None, None, self.du])
            self.C = T.placeholder(T.floatx(), [None, None])
            self.S = T.placeholder(T.floatx(), [None, None, self.ds])
            self.A = T.placeholder(T.floatx(), [None, None, self.da])

            # Single-step inputs for querying the prior's dynamics.
            self.t = T.placeholder(T.int32, [])
            self.state, self.action = T.placeholder(T.floatx(), [None, self.ds]), T.placeholder(T.floatx(), [None, self.da])
            if self.prior.has_dynamics():
                self.next_state = self.prior.next_state(self.state, self.action, self.t)
                self.prior_dynamics = self.prior.get_dynamics()

            self.num_data = T.scalar()
            self.beta = T.placeholder(T.floatx(), [])
            self.learning_rate = T.placeholder(T.floatx(), [])
            self.model_learning_rate = T.placeholder(T.floatx(), [])

            # Per-time-step variational potentials from the encoders.
            self.S_potentials = util.map_network(self.state_encoder)(self.O)
            self.A_potentials = util.map_network(self.action_encoder)(self.U)

            if self.prior.is_dynamics_prior():
                self.data_strength = T.placeholder(T.floatx(), [])
                self.max_iter = T.placeholder(T.int32, [])
                posterior_dynamics, (encodings, actions) = \
                        self.prior.posterior_dynamics(self.S_potentials, self.A_potentials,
                                                      data_strength=self.data_strength,
                                                      max_iter=self.max_iter)
                self.posterior_dynamics_ = posterior_dynamics, (encodings.expected_value(), actions.expected_value())

            if self.prior.is_filtering_prior():
                self.prior_dynamics_stats = self.prior.sufficient_statistics()
                self.dynamics_stats = (
                    T.placeholder(T.floatx(), [None, self.ds, self.ds]),
                    T.placeholder(T.floatx(), [None, self.ds, self.ds + self.da]),
                    T.placeholder(T.floatx(), [None, self.ds + self.da, self.ds + self.da]),
                    T.placeholder(T.floatx(), [None]),
                )
                S_natparam = self.S_potentials.get_parameters('natural')
                num_steps = T.shape(S_natparam)[1]

                # Zero-pad the potentials out to the full horizon so
                # the prior's encode() sees a fixed-length sequence.
                self.padded_S = stats.Gaussian(T.core.pad(
                    self.S_potentials.get_parameters('natural'),
                    [[0, 0], [0, self.horizon - num_steps], [0, 0], [0, 0]]
                ), 'natural')
                self.padded_A = stats.GaussianScaleDiag([
                    T.core.pad(self.A_potentials.get_parameters('regular')[0],
                            [[0, 0], [0, self.horizon - num_steps], [0, 0]]),
                    T.core.pad(self.A_potentials.get_parameters('regular')[1],
                            [[0, 0], [0, self.horizon - num_steps], [0, 0]])
                ], 'regular')
                self.q_S_padded, self.q_A_padded = self.prior.encode(
                    self.padded_S, self.padded_A,
                    dynamics_stats=self.dynamics_stats
                )
                # Truncate back to the observed number of steps.
                self.q_S_filter = self.q_S_padded.filter(max_steps=num_steps)
                self.q_A_filter = self.q_A_padded.__class__(
                    self.q_A_padded.get_parameters('natural')[:, :num_steps]
                , 'natural')
                self.e_q_S_filter = self.q_S_filter.expected_value()
                self.e_q_A_filter = self.q_A_filter.expected_value()

            # Posterior over states/actions plus the prior KL and the
            # gradients the prior wants applied to its own parameters.
            (self.q_S, self.q_A), self.prior_kl, self.kl_grads, self.info = self.prior.posterior_kl_grads(
                self.S_potentials, self.A_potentials, self.num_data
            )

            self.q_S_sample = self.q_S.sample()[0]
            self.q_A_sample = self.q_A.sample()[0]

            # Decode posterior samples back to observation/control space.
            self.q_O = util.map_network(self.state_decoder)(self.q_S_sample)
            self.q_U = util.map_network(self.action_decoder)(self.q_A_sample)
            self.q_O_sample = self.q_O.sample()[0]
            self.q_U_sample = self.q_U.sample()[0]

            # Decoders applied to externally supplied states/actions.
            self.q_O_ = util.map_network(self.state_decoder)(self.S)
            self.q_U_ = util.map_network(self.action_decoder)(self.A)
            self.q_O__sample = self.q_O_.sample()[0]
            self.q_U__sample = self.q_U_.sample()[0]

            self.cost_likelihood = self.cost.log_likelihood(self.q_S_sample, self.C)
            if self.cost.is_cost_function():
                self.evaluated_cost = self.cost.evaluate(self.S)
            self.log_likelihood = T.sum(self.q_O.log_likelihood(self.O), axis=1)

            # ELBO, and the beta-annealed objective actually optimized.
            self.elbo = T.mean(self.log_likelihood + self.cost_likelihood - self.prior_kl)
            train_elbo = T.mean(self.log_likelihood + self.beta * (self.cost_likelihood - self.prior_kl))
            T.core.summary.scalar("encoder-stdev", T.mean(self.S_potentials.get_parameters('regular')[0]))
            T.core.summary.scalar("log-likelihood", T.mean(self.log_likelihood))
            T.core.summary.scalar("cost-likelihood", T.mean(self.cost_likelihood))
            T.core.summary.scalar("prior-kl", T.mean(self.prior_kl))
            T.core.summary.scalar("beta", self.beta)
            T.core.summary.scalar("elbo", self.elbo)
            T.core.summary.scalar("beta-elbo", train_elbo)
            for k, v in self.info.items():
                T.core.summary.scalar(k, T.mean(v))
            self.summary = T.core.summary.merge_all()
            neural_params = (
                self.state_encoder.get_parameters()
                + self.state_decoder.get_parameters()
                + self.action_encoder.get_parameters()
                + self.action_decoder.get_parameters()
            )
            cost_params = self.cost.get_parameters()
            if len(neural_params) > 0:
                optimizer = T.core.train.AdamOptimizer(self.learning_rate)
                gradients, variables = zip(*optimizer.compute_gradients(-train_elbo, var_list=neural_params))
                # Clip via the T.core alias (was a bare `tf` reference).
                gradients, _ = T.core.clip_by_global_norm(gradients, 5.0)
                self.neural_op = optimizer.apply_gradients(zip(gradients, variables))
            else:
                self.neural_op = T.core.no_op()
            if len(cost_params) > 0:
                self.cost_op = T.core.train.AdamOptimizer(self.learning_rate).minimize(-self.elbo, var_list=cost_params)
            else:
                self.cost_op = T.core.no_op()
            if len(self.kl_grads) > 0:
                if self.prior.is_dynamics_prior():
                    opt = lambda x: T.core.train.GradientDescentOptimizer(x)
                else:
                    opt = T.core.train.AdamOptimizer
                # apply_gradients expects (gradient, variable) pairs, so
                # each kl_grads pair is swapped — presumably kl_grads
                # holds (variable, gradient); verify against the prior.
                self.dynamics_op = opt(self.model_learning_rate).apply_gradients([
                    (b, a) for a, b in self.kl_grads
                ])
            else:
                self.dynamics_op = T.core.no_op()
            self.train_op = T.core.group(self.neural_op, self.dynamics_op, self.cost_op)
        self.session = T.interactive_session(graph=self.graph, allow_soft_placement=True, log_device_placement=False)