def _create_graph(self):
    Dx = self._Dx

    if len(self._cond_t_lst) == 0:
        # Unconditioned case: learn the distribution parameters directly.
        mu_and_logsig_t = tf.get_variable(
            'params', self._layer_sizes[-1],
            initializer=tf.random_normal_initializer(0, 0.1)
        )
    else:
        mu_and_logsig_t = mlp(
            inputs=self._cond_t_lst,
            layer_sizes=self._layer_sizes,
            output_nonlinearity=None,
        )  # ... x 2*Dx

    self._mu_t = mu_and_logsig_t[..., :Dx]
    self._log_sig_t = tf.clip_by_value(
        mu_and_logsig_t[..., Dx:], LOG_SIG_CAP_MIN, LOG_SIG_CAP_MAX)

    # TensorFlow's multivariate normal distribution supports reparameterization.
    ds = tf.contrib.distributions
    dist = ds.MultivariateNormalDiag(
        loc=self._mu_t, scale_diag=tf.exp(self._log_sig_t))

    x_t = dist.sample()
    if not self._reparameterize:
        x_t = tf.stop_gradient(x_t)
    log_pi_t = dist.log_prob(x_t)

    self._dist = dist
    self._x_t = x_t
    self._log_pi_t = log_pi_t

    # L2 regularization on the distribution parameters.
    reg_loss_t = self._reg * 0.5 * tf.reduce_mean(self._log_sig_t ** 2)
    reg_loss_t += self._reg * 0.5 * tf.reduce_mean(self._mu_t ** 2)
    self._reg_loss_t = reg_loss_t
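# A minimal standalone sketch (not from the source; all names below are
# invented) of the reparameterization trick the `_reparameterize` flag
# toggles above: `dist.sample()` draws x = mu + exp(log_sig) * eps with
# eps ~ N(0, I), so gradients reach `mu` and `log_sig` through the sample
# unless `tf.stop_gradient` severs them.
import tensorflow as tf

mu = tf.zeros(2)
log_sig = tf.zeros(2)
eps = tf.random_normal(tf.shape(mu))
x = mu + tf.exp(log_sig) * eps                      # reparameterized sample
dx = tf.gradients(tf.reduce_sum(x), [mu, log_sig])  # both gradients defined
x_sg = tf.stop_gradient(x)
dx_sg = tf.gradients(tf.reduce_sum(x_sg), [mu, log_sig])  # [None, None]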
def _create_p_xz_params(self):
    K = self._K
    Dx = self._Dx

    if len(self._cond_t_lst) == 0:
        # Unconditioned case: learn the mixture parameters directly.
        w_and_mu_and_logsig_t = tf.get_variable(
            'params', self._layer_sizes[-1],
            initializer=tf.random_normal_initializer(0, 0.1))
    else:
        w_and_mu_and_logsig_t = mlp(
            inputs=self._cond_t_lst,
            layer_sizes=self._layer_sizes,
            output_nonlinearity=None,
        )  # ... x K*Dx*2+K

    # Split the flat head into K components: one mixture logit plus
    # Dx means and Dx log-stds per component.
    w_and_mu_and_logsig_t = tf.reshape(
        w_and_mu_and_logsig_t, shape=(-1, K, 2 * Dx + 1))

    log_w_t = w_and_mu_and_logsig_t[..., 0]
    mu_t = w_and_mu_and_logsig_t[..., 1:1 + Dx]
    log_sig_t = w_and_mu_and_logsig_t[..., 1 + Dx:]

    log_sig_t = tf.clip_by_value(log_sig_t, LOG_SIG_CAP_MIN, LOG_SIG_CAP_MAX)

    return log_w_t, mu_t, log_sig_t
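# A minimal sketch (assumed usage, not from the source) of how the
# (log_w_t, mu_t, log_sig_t) triple returned above parameterizes a
# diagonal-covariance GMM log-density:
#   log p(x) = logsumexp_k [ log_softmax(log_w)_k + log N(x; mu_k, sig_k) ]
import numpy as np
import tensorflow as tf

def gmm_log_p(x, log_w, mu, log_sig):
    """x: (N, Dx); log_w: (N, K); mu, log_sig: (N, K, Dx)."""
    x = tf.expand_dims(x, 1)  # (N, 1, Dx) broadcasts against the K components
    log_comp = -0.5 * tf.reduce_sum(
        ((x - mu) * tf.exp(-log_sig)) ** 2 + 2 * log_sig + np.log(2 * np.pi),
        axis=-1)  # (N, K) per-component Gaussian log-density
    # Normalize the mixture logits, then marginalize over components.
    return tf.reduce_logsumexp(tf.nn.log_softmax(log_w) + log_comp, axis=-1)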
def _create_graph(self):
    # Despite the name, `logits` here are sigmoid outputs in (0, 1),
    # i.e. per-step probabilities, one block per action dimension.
    logits = mlp(
        inputs=self._cond_t_lst,
        layer_sizes=self._layer_sizes,
        output_nonlinearity=tf.nn.sigmoid,
    )
    l_logits = [
        logits[:, self._slice[i]:self._slice[i + 1]]
        for i in range(self._actnum)
    ]

    self._logits = []
    for i in range(self._actnum):
        up_tri = tf.constant(
            np.triu(np.ones((self._Dx[i], self._Dx[i]), dtype=np.float32)))
        low_tri = tf.constant(
            np.tril(np.ones((self._Dx[i], self._Dx[i]), dtype=np.float32), -1))
        # Ordinal parameterization: class j accumulates log(p_k) for k <= j
        # and log(1 - p_k) for k > j, imposing an ordering over the classes.
        ordinal_logits = tf.matmul(
            tf.log(l_logits[i] + EPS), up_tri) + tf.matmul(
                tf.log(1 - l_logits[i] + EPS), low_tri)
        self._logits.append(ordinal_logits)
    # Plain (non-ordinal) alternative:
    # self._logits = [logits[:, self._slice[i]:self._slice[i + 1]]
    #                 for i in range(self._actnum)]

    self._probs = [tf.nn.softmax(logit) for logit in self._logits]
    self._logprobs = [tf.nn.log_softmax(logit) for logit in self._logits]
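# A quick NumPy check (illustrative only) of the ordinal parameterization
# built above: with per-step sigmoid outputs p, the logit of class j is
# sum_{k <= j} log p_k + sum_{k > j} log(1 - p_k), so neighbouring classes
# differ by a single term and the class ordering is preserved.
import numpy as np

p = np.array([[0.9, 0.7, 0.2]], dtype=np.float32)  # one batch row, Dx = 3
up = np.triu(np.ones((3, 3), dtype=np.float32))
low = np.tril(np.ones((3, 3), dtype=np.float32), -1)
ordinal = np.log(p) @ up + np.log(1.0 - p) @ low
# ordinal[0, 1] == log(0.9) + log(0.7) + log(1 - 0.2)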
def get_output_for(self, *inputs, reuse=False):
    with tf.variable_scope(self._name, reuse=reuse):
        value_t = mlp(
            inputs=inputs,
            output_nonlinearity=None,
            layer_sizes=self._layer_sizes,
        )  # N

    return value_t
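# Hypothetical usage (`value_fn`, `obs_t`, and `next_obs_t` are invented
# names): the first call creates the MLP variables inside the `self._name`
# scope; later calls must pass reuse=True to share those variables rather
# than trip a "variable already exists" error.
v_t = value_fn.get_output_for(obs_t)                        # creates variables
v_next_t = value_fn.get_output_for(next_obs_t, reuse=True)  # reuses them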
def _create_graph(self):
    logits = mlp(
        inputs=self._cond_t_lst,
        layer_sizes=self._layer_sizes,
        output_nonlinearity=None,
    )

    self._logits = logits
    self._probs = tf.nn.softmax(logits)
    self._logprobs = tf.nn.log_softmax(logits)
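# A minimal sketch (assumed usage, names invented) of sampling discrete
# actions from the categorical head above; TF1's `tf.multinomial` draws
# class indices from unnormalized log-probabilities.
import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0]])                # (N, num_classes)
action_t = tf.multinomial(logits, num_samples=1)[:, 0]  # (N,) int64 actions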
def get_output_for(self, *inputs, **kwargs):
    reuse = kwargs.get('reuse', False)
    with tf.variable_scope(self._name, reuse=reuse):
        value_t = mlp(
            inputs=inputs,
            output_nonlinearity=None,
            layer_sizes=self._layer_sizes,
        )  # N

    return value_t