def _make(flow):
    for i, size in enumerate(layer_dims):
        flow = fullyConnected("layer%i" % i, flow, size, tf.nn.relu)
    return fullyConnected("output_layer", flow, self.action_dim, None)
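# The `fullyConnected` helper used throughout this section is not shown.
# A minimal sketch of what it is assumed to be: a named dense layer with an
# optional activation. The tf.layers.dense wrapper here is an assumption,
# not necessarily the original implementation.
def fullyConnected(name, flow, size, activation=None):
    # Dense layer scoped by `name`; activation=None means a linear output.
    return tf.layers.dense(flow, size, activation=activation, name=name)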
def _make():
    flow = self.state
    for i, size in enumerate(layer_dims):
        flow = fullyConnected("layer%i" % i, flow, size, tf.nn.relu)
    # Dueling architecture: a scalar state value plus centred advantages.
    # Activations passed explicitly (None = linear) to match the other heads.
    value = fullyConnected("output_layer", flow, 1, None)
    advantage = fullyConnected("advantage", flow, self.action_dim, None)
    return value + advantage - tf.reduce_mean(advantage, axis=1, keep_dims=True)
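# Numeric sanity check of the dueling aggregation above,
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); plain numpy, illustrative only.
import numpy as np

value = np.array([[2.]])                # V(s), shape (batch, 1)
advantage = np.array([[1., -1., 0.]])   # A(s, a), shape (batch, actions)
q = value + advantage - advantage.mean(axis=1, keepdims=True)
print(q)  # [[3. 1. 2.]] -- centring the advantages keeps V identifiable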
def _build(self):
    self.state_ph = tf.placeholder(tf.float32, (None, state_dim), 'states')
    # Bug fix: dtype and shape were fused as tf.float32(None, ).
    self.target_values = tf.placeholder(tf.float32, (None, ), 'values')
    flow = self.state_ph
    for i, dim in enumerate(dimensions):
        flow = fullyConnected('layer_%i' % i, flow, dim, tf.nn.relu)
    # Squeeze to shape (batch,) so the subtraction below does not
    # broadcast (batch, 1) against (batch,) into a (batch, batch) matrix.
    self.value = tf.squeeze(fullyConnected('output', flow, 1, None), axis=1)
    self.loss = tf.reduce_mean(tf.square(self.value - self.target_values))
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.minimize(self.loss)
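# Hypothetical usage of the value network above: regress predicted values
# onto observed returns. `states` and `returns` are assumed numpy arrays of
# shapes (batch, state_dim) and (batch,); `self.session` is assumed to exist
# as it does in the policy network below.
loss, _ = self.session.run(
    [self.loss, self.train_op],
    feed_dict={self.state_ph: states, self.target_values: returns})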
def build(self, dimensions):
    assert type(dimensions) in [list, tuple]
    flow = self.state_ph
    for i, dim in enumerate(dimensions):
        # TODO: PPO with tanh or relu
        flow = fullyConnected("layer_%i" % i, flow, dim, tf.nn.relu)
    self.means = fullyConnected("means", flow, self.action_dim, None)
    # These are log-variances (they are exponentiated below); renamed from
    # self.vars so the later uses of self.log_vars resolve.
    self.log_vars = tf.get_variable(
        'log_vars', (self.action_dim, ), tf.float32,
        tf.constant_initializer(self.initial_var))

    def _log_prob(means, log_vars):
        # Log-density of a diagonal Gaussian, up to an additive constant.
        # Bug fix: tf.exp has no axis argument; the axis belongs on the
        # reduce_sum, and the division goes inside it.
        log_prob = tf.reduce_sum(
            tf.square(self.action_ph - means) / tf.exp(log_vars), axis=1)
        log_prob += tf.reduce_sum(log_vars)
        return -.5 * log_prob

    self.log_prob = _log_prob(self.means, self.log_vars)
    self.old_log_prob = _log_prob(self.old_mean_ph, self.old_var_ph)

    # KL divergence between the old and new diagonal Gaussians; cf.
    # https://github.com/tensorflow/agents/blob/master/agents/ppo/utility.py#L122
    kl = tf.reduce_sum(tf.exp(self.old_var_ph - self.log_vars))
    kl += tf.reduce_sum(
        tf.square(self.means - self.old_mean_ph) / tf.exp(self.log_vars),
        axis=1)
    kl += tf.reduce_sum(self.log_vars)
    kl -= tf.reduce_sum(self.old_var_ph)
    kl -= self.action_dim
    # Averaged (not summed) over the batch so the penalty does not scale
    # with batch size.
    self.kl = .5 * tf.reduce_mean(kl)

    # Entropy of the diagonal Gaussian; cf.
    # https://github.com/tensorflow/agents/blob/master/agents/ppo/utility.py#L139
    self.entropy = .5 * (self.action_dim * np.log(2 * np.pi * np.e) +
                         tf.reduce_sum(self.log_vars))

    # Reparameterized sample from the current policy.
    self.sampled = self.means + tf.exp(self.log_vars * .5) * \
        tf.random_normal(shape=(self.action_dim, ))

    # Adaptive-KL PPO objective: surrogate loss, KL penalty, and a hinge
    # term that activates once the KL overshoots twice its target.
    loss_1 = -tf.reduce_mean(
        self.advantage_ph * tf.exp(self.log_prob - self.old_log_prob))
    loss_2 = tf.reduce_mean(self.beta_ph * self.kl)
    loss_3 = self.eta_ph * \
        tf.square(tf.maximum(0., self.kl - 2. * self.target_kl))
    self.loss = loss_1 + loss_2 + loss_3
    optimizer = tf.train.AdamOptimizer(self.learning_rate_ph)
    self.train_op = optimizer.minimize(self.loss)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
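# The beta_ph / eta_ph terms make this an adaptive-KL-penalty PPO loss.
# A sketch of how beta is commonly adapted between update epochs; the 1.5
# growth factor and the 1/35..35 clamp are assumptions borrowed from common
# implementations of this scheme, not taken from this code.
kl = self.session.run(self.kl, feed_dict=feed)
if kl > 2. * self.target_kl:
    beta = min(35., 1.5 * beta)        # policy moved too far: penalize more
elif kl < .5 * self.target_kl:
    beta = max(1. / 35., beta / 1.5)   # barely moved: relax the penalty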
def generator(flow):
    flow = fullyConnected('layer_0', flow, 1024, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = fullyConnected('layer_1', flow, 7 * 7 * 64, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = tf.reshape(flow, [batch_size, 7, 7, 64])
    flow = deconv('layer_2', flow, [batch_size, 14, 14, 32], 5, 2)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = tf.nn.sigmoid(
        deconv('layer_3', flow, [batch_size, 28, 28, 1], 5, 2))
    return flow
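# The lrelu / normalizeBatch / deconv helpers are not shown; minimal sketches
# of what they are assumed to do (leaky ReLU, batch normalization, and a
# transposed convolution with an explicit output shape). The slope and
# initializer constants are assumptions.
def lrelu(flow, leak=.2):
    # Leaky ReLU with an assumed negative-side slope of .2.
    return tf.maximum(flow, leak * flow)

def normalizeBatch(flow, is_training):
    # Thin wrapper around batch normalization (TF1 contrib variant).
    return tf.contrib.layers.batch_norm(
        flow, is_training=is_training, updates_collections=None)

def deconv(name, flow, output_shape, kernel, stride):
    # Transposed convolution producing `output_shape`, SAME padding.
    with tf.variable_scope(name):
        filters = tf.get_variable(
            'filters',
            (kernel, kernel, output_shape[-1], flow.shape.as_list()[-1]),
            tf.float32, tf.random_normal_initializer(stddev=.02))
        return tf.nn.conv2d_transpose(
            flow, filters, output_shape, (1, stride, stride, 1))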
def discriminator(flow):
    flow = conv('layer_0', flow, 32, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = conv('layer_1', flow, 64, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = flat(flow)
    flow = fullyConnected('layer_2', flow, 1024, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    # flow = tf.nn.dropout(flow, .5)
    flow = fullyConnected('output', flow, 1, None)
    return flow
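# How the two networks above are typically wired into GAN losses: the
# discriminator returns raw logits, so sigmoid cross-entropy applies
# directly. `noise`, `images`, and the variable-reuse handling are
# assumptions; this is a sketch, not the original training graph.
fake = generator(noise)
real_logits = discriminator(images)
fake_logits = discriminator(fake)  # assumes variable reuse inside the helpers

d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=real_logits, labels=tf.ones_like(real_logits)) +
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=fake_logits, labels=tf.zeros_like(fake_logits)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=fake_logits, labels=tf.ones_like(fake_logits)))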
def encoder(flow):
    flow = conv('layer_0', flow, 32, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = conv('layer_1', flow, 64, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = flat(flow)
    flow = fullyConnected('layer_2', flow, 1024, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    mean = fullyConnected('mu', flow, latent_dim, None)
    sigma = fullyConnected('sigma', flow, latent_dim, None)
    return mean, sigma
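# The encoder's mean/sigma pair feeds the reparameterization trick. A sketch
# of the `latent` helper used in the script below, treating `sigma` as a
# log-variance; this reconstruction is an assumption.
def latent(flow, latent_dim):
    mean = fullyConnected('mu', flow, latent_dim, None)
    log_var = fullyConnected('sigma', flow, latent_dim, None)
    # Reparameterized sample: z = mu + exp(log_var / 2) * eps.
    epsilon = tf.random_normal(tf.shape(log_var))
    z = mean + tf.exp(log_var / 2.) * epsilon
    # Analytic KL between N(mu, var) and the standard normal prior.
    kl = -.5 * tf.reduce_sum(
        1. + log_var - tf.square(mean) - tf.exp(log_var), axis=1)
    return z, kl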
from os import environ

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from monitor import Figure

environ['CUDA_VISIBLE_DEVICES'] = ''

mnist = input_data.read_data_sets('MNIST')
input_dim = 784
hidden_encoder_dim = 400
hidden_decoder_dim = 400
latent_dim = 20
lam = 0

# Renamed from `input_data` so the placeholder no longer shadows the
# mnist loader module imported above.
input_ph = tf.placeholder(tf.float32, shape=[None, input_dim])
flow = fullyConnected('hidden', input_ph, hidden_encoder_dim, tf.nn.relu)
z, kl_loss = latent(flow, latent_dim)
flow = fullyConnected('hidden_decoder', z, hidden_decoder_dim, tf.nn.relu)
# flow = fullyConnected('hidden_2', flow, 600, tf.nn.relu)
x_hat = fullyConnected('output', flow, input_dim, None)
generated = tf.sigmoid(x_hat)

# reconstruction_loss = tf.reduce_sum(tf.square(generated - input_ph), 1)
reconstruction_loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=x_hat, labels=input_ph),
    axis=1)
loss = tf.reduce_mean(reconstruction_loss + kl_loss)
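# Hypothetical training loop for the VAE above; the Adam optimizer, learning
# rate, batch size, and step count are all assumptions.
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(10000):
        batch, _ = mnist.train.next_batch(100)
        _, batch_loss = session.run(
            [train_op, loss], feed_dict={input_ph: batch})
        if step % 1000 == 0:
            print('step %i: loss %.3f' % (step, batch_loss))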