Example #1
        def _make(flow):
            # Hidden layers: one ReLU-activated dense layer per entry
            # in layer_dims.
            for i, size in enumerate(layer_dims):
                flow = fullyConnected(
                    "layer%i" % i, flow, size, tf.nn.relu)

            # Linear output layer with one unit per action.
            return fullyConnected(
                "output_layer", flow, self.action_dim, None)
Example #2
        def _make():
            flow = self.state
            for i, size in enumerate(layer_dims):
                flow = fullyConnected("layer%i" % i, flow, size, tf.nn.relu)

            # Dueling head: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
            value = fullyConnected(
                "output_layer", flow, 1, None)
            advantage = fullyConnected("advantage", flow, self.action_dim, None)
            return value + advantage - tf.reduce_mean(advantage, axis=1, keepdims=True)
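
This is the dueling architecture: subtracting the mean advantage keeps the value and advantage streams identifiable. A hypothetical call site (the scope name and downstream tensors are assumptions, not part of the original):

with tf.variable_scope("dueling_q"):
    q_values = _make()                        # shape (batch, action_dim)
greedy_actions = tf.argmax(q_values, axis=1)  # greedy policy over the Q-head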
Example #3
    def _build(self):
        self.state_ph = tf.placeholder(tf.float32, (None, state_dim), 'states')
        self.target_values = tf.placeholder(tf.float32, (None, ), 'values')
        flow = self.state_ph
        for i, dim in enumerate(dimensions):
            flow = fullyConnected('layer_%i' % i, flow, dim, tf.nn.relu)
        self.value = fullyConnected('output', flow, 1, None)
        # Squeeze (batch, 1) -> (batch,) so the subtraction does not
        # broadcast against the (batch,) targets.
        self.loss = tf.reduce_mean(
            tf.square(tf.squeeze(self.value, axis=1) - self.target_values))
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.minimize(self.loss)
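
A hypothetical training step against this graph; net, states, and returns are assumed names, not part of the original:

session = tf.Session()
session.run(tf.global_variables_initializer())
loss, _ = session.run(
    [net.loss, net.train_op],
    feed_dict={net.state_ph: states, net.target_values: returns})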
Example #4
    def build(self, dimensions):
        assert isinstance(dimensions, (list, tuple))
        flow = self.state_ph
        for i, dim in enumerate(dimensions):
            # TODO: PPO with tanh or relu
            flow = fullyConnected("layer_%i" % i, flow, dim, tf.nn.relu)
        self.means = fullyConnected("means", flow, self.action_dim, None)
        # State-independent log-variances of the diagonal Gaussian policy
        # (renamed from `vars`: the values are used as log-variances below).
        self.log_vars = tf.get_variable(
            'log_vars', (self.action_dim, ), tf.float32,
            tf.constant_initializer(self.initial_var))

        def _log_prob(means, log_vars):
            # Log-density of a diagonal Gaussian, up to an additive constant:
            # -0.5 * (sum((a - mu)^2 / var, axis=1) + sum(log var)).
            log_prob = tf.reduce_sum(
                tf.square(self.action_ph - means) / tf.exp(log_vars), axis=1)
            log_prob += tf.reduce_sum(log_vars)
            log_prob *= -.5
            return log_prob

        self.log_prob = _log_prob(self.means, self.log_vars)
        # The old_*_ph placeholders carry the pre-update means and log-variances.
        self.old_log_prob = _log_prob(self.old_mean_ph, self.old_var_ph)

        # TODO: check with
        # https://github.com/tensorflow/agents/blob/master/agents/ppo/utility.py#L122
        # KL(old || new) between diagonal Gaussians; only the mean-difference
        # term has a batch axis, the log-variance terms are per-action vectors.
        kl = tf.reduce_sum(tf.exp(self.old_var_ph - self.log_vars))
        kl += tf.reduce_sum(tf.square(self.means - self.old_mean_ph) /
                            tf.exp(self.log_vars),
                            axis=1)
        kl += tf.reduce_sum(self.log_vars)
        kl -= tf.reduce_sum(self.old_var_ph)
        kl -= self.action_dim
        # Mean over the batch so the penalty does not grow with batch size.
        self.kl = .5 * tf.reduce_mean(kl)

        # TODO: check with
        # https://github.com/tensorflow/agents/blob/master/agents/ppo/utility.py#L139
        # Entropy of a diagonal Gaussian: 0.5 * (k * log(2*pi*e) + sum(log var)).
        entropy = self.action_dim * np.log(2 * np.pi * np.e)
        entropy += tf.reduce_sum(self.log_vars)
        self.entropy = .5 * entropy

        # Reparameterized action sample: mu + exp(0.5 * log var) * eps.
        noise = tf.random_normal(shape=(self.action_dim, ))
        self.sampled = self.means + tf.exp(self.log_vars * .5) * noise

        # PPO penalty variant: surrogate objective plus an adaptive KL
        # penalty and a quadratic hinge on KL overshoot past 2 * target_kl.
        loss_1 = -tf.reduce_mean(
            self.advantage_ph * tf.exp(self.log_prob - self.old_log_prob))
        loss_2 = self.beta_ph * self.kl
        loss_3 = self.eta_ph * \
            tf.square(tf.maximum(0., self.kl - 2. * self.target_kl))
        self.loss = loss_1 + loss_2 + loss_3
        optimizer = tf.train.AdamOptimizer(self.learning_rate_ph)
        self.train_op = optimizer.minimize(self.loss)
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())
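
The eta term only penalizes KL overshoot; beta itself is usually adapted between updates. A sketch of the common heuristic (the 1.5 and 35 constants follow typical PPO-penalty implementations and are assumptions here, as are the policy/feed names):

kl = policy.session.run(policy.kl, feed_dict=feed)
if kl > 2. * target_kl:
    beta = min(35., 1.5 * beta)       # policy moved too far: penalize harder
elif kl < .5 * target_kl:
    beta = max(1. / 35., beta / 1.5)  # policy barely moved: relax the penalty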
Example #5
def generator(flow):
    # Project the noise input up to a 7x7x64 feature map, then upsample
    # to 28x28x1 with two stride-2 deconvolutions.
    flow = fullyConnected('layer_0', flow, 1024, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = fullyConnected('layer_1', flow, 7 * 7 * 64, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = tf.reshape(flow, [batch_size, 7, 7, 64])
    flow = deconv('layer_2', flow, [batch_size, 14, 14, 32], 5, 2)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = tf.nn.sigmoid(
        deconv('layer_3', flow, [batch_size, 28, 28, 1], 5, 2))
    return flow
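
A hypothetical call site: the generator takes a batch of latent noise vectors and returns 28x28x1 images squashed into [0, 1] by the final sigmoid (the latent size of 100 is an assumption):

z = tf.random_normal((batch_size, 100))
fake_images = generator(z)  # shape (batch_size, 28, 28, 1)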
Example #6
def discriminator(flow):
    # Two stride-2 conv blocks, then dense layers down to a single
    # real/fake logit (no final sigmoid; losses use the raw logit).
    flow = conv('layer_0', flow, 32, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    flow = conv('layer_1', flow, 64, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)

    flow = flat(flow)
    flow = fullyConnected('layer_2', flow, 1024, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)
    # flow = tf.nn.dropout(flow, .5)
    flow = fullyConnected('output', flow, 1, None)

    return flow
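
Because the discriminator returns a raw logit, the usual GAN losses pair it with sigmoid_cross_entropy_with_logits. A sketch with the non-saturating generator loss (variable reuse between the two discriminator calls is elided, and real_images/fake_images are assumed tensors):

d_real = discriminator(real_images)
d_fake = discriminator(fake_images)
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_real, labels=tf.ones_like(d_real)) +
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_fake, labels=tf.zeros_like(d_fake)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_fake, labels=tf.ones_like(d_fake)))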
Example #7
def encoder(flow):
    # Convolutional recognition network: maps an image to the mean and
    # spread parameters of the approximate posterior q(z|x).
    flow = conv('layer_0', flow, 32, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)

    flow = conv('layer_1', flow, 64, 5, 2, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)

    flow = flat(flow)

    flow = fullyConnected('layer_2', flow, 1024, None)
    flow = normalizeBatch(flow, True)
    flow = lrelu(flow)

    mean = fullyConnected('mu', flow, latent_dim, None)
    sigma = fullyConnected('sigma', flow, latent_dim, None)

    return mean, sigma
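
In VAE practice the second head typically parameterizes the log-variance rather than sigma itself; under that assumption, the reparameterization step downstream looks like:

mean, log_var = encoder(images)
eps = tf.random_normal(tf.shape(mean))
z = mean + tf.exp(.5 * log_var) * eps  # differentiable sample from q(z|x)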
Example #8
from os import environ

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from monitor import Figure

environ['CUDA_VISIBLE_DEVICES'] = ''  # force CPU execution


mnist = input_data.read_data_sets('MNIST')

input_dim = 784
hidden_encoder_dim = 400
hidden_decoder_dim = 400
latent_dim = 20
lam = 0

# Named input_ph rather than input_data so the placeholder does not
# shadow the MNIST input_data module imported above.
input_ph = tf.placeholder(tf.float32, shape=[None, input_dim])

flow = fullyConnected('hidden', input_ph, hidden_encoder_dim, tf.nn.relu)

z, kl_loss = latent(flow, latent_dim)

flow = fullyConnected(
    'hidden_decoder', z, hidden_decoder_dim, tf.nn.relu)
# flow = fullyConnected('hidden_2', flow, 600, tf.nn.relu)
x_hat = fullyConnected('output', flow, input_dim, None)

generated = tf.sigmoid(x_hat)

# reconstruction_loss = tf.reduce_sum(tf.square(generated - input_ph), 1)
reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=x_hat, labels=input_ph), axis=1)
loss = tf.reduce_mean(reconstruction_loss + kl_loss)
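
A hypothetical tail for the script (optimizer, learning rate, batch size, and step count are all assumptions): minimize the combined loss while feeding MNIST batches through input_ph.

train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(10000):
        batch, _ = mnist.train.next_batch(100)
        session.run(train_op, feed_dict={input_ph: batch})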