Example #1
    def optimize(self):

        # Trainable variables for the feedforward ('ff') and connection ('conn') weights
        self.variables_ff = [
            var for var in tf.trainable_variables()
            if var.op.name.find('ff') == 0
        ]
        self.variables_full = [
            var for var in tf.trainable_variables()
            if (var.op.name.find('conn') == 0)
        ]

        adam_optimizer_ff = AdamOpt(self.variables_ff,
                                    learning_rate=par['learning_rate'])
        adam_optimizer_full = AdamOpt(self.variables_full,
                                      learning_rate=par['learning_rate'])

        # Feedforward loss: mean squared error between the targets and the
        # feedforward network's outputs
        self.ff_loss = tf.reduce_mean([
            tf.square(y - y_hat)
            for (y, y_hat) in zip(tf.unstack(self.y_data, axis=0),
                                  tf.unstack(self.ff_output, axis=0))
        ])
        with tf.control_dependencies([self.ff_loss]):
            self.train_op_ff = adam_optimizer_ff.compute_gradients(
                self.ff_loss)

        # Full-model loss: mean squared error between the targets and the
        # full network's outputs
        self.full_loss = tf.reduce_mean([
            tf.square(ys - ys_hat)
            for (ys, ys_hat) in zip(tf.unstack(self.ys_data, axis=0),
                                    tf.unstack(self.full_output, axis=0))
        ])

        # KL divergence between the latent posterior N(mu, exp(si)) and the
        # unit Gaussian prior, scaled by 8e-5 (si is the log-variance)
        self.latent_loss = 8e-5 * -0.5 * tf.reduce_mean(
            tf.reduce_sum(1 + self.si - tf.square(self.mu) - tf.exp(self.si),
                          axis=-1))

        total_loss_full = self.full_loss + self.latent_loss
        with tf.control_dependencies([total_loss_full]):
            self.train_op_full = adam_optimizer_full.compute_gradients(
                total_loss_full)

        # self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op_ff = adam_optimizer_ff.reset_params()
        self.reset_adam_op_full = adam_optimizer_full.reset_params()

        self.reset_weights_ff()
        self.reset_weights_full()

        self.make_recurrent_weights_positive_ff()
        self.make_recurrent_weights_positive_full()
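
The latent_loss above is the standard Gaussian KL-divergence term from a variational autoencoder, with self.mu as the posterior mean, self.si as the log-variance, and 8e-5 as a weighting coefficient. A minimal NumPy sketch of the same quantity (the function name and shapes are illustrative, not part of the original code):

import numpy as np

def gaussian_kl(mu, log_var):
    """KL( N(mu, exp(log_var)) || N(0, I) ), summed over the latent
    dimension and averaged over the batch."""
    return -0.5 * np.mean(np.sum(1 + log_var - mu ** 2 - np.exp(log_var), axis=-1))

# Example: a batch of 4 latent vectors of size 8
mu = np.random.randn(4, 8)
log_var = np.random.randn(4, 8)
latent_loss = 8e-5 * gaussian_kl(mu, log_var)
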
Example #2
    def optimize(self):

        epsilon = 1e-6

        # Collect and list all variables in the model
        var_list = tf.trainable_variables()
        self.var_dict = {var.op.name: var for var in var_list}
        print('Variables:')
        for var in var_list:
            print(var.op.name.ljust(20), ':', var.shape)
        print()

        # Make optimizer
        # opt = tf.train.AdamOptimizer(par['learning_rate'])
        opt = AdamOpt(tf.trainable_variables(), par['learning_rate'])

        # Calculate RL quantities: bootstrapped one-step value target and advantage
        pred_val = self.reward + (par['discount_rate']**
                                  self.step) * self.future_val * (
                                      1 - self.terminal_state)
        advantage = pred_val - self.val

        # Stop gradients where necessary
        advantage_static = tf.stop_gradient(advantage)
        pred_val_static = tf.stop_gradient(pred_val)

        # Calculate RL losses
        self.pol_loss = -tf.reduce_mean(
            advantage_static * self.action * tf.log(self.pol + epsilon))
        self.val_loss = tf.reduce_mean(tf.square(self.val - pred_val_static))
        self.entropy_loss = -tf.reduce_mean(
            tf.reduce_sum(self.pol * tf.log(self.pol + epsilon), axis=1))

        total_loss = (self.pol_loss + par['val_cost'] * self.val_loss
                      - self.entropy_cost * self.entropy_loss)

        # Build ops that compute the gradients of the total loss
        self.update_grads = opt.compute_gradients(total_loss)
        self.grads = opt.return_delta_grads()

        # Build the op that applies the accumulated gradients to the weights
        self.apply_grads = opt.update_weights()
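
The three loss terms above follow the usual advantage actor-critic pattern: a policy-gradient loss weighted by the gradient-stopped advantage, a squared-error value loss against the bootstrapped one-step target, and an entropy bonus that discourages premature policy collapse. A minimal NumPy sketch of the same arithmetic (the array values and the val_cost / entropy_cost weights are illustrative, not taken from the original code):

import numpy as np

epsilon = 1e-6

# Illustrative batch of 2 states with 3 actions; per-state quantities are kept
# as [batch, 1] arrays so they broadcast against the [batch, n_actions] arrays
pol = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])   # policy probabilities
action = np.array([[1., 0., 0.], [0., 1., 0.]])      # one-hot chosen actions
val = np.array([[0.5], [0.2]])                       # value estimates V(s)
reward = np.array([[1.0], [0.0]])
future_val = np.array([[0.3], [0.6]])                # V(s') at the next step
terminal = np.array([[0.], [1.]])                    # 1 where the episode ended
discount, step = 0.9, 1

# Bootstrapped value target and advantage; in the graph version these are
# wrapped in tf.stop_gradient before entering the losses
pred_val = reward + (discount ** step) * future_val * (1 - terminal)
advantage = pred_val - val

pol_loss = -np.mean(advantage * action * np.log(pol + epsilon))
val_loss = np.mean(np.square(val - pred_val))
entropy_loss = -np.mean(np.sum(pol * np.log(pol + epsilon), axis=1))

val_cost, entropy_cost = 0.01, 0.001                 # illustrative weights
total_loss = pol_loss + val_cost * val_loss - entropy_cost * entropy_loss
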