def optimize(self):

    # Trainable variables for the feedforward ('ff') and connection ('conn') groups
    self.variables_ff = [
        var for var in tf.trainable_variables()
        if var.op.name.find('ff') == 0
    ]
    self.variables_full = [
        var for var in tf.trainable_variables()
        if var.op.name.find('conn') == 0
    ]

    # One optimizer per variable group
    adam_optimizer_ff = AdamOpt(self.variables_ff,
                                learning_rate=par['learning_rate'])
    adam_optimizer_full = AdamOpt(self.variables_full,
                                  learning_rate=par['learning_rate'])

    # Feedforward reconstruction loss (mean squared error)
    self.ff_loss = tf.reduce_mean([
        tf.square(y - y_hat)
        for (y, y_hat) in zip(tf.unstack(self.y_data, axis=0),
                              tf.unstack(self.ff_output, axis=0))
    ])

    with tf.control_dependencies([self.ff_loss]):
        self.train_op_ff = adam_optimizer_ff.compute_gradients(self.ff_loss)

    # Full-model reconstruction loss (mean squared error)
    self.full_loss = tf.reduce_mean([
        tf.square(ys - ys_hat)
        for (ys, ys_hat) in zip(tf.unstack(self.ys_data, axis=0),
                                tf.unstack(self.full_output, axis=0))
    ])

    # Latent (KL-divergence) loss on the variational latent variables
    self.latent_loss = 8e-5 * -0.5 * tf.reduce_mean(
        tf.reduce_sum(1 + self.si - tf.square(self.mu) - tf.exp(self.si),
                      axis=-1))

    with tf.control_dependencies([self.full_loss + self.latent_loss]):
        self.train_op_full = adam_optimizer_full.compute_gradients(
            self.full_loss + self.latent_loss)

    # self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op_ff = adam_optimizer_ff.reset_params()
    self.reset_adam_op_full = adam_optimizer_full.reset_params()

    self.reset_weights_ff()
    self.reset_weights_full()

    self.make_recurrent_weights_positive_ff()
    self.make_recurrent_weights_positive_full()
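
# A minimal usage sketch (not part of the original source): assuming the graph
# above is built inside a model class instantiated as `model`, and assuming
# hypothetical input/target placeholders `x` and `y`, the two training phases
# could be driven roughly as below. `AdamOpt` is this repo's custom optimizer,
# so the exact run pattern may differ from its real interface.
def train_step_sketch(sess, model, x, y, input_batch, target_batch):
    # Phase 1: update the feedforward ('ff') variables on the FF loss
    _, ff_loss = sess.run([model.train_op_ff, model.ff_loss],
                          feed_dict={x: input_batch, y: target_batch})
    # Phase 2: update the connection ('conn') variables on the
    # reconstruction + latent (KL) loss
    _, full_loss, latent_loss = sess.run(
        [model.train_op_full, model.full_loss, model.latent_loss],
        feed_dict={x: input_batch, y: target_batch})
    return ff_loss, full_loss, latent_loss
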
def optimize(self):
    epsilon = 1e-6

    # Collect and list all variables in the model
    var_list = tf.trainable_variables()
    self.var_dict = {var.op.name: var for var in var_list}

    print('Variables:')
    for var in var_list:
        print(var.op.name.ljust(20), ':', var.shape)
    print()

    # Make optimizer
    # opt = tf.train.AdamOptimizer(par['learning_rate'])
    opt = AdamOpt(tf.trainable_variables(), par['learning_rate'])

    # Calculate RL quantities: bootstrapped value target and advantage
    pred_val = self.reward + (par['discount_rate'] ** self.step) \
        * self.future_val * (1 - self.terminal_state)
    advantage = pred_val - self.val

    # Stop gradients where necessary
    advantage_static = tf.stop_gradient(advantage)
    pred_val_static = tf.stop_gradient(pred_val)

    # Calculate RL losses: policy gradient, value regression, entropy bonus
    self.pol_loss = -tf.reduce_mean(
        advantage_static * self.action * tf.log(self.pol + epsilon))
    self.val_loss = tf.reduce_mean(tf.square(self.val - pred_val_static))
    self.entropy_loss = -tf.reduce_mean(
        tf.reduce_sum(self.pol * tf.log(self.pol + epsilon), axis=1))

    total_loss = self.pol_loss + par['val_cost'] * self.val_loss \
        - self.entropy_cost * self.entropy_loss

    # Make update operations for computing gradients
    self.update_grads = opt.compute_gradients(total_loss)
    self.grads = opt.return_delta_grads()

    # Make apply operations for applying gradients
    self.apply_grads = opt.update_weights()
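
# A minimal usage sketch (not part of the original source): the custom
# `AdamOpt` optimizer above exposes separate ops for computing gradients
# (`update_grads`), inspecting them (`grads`), and applying them
# (`apply_grads`). Assuming a model instance `model` and a hypothetical feed
# dictionary `feed` with one batch of rollout data, an update could look
# roughly like this; the exact compute/apply schedule depends on AdamOpt's
# implementation, which is not shown here.
def rl_update_sketch(sess, model, feed):
    # Compute gradients of the combined policy / value / entropy loss
    pol_loss, val_loss, _ = sess.run(
        [model.pol_loss, model.val_loss, model.update_grads], feed_dict=feed)
    # Apply the resulting weight update
    sess.run(model.apply_grads)
    return pol_loss, val_loss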