def optimize(self):

    self.variables_task = [var for var in tf.trainable_variables() if var.op.name.find('task') == 0]
    self.variables_recon = [var for var in tf.trainable_variables() if not var.op.name.find('task') == 0]

    #opt = tf.train.GradientDescentOptimizer(par['learning_rate'])
    opt_task = AdamOpt.AdamOpt(self.variables_task, par['learning_rate'])
    opt_recon = AdamOpt.AdamOpt(self.variables_recon, par['learning_rate'])

    #self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.y, labels=self.target_data, dim=1))
    self.task_loss = tf.reduce_mean(tf.multiply(self.input_info, tf.square(self.y - self.target_data)))
    self.recon_loss = 1*tf.reduce_mean(tf.square(self.x_hat - self.input_data))
    #self.recon_loss = 1e-3*tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.x_hat, labels=self.input_data))
    self.latent_loss = 8e-5 * -0.5*tf.reduce_mean(
        tf.reduce_sum(1 + self.si - tf.square(self.mu) - tf.exp(self.si), axis=-1))
    self.total_loss = self.task_loss + self.recon_loss + self.latent_loss

    self.train_op_task = opt_task.compute_gradients(self.task_loss)
    self.train_op_recon = opt_recon.compute_gradients(self.recon_loss + self.latent_loss)

    self.generative_vars = {}
    for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='post_latent'):
        self.generative_vars[var.op.name] = var
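# Note on the latent term above: assuming self.si is the log-variance and self.mu the mean of the
# approximate posterior, the expression is the standard VAE KL divergence to a unit Gaussian,
#   KL( N(mu, sigma^2) || N(0, 1) ) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2),
# scaled here by 8e-5. A minimal NumPy sketch of the same quantity (hypothetical helper, illustration only):
#
#   import numpy as np
#   def kl_to_unit_gaussian(mu, log_var):
#       # per-sample KL divergence, averaged over the batch
#       return np.mean(-0.5 * np.sum(1 + log_var - mu**2 - np.exp(log_var), axis=-1))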
def __init__(self, state_size, action_size, power_constraint, power_val_array, meta_param_len, id):
    self.agent_id = id
    self.state_size = state_size
    self.action_size = action_size
    self.power_constraint = power_constraint
    self.power_val_array = np.array(power_val_array)
    self.power_val_chosen = []
    self.mean_pow_val = 0
    self.mean_pow_val_check = 0
    self.penalty_lambda = 1 / np.amax(self.power_val_array)
    self.lambda_learning_rate = 0.0001
    self.lambda_lr_decay = 0.99993
    self.penalty_lambda_array = []
    self.penalty_lambda_array = np.array(self.penalty_lambda_array)
    self.AdamOpt = AdamOpt.AdamOpt(step=self.lambda_learning_rate)
    self.AdamOptMeta = AdamOptMeta.AdamOpt(step=self.lambda_learning_rate, sign=-1)
    self.memory = deque(maxlen=30000)  # memory D for storing states, actions, rewards, etc.
    self.meta_memory = deque(maxlen=1000)  # memory for the meta-learning transitions
    self.gamma = 0.9  # discount factor; gamma = 1 corresponds to the average-reward case
    self.epsilon = 1.0  # start with fully random actions and decay epsilon as time progresses
    self.epsilon_min = 0.1  # minimum exploration rate
    self.epsilon_decay = 0.98  # decay rate (epsilon = epsilon * epsilon_decay)
    self.learning_rate = 0.001  # learning rate for the neural-network optimizer
    self.batch_size = 64  # mini-batch size for experience replay
    self.model = self.build_model()  # neural network that learns the Q function
    self.target_model = self.build_model()  # neural network that estimates the target Q function
    self.meta_model = self.build_model()
    self.update_target_model()  # initialize the target model to match the model (theta_ = theta)
    # self.power_values = np.arange(1, 52, 2.55) / 49.45
    self.target_model_update_count = 0
    self.cumulative_reward = 0
    self.average_reward = 0
    self.num_of_actions = 0
    self.reward_array = []
    self.reward_array = np.array(self.reward_array)
    self.meta_param_len = meta_param_len
    self.DSGDA = NA.DNNApproximator((1, self.meta_param_len), 1, .01, .01)
    # SharedWeights.weights = np.append(SharedWeights.weights, self.target_model.get_weights())
    SharedWeights.weights.append(self.target_model.get_weights())
    SharedWeights.weight_size = SharedWeights.weight_size + 1
def optimize(self):
    """ Calculate losses and apply corrections to model """

    # Set up optimizer and required constants
    epsilon = 1e-7
    adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=par['learning_rate'])

    # Spiking activity loss (penalty on high activation values in the hidden layer)
    self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
        for (h, mask, time_mask) in zip(tf.unstack(self.h), tf.unstack(self.mask), tf.unstack(self.time_mask))]))

    # Correct time mask shape
    self.time_mask = self.time_mask[..., tf.newaxis]

    # Get the value outputs of the network, and pad the last time step
    val_out = tf.concat([self.val_out, tf.zeros([1, par['batch_size'], par['n_val']])], axis=0)

    # Determine terminal state of the network
    terminal_state = tf.cast(tf.logical_not(tf.equal(self.reward, tf.constant(0.))), tf.float32)

    # Compute predicted value and the advantage for plugging into the policy loss
    pred_val = self.reward + par['discount_rate']*val_out[1:, :, :]*(1 - terminal_state)
    advantage = pred_val - val_out[:-1, :, :]

    # Stop gradients back through action, advantage, and mask
    action_static = tf.stop_gradient(self.action)
    advantage_static = tf.stop_gradient(advantage)
    mask_static = tf.stop_gradient(self.mask)
    pred_val_static = tf.stop_gradient(pred_val)

    # Multiply masks together
    full_mask = mask_static*self.time_mask

    # Policy loss
    self.pol_loss = -tf.reduce_mean(full_mask*advantage_static*action_static*tf.log(epsilon + self.pol_out))

    # Value loss
    self.val_loss = 0.5*par['val_cost']*tf.reduce_mean(full_mask*tf.square(val_out[:-1, :, :] - pred_val_static))

    # Entropy loss
    self.ent_loss = -par['entropy_cost']*tf.reduce_mean(
        tf.reduce_sum(full_mask*self.pol_out*tf.log(epsilon + self.pol_out), axis=2))

    # Collect RL losses
    RL_loss = self.pol_loss + self.val_loss - self.ent_loss

    # Collect loss terms and compute gradients
    total_loss = RL_loss + self.spike_loss
    self.train_op = adam_optimizer.compute_gradients(total_loss)
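# Note on the RL quantities above: the bootstrapped target is pred_val = r + gamma * V(s') * (1 - terminal),
# the advantage is A = pred_val - V(s), and the policy loss is the masked actor-critic objective
# -E[ A * a * log(pi) ], with gradients stopped through A, the action, and the masks. A minimal NumPy
# sketch of the per-step policy-gradient term (hypothetical helper, illustration only):
#
#   import numpy as np
#   def policy_loss(advantage, action_one_hot, policy, mask, epsilon=1e-7):
#       # advantage and action are treated as constants (no gradient flows through them)
#       return -np.mean(mask * advantage * action_one_hot * np.log(epsilon + policy))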
def optimize(self):

    self.variables = [var for var in tf.trainable_variables() if not var.op.name.find('conv') == 0]
    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=p.par['learning_rate'])

    #print('mask', self.mask)
    #print('target_data', self.target_data)
    #print('network_output', self.network_output)

    # Calculate performance loss
    perf_loss = tf.stack([mask*tf.nn.softmax_cross_entropy_with_logits(logits=y_hat, labels=desired_output, dim=0) \
        for (y_hat, desired_output, mask) in zip(self.network_output, self.target_data, self.mask)])
    self.perf_loss = tf.reduce_mean(perf_loss)

    # Calculate spiking loss
    self.spike_loss = [p.par['spike_cost']*tf.reduce_mean(tf.square(h), axis=0) for h in self.network_hidden]
    self.spike_loss = tf.reduce_mean(self.spike_loss)/tf.reduce_mean(self.gate)

    # Calculate wiring cost
    self.wiring_loss = [p.par['wiring_cost']*tf.nn.relu(W_rnn) \
        for W_rnn in tf.trainable_variables() if 'W_rnn' in W_rnn.name]
    self.wiring_loss = tf.reduce_mean(self.wiring_loss)

    # Collect total loss
    self.total_loss = self.perf_loss + self.spike_loss + self.wiring_loss

    self.train_op = adam_optimizer.compute_gradients(self.total_loss)
    self.reset_adam_op = adam_optimizer.reset_params()
def calculate_encoder_grads(self):
    """ Calculate the gradient on the latent weights """

    encoding_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoding')
    self.encoding_optimizer = AdamOpt.AdamOpt(encoding_vars, 0.0002)

    stim = self.stim_pl/(1e-9 + tf.sqrt(tf.reduce_sum(self.stim_pl**2, axis=1, keepdims=True)))
    # if the dot-product between two stimuli is less than 0.99, consider them different
    s = tf.cast((stim @ tf.transpose(stim)) < 0.99, tf.float32)
    latent = self.latent_mu/(1e-9 + tf.sqrt(tf.reduce_sum(self.latent_mu**2, axis=1, keepdims=True)))
    c = latent @ tf.transpose(latent)
    c *= s

    self.sparsity_loss = tf.reduce_mean(tf.abs(c))
    self.reconstruction_loss = tf.reduce_mean(tf.square(self.stim_pl - self.stim_hat))
    self.loss = self.reconstruction_loss + par['latent_cost']*self.latent_loss \
        + par['sparsity_cost']*self.sparsity_loss

    if par['train_encoder']:
        self.train_encoder = self.encoding_optimizer.compute_gradients(self.loss)
    else:
        self.train_encoder = tf.no_op()
def __init__(self, inp_dim, out_dim, lr, tau, min_max=-1):  # min = -1, max = +1
    # Dimensions and hyperparameters
    self.env_dim = inp_dim
    self.act_dim = out_dim
    self.tau, self.lr = tau, lr
    self.model = self.network()
    self.model.compile(Adam(self.lr), 'mse')
    self.AdamOpt = AdamOpt.AdamOpt(sign=min_max, step=self.tau)
def optimize(self):

    # Use all trainable variables, except those in the convolutional layers
    self.variables = [var for var in tf.trainable_variables() if not var.op.name.find('conv') == 0]
    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=par['learning_rate'])

    previous_weights_mu_minus_1 = {}
    reset_prev_vars_ops = []
    self.big_omega_var = {}
    aux_losses = []

    for var in self.variables:
        self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        previous_weights_mu_minus_1[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        aux_losses.append(par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
            tf.square(previous_weights_mu_minus_1[var.op.name] - var))))
        reset_prev_vars_ops.append(tf.assign(previous_weights_mu_minus_1[var.op.name], var))

    self.aux_loss = tf.add_n(aux_losses)

    self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.y, \
        labels=self.target_data, dim=1))

    # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
    with tf.control_dependencies([self.task_loss, self.aux_loss]):
        self.train_op = adam_optimizer.compute_gradients(self.task_loss + self.aux_loss)

    if par['stabilization'] == 'pathint':
        # Zenke method
        self.pathint_stabilization(adam_optimizer, previous_weights_mu_minus_1)
    elif par['stabilization'] == 'EWC':
        # Kirkpatrick method
        self.EWC()

    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = adam_optimizer.reset_params()

    correct_prediction = tf.equal(tf.argmax(self.y - (1 - self.mask)*9999, 1),
                                  tf.argmax(self.target_data - (1 - self.mask)*9999, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    self.reset_weights()
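# Note on the auxiliary loss above: for each stabilized variable theta the penalty is
#   aux_loss = omega_c * sum_k Omega_k * (theta_prev_k - theta_k)^2,
# the quadratic weight-protection term shared by the synaptic-intelligence (Zenke) and EWC (Kirkpatrick)
# branches selected via par['stabilization']; only the importance estimate Omega differs between them.
# A minimal NumPy sketch of the penalty (hypothetical helper, illustration only):
#
#   import numpy as np
#   def quadratic_penalty(omega_c, big_omega, prev_weights, weights):
#       # weights, prev_weights, big_omega: dicts keyed by variable name
#       return omega_c * sum(np.sum(big_omega[k] * (prev_weights[k] - w)**2) for k, w in weights.items())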
def optimize(self):

    adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=par['learning_rate'])

    self.task_loss = tf.reduce_mean(self.m*tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.y_hat, labels=self.output_data))

    # Compute gradients
    self.train = adam_optimizer.compute_gradients(self.task_loss)
def optimize(self):

    epsilon = 1e-6

    # Collect and list all variables in the model
    var_list = tf.trainable_variables()
    self.var_dict = {var.op.name: var for var in var_list}

    print('Variables:')
    [print(var.op.name.ljust(20), ':', var.shape) for var in var_list]
    print()

    # Make optimizer
    # opt = tf.train.AdamOptimizer(par['learning_rate'])
    opt = AdamOpt(tf.trainable_variables(), par['learning_rate'])

    # Calculate RL quantities
    pred_val = self.reward + (par['discount_rate']**self.step)*self.future_val*(1 - self.terminal_state)
    advantage = pred_val - self.val

    # Stop gradients where necessary
    advantage_static = tf.stop_gradient(advantage)
    pred_val_static = tf.stop_gradient(pred_val)

    # Calculate RL losses
    self.pol_loss = -tf.reduce_mean(advantage_static*self.action*tf.log(self.pol + epsilon))
    self.val_loss = tf.reduce_mean(tf.square(self.val - pred_val_static))
    self.entropy_loss = -tf.reduce_mean(tf.reduce_sum(self.pol*tf.log(self.pol + epsilon), axis=1))

    total_loss = self.pol_loss + par['val_cost']*self.val_loss - self.entropy_cost*self.entropy_loss

    # Make update operations for gradient applications
    self.update_grads = opt.compute_gradients(total_loss)
    self.grads = opt.return_delta_grads()

    # Make apply operations for gradient applications
    self.apply_grads = opt.update_weights()
def optimize(self):

    self.variables = [var for var in tf.trainable_variables() if not var.op.name.find('conv') == 0]
    print(self.variables)
    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=p.par['learning_rate'])

    previous_weights_mu_minus_1 = {}
    reset_prev_vars_ops = []
    self.big_omega_var = {}
    aux_losses = []

    for var in self.variables:
        self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        previous_weights_mu_minus_1[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        aux_losses.append(p.par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
            tf.square(previous_weights_mu_minus_1[var.op.name] - var))))
        reset_prev_vars_ops.append(tf.assign(previous_weights_mu_minus_1[var.op.name], var))

    self.aux_loss = tf.add_n(aux_losses)

    # Calculate performance loss
    self.perf_loss = [mask*tf.nn.softmax_cross_entropy_with_logits(logits=y_hat, labels=desired_output, dim=0) \
        for (y_hat, desired_output, mask) in zip(self.networks_output, self.target_data, self.mask)]
    self.perf_loss = tf.reduce_mean(self.perf_loss)

    # Calculate spiking loss
    self.spike_loss = [p.par['spike_cost']*tf.reduce_mean(tf.square(h), axis=0) for h in self.networks_hidden]
    self.spike_loss = tf.reduce_mean(self.spike_loss)

    # Collect total loss
    self.total_loss = self.perf_loss + self.spike_loss

    # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
    with tf.control_dependencies([self.total_loss, self.aux_loss]):
        self.train_op = adam_optimizer.compute_gradients(self.total_loss + self.aux_loss)

    # Zenke method
    self.pathint_stabilization(adam_optimizer, previous_weights_mu_minus_1)

    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = adam_optimizer.reset_params()
def optimize(self):

    # Trainable variables for FF / Generative / Connection
    self.variables_ff = [var for var in tf.trainable_variables() if var.op.name.find('ff') == 0]
    self.variables_full = [var for var in tf.trainable_variables() if var.op.name.find('conn') == 0]

    adam_optimizer_ff = AdamOpt(self.variables_ff, learning_rate=par['learning_rate'])
    adam_optimizer_full = AdamOpt(self.variables_full, learning_rate=par['learning_rate'])

    self.ff_loss = tf.reduce_mean([tf.square(y - y_hat) for (y, y_hat) in
        zip(tf.unstack(self.y_data, axis=0), tf.unstack(self.ff_output, axis=0))])

    with tf.control_dependencies([self.ff_loss]):
        self.train_op_ff = adam_optimizer_ff.compute_gradients(self.ff_loss)

    self.full_loss = tf.reduce_mean([tf.square(ys - ys_hat) for (ys, ys_hat) in
        zip(tf.unstack(self.ys_data, axis=0), tf.unstack(self.full_output, axis=0))])

    self.latent_loss = 8e-5 * -0.5*tf.reduce_mean(
        tf.reduce_sum(1 + self.si - tf.square(self.mu) - tf.exp(self.si), axis=-1))

    with tf.control_dependencies([self.full_loss + self.latent_loss]):
        self.train_op_full = adam_optimizer_full.compute_gradients(self.full_loss + self.latent_loss)

    # self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op_ff = adam_optimizer_ff.reset_params()
    self.reset_adam_op_full = adam_optimizer_full.reset_params()

    self.reset_weights_ff()
    self.reset_weights_full()

    self.make_recurrent_weights_positive_ff()
    self.make_recurrent_weights_positive_full()
def optimize(self):

    epsilon = 1e-6

    # Collect all variables in the model and list them out
    var_list_all = tf.trainable_variables()
    var_list = [var for var in var_list_all if not 'striatum' in var.op.name]
    var_list = var_list_all
    var_list_striatum = [var for var in var_list_all if 'striatum' in var.op.name]
    self.var_dict = {var.op.name: var for var in var_list}

    print('Variables:')
    [print(var.op.name.ljust(20), ':', var.shape) for var in var_list]
    print()
    print('Striatum Variables:')
    [print(var.op.name.ljust(20), ':', var.shape) for var in var_list_striatum]
    print()

    # Make optimizer
    opt = AdamOpt.AdamOpt(var_list, algorithm='rmsprop', learning_rate=par['learning_rate'])
    opt_striatum = AdamOpt.AdamOpt(var_list_striatum, algorithm='rmsprop', learning_rate=par['learning_rate'])

    pred_val = self.reward + (par['discount_rate']**self.step)*self.future_val*(1. - self.terminal_state)
    advantage = pred_val - self.val

    pol_loss = -tf.reduce_mean(tf.stop_gradient(advantage)*self.action*tf.log(self.pol + epsilon))
    val_loss = tf.reduce_mean(tf.square(advantage))
    entropy_loss = -tf.reduce_mean(tf.reduce_sum(self.pol*tf.log(self.pol + epsilon), axis=1))
    #entropy_loss = -tf.reduce_mean(tf.reduce_mean(self.pol*tf.log(self.pol + epsilon), axis=1))

    loss = pol_loss + par['val_cost']*val_loss - par['entropy_cost']*entropy_loss

    self.update_grads = opt.compute_gradients_rmsprop(loss)
    self.update_weights = opt.update_weights_rmsprop(lr_multiplier=self.lr_multiplier)
def optimize(self):

    #opt = tf.train.GradientDescentOptimizer(par['learning_rate'])
    var_list = [var for var in tf.trainable_variables()]
    opt = AdamOpt.AdamOpt(var_list, par['learning_rate'])
    opt_pred = AdamOpt.AdamOpt(var_list, par['learning_rate'])

    self.pred_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred, \
        labels=self.pred_target, dim=1))
    self.train_op_pred = opt_pred.compute_gradients(self.pred_loss)

    #self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.y, labels=self.target_data, dim=1))
    self.task_loss = self.alpha*tf.reduce_mean(tf.square(self.y - self.target_data))
    self.recon_loss = tf.reduce_mean(tf.square(self.x_hat - self.input_data))
    self.weight_loss = 0.00*tf.reduce_sum(tf.square(self.var_dict['W_layer_out']))
    #self.recon_loss = 1e-3*tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.x_hat, labels=self.input_data))
    #self.latent_loss = 0.*-0.5*tf.reduce_mean(tf.reduce_sum(1+self.si-tf.square(self.mu)-tf.exp(self.si),axis=-1))
    self.latent_loss = 0.0001*tf.reduce_mean(tf.square(self.latent_sample))

    self.total_loss = self.task_loss + self.recon_loss + self.latent_loss + self.weight_loss - 2.*self.pred_loss

    with tf.control_dependencies([self.total_loss]):
        self.train_op = opt.compute_gradients(self.total_loss, gate_prediction=True)

    self.generative_vars = {}
    for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='post_latent'):
        self.generative_vars[var.op.name] = var
def optimize(self):
    """ Calculate losses and apply corrections to model """

    # Set up optimizer
    adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=par['learning_rate'])

    # Calculate losses
    self.task_loss = tf.reduce_mean(self.time_mask[::1, ...] * \
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.output[::1, ...], \
        labels=self.target_data[::1, ...]))

    self.spike_loss = 0.*tf.reduce_mean(tf.nn.relu(self.h + 0.02))

    # Compute gradients
    self.train = adam_optimizer.compute_gradients(self.task_loss + self.spike_loss)
def optimize(self):

    self.perf_losses = []
    self.spike_losses = []
    self.wiring_losses = []
    self.total_loss = tf.constant(0.)

    self.variables = [var for var in tf.trainable_variables() if not var.op.name.find('conv') == 0]
    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=p.par['learning_rate'])

    for n in range(p.par['num_networks']):

        # Calculate performance loss
        perf_loss = [mask*tf.nn.softmax_cross_entropy_with_logits(logits=y_hat, labels=desired_output, dim=0) \
            for (y_hat, desired_output, mask) in zip(self.networks_output[n], self.target_data, self.mask)]
        perf_loss = tf.reduce_mean(tf.stack(perf_loss, axis=0))

        # Calculate spiking loss
        spike_loss = [p.par['spike_cost']*tf.reduce_mean(tf.square(h), axis=0) for h in self.networks_hidden[n]]
        spike_loss = tf.reduce_mean(tf.stack(spike_loss, axis=0))

        # Calculate wiring cost
        wiring_loss = [p.par['wiring_cost']*tf.nn.relu(W_rnn*p.par['W_rnn_dist'])
            for W_rnn in tf.trainable_variables() if 'W_rnn' in W_rnn.name]
        wiring_loss = tf.reduce_mean(tf.stack(wiring_loss, axis=0))

        # Add losses to record
        self.perf_losses.append(perf_loss)
        self.spike_losses.append(spike_loss)
        self.wiring_losses.append(wiring_loss)

        # Collect total loss
        self.total_loss += perf_loss + spike_loss + wiring_loss

    self.train_op = adam_optimizer.compute_gradients(self.total_loss)
    self.reset_adam_op = adam_optimizer.reset_params()
def optimize(self):

    epsilon = 1e-6

    # Collect all variables in the model and list them out
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    self.var_dict = {var.op.name: var for var in var_list}
    print('Variables:')
    [print(var.op.name.ljust(20), ':', var.shape) for var in var_list]
    print()

    # Make optimizer
    opt = AdamOpt.AdamOpt(var_list, algorithm='rmsprop', learning_rate=par['learning_rate'])

    # Calculate RL quantities
    pred_val = self.reward + (par['discount_rate']**self.step)*self.future_val*(1. - self.terminal_state)
    advantage = pred_val - self.val

    # Calculate RL losses
    pol_loss = -tf.reduce_mean(tf.stop_gradient(advantage)*self.action*tf.log(self.pol + epsilon))
    val_loss = tf.reduce_mean(tf.square(advantage))
    entropy_loss = -tf.reduce_mean(tf.reduce_sum(self.pol*tf.log(self.pol + epsilon), axis=1))

    # Calculate state prediction loss
    self.pred_loss = tf.reduce_mean(tf.square(self.pred - self.future_capsule))

    loss = pol_loss + par['val_cost']*val_loss - par['entropy_cost']*entropy_loss \
        + par['pred_cost']*self.pred_loss

    # Make update operations for gradient applications
    self.update_grads = opt.compute_gradients_rmsprop(loss)

    # Make apply operations for gradient applications
    self.update_weights = opt.update_weights_rmsprop(lr_multiplier=self.lr_multiplier)
def optimize(self):

    opt = AdamOpt.AdamOpt(tf.trainable_variables(), par['learning_rate'])
    eps = 1e-7

    # Task loss and training
    if par['task'] == 'trig':
        self.task_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_to_solution'] - self.target_data))
    elif par['task'] == 'go':
        self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
            logits=self.y_hat, labels=self.target_data + eps))

    y_prob = tf.nn.softmax(self.y_hat)
    self.entropy_loss = -tf.reduce_mean(-y_prob*tf.log(y_prob))

    self.train_task = opt.compute_gradients(self.task_loss)
    self.train_task_entropy = opt.compute_gradients(self.entropy_loss)
def __init__(self, input_data, W=None, U=None):

    if type(W) is type(None):
        self.W = tf.get_variable('W', initializer=tf.random_uniform_initializer(-0.5, 0.5),
                                 shape=[par['n_input'], par['n_latent']])
    else:
        self.W = tf.get_variable('W', initializer=W, trainable=False)

    if type(U) is type(None):
        self.U = tf.get_variable('U', initializer=tf.random_uniform_initializer(-0.5, 0.5),
                                 shape=[par['n_latent'], par['n_input']])
    else:
        self.U = tf.get_variable('U', initializer=U, trainable=False)

    self.I = input_data
    self.E = []
    self.R = []
    for t in range(input_data.shape.as_list()[0]):
        E = tf.nn.relu(self.I[t] @ self.W)
        R = E @ self.U
        self.E.append(E)
        self.R.append(R)

    self.E = tf.stack(self.E, axis=0)
    self.R = tf.stack(self.R, axis=0)

    self.loss_plot = 0.5*tf.square(self.I - self.R)
    self.rec_loss = tf.reduce_mean(self.loss_plot)
    self.act_loss = par['enc_activity_cost']*tf.reduce_mean(tf.log(1 + tf.abs(self.E)))
    self.wei_loss = par['enc_weight_cost']*tf.reduce_mean(tf.abs(self.U))
    total_loss = self.rec_loss + self.act_loss + self.wei_loss

    if type(W) is type(None) or type(U) is type(None):
        opt = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=0.01)
        self.train = opt.compute_gradients(total_loss)
def calculate_policy_grads(self):
    """ Calculate the gradient on the policy/value weights """

    RL_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='RL')
    self.RL_optimizer = AdamOpt.AdamOpt(RL_vars, par['learning_rate'])

    not_terminal_state = tf.cast(tf.equal(self.reward_pl, tf.constant(0.)), tf.float32)
    advantage = self.reward_pl + par['discount_rate']*self.future_val_pl*not_terminal_state - self.val_out

    self.val_loss = 0.5*tf.reduce_mean(tf.square(advantage))
    self.pol_loss = -tf.reduce_mean(tf.stop_gradient(advantage*self.action_pl) \
        *tf.log(1e-9 + self.pol_out))
    self.entropy_loss = -tf.reduce_mean(tf.reduce_sum(self.pol_out \
        *tf.log(1e-9 + self.pol_out), axis=-1))

    self.loss = self.pol_loss + par['val_cost']*self.val_loss \
        - par['entropy_cost']*self.entropy_loss

    self.train_RL = self.RL_optimizer.compute_gradients(self.loss)
def calculate_encoder_grads(self):
    """ Calculate the gradient on the latent weights """

    encoding_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoding')
    self.encoding_optimizer = AdamOpt.AdamOpt(encoding_vars, 0.001)

    self.reconstruction_loss = tf.reduce_mean(tf.square(self.stim_pl - self.stim_hat))
    self.weight_loss = tf.reduce_mean(tf.abs(self.var_dict['W_enc'])) \
        + tf.reduce_mean(tf.abs(self.var_dict['W_dec']))

    latent_mask = np.ones((par['n_latent'], par['n_latent']), dtype=np.float32) \
        - np.eye((par['n_latent']), dtype=np.float32)
    self.sparsity_loss = tf.reduce_mean(latent_mask*(tf.transpose(self.latent) @ self.latent))/par['batch_size']

    self.loss = self.reconstruction_loss + par['sparsity_cost']*self.sparsity_loss \
        + par['weight_cost']*self.weight_loss

    self.train_encoder = self.encoding_optimizer.compute_gradients(self.loss)
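# Note on the sparsity term above: latent_mask is an all-ones matrix with the diagonal zeroed
# (ones - identity), so the penalty only touches the off-diagonal entries of the latent Gram matrix
# latent^T @ latent, i.e. it discourages correlations between different latent units while leaving
# each unit's own activity unpenalized. A minimal NumPy sketch of the same quantity (illustration only):
#
#   import numpy as np
#   def off_diagonal_correlation_penalty(latent, batch_size):
#       n = latent.shape[1]
#       mask = np.ones((n, n), dtype=np.float32) - np.eye(n, dtype=np.float32)
#       return np.mean(mask * (latent.T @ latent)) / batch_size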
def optimize(self):

    self.loss = tf.reduce_mean(tf.square(self.y - self.y_hat))
    variables = [var for var in tf.trainable_variables()]

    if True:
        # Adam optimizer scenario
        optimizer = AdamOpt.AdamOpt(variables, learning_rate=self.lr)
        self.train = optimizer.compute_gradients(self.loss, gate=0)
        gvs = optimizer.return_gradients()
        self.g = gvs[0][0]
        self.v = gvs[0][1]
    else:
        # GD optimizer scenario
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
        gvs = optimizer.compute_gradients(self.loss)
        self.train = optimizer.apply_gradients(gvs)
        self.g = tf.reduce_mean(gvs[0][0])
        self.v = tf.reduce_mean(gvs[0][1])
def optimize(self):
    """ Calculate losses and apply corrections to model """

    # Set up optimizer and required constants
    epsilon = 1e-7
    opt = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=par['learning_rate'])

    # Calculate task performance loss
    if par['loss_function'] == 'MSE':
        perf_loss = [m*tf.reduce_mean(tf.square(t - y)) for m, t, y \
            in zip(self.mask, self.target_data, self.y_hat)]
    elif par['loss_function'] == 'cross_entropy':
        perf_loss = [m*tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=t) for m, t, y \
            in zip(self.mask, self.target_data, self.y_hat)]
    self.perf_loss = tf.reduce_mean(tf.stack(perf_loss))

    # Calculate L2 loss on hidden state spiking activity
    self.spike_loss = tf.reduce_mean(tf.stack([par['spike_cost']*tf.reduce_mean(tf.square(h), axis=0) \
        for h in self.hidden_hist]))

    # Calculate L1 loss on weight strengths
    if par['architecture'] == 'BIO':
        self.wiring_loss = tf.reduce_sum(tf.nn.relu(self.var_dict['W_in'])) \
            + tf.reduce_sum(tf.nn.relu(self.var_dict['W_rnn'])) \
            + tf.reduce_sum(tf.nn.relu(self.var_dict['W_out']))
        self.wiring_loss *= par['wiring_cost']
    elif par['architecture'] == 'LSTM':
        self.wiring_loss = 0

    # Collect total loss
    self.loss = self.perf_loss + self.spike_loss + self.wiring_loss

    # Compute and apply network gradients
    self.train_op = opt.compute_gradients(self.loss)
def optimize(self):

    # Use all trainable variables, except those in the convolutional layers
    self.variables = [var for var in tf.trainable_variables() if not var.op.name.find('conv') == 0]
    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=par['learning_rate'])

    previous_weights_mu_minus_1 = {}
    reset_prev_vars_ops = []
    self.big_omega_var = {}
    aux_losses = []

    for var in self.variables:
        self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        previous_weights_mu_minus_1[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        aux_losses.append(par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
            tf.square(previous_weights_mu_minus_1[var.op.name] - var))))
        reset_prev_vars_ops.append(tf.assign(previous_weights_mu_minus_1[var.op.name], var))

    self.aux_loss = tf.add_n(aux_losses)

    self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.square(self.hidden_state_hist))

    self.task_loss = tf.reduce_mean([mask*tf.nn.softmax_cross_entropy_with_logits(logits=y, \
        labels=target, dim=1) for y, target, mask in zip(self.output, self.target_data, self.mask)])

    output_softmax = [tf.nn.softmax(y, dim=1) for y in self.output]
    self.entropy_loss = -par['entropy_cost']*tf.reduce_mean([m*tf.reduce_sum(out_sm*tf.log(1e-7 + out_sm), axis=1) \
        for (out_sm, m) in zip(output_softmax, self.mask)])

    """
    with tf.variable_scope('rnn', reuse = True):
        W_in = tf.get_variable('W_in')
        W_rnn = tf.get_variable('W_rnn')
    active_weights_rnn = tf.matmul(tf.reshape(self.gating,[-1,1]), tf.reshape(self.gating,[1,-1]))
    active_weights_in = tf.tile(tf.reshape(self.gating,[1,-1]),[par['n_input'], 1])
    self.weight_loss = par['weight_cost']*(tf.reduce_mean(active_weights_in*W_in**2) +
        tf.reduce_mean(tf.nn.relu(active_weights_rnn*W_rnn)**2))
    """

    # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
    with tf.control_dependencies([self.task_loss, self.aux_loss, self.spike_loss, self.entropy_loss]):
        self.train_op = adam_optimizer.compute_gradients(self.task_loss + self.aux_loss
                                                         + self.spike_loss - self.entropy_loss)

    # Stabilizing weights
    if par['stabilization'] == 'pathint':
        # Zenke method
        self.pathint_stabilization(adam_optimizer, previous_weights_mu_minus_1)
    elif par['stabilization'] == 'EWC':
        # Kirkpatrick method
        self.EWC()

    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = adam_optimizer.reset_params()
    self.reset_weights()
    self.make_recurrent_weights_positive()
def optimize(self):

    epsilon = 1e-7

    self.variables = [var for var in tf.trainable_variables() if not '_d_' in var.op.name]
    self.d_variables = [var for var in tf.trainable_variables() if '_d_' in var.op.name]
    #self.variables_val = [var for var in tf.trainable_variables() if 'val' in var.op.name]

    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=par['learning_rate'])
    adam_optimizer_d = AdamOpt.AdamOpt(self.d_variables, learning_rate=par['learning_rate'])
    #adam_optimizer_val = AdamOpt.AdamOpt(self.variables_val, learning_rate=10.*par['learning_rate'])

    self.previous_weights_mu_minus_1 = {}
    reset_prev_vars_ops = []
    self.big_omega_var = {}
    aux_losses = []

    for var in self.variables:
        self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        self.previous_weights_mu_minus_1[var.op.name] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        if not 'val' in var.op.name:
            # don't stabilize the value weights or biases
            aux_losses.append(par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
                tf.square(self.previous_weights_mu_minus_1[var.op.name] - var))))
        reset_prev_vars_ops.append(tf.assign(self.previous_weights_mu_minus_1[var.op.name], var))

    self.aux_loss = tf.add_n(aux_losses)

    self.pol_out_sm = [tf.nn.softmax(pol_out, dim=1) for pol_out in self.pol_out]

    self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
        for (h, mask, time_mask) in zip(self.h, self.mask, self.time_mask)]))

    self.pol_loss = -tf.reduce_mean(tf.stack([advantage*time_mask*mask*act*tf.log(epsilon + pol_out) \
        for (pol_out, advantage, act, mask, time_mask) in zip(self.pol_out_sm, self.advantage, \
        self.actual_action, self.mask, self.time_mask)]))

    self.d_loss = tf.reduce_mean([mask*tf.nn.softmax_cross_entropy_with_logits(logits=y, \
        labels=target, dim=1) for y, target, mask in zip(self.pol_d_out, self.pol_target_data, self.mask)])

    self.spike_loss_d = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
        for (h, mask, time_mask) in zip(self.h_d, self.mask, self.time_mask)]))

    self.entropy_loss = -par['entropy_cost']*tf.reduce_mean(tf.stack([tf.reduce_sum(time_mask*mask*pol_out*tf.log(epsilon + pol_out), axis=1) \
        for (pol_out, mask, time_mask) in zip(self.pol_out_sm, self.mask, self.time_mask)]))

    self.val_loss = 0.5*tf.reduce_mean(tf.stack([time_mask*mask*tf.square(val_out - pred_val) \
        for (val_out, mask, time_mask, pred_val) in zip(self.val_out[:-1], self.mask, self.time_mask, self.pred_val[:-1])]))

    # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
    with tf.control_dependencies([self.pol_loss, self.aux_loss, self.spike_loss, self.val_loss]):
        self.train_op = adam_optimizer.compute_gradients(self.pol_loss + self.val_loss + \
            self.aux_loss + self.spike_loss - self.entropy_loss)
        self.train_op_d = adam_optimizer_d.compute_gradients(self.d_loss + self.spike_loss_d)

    # Stabilizing weights
    if par['stabilization'] == 'pathint':
        # Zenke method
        self.pathint_stabilization(adam_optimizer)
    elif par['stabilization'] == 'EWC':
        # Kirkpatrick method
        self.EWC()

    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = adam_optimizer.reset_params()
    self.make_recurrent_weights_positive()
def __init__(self, requests, timelines, users, service_time, total_users, total_good_users,
             cache_size, total_services, threadName, meta_parameter, meta_param_len=1, id=0):

    self.load = 0
    self.ThreadName = threadName
    self.meta_loop = 0
    self.meta_interval = 500
    self.meta_loop_counter = 0
    self.meta_loop_max = 10
    self.requests = np.array(requests)
    self.timelines = np.array(timelines)
    self.users = np.array(users)
    self.total_users = total_users
    self.total_good_users = total_good_users
    self.total_bad_users = total_users - total_good_users
    self.queue = deque()  # multicast queue
    self.defer_queue = np.array([])
    self.noise_power = 1
    self.bandwidth = 10  # MHz
    self.rate = 10  # Mbps
    self.service_time = service_time
    self.serve_start_index = 0
    self.serve_start_time = 0
    self.serve_stop_time = self.serve_start_time + self.service_time
    self.sojournTimes = np.array([])
    self.powerVecs = np.array([])
    self.powerVecsPolicy = np.array([7])
    self.element_in_service = Elements([], [], [])
    self.userCaches = LRU_MQ_Cache(cache_size, total_users)
    self.services = 0
    self.servicable_users = np.array([])

    # DQN parameters
    self.enable_ddpg = 1
    self.enable_sch = 1
    self.enable_meta = 1
    self.retransmit_no = 1
    self.stop_sch_training = 0
    self.inputvector = []
    self.LoopDefState = np.array([])
    self.act_dist = []
    self.queue_window = 1  # represents total actions; state dimension is this*5 (see AutoEncoder)
    self.service_vecs = [0 for i in range(self.queue_window)]
    self.TransLoopDefer_vec = [0, 0, 0]
    self.schWindow = 100
    self.metaWindow = 10
    self.schTime = 0
    self.state_memory = deque(maxlen=100000)
    self.target_memory = deque(maxlen=100000)
    self.starting_vector = np.random.randint(0, self.schWindow, size=(self.schWindow, 3))
    self.starting_vector = np.divide(self.starting_vector,
                                     self.starting_vector.sum(axis=1).reshape(self.schWindow, 1))
    self.reward_window_sch = deque(maxlen=100000)
    self.reward_window_meta = deque(maxlen=100000)
    self.meta_reward_counter = 0
    self.reward_sch = 0
    self.reward_window = deque(maxlen=10000)  # holds the most recent sojourn times
    self.power_window = deque(maxlen=1000)  # holds a maximum of 1000 power actions
    self.max_power = 20
    self.avg_power_constraint = 7
    self.transmit_power = self.avg_power_constraint
    self.power_beta = 1/self.avg_power_constraint
    self.eta_beta = .00001  # .0005 working
    self.tau_ddpg = .01  # .001 working
    self.AdamOpt = AdamOpt.AdamOpt(step=self.eta_beta)
    self.sojournTimes_window_avg = np.array([])
    self.LoopDefWindow = 1
    self.action = 0
    self.action_prob = np.array([1, 0, 0])
    self.meta_parameter = meta_parameter
    self.actionProbVec = np.array([])
    #self.ddpg_action_prob = np.array([-1, 1, -1, self.transmit_power*2/self.max_power - 1])
    #self.ddpg_action_prob = np.array([self.transmit_power*2/self.max_power - 1])
    self.reward = 0
    self.ddpg_action_prob = np.array([0])
    #self.DQNA = dqn.DQNAgent(int(self.queue_window*5 + self.total_users), int(self.queue_window))
    self.imit_decay = 1/2500
    # the number inside the arange is larger than the simulation time
    self.imitate_prob = 1/(1 + self.imit_decay*np.arange(np.round(total_services*1.5).astype(int)))
    self.imit_choose = np.random.binomial(1, self.imitate_prob)
    self.fading_samples = np.random.exponential(1, (total_users, np.round(total_services*1.5).astype(int)))
    self.fading_samples[int(total_good_users):int(total_users)] = \
        0.1*self.fading_samples[int(total_good_users):int(total_users)]  # bad-user fading states
    self.imit_times = 0
    #self.Autoencode()
    self.queue_decision = 1
    [self.AutoEncoderLoopDef() for i in range(0, self.LoopDefWindow)]
    self.action_vector = np.array([1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 25, 30, 40, 50])
    #self.DDPGA = ddpgc.DDPG(self.ddpg_action_prob.size, self.LoopDefState.shape, 1, 1, 50, lr=.05, tau=self.tau_ddpg)
    self.DDQNA = dqn.DQNAgent(self.LoopDefState.size, self.action_vector.size, self.avg_power_constraint,
                              self.action_vector, meta_param_len, id)
    self.DNN = NA.DNNApproximator((1, 3), 1, .01, .01)
    self.reward_array = np.array([])
    self.first = 0
    self.curr_state = self.LoopDefState
    self.next_state = self.LoopDefState
    self.LoopDefState = np.array([])
    self.time = 0
    self.load = 1
def optimize(self):
    """ Calculate losses and apply corrections to model """

    # Set up optimizer and required constants
    epsilon = 1e-7
    adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=par['learning_rate'])

    # Make stabilization records
    self.prev_weights = {}
    self.big_omega_var = {}
    reset_prev_vars_ops = []
    aux_losses = []

    # Set up stabilization based on trainable variables
    for var in tf.trainable_variables():
        n = var.op.name

        # Make big omega and prev_weight variables
        self.big_omega_var[n] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        self.prev_weights[n] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)

        # Don't stabilize value weights/biases
        if not 'val' in n:
            aux_losses.append(par['omega_c'] * \
                tf.reduce_sum(self.big_omega_var[n]*tf.square(self.prev_weights[n] - var)))

        # Make a reset function for each prev_weight element
        reset_prev_vars_ops.append(tf.assign(self.prev_weights[n], var))

    # Auxiliary stabilization loss
    self.aux_loss = tf.add_n(aux_losses)

    # Spiking activity loss (penalty on high activation values in the hidden layer)
    self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
        for (h, mask, time_mask) in zip(self.h, self.mask, self.time_mask)]))

    # Training-specific losses
    if par['training_method'] == 'SL':
        RL_loss = tf.constant(0.)

        # Task loss (cross entropy)
        self.pol_loss = tf.reduce_mean([mask*tf.nn.softmax_cross_entropy_with_logits(logits=y, \
            labels=target, dim=1) for y, target, mask in zip(self.output, self.target_data, self.time_mask)])
        sup_loss = self.pol_loss

    elif par['training_method'] == 'RL':
        sup_loss = tf.constant(0.)

        # Collect information from across time
        self.time_mask = tf.reshape(tf.stack(self.time_mask), (par['num_time_steps'], par['batch_size'], 1))
        self.mask = tf.stack(self.mask)
        self.reward = tf.stack(self.reward)
        self.action = tf.stack(self.action)
        self.pol_out = tf.stack(self.pol_out)

        # Get the value outputs of the network, and pad the last time step
        val_out = tf.concat([tf.stack(self.val_out), tf.zeros([1, par['batch_size'], par['n_val']])], axis=0)

        # Determine terminal state of the network
        terminal_state = tf.cast(tf.logical_not(tf.equal(self.reward, tf.constant(0.))), tf.float32)

        # Compute predicted value and the advantage for plugging into the policy loss
        pred_val = self.reward + par['discount_rate']*val_out[1:, :, :]*(1 - terminal_state)
        advantage = pred_val - val_out[:-1, :, :]

        # Stop gradients back through action, advantage, and mask
        action_static = tf.stop_gradient(self.action)
        advantage_static = tf.stop_gradient(advantage)
        mask_static = tf.stop_gradient(self.mask)

        # Policy loss
        self.pol_loss = -tf.reduce_mean(advantage_static*mask_static*self.time_mask \
            *action_static*tf.log(epsilon + self.pol_out))

        # Value loss
        self.val_loss = 0.5*par['val_cost']*tf.reduce_mean(mask_static*self.time_mask \
            *tf.square(val_out[:-1, :, :] - tf.stop_gradient(pred_val)))

        # Entropy loss
        self.entropy_loss = -par['entropy_cost']*tf.reduce_mean(tf.reduce_sum(mask_static*self.time_mask \
            *self.pol_out*tf.log(epsilon + self.pol_out), axis=1))

        # Prediction loss
        self.pred_loss = par['error_cost']*tf.reduce_mean(tf.stack(self.total_pred_error))

        # Collect RL losses
        RL_loss = self.pol_loss + self.val_loss - self.entropy_loss + self.pred_loss

    # Collect loss terms and compute gradients
    total_loss = sup_loss + RL_loss + self.aux_loss + self.spike_loss
    self.train_op = adam_optimizer.compute_gradients(total_loss)

    # Stabilize weights
    if par['stabilization'] == 'pathint':
        # Zenke method
        self.pathint_stabilization(adam_optimizer)
    elif par['stabilization'] == 'EWC':
        # Kirkpatrick method
        self.EWC()
    else:
        # No stabilization
        pass

    # Make reset operations
    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = adam_optimizer.reset_params()
    self.reset_weights()

    # Make saturation correction operation
    self.make_recurrent_weights_positive()
def optimize(self):

    opt = AdamOpt.AdamOpt(tf.trainable_variables(), par['learning_rate'])
    eps = 1e-7

    # Putting together variable groups
    encoder = tf.trainable_variables('encoder')
    decoder = tf.trainable_variables('decoder')
    VAE_vars = encoder + decoder

    generator = tf.trainable_variables('generator')
    discriminator = tf.trainable_variables('discriminator')
    GAN_vars = generator + discriminator

    task_vars = tf.trainable_variables('solution')

    # Task loss and training
    task_loss_list = [mask*tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=target + eps) \
        for out, target, mask in zip(self.outputs_dict['encoder_to_solution'], self.target_data, self.time_mask)]
    self.task_loss = tf.reduce_mean(tf.stack(task_loss_list))

    y_prob = [tf.nn.softmax(out) for out in self.outputs_dict['generator_to_solution']]
    self.entropy_loss = tf.reduce_mean(tf.stack([-m*tf.reduce_mean(-p_i*tf.log(p_i + eps))
        for p_i, m in zip(y_prob, self.time_mask)]))

    y_prob = [tf.nn.softmax(out) for out in self.outputs_dict['encoder_to_solution']]
    self.entropy_loss_enc = tf.reduce_mean(tf.stack([-m*tf.reduce_mean(-p_i*tf.log(p_i + eps))
        for p_i, m in zip(y_prob, self.time_mask)]))

    self.train_task = opt.compute_gradients(self.task_loss, var_list=task_vars)
    self.train_task_entropy = opt.compute_gradients(self.entropy_loss, var_list=task_vars)

    # Autoencoder loss and training
    recon_loss_list = [tf.square(out - target) for out, target in \
        zip(self.outputs_dict['encoder_to_decoder'], self.input_data)]
    self.recon_loss = tf.reduce_mean(tf.stack(recon_loss_list))

    si = self.outputs_dict['encoder_si']
    mu = self.outputs_dict['encoder_mu']
    latent_loss_list = [-0.5*tf.reduce_sum(1 + si_t - tf.square(mu_t) - tf.exp(si_t), axis=-1) \
        for mu_t, si_t in zip(mu, si)]
    self.act_latent_loss = par['act_latent_cost']*tf.reduce_mean(tf.stack(latent_loss_list))

    self.train_VAE = opt.compute_gradients(self.recon_loss + self.act_latent_loss, var_list=VAE_vars)

    # Discriminator loss and training
    """
    self.discr_gen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
        labels=tf.stack(self.outputs_dict['generator_to_discriminator'], axis=0), logits=par['discriminator_gen_target']+eps))
    self.discr_act_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
        labels=tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0), logits=par['discriminator_act_target']+eps))

    self.gener_gen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
        labels=tf.stack(self.outputs_dict['generator_to_discriminator'], axis=0), logits=par['discriminator_act_target']+eps))
    self.gener_act_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
        labels=tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0), logits=par['discriminator_gen_target']+eps))
    #"""

    self.discr_gen_loss = tf.reduce_mean(tf.square(
        tf.stack(self.outputs_dict['generator_to_discriminator'], axis=0) - par['discriminator_gen_target']))
    self.discr_act_loss = tf.reduce_mean(tf.square(
        tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0) - par['discriminator_act_target']))

    self.gener_gen_loss = tf.reduce_mean(tf.square(
        tf.stack(self.outputs_dict['generator_to_discriminator'], axis=0) - par['discriminator_act_target']))
    self.gener_act_loss = tf.reduce_mean(tf.square(
        tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0) - par['discriminator_gen_target']))
    #"""

    si = self.outputs_dict['generator_si']
    mu = self.outputs_dict['generator_mu']
    latent_loss_list = [-0.5*tf.reduce_sum(1 + si_t - tf.square(mu_t) - tf.exp(si_t), axis=-1)
        for mu_t, si_t in zip(mu, si)]
    self.gen_latent_loss = par['gen_latent_cost']*tf.reduce_mean(tf.stack(latent_loss_list))

    self.gen_var_loss = -par['var_cost']*tf.reduce_mean(
        tf.nn.moments(tf.stack(self.outputs_dict['generator_to_decoder'], axis=0), axes=1)[1])

    self.generator_loss = self.gener_gen_loss + self.gener_act_loss + self.gen_latent_loss + self.gen_var_loss
    self.discriminator_loss = self.discr_gen_loss + self.discr_act_loss

    self.train_generator = opt.compute_gradients(self.generator_loss, var_list=generator)
    self.train_discriminator = opt.compute_gradients(self.discriminator_loss, var_list=discriminator)

    self.reset_adam_op = opt.reset_params()
def optimize(self):
    """ Calculate losses and apply corrections to the model """

    # Optimize all trainable variables, except those in the convolutional layers
    self.variables = [var for var in tf.trainable_variables() if not 'conv' in var.op.name]

    # Use all trainable variables for synaptic stabilization, except conv and rule weights
    self.variables_stabilization = [var for var in tf.trainable_variables()
        if not ('conv' in var.op.name or 'Wr' in var.op.name)]

    # Set up the optimizer
    adam_optimizer = AdamOpt.AdamOpt(self.variables, learning_rate=par['learning_rate'])

    # Make stabilization records
    prev_weights = {}
    reset_prev_vars_ops = []
    self.big_omega_var = {}
    aux_losses = []

    # Set up stabilization based on designated variables list
    for var in self.variables_stabilization:
        n = var.op.name

        # Make big omega and prev_weight variables
        self.big_omega_var[n] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)
        prev_weights[n] = tf.Variable(tf.zeros(var.get_shape()), trainable=False)

        # Generate auxiliary stabilization losses
        aux_losses.append(par['omega_c']*tf.reduce_sum(
            tf.multiply(self.big_omega_var[n], tf.square(prev_weights[n] - var))))

        # Make a reset function for each prev_weight element
        reset_prev_vars_ops.append(tf.assign(prev_weights[n], var))

    # Aggregate auxiliary losses
    self.aux_loss = tf.add_n(aux_losses)

    # Determine softmax task loss on the network output
    self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.y, \
        labels=self.target_data, dim=1))

    # Get the gradient of the loss+aux function, in order to both perform training and to compute delta_weights
    with tf.control_dependencies([self.task_loss, self.aux_loss]):
        self.train_op = adam_optimizer.compute_gradients(self.task_loss + self.aux_loss)

    # Stabilize weights
    if par['stabilization'] == 'pathint':
        # Zenke method
        self.pathint_stabilization(adam_optimizer, prev_weights)
    elif par['stabilization'] == 'EWC':
        # Kirkpatrick method
        self.EWC()
    else:
        # No stabilization
        pass

    # Make reset operations
    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = adam_optimizer.reset_params()
    self.reset_weights()

    # Calculate accuracy for analysis
    correct_prediction = tf.equal(tf.argmax(self.y - (1 - self.mask)*9999, 1),
                                  tf.argmax(self.target_data - (1 - self.mask)*9999, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def optimize(self):

    opt = AdamOpt.AdamOpt(tf.trainable_variables(), par['learning_rate'])
    eps = 1e-7

    # Putting together variable groups
    encoder = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoder')
    decoder = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='decoder')
    VAE_vars = encoder + decoder

    generator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='generator')
    discriminator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='discriminator')
    GAN_vars = generator + discriminator

    task_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='solution')

    # Task loss and training
    if par['task'] == 'trig':
        self.task_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_to_solution'] - self.target_data))
    elif par['task'] == 'go':
        self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
            logits=self.outputs_dict['encoder_to_solution'], labels=self.target_data + eps))

    y_prob = tf.nn.softmax(self.outputs_dict['generator_to_solution'])
    self.entropy_loss = -tf.reduce_mean(-y_prob*tf.log(y_prob))
    y_prob = tf.nn.softmax(self.outputs_dict['encoder_to_solution'])
    self.entropy_loss_encoded = -tf.reduce_mean(-y_prob*tf.log(y_prob))

    # Loss calculation
    self.aux_loss, prev_weights, reset_prev_vars_ops = self.pathint_loss(task_vars)
    self.train_task = opt.compute_gradients(self.task_loss + self.aux_loss, var_list=task_vars)
    self.train_task_entropy = opt.compute_gradients(self.entropy_loss, var_list=task_vars)

    # Weight stabilization
    self.pathint_stabilization(opt, prev_weights, task_vars)
    self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
    self.reset_adam_op = opt.reset_params()

    # Autoencoder loss and training
    self.recon_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_reconstruction'] - self.input_data))

    si = self.outputs_dict['encoder_sig']
    mu = self.outputs_dict['encoder_mu']
    self.act_latent_loss = par['act_latent_cost'] * -0.5*tf.reduce_mean(
        tf.reduce_sum(1 + si - tf.square(mu) - tf.exp(si), axis=-1))

    self.train_VAE = opt.compute_gradients(self.recon_loss + self.act_latent_loss, var_list=VAE_vars)

    # Discriminator loss and training
    self.discr_gen_loss = tf.reduce_mean(tf.square(
        self.outputs_dict['generator_to_discriminator'] - par['discriminator_gen_target']))
    self.discr_act_loss = tf.reduce_mean(tf.square(
        self.outputs_dict['encoder_to_discriminator'] - par['discriminator_act_target']))

    self.gener_gen_loss = tf.reduce_mean(tf.square(
        self.outputs_dict['generator_to_discriminator'] - par['discriminator_act_target']))
    self.gener_act_loss = tf.reduce_mean(tf.square(
        self.outputs_dict['encoder_to_discriminator'] - par['discriminator_gen_target']))

    si = self.outputs_dict['generator_sig']
    mu = self.outputs_dict['generator_mu']
    self.gen_latent_loss = par['gen_latent_cost'] * -0.5*tf.reduce_mean(
        tf.reduce_sum(1 + si - tf.square(mu) - tf.exp(si), axis=-1))

    self.generator_loss = self.gener_gen_loss + self.gener_act_loss + self.gen_latent_loss
    self.discriminator_loss = self.discr_gen_loss + self.discr_act_loss

    self.train_generator = opt.compute_gradients(self.generator_loss, var_list=generator)
    self.train_discriminator = opt.compute_gradients(self.discriminator_loss, var_list=discriminator)