Example #1
    def optimize(self):

        self.variables_task = [var for var in tf.trainable_variables() if var.op.name.find('task')==0]
        self.variables_recon = [var for var in tf.trainable_variables() if not var.op.name.find('task')==0]

        #opt = tf.train.GradientDescentOptimizer(par['learning_rate'])
        opt_task = AdamOpt.AdamOpt(self.variables_task, par['learning_rate'])
        opt_recon = AdamOpt.AdamOpt(self.variables_recon, par['learning_rate'])

        #self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.y, labels=self.target_data, dim=1))

        self.task_loss = tf.reduce_mean(tf.multiply(self.input_info, tf.square(self.y - self.target_data)))
        self.recon_loss = 1*tf.reduce_mean(tf.square(self.x_hat - self.input_data))
        #self.recon_loss = 1e-3*tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.x_hat, labels=self.input_data))

        self.latent_loss = 8e-5 * -0.5*tf.reduce_mean(tf.reduce_sum(1+self.si-tf.square(self.mu)-tf.exp(self.si),axis=-1))

        self.total_loss = self.task_loss + self.recon_loss + self.latent_loss

        self.train_op_task = opt_task.compute_gradients(self.task_loss)
        self.train_op_recon = opt_recon.compute_gradients(self.recon_loss+self.latent_loss)


        self.generative_vars = {}
        for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='post_latent'):
            self.generative_vars[var.op.name] = var
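Usage note: across these snippets, the ops returned by compute_gradients are run like any other TensorFlow op. Below is a minimal TF1-style training-loop sketch for the ops built above; `model`, `feed`, and `num_steps` are hypothetical stand-ins for an instance of this class, its placeholder feed, and the step count.

    import tensorflow as tf

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(num_steps):  # num_steps assumed to be defined by the caller
            _, _, task_loss = sess.run(
                [model.train_op_task, model.train_op_recon, model.task_loss],
                feed_dict=feed)  # feed maps the model's input/target placeholders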
Example #2
    def __init__(self, state_size, action_size, power_constraint,
                 power_val_array, meta_param_len, id):
        self.agent_id = id
        self.state_size = state_size
        self.action_size = action_size
        self.power_constraint = power_constraint
        self.power_val_array = np.array(power_val_array)
        self.power_val_chosen = []
        self.mean_pow_val = 0
        self.mean_pow_val_check = 0
        self.penalty_lambda = 1 / np.amax(self.power_val_array)
        self.lambda_learning_rate = 0.0001
        self.lambda_lr_decay = 0.99993
        self.penalty_lambda_array = []
        self.penalty_lambda_array = np.array(self.penalty_lambda_array)
        self.AdamOpt = AdamOpt.AdamOpt(step=self.lambda_learning_rate)
        self.AdamOptMeta = AdamOptMeta.AdamOpt(step=self.lambda_learning_rate,
                                               sign=-1)
        self.memory = deque(maxlen=30000)  # Memory D for storing states, actions, rewards etc
        self.meta_memory = deque(maxlen=1000)  # Memory D for storing states, actions, rewards etc
        self.gamma = 0.9  # discount factor gamma = 1 (average case)
        self.epsilon = 1.0  # keep choosing random actions in the beginning and decay epsilon as time
        # progresses
        self.epsilon_min = 0.1  # minimum exploration rate
        self.epsilon_decay = 0.98  # decay rate. (epsilon = epsilon * epsilon_decay)
        self.learning_rate = 0.001  # learning rate for optimizer in neural network
        self.batch_size = 64  # mini batch size for replay

        self.model = self.build_model()  # neural network to learn q function
        self.target_model = self.build_model()  # neural network to estimate target q function
        self.meta_model = self.build_model()

        self.update_target_model()  # Initialize target model to be same as model (theta_ = theta)
        # self.power_values = np.arange(1, 52, 2.55) / 49.45
        self.target_model_update_count = 0
        self.cumulative_reward = 0
        self.average_reward = 0
        self.num_of_actions = 0
        self.reward_array = []
        self.reward_array = np.array(self.reward_array)
        self.meta_param_len = meta_param_len
        self.DSGDA = NA.DNNApproximator((1, self.meta_param_len), 1, .01, .01)
        # SharedWeights.weights = np.append(SharedWeights.weights, self.target_model.get_weights())
        SharedWeights.weights.append(self.target_model.get_weights())
        SharedWeights.weight_size = SharedWeights.weight_size + 1
Example #3
    def optimize(self):
        """ Calculate losses and apply corrections to model """

        # Set up optimizer and required constants
        epsilon = 1e-7
        adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(),
                                         learning_rate=par['learning_rate'])

        # Spiking activity loss (penalty on high activation values in the hidden layer)
        self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
         for (h, mask, time_mask) in zip(tf.unstack(self.h), tf.unstack(self.mask), tf.unstack(self.time_mask))]))

        # Correct time mask shape
        self.time_mask = self.time_mask[..., tf.newaxis]

        # Get the value outputs of the network, and pad the last time step
        val_out = tf.concat(
            [self.val_out,
             tf.zeros([1, par['batch_size'], par['n_val']])],
            axis=0)

        # Determine terminal state of the network
        terminal_state = tf.cast(
            tf.logical_not(tf.equal(self.reward, tf.constant(0.))), tf.float32)

        # Compute predicted value and the advantage for plugging into the policy loss
        pred_val = self.reward + par['discount_rate'] * val_out[1:, :, :] * (
            1 - terminal_state)
        advantage = pred_val - val_out[:-1, :, :]

        # Stop gradients back through action, advantage, and mask
        action_static = tf.stop_gradient(self.action)
        advantage_static = tf.stop_gradient(advantage)
        mask_static = tf.stop_gradient(self.mask)
        pred_val_static = tf.stop_gradient(pred_val)

        # Multiply masks together
        full_mask = mask_static * self.time_mask

        # Policy loss
        self.pol_loss = -tf.reduce_mean(
            full_mask * advantage_static * action_static *
            tf.log(epsilon + self.pol_out))

        # Value loss
        self.val_loss = 0.5 * par['val_cost'] * tf.reduce_mean(
            full_mask * tf.square(val_out[:-1, :, :] - pred_val_static))

        # Entropy loss
        self.ent_loss = -par['entropy_cost'] * tf.reduce_mean(
            tf.reduce_sum(
                full_mask * self.pol_out * tf.log(epsilon + self.pol_out),
                axis=2))

        # Collect RL losses
        RL_loss = self.pol_loss + self.val_loss - self.ent_loss

        # Collect loss terms and compute gradients
        total_loss = RL_loss + self.spike_loss
        self.train_op = adam_optimizer.compute_gradients(total_loss)
Example #4
    def optimize(self):

        self.variables = [
            var for var in tf.trainable_variables()
            if not var.op.name.find('conv') == 0
        ]
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=p.par['learning_rate'])

        #print('mask', self.mask)
        #print('target_data', self.target_data)
        #print('network_output', self.network_output)
        # Calculate performance loss
        perf_loss = tf.stack([mask*tf.nn.softmax_cross_entropy_with_logits(logits = y_hat, labels = desired_output, dim=0) \
                        for (y_hat, desired_output, mask) in zip(self.network_output, self.target_data, self.mask)])
        self.perf_loss = tf.reduce_mean(perf_loss)

        # Calculate spiking loss
        self.spike_loss = [
            p.par['spike_cost'] * tf.reduce_mean(tf.square(h), axis=0)
            for h in self.network_hidden
        ]
        self.spike_loss = tf.reduce_mean(self.spike_loss) / tf.reduce_mean(
            self.gate)

        # Calculate wiring cost
        self.wiring_loss = [p.par['wiring_cost']*tf.nn.relu(W_rnn) \
            for W_rnn in tf.trainable_variables() if 'W_rnn' in W_rnn.name]
        self.wiring_loss = tf.reduce_mean(self.wiring_loss)

        # Collect total loss
        self.total_loss = self.perf_loss + self.spike_loss + self.wiring_loss

        self.train_op = adam_optimizer.compute_gradients(self.total_loss)
        self.reset_adam_op = adam_optimizer.reset_params()
Example #5
    def calculate_encoder_grads(self):
        """
		Calculate the gradient on the latent weights
		"""
        encoding_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='encoding')
        self.encoding_optimizer = AdamOpt.AdamOpt(encoding_vars, 0.0002)

        stim = self.stim_pl / (1e-9 + tf.sqrt(
            tf.reduce_sum(self.stim_pl**2, axis=1, keepdims=True)))
        # if the dot-product between stimuli is less than 0.99, consider them different
        s = tf.cast((stim @ tf.transpose(stim)) < 0.99, tf.float32)

        latent = self.latent_mu / (1e-9 + tf.sqrt(
            tf.reduce_sum(self.latent_mu**2, axis=1, keepdims=True)))
        c = latent @ tf.transpose(latent)
        c *= s
        self.sparsity_loss = tf.reduce_mean(tf.abs(c))

        self.reconstruction_loss = tf.reduce_mean(
            tf.square(self.stim_pl - self.stim_hat))
        self.loss = self.reconstruction_loss + par['latent_cost']*self.latent_loss \
         + par['sparsity_cost']*self.sparsity_loss

        if par['train_encoder']:
            self.train_encoder = self.encoding_optimizer.compute_gradients(
                self.loss)
        else:
            self.train_encoder = tf.no_op()
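The sparsity term above penalizes the cosine similarity between the latent codes of stimuli that are themselves dissimilar (normalized dot product below 0.99). A NumPy sketch of the same quantity, with illustrative names only:

    import numpy as np

    def latent_sparsity_loss(stim, latent_mu, stim_threshold=0.99, eps=1e-9):
        # Normalize stimuli and latent means to unit length
        stim = stim / (eps + np.linalg.norm(stim, axis=1, keepdims=True))
        latent = latent_mu / (eps + np.linalg.norm(latent_mu, axis=1, keepdims=True))
        # 1 where a pair of stimuli is considered different, 0 otherwise
        s = (stim @ stim.T < stim_threshold).astype(np.float32)
        # Mean absolute cosine similarity between latent codes of different stimuli
        return np.mean(np.abs((latent @ latent.T) * s))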
Example #6
    def __init__(self, inp_dim, out_dim, lr, tau, min_max=-1):  # min=-1, max=+1
        # Dimensions and Hyperparams
        self.env_dim = inp_dim
        self.act_dim = out_dim
        self.tau, self.lr = tau, lr
        self.model = self.network()
        self.model.compile(Adam(self.lr), 'mse')
        self.AdamOpt = AdamOpt.AdamOpt(sign=min_max, step=self.tau)
Example #7
    def optimize(self):

        # Use all trainable variables, except those in the convolutional layers
        self.variables = [
            var for var in tf.trainable_variables()
            if not var.op.name.find('conv') == 0
        ]
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=par['learning_rate'])

        previous_weights_mu_minus_1 = {}
        reset_prev_vars_ops = []
        self.big_omega_var = {}
        aux_losses = []

        for var in self.variables:
            self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(
                var.get_shape()),
                                                          trainable=False)
            previous_weights_mu_minus_1[var.op.name] = tf.Variable(
                tf.zeros(var.get_shape()), trainable=False)
            aux_losses.append(par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
               tf.square(previous_weights_mu_minus_1[var.op.name] - var) )))
            reset_prev_vars_ops.append(
                tf.assign(previous_weights_mu_minus_1[var.op.name], var))

        self.aux_loss = tf.add_n(aux_losses)

        self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.y, \
            labels = self.target_data, dim=1))

        # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
        with tf.control_dependencies([self.task_loss, self.aux_loss]):
            self.train_op = adam_optimizer.compute_gradients(self.task_loss +
                                                             self.aux_loss)

        if par['stabilization'] == 'pathint':
            # Zenke method
            self.pathint_stabilization(adam_optimizer,
                                       previous_weights_mu_minus_1)

        elif par['stabilization'] == 'EWC':
            # Kirkpatrick method
            self.EWC()

        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = adam_optimizer.reset_params()

        correct_prediction = tf.equal(
            tf.argmax(self.y - (1 - self.mask) * 9999, 1),
            tf.argmax(self.target_data - (1 - self.mask) * 9999, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        self.reset_weights()
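The loop above assembles the quadratic synaptic-stabilization penalty, aux_loss = omega_c * sum_i Omega_i * (theta_prev_i - theta_i)^2, summed over all trainable weights. The same quantity in plain NumPy, as a sketch with hypothetical dictionary arguments:

    import numpy as np

    def stabilization_penalty(omega_c, big_omega, prev_weights, weights):
        # Quadratic penalty anchoring each weight to its value after the previous task
        return omega_c * sum(
            np.sum(big_omega[name] * np.square(prev_weights[name] - w))
            for name, w in weights.items())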
Example #8
    def optimize(self):

        adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(),
                                         learning_rate=par['learning_rate'])

        self.task_loss = tf.reduce_mean(
            self.m * tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.y_hat, labels=self.output_data))

        # Compute gradients
        self.train = adam_optimizer.compute_gradients(self.task_loss)
Example #9
    def optimize(self):

        epsilon = 1e-6

        # Collect and list all variables in the model
        var_list = tf.trainable_variables()
        self.var_dict = {var.op.name: var for var in var_list}
        print('Variables:')
        [print(var.op.name.ljust(20), ':', var.shape) for var in var_list]
        print()

        # Make optimizer
        # opt = tf.train.AdamOptimizer(par['learning_rate'])
        opt = AdamOpt(tf.trainable_variables(), par['learning_rate'])

        # Calculate RL quantities
        pred_val = self.reward + (par['discount_rate']**
                                  self.step) * self.future_val * (
                                      1 - self.terminal_state)
        advantage = pred_val - self.val

        # Stop gradients where necessary
        advantage_static = tf.stop_gradient(advantage)
        pred_val_static = tf.stop_gradient(pred_val)

        # Calculate RL losses
        self.pol_loss = -tf.reduce_mean(
            advantage_static * self.action * tf.log(self.pol + epsilon))
        self.val_loss = tf.reduce_mean(tf.square(self.val - pred_val_static))
        self.entropy_loss = -tf.reduce_mean(
            tf.reduce_sum(self.pol * tf.log(self.pol + epsilon), axis=1))

        total_loss = self.pol_loss + par[
            'val_cost'] * self.val_loss - self.entropy_cost * self.entropy_loss

        # Make update operations for gradient applications
        self.update_grads = opt.compute_gradients(total_loss)
        self.grads = opt.return_delta_grads()

        # Make apply operations for gradient applications
        self.apply_grads = opt.update_weights()
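Unlike most of the other snippets, this one keeps gradient computation and weight application as separate ops (update_grads, then apply_grads). A sketch of how such a split interface might be driven, assuming a TF1 session and that `model` and `feeds` are hypothetical stand-ins:

    import tensorflow as tf

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for feed in feeds:
            sess.run(model.update_grads, feed_dict=feed)  # compute/accumulate gradients
        sess.run(model.apply_grads)  # apply the Adam update to the weights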
Example #10
    def optimize(self):

        self.variables = [
            var for var in tf.trainable_variables()
            if not var.op.name.find('conv') == 0
        ]
        print(self.variables)
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=p.par['learning_rate'])

        previous_weights_mu_minus_1 = {}
        reset_prev_vars_ops = []
        self.big_omega_var = {}
        aux_losses = []

        for var in self.variables:
            self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(
                var.get_shape()),
                                                          trainable=False)
            previous_weights_mu_minus_1[var.op.name] = tf.Variable(
                tf.zeros(var.get_shape()), trainable=False)
            aux_losses.append(p.par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
               tf.square(previous_weights_mu_minus_1[var.op.name] - var) )))
            reset_prev_vars_ops.append(
                tf.assign(previous_weights_mu_minus_1[var.op.name], var))

        self.aux_loss = tf.add_n(aux_losses)

        # Calculate performance loss
        self.perf_loss = [mask*tf.nn.softmax_cross_entropy_with_logits(logits = y_hat, labels = desired_output, dim=0) \
            for (y_hat, desired_output, mask) in zip(self.networks_output, self.target_data, self.mask)]
        self.perf_loss = tf.reduce_mean(self.perf_loss)

        # Calculate spiking loss
        self.spike_loss = [
            p.par['spike_cost'] * tf.reduce_mean(tf.square(h), axis=0)
            for h in self.networks_hidden
        ]
        self.spike_loss = tf.reduce_mean(self.spike_loss)

        # Collect total loss
        self.total_loss = self.perf_loss + self.spike_loss

        # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
        with tf.control_dependencies([self.total_loss, self.aux_loss]):
            self.train_op = adam_optimizer.compute_gradients(self.total_loss +
                                                             self.aux_loss)

        # Zenke method
        self.pathint_stabilization(adam_optimizer, previous_weights_mu_minus_1)

        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = adam_optimizer.reset_params()
Example #11
    def optimize(self):

        # Trainable variables for FF / Generative / Connection
        self.variables_ff = [
            var for var in tf.trainable_variables()
            if var.op.name.find('ff') == 0
        ]
        self.variables_full = [
            var for var in tf.trainable_variables()
            if (var.op.name.find('conn') == 0)
        ]

        adam_optimizer_ff = AdamOpt(self.variables_ff,
                                    learning_rate=par['learning_rate'])
        adam_optimizer_full = AdamOpt(self.variables_full,
                                      learning_rate=par['learning_rate'])

        self.ff_loss = tf.reduce_mean([
            tf.square(y - y_hat)
            for (y, y_hat) in zip(tf.unstack(self.y_data, axis=0),
                                  tf.unstack(self.ff_output, axis=0))
        ])
        with tf.control_dependencies([self.ff_loss]):
            self.train_op_ff = adam_optimizer_ff.compute_gradients(
                self.ff_loss)

        self.full_loss = tf.reduce_mean([
            tf.square(ys - ys_hat)
            for (ys, ys_hat) in zip(tf.unstack(self.ys_data, axis=0),
                                    tf.unstack(self.full_output, axis=0))
        ])

        self.latent_loss = 8e-5 * -0.5 * tf.reduce_mean(
            tf.reduce_sum(1 + self.si - tf.square(self.mu) - tf.exp(self.si),
                          axis=-1))

        with tf.control_dependencies([self.full_loss + self.latent_loss]):
            self.train_op_full = adam_optimizer_full.compute_gradients(
                self.full_loss + self.latent_loss)

        # self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op_ff = adam_optimizer_ff.reset_params()
        self.reset_adam_op_full = adam_optimizer_full.reset_params()

        self.reset_weights_ff()
        self.reset_weights_full()

        self.make_recurrent_weights_positive_ff()
        self.make_recurrent_weights_positive_full()
Example #12
	def optimize(self):

		epsilon = 1e-6

		# Collect all variables in the model and list them out
		var_list_all = tf.trainable_variables()
		# NOTE: the striatum filter below is immediately overridden, so all variables are trained
		var_list = [var for var in var_list_all if not 'striatum' in var.op.name]
		var_list = var_list_all

		var_list_striatum = [var for var in var_list_all if 'striatum' in var.op.name]
		self.var_dict = {var.op.name : var for var in var_list}
		print('Variables:')
		[print(var.op.name.ljust(20), ':', var.shape) for var in var_list]
		print()
		print('Striatum Variables:')
		[print(var.op.name.ljust(20), ':', var.shape) for var in var_list_striatum]
		print()

		# Make optimizer
		opt = AdamOpt.AdamOpt(var_list, algorithm = 'rmsprop', learning_rate = par['learning_rate'])
		opt_striatum = AdamOpt.AdamOpt(var_list_striatum, algorithm = 'rmsprop', learning_rate = par['learning_rate'])

		pred_val = self.reward + (par['discount_rate']**self.step)*self.future_val*(1. - self.terminal_state)
		advantage = pred_val - self.val

		pol_loss = -tf.reduce_mean(tf.stop_gradient(advantage)*self.action*tf.log(self.pol + epsilon))

		val_loss = tf.reduce_mean(tf.square(advantage))

		entropy_loss = -tf.reduce_mean(tf.reduce_sum(self.pol*tf.log(self.pol + epsilon), axis = 1))
		#entropy_loss = -tf.reduce_mean(tf.reduce_mean(self.pol*tf.log(self.pol + epsilon), axis = 1))

		loss = pol_loss + par['val_cost'] * val_loss - par['entropy_cost'] * entropy_loss

		self.update_grads = opt.compute_gradients_rmsprop(loss)

		self.update_weights = opt.update_weights_rmsprop(lr_multiplier = self.lr_multiplier)
		"""
    def optimize(self):

        #opt = tf.train.GradientDescentOptimizer(par['learning_rate'])
        var_list = [var for var in tf.trainable_variables()]

        opt = AdamOpt.AdamOpt(var_list, par['learning_rate'])
        opt_pred = AdamOpt.AdamOpt(var_list, par['learning_rate'])

        self.pred_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred, \
                labels=self.pred_target, dim=1))
        self.train_op_pred = opt_pred.compute_gradients(self.pred_loss)

        #self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.y, labels=self.target_data, dim=1))

        self.task_loss = self.alpha * tf.reduce_mean(
            tf.square(self.y - self.target_data))
        self.recon_loss = tf.reduce_mean(
            tf.square(self.x_hat - self.input_data))
        self.weight_loss = 0.00 * tf.reduce_sum(
            tf.square(self.var_dict['W_layer_out']))
        #self.recon_loss = 1e-3*tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.x_hat, labels=self.input_data))

        #self.latent_loss = 0.*-0.5*tf.reduce_mean(tf.reduce_sum(1+self.si-tf.square(self.mu)-tf.exp(self.si),axis=-1))
        self.latent_loss = 0.0001 * tf.reduce_mean(
            tf.square(self.latent_sample))

        self.total_loss = self.task_loss + self.recon_loss + self.latent_loss + self.weight_loss - 2. * self.pred_loss

        with tf.control_dependencies([self.total_loss]):
            self.train_op = opt.compute_gradients(self.total_loss,
                                                  gate_prediction=True)

        self.generative_vars = {}
        for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='post_latent'):
            self.generative_vars[var.op.name] = var
Example #14
    def optimize(self):
        """ Calculate losses and apply corrections to model """

        # Set up optimizer
        adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(),
                                         learning_rate=par['learning_rate'])

        # Calculate losses
        self.task_loss = tf.reduce_mean(self.time_mask[::1,...] * \
          tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.output[::1,...], \
          labels=self.target_data[::1,...]))

        self.spike_loss = 0. * tf.reduce_mean(tf.nn.relu(self.h + 0.02))

        # Compute gradients
        self.train = adam_optimizer.compute_gradients(self.task_loss +
                                                      self.spike_loss)
Example #15
    def optimize(self):

        self.perf_losses = []
        self.spike_losses = []
        self.wiring_losses = []
        self.total_loss = tf.constant(0.)

        self.variables = [
            var for var in tf.trainable_variables()
            if not var.op.name.find('conv') == 0
        ]
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=p.par['learning_rate'])

        for n in range(p.par['num_networks']):

            # Calculate performance loss
            perf_loss = [mask*tf.nn.softmax_cross_entropy_with_logits(logits = y_hat, labels = desired_output, dim=0) \
                         for (y_hat, desired_output, mask) in zip(self.networks_output[n], self.target_data, self.mask)]
            perf_loss = tf.reduce_mean(tf.stack(perf_loss, axis=0))

            # Calculate spiking loss
            spike_loss = [
                p.par['spike_cost'] * tf.reduce_mean(tf.square(h), axis=0)
                for h in self.networks_hidden[n]
            ]
            spike_loss = tf.reduce_mean(tf.stack(spike_loss, axis=0))

            # Calculate wiring cost
            wiring_loss = [
                p.par['wiring_cost'] * tf.nn.relu(W_rnn * p.par['W_rnn_dist'])
                for W_rnn in tf.trainable_variables() if 'W_rnn' in W_rnn.name
            ]
            wiring_loss = tf.reduce_mean(tf.stack(wiring_loss, axis=0))

            # Add losses to record
            self.perf_losses.append(perf_loss)
            self.spike_losses.append(spike_loss)
            self.wiring_losses.append(wiring_loss)

            # Collect total loss
            self.total_loss += perf_loss + spike_loss + wiring_loss

        self.train_op = adam_optimizer.compute_gradients(self.total_loss)
        self.reset_adam_op = adam_optimizer.reset_params()
Example #16
    def optimize(self):

        epsilon = 1e-6

        # Collect all variables in the model and list them out
        var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.var_dict = {var.op.name: var for var in var_list}
        print('Variables:')
        [print(var.op.name.ljust(20), ':', var.shape) for var in var_list]
        print()

        # Make optimizer
        opt = AdamOpt.AdamOpt(var_list,
                              algorithm='rmsprop',
                              learning_rate=par['learning_rate'])

        # Calculate RL quantities
        pred_val = self.reward + (par['discount_rate']**
                                  self.step) * self.future_val * (
                                      1. - self.terminal_state)
        advantage = pred_val - self.val

        # Calculate RL losses
        pol_loss = -tf.reduce_mean(
            tf.stop_gradient(advantage) * self.action *
            tf.log(self.pol + epsilon))
        val_loss = tf.reduce_mean(tf.square(advantage))
        entropy_loss = -tf.reduce_mean(
            tf.reduce_sum(self.pol * tf.log(self.pol + epsilon), axis=1))

        # Calculate state prediction loss
        self.pred_loss = tf.reduce_mean(
            tf.square(self.pred - self.future_capsule))

        loss = pol_loss + par['val_cost'] * val_loss - par['entropy_cost'] * entropy_loss \
         + par['pred_cost'] * self.pred_loss

        # Make update operations for gradient applications
        self.update_grads = opt.compute_gradients_rmsprop(loss)

        # Make apply operations for gradient applications
        self.update_weights = opt.update_weights_rmsprop(
            lr_multiplier=self.lr_multiplier)
Example #17
    def optimize(self):

        opt = AdamOpt.AdamOpt(tf.trainable_variables(), par['learning_rate'])
        eps = 1e-7

        # Task loss and training
        if par['task'] == 'trig':
            self.task_loss = tf.reduce_mean(
                tf.square(self.outputs_dict['encoder_to_solution'] -
                          self.target_data))
        elif par['task'] == 'go':
            self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
                logits=self.y_hat, labels=self.target_data+eps))

            y_prob = tf.nn.softmax(self.y_hat)
            self.entropy_loss = -tf.reduce_mean(-y_prob * tf.log(y_prob))

        self.train_task = opt.compute_gradients(self.task_loss)
        self.train_task_entropy = opt.compute_gradients(self.entropy_loss)
Example #18
    def __init__(self, input_data, W=None, U=None):

        if W is None:
            self.W = tf.get_variable('W',
                                     initializer=tf.random_uniform_initializer(
                                         -0.5, 0.5),
                                     shape=[par['n_input'], par['n_latent']])
        else:
            self.W = tf.get_variable('W', initializer=W, trainable=False)

        if U is None:
            self.U = tf.get_variable('U',
                                     initializer=tf.random_uniform_initializer(
                                         -0.5, 0.5),
                                     shape=[par['n_latent'], par['n_input']])
        else:
            self.U = tf.get_variable('U', initializer=U, trainable=False)

        self.I = input_data

        self.E = []
        self.R = []
        for t in range(input_data.shape.as_list()[0]):
            E = tf.nn.relu(self.I[t] @ self.W)
            R = E @ self.U

            self.E.append(E)
            self.R.append(R)

        self.E = tf.stack(self.E, axis=0)
        self.R = tf.stack(self.R, axis=0)

        self.loss_plot = 0.5 * tf.square(self.I - self.R)

        self.rec_loss = tf.reduce_mean(self.loss_plot)
        self.act_loss = par['enc_activity_cost'] * tf.reduce_mean(
            tf.log(1 + tf.abs(self.E)))
        self.wei_loss = par['enc_weight_cost'] * tf.reduce_mean(tf.abs(self.U))
        total_loss = self.rec_loss + self.act_loss + self.wei_loss

        if W is None or U is None:
            opt = AdamOpt.AdamOpt(tf.trainable_variables(), learning_rate=0.01)
            self.train = opt.compute_gradients(total_loss)
Example #19
    def calculate_policy_grads(self):
        """
		Calculate the gradient on the policy/value weights
		"""
        RL_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='RL')
        self.RL_optimizer = AdamOpt.AdamOpt(RL_vars, par['learning_rate'])

        not_terminal_state = tf.cast(tf.equal(self.reward_pl, tf.constant(0.)),
                                     tf.float32)
        advantage = self.reward_pl + par[
            'discount_rate'] * self.future_val_pl * not_terminal_state - self.val_out
        self.val_loss = 0.5 * tf.reduce_mean(tf.square(advantage))
        self.pol_loss     = -tf.reduce_mean(tf.stop_gradient(advantage*self.action_pl) \
         *tf.log(1e-9 + self.pol_out))
        self.entropy_loss = -tf.reduce_mean(tf.reduce_sum(self.pol_out \
         *tf.log(1e-9 + self.pol_out), axis = -1))

        self.loss = self.pol_loss + par['val_cost']*self.val_loss \
         - par['entropy_cost']*self.entropy_loss
        self.train_RL = self.RL_optimizer.compute_gradients(self.loss)
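The three losses above are the usual advantage actor-critic terms. A NumPy sketch of the same calculation on a batch, assuming one-hot actions and an already-softmaxed policy (names illustrative):

    import numpy as np

    def a2c_losses(reward, future_val, val_out, pol_out, action_onehot,
                   not_terminal, discount_rate, eps=1e-9):
        # Bootstrapped advantage: r + gamma * V(s') * (not terminal) - V(s)
        advantage = reward + discount_rate * future_val * not_terminal - val_out
        val_loss = 0.5 * np.mean(np.square(advantage))
        pol_loss = -np.mean(advantage * action_onehot * np.log(eps + pol_out))
        entropy_loss = -np.mean(np.sum(pol_out * np.log(eps + pol_out), axis=-1))
        return pol_loss, val_loss, entropy_loss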
Example #20
    def calculate_encoder_grads(self):
        """
		Calculate the gradient on the latent weights
		"""
        encoding_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='encoding')
        self.encoding_optimizer = AdamOpt.AdamOpt(encoding_vars, 0.001)

        self.reconstruction_loss = tf.reduce_mean(
            tf.square(self.stim_pl - self.stim_hat))
        self.weight_loss = tf.reduce_mean(tf.abs(
            self.var_dict['W_enc'])) + tf.reduce_mean(
                tf.abs(self.var_dict['W_dec']))
        latent_mask = np.ones(
            (par['n_latent'], par['n_latent']), dtype=np.float32) - np.eye(
                (par['n_latent']), dtype=np.float32)
        self.sparsity_loss = tf.reduce_mean(
            latent_mask *
            (tf.transpose(self.latent) @ self.latent)) / par['batch_size']
        self.loss = self.reconstruction_loss + par['sparsity_cost']*self.sparsity_loss \
         + par['weight_cost']*self.weight_loss
        self.train_encoder = self.encoding_optimizer.compute_gradients(
            self.loss)
Example #21
    def optimize(self):

        self.loss = tf.reduce_mean(tf.square(self.y - self.y_hat))
        variables = [var for var in tf.trainable_variables()]

        if True:
            # Adam optimizer scenario
            optimizer = AdamOpt.AdamOpt(variables, learning_rate=self.lr)
            self.train = optimizer.compute_gradients(self.loss, gate=0)
            gvs = optimizer.return_gradients()

            self.g = gvs[0][0]
            self.v = gvs[0][1]

        else:
            # GD optimizer scenario
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.lr)
            gvs = optimizer.compute_gradients(self.loss)
            self.train = optimizer.apply_gradients(gvs)

            self.g = tf.reduce_mean(gvs[0][0])
            self.v = tf.reduce_mean(gvs[0][1])
Example #22
    def optimize(self):
        """ Calculate losses and apply corrections to model """

        # Set up optimizer and required constants
        epsilon = 1e-7
        opt = AdamOpt.AdamOpt(tf.trainable_variables(),
                              learning_rate=par['learning_rate'])

        # Calculate task performance loss
        if par['loss_function'] == 'MSE':
            perf_loss = [m*tf.reduce_mean(tf.square(t - y)) for m, t, y \
                in zip(self.mask, self.target_data, self.y_hat)]

        elif par['loss_function'] == 'cross_entropy':
            perf_loss = [m*tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=t) for m, t, y \
                in zip(self.mask, self.target_data, self.y_hat)]

        self.perf_loss = tf.reduce_mean(tf.stack(perf_loss))

        # Calculate L2 loss on hidden state spiking activity
        self.spike_loss = tf.reduce_mean(tf.stack([par['spike_cost']*tf.reduce_mean(tf.square(h), axis=0) \
            for h in self.hidden_hist]))

        # Calculate L1 loss on weight strengths
        if par['architecture'] == 'BIO':
            self.wiring_loss  = tf.reduce_sum(tf.nn.relu(self.var_dict['W_in'])) \
                              + tf.reduce_sum(tf.nn.relu(self.var_dict['W_rnn'])) \
                              + tf.reduce_sum(tf.nn.relu(self.var_dict['W_out']))
            self.wiring_loss *= par['wiring_cost']
        elif par['architecture'] == 'LSTM':
            self.wiring_loss = 0

        # Collect total loss
        self.loss = self.perf_loss + self.spike_loss + self.wiring_loss

        # Compute and apply network gradients
        self.train_op = opt.compute_gradients(self.loss)
Example #23
    def optimize(self):

        # Use all trainable variables, except those in the convolutional layers
        self.variables = [
            var for var in tf.trainable_variables()
            if not var.op.name.find('conv') == 0
        ]
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=par['learning_rate'])

        previous_weights_mu_minus_1 = {}
        reset_prev_vars_ops = []
        self.big_omega_var = {}
        aux_losses = []

        for var in self.variables:
            self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(
                var.get_shape()),
                                                          trainable=False)
            previous_weights_mu_minus_1[var.op.name] = tf.Variable(
                tf.zeros(var.get_shape()), trainable=False)
            aux_losses.append(par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
               tf.square(previous_weights_mu_minus_1[var.op.name] - var) )))
            reset_prev_vars_ops.append(
                tf.assign(previous_weights_mu_minus_1[var.op.name], var))

        self.aux_loss = tf.add_n(aux_losses)

        self.spike_loss = par['spike_cost'] * tf.reduce_mean(
            tf.square(self.hidden_state_hist))


        self.task_loss = tf.reduce_mean([mask*tf.nn.softmax_cross_entropy_with_logits(logits = y, \
            labels = target, dim=1) for y, target, mask in zip(self.output, self.target_data, self.mask)])

        output_softmax = [tf.nn.softmax(y, dim=1) for y in self.output]
        self.entropy_loss = -par['entropy_cost']*tf.reduce_mean([m*tf.reduce_sum(out_sm*tf.log(1e-7+out_sm), axis = 1) \
            for (out_sm,m) in zip(output_softmax, self.mask)])
        """
        with tf.variable_scope('rnn', reuse = True):
            W_in  = tf.get_variable('W_in')
            W_rnn = tf.get_variable('W_rnn')

        active_weights_rnn = tf.matmul(tf.reshape(self.gating,[-1,1]), tf.reshape(self.gating,[1,-1]))
        active_weights_in = tf.tile(tf.reshape(self.gating,[1,-1]),[par['n_input'], 1])
        self.weight_loss = par['weight_cost']*(tf.reduce_mean(active_weights_in*W_in**2) + tf.reduce_mean(tf.nn.relu(active_weights_rnn*W_rnn)**2))
        """
        # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
        with tf.control_dependencies([
                self.task_loss, self.aux_loss, self.spike_loss,
                self.entropy_loss
        ]):
            self.train_op = adam_optimizer.compute_gradients(self.task_loss +
                                                             self.aux_loss +
                                                             self.spike_loss -
                                                             self.entropy_loss)

        # Stabilizing weights
        if par['stabilization'] == 'pathint':
            # Zenke method
            self.pathint_stabilization(adam_optimizer,
                                       previous_weights_mu_minus_1)

        elif par['stabilization'] == 'EWC':
            # Kirkpatrick method
            self.EWC()

        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = adam_optimizer.reset_params()

        self.reset_weights()

        self.make_recurrent_weights_positive()
Example #24
    def optimize(self):

        epsilon = 1e-7
        self.variables = [
            var for var in tf.trainable_variables() if not '_d_' in var.op.name
        ]
        self.d_variables = [
            var for var in tf.trainable_variables() if '_d_' in var.op.name
        ]
        #self.variables_val = [var for var in tf.trainable_variables() if 'val' in var.op.name]
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=par['learning_rate'])
        adam_optimizer_d = AdamOpt.AdamOpt(self.d_variables,
                                           learning_rate=par['learning_rate'])
        #adam_optimizer_val = AdamOpt.AdamOpt(self.variables_val, learning_rate = 10.*par['learning_rate'])

        self.previous_weights_mu_minus_1 = {}
        reset_prev_vars_ops = []
        self.big_omega_var = {}
        aux_losses = []

        for var in self.variables:
            self.big_omega_var[var.op.name] = tf.Variable(tf.zeros(
                var.get_shape()),
                                                          trainable=False)
            self.previous_weights_mu_minus_1[var.op.name] = tf.Variable(
                tf.zeros(var.get_shape()), trainable=False)
            if not 'val' in var.op.name:
                # don't stabilize the value weights or biases
                aux_losses.append(par['omega_c']*tf.reduce_sum(tf.multiply(self.big_omega_var[var.op.name], \
                    tf.square(self.previous_weights_mu_minus_1[var.op.name] - var) )))
            reset_prev_vars_ops.append(
                tf.assign(self.previous_weights_mu_minus_1[var.op.name], var))

        self.aux_loss = tf.add_n(aux_losses)

        self.pol_out_sm = [
            tf.nn.softmax(pol_out, dim=1) for pol_out in self.pol_out
        ]

        self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
            for (h, mask, time_mask) in zip(self.h, self.mask, self.time_mask)]))


        self.pol_loss = -tf.reduce_mean(tf.stack([advantage*time_mask*mask*act*tf.log(epsilon + pol_out) \
            for (pol_out, advantage, act, mask, time_mask) in zip(self.pol_out_sm, self.advantage, \
            self.actual_action, self.mask, self.time_mask)]))

        self.d_loss = tf.reduce_mean([mask*tf.nn.softmax_cross_entropy_with_logits(logits = y, \
            labels = target, dim=1) for y, target, mask in zip(self.pol_d_out, self.pol_target_data, self.mask)])

        self.spike_loss_d = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
            for (h, mask, time_mask) in zip(self.h_d, self.mask, self.time_mask)]))



        self.entropy_loss = -par['entropy_cost']*tf.reduce_mean(tf.stack([tf.reduce_sum(time_mask*mask*pol_out*tf.log(epsilon+pol_out), axis = 1) \
            for (pol_out, mask, time_mask) in zip(self.pol_out_sm, self.mask, self.time_mask)]))


        self.val_loss = 0.5*tf.reduce_mean(tf.stack([time_mask*mask*tf.square(val_out - pred_val) \
            for (val_out, mask, time_mask, pred_val) in zip(self.val_out[:-1], self.mask, self.time_mask, self.pred_val[:-1])]))

        # Gradient of the loss+aux function, in order to both perform training and to compute delta_weights
        with tf.control_dependencies(
            [self.pol_loss, self.aux_loss, self.spike_loss, self.val_loss]):
            self.train_op = adam_optimizer.compute_gradients(self.pol_loss + self.val_loss + \
                self.aux_loss + self.spike_loss - self.entropy_loss)
        self.train_op_d = adam_optimizer_d.compute_gradients(self.d_loss +
                                                             self.spike_loss_d)

        # Stabilizing weights
        if par['stabilization'] == 'pathint':
            # Zenke method
            self.pathint_stabilization(adam_optimizer)

        elif par['stabilization'] == 'EWC':
            # Kirkpatrick method
            self.EWC()

        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = adam_optimizer.reset_params()

        self.make_recurrent_weights_positive()
Example #25
    def __init__(self,
                 requests,
                 timelines,
                 users,
                 service_time,
                 total_users,
                 total_good_users,
                 cache_size,
                 total_services,
                 threadName,
                 meta_parameter,
                 meta_param_len=1,
                 id=0):
        self.load = 0
        self.ThreadName = threadName
        self.meta_loop = 0
        self.meta_interval = 500
        self.meta_loop_counter = 0
        self.meta_loop_max = 10
        self.requests = np.array(requests)
        self.timelines = np.array(timelines)
        self.users = np.array(users)
        self.total_users = total_users
        self.total_good_users = total_good_users
        self.total_bad_users = total_users - total_good_users
        self.queue = deque()  #Multicast Queue
        self.defer_queue = np.array([])

        self.noise_power = 1
        self.bandwidth = 10  #MHz
        self.rate = 10  #Mbps

        self.service_time = service_time
        self.serve_start_index = 0
        self.serve_start_time = 0
        self.serve_stop_time = self.serve_start_time + self.service_time
        self.sojournTimes = np.array([])
        self.powerVecs = np.array([])
        self.powerVecsPolicy = np.array([7])
        self.element_in_service = Elements([], [], [])
        self.userCaches = LRU_MQ_Cache(cache_size, total_users)
        self.services = 0
        self.servicable_users = np.array([])

        #DQN Parameters
        self.enable_ddpg = 1
        self.enable_sch = 1
        self.enable_meta = 1
        self.retransmit_no = 1
        self.stop_sch_training = 0
        self.inputvector = []
        self.LoopDefState = np.array([])
        self.act_dist = []
        self.queue_window = 1  # represents total actions, state dimension is this*5: See AutoEncoder
        self.service_vecs = [0 for i in range(self.queue_window)]
        self.TransLoopDefer_vec = [0, 0, 0]
        self.schWindow = 100
        self.metaWindow = 10
        self.schTime = 0
        self.state_memory = deque(maxlen=100000)
        self.target_memory = deque(maxlen=100000)
        self.starting_vector = np.random.randint(0,
                                                 self.schWindow,
                                                 size=(self.schWindow, 3))
        self.starting_vector = np.divide(
            self.starting_vector,
            self.starting_vector.sum(axis=1).reshape(self.schWindow, 1))
        self.reward_window_sch = deque(maxlen=100000)
        self.reward_window_meta = deque(maxlen=100000)
        self.meta_reward_counter = 0
        self.reward_sch = 0
        self.reward_window = deque(maxlen=10000)  # Holds recent sojourn times
        self.power_window = deque(
            maxlen=1000)  #Holds a maximum of 1000 power actions
        self.max_power = 20
        self.avg_power_constraint = 7
        self.transmit_power = self.avg_power_constraint
        self.power_beta = 1 / self.avg_power_constraint
        self.eta_beta = .00001  #.0005 working
        self.tau_ddpg = .01  #.001 working
        self.AdamOpt = AdamOpt.AdamOpt(step=self.eta_beta)
        self.sojournTimes_window_avg = np.array([])
        self.LoopDefWindow = 1
        self.action = 0
        self.action_prob = np.array([1, 0, 0])
        self.meta_parameter = meta_parameter
        self.actionProbVec = np.array([])
        #self.ddpg_action_prob=np.array([-1,1,-1,self.transmit_power*2/self.max_power-1])
        # self.ddpg_action_prob=np.array([self.transmit_power*2/self.max_power-1])
        self.reward = 0
        self.ddpg_action_prob = np.array([0])
        #self.DQNA = dqn.DQNAgent(int(self.queue_window*5+self.total_users), int(self.queue_window))
        self.imit_decay = 1 / 2500
        self.imitate_prob = 1 / (
            1 + self.imit_decay *
            np.arange(np.round(total_services * 1.5).astype(int))
        )  # Number inside the arange is larger than the simulation time
        self.imit_choose = np.random.binomial(1, self.imitate_prob)
        self.fading_samples = np.random.exponential(
            1, (total_users, np.round(total_services * 1.5).astype(int)))
        self.fading_samples[int(total_good_users):int(
            total_users)] = 0.1 * self.fading_samples[int(
                total_good_users):int(total_users)]  #bad user fading states
        self.imit_times = 0
        #self.Autoencode()
        self.queue_decision = 1
        [self.AutoEncoderLoopDef() for i in range(0, self.LoopDefWindow)]
        self.action_vector = np.array(
            [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 25, 30, 40, 50])
        # self.DDPGA = ddpgc.DDPG(self.ddpg_action_prob.size, self.LoopDefState.shape,1,1,50,lr=.05,tau=self.tau_ddpg)
        self.DDQNA = dqn.DQNAgent(self.LoopDefState.size,
                                  self.action_vector.size,
                                  self.avg_power_constraint,
                                  self.action_vector, meta_param_len, id)
        self.DNN = NA.DNNApproximator((1, 3), 1, .01, .01)
        self.reward_array = np.array([])
        self.first = 0
        self.curr_state = self.LoopDefState
        self.next_state = self.LoopDefState
        self.LoopDefState = np.array([])
        self.time = 0
        self.load = 1
Example #26
    def optimize(self):
        """ Calculate losses and apply corrections to model """

        # Set up optimizer and required constants
        epsilon = 1e-7
        adam_optimizer = AdamOpt.AdamOpt(tf.trainable_variables(),
                                         learning_rate=par['learning_rate'])

        # Make stabilization records
        self.prev_weights = {}
        self.big_omega_var = {}
        reset_prev_vars_ops = []
        aux_losses = []

        # Set up stabilization based on trainable variables
        for var in tf.trainable_variables():
            n = var.op.name

            # Make big omega and prev_weight variables
            self.big_omega_var[n] = tf.Variable(tf.zeros(var.get_shape()),
                                                trainable=False)
            self.prev_weights[n] = tf.Variable(tf.zeros(var.get_shape()),
                                               trainable=False)

            # Don't stabilize value weights/biases
            if not 'val' in n:
                aux_losses.append(par['omega_c'] * \
                    tf.reduce_sum(self.big_omega_var[n] * tf.square(self.prev_weights[n] - var)))

            # Make a reset function for each prev_weight element
            reset_prev_vars_ops.append(tf.assign(self.prev_weights[n], var))

        # Auxiliary stabilization loss
        self.aux_loss = tf.add_n(aux_losses)

        # Spiking activity loss (penalty on high activation values in the hidden layer)
        self.spike_loss = par['spike_cost']*tf.reduce_mean(tf.stack([mask*time_mask*tf.reduce_mean(h) \
            for (h, mask, time_mask) in zip(self.h, self.mask, self.time_mask)]))

        # Training-specific losses
        if par['training_method'] == 'SL':
            RL_loss = tf.constant(0.)

            # Task loss (cross entropy)
            self.pol_loss = tf.reduce_mean([mask*tf.nn.softmax_cross_entropy_with_logits(logits=y, \
                labels=target, dim=1) for y, target, mask in zip(self.output, self.target_data, self.time_mask)])
            sup_loss = self.pol_loss

        elif par['training_method'] == 'RL':
            sup_loss = tf.constant(0.)

            # Collect information from across time
            self.time_mask = tf.reshape(tf.stack(
                self.time_mask), (par['num_time_steps'], par['batch_size'], 1))
            self.mask = tf.stack(self.mask)
            self.reward = tf.stack(self.reward)
            self.action = tf.stack(self.action)
            self.pol_out = tf.stack(self.pol_out)

            # Get the value outputs of the network, and pad the last time step
            val_out = tf.concat([
                tf.stack(self.val_out),
                tf.zeros([1, par['batch_size'], par['n_val']])
            ],
                                axis=0)

            # Determine terminal state of the network
            terminal_state = tf.cast(
                tf.logical_not(tf.equal(self.reward, tf.constant(0.))),
                tf.float32)

            # Compute predicted value and the advantage for plugging into the policy loss
            pred_val = self.reward + par['discount_rate'] * val_out[
                1:, :, :] * (1 - terminal_state)
            advantage = pred_val - val_out[:-1, :, :]

            # Stop gradients back through action, advantage, and mask
            action_static = tf.stop_gradient(self.action)
            advantage_static = tf.stop_gradient(advantage)
            mask_static = tf.stop_gradient(self.mask)

            # Policy loss
            self.pol_loss = -tf.reduce_mean(
                advantage_static * mask_static * self.time_mask *
                action_static * tf.log(epsilon + self.pol_out))

            # Value loss
            self.val_loss = 0.5 * par['val_cost'] * tf.reduce_mean(
                mask_static * self.time_mask *
                tf.square(val_out[:-1, :, :] - tf.stop_gradient(pred_val)))

            # Entropy loss
            self.entropy_loss = -par['entropy_cost'] * tf.reduce_mean(
                tf.reduce_sum(mask_static * self.time_mask * self.pol_out *
                              tf.log(epsilon + self.pol_out),
                              axis=1))

            # Prediction loss
            self.pred_loss = par['error_cost'] * tf.reduce_mean(
                tf.stack(self.total_pred_error))

            # Collect RL losses
            RL_loss = self.pol_loss + self.val_loss - self.entropy_loss + self.pred_loss

        # Collect loss terms and compute gradients
        total_loss = sup_loss + RL_loss + self.aux_loss + self.spike_loss
        self.train_op = adam_optimizer.compute_gradients(total_loss)

        # Stabilize weights
        if par['stabilization'] == 'pathint':
            # Zenke method
            self.pathint_stabilization(adam_optimizer)
        elif par['stabilization'] == 'EWC':
            # Kirkpatrick method
            self.EWC()
        else:
            # No stabilization
            pass

        # Make reset operations
        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = adam_optimizer.reset_params()
        self.reset_weights()

        # Make saturation correction operation
        self.make_recurrent_weights_positive()
Example #27
    def optimize(self):

        opt = AdamOpt.AdamOpt(tf.trainable_variables(), par['learning_rate'])
        eps = 1e-7

        # Putting together variable groups
        encoder = tf.trainable_variables('encoder')
        decoder = tf.trainable_variables('decoder')
        VAE_vars = encoder + decoder

        generator = tf.trainable_variables('generator')
        discriminator = tf.trainable_variables('discriminator')
        GAN_vars = generator + discriminator

        task_vars = tf.trainable_variables('solution')

        # Task loss and training
        task_loss_list = [mask*tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=target+eps) \
            for out, target, mask in zip(self.outputs_dict['encoder_to_solution'], self.target_data, self.time_mask)]
        self.task_loss = tf.reduce_mean(tf.stack(task_loss_list))

        y_prob = [
            tf.nn.softmax(out)
            for out in self.outputs_dict['generator_to_solution']
        ]
        self.entropy_loss = tf.reduce_mean(
            tf.stack([
                -m * tf.reduce_mean(-p_i * tf.log(p_i + eps))
                for p_i, m in zip(y_prob, self.time_mask)
            ]))

        y_prob = [
            tf.nn.softmax(out)
            for out in self.outputs_dict['encoder_to_solution']
        ]
        self.entropy_loss_enc = tf.reduce_mean(
            tf.stack([
                -m * tf.reduce_mean(-p_i * tf.log(p_i + eps))
                for p_i, m in zip(y_prob, self.time_mask)
            ]))

        self.train_task = opt.compute_gradients(self.task_loss,
                                                var_list=task_vars)
        self.train_task_entropy = opt.compute_gradients(self.entropy_loss,
                                                        var_list=task_vars)

        # Autoencoder loss and training
        recon_loss_list = [tf.square(out-target) for out, target in \
            zip(self.outputs_dict['encoder_to_decoder'], self.input_data)]
        self.recon_loss = tf.reduce_mean(tf.stack(recon_loss_list))

        si = self.outputs_dict['encoder_si']
        mu = self.outputs_dict['encoder_mu']
        latent_loss_list = [-0.5 * tf.reduce_sum(1+si_t-tf.square(mu_t)-tf.exp(si_t), axis=-1) \
            for mu_t, si_t in zip(mu, si)]
        self.act_latent_loss = par['act_latent_cost'] * tf.reduce_mean(
            tf.stack(latent_loss_list))

        self.train_VAE = opt.compute_gradients(self.recon_loss +
                                               self.act_latent_loss,
                                               var_list=VAE_vars)

        # Discriminator loss and training
        """
        self.discr_gen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
            labels=tf.stack(self.outputs_dict['generator_to_discriminator'], axis=0), logits=par['discriminator_gen_target']+eps))
        self.discr_act_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
            labels=tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0), logits=par['discriminator_act_target']+eps))

        self.gener_gen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
            labels=tf.stack(self.outputs_dict['generator_to_discriminator'], axis=0), logits=par['discriminator_act_target']+eps))
        self.gener_act_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
            labels=tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0), logits=par['discriminator_gen_target']+eps))
        #"""

        self.discr_gen_loss = tf.reduce_mean(
            tf.square(
                tf.stack(self.outputs_dict['generator_to_discriminator'],
                         axis=0) - par['discriminator_gen_target']))
        self.discr_act_loss = tf.reduce_mean(
            tf.square(
                tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0)
                - par['discriminator_act_target']))

        self.gener_gen_loss = tf.reduce_mean(
            tf.square(
                tf.stack(self.outputs_dict['generator_to_discriminator'],
                         axis=0) - par['discriminator_act_target']))
        self.gener_act_loss = tf.reduce_mean(
            tf.square(
                tf.stack(self.outputs_dict['encoder_to_discriminator'], axis=0)
                - par['discriminator_gen_target']))
        #"""

        si = self.outputs_dict['generator_si']
        mu = self.outputs_dict['generator_mu']
        latent_loss_list = [
            -0.5 *
            tf.reduce_sum(1 + si_t - tf.square(mu_t) - tf.exp(si_t), axis=-1)
            for mu_t, si_t in zip(mu, si)
        ]
        self.gen_latent_loss = par['gen_latent_cost'] * tf.reduce_mean(
            tf.stack(latent_loss_list))

        self.gen_var_loss = -par['var_cost'] * tf.reduce_mean(
            tf.nn.moments(tf.stack(self.outputs_dict['generator_to_decoder'],
                                   axis=0),
                          axes=1)[1])

        self.generator_loss = self.gener_gen_loss + self.gener_act_loss + self.gen_latent_loss + self.gen_var_loss
        self.discriminator_loss = self.discr_gen_loss + self.discr_act_loss

        self.train_generator = opt.compute_gradients(self.generator_loss,
                                                     var_list=generator)
        self.train_discriminator = opt.compute_gradients(
            self.discriminator_loss, var_list=discriminator)

        self.reset_adam_op = opt.reset_params()
Example #28
    def optimize(self):
        """ Calculate losses and apply corrections to the model """

        # Optimize all trainable variables, except those in the convolutional layers
        self.variables = [
            var for var in tf.trainable_variables()
            if not 'conv' in var.op.name
        ]

        # Use all trainable variables for synaptic stabilization, except conv and rule weights
        self.variables_stabilization = [
            var for var in tf.trainable_variables()
            if not ('conv' in var.op.name or 'Wr' in var.op.name)
        ]

        # Set up the optimizer
        adam_optimizer = AdamOpt.AdamOpt(self.variables,
                                         learning_rate=par['learning_rate'])

        # Make stabilization records
        prev_weights = {}
        reset_prev_vars_ops = []
        self.big_omega_var = {}
        aux_losses = []

        # Set up stabilization based on designated variables list
        for var in self.variables_stabilization:
            n = var.op.name

            # Make big omega and prev_weight variables
            self.big_omega_var[n] = tf.Variable(tf.zeros(var.get_shape()),
                                                trainable=False)
            prev_weights[n] = tf.Variable(tf.zeros(var.get_shape()),
                                          trainable=False)

            # Generate auxiliary stabilization losses
            aux_losses.append(par['omega_c'] * tf.reduce_sum(
                tf.multiply(self.big_omega_var[n],
                            tf.square(prev_weights[n] - var))))

            # Make a reset function for each prev_weight element
            reset_prev_vars_ops.append(tf.assign(prev_weights[n], var))

        # Aggregate auxiliary losses
        self.aux_loss = tf.add_n(aux_losses)

        # Determine softmax task loss on the network output
        self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.y, \
            labels = self.target_data, dim=1))

        # Get the gradient of the loss+aux function, in order to both perform training and to compute delta_weights
        with tf.control_dependencies([self.task_loss, self.aux_loss]):
            self.train_op = adam_optimizer.compute_gradients(self.task_loss +
                                                             self.aux_loss)

        # Stabilize weights
        if par['stabilization'] == 'pathint':
            # Zenke method
            self.pathint_stabilization(adam_optimizer, prev_weights)
        elif par['stabilization'] == 'EWC':
            # Kirkpatrick method
            self.EWC()
        else:
            # No stabilization
            pass

        # Make reset operations
        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = adam_optimizer.reset_params()
        self.reset_weights()

        # Calculate accuracy for analysis
        correct_prediction = tf.equal(
            tf.argmax(self.y - (1 - self.mask) * 9999, 1),
            tf.argmax(self.target_data - (1 - self.mask) * 9999, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Example #29
    def optimize(self):

        opt = AdamOpt.AdamOpt(tf.trainable_variables(), par['learning_rate'])
        eps = 1e-7

        # Putting together variable groups
        encoder  = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='encoder')
        decoder  = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='decoder')
        VAE_vars = encoder + decoder

        generator     = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='generator')
        discriminator = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='discriminator')
        GAN_vars      = generator + discriminator

        task_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='solution')

        # Task loss and training
        if par['task'] == 'trig':
            self.task_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_to_solution']-self.target_data))
        elif par['task'] == 'go':
            self.task_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( \
                logits=self.outputs_dict['encoder_to_solution'], labels=self.target_data+eps))

            y_prob = tf.nn.softmax(self.outputs_dict['generator_to_solution'])
            self.entropy_loss = -tf.reduce_mean(-y_prob * tf.log(y_prob))

            y_prob = tf.nn.softmax(self.outputs_dict['encoder_to_solution'])
            self.entropy_loss_encoded = -tf.reduce_mean(-y_prob * tf.log(y_prob))


        self.aux_loss, prev_weights, reset_prev_vars_ops = self.pathint_loss(task_vars) # Loss calculation

        self.train_task = opt.compute_gradients(self.task_loss + self.aux_loss, var_list=task_vars)
        self.train_task_entropy = opt.compute_gradients(self.entropy_loss, var_list=task_vars)

        self.pathint_stabilization(opt, prev_weights, task_vars)    # Weight stabilization
        self.reset_prev_vars = tf.group(*reset_prev_vars_ops)
        self.reset_adam_op = opt.reset_params()


        # Autoencoder loss and training
        self.recon_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_reconstruction']-self.input_data))

        si = self.outputs_dict['encoder_sig']
        mu = self.outputs_dict['encoder_mu']
        self.act_latent_loss = par['act_latent_cost']* -0.5*tf.reduce_mean(tf.reduce_sum(1+si-tf.square(mu)-tf.exp(si),axis=-1))

        self.train_VAE = opt.compute_gradients(self.recon_loss + self.act_latent_loss, var_list=VAE_vars)


        # Discriminator loss and training
        self.discr_gen_loss = tf.reduce_mean(tf.square(self.outputs_dict['generator_to_discriminator'] - par['discriminator_gen_target']))
        self.discr_act_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_to_discriminator'] - par['discriminator_act_target']))

        self.gener_gen_loss = tf.reduce_mean(tf.square(self.outputs_dict['generator_to_discriminator'] - par['discriminator_act_target']))
        self.gener_act_loss = tf.reduce_mean(tf.square(self.outputs_dict['encoder_to_discriminator'] - par['discriminator_gen_target']))

        si = self.outputs_dict['generator_sig']
        mu = self.outputs_dict['generator_mu']
        self.gen_latent_loss = par['gen_latent_cost'] * -0.5*tf.reduce_mean(tf.reduce_sum(1+si-tf.square(mu)-tf.exp(si),axis=-1))

        self.generator_loss = self.gener_gen_loss + self.gener_act_loss + self.gen_latent_loss
        self.discriminator_loss = self.discr_gen_loss + self.discr_act_loss

        self.train_generator     = opt.compute_gradients(self.generator_loss, var_list=generator)
        self.train_discriminator = opt.compute_gradients(self.discriminator_loss, var_list=discriminator)