def _build_decoding(self, hidden_name, decoding_shape, filtered):  # pylint: disable=W0613
  """Build the decoder: a transposed convolution that reconstructs the input."""

  # Decoding: Reconstruction of the input
  # -----------------------------------------------------------------
  deconv_strides = self._strides
  deconv_shape = list(self._input_shape)  # copy so the stored input shape is not mutated
  deconv_shape[0] = self._hparams.batch_size

  deconvolved = tf.nn.conv2d_transpose(
      filtered, self._weights, output_shape=deconv_shape,
      strides=deconv_strides, padding='SAME', name='deconvolved')
  logging.debug(deconvolved)

  # Reconstruction of the input, in 3D (batch, h, w, depth)
  deconvolved_biased = tf.add(deconvolved, self._bias_decoding, name='deconvolved_biased')
  logging.debug(deconvolved_biased)

  # TODO: If adding bias, make it only 1 per conv location rather than 1 per pixel.

  # Nonlinearity, then reshape back to the input shape
  decoding_transfer, _ = activation_fn(deconvolved_biased, self._hparams.decoder_nonlinearity)
  decoding_reshape = tf.reshape(decoding_transfer, self._input_shape, name='decoding_reshape')
  logging.debug(decoding_reshape)

  return decoding_reshape
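# Note on the decode path above (informal, shapes assumed from the encoder): with
# 'SAME' padding, conv2d_transpose maps the hidden tensor
# [batch, h/stride, w/stride, filters] back to output_shape = [batch, h, w, depth],
# i.e. it inverts the encoder's spatial down-sampling and reuses the encoder
# kernel `self._weights` (tied weights).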
def _build_decoding(self, hidden_name, decoding_shape, filtered):
  """
  Build a decoder in the feedback path to reconstruct the feedback input
  (i.e. the previous hidden state).

  :param hidden_name: name of the dense layer from which to extract weights
  :param decoding_shape: shape of the reconstruction
  :param filtered: the hidden state used to produce the reconstruction
  """
  input_area = np.prod(decoding_shape[1:])
  hidden_activity = filtered

  # Decoding: Reconstruction of the input
  # -----------------------------------------------------------------
  # Reuse the encoder's kernel (tied weights) and add a separate decoding bias.
  with tf.variable_scope(hidden_name, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    decoding_bias = tf.get_variable(name='decoding_bias', shape=[input_area],
                                    initializer=tf.zeros_initializer)
    weights = tf.get_variable('kernel')

  decoding_weighted_sum = tf.matmul(hidden_activity, weights, transpose_b=True,
                                    name=(hidden_name + '/decoding_weighted_sum'))
  decoding_biased_sum = tf.nn.bias_add(decoding_weighted_sum, decoding_bias)

  decoding_transfer, _ = activation_fn(decoding_biased_sum, self._hparams.decoder_nonlinearity)
  decoding_reshape = tf.reshape(decoding_transfer, decoding_shape)

  return decoding_reshape
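# Tied-weight decode in brief (illustrative naming only; `W` is the dense layer's
# 'kernel' reused above and `b_dec` is 'decoding_bias'):
#   encode: y     = f(x @ W + b_enc)     # W: [input_area, filters]
#   decode: x_hat = g(y @ W^T + b_dec)   # the tf.matmul(..., transpose_b=True) above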
def _build_encoding(self, input_tensor, mask_pl=None):
  """Build the encoder network."""
  hidden_size = self._hparams.filters
  batch_hidden_shape = (self._hparams.batch_size, hidden_size)

  weighted_sum = self._build_weighted_sum(input_tensor, self._hparams.use_bias)

  # Nonlinearity
  # -----------------------------------------------------------------
  hidden_transfer, _ = activation_fn(weighted_sum, self._hparams.encoder_nonlinearity)

  # External masking
  # -----------------------------------------------------------------
  name = self._hidden_name + '_masked'
  mask_pl = self._dual.add('mask', shape=batch_hidden_shape, default_value=1.0).add_pl()
  hidden_masked = tf.multiply(hidden_transfer, mask_pl, name=name)

  return hidden_masked, hidden_masked
def _build_encoding(self, input_tensor, mask_pl=None):
  """
  Build the combined encoding of the feedforward and feedback inputs.
  'add_bias' is honoured for the feedforward path, but the feedback path has NO bias.
  """

  def is_train_ff():
    return self._hparams.optimize in ('ff', 'both')

  def is_train_fb():
    return self._hparams.optimize in ('fb', 'both')

  hidden_size = self._hparams.filters
  batch_hidden_shape = (self._hparams.batch_size, hidden_size)

  feedback_tensor = self._add_feedback()
  feedback_tensor = tf_print(feedback_tensor, "Feedback tensor: ", 100, mute=TF_DBUG_MUTE)

  # Apply an input mask to provide partial images
  input_shape = input_tensor.shape.as_list()
  sample_size = np.prod(input_shape[1:])
  input_mask = self._dual.add('input_mask', shape=[sample_size], default_value=1.0).add_pl(default=True)
  input_mask_reshaped = tf.reshape(input_mask, input_shape[1:])
  input_tensor = tf.multiply(input_tensor, input_mask_reshaped)  # apply the shaped mask to each sample in the batch

  # Apply an input gain (can be used to amplify or attenuate the input)
  input_gain = self._dual.add('input_gain', shape=[1], default_value=1.0).add_pl(default=True)
  x_ff = tf.multiply(input_tensor, input_gain)
  self._dual.set_op('x_ff', x_ff)

  # Feedforward weighted sum
  # ------------------------------------------
  ws_ff = super()._build_weighted_sum(x_ff, add_bias=False, trainable=is_train_ff())

  # Feedback weighted sum
  # ------------------------------------------
  # Ensure the feedback encoding has a different name so that it is a separate graph op
  hidden_name = self._hidden_name
  self._hidden_name = self._hidden_feedback_name
  ws_fb = super()._build_weighted_sum(
      feedback_tensor,
      add_bias=False,  # don't add bias for feedback
      trainable=is_train_fb())
  self._hidden_name = hidden_name

  # Zero out single-cell circular weights (i.e. cell 1 to cell 1); applied next batch, before optimisation
  remove_circular = True
  if remove_circular:
    with tf.variable_scope(self._hidden_feedback_name, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
      weights_fb = tf.get_variable('kernel')
      mask = np.ones(weights_fb.get_shape(), dtype=np.float32)
      np.fill_diagonal(mask, 0)
      diagonal_mask = tf.convert_to_tensor(mask)
      weights_updated = tf.multiply(weights_fb, diagonal_mask)  # must be element-wise
      fb_weights_adjust_op = tf.assign(weights_fb, weights_updated, validate_shape=False, use_locking=True)
      self._dual.set_op('fb_weights_adjust_op', fb_weights_adjust_op)

  # Total weighted sum
  # ------------------------------------------
  ws = ws_fb + ws_ff

  # Nonlinearity and masking
  # ------------------------------------------
  z_pre_mask, _ = activation_fn(ws, self._hparams.nonlinearity)

  # External masking
  name = hidden_name + '_masked'
  mask_pl = self._dual.add('mask', shape=batch_hidden_shape, default_value=1.0).add_pl()
  z = tf.multiply(z_pre_mask, mask_pl, name=name)

  # Store references to the relevant ops for later use
  with tf.variable_scope(self._hidden_feedback_name, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    weights_fb = tf.get_variable('kernel')
    self._dual.set_op('weights_fb', weights_fb)

  with tf.variable_scope(self._hidden_name, reuse=tf.AUTO_REUSE):
    weights_ff = tf.get_variable('kernel')
    self._dual.set_op('weights_ff', weights_ff)

    if self._hparams.use_bias:
      bias_ff = tf.get_variable('bias')
      self._dual.set_op('bias_ff', bias_ff)

  self._dual.set_op('ws_fb', ws_fb)
  self._dual.set_op('ws_ff', ws_ff)
  self._dual.set_op('ws', ws)
  self._dual.set_op('z', z)

  return z, z
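# Combined encoding in brief (symbols follow the variables above):
#   z = f(x_ff @ W_ff + feedback @ W_fb) * mask
# where the diagonal of W_fb is zeroed by 'fb_weights_adjust_op' (run between
# batches) so a cell cannot excite itself through the feedback path.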
def _build_encoding(self, input_tensor, mask_pl=None):
  """Build the encoder."""
  batch_input_shape = self._input_shape

  kernel_size = [self._hparams.filters_field_width, self._hparams.filters_field_height]
  self._strides = [1, self._hparams.filters_field_stride, self._hparams.filters_field_stride, 1]

  # Input reshape: Ensure 3D input for convolutional processing
  # -----------------------------------------------------------------
  self._input = tf.reshape(input_tensor, batch_input_shape, name='input')
  logging.debug(self._input)

  # Encoding
  # -----------------------------------------------------------------
  conv_filter_shape = [
      kernel_size[0],         # field w
      kernel_size[1],         # field h
      self._input_shape[3],   # input depth
      self._hparams.filters   # number of filters
  ]

  # Initialise weights and bias for the filter
  self._weights = tf.get_variable(
      shape=conv_filter_shape,
      initializer=tf.truncated_normal_initializer(stddev=0.03), name='weights')

  self._bias_encoding = tf.get_variable(
      shape=[self._hparams.filters], initializer=tf.zeros_initializer, name='bias_encoding')

  self._bias_decoding = tf.get_variable(
      shape=self._input_shape[1:], initializer=tf.zeros_initializer, name='bias_decoding')

  logging.debug(self._weights)
  logging.debug(self._bias_encoding)
  logging.debug(self._bias_decoding)

  # Set up the convolutional layer operation
  # Note: The first kernel is centered at the origin, not aligned to it by its origin.
  convolved = tf.nn.conv2d(self._input, self._weights, self._strides, padding='SAME',
                           name='convolved')  # zero padded
  logging.debug(convolved)

  # Bias
  convolved_biased = tf.nn.bias_add(convolved, self._bias_encoding, name='convolved_biased')
  logging.debug(convolved_biased)

  # Nonlinearity
  # -----------------------------------------------------------------
  hidden_transfer, _ = activation_fn(convolved_biased, self._hparams.encoder_nonlinearity)

  # External masking
  # -----------------------------------------------------------------
  mask_shape = hidden_transfer.get_shape().as_list()
  mask_shape[0] = self._hparams.batch_size
  mask_pl = self._dual.add('mask', shape=mask_shape, default_value=1.0).add_pl()
  hidden_masked = tf.multiply(hidden_transfer, mask_pl)

  return hidden_masked, hidden_masked
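# Shape sketch for the encoding above (a rough guide, assuming 'SAME' padding so the
# spatial output size is ceil(input / stride)):
#   input:  [batch, h, w, depth]
#   hidden: [batch, ceil(h / stride), ceil(w / stride), filters]
# e.g. a 28x28x1 input with stride 2 and 64 filters gives a [batch, 14, 14, 64] encoding.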
def _build_rnn(self, input_tensor):
  """
  Build the encoder network.

  input_tensor: one batch = one episode, shaped (batch size, #neurons).
  Assumes the second-last item is the degraded input and the last item is the target.
  """
  w_trainable = False
  x_shift_trainable = False
  eta_trainable = True

  input_shape = input_tensor.get_shape().as_list()
  input_area = np.prod(input_shape[1:])
  batch_input_shape = (-1, input_area)

  filters = self._hparams.filters + self._hparams.bias_neurons
  hidden_size = [filters]
  weights_shape = [filters, filters]

  with tf.variable_scope("rnn"):
    init_state_pl = self._dual.add('init_pl', shape=hidden_size, default_value=0).add_pl()
    init_hebb_pl = self._dual.add('hebb_init_pl', shape=weights_shape, default_value=0).add_pl()

    # ensure init placeholders are being reset every iteration
    init_hebb_pl = tf_print(init_hebb_pl, "Init Hebb:", summarize=100, mute=True)

    # Input reshape: Ensure flat (vector) x batch size input (batches, inputs)
    # -----------------------------------------------------------------
    input_vector = tf.reshape(input_tensor, batch_input_shape, name='input_vector')

    # unroll the input into a series so that we can iterate over it easily
    x_series = tf.unstack(input_vector, axis=0, name="ep-series")  # batch_size of hidden_size

    # get the target and degraded samples
    target = input_vector[-1]
    target = tf_print(target, "TARGET\n", mute=True)
    degraded_extracted = input_vector[-2]
    degraded_extracted = tf_print(degraded_extracted, "DEGRADED-extracted\n", mute=True)
    self._dual.set_op('target', target)
    self._dual.set_op('degraded_raw', degraded_extracted)

    y_current = tf.reshape(init_state_pl, [1, filters], name="init-curr-state")
    hebb = init_hebb_pl

    with tf.variable_scope("slow-weights"):
      w_default = 0.01
      alpha_default = 0.1
      eta_default = 0.1
      x_shift_default = 0.01
      # To emulate the Miconi method of having an additional input at 20, i.e. it creates an
      # output of 1.0, and this is multiplied by the weight (here we have a straight bias, no weight)
      bias_default = 1.0 * w_default

      if w_trainable:
        w = tf.get_variable(name="w", initializer=(w_default * tf.random_uniform(weights_shape)))
      else:
        w = tf.zeros(weights_shape)

      alpha = tf.get_variable(name="alpha", initializer=(alpha_default * tf.random_uniform(weights_shape)))

      if eta_trainable:
        eta = tf.get_variable(name="eta", initializer=(eta_default * tf.ones(shape=[1])))
      else:
        eta = eta_default * tf.ones([1])

      if x_shift_trainable:
        x_shift = tf.get_variable(name="x_shift", initializer=(x_shift_default * tf.ones(shape=[1])))
      else:
        x_shift = 0

      self._dual.set_op('w', w)
      self._dual.set_op('alpha', alpha)
      self._dual.set_op('eta', eta)
      self._dual.set_op('x_shift', x_shift)

      if self._hparams.bias:
        bias = tf.get_variable(name="bias", initializer=(bias_default * tf.ones(filters)))
        self._dual.set_op('bias', bias)
        bias = tf_print(bias, "*** bias ***", mute=MUTE_DEBUG_GRAPH)

    with tf.variable_scope("layers"):
      hebb = tf_print(hebb, "*** initial hebb ***", mute=MUTE_DEBUG_GRAPH)
      y_current = tf_print(y_current, "*** initial state ***")
      w = tf_print(w, "*** w ***", mute=MUTE_DEBUG_GRAPH)
      alpha = tf_print(alpha, "*** alpha ***", mute=MUTE_DEBUG_GRAPH)

      i = 0
      last_x = None
      outer_first = None
      outer_last = None
      for x in x_series:

        # the last sample is the target, so don't process it again
        if i == len(x_series) - 1:  # [0:x, 1:d, 2:t], l=3
          break

        layer_name = "layer-" + str(i)
        with tf.variable_scope(layer_name):
          x = self._hparams.bt_amplify_factor * x
          x = tf_print(x, str(i) + ": x_input", mute=MUTE_DEBUG_GRAPH)
          y_current = tf_print(y_current, str(i) + ": y(t-1)", mute=MUTE_DEBUG_GRAPH)

          # neurons latch on as they have bidirectional connections;
          # attempt to remove this issue by knocking out lateral connections
          remove = 'random'
          if remove == 'circular':
            diagonal_mask = tf.convert_to_tensor(np.tril(np.ones(weights_shape, dtype=np.float32), 0))
            alpha = tf.multiply(alpha, diagonal_mask)
          elif remove == 'random':
            size = np.prod(weights_shape[:])
            knockout_mask = np.ones(size)
            knockout_mask[:int(size / 2)] = 0
            np.random.shuffle(knockout_mask)
            knockout_mask = np.reshape(knockout_mask, weights_shape)
            alpha = tf.multiply(alpha, knockout_mask)

          # ---------- Calculate the next output of the RNN
          weighted_sum = tf.add(
              tf.matmul(y_current - x_shift,
                        tf.add(w, tf.multiply(alpha, hebb, name='lyr-mul'), name="lyr-add_w_ah"),
                        name='lyr-mul-add-matmul'),
              x, "weighted_sum")

          if self._hparams.bias:
            weighted_sum = tf.add(weighted_sum, bias)  # weighted sum with bias

          y_next, _ = activation_fn(weighted_sum, self._hparams.nonlinearity)

          with tf.variable_scope("fast_weights"):
            # ---------- Update the Hebbian fast weights
            # outer product of (yin * yout) = (current_state * next_state)
            outer = tf.matmul(tf.reshape(y_current, shape=[filters, 1]),
                              tf.reshape(y_next, shape=[1, filters]),
                              name="outer-product")
            outer = tf_print(outer, str(i) + ": *** outer = y(t-1) * y(t) ***", mute=MUTE_DEBUG_GRAPH)

            if i == 1:  # first outer is zero
              outer_first = outer
            outer_last = outer

            hebb = (1.0 - eta) * hebb + eta * outer
            hebb = tf_print(hebb, str(i) + ": *** hebb ***", mute=MUTE_DEBUG_GRAPH)

          # record, for visualisation, the output when presented with the last blank
          idx_blank_first = self._blank_indices[-1][0]
          idx_blank_last = self._blank_indices[-1][1]

          if i == idx_blank_first:
            blank_output_first = y_next
            self._dual.set_op('blank_output_first', blank_output_first)

          if i == idx_blank_last:
            blank_output_last = y_next
            self._dual.set_op('blank_output_last', blank_output_last)

          y_current = y_next
          last_x = x
          i = i + 1

      self._dual.set_op('hebb', hebb)
      self._dual.set_op('outer_first', outer_first)
      self._dual.set_op('outer_last', outer_last)

      last_x = tf_print(last_x, str(i) + ": LAST-X", mute=True)
      self._dual.set_op('degraded', last_x)

    output_pre_masked = tf.squeeze(y_current)
    self._dual.set_op('output_pre_masked', output_pre_masked)  # pre-masked output

  # External masking
  # -----------------------------------------------------------------
  with tf.variable_scope("masking"):
    mask_pl = self._dual.add('mask', shape=hidden_size, default_value=1.0).add_pl()
    y_masked = tf.multiply(y_current, mask_pl, name='y_masked')

  # Setup the training operations
  # -----------------------------------------------------------------
  with tf.variable_scope("optimizer"):
    loss_op = self._build_loss_op(y_masked, target)
    self._dual.set_op('loss', loss_op)

    self._optimizer = tf.train.AdamOptimizer(self._hparams.learning_rate)
    training_op = self._optimizer.minimize(
        loss_op, global_step=tf.train.get_or_create_global_step(), name='training_op')
    self._dual.set_op('training', training_op)

  return y_masked, y_masked
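# Minimal standalone sketch of the per-step recurrence built above (Miconi-style
# differentiable plasticity), kept as a comment so it does not interfere with the
# graph code; names mirror the loop variables and `f` stands for the configured
# nonlinearity:
#
#   import numpy as np
#
#   def plastic_step(x, y_prev, hebb, w, alpha, eta, bias, x_shift=0.0, f=np.tanh):
#     """One timestep: weighted sum with fast weights, then Hebbian trace update."""
#     y = f((y_prev - x_shift) @ (w + alpha * hebb) + x + bias)
#     hebb = (1.0 - eta) * hebb + eta * np.outer(y_prev, y)
#     return y, hebb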