Example #1
  def _build_decoding(self, hidden_name, decoding_shape, filtered):  # pylint: disable=W0613
    """Build the decoder"""

    # Decoding: Reconstruction of the input
    # -----------------------------------------------------------------
    deconv_strides = self._strides
    deconv_shape = self._input_shape
    deconv_shape[0] = self._hparams.batch_size

    deconvolved = tf.nn.conv2d_transpose(
        filtered, self._weights, output_shape=deconv_shape,
        strides=deconv_strides, padding='SAME', name='deconvolved')
    logging.debug(deconvolved)

    # Reconstruction of the input, in 3d
    deconvolved_biased = tf.add(deconvolved, self._bias_decoding, name='deconvolved_biased')
    logging.debug(deconvolved_biased)

    # TODO: If adding bias, make it only 1 per conv location rather than 1 per pixel.

    # Nonlinearity, then reshape back to the input shape
    decoding_transfer, _ = activation_fn(deconvolved_biased, self._hparams.decoder_nonlinearity)
    decoding_reshape = tf.reshape(decoding_transfer, self._input_shape, name='decoding_reshape')
    logging.debug(decoding_reshape)
    return decoding_reshape
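The decoder relies on tf.nn.conv2d_transpose reproducing the encoder's input shape, which with 'SAME' padding holds when the output spatial size is the input feature-map size times the stride. A minimal, standalone sketch of that shape arithmetic (all sizes are hypothetical, not taken from the model above):

# Standalone sketch: spatial sizes for conv2d / conv2d_transpose with 'SAME' padding.
# Sizes here are hypothetical examples, not values from the model above.
import math

input_h, input_w = 28, 28     # original input (e.g. an image)
stride = 2

# Encoder: conv2d with 'SAME' padding shrinks the spatial size by ceil division.
conv_h = math.ceil(input_h / stride)   # 14
conv_w = math.ceil(input_w / stride)   # 14

# Decoder: conv2d_transpose with the same stride and 'SAME' padding restores it,
# which is why deconv_shape above is just the stored input shape with the batch
# dimension filled in.
deconv_h = conv_h * stride    # 28
deconv_w = conv_w * stride    # 28

assert (deconv_h, deconv_w) == (input_h, input_w)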
Example #2
    def _build_decoding(self, hidden_name, decoding_shape, filtered):
        """
    Build a decoder in feedback path to reconstruct the feedback input (i.e. previous hidden state)
    :param hidden_name the name of the dense layer from which to extract weights
    :param decoding_shape: shape of the reconstruction
    :param filtered: the hidden state to be used to reconstructed

    """

        input_area = np.prod(decoding_shape[1:])
        hidden_activity = filtered

        # Decoding: Reconstruction of the input
        # -----------------------------------------------------------------
        with tf.variable_scope(hidden_name,
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            decoding_bias = tf.get_variable(name='decoding_bias',
                                            shape=[input_area],
                                            initializer=tf.zeros_initializer)

            weights = tf.get_variable('kernel')
            decoding_weighted_sum = tf.matmul(hidden_activity,
                                              weights,
                                              transpose_b=True,
                                              name=(hidden_name +
                                                    '/decoding_weighted_sum'))
            decoding_biased_sum = tf.nn.bias_add(decoding_weighted_sum,
                                                 decoding_bias)

            decoding_transfer, _ = activation_fn(
                decoding_biased_sum, self._hparams.decoder_nonlinearity)
            decoding_reshape = tf.reshape(decoding_transfer, decoding_shape)
            return decoding_reshape
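This decoder re-uses the dense encoder's 'kernel' with transpose_b=True, i.e. a tied-weights reconstruction r = f(h·Wᵀ + b). A minimal NumPy sketch of the same arithmetic, with hypothetical sizes and ReLU standing in for the configurable decoder nonlinearity:

# Standalone sketch of tied-weights decoding: r = f(h @ W.T + b).
# Sizes are hypothetical; ReLU stands in for hparams.decoder_nonlinearity.
import numpy as np

input_area, hidden_size, batch = 784, 64, 8

W = np.random.randn(input_area, hidden_size).astype(np.float32)  # dense encoder kernel (input x hidden)
b_dec = np.zeros(input_area, dtype=np.float32)                   # decoding bias, one per input element
h = np.random.rand(batch, hidden_size).astype(np.float32)        # hidden activity ("filtered")

decoding_weighted_sum = h @ W.T                                   # matmul against the transposed encoder kernel
decoding = np.maximum(decoding_weighted_sum + b_dec, 0.0)         # bias add + nonlinearity
print(decoding.shape)                                             # (8, 784), then reshaped to decoding_shape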
Example #3
    def _build_encoding(self, input_tensor, mask_pl=None):
        """Build the encoder network"""

        hidden_size = self._hparams.filters
        batch_hidden_shape = (self._hparams.batch_size, hidden_size)

        weighted_sum = self._build_weighted_sum(input_tensor,
                                                self._hparams.use_bias)

        # Nonlinearity
        # -----------------------------------------------------------------
        hidden_transfer, _ = activation_fn(weighted_sum,
                                           self._hparams.encoder_nonlinearity)

        # External masking
        # -----------------------------------------------------------------
        name = self._hidden_name + '_masked'
        mask_pl = self._dual.add('mask',
                                 shape=batch_hidden_shape,
                                 default_value=1.0).add_pl()
        hidden_masked = tf.multiply(hidden_transfer, mask_pl, name=name)
        return hidden_masked, hidden_masked
Example #4
    def _build_encoding(self, input_tensor, mask_pl=None):
        """
    Build combined encoding of feedforward and feedback inputs.
    'add_bias' is honored for the feedforward, but feedback is set to NO BIAS
    """
        def is_train_ff():
            return self._hparams.optimize in ('ff', 'both')

        def is_train_fb():
            return self._hparams.optimize in ('fb', 'both')

        hidden_size = self._hparams.filters
        batch_hidden_shape = (self._hparams.batch_size, hidden_size)

        feedback_tensor = self._add_feedback()
        feedback_tensor = tf_print(feedback_tensor,
                                   "Feedback tensor: ",
                                   100,
                                   mute=TF_DBUG_MUTE)

        # apply input mask to provide partial images
        input_shape = input_tensor.shape.as_list()
        sample_size = np.prod(input_shape[1:])
        input_mask = self._dual.add('input_mask',
                                    shape=[sample_size],
                                    default_value=1.0).add_pl(default=True)
        input_mask_reshaped = tf.reshape(input_mask, input_shape[1:])
        input_tensor = tf.multiply(
            input_tensor,
            input_mask_reshaped)  # apply shaped mask to each sample in the batch

        # apply input gain (can be used to amplify or attenuate input)
        input_gain = self._dual.add('input_gain', shape=[1],
                                    default_value=1.0).add_pl(default=True)
        x_ff = tf.multiply(input_tensor, input_gain)

        self._dual.set_op('x_ff', x_ff)

        # Feed forward weighted sum
        # ------------------------------------------
        ws_ff = super()._build_weighted_sum(x_ff,
                                            add_bias=False,
                                            trainable=is_train_ff())

        # Feed back weighted sum
        # ------------------------------------------
        # ensure the feedback encoding has a different name so that it is a separate graph op
        hidden_name = self._hidden_name
        self._hidden_name = self._hidden_feedback_name
        ws_fb = super()._build_weighted_sum(
            feedback_tensor,
            add_bias=False,  # don't add bias for feedback
            trainable=is_train_fb())
        self._hidden_name = hidden_name

        # zero out single-cell circular weights (i.e. cell i to cell i); applied next batch, before optimisation
        remove_circular = True
        if remove_circular:
            with tf.variable_scope(self._hidden_feedback_name,
                                   reuse=tf.AUTO_REUSE,
                                   auxiliary_name_scope=False):
                weights_fb = tf.get_variable('kernel')
                mask = np.ones(weights_fb.get_shape(), dtype=np.float32)
                np.fill_diagonal(mask, 0)
                diagonal_mask = tf.convert_to_tensor(mask)
                weights_updated = tf.multiply(
                    weights_fb, diagonal_mask)  # must be element-wise
                fb_weights_adjust_op = tf.assign(weights_fb,
                                                 weights_updated,
                                                 validate_shape=False,
                                                 use_locking=True)
                self._dual.set_op('fb_weights_adjust_op', fb_weights_adjust_op)

        # Total weighted sum
        # ------------------------------------------
        ws = ws_fb + ws_ff

        # Non-linearity and masking
        # ------------------------------------------
        z_pre_mask, _ = activation_fn(ws, self._hparams.nonlinearity)

        # external masking
        name = hidden_name + '_masked'
        mask_pl = self._dual.add('mask',
                                 shape=batch_hidden_shape,
                                 default_value=1.0).add_pl()
        z = tf.multiply(z_pre_mask, mask_pl, name=name)

        # store reference to relevant ops for later use
        with tf.variable_scope(self._hidden_feedback_name,
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            weights_fb = tf.get_variable('kernel')
            self._dual.set_op('weights_fb', weights_fb)
        with tf.variable_scope(self._hidden_name):
            weights_ff = tf.get_variable('kernel')
            self._dual.set_op('weights_ff', weights_ff)

            if self._hparams.use_bias:
                bias_ff = tf.get_variable('bias')
                self._dual.set_op('bias_ff', bias_ff)

        self._dual.set_op('ws_fb', ws_fb)
        self._dual.set_op('ws_ff', ws_ff)
        self._dual.set_op('ws', ws)
        self._dual.set_op('z', z)

        return z, z
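The "remove circular weights" step above multiplies the square feedback kernel element-wise by a ones matrix with a zero diagonal, clearing each cell's connection to itself and leaving every other weight untouched. A small standalone NumPy illustration (the size is hypothetical):

# Standalone sketch: zero out self-connections (cell i -> cell i) in a square weight matrix.
import numpy as np

hidden = 4  # hypothetical number of cells
weights_fb = np.random.randn(hidden, hidden).astype(np.float32)

mask = np.ones((hidden, hidden), dtype=np.float32)
np.fill_diagonal(mask, 0)                  # zeros on the diagonal only

weights_no_circular = weights_fb * mask    # element-wise, as in the tf.multiply above
assert np.allclose(np.diag(weights_no_circular), 0.0)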
Example #5
  def _build_encoding(self, input_tensor, mask_pl=None):
    """Build the encoder"""
    batch_input_shape = self._input_shape

    kernel_size = [
        self._hparams.filters_field_width, self._hparams.filters_field_height
    ]

    self._strides = [
        1, self._hparams.filters_field_stride, self._hparams.filters_field_stride, 1
    ]

    # Input reshape: Ensure 3D input for convolutional processing
    # -----------------------------------------------------------------
    self._input = tf.reshape(input_tensor, batch_input_shape, name='input')
    logging.debug(self._input)

    # Encoding
    # -----------------------------------------------------------------
    conv_filter_shape = [
        kernel_size[0],       # field w
        kernel_size[1],       # field h
        self._input_shape[3], # input depth
        self._hparams.filters # number of filters
    ]

    # Initialise weights and bias for the filter
    self._weights = tf.get_variable(
        shape=conv_filter_shape,
        initializer=tf.truncated_normal_initializer(stddev=0.03), name='weights')
    self._bias_encoding = tf.get_variable(
        shape=[self._hparams.filters],
        initializer=tf.zeros_initializer, name='bias_encoding')
    self._bias_decoding = tf.get_variable(
        shape=self._input_shape[1:],
        initializer=tf.zeros_initializer, name='bias_decoding')

    logging.debug(self._weights)
    logging.debug(self._bias_encoding)
    logging.debug(self._bias_decoding)

    # Setup the convolutional layer operation
    # Note: The first kernel is centered at the origin, not aligned to
    # it by its origin.
    convolved = tf.nn.conv2d(self._input, self._weights, self._strides, padding='SAME', name='convolved') # zero padded
    logging.debug(convolved)

    # Bias
    convolved_biased = tf.nn.bias_add(convolved, self._bias_encoding, name='convolved_biased')
    logging.debug(convolved_biased)

    # Nonlinearity
    # -----------------------------------------------------------------
    hidden_transfer, _ = activation_fn(convolved_biased, self._hparams.encoder_nonlinearity)

    # External masking
    # -----------------------------------------------------------------
    mask_shape = hidden_transfer.get_shape().as_list()
    mask_shape[0] = self._hparams.batch_size

    mask_pl = self._dual.add('mask', shape=mask_shape, default_value=1.0).add_pl()
    hidden_masked = tf.multiply(hidden_transfer, mask_pl)
    return hidden_masked, hidden_masked
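The external mask placeholder defaults to 1.0, so it is a pass-through unless a mask is fed; feeding zeros knocks out the corresponding hidden units. A tiny standalone NumPy illustration of that element-wise masking (shapes are hypothetical):

# Standalone sketch: external masking of hidden activations.
import numpy as np

batch, h, w, filters = 2, 14, 14, 3   # hypothetical hidden shape
hidden_transfer = np.random.rand(batch, h, w, filters).astype(np.float32)

mask = np.ones((batch, h, w, filters), dtype=np.float32)  # default 1.0 -> no effect
mask[:, :, :, 0] = 0.0                                    # e.g. knock out filter 0 everywhere

hidden_masked = hidden_transfer * mask
assert np.all(hidden_masked[:, :, :, 0] == 0.0)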
Example #6
    def _build_rnn(self, input_tensor):
        """
    Build the encoder network

    input_tensor = 1 batch = 1 episode (batch size, #neurons)
    Assumes second last item is degraded input, last is target
    """

        w_trainable = False
        x_shift_trainable = False
        eta_trainable = True

        input_shape = input_tensor.get_shape().as_list()
        input_area = np.prod(input_shape[1:])
        batch_input_shape = (-1, input_area)

        filters = self._hparams.filters + self._hparams.bias_neurons
        hidden_size = [filters]
        weights_shape = [filters, filters]

        with tf.variable_scope("rnn"):
            init_state_pl = self._dual.add('init_pl',
                                           shape=hidden_size,
                                           default_value=0).add_pl()
            init_hebb_pl = self._dual.add('hebb_init_pl',
                                          shape=weights_shape,
                                          default_value=0).add_pl()

            # ensure init placeholders are being reset every iteration
            init_hebb_pl = tf_print(init_hebb_pl,
                                    "Init Hebb:",
                                    summarize=100,
                                    mute=True)

            # Input reshape: ensure a flat (vector) input per batch item, i.e. (batches, inputs)
            # -----------------------------------------------------------------
            input_vector = tf.reshape(input_tensor,
                                      batch_input_shape,
                                      name='input_vector')

            # unroll input into a series so that we can iterate over it easily
            x_series = tf.unstack(
                input_vector, axis=0,
                name="ep-series")  # batch_size of hidden_size

            # get the target and degraded samples
            target = input_vector[-1]
            target = tf_print(target, "TARGET\n", mute=True)
            degraded_extracted = input_vector[-2]
            degraded_extracted = tf_print(degraded_extracted,
                                          "DEGRADED-extracted\n",
                                          mute=True)
            self._dual.set_op('target', target)
            self._dual.set_op('degraded_raw', degraded_extracted)

            y_current = tf.reshape(init_state_pl, [1, filters],
                                   name="init-curr-state")
            hebb = init_hebb_pl

            with tf.variable_scope("slow-weights"):
                w_default = 0.01
                alpha_default = 0.1
                eta_default = 0.1
                x_shift_default = 0.01
                bias_default = 1.0 * w_default  # To emulate the Miconi method of having an additional input at 20,
                # i.e. it creates an output of 1.0, which is then multiplied by the weight (here we have a plain bias, no weight)

                if w_trainable:
                    w = tf.get_variable(
                        name="w",
                        initializer=(w_default *
                                     tf.random_uniform(weights_shape)))
                else:
                    w = tf.zeros(weights_shape)

                alpha = tf.get_variable(
                    name="alpha",
                    initializer=(alpha_default *
                                 tf.random_uniform(weights_shape)))

                if eta_trainable:
                    eta = tf.get_variable(name="eta",
                                          initializer=(eta_default *
                                                       tf.ones(shape=[1])))
                else:
                    eta = eta_default * tf.ones([1])

                if x_shift_trainable:
                    x_shift = tf.get_variable(name="x_shift",
                                              initializer=(x_shift_default *
                                                           tf.ones(shape=[1])))
                else:
                    x_shift = 0

                self._dual.set_op('w', w)
                self._dual.set_op('alpha', alpha)
                self._dual.set_op('eta', eta)
                self._dual.set_op('x_shift', x_shift)

                if self._hparams.bias:
                    bias = tf.get_variable(name="bias",
                                           initializer=(bias_default *
                                                        tf.ones(filters)))
                    self._dual.set_op('bias', bias)
                    bias = tf_print(bias,
                                    "*** bias ***",
                                    mute=MUTE_DEBUG_GRAPH)

            with tf.variable_scope("layers"):
                hebb = tf_print(hebb,
                                "*** initial hebb ***",
                                mute=MUTE_DEBUG_GRAPH)
                y_current = tf_print(y_current, "*** initial state ***")
                w = tf_print(w, "*** w ***", mute=MUTE_DEBUG_GRAPH)
                alpha = tf_print(alpha, "*** alpha ***", mute=MUTE_DEBUG_GRAPH)

                i = 0
                last_x = None
                outer_first = None
                outer_last = None
                for x in x_series:
                    # last sample is target, so don't process it again
                    if i == len(x_series) - 1:  # [0:x, 1:d, 2:t], l=3
                        break
                    layer_name = "layer-" + str(i)
                    with tf.variable_scope(layer_name):
                        x = self._hparams.bt_amplify_factor * x
                        x = tf_print(x,
                                     str(i) + ": x_input",
                                     mute=MUTE_DEBUG_GRAPH)
                        y_current = tf_print(y_current,
                                             str(i) + ": y(t-1)",
                                             mute=MUTE_DEBUG_GRAPH)

                        # neurons latch on as they have bidirectional connections
                        # attempt to remove this issue by knocking out lateral connections
                        remove = 'random'
                        if remove == 'circular':
                            diagonal_mask = tf.convert_to_tensor(
                                np.tril(
                                    np.ones(weights_shape, dtype=np.float32),
                                    0))
                            alpha = tf.multiply(alpha, diagonal_mask)
                        elif remove == 'random':
                            size = np.prod(weights_shape[:])
                            knockout_mask = np.ones(size)
                            knockout_mask[:int(size / 2)] = 0
                            np.random.shuffle(knockout_mask)
                            knockout_mask = np.reshape(knockout_mask,
                                                       weights_shape)
                            alpha = tf.multiply(alpha, knockout_mask)

                        # ---------- Calculate next output of the RNN
                        weighted_sum = tf.add(
                            tf.matmul(y_current - x_shift,
                                      tf.add(w,
                                             tf.multiply(alpha,
                                                         hebb,
                                                         name='lyr-mul'),
                                             name="lyr-add_w_ah"),
                                      name='lyr-mul-add-matmul'), x,
                            "weighted_sum")

                        if self._hparams.bias:
                            weighted_sum = tf.add(
                                weighted_sum, bias)  # weighted sum with bias

                        y_next, _ = activation_fn(weighted_sum,
                                                  self._hparams.nonlinearity)

                        with tf.variable_scope("fast_weights"):
                            # ---------- Update Hebbian fast weights
                            # outer product of (yin * yout) = (current_state * next_state)
                            outer = tf.matmul(tf.reshape(y_current,
                                                         shape=[filters, 1]),
                                              tf.reshape(y_next,
                                                         shape=[1, filters]),
                                              name="outer-product")
                            outer = tf_print(outer,
                                             str(i) +
                                             ": *** outer = y(t-1) * y(t) ***",
                                             mute=MUTE_DEBUG_GRAPH)

                            if i == 1:  # first outer is zero
                                outer_first = outer
                            outer_last = outer

                            hebb = (1.0 - eta) * hebb + eta * outer
                            hebb = tf_print(hebb,
                                            str(i) + ": *** hebb ***",
                                            mute=MUTE_DEBUG_GRAPH)

                        # record for visualisation the output when presented with the last blank
                        idx_blank_first = self._blank_indices[-1][0]
                        idx_blank_last = self._blank_indices[-1][1]

                        if i == idx_blank_first:
                            blank_output_first = y_next
                            self._dual.set_op('blank_output_first',
                                              blank_output_first)

                        if i == idx_blank_last:
                            blank_output_last = y_next
                            self._dual.set_op('blank_output_last',
                                              blank_output_last)

                        y_current = y_next
                        last_x = x
                        i = i + 1

            self._dual.set_op('hebb', hebb)
            self._dual.set_op('outer_first', outer_first)
            self._dual.set_op('outer_last', outer_last)

            last_x = tf_print(last_x, str(i) + ": LAST-X", mute=True)
            self._dual.set_op('degraded', last_x)

            output_pre_masked = tf.squeeze(y_current)
            self._dual.set_op('output_pre_masked',
                              output_pre_masked)  # pre-masked output

        # External masking
        # -----------------------------------------------------------------
        with tf.variable_scope("masking"):
            mask_pl = self._dual.add('mask',
                                     shape=hidden_size,
                                     default_value=1.0).add_pl()
            y_masked = tf.multiply(y_current, mask_pl, name='y_masked')

        # Setup the training operations
        # -----------------------------------------------------------------
        with tf.variable_scope("optimizer"):
            loss_op = self._build_loss_op(y_masked, target)
            self._dual.set_op('loss', loss_op)

            self._optimizer = tf.train.AdamOptimizer(
                self._hparams.learning_rate)
            training_op = self._optimizer.minimize(
                loss_op,
                global_step=tf.train.get_or_create_global_step(),
                name='training_op')
            self._dual.set_op('training', training_op)

        return y_masked, y_masked
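The loop above follows the differentiable-plasticity pattern: the effective recurrent weights are w + alpha * hebb, the state is advanced through the chosen nonlinearity, and the Hebbian trace decays towards the outer product of successive states. A compact standalone NumPy sketch of a single step (sizes are hypothetical, and tanh stands in for the configurable nonlinearity):

# Standalone sketch of one RNN step with Hebbian fast weights.
# y_next = f((y - x_shift) @ (w + alpha * hebb) + x [+ bias])
# hebb   = (1 - eta) * hebb + eta * outer(y, y_next)
import numpy as np

filters = 5                                                  # hypothetical layer size
rng = np.random.default_rng(0)

w = np.zeros((filters, filters), dtype=np.float32)           # slow weights (untrained here, as w_trainable=False)
alpha = 0.1 * rng.random((filters, filters)).astype(np.float32)
eta, x_shift = 0.1, 0.0

y = np.zeros((1, filters), dtype=np.float32)                 # previous state y(t-1)
hebb = np.zeros((filters, filters), dtype=np.float32)        # Hebbian trace
x = rng.random((1, filters)).astype(np.float32)              # current input

weighted_sum = (y - x_shift) @ (w + alpha * hebb) + x
y_next = np.tanh(weighted_sum)                               # stand-in nonlinearity

outer = y.T @ y_next                                         # outer product y(t-1) * y(t)
hebb = (1.0 - eta) * hebb + eta * outer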