Example #1
    def test_elementwise_in_connections(self):
        input_layer = layers.Input(2)
        hidden_layer_1 = layers.Relu(1,
                                     weight=init.Constant(1),
                                     bias=init.Constant(0))
        hidden_layer_2 = layers.Relu(1,
                                     weight=init.Constant(2),
                                     bias=init.Constant(0))
        elem_layer = layers.Elementwise(merge_function=tf.add)

        connection = layers.join(input_layer, hidden_layer_1, elem_layer)
        connection = layers.join(input_layer, hidden_layer_2, elem_layer)
        connection.initialize()

        self.assertEqual(elem_layer.output_shape, (1, ))

        test_input = asfloat(np.array([
            [0, 1],
            [-1, -1],
        ]))
        actual_output = self.eval(connection.output(test_input))
        expected_output = np.array([
            [3],
            [0],
        ])
        np.testing.assert_array_almost_equal(expected_output, actual_output)
Example #2

    def test_constant_initializer(self):
        const = init.Constant(value=0)
        np.testing.assert_array_almost_equal(const.sample(shape=(2, 3)),
                                             np.zeros((2, 3)))

        const = init.Constant(value=1.5)
        np.testing.assert_array_almost_equal(const.sample(shape=(2, 3)),
                                             np.ones((2, 3)) * 1.5)
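
For reference, a standalone sketch of the behaviour this test checks
(assuming the same ``init.Constant`` API used above): the initializer
fills any requested shape with the given value.

    from neupy import init

    # Every sampled element equals the constant value
    values = init.Constant(value=7).sample(shape=(2, 2))
    # -> [[7., 7.],
    #     [7., 7.]]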
Example #3
    def test_compilation_multiple_inputs(self):
        input_matrix = asfloat(np.ones((7, 10)))
        expected_output = np.ones((7, 5))

        network = layers.join(
            [[layers.Input(10)],
             [layers.Input(10)]],
            layers.Elementwise(),
            layers.Linear(5, weight=init.Constant(0.1), bias=None))

        # Generated input variables
        predict = network.compile()
        actual_output = predict(input_matrix * 0.7, input_matrix * 0.3)
        np.testing.assert_array_almost_equal(actual_output, expected_output)

        # Pre-defined input variables
        input_variable_1 = T.matrix('x1')
        input_variable_2 = T.matrix('x2')

        predict = network.compile(input_variable_1, input_variable_2)
        actual_output = predict(input_matrix * 0.7, input_matrix * 0.3)
        np.testing.assert_array_almost_equal(actual_output, expected_output)
Example #4
class PRelu(ActivationLayer):
    """
    The layer with the parametrized ReLu activation
    function.

    Parameters
    ----------
    alpha_axes : int or tuple
        Axes that will have a unique alpha parameter.
        A single integer value means the same as a tuple with one value.
        Defaults to ``1``.

    alpha : array-like, Theano shared variable, scalar or Initializer
        Alpha parameter for each non-shared axis of the ReLu.
        A scalar value means that every element in the tensor will be
        equal to the specified value.
        You can find the default initialization methods
        :ref:`here <init-methods>`.
        Defaults to ``Constant(value=0.25)``.

    {ActivationLayer.Parameters}

    Methods
    -------
    {ActivationLayer.Methods}

    Attributes
    ----------
    {ActivationLayer.Attributes}

    References
    ----------
    .. [1] https://arxiv.org/pdf/1502.01852v1.pdf
    """
    alpha_axes = AxesProperty(default=1)
    alpha = ParameterProperty(default=init.Constant(value=0.25))

    def __init__(self, *args, **options):
        super(PRelu, self).__init__(*args, **options)

        if 0 in self.alpha_axes:
            raise ValueError("Cannot specify alpha for 0-axis")

    def validate(self, input_shape):
        if max(self.alpha_axes) > len(input_shape):
            max_axis_index = len(input_shape) - 1
            raise ValueError("Cannot specify alpha for the axis #{}. "
                             "Maximum available axis is #{} (0-based indeces)."
                             "".format(max(self.alpha_axes), max_axis_index))

    def initialize(self):
        super(PRelu, self).initialize()

        alpha_shape = [self.output_shape[axis - 1] for axis in self.alpha_axes]
        self.add_parameter(value=self.alpha, name='alpha',
                           shape=alpha_shape, trainable=True)

    def activation_function(self, input_value):
        alpha = dimshuffle(self.alpha, input_value.ndim, self.alpha_axes)
        return T.nnet.relu(input_value, alpha)
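
A hedged usage sketch (layer size and alpha value are made up for
illustration): one trainable alpha per output unit along axis 1,
which matches the default ``alpha_axes=1``.

    from neupy import layers, init

    # 5 output units, each with its own alpha initialized to 0.25
    layers.PRelu(5, alpha=init.Constant(0.25), alpha_axes=1)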
Example #5
    def test_compilation_multiple_outputs(self):
        input_matrix = asfloat(np.ones((7, 10)))
        expected_output_1 = np.ones((7, 5))
        expected_output_2 = np.ones((7, 2))

        network = layers.join(
            layers.Input(10),
            [[layers.Linear(5, weight=init.Constant(0.1), bias=None)],
             [layers.Linear(2, weight=init.Constant(0.1), bias=None)]])
        predict = network.compile()

        actual_output_1, actual_output_2 = predict(input_matrix)

        np.testing.assert_array_almost_equal(actual_output_1,
                                             expected_output_1)

        np.testing.assert_array_almost_equal(actual_output_2,
                                             expected_output_2)
Example #6
    def __init__(self,
                 n_units=None,
                 alpha=0,
                 weight=init.HeNormal(gain=2),
                 bias=init.Constant(value=0),
                 name=None):

        self.alpha = alpha
        super(Relu, self).__init__(n_units=n_units,
                                   weight=weight,
                                   bias=bias,
                                   name=name)
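
For illustration (a sketch, not from the source): a nonzero ``alpha``
turns the unit into a leaky ReLu, scaling negative inputs instead of
zeroing them.

    from neupy import layers

    # relu(x) = max(0, x); with alpha: max(0, x) + alpha * min(0, x)
    layers.Relu(10)              # standard ReLu
    layers.Relu(10, alpha=0.01)  # leaky ReLu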
Example #7
def create_variable(value, name, shape, trainable=True):
    """
    Creates a NN parameter as a TensorFlow variable.

    Parameters
    ----------
    value : array-like, TensorFlow variable, scalar or Initializer
        Default value for the parameter.

    name : str
        Shared variable name.

    shape : tuple
        Parameter's shape.

    trainable : bool
        Whether the parameter is trainable by backpropagation.

    Returns
    -------
    TensorFlow variable.
    """
    from neupy import init

    if shape is not None:
        shape = shape_to_tuple(shape)

    if isinstance(value, (tf.Variable, tf.Tensor, np.ndarray, np.matrix)):
        variable_shape = shape_to_tuple(value.shape)

        if as_tuple(variable_shape) != as_tuple(shape):
            raise ValueError(
                "Cannot create variable with name `{}`. Provided variable "
                "with shape {} is incompatible with expected shape {}"
                "".format(name, variable_shape, shape))

    if isinstance(value, (tf.Variable, tf.Tensor)):
        return value

    if isinstance(value, (int, float)):
        value = init.Constant(value)

    if isinstance(value, init.Initializer):
        value = value.sample(shape)

    return tf.Variable(
        asfloat(value),
        name=name,
        dtype=tf.float32,
        trainable=trainable,
    )
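
A usage sketch of the branches above (the names and shapes are
hypothetical): scalars are promoted to ``init.Constant``, initializers
are sampled to the requested shape, and existing TensorFlow tensors
pass through unchanged.

    from neupy import init

    # Scalar -> init.Constant(0.5) -> sampled to shape (10,) -> tf.Variable
    bias = create_variable(value=0.5, name='bias', shape=(10,))

    # Initializer sampled directly to the requested shape
    weight = create_variable(value=init.HeNormal(), name='weight',
                             shape=(10, 5))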
Example #8
    def test_gru_modify_only_one_weight_parameter(self):
        gru_layer = layers.GRU(
            2, weights=dict(weight_in_to_updategate=init.Constant(0)))

        layers.join(
            layers.Input((5, 3)),
            gru_layer,
        )

        for key, value in gru_layer.weights.items():
            if key == 'weight_in_to_updategate':
                self.assertIsInstance(value, init.Constant)
            else:
                self.assertIsInstance(value, init.XavierUniform)
Example #9
    def test_linear_layer_without_bias(self):
        input_layer = layers.Input(10)
        output_layer = layers.Linear(2, weight=init.Constant(0.1), bias=None)
        connection = input_layer > output_layer

        self.assertEqual(output_layer.bias_shape, None)

        input_value = asfloat(np.ones((1, 10)))
        actual_output = self.eval(connection.output(input_value))
        expected_output = np.ones((1, 2))

        np.testing.assert_array_almost_equal(expected_output, actual_output)

        with self.assertRaises(TypeError):
            layers.Linear(2, weight=None)
Example #10
    def test_oja_minimization(self):
        ojanet = algorithms.Oja(minimized_data_size=1,
                                step=0.01,
                                weight=init.Constant(0.1),
                                verbose=False)

        ojanet.train(self.data, epsilon=1e-5, epochs=100)
        minimized_data = ojanet.predict(self.data)
        np.testing.assert_array_almost_equal(minimized_data,
                                             self.result,
                                             decimal=2)

        reconstructed = ojanet.reconstruct(minimized_data)
        np.testing.assert_array_almost_equal(reconstructed,
                                             self.data,
                                             decimal=3)
Example #11
    def test_simple_connection_compilation(self):
        input_matrix = asfloat(np.ones((7, 10)))
        expected_output = np.ones((7, 5))

        network = layers.join(
            layers.Input(10),
            layers.Linear(5, weight=init.Constant(0.1), bias=None))

        # Generated input variables
        predict = network.compile()
        actual_output = predict(input_matrix)
        np.testing.assert_array_almost_equal(actual_output, expected_output)

        # Pre-defined input variables
        input_variable = T.matrix('x')
        predict = network.compile(input_variable)
        actual_output = predict(input_matrix)
        np.testing.assert_array_almost_equal(actual_output, expected_output)
Example #12
    def test_layer_definitions(self):
        Conv = layers.Convolution.define(
            padding='SAME',
            weight=init.Constant(1),
            bias=None,
        )
        network = layers.join(
            layers.Input((28, 28, 1)),
            Conv((3, 3, 16)),
            Conv((3, 3, 32)),
        )
        network.create_variables()

        self.assertShapesEqual(network.output_shape, (None, 28, 28, 32))

        weight_1 = self.eval(network.layers[1].weight)
        self.assertEqual(weight_1.sum(), 1 * 3 * 3 * 16)
        self.assertIsNone(network.layers[1].bias)

        weight_2 = self.eval(network.layers[2].weight)
        self.assertEqual(weight_2.sum(), 16 * 3 * 3 * 32)
        self.assertIsNone(network.layers[2].bias)
Example #13
class ParameterBasedLayer(BaseLayer):
    """
    Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer's output size.

    weight : array-like, Theano variable, scalar or Initializer
        Defines layer's weights. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`XavierNormal() <neupy.init.XavierNormal>`.

    bias : 1D array-like, Theano variable, scalar, Initializer or None
        Defines layer's bias.
        You can find the default initialization methods
        :ref:`here <init-methods>`. Defaults to
        :class:`Constant(0) <neupy.init.Constant>`.
        The ``None`` value excludes bias from the calculations and
        does not add it to the parameters list.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    size = IntProperty(minval=1)
    weight = ParameterProperty(default=init.XavierNormal())
    bias = ParameterProperty(default=init.Constant(value=0), allow_none=True)

    def __init__(self, size, **options):
        super(ParameterBasedLayer, self).__init__(size=size, **options)

    @property
    def weight_shape(self):
        return as_tuple(self.input_shape, self.output_shape)

    @property
    def bias_shape(self):
        if self.bias is not None:
            return as_tuple(self.output_shape)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.add_parameter(value=self.weight, name='weight',
                           shape=self.weight_shape, trainable=True)

        if self.bias is not None:
            self.add_parameter(value=self.bias, name='bias',
                               shape=self.bias_shape, trainable=True)

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
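
A minimal sketch of how a concrete layer could build on this class
(a hypothetical subclass, for illustration only): ``initialize``
creates ``weight`` and ``bias`` with the shapes defined above, so
``output`` only has to use them.

    class HypotheticalLinear(ParameterBasedLayer):
        def output(self, input_value):
            # weight: (input_size, output_size), bias: (output_size,)
            output = T.dot(input_value, self.weight)
            if self.bias is not None:
                output += self.bias
            return output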
Example #14
class GRU(BaseRNNLayer):
    """
    Gated Recurrent Unit (GRU) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    weights : dict or Initializer
        Weight parameters for different gates.
        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.

        - If the application requires the same initialization method
          for all weights, then it's possible to specify a single
          initialization method that will be automatically applied to
          all weight parameters in the GRU layer.

          .. code-block:: python

              layers.GRU(2, weights=init.Normal(0.1))

        - If the application requires different initialization values
          for different weights, then it's possible to specify each
          weight by name.

          .. code-block:: python

              dict(
                  weight_in_to_updategate=init.XavierUniform(),
                  weight_hid_to_updategate=init.XavierUniform(),

                  weight_in_to_resetgate=init.XavierUniform(),
                  weight_hid_to_resetgate=init.XavierUniform(),

                  weight_in_to_hidden_update=init.XavierUniform(),
                  weight_hid_to_hidden_update=init.XavierUniform(),
              )

          If the application requires modification of only one (or a
          few) parameters, then it's better to specify just the ones
          that you need to modify and ignore the other parameters.

          .. code-block:: python

              dict(weight_in_to_updategate=init.Normal(0.1))

          Other parameters like ``weight_in_to_resetgate`` will be
          equal to their default values.

    biases : dict or Initializer
        Bias parameters for different gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

        - If the application requires the same initialization method
          for all biases, then it's possible to specify a single
          initialization method that will be automatically applied to
          all bias parameters in the GRU layer.

          .. code-block:: python

              layers.GRU(2, biases=init.Constant(1))

        - If the application requires different initialization values
          for different biases, then it's possible to specify each
          bias by name.

          .. code-block:: python

              dict(
                  bias_updategate=init.Constant(0),
                  bias_resetgate=init.Constant(0),
                  bias_hidden_update=init.Constant(0),
              )

          If the application requires modification of only one (or a
          few) parameters, then it's better to specify just the ones
          that you need to modify and ignore the other parameters.

          .. code-block:: python

              dict(bias_resetgate=init.Constant(1))

          Other parameters like ``bias_updategate`` will be
          equal to their default values.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import theano.tensor as T
            dict(
                resetgate=T.nnet.sigmoid,
                updategate=T.nnet.sigmoid,
                hidden_update=T.tanh,
            )

        If the application requires modification of only one parameter,
        then it's better to specify just the one that you need to
        modify and ignore the other parameters.

        .. code-block:: python

            dict(resetgate=T.tanh)

        Other parameters like ``updategate`` or ``hidden_update``
        will be equal to their default values.

    learn_init : bool
        If ``True``, make ``hid_init`` trainable variable.
        Defaults to ``False``.

    hid_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    {BaseRNNLayer.only_return_final}

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    precompute_input : bool
        If ``True``, precompute ``input_to_hid`` before iterating
        through the sequence. This can result in a speed up at the
        expense of an increase in memory usage.
        Defaults to ``True``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.GRU(20),
                layers.Sigmoid(1),
            ]
        )
    """
    weights = MultiParameterProperty(
        default=dict(
            weight_in_to_updategate=init.XavierUniform(),
            weight_hid_to_updategate=init.XavierUniform(),

            weight_in_to_resetgate=init.XavierUniform(),
            weight_hid_to_resetgate=init.XavierUniform(),

            weight_in_to_hidden_update=init.XavierUniform(),
            weight_hid_to_hidden_update=init.XavierUniform(),
        ))
    biases = MultiParameterProperty(
        default=dict(
            bias_updategate=init.Constant(0),
            bias_resetgate=init.Constant(0),
            bias_hidden_update=init.Constant(0),
        ))
    activation_functions = MultiCallableProperty(
        default=dict(
            resetgate=T.nnet.sigmoid,
            updategate=T.nnet.sigmoid,
            hidden_update=T.tanh,
        ))

    learn_init = Property(default=False, expected_type=bool)
    hid_init = ParameterProperty(default=init.Constant(0))

    backwards = Property(default=False, expected_type=bool)
    unroll_scan = Property(default=False, expected_type=bool)
    precompute_input = Property(default=True, expected_type=bool)

    n_gradient_steps = IntProperty(default=-1)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(GRU, self).initialize()

        n_inputs = np.prod(self.input_shape[1:])
        weights = self.weights
        biases = self.biases

        # Update gate parameters
        self.weight_in_to_updategate = self.add_parameter(
            value=weights.weight_in_to_updategate,
            name='weight_in_to_updategate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_updategate = self.add_parameter(
            value=weights.weight_hid_to_updategate,
            name='weight_hid_to_updategate',
            shape=(self.size, self.size))
        self.bias_updategate = self.add_parameter(
            value=biases.bias_updategate, name='bias_updategate',
            shape=(self.size,))

        # Reset gate parameters
        self.weight_in_to_resetgate = self.add_parameter(
            value=weights.weight_in_to_resetgate,
            name='weight_in_to_resetgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_resetgate = self.add_parameter(
            value=weights.weight_hid_to_resetgate,
            name='weight_hid_to_resetgate',
            shape=(self.size, self.size))
        self.bias_resetgate = self.add_parameter(
            value=biases.bias_resetgate, name='bias_resetgate',
            shape=(self.size,))

        # Hidden update gate parameters
        self.weight_in_to_hidden_update = self.add_parameter(
            value=weights.weight_in_to_hidden_update,
            name='weight_in_to_hidden_update',
            shape=(n_inputs, self.size))
        self.weight_hid_to_hidden_update = self.add_parameter(
            value=weights.weight_hid_to_hidden_update,
            name='weight_hid_to_hidden_update',
            shape=(self.size, self.size))
        self.bias_hidden_update = self.add_parameter(
            value=biases.bias_hidden_update, name='bias_hidden_update',
            shape=(self.size,))

        self.add_parameter(value=self.hid_init, shape=(1, self.size),
                           name="hid_init", trainable=self.learn_init)

    def output(self, input_value):
        # Treat all dimensions after the second as flattened
        # feature dimensions
        if input_value.ndim > 3:
            input_value = T.flatten(input_value, 3)

        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = input_value.dimshuffle(1, 0, 2)
        seq_len, n_batch, _ = input_value.shape

        # Stack input weight matrices into a (num_inputs, 3 * num_units)
        # matrix, which speeds up computation
        weight_in_stacked = T.concatenate([
            self.weight_in_to_updategate,
            self.weight_in_to_resetgate,
            self.weight_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        weight_hid_stacked = T.concatenate([
            self.weight_hid_to_updategate,
            self.weight_hid_to_resetgate,
            self.weight_hid_to_hidden_update], axis=1)

        # Stack biases into a (3 * num_units) vector
        bias_stacked = T.concatenate([
            self.bias_updategate,
            self.bias_resetgate,
            self.bias_hidden_update], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute the input dot weight matrices before scanning.
            # weight_in_stacked is (n_features, 3 * num_units).
            # Input: (n_time_steps, n_batch, 3 * num_units).
            input_value = T.dot(input_value, weight_in_stacked) + bias_stacked

        # When theano.scan calls step, input_n will be
        # (n_batch, 3 * num_units). We define a slicing function
        # that extracts the input to each GRU gate
        def slice_w(x, n):
            s = x[:, n * self.size:(n + 1) * self.size]
            if self.size == 1:
                s = T.addbroadcast(s, 1)  # Theano cannot infer this by itself
            return s

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def one_gru_step(input_n, hid_previous, *args):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1},
            # and W_{hc} h_{t - 1}
            hid_input = T.dot(hid_previous, weight_hid_stacked)

            if self.gradient_clipping:
                input_n = theano.gradient.grad_clip(
                    input_n,
                    -self.gradient_clipping,
                    self.gradient_clipping)

                hid_input = theano.gradient.grad_clip(
                    hid_input,
                    -self.gradient_clipping,
                    self.gradient_clipping)

            if not self.precompute_input:
                # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u,
                # and W_{xc}x_t + b_c
                input_n = T.dot(input_n, weight_in_stacked) + bias_stacked

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            resetgate = self.activation_functions.resetgate(resetgate)

            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            updategate = self.activation_functions.updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate * hidden_update_hid

            if self.gradient_clipping:
                hidden_update = theano.gradient.grad_clip(
                    hidden_update,
                    -self.gradient_clipping,
                    self.gradient_clipping)

            hidden_update = self.activation_functions.hidden_update(
                hidden_update)

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate) * hid_previous + updategate * hidden_update
            return hid

        hid_init = T.dot(T.ones((n_batch, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_sequences = [weight_hid_stacked]

        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_sequences += [weight_in_stacked, bias_stacked]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            n_time_steps = self.input_shape[0]

            # Explicitly unroll the recurrence instead of using scan
            hid_out, = unroll_scan(
                fn=one_gru_step,
                sequences=[input_value],
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                n_steps=n_time_steps)

        else:
            # Scan op iterates over first dimension of input and
            # repeatedly applies the step function
            hid_out, _ = theano.scan(
                fn=one_gru_step,
                sequences=[input_value],
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                truncate_gradient=self.n_gradient_steps,
                strict=True)

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)

        # if scan is backward, reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]

        return hid_out
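
The stack-and-slice trick used above, shown standalone with NumPy
(illustrative shapes only): stacking the per-gate matrices replaces
three matrix products with one, and ``slice_w`` recovers each gate's
pre-activation.

    import numpy as np

    size = 4
    w_update = np.ones((10, size))
    w_reset = 2 * np.ones((10, size))
    w_hidden = 3 * np.ones((10, size))

    # One matmul instead of three
    w_stacked = np.concatenate([w_update, w_reset, w_hidden], axis=1)
    x = np.ones((7, 10))
    gates = x.dot(w_stacked)  # shape: (7, 3 * size)

    def slice_w(x, n):
        return x[:, n * size:(n + 1) * size]

    assert np.allclose(slice_w(gates, 1), x.dot(w_reset))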
Example #15
class LSTM(BaseRNNLayer):
    """
    Long Short Term Memory (LSTM) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    weights : dict or Initializer
        Weight parameters for different gates.
        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.

        - If the application requires the same initialization method
          for all weights, then it's possible to specify a single
          initialization method that will be automatically applied to
          all weight parameters in the LSTM layer.

          .. code-block:: python

              layers.LSTM(2, weights=init.Normal(0.1))

        - If the application requires different initialization values
          for different weights, then it's possible to specify each
          weight by name.

          .. code-block:: python

              dict(
                  weight_in_to_ingate=init.XavierUniform(),
                  weight_hid_to_ingate=init.XavierUniform(),
                  weight_cell_to_ingate=init.XavierUniform(),

                  weight_in_to_forgetgate=init.XavierUniform(),
                  weight_hid_to_forgetgate=init.XavierUniform(),
                  weight_cell_to_forgetgate=init.XavierUniform(),

                  weight_in_to_outgate=init.XavierUniform(),
                  weight_hid_to_outgate=init.XavierUniform(),
                  weight_cell_to_outgate=init.XavierUniform(),

                  weight_in_to_cell=init.XavierUniform(),
                  weight_hid_to_cell=init.XavierUniform(),
              )

          If the application requires modification of only one (or a
          few) parameters, then it's better to specify just the ones
          that you need to modify and ignore the other parameters.

          .. code-block:: python

              dict(weight_in_to_ingate=init.Normal(0.1))

          Other parameters like ``weight_cell_to_outgate`` will be
          equal to their default values.

    biases : dict or Initializer
        Bias parameters for different gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

        - If the application requires the same initialization method
          for all biases, then it's possible to specify a single
          initialization method that will be automatically applied to
          all bias parameters in the LSTM layer.

          .. code-block:: python

              layers.LSTM(2, biases=init.Constant(1))

        - If the application requires different initialization values
          for different biases, then it's possible to specify each
          bias by name.

          .. code-block:: python

              dict(
                  bias_ingate=init.Constant(0),
                  bias_forgetgate=init.Constant(0),
                  bias_cell=init.Constant(0),
                  bias_outgate=init.Constant(0),
              )

          If the application requires modification of only one (or a
          few) parameters, then it's better to specify just the ones
          that you need to modify and ignore the other parameters.

          .. code-block:: python

              dict(bias_ingate=init.Constant(1))

          Other parameters like ``bias_cell`` will be
          equal to their default values.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import theano.tensor as T
            dict(
                ingate=T.nnet.sigmoid,
                forgetgate=T.nnet.sigmoid,
                outgate=T.nnet.sigmoid,
                cell=T.tanh,
            )

        If the application requires modification of only one parameter,
        then it's better to specify just the one that you need to
        modify and ignore the other parameters.

        .. code-block:: python

            dict(ingate=T.tanh)

        Other parameters like ``forgetgate`` or ``outgate`` will be
        equal to their default values.

    learn_init : bool
        If ``True``, make ``cell_init`` and ``hid_init`` trainable
        variables. Defaults to ``False``.

    cell_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial cell state (:math:`c_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    hid_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    {BaseRNNLayer.only_return_final}

    precompute_input : bool
        If ``True``, precompute ``input_to_hid`` before iterating
        through the sequence. This can result in a speed up at the
        expense of an increase in memory usage.
        Defaults to ``True``.

    peepholes : bool
        If ``True``, the LSTM uses peephole connections.
        When ``False``, the cell parameters are ignored.
        Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    gradient_clipping : float or int
        If nonzero, the gradient messages are clipped to the
        given value during the backward pass. Defaults to ``0``.

    n_gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If ``-1``, backpropagate through the entire sequence.
        Defaults to ``-1``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.LSTM(20),
                layers.Sigmoid(1),
            ]
        )
    """
    weights = MultiParameterProperty(
        default=dict(
            weight_in_to_ingate=init.XavierUniform(),
            weight_hid_to_ingate=init.XavierUniform(),
            weight_cell_to_ingate=init.XavierUniform(),

            weight_in_to_forgetgate=init.XavierUniform(),
            weight_hid_to_forgetgate=init.XavierUniform(),
            weight_cell_to_forgetgate=init.XavierUniform(),

            weight_in_to_outgate=init.XavierUniform(),
            weight_hid_to_outgate=init.XavierUniform(),
            weight_cell_to_outgate=init.XavierUniform(),

            weight_in_to_cell=init.XavierUniform(),
            weight_hid_to_cell=init.XavierUniform(),
        ))
    biases = MultiParameterProperty(
        default=dict(
            bias_ingate=init.Constant(0),
            bias_forgetgate=init.Constant(0),
            bias_cell=init.Constant(0),
            bias_outgate=init.Constant(0),
        ))
    activation_functions = MultiCallableProperty(
        default=dict(
            ingate=T.nnet.sigmoid,
            forgetgate=T.nnet.sigmoid,
            outgate=T.nnet.sigmoid,
            cell=T.tanh,
        ))

    learn_init = Property(default=False, expected_type=bool)
    cell_init = ParameterProperty(default=init.Constant(0))
    hid_init = ParameterProperty(default=init.Constant(0))

    unroll_scan = Property(default=False, expected_type=bool)
    backwards = Property(default=False, expected_type=bool)
    precompute_input = Property(default=True, expected_type=bool)
    peepholes = Property(default=False, expected_type=bool)

    n_gradient_steps = IntProperty(default=-1)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(LSTM, self).initialize()

        n_inputs = np.prod(self.input_shape[1:])
        weights = self.weights
        biases = self.biases

        # Input gate parameters
        self.weight_in_to_ingate = self.add_parameter(
            value=weights.weight_in_to_ingate,
            name='weight_in_to_ingate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_ingate = self.add_parameter(
            value=weights.weight_hid_to_ingate,
            name='weight_hid_to_ingate',
            shape=(self.size, self.size))
        self.bias_ingate = self.add_parameter(
            value=biases.bias_ingate, name='bias_ingate',
            shape=(self.size,))

        # Forget gate parameters
        self.weight_in_to_forgetgate = self.add_parameter(
            value=weights.weight_in_to_forgetgate,
            name='weight_in_to_forgetgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_forgetgate = self.add_parameter(
            value=weights.weight_hid_to_forgetgate,
            name='weight_hid_to_forgetgate',
            shape=(self.size, self.size))
        self.bias_forgetgate = self.add_parameter(
            value=biases.bias_forgetgate, name='bias_forgetgate',
            shape=(self.size,))

        # Cell parameters
        self.weight_in_to_cell = self.add_parameter(
            value=weights.weight_in_to_cell,
            name='weight_in_to_cell',
            shape=(n_inputs, self.size))
        self.weight_hid_to_cell = self.add_parameter(
            value=weights.weight_hid_to_cell,
            name='weight_hid_to_cell',
            shape=(self.size, self.size))
        self.bias_cell = self.add_parameter(
            value=biases.bias_cell, name='bias_cell',
            shape=(self.size,))

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections.  These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.weight_cell_to_ingate = self.add_parameter(
                value=weights.weight_cell_to_ingate,
                name='weight_cell_to_ingate',
                shape=(self.size,))
            self.weight_cell_to_forgetgate = self.add_parameter(
                value=weights.weight_cell_to_forgetgate,
                name='weight_cell_to_forgetgate',
                shape=(self.size,))
            self.weight_cell_to_outgate = self.add_parameter(
                value=weights.weight_cell_to_outgate,
                name='weight_cell_to_outgate',
                shape=(self.size,))

        # Output gate parameters
        self.weight_in_to_outgate = self.add_parameter(
            value=weights.weight_in_to_outgate,
            name='weight_in_to_outgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_outgate = self.add_parameter(
            value=weights.weight_hid_to_outgate,
            name='weight_hid_to_outgate',
            shape=(self.size, self.size))
        self.bias_outgate = self.add_parameter(
            value=biases.bias_outgate, name='bias_outgate',
            shape=(self.size,))

        # Initialization parameters
        self.add_parameter(value=self.cell_init, shape=(1, self.size),
                           name="cell_init", trainable=self.learn_init)
        self.add_parameter(value=self.hid_init, shape=(1, self.size),
                           name="hid_init", trainable=self.learn_init)

    def output(self, input_value):
        # Treat all dimensions after the second as flattened
        # feature dimensions
        if input_value.ndim > 3:
            input_value = T.flatten(input_value, 3)

        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = input_value.dimshuffle(1, 0, 2)
        seq_len, n_batch, _ = input_value.shape

        # Stack input weight matrices into a (num_inputs, 4 * num_units)
        # matrix, which speeds up computation
        weight_in_stacked = T.concatenate([
            self.weight_in_to_ingate,
            self.weight_in_to_forgetgate,
            self.weight_in_to_cell,
            self.weight_in_to_outgate], axis=1)

        # Same for hidden weight matrices
        weight_hid_stacked = T.concatenate([
            self.weight_hid_to_ingate,
            self.weight_hid_to_forgetgate,
            self.weight_hid_to_cell,
            self.weight_hid_to_outgate], axis=1)

        # Stack biases into a (4 * num_units) vector
        bias_stacked = T.concatenate([
            self.bias_ingate,
            self.bias_forgetgate,
            self.bias_cell,
            self.bias_outgate], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute the input dot weight matrices before scanning.
            # weight_in_stacked is (n_features, 4 * num_units).
            # Input: (n_time_steps, n_batch, 4 * num_units).
            input_value = T.dot(input_value, weight_in_stacked) + bias_stacked

        # When theano.scan calls step, input_n will be
        # (n_batch, 4 * num_units). We define a slicing function
        # that extracts the input to each LSTM gate
        def slice_w(x, n):
            return x[:, n * self.size:(n + 1) * self.size]

        def one_lstm_step(input_n, cell_previous, hid_previous, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, weight_in_stacked) + bias_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, weight_hid_stacked)

            # Clip gradients
            if self.gradient_clipping:
                gates = theano.gradient.grad_clip(
                    gates, -self.gradient_clipping, self.gradient_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.weight_cell_to_ingate
                forgetgate += cell_previous * self.weight_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.activation_functions.ingate(ingate)
            forgetgate = self.activation_functions.forgetgate(forgetgate)
            cell_input = self.activation_functions.cell(cell_input)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * self.weight_cell_to_outgate

            outgate = self.activation_functions.outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * T.tanh(cell)
            return [cell, hid]

        ones = T.ones((n_batch, 1))
        cell_init = T.dot(ones, self.cell_init)
        hid_init = T.dot(ones, self.hid_init)

        non_sequences = [weight_hid_stacked]
        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_sequences += [weight_in_stacked, bias_stacked]

        # The "peephole" weight matrices are only used
        # when self.peepholes=True
        if self.peepholes:
            non_sequences += [self.weight_cell_to_ingate,
                              self.weight_cell_to_forgetgate,
                              self.weight_cell_to_outgate]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            n_time_steps = self.input_shape[0]

            # Explicitly unroll the recurrence instead of using scan
            _, hid_out = unroll_scan(
                fn=one_lstm_step,
                sequences=[input_value],
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                n_steps=n_time_steps)

        else:
            (_, hid_out), _ = theano.scan(
                fn=one_lstm_step,
                sequences=input_value,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                truncate_gradient=self.n_gradient_steps,
                non_sequences=non_sequences,
                strict=True)

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)

        # if scan is backward, reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]

        return hid_out
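
For reference, a single step of ``one_lstm_step`` written out with
NumPy (no peepholes, default activations, illustrative shapes):

    import numpy as np

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    size = 3
    x_t = np.random.randn(1, 4 * size)  # precomputed input dot weights + bias
    h_prev = np.zeros((1, size))
    c_prev = np.zeros((1, size))
    w_hid = np.random.randn(size, 4 * size)

    gates = x_t + h_prev.dot(w_hid)
    ingate, forgetgate, cell_input, outgate = np.split(gates, 4, axis=1)

    cell = sigmoid(forgetgate) * c_prev + sigmoid(ingate) * np.tanh(cell_input)
    hid = sigmoid(outgate) * np.tanh(cell)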
Example #16
class GRU(BaseRNNLayer):
    """
    Gated Recurrent Unit (GRU) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    input_weights : Initializer, ndarray
        Weight parameters for input connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    hidden_weights : Initializer, ndarray
        Weight parameters for hidden connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    biases : Initializer, ndarray
        Bias parameters for all gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import tensorflow as tf
            dict(
                resetgate=tf.nn.sigmoid,
                updategate=tf.nn.sigmoid,
                hidden_update=tf.tanh,
            )

        If the application requires modification of only one parameter,
        then it's better to specify just the one that you need to
        modify and ignore the other parameters.

        .. code-block:: python

            dict(resetgate=tf.tanh)

        Other parameters like ``updategate`` or ``hidden_update``
        will be equal to their default values.

    learn_init : bool
        If ``True``, make ``hidden_init`` trainable variable.
        Defaults to ``False``.

    hidden_init : array-like, TensorFlow variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    {BaseRNNLayer.only_return_final}

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.GRU(20),
                layers.Sigmoid(1),
            ]
        )
    """
    input_weights = ParameterProperty(default=init.HeNormal())
    hidden_weights = ParameterProperty(default=init.HeNormal())
    biases = ParameterProperty(default=init.Constant(0))

    activation_functions = MultiCallableProperty(default=dict(
        resetgate=tf.nn.sigmoid,
        updategate=tf.nn.sigmoid,
        hidden_update=tf.tanh,
    ))

    learn_init = Property(default=False, expected_type=bool)
    hidden_init = ParameterProperty(default=init.Constant(0))

    backwards = Property(default=False, expected_type=bool)
    unroll_scan = Property(default=False, expected_type=bool)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(GRU, self).initialize()
        n_inputs = np.prod(self.input_shape[1:])

        self.input_weights = self.add_parameter(
            value=self.input_weights,
            name='input_weights',
            shape=(n_inputs, 3 * self.size),
        )
        self.hidden_weights = self.add_parameter(
            value=self.hidden_weights,
            name='hidden_weights',
            shape=(self.size, 3 * self.size),
        )
        self.biases = self.add_parameter(
            value=self.biases,
            name='biases',
            shape=(3 * self.size, ),
        )

        self.add_parameter(value=self.hidden_init,
                           shape=(1, self.size),
                           name="hidden_init",
                           trainable=self.learn_init)

    def output(self, input_value):
        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = tf.transpose(input_value, [1, 0, 2])
        input_shape = tf.shape(input_value)
        n_batch = input_shape[1]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def one_gru_step(states, input_n):
            with tf.name_scope('gru-cell'):
                hid_previous, = states
                input_n = tf.matmul(input_n, self.input_weights) + self.biases

                # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1},
                # and W_{hc} h_{t - 1}
                hid_input = tf.matmul(hid_previous, self.hidden_weights)

                if self.gradient_clipping != 0:
                    input_n = clip_gradient(input_n, self.gradient_clipping)
                    hid_input = clip_gradient(hid_input,
                                              self.gradient_clipping)

                hid_resetgate, hid_updategate, hid_hidden = tf.split(hid_input,
                                                                     3,
                                                                     axis=1)

                in_resetgate, in_updategate, in_hidden = tf.split(input_n,
                                                                  3,
                                                                  axis=1)

                # Reset and update gates
                resetgate = self.activation_functions.resetgate(hid_resetgate +
                                                                in_resetgate)

                updategate = self.activation_functions.updategate(
                    hid_updategate + in_updategate)

                # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
                hidden_update = in_hidden + resetgate * hid_hidden

                if self.gradient_clipping != 0:
                    hidden_update = clip_gradient(hidden_update,
                                                  self.gradient_clipping)

                hidden_update = self.activation_functions.hidden_update(
                    hidden_update)

                # Compute (1 - u_t)h_{t - 1} + u_t c_t
                return [
                    hid_previous - updategate * (hid_previous - hidden_update)
                ]

        hidden_init = tf.tile(self.hidden_init, (n_batch, 1))
        sequence = input_value

        if self.backwards:
            sequence = tf.reverse(sequence, axis=[0])

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(fn=one_gru_step,
                                  sequence=sequence,
                                  outputs_info=[hidden_init])
        else:
            hid_out, = tf.scan(
                fn=one_gru_step,
                elems=input_value,
                initializer=[hidden_init],
                name='gru-scan',
            )

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # if scan is backward, reverse the output
        if self.backwards:
            hid_out = tf.reverse(hid_out, axis=[0])

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = tf.transpose(hid_out, [1, 0, 2])
        return hid_out
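
The recurrence pattern behind the ``tf.scan`` call above, in isolation
(a TF 1.x sketch with made-up shapes): scan threads the state through
the time dimension and stacks one state per step.

    import tensorflow as tf

    sequence = tf.zeros((40, 7, 10))  # (n_time_steps, n_batch, n_features)
    hidden_init = tf.zeros((7, 10))

    def step(h_prev, x_t):
        # The new state depends on the previous state and current input
        return tf.tanh(h_prev + x_t)

    # hid_out: (n_time_steps, n_batch, n_features), one state per time step
    hid_out = tf.scan(fn=step, elems=sequence, initializer=hidden_init)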
Example #17
class LSTM(BaseRNNLayer):
    """
    Long Short Term Memory (LSTM) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    input_weights : Initializer, ndarray
        Weight parameters for input connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    hidden_weights : Initializer, ndarray
        Weight parameters for hidden connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    cell_weights : Initializer, ndarray
        Weight parameters for the cell connection. Required only when
        ``peepholes=True``; otherwise they are ignored.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    biases : Initializer, ndarray
        Bias parameters for all gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import tensorflow as tf
            dict(
                ingate=tf.nn.sigmoid,
                forgetgate=tf.nn.sigmoid,
                outgate=tf.nn.sigmoid,
                cell=tf.tanh,
            )

        If the application requires modification of only one parameter,
        then it's better to specify just the one that you need to
        modify and ignore the other parameters.

        .. code-block:: python

            dict(ingate=tf.tanh)

        Other parameters like ``forgetgate`` or ``outgate`` will be
        equal to their default values.

    learn_init : bool
        If ``True``, make ``cell_init`` and ``hidden_init`` trainable
        variables. Defaults to ``False``.

    cell_init : array-like, TensorFlow variable, scalar or Initializer
        Initializer for initial cell state (:math:`c_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    hidden_init : array-like, TensorFlow variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    {BaseRNNLayer.only_return_final}

    peepholes : bool
        If ``True``, the LSTM uses peephole connections.
        When ``False``, the cell parameters are ignored.
        Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    gradient_clipping : float or int
        If nonzero, the gradient messages are clipped to the
        given value during the backward pass. Defaults to ``0``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.LSTM(20),
                layers.Sigmoid(1),
            ]
        )
    """
    input_weights = ParameterProperty(default=init.HeNormal())
    hidden_weights = ParameterProperty(default=init.HeNormal())
    cell_weights = ParameterProperty(default=init.HeNormal())
    biases = ParameterProperty(default=init.Constant(0))

    activation_functions = MultiCallableProperty(default=dict(
        ingate=tf.nn.sigmoid,
        forgetgate=tf.nn.sigmoid,
        outgate=tf.nn.sigmoid,
        cell=tf.tanh,
    ))

    learn_init = Property(default=False, expected_type=bool)
    cell_init = ParameterProperty(default=init.Constant(0))
    hidden_init = ParameterProperty(default=init.Constant(0))

    unroll_scan = Property(default=False, expected_type=bool)
    backwards = Property(default=False, expected_type=bool)
    peepholes = Property(default=False, expected_type=bool)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(LSTM, self).initialize()
        n_inputs = np.prod(self.input_shape[1:])

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections.  These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.weight_cell_to_ingate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_ingate',
                shape=(self.size, ))
            self.weight_cell_to_forgetgate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_forgetgate',
                shape=(self.size, ))
            self.weight_cell_to_outgate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_outgate',
                shape=(self.size, ))

        self.input_weights = self.add_parameter(
            value=self.input_weights,
            name='input_weights',
            shape=(n_inputs, 4 * self.size),
        )
        self.hidden_weights = self.add_parameter(
            value=self.hidden_weights,
            name='hidden_weights',
            shape=(self.size, 4 * self.size),
        )
        self.biases = self.add_parameter(
            value=self.biases,
            name='biases',
            shape=(4 * self.size, ),
        )

        # Initialization parameters
        self.add_parameter(
            value=self.cell_init,
            shape=(1, self.size),
            name="cell_init",
            trainable=self.learn_init,
        )
        self.add_parameter(
            value=self.hidden_init,
            shape=(1, self.size),
            name="hidden_init",
            trainable=self.learn_init,
        )

    def output(self, input_value):
        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = tf.transpose(input_value, [1, 0, 2])
        input_shape = tf.shape(input_value)
        n_batch = input_shape[1]

        def one_lstm_step(states, input_n):
            with tf.name_scope('lstm-cell'):
                cell_previous, hid_previous = states
                input_n = tf.matmul(input_n, self.input_weights) + self.biases

                # Calculate gates pre-activations and slice
                gates = input_n + tf.matmul(hid_previous, self.hidden_weights)

                # Clip gradients
                if self.gradient_clipping != 0:
                    gates = clip_gradient(gates, self.gradient_clipping)

                # Extract the pre-activation gate values
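                # The concatenated weight matrices store the four gates in
                # the order [ingate, forgetgate, cell, outgate], each block
                # being `self.size` columns wide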
                ingate, forgetgate, cell_input, outgate = tf.split(gates,
                                                                   4,
                                                                   axis=1)

                if self.peepholes:
                    # Compute peephole connections
                    ingate += cell_previous * self.weight_cell_to_ingate
                    forgetgate += (cell_previous *
                                   self.weight_cell_to_forgetgate)

                # Apply nonlinearities
                ingate = self.activation_functions.ingate(ingate)
                forgetgate = self.activation_functions.forgetgate(forgetgate)
                cell_input = self.activation_functions.cell(cell_input)

                # Compute new cell value
                cell = forgetgate * cell_previous + ingate * cell_input

                if self.peepholes:
                    outgate += cell * self.weight_cell_to_outgate

                outgate = self.activation_functions.outgate(outgate)

                # Compute new hidden unit activation
                hid = outgate * tf.tanh(cell)
                return [cell, hid]

        cell_init = tf.tile(self.cell_init, (n_batch, 1))
        hidden_init = tf.tile(self.hidden_init, (n_batch, 1))
        sequence = input_value

        if self.backwards:
            sequence = tf.reverse(sequence, axis=[0])

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(
                fn=one_lstm_step,
                sequence=sequence,
                outputs_info=[cell_init, hidden_init],
            )
        else:
            _, hid_out = tf.scan(
                fn=one_lstm_step,
                elems=sequence,
                initializer=[cell_init, hidden_init],
                name='lstm-scan',
            )

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # If the sequence was processed backwards, reverse the output
        if self.backwards:
            hid_out = tf.reverse(hid_out, axis=[0])

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = tf.transpose(hid_out, [1, 0, 2])

        return hid_out
Example #18
0
class BaseStepAssociative(BaseAssociative):
    """
    Base class for associative algorithms that have two layers, where
    the first one has a step function as the activation.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    {BaseAssociative.n_outputs}

    n_unconditioned : int
        Number of unconditioned units in the neural network. These
        units won't be updated during the training procedure.
        Unconditioned units should be the first features in the dataset.

    weight : array-like
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to ``None``, which means that all unconditioned
        weights will be equal to ``1`` and all other weights to ``0``.

    bias : array-like, Initializer
        Neural network bias units.
        Defaults to :class:`Constant(-0.5) <neupy.init.Constant>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    {BaseAssociative.Methods}
    """
    n_inputs = IntProperty(minval=2, required=True)
    n_unconditioned = IntProperty(minval=1, required=True)

    weight = ArrayProperty()
    bias = ParameterProperty(default=init.Constant(-0.5))

    def init_weights(self):
        if self.n_inputs <= self.n_unconditioned:
            raise ValueError(
                "Number of unconditioned features should be less than "
                "the total number of features. `n_inputs`={} and "
                "`n_unconditioned`={}".format(
                    self.n_inputs, self.n_unconditioned))

        valid_weight_shape = (self.n_inputs, self.n_outputs)
        valid_bias_shape = (self.n_outputs, )

        if self.weight is None:
            self.weight = np.zeros(valid_weight_shape)
            self.weight[:self.n_unconditioned, :] = 1

        if isinstance(self.bias, init.Initializer):
            self.bias = self.bias.sample(valid_bias_shape, return_array=True)

        super(BaseStepAssociative, self).init_weights()

        if self.bias.shape != valid_bias_shape:
            raise ValueError(
                "Bias vector has invalid shape. Got {}, expected {}"
                "".format(self.bias.shape, valid_bias_shape))

        self.bias = self.bias.astype(float)

    def predict(self, X):
        X = format_data(X, is_feature1d=False)
        raw_output = X.dot(self.weight) + self.bias
        return np.where(raw_output > 0, 1, 0)

    def train(self, X_train, *args, **kwargs):
        X_train = format_data(X_train, is_feature1d=False)
        return super(BaseStepAssociative, self).train(X_train, *args, **kwargs)

    def one_training_update(self, X_train, y_train):
        weight = self.weight
        n_unconditioned = self.n_unconditioned
        predict = self.predict
        weight_delta = self.weight_delta

        error = 0

        for x_row in X_train:
            x_row = np.expand_dims(x_row, axis=0)
            layer_output = predict(x_row)

            delta = weight_delta(x_row, layer_output)
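            # Only conditioned connections are updated; the first
            # `n_unconditioned` rows of the weight matrix stay fixed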
            weight[n_unconditioned:, :] += delta

            # This error tells us whether the network has converged
            # to some set of weights. A low error means that the weights
            # haven't been updated much during the training epoch.
            error += np.linalg.norm(delta)

        return error
Example #19
0
 def test_constant_initialize_repr(self):
     const_initializer = init.Constant(value=3)
     self.assertEqual("Constant(3)", str(const_initializer))
Example #20
0
 def __set__(self, instance, value):
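     # Plain numbers are wrapped into a Constant initializer
     # before the value is stored on the instance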
     if isinstance(value, number_type):
         value = init.Constant(value)
     super(ParameterProperty, self).__set__(instance, value)
Example #21
0
class BaseStepAssociative(BaseAssociative):
    """
    Base class for associative algorithms that have two layers, where
    the first one has a step function as the activation.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    {BaseAssociative.n_outputs}

    n_unconditioned : int
        Number of unconditioned units in the neural network. These
        units won't be updated during the training procedure.
        Unconditioned units should be the first features in the dataset.

    weight : array-like
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to ``None``, which means that all unconditioned
        weights will be equal to ``1`` and all other weights to ``0``.

    bias : array-like, Initializer
        Neural network bias units.
        Defaults to :class:`Constant(-0.5) <neupy.init.Constant>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=2, required=True)
    n_unconditioned = IntProperty(minval=1, required=True)

    weight = ArrayProperty()
    bias = ParameterProperty(default=init.Constant(-0.5))

    def init_layers(self):
        if self.n_inputs <= self.n_unconditioned:
            raise ValueError(
                "Number of unconditioned features should be less than "
                "the total number of features. `n_inputs`={} and "
                "`n_unconditioned`={}".format(self.n_inputs,
                                              self.n_unconditioned))

        valid_weight_shape = (self.n_inputs, self.n_outputs)
        valid_bias_shape = (self.n_outputs, )

        if self.weight is None:
            self.weight = np.zeros(valid_weight_shape)
            self.weight[:self.n_unconditioned, :] = 1

        if isinstance(self.bias, init.Initializer):
            self.bias = self.bias.sample(valid_bias_shape)

        super(BaseStepAssociative, self).init_layers()

        if self.bias.shape != valid_bias_shape:
            raise ValueError("Bias vector has invalid shape. Got {}, "
                             "expected {}".format(self.bias.shape,
                                                  valid_bias_shape))

        self.bias = self.bias.astype(float)

    def predict(self, input_data):
        input_data = format_data(input_data, is_feature1d=False)
        raw_output = input_data.dot(self.weight) + self.bias
        return np.where(raw_output > 0, 1, 0)

    def train(self, input_train, *args, **kwargs):
        input_train = format_data(input_train, is_feature1d=False)
        return super(BaseStepAssociative, self).train(input_train, *args,
                                                      **kwargs)

    def train_epoch(self, input_train, target_train):
        weight = self.weight
        n_unconditioned = self.n_unconditioned
        predict = self.predict
        weight_delta = self.weight_delta

        for input_row in input_train:
            input_row = np.reshape(input_row, (1, input_row.size))
            layer_output = predict(input_row)
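            # Only conditioned connections are updated; unconditioned
            # weights (the first n_unconditioned rows) stay fixed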
            weight[n_unconditioned:, :] += weight_delta(
                input_row, layer_output)
Example #22
0
class BatchNorm(BaseLayer):
    """
    Batch-normalization layer.

    Parameters
    ----------
    axes : int, tuple with int or None
        The axis or axes along which normalization is applied.
        ``None`` means that normalization will be applied over
        all axes except axis 1. In case of a 4D tensor it will
        be equal to ``(0, 2, 3)``. Defaults to ``None``.

    epsilon : float
        Epsilon is a small positive constant that is added to the
        standard deviation to prevent division by zero.
        Defaults to ``1e-5``.

    alpha : float
        Coefficient for the exponential moving average of
        batch-wise means and standard deviations computed during
        training; the closer to one, the more it will depend on
        the last batches seen. Value needs to be between ``0`` and ``1``.
        Defaults to ``0.1``.

    gamma : array-like, Theano variable, scalar or Initializer
        Default initialization methods you can
        find :ref:`here <init-methods>`.
        Defaults to ``Constant(value=1)``.

    beta : array-like, Theano variable, scalar or Initializer
        Default initialization methods you can
        find :ref:`here <init-methods>`.
        Defaults to ``Constant(value=0)``.

    running_mean : array-like, Theano variable, scalar or Initializer
        Default initialization methods you can
        find :ref:`here <init-methods>`.
        Defaults to ``Constant(value=0)``.

    running_inv_std : array-like, Theano variable, scalar or Initializer
        Default initialization methods you can
        find :ref:`here <init-methods>`.
        Defaults to ``Constant(value=1)``.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    References
    ----------
    .. [1] Batch Normalization: Accelerating Deep Network Training
           by Reducing Internal Covariate Shift,
           http://arxiv.org/pdf/1502.03167v3.pdf
    """
    axes = AxesProperty(default=None)
    epsilon = NumberProperty(default=1e-5, minval=0)
    alpha = ProperFractionProperty(default=0.1)
    beta = ParameterProperty(default=init.Constant(value=0))
    gamma = ParameterProperty(default=init.Constant(value=1))

    running_mean = ParameterProperty(default=init.Constant(value=0))
    running_inv_std = ParameterProperty(default=init.Constant(value=1))

    def initialize(self):
        super(BatchNorm, self).initialize()

        input_shape = as_tuple(None, self.input_shape)
        ndim = len(input_shape)

        if self.axes is None:
            # If ndim == 4 then axes = (0, 2, 3)
            # If ndim == 2 then axes = (0,)
            self.axes = tuple(axis for axis in range(ndim) if axis != 1)

        if any(axis >= ndim for axis in self.axes):
            raise ValueError("Cannot apply batch normalization on the axis "
                             "that doesn't exist.")

        opposite_axes = find_opposite_axes(self.axes, ndim)
        parameter_shape = [input_shape[axis] for axis in opposite_axes]

        if any(parameter is None for parameter in parameter_shape):
            unknown_dim_index = parameter_shape.index(None)
            raise ValueError("Cannot apply batch normalization on the axis "
                             "with unknown size over the dimension #{} "
                             "(0-based indices).".format(unknown_dim_index))

        self.add_parameter(value=self.running_mean, shape=parameter_shape,
                           name='running_mean', trainable=False)
        self.add_parameter(value=self.running_inv_std, shape=parameter_shape,
                           name='running_inv_std', trainable=False)

        self.add_parameter(value=self.gamma, name='gamma',
                           shape=parameter_shape, trainable=True)
        self.add_parameter(value=self.beta, name='beta',
                           shape=parameter_shape, trainable=True)

    def output(self, input_value):
        epsilon = asfloat(self.epsilon)
        alpha = asfloat(self.alpha)
        gamma, beta = self.gamma, self.beta

        ndim = input_value.ndim
        axes = self.axes

        running_mean = self.running_mean
        running_inv_std = self.running_inv_std

        input_mean = input_value.mean(axes)
        input_var = input_value.var(axes)
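        # Inverted standard deviation: 1 / sqrt(variance + epsilon)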
        input_inv_std = T.inv(T.sqrt(input_var + epsilon))

        self.updates = [(
            running_inv_std,
            asfloat(1 - alpha) * running_inv_std + alpha * input_inv_std
        ), (
            running_mean,
            asfloat(1 - alpha) * running_mean + alpha * input_mean
        )]

        if not self.training_state:
            mean = running_mean
            inv_std = running_inv_std

        else:
            mean = input_mean
            inv_std = input_inv_std

        opposite_axes = find_opposite_axes(axes, ndim)

        beta = dimshuffle(beta, ndim, opposite_axes)
        gamma = dimshuffle(gamma, ndim, opposite_axes)
        mean = dimshuffle(mean, ndim, opposite_axes)
        inv_std = dimshuffle(inv_std, ndim, opposite_axes)

        normalized_value = (input_value - mean) * inv_std
        return gamma * normalized_value + beta
Example #23
0
class RBM(BaseAlgorithm, BaseNetwork, MinibatchTrainingMixin, DumpableObject):
    """
    Boolean/Bernoulli Restricted Boltzmann Machine (RBM).
    Algorithm assumes that inputs are either binary
    values or values between 0 and 1.

    Parameters
    ----------
    n_visible : int
        Number of visible units. Number of features (columns)
        in the input data.

    n_hidden : int
        Number of hidden units. The larger the number, the more
        information the network can capture from the data, but it
        also means that the network is more likely to overfit.

    batch_size : int
        Size of the mini-batch. Defaults to ``10``.

    weight : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Normal <neupy.init.Normal>`.

    hidden_bias : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    visible_bias : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    train(input_train, epochs=100)
        Trains network.

    {BaseSkeleton.fit}

    visible_to_hidden(visible_input)
        Propagates data through the network and returns the output
        from the hidden layer.

    hidden_to_visible(hidden_input)
        Propagates output from the hidden layer backward
        to the visible.

    gibbs_sampling(visible_input, n_iter=1)
        Makes Gibbs sampling ``n`` times using visible input.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [1, 0, 1, 0],
    ...     [1, 0, 1, 0],
    ...     [1, 0, 0, 0],  # incomplete sample
    ...     [1, 0, 1, 0],
    ...
    ...     [0, 1, 0, 1],
    ...     [0, 0, 0, 1],  # incomplete sample
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ... ])
    >>>
    >>> rbm = algorithms.RBM(n_visible=4, n_hidden=1)
    >>> rbm.train(data, epochs=100)
    >>>
    >>> hidden_states = rbm.visible_to_hidden(data)
    >>> hidden_states.round(2)
    array([[ 0.99],
           [ 0.99],
           [ 0.95],
           [ 0.99],
           [ 0.  ],
           [ 0.01],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ]])

    References
    ----------
    .. [1] G. Hinton, A Practical Guide to Training Restricted
        Boltzmann Machines, 2010.
        http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    n_visible = IntProperty(minval=1)
    n_hidden = IntProperty(minval=1)
    batch_size = IntProperty(minval=1, default=10)

    weight = ParameterProperty(default=init.Normal())
    hidden_bias = ParameterProperty(default=init.Constant(value=0))
    visible_bias = ParameterProperty(default=init.Constant(value=0))

    def __init__(self, n_visible, n_hidden, **options):
        options.update({'n_visible': n_visible, 'n_hidden': n_hidden})
        super(RBM, self).__init__(**options)

    def init_input_output_variables(self):
        with tf.variable_scope('rbm'):
            self.weight = create_shared_parameter(value=self.weight,
                                                  name='weight',
                                                  shape=(self.n_visible,
                                                         self.n_hidden))
            self.hidden_bias = create_shared_parameter(
                value=self.hidden_bias,
                name='hidden-bias',
                shape=(self.n_hidden, ),
            )
            self.visible_bias = create_shared_parameter(
                value=self.visible_bias,
                name='visible-bias',
                shape=(self.n_visible, ),
            )

            self.variables.update(
                network_input=tf.placeholder(
                    tf.float32,
                    (None, self.n_visible),
                    name="network-input",
                ),
                network_hidden_input=tf.placeholder(
                    tf.float32,
                    (None, self.n_hidden),
                    name="network-hidden-input",
                ),
            )

    def init_variables(self):
        with tf.variable_scope('rbm'):
            self.variables.update(
                h_samples=tf.Variable(
                    tf.zeros([self.batch_size, self.n_hidden]),
                    name="hidden-samples",
                    dtype=tf.float32,
                ),
            )

    def init_methods(self):
        def free_energy(visible_sample):
            with tf.name_scope('free-energy'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias

                visible_bias_term = dot(visible_sample, self.visible_bias)

                # We can get infinity when wx_b is a relatively large number
                # (maybe 100). Taking the exponent makes it even larger and
                # with float32 it can overflow to infinity. But because the
                # number is so large we don't care about the +1 term before
                # taking the logarithm, so we can use the value as is since
                # the operation won't change the result.
                hidden_terms = tf.where(
                    # exp(30) is such a big number that +1 won't
                    # make any difference in the outcome.
                    tf.greater(wx_b, 30),
                    wx_b,
                    tf.log1p(tf.exp(wx_b)),
                )

                hidden_term = tf.reduce_sum(hidden_terms, axis=1)
                return -(visible_bias_term + hidden_term)

        def visible_to_hidden(visible_sample):
            with tf.name_scope('visible-to-hidden'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias
                return tf.nn.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            with tf.name_scope('hidden-to-visible'):
                wx = tf.matmul(hidden_sample, self.weight, transpose_b=True)
                wx_b = wx + self.visible_bias
                return tf.nn.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            with tf.name_scope('sample-hidden-to-visible'):
                hidden_prob = visible_to_hidden(visible_sample)
                hidden_sample = random_binomial(hidden_prob)
                return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            with tf.name_scope('sample-visible-to-hidden'):
                visible_prob = hidden_to_visible(hidden_sample)
                visible_sample = random_binomial(visible_prob)
                return visible_sample

        network_input = self.variables.network_input
        network_hidden_input = self.variables.network_hidden_input
        input_shape = tf.shape(network_input)
        n_samples = input_shape[0]

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        with tf.name_scope('positive-values'):
            # We have to use `cond` instead of `where`, because
            # different if-else cases might have different shapes
            # and it triggers exception in tensorflow.
            v_pos = tf.cond(
                tf.equal(n_samples, self.batch_size), lambda: network_input,
                lambda: random_sample(network_input, self.batch_size))
            h_pos = visible_to_hidden(v_pos)

        with tf.name_scope('negative-values'):
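            # Negative phase: visible units are sampled from the persistent
            # hidden samples (`h_samples` keeps the Gibbs chain state
            # between training updates)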
            v_neg = sample_visible_from_hidden(h_samples)
            h_neg = visible_to_hidden(v_neg)

        with tf.name_scope('weight-update'):
            weight_update = (
                tf.matmul(v_pos, h_pos, transpose_a=True) -
                tf.matmul(v_neg, h_neg, transpose_a=True)) / asfloat(n_samples)

        with tf.name_scope('hidden-bias-update'):
            h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0)

        with tf.name_scope('visible-bias-update'):
            v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0)

        with tf.name_scope('flipped-input-features'):
            # Each row will have one randomly chosen feature marked with 1;
            # all other values will be equal to 0
            possible_feature_corruptions = tf.eye(self.n_visible)
            corrupted_features = random_sample(possible_feature_corruptions,
                                               n_samples)

            rounded_input = tf.round(network_input)
            # If we scale input values from the [0, 1] range to [-1, 1]
            # then it becomes easier to flip feature values with a simple
            # multiplication.
            scaled_rounded_input = 2 * rounded_input - 1
            scaled_flipped_rounded_input = (
                # For corrupted_features we convert 0 to 1 and 1 to -1;
                # this way, after multiplication, we flip the sign
                # wherever the transformed corrupted_features equals -1
                (-2 * corrupted_features + 1) * scaled_rounded_input)
            # Scale it back to the [0, 1] range
            flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2

        with tf.name_scope('pseudo-likelihood-loss'):
            # Stochastic pseudo-likelihood
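            # For every sample one randomly chosen feature is flipped and
            # the estimate is n_visible * log(sigmoid(F(x_flipped) - F(x))),
            # where F is the free energy defined above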
            error = tf.reduce_mean(self.n_visible * tf.log_sigmoid(
                free_energy(flipped_rounded_input) -
                free_energy(rounded_input)))

        with tf.name_scope('gibbs-sampling'):
            gibbs_sampling = sample_visible_from_hidden(
                sample_hidden_from_visible(network_input))

        initialize_uninitialized_variables()
        self.methods.update(
            train_epoch=function(
                [network_input],
                error,
                name='rbm/train-epoch',
                updates=[
                    (weight, weight + step * weight_update),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, random_binomial(p=h_neg)),
                ],
            ),
            prediction_error=function(
                [network_input],
                error,
                name='rbm/prediction-error',
            ),
            diff1=function(
                [network_input],
                free_energy(flipped_rounded_input),
                name='rbm/diff1-error',
            ),
            diff2=function(
                [network_input],
                free_energy(rounded_input),
                name='rbm/diff2-error',
            ),
            visible_to_hidden=function(
                [network_input],
                visible_to_hidden(network_input),
                name='rbm/visible-to-hidden',
            ),
            hidden_to_visible=function(
                [network_hidden_input],
                hidden_to_visible(network_hidden_input),
                name='rbm/hidden-to-visible',
            ),
            gibbs_sampling=function(
                [network_input],
                gibbs_sampling,
                name='rbm/gibbs-sampling',
            ),
        )

    def train(self, input_train, input_test=None, epochs=100, summary='table'):
        """
        Train RBM.

        Parameters
        ----------
        input_train : 1D or 2D array-like
        input_test : 1D or 2D array-like or None
            Defaults to ``None``.
        epochs : int
            Number of training epochs. Defaults to ``100``.
        summary : {'table', 'inline'}
            Training summary type. Defaults to ``'table'``.
        """
        return super(RBM, self).train(input_train=input_train,
                                      target_train=None,
                                      input_test=input_test,
                                      target_test=None,
                                      epochs=epochs,
                                      epsilon=None,
                                      summary=summary)

    def train_epoch(self, input_train, target_train=None):
        """
        Train one epoch.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        Returns
        -------
        float
        """
        errors = self.apply_batches(
            function=self.methods.train_epoch,
            input_data=input_train,
            description='Training batches',
            show_error_output=True,
        )

        n_samples = len(input_train)
        return average_batch_errors(errors, n_samples, self.batch_size)

    def visible_to_hidden(self, visible_input):
        """
        Propagates data through the network and returns the output
        from the hidden layer.

        Parameters
        ----------
        visible_input : array-like (n_samples, n_visible_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.visible_to_hidden,
            input_data=visible_input,
            description='Hidden from visible batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def hidden_to_visible(self, hidden_input):
        """
        Propagates output from the hidden layer backward
        to the visible.

        Parameters
        ----------
        hidden_input : array-like (n_samples, n_hidden_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_hidden == 1)
        hidden_input = format_data(hidden_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.hidden_to_visible,
            input_data=hidden_input,
            description='Visible from hidden batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def prediction_error(self, input_data, target_data=None):
        """
        Compute the pseudo-likelihood of input samples.

        Parameters
        ----------
        input_data : array-like
            Values of the visible layer

        Returns
        -------
        float
            Value of the pseudo-likelihood.
        """
        is_input_feature1d = (self.n_visible == 1)
        input_data = format_data(input_data, is_input_feature1d)

        errors = self.apply_batches(
            function=self.methods.prediction_error,
            input_data=input_data,
            description='Validation batches',
            show_error_output=True,
        )
        return average_batch_errors(
            errors,
            n_samples=len(input_data),
            batch_size=self.batch_size,
        )

    def gibbs_sampling(self, visible_input, n_iter=1):
        """
        Makes Gibbs sampling n times using visible input.

        Parameters
        ----------
        visible_input : 1d or 2d array
        n_iter : int
            Number of Gibbs sampling iterations. Defaults to ``1``.

        Returns
        -------
        array-like
            Output from the visible units after performing n
            Gibbs sampling iterations. The array will contain only
            binary values (0 and 1).
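
        Examples
        --------
        >>> # A minimal sketch, assuming an already trained `rbm`
        >>> # instance and binary `data` as in the class-level example
        >>> samples = rbm.gibbs_sampling(data, n_iter=10)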
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        gibbs_sampling = self.methods.gibbs_sampling

        input_ = visible_input
        for iteration in range(n_iter):
            input_ = gibbs_sampling(input_)

        return input_
Example #24
0
 def test_initializer_get_value_exception(self):
     initializer = init.Constant()
     with self.assertRaises(init.UninitializedException):
         initializer.get_value()
Example #25
0
class RBM(BaseAlgorithm, BaseNetwork, MinibatchTrainingMixin):
    """
    Boolean/Bernoulli Restricted Boltzmann Machine (RBM).
    Algorithm assumes that inputs are either binary
    values or values between 0 and 1.

    Parameters
    ----------
    n_visible : int
        Number of visible units.

    n_hidden : int
        Number of hidden units.

    {MinibatchTrainingMixin.batch_size}

    weight : array-like, Theano variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`XavierNormal <neupy.init.XavierNormal>`.

    hidden_bias : array-like, Theano variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    visible_bias : array-like, Theano variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    train(input_train, epochs=100)
        Trains network.

    {BaseSkeleton.fit}

    visible_to_hidden(visible_input)
        Propagates data through the network and returns the output
        from the hidden layer.

    hidden_to_visible(hidden_input)
        Propagates output from the hidden layer backward
        to the visible.

    gibbs_sampling(visible_input, n_iter=1)
        Makes Gibbs sampling ``n`` times using visible input.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [1, 0, 1, 0],
    ...     [1, 0, 1, 0],
    ...     [1, 0, 0, 0],  # incomplete sample
    ...     [1, 0, 1, 0],
    ...
    ...     [0, 1, 0, 1],
    ...     [0, 0, 0, 1],  # incomplete sample
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ... ])
    >>>
    >>> rbm = algorithms.RBM(n_visible=4, n_hidden=1)
    >>> rbm.train(data, epochs=100)
    >>>
    >>> hidden_states = rbm.visible_to_hidden(data)
    >>> hidden_states.round(2)
    array([[ 0.99],
           [ 0.99],
           [ 0.95],
           [ 0.99],
           [ 0.  ],
           [ 0.01],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ]])

    References
    ----------
    .. [1] G. Hinton, A Practical Guide to Training Restricted
        Boltzmann Machines, 2010.
        http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    n_visible = IntProperty(minval=1)
    n_hidden = IntProperty(minval=1)

    weight = ParameterProperty(default=init.XavierNormal())
    hidden_bias = ParameterProperty(default=init.Constant(value=0))
    visible_bias = ParameterProperty(default=init.Constant(value=0))

    def __init__(self, n_visible, n_hidden, **options):
        self.theano_random = theano_random_stream()

        super(ConfigurableABC, self).__init__(n_hidden=n_hidden,
                                              n_visible=n_visible,
                                              **options)

        self.weight = create_shared_parameter(value=self.weight,
                                              name='algo:rbm/matrix:weight',
                                              shape=(n_visible, n_hidden))
        self.hidden_bias = create_shared_parameter(
            value=self.hidden_bias,
            name='algo:rbm/vector:hidden-bias',
            shape=(n_hidden, ),
        )
        self.visible_bias = create_shared_parameter(
            value=self.visible_bias,
            name='algo:rbm/vector:visible-bias',
            shape=(n_visible, ),
        )

        super(RBM, self).__init__(**options)

    def init_input_output_variables(self):
        self.variables.update(
            network_input=T.matrix(name='algo:rbm/var:network-input'),
        )

    def init_variables(self):
        self.variables.update(
            h_samples=theano.shared(
                name='algo:rbm/matrix:hidden-samples',
                value=asint(np.zeros((self.batch_size, self.n_hidden))),
            ),
        )

    def init_methods(self):
        def free_energy(visible_sample):
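            # Free energy of a Bernoulli RBM:
            # F(v) = -v.dot(v_bias) - sum(log(1 + exp(v.dot(W) + h_bias)))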
            wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
            visible_bias_term = T.dot(visible_sample, self.visible_bias)
            hidden_term = T.log(asfloat(1) + T.exp(wx_b)).sum(axis=1)
            return -visible_bias_term - hidden_term

        def visible_to_hidden(visible_sample):
            wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
            return T.nnet.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            wx_b = T.dot(hidden_sample, self.weight.T) + self.visible_bias
            return T.nnet.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            theano_random = self.theano_random
            hidden_prob = visible_to_hidden(visible_sample)
            hidden_sample = theano_random.binomial(n=1,
                                                   p=hidden_prob,
                                                   dtype=theano.config.floatX)
            return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            theano_random = self.theano_random
            visible_prob = hidden_to_visible(hidden_sample)
            visible_sample = theano_random.binomial(n=1,
                                                    p=visible_prob,
                                                    dtype=theano.config.floatX)
            return visible_sample

        network_input = self.variables.network_input
        n_samples = asfloat(network_input.shape[0])
        theano_random = self.theano_random

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        sample_indeces = theano_random.random_integers(
            low=0, high=n_samples - 1, size=(self.batch_size, ))
        v_pos = ifelse(
            T.eq(n_samples, self.batch_size),
            network_input,
            # In case the final batch has fewer
            # samples than expected
            network_input[sample_indeces])
        h_pos = visible_to_hidden(v_pos)

        v_neg = sample_visible_from_hidden(h_samples)
        h_neg = visible_to_hidden(v_neg)

        weight_update = v_pos.T.dot(h_pos) - v_neg.T.dot(h_neg)
        h_bias_update = (h_pos - h_neg).mean(axis=0)
        v_bias_update = (v_pos - v_neg).mean(axis=0)

        # Stochastic pseudo-likelihood
        feature_index_to_flip = theano_random.random_integers(
            low=0,
            high=self.n_visible - 1,
        )
        rounded_input = T.round(network_input)
        rounded_input_flip = T.set_subtensor(
            rounded_input[:, feature_index_to_flip],
            1 - rounded_input[:, feature_index_to_flip])
        error = T.mean(self.n_visible * T.log(
            T.nnet.sigmoid(
                free_energy(rounded_input_flip) - free_energy(rounded_input))))

        self.methods.update(
            train_epoch=theano.function(
                [network_input],
                error,
                name='algo:rbm/func:train-epoch',
                updates=[
                    (weight, weight + step * weight_update / n_samples),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, asint(theano_random.binomial(n=1, p=h_neg))),
                ],
            ),
            prediction_error=theano.function(
                [network_input],
                error,
                name='algo:rbm/func:prediction-error',
            ),
            visible_to_hidden=theano.function(
                [network_input],
                visible_to_hidden(network_input),
                name='algo:rbm/func:visible-to-hidden',
            ),
            hidden_to_visible=theano.function(
                [network_input],
                hidden_to_visible(network_input),
                name='algo:rbm/func:hidden-to-visible',
            ),
            gibbs_sampling=theano.function(
                [network_input],
                sample_visible_from_hidden(
                    sample_hidden_from_visible(network_input)),
                name='algo:rbm/func:gibbs-sampling',
            ),
        )

    def train(self, input_train, input_test=None, epochs=100, summary='table'):
        """
        Train RBM.

        Parameters
        ----------
        input_train : 1D or 2D array-like
        input_test : 1D or 2D array-like or None
            Defaults to ``None``.
        epochs : int
            Number of training epochs. Defaults to ``100``.
        summary : {'table', 'inline'}
            Training summary type. Defaults to ``'table'``.
        """
        return super(RBM, self).train(input_train=input_train,
                                      target_train=None,
                                      input_test=input_test,
                                      target_test=None,
                                      epochs=epochs,
                                      epsilon=None,
                                      summary=summary)

    def train_epoch(self, input_train, target_train=None):
        """
        Train one epoch.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        Returns
        -------
        float
        """
        errors = self.apply_batches(
            function=self.methods.train_epoch,
            input_data=input_train,
            description='Training batches',
            show_error_output=True,
        )

        n_samples = len(input_train)
        return average_batch_errors(errors, n_samples, self.batch_size)

    def visible_to_hidden(self, visible_input):
        """
        Propagates data through the network and returns the output
        from the hidden layer.

        Parameters
        ----------
        visible_input : array-like (n_samples, n_visible_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        outputs = self.apply_batches(function=self.methods.visible_to_hidden,
                                     input_data=visible_input,
                                     description='Hidden from visible batches',
                                     show_progressbar=True,
                                     show_error_output=False)

        return np.concatenate(outputs, axis=0)

    def hidden_to_visible(self, hidden_input):
        """
        Propagates output from the hidden layer backward
        to the visible.

        Parameters
        ----------
        hidden_input : array-like (n_samples, n_hidden_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_hidden == 1)
        hidden_input = format_data(hidden_input, is_input_feature1d)

        outputs = self.apply_batches(function=self.methods.hidden_to_visible,
                                     input_data=hidden_input,
                                     description='Visible from hidden batches',
                                     show_progressbar=True,
                                     show_error_output=False)

        return np.concatenate(outputs, axis=0)

    def prediction_error(self, input_data, target_data=None):
        """
        Compute the pseudo-likelihood of input samples.

        Parameters
        ----------
        input_data : array-like
            Values of the visible layer

        Returns
        -------
        float
            Value of the pseudo-likelihood.
        """
        is_input_feature1d = (self.n_visible == 1)
        input_data = format_data(input_data, is_input_feature1d)

        errors = self.apply_batches(
            function=self.methods.prediction_error,
            input_data=input_data,
            description='Validation batches',
            show_error_output=True,
        )
        return average_batch_errors(errors,
                                    n_samples=len(input_data),
                                    batch_size=self.batch_size)

    def gibbs_sampling(self, visible_input, n_iter=1):
        """
        Makes Gibbs sampling n times using visible input.

        Parameters
        ----------
        visible_input : 1d or 2d array
        n_iter : int
            Number of Gibbs sampling iterations. Defaults to ``1``.

        Returns
        -------
        array-like
            Output from the visible units after performing n
            Gibbs sampling iterations. The array will contain only
            binary values (0 and 1).
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        gibbs_sampling = self.methods.gibbs_sampling

        input_ = visible_input
        for iteration in range(n_iter):
            input_ = gibbs_sampling(input_)

        return input_
Example #26
0
class PRelu(ActivationLayer):
    """
    The layer with the parametrized ReLu activation
    function.

    Parameters
    ----------
    alpha_axes : int or tuple
        Axes along which the alpha parameter is not shared; every
        position along these axes gets its own alpha value. A single
        integer value means the same as a tuple with one value.
        Defaults to ``-1``.

    alpha : array-like, Tensorflow variable, scalar or Initializer
        Alpha parameter per each non-shared axis for the ReLu.
        Scalar value means that each element in the tensor will be
        equal to the specified value.
        Default initialization methods you can find
        :ref:`here <init-methods>`.
        Defaults to ``Constant(value=0.25)``.

    {ActivationLayer.Parameters}

    Methods
    -------
    {ActivationLayer.Methods}

    Attributes
    ----------
    {ActivationLayer.Attributes}

    Examples
    --------
    Feedforward Neural Networks (FNN)

    >>> from neupy.layers import *
    >>> network = Input(10) > PRelu(20) > PRelu(1)

    Convolutional Neural Networks (CNN)

    >>> from neupy.layers import *
    >>> network = join(
    ...     Input((32, 32, 3)),
    ...     Convolution((3, 3, 16)) > PRelu(),
    ...     Convolution((3, 3, 32)) > PRelu(),
    ...     Reshape(),
    ...     Softmax(10),
    ... )

    References
    ----------
    .. [1] https://arxiv.org/pdf/1502.01852v1.pdf
    """
    alpha_axes = AxesProperty(default=-1)
    alpha = ParameterProperty(default=init.Constant(value=0.25))

    def __init__(self, *args, **options):
        super(PRelu, self).__init__(*args, **options)

        if 0 in self.alpha_axes:
            raise ValueError("Cannot specify alpha for 0-axis")

    def validate(self, input_shape):
        if max(self.alpha_axes) > len(input_shape):
            max_axis_index = len(input_shape) - 1
            raise ValueError("Cannot specify alpha for the axis #{}. "
                             "Maximum available axis is {} (0-based indices)."
                             "".format(max(self.alpha_axes), max_axis_index))

    def initialize(self):
        super(PRelu, self).initialize()
        output_shape = as_tuple(None, self.output_shape)

        alpha_shape = [output_shape[axis] for axis in self.alpha_axes]
        self.add_parameter(
            value=self.alpha,
            name='alpha',
            shape=alpha_shape,
            trainable=True,
        )

    def activation_function(self, input_value):
        input_value = tf.convert_to_tensor(input_value, dtype=tf.float32)
        ndim = len(input_value.get_shape())

        dimensions = np.arange(ndim)
        alpha_axes = dimensions[list(self.alpha_axes)]

        alpha = dimshuffle(self.alpha, ndim, alpha_axes)
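        # PReLU: f(x) = x for x >= 0 and alpha * x for x < 0,
        # where alpha is broadcast over the shared axes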
        return tf.nn.leaky_relu(tf.to_float(input_value), tf.to_float(alpha))