Example #1
    def test_gain_relu_he_normal_scale(self):
        he_initializer = init.HeNormal(gain=1, seed=0)
        sample_1 = self.eval(he_initializer.sample((4, 4)))

        he_initializer = init.HeNormal(gain=2, seed=0)
        sample_2 = self.eval(he_initializer.sample((4, 4)))

        self.assertAlmostEqual(np.mean(sample_2 / sample_1),
                               math.sqrt(2),
                               places=5)
    def test_gain_relu_he_normal_scale(self):
        environment.reproducible()
        he_initializer = init.HeNormal(gain=1)
        sample_1 = he_initializer.sample((3, 2))

        environment.reproducible()
        he_initializer = init.HeNormal(gain='relu')
        sample_2 = he_initializer.sample((3, 2))

        self.assertAlmostEqual(np.mean(sample_2 / sample_1), math.sqrt(2))
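
The two tests above both check how the ``gain`` argument rescales the sampled values. Below is a minimal NumPy sketch of that behaviour, assuming He-normal samples come from a zero-mean normal whose std is proportional to ``sqrt(1 / fan_in)`` and gets multiplied by ``gain`` (with ``gain='relu'`` treated as ``sqrt(2)``). The exact scaling convention differs between neupy versions, so this is only illustrative, not the library's implementation.

import math

import numpy as np


def he_normal_sample(shape, gain=1.0, seed=None):
    # Hypothetical helper, not part of neupy: draws values from
    # N(0, std^2) with std = gain * sqrt(1 / fan_in).
    gain = math.sqrt(2) if gain == 'relu' else gain
    fan_in = shape[0]
    std = gain * math.sqrt(1.0 / fan_in)
    return np.random.RandomState(seed).normal(0.0, std, size=shape)


sample_1 = he_normal_sample((3, 2), gain=1, seed=0)
sample_2 = he_normal_sample((3, 2), gain='relu', seed=0)
print(np.mean(sample_2 / sample_1))  # ~1.414, the sqrt(2) ratio the tests expect
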
Example #3
    def __init__(
            self,
            n_units,
            only_return_final=True,
            # Trainable parameters
            input_weights=init.HeNormal(),
            hidden_weights=init.HeNormal(),
            cell_weights=init.HeNormal(),
            biases=0,
            # Activation functions
            ingate=tf.nn.sigmoid,
            forgetgate=tf.nn.sigmoid,
            outgate=tf.nn.sigmoid,
            cell=tf.tanh,
            # Cell states
            cell_init=0,
            hidden_init=0,
            learn_init=False,
            # Misc
            unroll_scan=False,
            backwards=False,
            peepholes=False,
            gradient_clipping=0,
            name=None):

        super(LSTM, self).__init__(
            n_units=n_units,
            only_return_final=only_return_final,
            name=name,
        )

        self.input_weights = input_weights
        self.hidden_weights = hidden_weights
        self.cell_weights = cell_weights
        self.biases = biases

        self.ingate = ingate
        self.forgetgate = forgetgate
        self.outgate = outgate
        self.cell = cell

        self.learn_init = learn_init
        self.cell_init = cell_init
        self.hidden_init = hidden_init

        self.unroll_scan = unroll_scan
        self.backwards = backwards
        self.peepholes = peepholes
        self.gradient_clipping = gradient_clipping
Example #4
    def test_he_normal(self):
        he_normal = init.HeNormal()
        weight = self.eval(he_normal.sample((40, 40)))

        self.assertNormalyDistributed(weight)
        self.assertAlmostEqual(weight.mean(), 0, places=1)
        self.assertAlmostEqual(weight.std(), math.sqrt(1. / 40), places=2)
    def test_he_normal(self):
        he_normal = init.HeNormal()
        weight = he_normal.sample((10, 30))

        self.assertNormalyDistributed(weight)
        self.assertAlmostEqual(weight.mean(), 0, places=1)
        self.assertAlmostEqual(weight.std(), math.sqrt(2. / 10), places=2)
Example #6
    def __init__(self, input_size, output_size,
                 weight=init.HeNormal(), name=None):

        super(Embedding, self).__init__(name=name)

        self.input_size = input_size
        self.output_size = output_size
        self.weight = weight
Example #7
class Relu(ActivationLayer):
    """
    The layer with the rectifier (ReLU) activation function.

    Parameters
    ----------
    alpha : float
        Alpha parameter defines the slope for the negative
        values. If ``alpha`` is a non-zero value then the
        layer behaves like a leaky ReLU. Defaults to ``0``.

    {ActivationLayer.size}

    weight : array-like, Tensorflow variable, scalar or Initializer
        Defines layer's weights. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`HeNormal(gain=2) <neupy.init.HeNormal>`.

    {ParameterBasedLayer.bias}

    {BaseLayer.Parameters}

    Methods
    -------
    {ActivationLayer.Methods}

    Attributes
    ----------
    {ActivationLayer.Attributes}

    Examples
    --------
    Feedforward Neural Networks (FNN)

    >>> from neupy.layers import *
    >>> network = Input(10) > Relu(20) > Relu(1)

    Convolutional Neural Networks (CNN)

    >>> from neupy.layers import *
    >>> network = join(
    ...     Input((32, 32, 3)),
    ...     Convolution((3, 3, 16)) > Relu(),
    ...     Convolution((3, 3, 32)) > Relu(),
    ...     Reshape(),
    ...     Softmax(10),
    ... )
    """
    alpha = NumberProperty(default=0, minval=0)
    weight = ParameterProperty(default=init.HeNormal(gain=2))

    def activation_function(self, input_value):
        if self.alpha == 0:
            return tf.nn.relu(input_value)
        return tf.nn.leaky_relu(input_value, asfloat(self.alpha))
Example #8
    def __init__(self,
                 n_units=None,
                 weight=init.HeNormal(),
                 bias=0,
                 name=None):

        super(Linear, self).__init__(name=name)

        self.n_units = n_units
        self.weight = weight
        self.bias = bias
Example #9
    def __init__(self, size, padding='valid', stride=1, dilation=1,
                 weight=init.HeNormal(gain=2), bias=0, name=None):

        super(Convolution, self).__init__(name=name)

        self.size = size
        self.padding = padding
        self.stride = stride
        self.dilation = dilation
        self.weight = weight
        self.bias = bias
Example #10
    def __init__(
            self,
            n_units,
            only_return_final=True,
            # Trainable parameters
            input_weights=init.HeNormal(),
            hidden_weights=init.HeNormal(),
            biases=0,
            # Activation functions
            resetgate=tf.nn.sigmoid,
            updategate=tf.nn.sigmoid,
            hidden_update=tf.tanh,
            # Cell states
            hidden_init=0,
            learn_init=False,
            # Misc
            unroll_scan=False,
            backwards=False,
            gradient_clipping=0,
            name=None):

        super(GRU, self).__init__(
            n_units=n_units,
            only_return_final=only_return_final,
            name=name,
        )

        self.input_weights = input_weights
        self.hidden_weights = hidden_weights
        self.biases = biases

        self.resetgate = resetgate
        self.updategate = updategate
        self.hidden_update = hidden_update

        self.hidden_init = hidden_init
        self.learn_init = learn_init

        self.unroll_scan = unroll_scan
        self.backwards = backwards
        self.gradient_clipping = gradient_clipping
Example #11
    def __init__(self,
                 n_units=None,
                 alpha=0,
                 weight=init.HeNormal(gain=2),
                 bias=init.Constant(value=0),
                 name=None):

        self.alpha = alpha
        super(Relu, self).__init__(n_units=n_units,
                                   weight=weight,
                                   bias=bias,
                                   name=name)
Example #12
    def __init__(self,
                 size,
                 padding='valid',
                 stride=1,
                 weight=init.HeNormal(gain=2),
                 bias=0,
                 name=None):

        super(Deconvolution, self).__init__(size=size,
                                            padding=padding,
                                            stride=stride,
                                            dilation=1,
                                            weight=weight,
                                            bias=bias,
                                            name=name)
Example #13
    def __init__(self,
                 n_units=None,
                 alpha_axes=-1,
                 alpha=0.25,
                 weight=init.HeNormal(gain=2),
                 bias=0,
                 name=None):

        self.alpha = alpha
        self.alpha_axes = as_tuple(alpha_axes)

        if 0 in self.alpha_axes:
            raise ValueError("Cannot specify alpha for 0-axis")

        super(PRelu, self).__init__(n_units=n_units,
                                    weight=weight,
                                    bias=bias,
                                    name=name)
Example #14
class Relu(ActivationLayer):
    """
    The layer with the rectifier (ReLU) activation function.

    Parameters
    ----------
    alpha : float
        Alpha parameter defines the slope for the negative
        values. If ``alpha`` is a non-zero value then the
        layer behaves like a leaky ReLU. Defaults to ``0``.

    {ActivationLayer.size}

    weight : array-like, Tensorflow variable, scalar or Initializer
        Defines layer's weights. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`HeNormal(gain=2) <neupy.init.HeNormal>`.

    {ParameterBasedLayer.bias}

    {BaseLayer.Parameters}

    Methods
    -------
    {ActivationLayer.Methods}

    Attributes
    ----------
    {ActivationLayer.Attributes}
    """
    alpha = NumberProperty(default=0, minval=0)
    weight = ParameterProperty(default=init.HeNormal(gain=2))

    def activation_function(self, input_value):
        if self.alpha == 0:
            return tf.nn.relu(input_value)
        return tf.nn.leaky_relu(input_value, asfloat(self.alpha))
Example #15
 def test_he_initializer_repr(self):
     he_initializer = init.HeNormal()
     self.assertEqual("HeNormal(gain=1.0)", str(he_initializer))
Example #16
class Embedding(BaseLayer):
    """
    Embedding layer accepts indices as an input and returns
    rows from the weight matrix associated with these indices.
    Useful for categorical features or for word
    embedding tasks.

    Parameters
    ----------
    input_size : int
        Layer's input vector dimension. Usually associated with the number
        of categories or the number of unique words in the input vector.

    output_size : int
        Layer's output vector dimension.

    weight : array-like, Tensorflow variable, scalar or Initializer
        Defines layer's weights. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    Examples
    --------

    This example converts a dataset that has only categorical
    variables into a format that is suitable for the Embedding layer.

    >>> import numpy as np
    >>> from neupy import layers
    >>>
    >>> dataset = np.array([
    ...     ['cold', 'high'],
    ...     ['hot',  'low'],
    ...     ['cold', 'low'],
    ...     ['hot',  'low'],
    ... ])
    >>>
    >>> unique_value, dataset_indices = np.unique(
    ...     dataset, return_inverse=True
    ... )
    >>> dataset_indices = dataset_indices.reshape((4, 2))
    >>> dataset_indices
    array([[0, 1],
           [2, 3],
           [0, 3],
           [2, 3]])
    >>>
    >>> n_features = dataset.shape[1]
    >>> n_unique_categories = len(unique_value)
    >>> embedded_size = 1
    >>>
    >>> connection = layers.join(
    ...     layers.Input(n_features),
    ...     layers.Embedding(n_unique_categories, embedded_size),
    ...     # Output from the embedding layer is 3D
    ...     # To make output 2D we need to reshape dimensions
    ...     layers.Reshape(),
    ... )
    """
    input_size = IntProperty(minval=1)
    output_size = IntProperty(minval=1)
    weight = ParameterProperty(default=init.HeNormal())

    def __init__(self, input_size, output_size, **options):
        super(Embedding, self).__init__(input_size=input_size,
                                        output_size=output_size,
                                        **options)

    @property
    def output_shape(self):
        if self.input_shape is not None:
            return as_tuple(self.input_shape, self.output_size)

    def initialize(self):
        super(Embedding, self).initialize()
        self.add_parameter(value=self.weight,
                           name='weight',
                           shape=as_tuple(self.input_size, self.output_size),
                           trainable=True)

    def output(self, input_value):
        input_value = tf.cast(input_value, tf.int32)
        return tf.gather(self.weight, input_value)

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({input_size}, {output_size})'.format(
            name=classname,
            input_size=self.input_size,
            output_size=self.output_size,
        )
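
The ``output`` method above is a plain row lookup: ``tf.gather`` picks one row of the weight matrix per input index. Below is a minimal NumPy sketch of the same lookup, reusing the indices from the docstring example (the weight values here are random and purely illustrative).

import numpy as np

n_unique_categories, embedded_size = 4, 1
weight = np.random.randn(n_unique_categories, embedded_size)

dataset_indices = np.array([[0, 1],
                            [2, 3],
                            [0, 3],
                            [2, 3]])

# Same effect as tf.gather(weight, dataset_indices)
embedded = weight[dataset_indices]
print(embedded.shape)  # (4, 2, 1) -- 3D output, hence the Reshape layer
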
Example #17
class Convolution(ParameterBasedLayer):
    """
    Convolutional layer.

    Parameters
    ----------
    size : tuple of int
        Filter shape. It should be defined as a tuple of three
        integers ``(filter rows, filter columns, output channels)``.

    padding : {{``same``, ``valid``}}, int, tuple
        Zero padding for the input tensor.

        - ``valid`` - Padding won't be added to the tensor. Result will be
          the same as for ``padding=0``

        - ``same`` - Padding will depend on the number of rows and columns
          in the filter. This padding makes sure that image with the
          ``stride=1`` won't change its width and height. It's the same as
          ``padding=(filter rows // 2, filter columns // 2)``.

        - Custom value for the padding can be specified as an integer, like
          ``padding=1`` or it can be specified as a tuple when different
          dimensions have different padding values, for example
          ``padding=(2, 3)``.

        Defaults to ``valid``.

    stride : tuple of int, int
        Stride size. Defaults to ``(1, 1)``.

    dilation : int, tuple
        Rate for the filter upsampling. When ``dilation > 1`` the layer
        becomes a dilated convolution (or atrous convolution). Defaults to ``1``.

    weight : array-like, Tensorflow variable, scalar or Initializer
        Defines layer's weights. The shape of the weight will be equal to
        ``(filter rows, filter columns, input channels, output channels)``.
        You can find the default initialization methods
        :ref:`here <init-methods>`. Defaults to
        :class:`HeNormal(gain=2) <neupy.init.HeNormal>`.

    {ParameterBasedLayer.bias}

    {BaseLayer.Parameters}

    Examples
    --------
    2D Convolution

    >>> from neupy import layers
    >>>
    >>> layers.join(
    ...     layers.Input((28, 28, 3)),
    ...     layers.Convolution((3, 3, 16)),
    ... )

    1D Convolution

    >>> from neupy import layers
    >>>
    >>> layers.join(
    ...     layers.Input((30, 10)),
    ...     layers.Reshape((30, 1, 10)),
    ...     layers.Convolution((3, 1, 16)),
    ... )

    Methods
    -------
    {ParameterBasedLayer.Methods}

    Attributes
    ----------
    {ParameterBasedLayer.Attributes}
    """
    # We use gain=2 because it's a suitable choice for the relu non-linearity
    # and relu is the most common non-linearity used in CNNs.
    weight = ParameterProperty(default=init.HeNormal(gain=2))
    size = TypedListProperty(required=True, element_type=int)
    padding = PaddingProperty(default='valid')
    stride = Spatial2DProperty(default=(1, 1))
    dilation = Spatial2DProperty(default=1)

    def validate(self, input_shape):
        if input_shape and len(input_shape) != 3:
            raise LayerConnectionError(
                "Convolutional layer expects an input with 3 "
                "dimensions, got {} with shape {}"
                "".format(len(input_shape), input_shape))

    def output_shape_per_dim(self, *args, **kwargs):
        return conv_output_shape(*args, **kwargs)

    def find_output_from_input_shape(self, input_shape):
        padding = self.padding
        rows, cols, _ = input_shape

        row_filter_size, col_filter_size, n_kernels = self.size
        row_stride, col_stride = self.stride
        row_dilation, col_dilation = self.dilation or (1, 1)

        if isinstance(padding, (list, tuple)):
            row_padding, col_padding = padding
        else:
            row_padding, col_padding = padding, padding

        output_rows = self.output_shape_per_dim(
            rows, row_filter_size,
            row_padding, row_stride, row_dilation,
        )
        output_cols = self.output_shape_per_dim(
            cols, col_filter_size,
            col_padding, col_stride, col_dilation,
        )

        return (output_rows, output_cols, n_kernels)

    @property
    def output_shape(self):
        if self.input_shape is not None:
            return self.find_output_from_input_shape(self.input_shape)

    @property
    def weight_shape(self):
        n_channels = self.input_shape[-1]
        n_rows, n_cols, n_filters = self.size
        return (n_rows, n_cols, n_channels, n_filters)

    @property
    def bias_shape(self):
        return as_tuple(self.size[-1])

    def output(self, input_value):
        padding = self.padding

        if not isinstance(padding, six.string_types):
            height_pad, weight_pad = padding
            input_value = tf.pad(input_value, [
                [0, 0],
                [height_pad, height_pad],
                [weight_pad, weight_pad],
                [0, 0],
            ])
            # VALID option will make sure that
            # convolution won't use any padding.
            padding = 'VALID'

        output = tf.nn.convolution(
            input_value,
            self.weight,
            padding=padding,
            strides=self.stride,
            dilation_rate=self.dilation,
            data_format="NHWC"
        )

        if self.bias is not None:
            bias = tf.reshape(self.bias, (1, 1, 1, -1))
            output += bias

        return output
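
``find_output_from_input_shape`` above delegates the per-dimension arithmetic to ``conv_output_shape``, which is not shown here. Below is a rough sketch of the standard output-size formula that the ``padding`` options in the docstring imply; this is an assumption about that formula, not neupy's actual helper.

def conv_output_size(in_size, filter_size, padding, stride, dilation=1):
    # Effective filter size grows when the filter is dilated
    effective_filter = dilation * (filter_size - 1) + 1

    if padding == 'valid':
        padding = 0
    elif padding == 'same':
        # keeps width/height unchanged when stride == 1
        padding = effective_filter // 2

    return (in_size + 2 * padding - effective_filter) // stride + 1


print(conv_output_size(28, 3, 'valid', stride=1))    # 26
print(conv_output_size(28, 3, 'same', stride=1))     # 28
print(conv_output_size(28, 3, padding=1, stride=2))  # 14
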
Example #18
class ParameterBasedLayer(BaseLayer):
    """
    Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer's output size.

    weight : array-like, Tensorflow variable, scalar or Initializer
        Defines layer's weights. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    bias : 1D array-like, Tensorflow variable, scalar, Initializer or None
        Defines layer's bias.
        You can find the default initialization methods
        :ref:`here <init-methods>`. Defaults to
        :class:`Constant(0) <neupy.init.Constant>`.
        The ``None`` value excludes bias from the calculations and
        does not add it to the list of parameters.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    size = IntProperty(minval=1)
    weight = ParameterProperty(default=init.HeNormal())
    bias = ParameterProperty(default=init.Constant(value=0), allow_none=True)

    def __init__(self, size, **options):
        super(ParameterBasedLayer, self).__init__(size=size, **options)

    @property
    def weight_shape(self):
        return as_tuple(self.input_shape, self.output_shape)

    @property
    def bias_shape(self):
        if self.bias is not None:
            return as_tuple(self.output_shape)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.add_parameter(value=self.weight, name='weight',
                           shape=self.weight_shape, trainable=True)

        if self.bias is not None:
            self.add_parameter(value=self.bias, name='bias',
                               shape=self.bias_shape, trainable=True)

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
Example #19
class LSTM(BaseRNNLayer):
    """
    Long Short Term Memory (LSTM) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    input_weights : Initializer, ndarray
        Weight parameters for input connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    hidden_weights : Initializer, ndarray
        Weight parameters for hidden connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    cell_weights : Initializer, ndarray
        Weight parameters for cell connection. Required only when
        ``peepholes=True``, otherwise it will be ignored.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    biases : Initializer, ndarray
        Bias parameters for all gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import tensorflow as tf
            dict(
                ingate=tf.nn.sigmoid,
                forgetgate=tf.nn.sigmoid,
                outgate=tf.nn.sigmoid,
                cell=tf.tanh,
            )

        If the application requires modifying only one of these
        functions, it's enough to specify the one that you need to
        change and omit the others

        .. code-block:: python

            dict(ingate=tf.tanh)

        Other parameters like ``forgetgate`` or ``outgate`` will be
        equal to their default values.

    learn_init : bool
        If ``True``, makes ``cell_init`` and ``hidden_init`` trainable
        variables. Defaults to ``False``.

    cell_init : array-like, Tensorflow variable, scalar or Initializer
        Initializer for initial cell state (:math:`c_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    hidden_init : array-like, Tensorflow variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    {BaseRNNLayer.only_return_final}

    peepholes : bool
        If ``True``, the LSTM uses peephole connections.
        When ``False``, cell parameters are ignored.
        Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    gradient_clipping : float or int
        If nonzero, the gradient messages are clipped to the
        given value during the backward pass. Defaults to ``0``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.LSTM(20),
                layers.Sigmoid(1),
            ]
        )
    """
    input_weights = ParameterProperty(default=init.HeNormal())
    hidden_weights = ParameterProperty(default=init.HeNormal())
    cell_weights = ParameterProperty(default=init.HeNormal())
    biases = ParameterProperty(default=init.Constant(0))

    activation_functions = MultiCallableProperty(default=dict(
        ingate=tf.nn.sigmoid,
        forgetgate=tf.nn.sigmoid,
        outgate=tf.nn.sigmoid,
        cell=tf.tanh,
    ))

    learn_init = Property(default=False, expected_type=bool)
    cell_init = ParameterProperty(default=init.Constant(0))
    hidden_init = ParameterProperty(default=init.Constant(0))

    unroll_scan = Property(default=False, expected_type=bool)
    backwards = Property(default=False, expected_type=bool)
    peepholes = Property(default=False, expected_type=bool)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(LSTM, self).initialize()
        n_inputs = np.prod(self.input_shape[1:])

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections.  These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.weight_cell_to_ingate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_ingate',
                shape=(self.size, ))
            self.weight_cell_to_forgetgate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_forgetgate',
                shape=(self.size, ))
            self.weight_cell_to_outgate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_outgate',
                shape=(self.size, ))

        self.input_weights = self.add_parameter(
            value=self.input_weights,
            name='input_weights',
            shape=(n_inputs, 4 * self.size),
        )
        self.hidden_weights = self.add_parameter(
            value=self.hidden_weights,
            name='hidden_weights',
            shape=(self.size, 4 * self.size),
        )
        self.biases = self.add_parameter(
            value=self.biases,
            name='biases',
            shape=(4 * self.size, ),
        )

        # Initialization parameters
        self.add_parameter(
            value=self.cell_init,
            shape=(1, self.size),
            name="cell_init",
            trainable=self.learn_init,
        )
        self.add_parameter(
            value=self.hidden_init,
            shape=(1, self.size),
            name="hidden_init",
            trainable=self.learn_init,
        )

    def output(self, input_value):
        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = tf.transpose(input_value, [1, 0, 2])
        input_shape = tf.shape(input_value)
        n_batch = input_shape[1]

        def one_lstm_step(states, input_n):
            with tf.name_scope('lstm-cell'):
                cell_previous, hid_previous = states
                input_n = tf.matmul(input_n, self.input_weights) + self.biases

                # Calculate gates pre-activations and slice
                gates = input_n + tf.matmul(hid_previous, self.hidden_weights)

                # Clip gradients
                if self.gradient_clipping != 0:
                    gates = clip_gradient(gates, self.gradient_clipping)

                # Extract the pre-activation gate values
                ingate, forgetgate, cell_input, outgate = tf.split(gates,
                                                                   4,
                                                                   axis=1)

                if self.peepholes:
                    # Compute peephole connections
                    ingate += cell_previous * self.weight_cell_to_ingate
                    forgetgate += (cell_previous *
                                   self.weight_cell_to_forgetgate)

                # Apply nonlinearities
                ingate = self.activation_functions.ingate(ingate)
                forgetgate = self.activation_functions.forgetgate(forgetgate)
                cell_input = self.activation_functions.cell(cell_input)

                # Compute new cell value
                cell = forgetgate * cell_previous + ingate * cell_input

                if self.peepholes:
                    outgate += cell * self.weight_cell_to_outgate

                outgate = self.activation_functions.outgate(outgate)

                # Compute new hidden unit activation
                hid = outgate * tf.tanh(cell)
                return [cell, hid]

        cell_init = tf.tile(self.cell_init, (n_batch, 1))
        hidden_init = tf.tile(self.hidden_init, (n_batch, 1))
        sequence = input_value

        if self.backwards:
            sequence = tf.reverse(sequence, axis=[0])

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(
                fn=one_lstm_step,
                sequence=sequence,
                outputs_info=[cell_init, hidden_init],
            )
        else:
            _, hid_out = tf.scan(
                fn=one_lstm_step,
                elems=input_value,
                initializer=[cell_init, hidden_init],
                name='lstm-scan',
            )

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = tf.reverse(hid_out, axis=[0])

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = tf.transpose(hid_out, [1, 0, 2])

        return hid_out
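
Below is a minimal NumPy sketch of a single step from ``one_lstm_step`` above, assuming the same ``(ingate, forgetgate, cell, outgate)`` packing of the ``4 * n_units`` weight columns and ignoring peepholes and gradient clipping.

import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def lstm_step(x_t, cell_prev, hid_prev, input_weights, hidden_weights, biases):
    # Gate pre-activations, packed as (ingate, forgetgate, cell, outgate),
    # mirroring the tf.split(gates, 4, axis=1) call above
    gates = x_t.dot(input_weights) + hid_prev.dot(hidden_weights) + biases
    ingate, forgetgate, cell_input, outgate = np.split(gates, 4, axis=-1)

    cell = sigmoid(forgetgate) * cell_prev + sigmoid(ingate) * np.tanh(cell_input)
    hid = sigmoid(outgate) * np.tanh(cell)
    return cell, hid


n_batch, n_inputs, n_units = 2, 5, 3
rng = np.random.RandomState(0)
cell, hid = lstm_step(
    rng.randn(n_batch, n_inputs),
    np.zeros((n_batch, n_units)),
    np.zeros((n_batch, n_units)),
    rng.randn(n_inputs, 4 * n_units),
    rng.randn(n_units, 4 * n_units),
    np.zeros(4 * n_units),
)
print(cell.shape, hid.shape)  # (2, 3) (2, 3)
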
Example #20
class GRU(BaseRNNLayer):
    """
    Gated Recurrent Unit (GRU) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    input_weights : Initializer, ndarray
        Weight parameters for input connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    hidden_weights : Initializer, ndarray
        Weight parameters for hidden connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    biases : Initializer, ndarray
        Bias parameters for all gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import tensorflow as tf
            dict(
                resetgate=tf.nn.sigmoid,
                updategate=tf.nn.sigmoid,
                hidden_update=tf.tanh,
            )

        If the application requires modifying only one of these
        functions, it's enough to specify the one that you need to
        change and omit the others

        .. code-block:: python

            dict(resetgate=tf.tanh)

        Other parameters like ``updategate`` or ``hidden_update``
        will be equal to their default values.

    learn_init : bool
        If ``True``, makes ``hidden_init`` a trainable variable.
        Defaults to ``False``.

    hidden_init : array-like, Tensorflow variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    {BaseRNNLayer.only_return_final}

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.GRU(20),
                layers.Sigmoid(1),
            ]
        )
    """
    input_weights = ParameterProperty(default=init.HeNormal())
    hidden_weights = ParameterProperty(default=init.HeNormal())
    biases = ParameterProperty(default=init.Constant(0))

    activation_functions = MultiCallableProperty(default=dict(
        resetgate=tf.nn.sigmoid,
        updategate=tf.nn.sigmoid,
        hidden_update=tf.tanh,
    ))

    learn_init = Property(default=False, expected_type=bool)
    hidden_init = ParameterProperty(default=init.Constant(0))

    backwards = Property(default=False, expected_type=bool)
    unroll_scan = Property(default=False, expected_type=bool)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(GRU, self).initialize()
        n_inputs = np.prod(self.input_shape[1:])

        self.input_weights = self.add_parameter(
            value=self.input_weights,
            name='input_weights',
            shape=(n_inputs, 3 * self.size),
        )
        self.hidden_weights = self.add_parameter(
            value=self.hidden_weights,
            name='hidden_weights',
            shape=(self.size, 3 * self.size),
        )
        self.biases = self.add_parameter(
            value=self.biases,
            name='biases',
            shape=(3 * self.size, ),
        )

        self.add_parameter(value=self.hidden_init,
                           shape=(1, self.size),
                           name="hidden_init",
                           trainable=self.learn_init)

    def output(self, input_value):
        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = tf.transpose(input_value, [1, 0, 2])
        input_shape = tf.shape(input_value)
        n_batch = input_shape[1]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def one_gru_step(states, input_n):
            with tf.name_scope('gru-cell'):
                hid_previous, = states
                input_n = tf.matmul(input_n, self.input_weights) + self.biases

                # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1},
                # and W_{hc} h_{t - 1}
                hid_input = tf.matmul(hid_previous, self.hidden_weights)

                if self.gradient_clipping != 0:
                    input_n = clip_gradient(input_n, self.gradient_clipping)
                    hid_input = clip_gradient(hid_input,
                                              self.gradient_clipping)

                hid_resetgate, hid_updategate, hid_hidden = tf.split(hid_input,
                                                                     3,
                                                                     axis=1)

                in_resetgate, in_updategate, in_hidden = tf.split(input_n,
                                                                  3,
                                                                  axis=1)

                # Reset and update gates
                resetgate = self.activation_functions.resetgate(hid_resetgate +
                                                                in_resetgate)

                updategate = self.activation_functions.updategate(
                    hid_updategate + in_updategate)

                # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
                hidden_update = in_hidden + resetgate * hid_hidden

                if self.gradient_clipping != 0:
                    hidden_update = clip_gradient(hidden_update,
                                                  self.gradient_clipping)

                hidden_update = self.activation_functions.hidden_update(
                    hidden_update)

                # Compute (1 - u_t)h_{t - 1} + u_t c_t
                return [
                    hid_previous - updategate * (hid_previous - hidden_update)
                ]

        hidden_init = tf.tile(self.hidden_init, (n_batch, 1))
        sequence = input_value

        if self.backwards:
            sequence = tf.reverse(sequence, axis=[0])

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(fn=one_gru_step,
                                  sequence=sequence,
                                  outputs_info=[hidden_init])
        else:
            hid_out, = tf.scan(
                fn=one_gru_step,
                elems=input_value,
                initializer=[hidden_init],
                name='gru-scan',
            )

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = tf.reverse(hid_out, axis=[0])

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = tf.transpose(hid_out, [1, 0, 2])
        return hid_out
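
Below is a minimal NumPy sketch of ``one_gru_step`` above, assuming the same ``(resetgate, updategate, hidden_update)`` packing of the ``3 * n_units`` weight columns and ignoring gradient clipping.

import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def gru_step(x_t, hid_prev, input_weights, hidden_weights, biases):
    # Input and hidden contributions, each split into the three gate parts
    in_reset, in_update, in_hidden = np.split(
        x_t.dot(input_weights) + biases, 3, axis=-1)
    hid_reset, hid_update, hid_hidden = np.split(
        hid_prev.dot(hidden_weights), 3, axis=-1)

    resetgate = sigmoid(in_reset + hid_reset)
    updategate = sigmoid(in_update + hid_update)
    hidden_update = np.tanh(in_hidden + resetgate * hid_hidden)

    # (1 - u_t) * h_{t-1} + u_t * c_t
    return hid_prev - updategate * (hid_prev - hidden_update)
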
 def test_gain_relu(self):
     he_initializer = init.HeNormal(gain='relu')
     self.assertEqual(he_initializer.gain, math.sqrt(2))