Example #1
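All four snippets target the legacy PaddlePaddle v1 trainer_config_helpers API. A plausible import block for running them (the module paths are assumed from that era's layout, so treat this as a sketch) is:

# Assumed imports for the legacy PaddlePaddle v1 API; the exact module
# layout may differ between releases.
from paddle.trainer_config_helpers.layers import (
    memory, mixed_layer, identity_projection, full_matrix_projection,
    lstm_step_layer, get_output_layer, lstmemory, recurrent_group,
    data_layer, embedding_layer)
from paddle.trainer_config_helpers.activations import (
    IdentityActivation, TanhActivation, SigmoidActivation)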
def lstmemory_unit(input, name=None, size=None,
                   mixed_bias_attr=None, mixed_layer_attr=None,
                   param_attr=None, lstm_bias_attr=None,
                   act=None, gate_act=None,
                   state_act=None, lstm_layer_attr=None,
                   get_output_layer_attr=None):
    """
    TODO(yuyang18): complete docs

    @param input:
    @param name:
    @param size:
    @param mixed_bias_attr:
    @param mixed_layer_attr:
    @param param_attr:
    @param lstm_bias_attr:
    @param act:
    @param gate_act:
    @param state_act:
    @param lstm_layer_attr:
    @param get_output_layer_attr:
    @return:
    """
    if size is None:
        assert input.size % 4 == 0
        size = input.size // 4
    out_mem = memory(name=name, size=size)
    state_mem = memory(name="%s_state" % name, size=size)

    with mixed_layer(name="%s_input_recurrent" % name,
                     size=size * 4, bias_attr=mixed_bias_attr,
                     layer_attr=mixed_layer_attr,
                     act=IdentityActivation()) as m:
        m += identity_projection(input=input)
        m += full_matrix_projection(input=out_mem, param_attr=param_attr)

    lstm_out = lstm_step_layer(
        name=name,
        input=m,
        state=state_mem,
        size=size,
        bias_attr=lstm_bias_attr,
        act=act,
        gate_act=gate_act,
        state_act=state_act,
        layer_attr=lstm_layer_attr
    )
    get_output_layer(name='%s_state' % name,
                     input=lstm_out,
                     arg_name='state',
                     layer_attr=get_output_layer_attr)

    return lstm_out
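
Because lstmemory_unit allocates memory() cells, it can only run inside the step function of a recurrent_group. A minimal usage sketch (layer and group names here are hypothetical):

# Hypothetical sketch: recurrent_group feeds the unit one time step at a
# time, and memory() carries the hidden and cell state across steps.
def lstm_step(current_word):
    return lstmemory_unit(input=current_word, name="lstm", size=256)

# emb is any sequence layer, e.g. the embedding_layer output shown later
rnn_out = recurrent_group(step=lstm_step, input=emb, name="lstm_group")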
Example #2
def lstmemory_unit(input,
                   name=None,
                   size=None,
                   param_attr=None,
                   act=None,
                   gate_act=None,
                   state_act=None,
                   mixed_bias_attr=None,
                   lstm_bias_attr=None,
                   mixed_layer_attr=None,
                   lstm_layer_attr=None,
                   get_output_layer_attr=None):
    """
    Define the calculations that an LSTM unit performs in a single time
    step. This function is not itself a recurrent layer, so it cannot be
    applied directly to sequence input. It is always used inside
    recurrent_group (see layers.py for more details), for example to
    implement an attention mechanism.

    Please refer to **Generating Sequences With Recurrent Neural Networks**
    for more details about LSTM. Link_ is here.

    .. _Link: https://arxiv.org/abs/1308.0850

    ..  math::

        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

        c_t & = f_t c_{t-1} + i_t \\tanh(W_{xc}x_{t} + W_{hc}h_{t-1} + b_c)

        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

        h_t & = o_t \\tanh(c_t)

    The example usage is:

    ..  code-block:: python

        lstm_step = lstmemory_unit(input=layer1,
                                   size=256,
                                   act=TanhActivation(),
                                   gate_act=SigmoidActivation(),
                                   state_act=TanhActivation())


    :param input: input layer.
    :type input: LayerOutput
    :param name: lstmemory unit name.
    :type name: basestring
    :param size: lstmemory unit size.
    :type size: int
    :param param_attr: parameter config; None means use the default.
    :type param_attr: ParameterAttribute
    :param act: lstm final activation type.
    :type act: BaseActivation
    :param gate_act: lstm gate activation type.
    :type gate_act: BaseActivation
    :param state_act: lstm state activation type.
    :type state_act: BaseActivation
    :param mixed_bias_attr: bias parameter attribute of the mixed layer.
                            False means no bias, None means default bias.
    :type mixed_bias_attr: ParameterAttribute|False
    :param lstm_bias_attr: bias parameter attribute of the lstm layer.
                           False means no bias, None means default bias.
    :type lstm_bias_attr: ParameterAttribute|False
    :param mixed_layer_attr: mixed layer's extra attribute.
    :type mixed_layer_attr: ExtraLayerAttribute
    :param lstm_layer_attr: lstm layer's extra attribute.
    :type lstm_layer_attr: ExtraLayerAttribute
    :param get_output_layer_attr: get_output layer's extra attribute.
    :type get_output_layer_attr: ExtraLayerAttribute
    :return: the lstm unit's output.
    :rtype: LayerOutput
    """
    if size is None:
        assert input.size % 4 == 0
        size = input.size // 4
    out_mem = memory(name=name, size=size)
    state_mem = memory(name="%s_state" % name, size=size)

    with mixed_layer(name="%s_input_recurrent" % name,
                     size=size * 4,
                     bias_attr=mixed_bias_attr,
                     layer_attr=mixed_layer_attr,
                     act=IdentityActivation()) as m:
        m += identity_projection(input=input)
        m += full_matrix_projection(input=out_mem, param_attr=param_attr)

    lstm_out = lstm_step_layer(name=name,
                               input=m,
                               state=state_mem,
                               size=size,
                               bias_attr=lstm_bias_attr,
                               act=act,
                               gate_act=gate_act,
                               state_act=state_act,
                               layer_attr=lstm_layer_attr)
    get_output_layer(name='%s_state' % name,
                     input=lstm_out,
                     arg_name='state',
                     layer_attr=get_output_layer_attr)

    return lstm_out
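
To make the equations concrete, here is a small NumPy reference for one step. It treats the peephole weights W_{ci}, W_{cf}, W_{co} as elementwise vectors (one common reading of the formulas); this is an illustrative sketch, not Paddle's actual kernel:

import numpy as np

def lstm_step_reference(z, c_prev, w_ci, w_cf, w_co, b):
    # z is the size*4 pre-activation that the "%s_input_recurrent" mixed
    # layer produces (input and recurrent projections already summed).
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    zi, zf, zc, zo = np.split(z + b, 4)   # the four gate blocks
    i = sigmoid(zi + w_ci * c_prev)       # input gate, peephole on c_{t-1}
    f = sigmoid(zf + w_cf * c_prev)       # forget gate, peephole on c_{t-1}
    c = f * c_prev + i * np.tanh(zc)      # cell state update
    o = sigmoid(zo + w_co * c)            # output gate, peephole on c_t
    h = o * np.tanh(c)                    # emitted hidden state
    return h, c

size = 4
h1, c1 = lstm_step_reference(np.random.randn(4 * size), np.zeros(size),
                             *np.random.randn(3, size), b=np.zeros(4 * size))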
Example #3
def simple_lstm(input,
                size,
                name=None,
                reverse=False,
                mat_param_attr=None,
                bias_param_attr=None,
                inner_param_attr=None,
                act=None,
                gate_act=None,
                state_act=None,
                mixed_layer_attr=None,
                lstm_cell_attr=None):
    """
    Simple LSTM cell.

    It simply combines a mixed layer with a full_matrix_projection and a
    lstmemory layer. The simple LSTM cell is implemented with the following
    equations.

    ..  math::

        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

        c_t & = f_t c_{t-1} + i_t \\tanh(W_{xc}x_{t} + W_{hc}h_{t-1} + b_c)

        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

        h_t & = o_t \\tanh(c_t)

    Please refer to **Generating Sequences With Recurrent Neural Networks**
    if you want to know what an LSTM is. Link_ is here.

    .. _Link: http://arxiv.org/abs/1308.0850

    :param name: lstm layer name.
    :type name: basestring
    :param input: input layer.
    :type input: LayerOutput
    :param size: lstm layer size.
    :type size: int
    :param reverse: whether to process the input data in a reverse order
    :type reverse: bool
    :param mat_param_attr: mixed layer's matrix projection parameter attribute.
    :type mat_param_attr: ParameterAttribute
    :param bias_param_attr: bias parameter attribute. False means no bias, None
                            means default bias.
    :type bias_param_attr: ParameterAttribute|False
    :param inner_param_attr: lstm cell parameter attribute.
    :type inner_param_attr: ParameterAttribute
    :param act: lstm final activation type.
    :type act: BaseActivation
    :param gate_act: lstm gate activation type.
    :type gate_act: BaseActivation
    :param state_act: lstm state activation type.
    :type state_act: BaseActivation
    :param mixed_layer_attr: mixed layer's extra attribute.
    :type mixed_layer_attr: ExtraLayerAttribute
    :param lstm_cell_attr: lstm layer's extra attribute.
    :type lstm_cell_attr: ExtraLayerAttribute
    :return: the lstm layer's output.
    :rtype: LayerOutput
    """
    fc_name = 'lstm_transform_%s' % name
    with mixed_layer(name=fc_name,
                     size=size * 4,
                     act=IdentityActivation(),
                     layer_attr=mixed_layer_attr,
                     bias_attr=False) as m:
        m += full_matrix_projection(input, param_attr=mat_param_attr)

    return lstmemory(name=name,
                     input=m,
                     reverse=reverse,
                     bias_attr=bias_param_attr,
                     param_attr=inner_param_attr,
                     act=act,
                     gate_act=gate_act,
                     state_act=state_act,
                     layer_attr=lstm_cell_attr)
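
A hedged end-to-end sketch of wiring simple_lstm into a network config (the vocabulary size and layer names are made up for illustration):

# Hypothetical config: embed a word sequence, then run simple_lstm over it.
words = data_layer(name="words", size=10000)   # 10k-word vocabulary ids
emb = embedding_layer(input=words, size=128)   # 128-d word vectors
lstm = simple_lstm(input=emb, size=256,
                   act=TanhActivation(),
                   gate_act=SigmoidActivation(),
                   state_act=TanhActivation())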
Example #4
def lstmemory_unit(input,
                   name=None,
                   size=None,
                   param_attr=None,
                   act=None,
                   gate_act=None,
                   state_act=None,
                   mixed_bias_attr=None,
                   lstm_bias_attr=None,
                   mixed_layer_attr=None,
                   lstm_layer_attr=None,
                   get_output_layer_attr=None):
    """
    TODO(yuyang18): complete docs

    :param input: input layer.
    :type input: LayerOutput
    :param name: lstmemory unit name.
    :type name: basestring
    :param size: lstmemory unit size.
    :type size: int
    :param param_attr: parameter config; None means use the default.
    :type param_attr: ParameterAttribute
    :param act: lstm final activation type.
    :type act: BaseActivation
    :param gate_act: lstm gate activation type.
    :type gate_act: BaseActivation
    :param state_act: lstm state activation type.
    :type state_act: BaseActivation
    :param mixed_bias_attr: bias parameter attribute of the mixed layer.
                            False means no bias, None means default bias.
    :type mixed_bias_attr: ParameterAttribute|False
    :param lstm_bias_attr: bias parameter attribute of the lstm layer.
                           False means no bias, None means default bias.
    :type lstm_bias_attr: ParameterAttribute|False
    :param mixed_layer_attr: mixed layer's extra attribute.
    :type mixed_layer_attr: ExtraLayerAttribute
    :param lstm_layer_attr: lstm layer's extra attribute.
    :type lstm_layer_attr: ExtraLayerAttribute
    :param get_output_layer_attr: get output layer's extra attribute.
    :type get_output_layer_attr: ExtraLayerAttribute
    :return: the lstm unit's output.
    :rtype: LayerOutput
    """
    if size is None:
        assert input.size % 4 == 0
        size = input.size // 4
    out_mem = memory(name=name, size=size)
    state_mem = memory(name="%s_state" % name, size=size)

    with mixed_layer(name="%s_input_recurrent" % name,
                     size=size * 4,
                     bias_attr=mixed_bias_attr,
                     layer_attr=mixed_layer_attr,
                     act=IdentityActivation()) as m:
        m += identity_projection(input=input)
        m += full_matrix_projection(input=out_mem, param_attr=param_attr)

    lstm_out = lstm_step_layer(name=name,
                               input=m,
                               state=state_mem,
                               size=size,
                               bias_attr=lstm_bias_attr,
                               act=act,
                               gate_act=gate_act,
                               state_act=state_act,
                               layer_attr=lstm_layer_attr)
    get_output_layer(name='%s_state' % name,
                     input=lstm_out,
                     arg_name='state',
                     layer_attr=get_output_layer_attr)

    return lstm_out
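
Note how the size inference at the top of each lstmemory_unit variant works: the incoming projection holds the four gate pre-activations concatenated, so its width must be a multiple of four. A quick worked check:

# Worked example of the `size = input.size // 4` inference: a 1024-wide
# projected input yields one 256-wide block per gate (input, forget,
# cell candidate, output).
proj_width = 1024
assert proj_width % 4 == 0
unit_size = proj_width // 4   # -> 256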