def lstmemory_unit(input,
                   name=None,
                   size=None,
                   param_attr=None,
                   act=None,
                   gate_act=None,
                   state_act=None,
                   mixed_bias_attr=None,
                   lstm_bias_attr=None,
                   mixed_layer_attr=None,
                   lstm_layer_attr=None,
                   get_output_layer_attr=None):
    """
    Define the calculations that an LSTM unit performs in a single time step.
    This function itself is not a recurrent layer, so it can not be applied
    to sequence input directly. It is always used inside recurrent_group
    (see layers.py for more details), for example to implement an attention
    mechanism.

    Please refer to **Generating Sequences With Recurrent Neural Networks**
    for more details about LSTM. The link goes as follows:

    .. _Link: https://arxiv.org/abs/1308.0850

    .. math::

        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

        c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)

        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

        h_t & = o_t tanh(c_t)

    The example usage is:

    .. code-block:: python

        lstm_step = lstmemory_unit(input=[layer1],
                                   size=256,
                                   act=TanhActivation(),
                                   gate_act=SigmoidActivation(),
                                   state_act=TanhActivation())

    :param input: input layer name.
    :type input: LayerOutput
    :param name: lstmemory unit name.
    :type name: basestring
    :param size: lstmemory unit size.
    :type size: int
    :param param_attr: Parameter config, None if use default.
    :type param_attr: ParameterAttribute
    :param act: lstm final activation type.
    :type act: BaseActivation
    :param gate_act: lstm gate activation type.
    :type gate_act: BaseActivation
    :param state_act: lstm state activation type.
    :type state_act: BaseActivation
    :param mixed_bias_attr: bias parameter attribute of mixed layer.
                            False means no bias, None means default bias.
    :type mixed_bias_attr: ParameterAttribute|False
    :param lstm_bias_attr: bias parameter attribute of lstm layer.
                           False means no bias, None means default bias.
    :type lstm_bias_attr: ParameterAttribute|False
    :param mixed_layer_attr: mixed layer's extra attribute.
    :type mixed_layer_attr: ExtraLayerAttribute
    :param lstm_layer_attr: lstm layer's extra attribute.
    :type lstm_layer_attr: ExtraLayerAttribute
    :param get_output_layer_attr: get output layer's extra attribute.
    :type get_output_layer_attr: ExtraLayerAttribute
    :return: lstmemory unit name.
    :rtype: LayerOutput
    """
    if size is None:
        assert input.size % 4 == 0
        size = input.size / 4
    # The hidden output of the previous time step, fed back through a memory.
    out_mem = memory(name=name, size=size)
    # The cell state of the previous time step, fed back through a memory.
    state_mem = memory(name="%s_state" % name, size=size)

    # Project the current input and the recurrent output to 4 * size: the
    # pre-activations of the input, forget and output gates plus the cell
    # candidate.
    with mixed_layer(name="%s_input_recurrent" % name,
                     size=size * 4,
                     bias_attr=mixed_bias_attr,
                     layer_attr=mixed_layer_attr,
                     act=IdentityActivation()) as m:
        m += identity_projection(input=input)
        m += full_matrix_projection(input=out_mem, param_attr=param_attr)

    # Perform one LSTM step on the projected input and the previous state.
    lstm_out = lstm_step_layer(name=name,
                               input=m,
                               state=state_mem,
                               size=size,
                               bias_attr=lstm_bias_attr,
                               act=act,
                               gate_act=gate_act,
                               state_act=state_act,
                               layer_attr=lstm_layer_attr)
    # Expose the cell state so that state_mem can read it in the next step.
    get_output_layer(name='%s_state' % name,
                     input=lstm_out,
                     arg_name='state',
                     layer_attr=get_output_layer_attr)
    return lstm_out
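# A minimal usage sketch (assumed, not part of this module): as the docstring
# notes, lstmemory_unit describes one time step and is meant to be wrapped in
# recurrent_group. The step function and the input layer `emb` below are
# hypothetical names chosen for illustration only.
#
#     def lstm_step(current_input):
#         # current_input is one time step of the group input.
#         return lstmemory_unit(input=current_input,
#                               name="lstm_unit",
#                               size=256,
#                               act=TanhActivation(),
#                               gate_act=SigmoidActivation(),
#                               state_act=TanhActivation())
#
#     lstm_output = recurrent_group(name="lstm_group",
#                                   step=lstm_step,
#                                   input=emb)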
def simple_lstm(input,
                size,
                name=None,
                reverse=False,
                mat_param_attr=None,
                bias_param_attr=None,
                inner_param_attr=None,
                act=None,
                gate_act=None,
                state_act=None,
                mixed_layer_attr=None,
                lstm_cell_attr=None):
    """
    Simple LSTM cell. It simply combines a mixed layer with a
    full_matrix_projection and a lstmemory layer. The simple lstm cell is
    implemented with the following equations.

    .. math::

        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

        c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)

        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

        h_t & = o_t tanh(c_t)

    Please refer to **Generating Sequences With Recurrent Neural Networks**
    if you want to know more about LSTM. Link_ is here.

    .. _Link: http://arxiv.org/abs/1308.0850

    :param name: lstm layer name.
    :type name: basestring
    :param input: input layer name.
    :type input: LayerOutput
    :param size: lstm layer size.
    :type size: int
    :param reverse: whether to process the input data in a reverse order.
    :type reverse: bool
    :param mat_param_attr: mixed layer's matrix projection parameter attribute.
    :type mat_param_attr: ParameterAttribute
    :param bias_param_attr: bias parameter attribute. False means no bias,
                            None means default bias.
    :type bias_param_attr: ParameterAttribute|False
    :param inner_param_attr: lstm cell parameter attribute.
    :type inner_param_attr: ParameterAttribute
    :param act: lstm final activation type.
    :type act: BaseActivation
    :param gate_act: lstm gate activation type.
    :type gate_act: BaseActivation
    :param state_act: lstm state activation type.
    :type state_act: BaseActivation
    :param mixed_layer_attr: mixed layer's extra attribute.
    :type mixed_layer_attr: ExtraLayerAttribute
    :param lstm_cell_attr: lstm layer's extra attribute.
    :type lstm_cell_attr: ExtraLayerAttribute
    :return: lstm layer name.
    :rtype: LayerOutput
    """
    # Project the input sequence to 4 * size, as lstmemory expects the
    # concatenated gate and cell-candidate pre-activations as its input.
    fc_name = 'lstm_transform_%s' % name
    with mixed_layer(name=fc_name,
                     size=size * 4,
                     act=IdentityActivation(),
                     layer_attr=mixed_layer_attr,
                     bias_attr=False) as m:
        m += full_matrix_projection(input, param_attr=mat_param_attr)

    return lstmemory(name=name,
                     input=m,
                     reverse=reverse,
                     bias_attr=bias_param_attr,
                     param_attr=inner_param_attr,
                     act=act,
                     gate_act=gate_act,
                     state_act=state_act,
                     layer_attr=lstm_cell_attr)
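# A minimal usage sketch (assumed, not part of this module): run simple_lstm
# over a sequence of word embeddings and classify the last step. The layer
# names (`data`, `emb`) and the dictionary/class sizes are hypothetical.
#
#     data = data_layer(name="word", size=10000)
#     emb = embedding_layer(input=data, size=128)
#     lstm = simple_lstm(input=emb,
#                        size=256,
#                        act=TanhActivation(),
#                        gate_act=SigmoidActivation(),
#                        state_act=TanhActivation())
#     last = last_seq(input=lstm)
#     prob = fc_layer(input=last, size=2, act=SoftmaxActivation())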