Example No. 1
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    The following equations define the GRU:
    u = sig(x_t Wu + s_t-1 Uu + bu)
    r = sig(x_t Wr + s_t-1 Ur + br)
    h = tanh(x_t Wx + (s_t-1 . r) Ux + bx)
    s_t = (1 - u) . h + u . s_t-1
    Below, some of the parameters are initialized together and later sliced:
    W = [Wu Wr], i.e. the (horizontal) concatenation of Wu and Wr
    b = [bu br]
    U = [Uu Ur]
    """
    if nin is None:
        nin = options['dim_word']
    if dim is None:
        dim = options['dim_proj']
    W = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim, )).astype('float32')
    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim, )).astype('float32')

    return params
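
The "initialized together and later sliced" pattern above relies on a small helper to cut the gate blocks back out of the concatenated matrices. A minimal sketch, assuming the usual _slice helper these codebases define elsewhere:

def _slice(x, n, dim):
    # return the n-th dim-wide column block of a concatenated matrix (or vector)
    if x.ndim == 2:
        return x[:, n * dim:(n + 1) * dim]
    return x[n * dim:(n + 1) * dim]

# e.g. with W = [Wu Wr] of shape (nin, 2*dim):
# Wu = _slice(W, 0, dim)   # update-gate weights
# Wr = _slice(W, 1, dim)   # reset-gate weights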
Example No. 2
def param_init_decoder(options, params, prefix='decoder_lstm'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # set the initial forget-gate bias to 3 (see the note in param_init_encoder below)
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones(
        (n_h, )).astype(theano.config.floatX)

    return params
Example No. 3
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # embedding to gates transformation weights, biases
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = np.zeros((2 * dim,)).astype('float32')

    # recurrent transformation weights for gates
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # embedding to hidden state proposal weights, biases
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    params[_p(prefix, 'bx')] = np.zeros((dim,)).astype('float32')

    # recurrent transformation weights for hidden state proposal
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux

    return params
Example No. 4
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    """
    Init the LSTM parameters
    """
    assert nin is not None and dim is not None

    # input to hidden weights
    W = numpy.concatenate([
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim)
    ],
                          axis=1)
    params[_p(prefix, 'W')] = W
    # hidden to hidden (recurrent) weights
    U = numpy.concatenate([
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim)
    ],
                          axis=1)
    params[_p(prefix, 'U')] = U
    # biases
    b = numpy.zeros((4 * dim, ))
    params[_p(prefix, 'b')] = b.astype(theano.config.floatX)

    return params
Example No. 5
def param_init_decoder(options, params, prefix='decoder_gru'):
    
    n_x = options['n_x']
    n_h = options['n_h']
    
    W = np.concatenate([uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h)], axis=1)
    params[_p(prefix,'W')] = W
    
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix,'U')] = U
    
    params[_p(prefix,'b')] = zero_bias(2*n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix,'Wx')] = Wx
    
    Ux = ortho_weight(n_h)
    params[_p(prefix,'Ux')] = Ux
    
    params[_p(prefix,'bx')] = zero_bias(n_h)
    
    params[_p(prefix,'b0')] = zero_bias(n_h)

    return params   
Example No. 6
    def param_init_lstm_concat(self, options, params, nin, dim, dimctx,
                               prefix='lstm_concat'):

        # input to LSTM
        W = np.concatenate([norm_weight(nin,dim),
                            norm_weight(nin,dim),
                            norm_weight(nin,dim),
                            norm_weight(nin,dim)], axis=1)
        params[_p(prefix, 'W')] = W

        # LSTM to LSTM
        U = np.concatenate([ortho_weight(dim),
                            ortho_weight(dim),
                            ortho_weight(dim),
                            ortho_weight(dim)], axis=1)
        params[_p(prefix, 'U')] = U

        # bias to LSTM
        params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

        # context to LSTM
        Wc = norm_weight(dimctx, dim*4)
        params[_p(prefix, 'Wc')] = Wc

        if options['selector']:
            # attention: selector
            W_sel = norm_weight(dim, 1)
            params[_p(prefix, 'W_sel')] = W_sel
            b_sel = np.float32(0.)
            params[_p(prefix, 'b_sel')] = b_sel
        return params
Example No. 7
def param_init(options, params, nin, dim, dimctx, prefix='lstm_cond'):

    # input to LSTM
    W = np.concatenate([
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W
    # ctx to LSTM
    V = np.concatenate([
        norm_weight(dimctx, dim),
        norm_weight(dimctx, dim),
        norm_weight(dimctx, dim),
        norm_weight(dimctx, dim)
    ],
                       axis=1)
    params[_p(prefix, 'V')] = V
    # LSTM to LSTM
    U = np.concatenate([
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = np.zeros((4 * dim, )).astype('float32')
    return params
Example No. 8
def param_init_encoder(options, params, prefix='lstm_encoder'):
    
    n_x = options['n_x']
    n_h = options['n_h']
    
    W = np.concatenate([uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    
    params[_p(prefix,'b')] = zero_bias(4*n_h)
    
    # It has been observed that setting a high initial forget-gate bias for LSTMs
    # can give slightly better results (Le et al., 2015). Hence, the initial
    # forget-gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)

    return params
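
For intuition, a bias of 3 starts the forget gate nearly open, so the cell state is mostly preserved early in training. A one-line check:

import numpy as np
sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
print(sigmoid(3.0))  # ~0.9526: the gate initially passes ~95% of the old cell state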
Example No. 9
def param_init_encoder(options, params, prefix='lstm_encoder'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)

    # It has been observed that setting a high initial forget-gate bias for LSTMs
    # can give slightly better results (Le et al., 2015). Hence, the initial
    # forget-gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones(
        (n_h, )).astype(theano.config.floatX)

    return params
Example No. 10
def param_init_decoder(options, params, prefix='decoder_gru'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h), ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx

    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux

    params[_p(prefix, 'bx')] = zero_bias(n_h)

    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
Example No. 11
def param_init_lstm(options, params, prefix='lstm', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']

    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim)
    ],
                          axis=1)

    params[prfx(prefix, 'W')] = W
    U = numpy.concatenate([
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim)
    ],
                          axis=1)

    params[prfx(prefix, 'U')] = U
    params[prfx(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    return params
Example No. 12
 def param_init_lstm_cond(self,
                          options,
                          params,
                          prefix='lstm_cond',
                          nin=None,
                          dim=None,
                          dimctx=None):  #nin=512 dim=512 dimctx=2048
     if nin is None:
         nin = options['word_dim']
     if dim is None:
         dim = options['lstm_dim']
     if dimctx is None:
         dimctx = options['ctx_dim']
     # input to LSTM
     W = np.concatenate([
         norm_weight(nin, dim),
         norm_weight(nin, dim),
         norm_weight(nin, dim),
         norm_weight(nin, dim)
     ],
                        axis=1)
     params[_p(prefix, 'W')] = W  # bo_lstm_W:(512,2048)
     # LSTM to LSTM
     U = np.concatenate([
         ortho_weight(dim),
         ortho_weight(dim),
         ortho_weight(dim),
         ortho_weight(dim)
     ],
                        axis=1)
     params[_p(prefix, 'U')] = U  # bo_lstm_U:(512,2048)
     # bias to LSTM
     params[_p(prefix, 'b')] = np.zeros(
         (4 * dim, )).astype('float32')  # bo_lstm_b:(2048,)
     # attention: context -> hidden
     # Wc_att = norm_weight(dimctx, ortho=False)
     Wc_att = norm_weight(dim, ortho=False)
     params[_p(prefix, 'Wc_att')] = Wc_att  # bo_lstm_Wc_att:(512,512)
     # attention: LSTM -> hidden
     # Wd_att = norm_weight(dim, dimctx)
     Wd_att = norm_weight(dim, dim)
     params[_p(prefix, 'Wd_att')] = Wd_att  # bo_lstm_Wd_att:(512,512)
     # attention: hidden bias
     # b_att = np.zeros((dimctx,)).astype('float32')
     b_att = np.zeros((dim, )).astype('float32')
     params[_p(prefix, 'b_att')] = b_att  # bo_lstm_b_att:(512,)
     # attention:
     # U_att = norm_weight(dimctx, 1)
     U_att = norm_weight(dim, 28)
     params[_p(prefix, 'U_att')] = U_att  # bo_lstm_U_att:(512,28)
     c_att = np.zeros((1, )).astype('float32')
     params[_p(prefix, 'c_att')] = c_att  # bo_lstm_c_att:(1,)
     if options['selector']:
         # attention: selector
         W_sel = norm_weight(dim, 1)
         params[_p(prefix, 'W_sel')] = W_sel  # bo_lstm_W_sel:(512,1)
         b_sel = np.float32(0.)
         params[_p(prefix, 'b_sel')] = b_sel  # bo_lstm_b_sel: 0
     return params
Example No. 13
def param_init_lstm_peep(options, params, prefix='lstm', nin=None, dim=None):
    """
    Code based on http://deeplearning.net/tutorial/code/lstm.py and Jamie's GRU code
    Long Short Term Memory Unit (LSTM)
    LSTM is defined by the follow equations,
    W = [Wi Wf Wc Wo] # input weights
    b = [bi bf bc bo] # biases
    U = [Ui Uf Uc Uo] # recurrent weights
    Pi Pf Po c_t-1    # peephole params and the previous cell, c_t-1
    i_t = sig(Wi x_t + Ui h_t-1 + Pi c_t-1 + bi)
    f_t = sig(Wf x_t + Uf h_t-1 + Pf c_t-1 + bf)
    c_t = f_t c_t-1 + i_t tanh(Wc x_t + Uc h_t-1 + bc)
    o_t = sig(Wo x_t + Uo h_t-1 + Po c_t-1 + bo)
    h_t = o_t tanh(c_t)
    """
    if nin is None:
        nin = options['dim_word']
    if dim is None:
        dim = options['dim_proj']

    # input weight matrix is 4 times for the input gate, forget gate, output gate, and cell input
    W = numpy.concatenate([
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim)
    ],
                          axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    # The recurrent weight matrix
    U = numpy.concatenate(
        [
            ortho_weight(dim),
            ortho_weight(dim),  # remember this is ortho_weight(dim, dim)
            ortho_weight(dim),
            ortho_weight(dim)
        ],
        axis=1)
    params[_p(prefix, 'U')] = U

    # Peephole weight vectors, all initialized to zero
    # Peephole weights are diagonal as in Graves's paper
    params[_p(prefix, 'Pi')] = numpy.zeros((dim, )).astype('float32')
    params[_p(prefix, 'Pf')] = numpy.zeros((dim, )).astype('float32')
    params[_p(prefix, 'Po')] = numpy.zeros((dim, )).astype('float32')

    # initial h_0 and cell are created in lstm_layer or passed in

    # initialize forget gates to one?
    return params
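
A minimal numpy sketch of one step under the docstring's equations (helper names are illustrative, not from the source; the init above stores input-first shapes, so products are written x_t @ W):

import numpy as np
sig = lambda z: 1.0 / (1.0 + np.exp(-z))

def peep_lstm_step(x_t, h_prev, c_prev, Wi, Wf, Wc, Wo,
                   Ui, Uf, Uc, Uo, Pi, Pf, Po, bi, bf, bc, bo):
    i = sig(x_t @ Wi + h_prev @ Ui + Pi * c_prev + bi)  # input gate
    f = sig(x_t @ Wf + h_prev @ Uf + Pf * c_prev + bf)  # forget gate
    c = f * c_prev + i * np.tanh(x_t @ Wc + h_prev @ Uc + bc)
    o = sig(x_t @ Wo + h_prev @ Uo + Po * c_prev + bo)  # output gate
    # diagonal peepholes: Pi/Pf/Po are vectors, applied elementwise
    return o * np.tanh(c), c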
Example No. 14
def param_init_gru(options, param, prefix='gru', nin=None, dim=None):
    # note: nin and dim must be supplied by the caller; the None defaults
    # are never filled in from options here

    param[prefix + '_W'] = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    param[prefix + '_U'] = numpy.concatenate(
        [ortho_weight(dim), ortho_weight(dim)], axis=1)
    param[prefix + '_b'] = zero_vector(2 * dim)

    param[prefix + '_Wx'] = norm_weight(nin, dim)
    param[prefix + '_Ux'] = ortho_weight(dim)
    param[prefix + '_bx'] = zero_vector(dim)

    return param
Example No. 15
    def __init__(self, n_input, n_output):
        """

        :type  nc: int
        :param nc: dimension of input vector

        :type  nh: int
        :param nh: number of hidden units in this layer

        :type  no: int
        :param no: dimension of output vector
        """

        # Parameter of this lstm layer
        self._name = "attention_blstm_fusion"
        self.n_input = n_input
        self.n_output = n_output
        # Wh = [ Wi, Wc, Wf, Wo]
        Wh = np.concatenate([np.random.randn(n_input, n_output).astype(theano.config.floatX),
                             np.random.randn(n_input, n_output).astype(theano.config.floatX),
                             np.random.randn(n_input, n_output).astype(theano.config.floatX),
                             np.random.randn(n_input, n_output).astype(theano.config.floatX)]
                            , axis=1)
        self.Wh = theano.shared(value=Wh, name='Wh', borrow=True)
        # U = [Ui, Uc, Uf, Uo]
        Uh = np.concatenate([ortho_weight(n_output, n_output), ortho_weight(n_output, n_output),
                             ortho_weight(n_output, n_output), ortho_weight(n_output, n_output)]
                            , axis=1)
        self.Uh = theano.shared(value=Uh, name='Uh', borrow=True)

        # bh = [bi, bc, bf, bo]
        bh = np.zeros((n_output * 4,)).astype(theano.config.floatX)
        self.bh = theano.shared(value=bh, name='bh', borrow=True)

        Wh_reverse = np.concatenate([np.random.randn(n_input, n_output).astype(theano.config.floatX),
                                     np.random.randn(n_input, n_output).astype(theano.config.floatX),
                                     np.random.randn(n_input, n_output).astype(theano.config.floatX),
                                     np.random.randn(n_input, n_output).astype(theano.config.floatX)]
                                    , axis=1)
        self.Wh_reverse = theano.shared(value=Wh_reverse, name='Wh_reverse', borrow=True)
        # U = [Ui, Uc, Uf, Uo]
        Uh_reverse = np.concatenate([ortho_weight(n_output, n_output), ortho_weight(n_output, n_output),
                                     ortho_weight(n_output, n_output), ortho_weight(n_output, n_output)]
                                    , axis=1)
        self.Uh_reverse = theano.shared(value=Uh_reverse, name='Uh_reverse', borrow=True)

        # bh = [bi, bc, bf, bo]
        bh_reverse = np.zeros((n_output * 4,)).astype(theano.config.floatX)
        self.bh_reverse = theano.shared(value=bh_reverse, name='bh_reverse', borrow=True)

        self._output = np.zeros(2, )
        self.params = [self.Wh, self.Uh, self.bh, self.Wh_reverse, self.Uh_reverse, self.bh_reverse]

        Wa = 0.01*np.random.rand(n_input,n_input).astype(theano.config.floatX)
        Ua = 0.01*np.random.rand(n_output,n_input).astype(theano.config.floatX)
        Va = 0.01*np.random.rand(n_input,1).astype(theano.config.floatX)
        self.Wa = theano.shared(value=Wa, name='Wa', borrow=True)
        self.Ua = theano.shared(value=Ua, name='Ua', borrow=True)
        #self.Va = theano.shared(value=Va, name='Va', borrow=True)
        self.params.extend([self.Wa, self.Ua])#, self.Va])
Example No. 16
    def __init__(self, n_input, n_output):
        """

        :type  nc: int
        :param nc: dimension of input vector

        :type  nh: int
        :param nh: number of hidden units in this layer

        :type  no: int
        :param no: dimension of output vector
        """

        # Parameter of this lstm layer
        self._name = "LSTM_MASK"
        self.n_input = n_input
        self.n_output = n_output
        # Wh = [ Wi, Wc, Wf, Wo]
        Wh = np.concatenate([ortho_weight(n_input, n_output), ortho_weight(n_input, n_output),
                             ortho_weight(n_input, n_output), ortho_weight(n_input, n_output)]
                            , axis=1)
        self.Wh = theano.shared(value=Wh, name='Wh', borrow=True)
        # U = [Ui, Uc, Uf, Uo]
        Uh = np.concatenate([ortho_weight(n_output, n_output), ortho_weight(n_output, n_output),
                             ortho_weight(n_output, n_output), ortho_weight(n_output, n_output)]
                            , axis=1)
        self.Uh = theano.shared(value=Uh, name='Uh', borrow=True)

        # bh = [bi, bc, bf, bo]
        bh = np.zeros((n_output * 4,)).astype(theano.config.floatX)
        self.bh = theano.shared(value=bh, name='bh', borrow=True)

        self.params = [self.Wh, self.Uh, self.bh]
Example No. 17
def param_init_lstm(options, params, prefix='lstm'):
    """
    Init the LSTM parameter:

    :see: init_params
    """
    W = numpy.concatenate([
        ortho_weight(options['dim_proj']),
        ortho_weight(options['dim_proj']),
        ortho_weight(options['dim_proj']),
        ortho_weight(options['dim_proj'])
    ],
                          axis=1)
    params[_p(prefix, 'W')] = W
    U = numpy.concatenate([
        ortho_weight(options['dim_proj']),
        ortho_weight(options['dim_proj']),
        ortho_weight(options['dim_proj']),
        ortho_weight(options['dim_proj'])
    ],
                          axis=1)
    params[_p(prefix, 'U')] = U
    b = numpy.zeros((4 * options['dim_proj'], ))
    params[_p(prefix, 'b')] = b.astype(config.floatX)

    return params
Example No. 18
 def init_params(self):
     Wi_values = utils.ortho_weight(self.dim)
     self.Wi = theano.shared(Wi_values, name="LSTM_Wi")
     Wf_values = utils.ortho_weight(self.dim)
     self.Wf = theano.shared(Wf_values, name="LSTM_Wf")
     Wc_values = utils.ortho_weight(self.dim)
     self.Wc = theano.shared(Wc_values, name="LSTM_Wc")
     Wo_values = utils.ortho_weight(self.dim)
     self.Wo = theano.shared(Wo_values, name="LSTM_Wo")
     Ui_values = utils.ortho_weight(self.dim)
     self.Ui = theano.shared(Ui_values, name="LSTM_Ui")
     Uf_values = utils.ortho_weight(self.dim)
     self.Uf = theano.shared(Uf_values, name="LSTM_Uf")
     Uc_values = utils.ortho_weight(self.dim)
     self.Uc = theano.shared(Uc_values, name="LSTM_Uc")
     Uo_values = utils.ortho_weight(self.dim)
     self.Uo = theano.shared(Uo_values, name="LSTM_Uo")
     b_values = np.zeros((self.dim, ), dtype=theano.config.floatX)
     self.bi = theano.shared(b_values, name="LSTM_bi")
     self.bf = theano.shared(b_values, name="LSTM_bf")
     self.bc = theano.shared(b_values, name="LSTM_bc")
     self.bo = theano.shared(b_values, name="LSTM_bo")
     self.params = [
         self.Wi, self.Ui, self.bi, self.Wf, self.Uf, self.bf, self.Wc,
         self.Uc, self.bc, self.Wo, self.Uo, self.bo
     ]
Example No. 19
    def param_init_lstm_cond(self, options, params, nin, dim, dimctx,
                             prefix='lstm_cond'):

        # input to LSTM
        W = np.concatenate([norm_weight(nin,dim),
                            norm_weight(nin,dim),
                            norm_weight(nin,dim),
                            norm_weight(nin,dim)], axis=1)
        params[_p(prefix, 'W')] = W

        # LSTM to LSTM
        U = np.concatenate([ortho_weight(dim),
                            ortho_weight(dim),
                            ortho_weight(dim),
                            ortho_weight(dim)], axis=1)
        params[_p(prefix, 'U')] = U

        # bias to LSTM
        params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

        # context to LSTM
        Wc = norm_weight(dimctx,dim*4)
        params[_p(prefix, 'Wc')] = Wc

        # attention: context -> hidden
        Wc_att = norm_weight(dimctx, ortho=False)
        params[_p(prefix, 'Wc_att')] = Wc_att

        # attention: LSTM -> hidden
        Wd_att = norm_weight(dim,dimctx)
        params[_p(prefix, 'Wd_att')] = Wd_att

        # attention: hidden bias
        b_att = np.zeros((dimctx,)).astype('float32')
        params[_p(prefix, 'b_att')] = b_att

        # attention:
        U_att = norm_weight(dimctx, 1)
        params[_p(prefix, 'U_att')] = U_att
        c_att = np.zeros((1,)).astype('float32')
        params[_p(prefix, 'c_tt')] = c_att  # key 'c_tt' (sic), kept as in the original code

        if options['selector']:
            # attention: selector
            W_sel = norm_weight(dim, 1)
            params[_p(prefix, 'W_sel')] = W_sel
            b_sel = np.float32(0.)
            params[_p(prefix, 'b_sel')] = b_sel

        return params
Example No. 20
    def _init_gru(in_dim, hid_dim, prefix_):
        param[prefix_ + '_W'] = numpy.concatenate(
            [uniform_weight(in_dim, hid_dim),
             uniform_weight(in_dim, hid_dim)],
            axis=1)
        param[prefix_ + '_Wx'] = uniform_weight(in_dim, hid_dim)

        param[prefix_ + '_U'] = numpy.concatenate(
            [ortho_weight(hid_dim),
             ortho_weight(hid_dim)], axis=1)
        param[prefix_ + '_b'] = zero_vector(2 * hid_dim)

        param[prefix_ + '_Ux'] = ortho_weight(hid_dim)
        param[prefix_ + '_bx'] = zero_vector(hid_dim)
Example No. 21
def LSTM(input_x, rnn_size,
         batch_size):  # input_x: (batch_size, steps, embedding_size)
    # Note: despite the name, this function implements a GRU (see the scope below).
    # It uses the pre-1.0 TensorFlow API: tf.concat(dim, values), tf.batch_matmul,
    # and tf.mul/tf.sub (tf.multiply/tf.subtract in TF >= 1.0).
    num_steps = int(input_x.get_shape()[1])
    embedding_size = int(input_x.get_shape()[2])
    # define parameters
    W = tf.get_variable("W",
                        initializer=tf.concat(1, [
                            uniform_weight(embedding_size, rnn_size),
                            uniform_weight(embedding_size, rnn_size)
                        ]))
    U = tf.get_variable(
        "U",
        initializer=tf.concat(1,
                              [ortho_weight(rnn_size),
                               ortho_weight(rnn_size)]))
    b = tf.get_variable("b", initializer=tf.zeros([2 * rnn_size]))
    Wx = tf.get_variable("Wx",
                         initializer=uniform_weight(embedding_size, rnn_size))
    Ux = tf.get_variable("Ux", initializer=ortho_weight(rnn_size))
    bx = tf.get_variable("bx", initializer=tf.zeros([rnn_size]))
    h_ = tf.zeros([batch_size, rnn_size])
    one = tf.fill([batch_size, rnn_size], 1.)
    state_below = tf.transpose(tf.batch_matmul(
        input_x,
        tf.tile(tf.reshape(W, [1, embedding_size, 2 * rnn_size]),
                [batch_size, 1, 1])) + b,
                               perm=[1, 0, 2])
    state_belowx = tf.transpose(tf.batch_matmul(
        input_x,
        tf.tile(tf.reshape(Wx, [1, embedding_size, rnn_size]),
                [batch_size, 1, 1])) + bx,
                                perm=[1, 0, 2])  #(steps, batch_size, rnn_size)
    output = []  #(steps, batch_size, rnn_size)
    with tf.variable_scope("GRU"):
        for time_step in range(num_steps):
            preact = tf.matmul(h_, U)
            preact = tf.add(preact, state_below[time_step])

            r = tf.nn.sigmoid(_slice(preact, 0, rnn_size))
            u = tf.nn.sigmoid(_slice(preact, 1, rnn_size))

            preactx = tf.matmul(h_, Ux)
            preactx = tf.mul(preactx, r)
            preactx = tf.add(preactx, state_belowx[time_step])
            h = tf.tanh(preactx)

            h_ = tf.add(tf.mul(u, h_), tf.mul(tf.sub(one, u), h))
            output.append(h_)
    output = tf.transpose(output, perm=[1, 0, 2])
    return output  #(batch_size, steps, rnn_size)
Example No. 22
 def param_init_lstm(self, params, nin, dim, prefix='lstm'):
     assert prefix is not None
     # Stack the weight matrices for faster dot products
     W = np.concatenate([norm_weight(nin, dim),
                         norm_weight(nin, dim),
                         norm_weight(nin, dim),
                         norm_weight(nin, dim)], axis=1)
     params[_p(prefix, 'W')] = W     # to_lstm_W:(512,2048)
     U = np.concatenate([ortho_weight(dim),
                         ortho_weight(dim),
                         ortho_weight(dim),
                         ortho_weight(dim)], axis=1)
     params[_p(prefix, 'U')] = U     # to_lstm_U:(512,2048)
     params[_p(prefix, 'b')] = np.zeros((4*dim,)).astype('float32')    # to_lstm_b:(2048,)
     return params
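
Why stack: one (nin, 4*dim) product equals four separate (nin, dim) products, and a single large matrix multiply is usually faster than four small ones. A quick illustrative check:

import numpy as np

nin, dim = 8, 5
x = np.random.randn(2, nin).astype('float32')
Ws = [np.random.randn(nin, dim).astype('float32') for _ in range(4)]
W = np.concatenate(Ws, axis=1)                         # (nin, 4*dim), as above
one_big = x @ W                                        # one GEMM
four_small = np.concatenate([x @ w for w in Ws], axis=1)
assert np.allclose(one_big, four_small, atol=1e-5)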
Example No. 23
def param_init(params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matrices for faster dot products
    W = np.concatenate([norm_weight(nin,dim),
                        norm_weight(nin,dim),
                        norm_weight(nin,dim),
                        norm_weight(nin,dim)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

    return params
Example No. 24
def param_init_decoder(options, params, prefix='decoder'):

    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']

    W = np.concatenate([
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h),
        uniform_weight(n_x, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h),
        ortho_weight(n_h)
    ],
                       axis=1)
    params[_p(prefix, 'U')] = U

    C = np.concatenate([
        uniform_weight(n_z, n_h),
        uniform_weight(n_z, n_h),
        uniform_weight(n_z, n_h),
        uniform_weight(n_z, n_h)
    ],
                       axis=1)
    params[_p(prefix, 'C')] = C

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # set the initial forget-gate bias to 3, as in the encoder init above
    params[_p(prefix, 'b')][n_h:2 * n_h] = 3 * np.ones(
        (n_h, )).astype(theano.config.floatX)

    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0

    params[_p(prefix, 'b0')] = zero_bias(n_h)

    #params[_p(prefix,'b_y')] = zero_bias(n_x)  # 48

    return params
Example No. 25
def param_init_gru(prefix='gru', nin=None, dim=None):
    #Gated Recurrent Unit (GRU)
    params = {}
    W = [norm_weight(nin, dim), norm_weight(nin, dim)]
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b1')] = np.zeros((dim, ), dtype=np.float32)
    params[_p(prefix, 'b2')] = np.zeros((dim, ), dtype=np.float32)
    U = [ortho_weight(dim), ortho_weight(dim)]
    params[_p(prefix, 'U')] = U
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = np.zeros((dim, ), dtype=np.float32)

    return params[_p(prefix,'W')][0], params[_p(prefix,'W')][1],  params[_p(prefix,'U')][0] , \
                        params[_p(prefix,'U')][1], params[_p(prefix,'b1')], params[_p(prefix,'b2')], \
                        params[_p(prefix,'Wx')], params[_p(prefix,'Ux')], params[_p(prefix,'bx')]
Example No. 26
def param_init_cnn(options, params, prefix='cnn'):
    feature_maps = options['feature_maps']
    filter_hs = options['filter_hs']

    # Fixed image shape
    num_chn = len(options['W'])
    if options['bidir']:
        num_chn = num_chn * 2

    if options['combine']:
        num_chn = num_chn * 2

    image_shape = (options['batch_size'], num_chn, options['maxlen'],
                   options['dim_proj'])
    img_h = image_shape[2]
    img_w = image_shape[3]
    options['image_shape'] = image_shape

    # init filters and biases
    filter_shapes = []
    pool_sizes = []

    filter_w = options['dim_proj']
    for filter_h in filter_hs:
        filter_shape = (feature_maps, num_chn, filter_h, filter_w)
        pool_size = (img_h - filter_h + 1, img_w - filter_w + 1)

        # 4 different initializations of the filters
        if options['init'] == 'uniform':
            params['cnn_f' + str(filter_h)] = numpy.random.uniform(
                low=-0.01, high=0.01, size=filter_shape).astype(config.floatX)
        elif options['init'] == 'xavier':
            fan_in = numpy.prod(filter_shape[1:])
            fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                       numpy.prod(pool_size))
            W_bound = numpy.sqrt(6. / (fan_in + fan_out))
            params['cnn_f' + str(filter_h)] = numpy.random.uniform(
                low=-W_bound, high=W_bound,
                size=filter_shape).astype(config.floatX)
        elif options['init'] == 'gaussian':
            params['cnn_f' + str(filter_h)] = numpy.random.normal(
                size=filter_shape).astype(config.floatX)
        elif options['init'] == 'ortho':
            W_ortho = ortho_weight(numpy.prod(filter_shape[1:]))
            W_ortho = numpy.reshape(W_ortho[:filter_shape[0]], filter_shape)
            params['cnn_f' + str(filter_h)] = W_ortho

        params['cnn_b' + str(filter_h)] = numpy.zeros(
            (filter_shape[0], )).astype(config.floatX)

        filter_shapes.append(filter_shape)
        pool_sizes.append(pool_size)

    options['filter_shapes'] = filter_shapes
    options['pool_sizes'] = pool_sizes

    return params
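
For concreteness, a worked Xavier bound under illustrative sizes (the numbers below are assumptions, not from the source):

import numpy
# e.g. feature_maps=100, num_chn=1, filter_h=3, filter_w=300, pool=(98, 1)
filter_shape = (100, 1, 3, 300)
pool_size = (98, 1)
fan_in = numpy.prod(filter_shape[1:])                    # 1*3*300 = 900
fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
           numpy.prod(pool_size))                        # 100*900/98 ~= 918.4
W_bound = numpy.sqrt(6. / (fan_in + fan_out))            # ~= 0.0574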
Example No. 27
def param_init_lnlstm(options, params, prefix='lnlstm', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']

    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim),
        norm_weight(nin, dim)
    ],
                          axis=1)

    params[prfx(prefix, 'W')] = W
    U = numpy.concatenate([
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim),
        ortho_weight(dim)
    ],
                          axis=1)

    params[prfx(prefix, 'U')] = U
    params[prfx(prefix, 'b')] = numpy.zeros((4 * dim, )).astype('float32')

    # layer-normalization parameters (shift b*, scale s*)
    scale_add = 0.0
    scale_mul = 1.0
    params[prfx(prefix, 'b1')] = scale_add * numpy.ones(
        (4 * dim)).astype('float32')
    params[prfx(prefix, 'b2')] = scale_add * numpy.ones(
        (4 * dim)).astype('float32')
    params[prfx(prefix, 'b3')] = scale_add * numpy.ones(
        (1 * dim)).astype('float32')
    params[prfx(prefix, 's1')] = scale_mul * numpy.ones(
        (4 * dim)).astype('float32')
    params[prfx(prefix, 's2')] = scale_mul * numpy.ones(
        (4 * dim)).astype('float32')
    params[prfx(prefix, 's3')] = scale_mul * numpy.ones(
        (1 * dim)).astype('float32')

    return params
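
The b*/s* pairs above are the layer-norm shift and scale, initialized to 0 and 1 via scale_add/scale_mul. A minimal sketch of the normalization they parameterize, assuming the standard formulation:

import numpy as np

def ln(x, b, s, eps=1e-5):
    # normalize over the feature axis, then rescale (s) and shift (b)
    m = x.mean(axis=-1, keepdims=True)
    v = x.var(axis=-1, keepdims=True)
    return s * (x - m) / np.sqrt(v + eps) + b

# e.g. applied to the stacked gate pre-activations before slicing:
# preact = ln(x @ W, b1, s1) + ln(h_prev @ U, b2, s2) + b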
Example No. 28
def param_init_gru(options, param, prefix='gru', nin=None, dim=None):

    param[prefix + '_W'] = numpy.concatenate(
        [
            norm_weight(nin, dim), norm_weight(nin, dim)
        ],
        axis=1)
    param[prefix + '_U'] = numpy.concatenate(
        [
            ortho_weight(dim), ortho_weight(dim)
        ],
        axis=1)
    param[prefix + '_b'] = zero_vector(2 * dim)

    param[prefix + '_Wx'] = norm_weight(nin, dim)
    param[prefix + '_Ux'] = ortho_weight(dim)
    param[prefix + '_bx'] = zero_vector(dim)

    return param
Example No. 29
def param_init_lngru(options, params, prefix='lngru', nin=None, dim=None):
    """
     Gated Recurrent Unit (GRU) with LN
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    W = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W.astype('float32')
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim, )).astype('float32')
    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U.astype('float32')

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx.astype('float32')
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux.astype('float32')
    params[_p(prefix, 'bx')] = numpy.zeros((dim, )).astype('float32')

    # LN parameters
    scale_add = 0.0
    scale_mul = 1.0
    params[_p(prefix, 'b1')] = scale_add * numpy.ones(
        (2 * dim)).astype('float32')
    params[_p(prefix, 'b2')] = scale_add * numpy.ones(
        (1 * dim)).astype('float32')
    params[_p(prefix, 'b3')] = scale_add * numpy.ones(
        (2 * dim)).astype('float32')
    params[_p(prefix, 'b4')] = scale_add * numpy.ones(
        (1 * dim)).astype('float32')
    params[_p(prefix, 's1')] = scale_mul * numpy.ones(
        (2 * dim)).astype('float32')
    params[_p(prefix, 's2')] = scale_mul * numpy.ones(
        (1 * dim)).astype('float32')
    params[_p(prefix, 's3')] = scale_mul * numpy.ones(
        (2 * dim)).astype('float32')
    params[_p(prefix, 's4')] = scale_mul * numpy.ones(
        (1 * dim)).astype('float32')

    return params
Example No. 30
def param_init_gru_cond(options,
                        param,
                        prefix='gru_cond',
                        nin=None,
                        dim=None,
                        dimctx=None,
                        nin_nonlin=None,
                        dim_nonlin=None):
    if nin_nonlin is None:
        nin_nonlin = nin
    if dim_nonlin is None:
        dim_nonlin = dim

    param = param_init_gru(options, param, prefix=prefix, nin=nin, dim=dim)

    param[prefix + '_U_nl'] = numpy.concatenate(
        [ortho_weight(dim_nonlin),
         ortho_weight(dim_nonlin)], axis=1)
    param[prefix + '_b_nl'] = zero_vector(2 * dim_nonlin)

    param[prefix + '_Ux_nl'] = ortho_weight(dim_nonlin)
    param[prefix + '_bx_nl'] = zero_vector(dim_nonlin)

    # context to the GRU
    param[prefix + '_Wc'] = uniform_weight(dimctx, dim * 2)
    param[prefix + '_Wcx'] = uniform_weight(dimctx, dim)

    # attention: combined -> hidden
    param[prefix + '_W_comb_att'] = uniform_weight(dim, dimctx)

    # attention: context -> hidden
    param[prefix + '_Wc_att'] = uniform_weight(dimctx, dimctx)

    # attention: hidden bias
    param[prefix + '_b_att'] = zero_vector(dimctx)

    # attention:
    param[prefix + '_U_att'] = uniform_weight(dimctx, 1)
    param[prefix + '_c_att'] = zero_vector(1)

    return param
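
A sketch of the additive attention these parameters support. The matching layer function is not shown in these examples, so the usage below is an assumption based on the standard dl4mt-style decoder:

import numpy as np

def attention(context, hidden, p, prefix='gru_cond'):
    # context: (src_len, dimctx); hidden: (dim,)
    pctx = context @ p[prefix + '_Wc_att'] + p[prefix + '_b_att']
    pstate = hidden @ p[prefix + '_W_comb_att']
    e = (np.tanh(pctx + pstate) @ p[prefix + '_U_att'] + p[prefix + '_c_att']).ravel()
    alpha = np.exp(e - e.max()) / np.exp(e - e.max()).sum()  # softmax over source
    return alpha @ context  # expected (weighted) context vector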
Example No. 31
File: layers.py Project: afcarl/nmt
def param_init_gru(options, params, prefix='gru', nin=None, dim=None, hiero=False):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    if not hiero:
        W = numpy.concatenate([norm_weight(nin,dim),
                               norm_weight(nin,dim)], axis=1)
        params[prfx(prefix,'W')] = W
        params[prfx(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix,'U')] = U

    Wx = norm_weight(nin, dim)
    params[prfx(prefix,'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[prfx(prefix,'Ux')] = Ux
    params[prfx(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
Example No. 32
File: rmn.py Project: orhanf/rmn
def param_init_gru_rmn(params, prefix='gru_rmn', nin=None, dim=None,
                       vocab_size=None, memory_dim=None, memory_size=None):
    assert dim == memory_dim, 'Should be fixed!'

    # first GRU params
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'Wx')] = norm_weight(nin, dim)
    params[_p(prefix, 'Ux')] = ortho_weight(dim)
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    # memory block params
    params[_p(prefix, 'M')] = norm_weight(vocab_size, memory_dim)
    params[_p(prefix, 'C')] = norm_weight(vocab_size, memory_dim)
    params[_p(prefix, 'T')] = norm_weight(memory_size, memory_dim)

    # second GRU params
    params[_p(prefix, 'Wz')] = norm_weight(dim, memory_dim, ortho=False)
    params[_p(prefix, 'Wr')] = norm_weight(dim, memory_dim, ortho=False)
    params[_p(prefix, 'W2')] = norm_weight(dim, memory_dim, ortho=False)
    params[_p(prefix, 'Uz')] = ortho_weight(dim)
    params[_p(prefix, 'Ur')] = ortho_weight(dim)
    params[_p(prefix, 'U2')] = ortho_weight(dim)

    return params
Example No. 33
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    W = numpy.concatenate(
        [norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim, )).astype('float32')
    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim, )).astype('float32')

    return params
Example No. 34
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W
    params[_p(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix,'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix,'Ux')] = Ux
    params[_p(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
Example No. 35
def param_init_decoder(options, params, prefix='decoder_vanilla'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = uniform_weight(n_x, n_h)
    params[_p(prefix, 'W')] = W

    U = ortho_weight(n_h)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(n_h)

    return params
Example No. 36
def param_init_gru_cond(options, params, prefix='gru_cond',
                        nin=None, dim=None, dimctx=None,
                        nin_nonlin=None, dim_nonlin=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    if nin_nonlin is None:
        nin_nonlin = nin
    if dim_nonlin is None:
        dim_nonlin = dim

    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = np.zeros((2 * dim,)).astype('float32')
    U = np.concatenate([ortho_weight(dim_nonlin),
                        ortho_weight(dim_nonlin)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin_nonlin, dim_nonlin)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim_nonlin)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = np.zeros((dim_nonlin,)).astype('float32')

    U_nl = np.concatenate([ortho_weight(dim_nonlin),
                           ortho_weight(dim_nonlin)], axis=1)
    params[_p(prefix, 'U_nl')] = U_nl
    params[_p(prefix, 'b_nl')] = np.zeros((2 * dim_nonlin,)).astype('float32')

    Ux_nl = ortho_weight(dim_nonlin)
    params[_p(prefix, 'Ux_nl')] = Ux_nl
    params[_p(prefix, 'bx_nl')] = np.zeros((dim_nonlin,)).astype('float32')

    # context to the GRU
    Wc = norm_weight(dimctx, dim * 2)
    params[_p(prefix, 'Wc')] = Wc

    Wcx = norm_weight(dimctx, dim)
    params[_p(prefix, 'Wcx')] = Wcx

    # attention: combined -> hidden
    W_comb_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'W_comb_att')] = W_comb_att

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: hidden bias
    b_att = np.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = np.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att  # key 'c_tt' (sic), kept as in the original code

    return params