Example #1
def param_init_encoder(options, params, prefix='lstm_encoder'):
    
    n_x = options['n_x']
    n_h = options['n_h']
    
    W = np.concatenate([uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h),
                        uniform_weight(n_x,n_h)], axis=1)
    params[_p(prefix, 'W')] = W
    
    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U
    
    params[_p(prefix,'b')] = zero_bias(4*n_h)
    
    # It is observed that setting a high initial forget gate bias for LSTMs can
    # give slightly better results (Le et al., 2015). Hence, the initial forget
    # gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)

    return params
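These snippets all assume a handful of small helpers (_p, uniform_weight, ortho_weight, zero_bias) defined elsewhere in each repository. A minimal sketch of what they typically look like in these Theano-era codebases (the exact uniform scale is an assumption):

import numpy as np
import theano

def _p(prefix, name):
    # 'lstm_encoder' + 'W' -> 'lstm_encoder_W'
    return '%s_%s' % (prefix, name)

def uniform_weight(nin, nout, scale=0.1):
    # uniform initialization in [-scale, scale]; the default scale is an assumption
    W = np.random.uniform(low=-scale, high=scale, size=(nin, nout))
    return W.astype(theano.config.floatX)

def ortho_weight(ndim):
    # orthogonal initialization: the U factor of the SVD of a Gaussian matrix
    W = np.random.randn(ndim, ndim)
    u, s, v = np.linalg.svd(W)
    return u.astype(theano.config.floatX)

def zero_bias(n):
    return np.zeros((n,)).astype(theano.config.floatX)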
Example #2
def gru_layer(tparams, state_below, init_state, options, prefix='gru', mask=None, **kwargs):
    """
    Feedforward pass through GRU
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    dim = tparams[_p(prefix,'Ux')].shape[1]

    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)

    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]
    U = tparams[_p(prefix, 'U')]
    Ux = tparams[_p(prefix, 'Ux')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)

        h = u * h_ + (1. - u) * h
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    _step = _step_slice

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info = [init_state],
                                non_sequences = [tparams[_p(prefix, 'U')],
                                                 tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps,
                                profile=False,
                                strict=True)
    rval = [rval]
    return rval
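For reference, the recurrence that _step_slice implements, written out in plain NumPy (a sketch; the sigmoid helper and 2-D shapes are assumptions):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x_, xx_, h_prev, U, Ux):
    # x_  = dot(x, W) + b  : stacked reset/update preactivations, width 2*dim
    # xx_ = dot(x, Wx) + bx: candidate preactivation, width dim
    dim = Ux.shape[1]
    preact = h_prev.dot(U) + x_
    r = sigmoid(preact[:, 0*dim:1*dim])           # reset gate
    u = sigmoid(preact[:, 1*dim:2*dim])           # update gate
    h_tilde = np.tanh(r * h_prev.dot(Ux) + xx_)   # candidate state
    return u * h_prev + (1. - u) * h_tilde        # gated update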
Example #3
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None,
                       ortho=True):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    params[_p(prefix, 'b')] = np.zeros((nout,)).astype('float32')

    return params
Example #4
def fflayer(tparams,
            state_below,
            options,
            prefix='rconv',
            activ='lambda x: tensor.tanh(x)',
            **kwargs):
    """
    Feedforward pass
    """
    return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                       tparams[_p(prefix, 'b')])
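Because the activation is passed as a string and eval'd, callers pick the nonlinearity at the call site, e.g. (a usage sketch; tparams is assumed to already contain ff_W and ff_b):

proj = fflayer(tparams, state_below, options,
               prefix='ff', activ='lambda x: tensor.nnet.sigmoid(x)')
# or keep the default: activ='lambda x: tensor.tanh(x)'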
Example #5
def batch_norm(tparams, input, options, prefix='cnn'):
    """ layer1_input:  n_sample * n_feature    64*20
        input_shape: (num of hiddens, number of input features)   200*20
        pred_shape: (num of labels, number of hiddens) 2*200
        y_recon : n_label *n_sample 2*64
    """

    # normalize each feature over the batch; the 1/options['L'] term plays the
    # role of the epsilon that keeps the denominator away from zero
    input_hat = (input - input.mean(0)) / (input.std(0) + 1.0 / options['L'])
    input_ = input_hat * tparams[_p(prefix, 'gamma')] + tparams[_p(
        prefix, 'beta')]
    return input_
Example #6
def encoder(tparams, state_below, mask, seq_output=False, prefix='lstm_encoder'):
    
    """ state_below: size of  n_steps * n_samples * n_x
    """

    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]

    n_h = tparams[_p(prefix,'U')].shape[0]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
                    tparams[_p(prefix, 'b')]

    def _step(m_, x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))
        
        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    seqs = [mask, state_below_]

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[tensor.alloc(numpy_floatX(0.),
                                                    n_samples,n_h),
                                              tensor.alloc(numpy_floatX(0.),
                                                    n_samples,n_h)],
                                non_sequences = [tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps,
                                strict=True)
    
    h_rval = rval[0] 
    if seq_output:
        return h_rval
    else:
        # size of n_samples * n_h
        return h_rval[-1]  
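The slice order here matches the column blocks stacked in Example #1: slice 0 is the input gate, 1 the forget gate, 2 the output gate, 3 the cell candidate. A toy NumPy check of the convention (shapes are illustrative, 2-D only):

import numpy as np

n_h = 4
preact = np.arange(2 * 4 * n_h).reshape(2, 4 * n_h)   # 2 samples, 4 stacked blocks

def _slice(_x, n, dim):
    return _x[:, n * dim:(n + 1) * dim]

i_pre, f_pre, o_pre, c_pre = (_slice(preact, k, n_h) for k in range(4))
assert f_pre.shape == (2, n_h)
# the forget block is columns n_h:2*n_h -- the same block Example #1 biases to 3
assert np.array_equal(f_pre, preact[:, n_h:2 * n_h])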
Example #7
    def _init_params(self):

        shape_io = (self.n_in, self.n_out)

        if self.orth:
            if self.n_in != self.n_out :
                raise ValueError('n_in != n_out when require orth in FeedForward')
            self.W = ortho_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        else:
            self.W = norm_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        self.b = constant_weight(shape=(self.n_out, ), name=_p(self.pname, 'b'))
        self.params = [self.W, self.b]
Example #8
def mlp_layer_linear(tparams, layer1_input, prefix='mlp_layer'):
    
    """ layer1_input:  n_sample * n_feature    64*20
        input_shape: (num of hiddens, number of input features)   200*20
        pred_shape: (num of labels, number of hiddens) 2*200
        y_recon : n_label *n_sample 2*64
    """
    hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix,'W1')].T) + tparams[_p(prefix,'b1')] )  # 64*200  
    y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')]  
    #y_recons = tensor.tanh(y_recons) * 10   # avoid numerical issues/label smoothing
    #y_recons = tensor.nnet.softmax(y_recons) # 64*2
    return y_recons      
Example #9
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    """
    Affine transformation + point-wise nonlinearity
    """
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix,'W')] = norm_weight(nin, nout, ortho=ortho)
    params[_p(prefix,'b')] = numpy.zeros((nout,)).astype('float32')

    return params
Example #10
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    """
    Affine transformation + point-wise nonlinearity
    """
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix,'W')] = xavier_weight(nin, nout)
    params[_p(prefix,'b')] = numpy.zeros((nout,)).astype('float32')

    return params
Example #11
def decoder_layer(tparams, state_below, prefix='decoder_lstm'):
    """ state_below: size of n_steps * n_samples * n_x 
    """

    nsteps = state_below.shape[0]
    n_h = tparams[_p(prefix, 'U')].shape[0]

    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    def _step(x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))

        c = f * c_ + i * c

        h = o * tensor.tanh(c)

        return h, c

    state_below_ = tensor.dot(state_below, tparams[_p(
        prefix, 'W')]) + tparams[_p(prefix, 'b')]

    seqs = [state_below_]

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[
                                    tensor.alloc(numpy_floatX(0.), n_samples,
                                                 n_h),
                                    tensor.alloc(numpy_floatX(0.), n_samples,
                                                 n_h)
                                ],
                                non_sequences=[tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps,
                                strict=True)

    h_rval = rval[0]

    return h_rval
Example #12
def param_init_batch_norm(input_shape, params, prefix='cnn'):
    """ input_shape: (num of hiddens, number of input features)
        pred_shape: (num of labels, number of hiddens)
    """

    beta = np.ones((input_shape[1], ), dtype=theano.config.floatX) * 0.01
    gamma = np.ones((input_shape[1], ), dtype=theano.config.floatX) * 0.1

    params[_p(prefix, 'beta')] = beta
    params[_p(prefix, 'gamma')] = gamma

    return params
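This initializer pairs with batch_norm in Example #5: beta is the per-feature shift and gamma the per-feature scale. A quick shape check (sizes are illustrative, and the cnn_beta/cnn_gamma key names assume _p joins with an underscore):

input_shape = (200, 20)    # (num of hiddens, number of input features)
params = param_init_batch_norm(input_shape, {}, prefix='cnn')
assert params['cnn_beta'].shape == (20,)    # one shift per input feature
assert params['cnn_gamma'].shape == (20,)   # one scale per input feature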
Example #13
    def _init_params(self):

        shape_xh = (self.n_in * 3, self.n_hids)
        shape_hh = (self.n_hids * 3, self.n_hids)
        self.W_x = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_x'))
        self.b = constant_weight(shape=(self.n_hids * 3, ),
                                 name=_p(self.pname, 'b'))
        self.W_h = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_h'))
        self.params = [self.W_x, self.W_h, self.b]
        self.GRU_op = mkl_gru.GRU(hid=self.n_hids,
                                  return_sequences=True,
                                  max_len=self.max_len)
        self.h_init_state = numpy.zeros((80, 1000), numpy.float64)
Example #14
 def param_init_fflayer(self,
                        options,
                        params,
                        prefix='ff',
                        nin=None,
                        nout=None):
     if nin is None:
         nin = options['dim_proj']
     if nout is None:
         nout = options['dim_proj']
     params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
     params[_p(prefix, 'b')] = numpy.zeros((nout, )).astype('float32')
     return params
Example #15
    def _init_params(self):

        shape_io = (self.n_in_0, self.n_out)

        if self.orth:
            if self.n_in_0 != self.n_out:
                raise ValueError('n_in != n_out when require orth in FeedForward')
            self.W = ortho_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        else:
            self.W = norm_weight(rng=self.rng, shape=shape_io, name=_p(self.pname, 'W'))
        self.params = [self.W]

        self.ff = FeedForward(self.n_in_1, self.n_out, orth=self.orth, rng=self.rng, name=_p(self.pname, 'FF_W'))
        self.params.extend(self.ff.params)
Example #16
def param_init_encoder(filter_shape, params, prefix='cnn_encoder'):
    
    """ filter_shape: (number of filters, num input feature maps, filter height,
                        filter width)
        image_shape: (batch_size, num input feature maps, image height, image width)
    """
   
    W = np.asarray(rng.uniform(low=-0.01,high=0.01,size=filter_shape),dtype=theano.config.floatX)
    b = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
      
    params[_p(prefix,'W')] = W
    params[_p(prefix,'b')] = b

    return params
Example #18
    def apply_pyramid(self, state_below, mask_below=None, init_state=None, context=None):
        '''
        state_below: shape=[seq_len, batch, n_in]
        init_state:  shape=[batch, seq_len, hid]
        '''
        n_steps = state_below.shape[0]
        if state_below.ndim == 3:
            batch_size = state_below.shape[1]
        else:
            batch_size = 1
            state_below = state_below.reshape((n_steps, batch_size, state_below.shape[1]))

        if mask_below is None:
            mask_below = T.alloc(numpy.float32(1.), n_steps, 1)

        if self.with_context:
            assert context

            if init_state is None:
                init_state = T.tanh(T.dot(context, self.W_c_init))

            c_z = T.dot(context, self.W_cz)
            c_r = T.dot(context, self.W_cr)
            c_h = T.dot(context, self.W_ch)
            non_sequences = [c_z, c_r, c_h]

            rval, updates = theano.scan(self._step_context,
                                        sequences=[state_below, mask_below],
                                        non_sequences=non_sequences,
                                        outputs_info=[init_state],
                                        name=_p(self.pname, 'layers'),
                                        n_steps=n_steps)
        else:
            if init_state is None:
                init_state = T.alloc(numpy.float32(0.), batch_size, n_steps, self.n_hids)

            state_below_xh = T.dot(state_below, self.W_xh) + self.b_h
            state_below_xzr = T.dot(state_below, self.W_xzr) + self.b_zr
            step_idx = T.arange(n_steps)
            sequences = [state_below_xh, state_below_xzr, mask_below, step_idx]
            outputs_info = [init_state]
            non_sequences = []
            rval, updates = theano.scan(self._pyramid_step,
                                        sequences=sequences,
                                        outputs_info=outputs_info,
                                        non_sequences=non_sequences,
                                        name=_p(self.pname, 'layers'),
                                        n_steps=n_steps)
        self.output = rval
        return self.output
Example #19
def param_init_decoder(options, params, prefix='decoder_vanilla'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = uniform_weight(n_x, n_h)
    params[_p(prefix, 'W')] = W

    U = ortho_weight(n_h)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(n_h)

    return params
Example #20
def param_init_action_response_layer(options, params, constraints, prefix='ar',
                                     nin=0, rng=None, unif_range=0.2,
                                     level=0, **kwargs):
    '''
    Action response layers.
    '''
    rng = init_rng(rng)
    n_features = options['hidden_units'][-1]

    if options['shared_ld']:
        params, constraints = init_level_dist(params, unif_range, nin, rng, constraints)
    else:
        if level > 0:
            params[_p(prefix, 'ld')] = floatx(rng.uniform(size=(level),
                                              low=0.1,
                                              high=0.9))
            params[_p(prefix, 'ld')] /= params[_p(prefix, 'ld')].sum()
            constraints['simplex'] = constraints.get('simplex', []) + [_p(prefix, 'ld')]

    initial_Wf = numpy.zeros(n_features)
    initial_Wf += floatx(rng.uniform(size=(n_features), low=0., high=unif_range))
    initial_Wf /= initial_Wf.sum()

    if level == 0:
        params[_p(prefix, 'Wf')] = floatx(initial_Wf)
        constraints['simplex'] = constraints.get('simplex', []) + [_p(prefix, 'Wf')]

    if level > 0:
        params[_p(prefix, 'W_h')] = floatx(rng.uniform(size=(1+options['hidden_units'][-1]),
                                            low=-0.01,
                                            high=0.01))
        params[_p(prefix, 'lam')] = floatx(1.0)
    return params, constraints
Example #21
def deconv_depool2(layer0_input, tparams, options, prefix):
    if prefix == 'd':
        depool_out = depool_repeat(layer0_input, options['e_pool_size'])
    elif prefix == 'd2':
        depool_out = depool_repeat(layer0_input, options['e2_pool_size'])
    s = int(np.floor(options[_p(prefix, 'Nt')] / 2.))
    h = int((2 * options[_p(prefix, 'K')] - 2) / 2.)
    deconv_out = conv.conv2d(input=depool_out.dimshuffle(0, 'x', 1, 2),
                             filters=tparams[_p(prefix, 'W')],
                             filter_shape=options[_p(prefix, 'filter_shape')],
                             border_mode='full')[:, :, h, s - 1:-s]
    doutput = (deconv_out +
               tparams[_p(prefix, 'bias')].dimshuffle('x', 0, 'x')
               )  #.reshape((deconv_out.shape[0],options['C'],options['T']))
    return doutput
Example #22
 def param_init_lstm(self, params, nin, dim, prefix='lstm'):
     assert prefix is not None
     # Stack the weight matrices for faster dot prods
     W = np.concatenate([norm_weight(nin, dim),
                            norm_weight(nin, dim),
                            norm_weight(nin, dim),
                            norm_weight(nin, dim)], axis=1)
     params[_p(prefix, 'W')] = W     # to_lstm_W:(512,2048)
     U = np.concatenate([ortho_weight(dim),
                            ortho_weight(dim),
                            ortho_weight(dim),
                            ortho_weight(dim)], axis=1)
     params[_p(prefix, 'U')] = U     # to_lstm_U:(512,2048)
     params[_p(prefix, 'b')] = np.zeros((4*dim,)).astype('float32')    # to_lstm_b:(2048,)
     return params
Example #23
def encoder(tparams, layer0_input, filter_shape, pool_size,
                      prefix='cnn_encoder'):
    
    """ filter_shape: (number of filters, num input feature maps, filter height,
                        filter width)
        image_shape: (batch_size, num input feature maps, image height, image width)
    """
    
    conv_out = conv.conv2d(input=layer0_input, filters=tparams[_p(prefix,'W')], 
                            filter_shape=filter_shape)
    
    conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix,'b')].dimshuffle('x', 0, 'x', 'x'))
    output = pool.pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=True)

    return output.flatten(2)
Example #24
    def param_init(self):
        if not self.initialized:
            # call object specific param_init
            self.__param_init__()
            # set object params with theano shared
            for (k, v) in self.params.iteritems():
                setattr(self, k, theano.shared(
                    v, name=_p(self.get_prefix(), k), borrow=True))
            # fill params with the theano shared
            self._params = OrderedDict([
                (_p(self.get_prefix(), k), getattr(self, k)) for (k, v) in
                self.params.iteritems() if self.params])

            self.initialized = True
        return self.params
Example #25
    def _init_params(self):

        shape_i0o = (self.n_in_0, self.n_out)
        shape_i1o = (self.n_in_1, self.n_out)

        if self.orth:
            if self.n_in_0 != self.n_out or self.n_in_1 != self.n_out:
                raise ValueError('n_in != n_out when require orth in FeedForward')
            self.W0 = ortho_weight(rng=self.rng, shape=shape_i0o, name=_p(self.pname, 'W0'))
            self.W1 = ortho_weight(rng=self.rng, shape=shape_i1o, name=_p(self.pname, 'W1'))
        else:
            self.W0 = norm_weight(rng=self.rng, shape=shape_i0o, name=_p(self.pname, 'W0'))
            self.W1 = norm_weight(rng=self.rng, shape=shape_i1o, name=_p(self.pname, 'W1'))
        self.b = constant_weight(shape=(self.n_out, ), name=_p(self.pname, 'b'))
        self.params = [self.W0, self.W1, self.b]
Example #27
def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    assert mask is not None

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    def _step(m_, x_, h_, c_):
        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
        c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                   tparams[_p(prefix, 'b')])

    dim_proj = options['dim_proj']
    rval, updates = theano.scan(_step,
                                sequences=[mask, state_below],
                                outputs_info=[
                                    tensor.alloc(numpy_floatX(0.), n_samples,
                                                 dim_proj),
                                    tensor.alloc(numpy_floatX(0.), n_samples,
                                                 dim_proj)
                                ],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps)
    # outputs_info include h_ and c_
    # return only hidden states, so return rval[0]
    return rval[0]
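A usage sketch wiring param_init_lstm (Example #50) to this layer; init_tparams, the usual helper that wraps each NumPy array in a theano.shared, is assumed here:

from collections import OrderedDict
import theano
import theano.tensor as tensor

def init_tparams(params):
    # wrap plain NumPy parameters in Theano shared variables
    return OrderedDict((k, theano.shared(v, name=k)) for k, v in params.items())

options = {'dim_proj': 128}
params = param_init_lstm(options, OrderedDict(), prefix='lstm')
tparams = init_tparams(params)

x = tensor.tensor3('x')       # n_steps x n_samples x dim_proj
mask = tensor.matrix('mask')  # n_steps x n_samples
proj = lstm_layer(tparams, x, options, prefix='lstm', mask=mask)  # all hidden states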
Example #28
def action_response_layer(tparams, features, options, payoff=None,
                          prefix='ar', opposition=None, level=0, **kwargs):
    """
    action_response_layer:  tensor3, (tensor3) -> matrix
                            features, (opposition) -> ar_layer

    Tensor dims:
    features: iter, action_payoff, feature
    opposition: iter, level, prob of action
    output: iter, prob of action

    Probability of an action given features and beliefs about opposition.
    """
    n, f, i = features.shape

    # Weights on opposition players
    if level == 0:
        w_feat = tparams[_p(prefix, 'Wf')]
        weighted_features = tensor.sum(features * w_feat.dimshuffle('x', 0, 'x'), axis=1)

        ar = weighted_features
        return ar, weighted_features, None
    else:
        weighted_features = None
        lam = tparams[_p(prefix, 'lam')] 
        if options['shared_ld']:
            level_dist = tparams['ld']
            ld = level_dist
            ld += floatx(1e-32) # avoid divide by zero
            ld = ld[0:level]
            ld /= ld.sum()
        else:
            ld = tparams[_p(prefix, 'ld')]
            ld += floatx(1e-32)
            ld /= ld.sum()
        
        # U * AR * ld (where * is matrix product)
        weighting = opposition * ld.dimshuffle('x', 0, 'x')
        prob_a = tensor.sum(weighting, axis=1)


        payoff = payoff * tparams[_p(prefix, 'W_h')].dimshuffle('x', 0, 'x', 'x')
        payoff = tensor.sum(payoff,axis=1)
        
        br = tensor.sum(payoff * prob_a.dimshuffle(0, 'x', 1), axis=2)
        out = br
        # remove weighted_features, br when done with visualisation
        return tensor.nnet.softmax(out * lam), weighted_features, br
Example #29
    def get_all_params(self, prev_prefix=None):
        '''Return an OrderedDict with all the parameters.

        Return an OrderedDict with the parameters of self and
        of every child, renamed so that they contain the full
        inclusion path in their name.

        Corresponds to:
        for k, v in self.params.iteritems():
            part = [(k, v)]
            for child in unroll(self.children):
                if child:
                    for k, v in child.get_all_params().iteritems():
                        part += [(_p(self.get_prefix(), k), v)]
        return OrderedDict(part)
        '''
        if prev_prefix:
            self.baseprefix = _p(prev_prefix, self.baseprefix)
        if self.children == []:
            return self.param_init()
        else:
            return OrderedDict([(k, v)
                                for k, v in self.param_init().iteritems()] +
                               [(k, v)
                                for child in unroll(self.children) if child
                                for k, v in child.get_all_params(
                                    self.get_prefix()).iteritems()])
Example #31
    def __init__(self, mkl, n_in, n_hids, n_cdim, maxout_part=2,
                 name='rnn_decoder',
                 with_attention=True,
                 with_coverage=False,
                 coverage_dim=1,
                 coverage_type='linguistic',
                 max_fertility=2,
                 with_context_gate=False):

        self.n_in = n_in
        self.n_hids = n_hids
        self.n_cdim = n_cdim
        self.maxout_part = maxout_part
        self.pname = name
        self.with_attention = with_attention
        self.with_coverage = with_coverage
        self.coverage_dim = coverage_dim
        assert coverage_type in ['linguistic', 'neural'], 'Coverage type must be either linguistic or neural'
        self.coverage_type = coverage_type
        self.max_fertility = max_fertility
        self.with_context_gate = with_context_gate
        self.mkl = mkl

        self._init_params()

        # mkl decoder
        self.attention_ = Attention_(self.n_hids, name=_p(name, '_attention'))
        self.GRU_op = mkl_gru.GRU(hid=self.n_hids, return_sequences=True)
Example #32
def param_init(params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matricies for faster dot prods
    W = np.concatenate([norm_weight(nin,dim),
                        norm_weight(nin,dim),
                        norm_weight(nin,dim),
                        norm_weight(nin,dim)], axis=1)
    params[_p(prefix, 'W')] = W
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = np.zeros((4 * dim,)).astype('float32')

    return params
Example #33
    def apply(self, state_below, mask_below=None, init_state=None, context=None):

        if K.ndim(state_below) == 2:
            state_below = K.expand_dims(state_below, 1)

        if mask_below is None:
            mask_below = K.ones_like(K.sum(state_below, axis=2, keepdims=True))

        if init_state is None:
            # nb_samples,n_hids
            init_state = K.repeat_elements(K.expand_dims(K.zeros_like(K.sum(state_below, axis=[0, 2]))), self.n_hids, axis=1)
        print('init state ', K.ndim(init_state))

        state_below_xh = K.dot(state_below, self.W_xh)
        state_below_xz = K.dot(state_below, self.W_xz)
        state_below_xr = K.dot(state_below, self.W_xr)
        sequences = [state_below_xh, state_below_xz, state_below_xr, mask_below]

        if K._BACKEND == 'theano':
            fn = lambda x_h, x_z, x_r, x_m, h_tm1: self._step(x_h, x_z, x_r, x_m, h_tm1)
        else:
            fn = lambda h_tm1, (x_h, x_z, x_r, x_m): self._step(x_h, x_z, x_r, x_m, h_tm1)

        rval = K.scan(fn,
                      sequences=sequences,
                      outputs_initials=init_state,
                      name=_p(self.pname, 'layers'))

        self.output = rval
        return self.output
Example #34
    def _lstm(m_, x_, h_, c_, prefix='lstm_en'):
        preact = tensor.dot(x_, tparams[_p(prefix, 'W')]) + tparams[_p(
            prefix, 'b')]
        preact += tensor.dot(h_, tparams[_p(prefix, 'U')])

        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
        c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_
        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c
Example #35
def deconv_depool(layer0_input, tparams, options, prefix):
    if prefix == 'd':
        depool_out = depool_repeat(layer0_input, options['e_pool_size'])
    elif prefix == 'd2':
        depool_out = depool_repeat(layer0_input, options['e2_pool_size'])
    s = int(np.floor(options[_p(prefix, 'Nt')] / 2.))
    _W = get_filter(tparams, options, prefix).astype(theano.config.floatX)
    deconv_out = conv.conv2d(input=depool_out.dimshuffle(0, 1, 'x', 2),
                             filters=_W,
                             filter_shape=options[_p(prefix, 'filter_shape')],
                             border_mode='full')[:, :, :, s - 1:-s]
    doutput = (
        deconv_out +
        tparams[_p(prefix, 'bias')].dimshuffle('x', 0, 'x', 'x')).reshape(
            (deconv_out.shape[0], options['C'], options['T']))
    return doutput
Example #36
def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    assert mask is not None

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n+1) * dim]
        return _x[:, n * dim:(n+1) * dim]

    def _step(m_, x_, h_, c_):
        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
        c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                   tparams[_p(prefix, 'b')])

    dim_proj = options['dim_proj']
    rval, updates = theano.scan(_step,
                                sequences=[mask, state_below],
                                outputs_info=[tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj),
                                              tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj)],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps)

    return rval[0][-1]
Example #37
def decoder(tparams, state_below, z, mask=None, prefix='decoder'):
    
    """ state_below: size of n_steps * n_samples * n_x 
        z: size of n_samples * n_z
    """

    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]
     
    n_h = tparams[_p(prefix,'U')].shape[0]

    #  n_samples * n_h
    state_belowx0 = tensor.dot(z, tparams[_p(prefix, 'C0')]) + \
            tparams[_p(prefix, 'b0')]
    h0 = tensor.tanh(state_belowx0)
    
    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # n_steps * n_samples * n_h
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
        tensor.dot(z, tparams[_p(prefix, 'C')]) + tparams[_p(prefix, 'b')]
    
    def _step(m_, x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))
        
        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c
                          
    # teacher forcing: feed steps 0..n_steps-2; h0 (computed from z) is
    # prepended below, so the returned sequence still has n_steps states
    seqs = [mask[:n_steps-1], state_below_[:n_steps-1]]

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info = [h0,tensor.alloc(numpy_floatX(0.),
                                                    n_samples,n_h)],
                                non_sequences = [tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps-1,
                                strict=True)
                                
    h0x = tensor.shape_padleft(h0)
    h_rval = rval[0]
                            
    return tensor.concatenate((h0x,h_rval))  
Example #38
    def apply(self, state_below, mask_below=None, init_state=None, context=None):

        n_steps = state_below.shape[0]
        if state_below.ndim == 3:
            batch_size = state_below.shape[1]
        else:
            batch_size = 1
            state_below = state_below.reshape((n_steps, batch_size, state_below.shape[1]))

        if mask_below is None:
            mask_below = T.alloc(numpy.float32(1.), n_steps, 1)

        if self.with_context:
            assert context

            if init_state is None:
                init_state = T.tanh(T.dot(context, self.W_c_init))

            c_z = T.dot(context, self.W_cz)
            c_r = T.dot(context, self.W_cr)
            c_h = T.dot(context, self.W_ch)
            non_sequences = [c_z, c_r, c_h]

            rval, updates = theano.scan(self._step_context,
                                        sequences=[state_below, mask_below],
                                        non_sequences=non_sequences,
                                        outputs_info=[init_state],
                                        name=_p(self.pname, 'layers'),
                                        n_steps=n_steps)
        else:
            if init_state is None:
                init_state = T.alloc(numpy.float32(0.), batch_size, self.n_hids)

            state_below_xh = T.dot(state_below, self.W_xh)
            state_below_xz = T.dot(state_below, self.W_xz)
            state_below_xr = T.dot(state_below, self.W_xr)
            sequences = [state_below_xh, state_below_xz, state_below_xr, mask_below]

            rval, updates = theano.scan(self._step,
                                        sequences=sequences,
                                        outputs_info=[init_state],
                                        name=_p(self.pname, 'layers'),
                                        n_steps=n_steps)
        self.output = rval

        return self.output
Example #39
def build_encoder(tparams, options):

    x = tensor.matrix('x', dtype='int32')
    y = tensor.matrix('y', dtype='int32')

    layer0_input = tparams['Wemb'][tensor.cast(x.flatten(),
                                               dtype='int32')].reshape(
                                                   (x.shape[0], 1, x.shape[1],
                                                    tparams['Wemb'].shape[1]))

    layer1_inputs = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(tparams,
                             layer0_input,
                             filter_shape=filter_shape,
                             pool_size=pool_size,
                             prefix=_p('cnn_encoder', i))
        layer1_input = conv_layer
        layer1_inputs.append(layer1_input)
    layer1_input_x = tensor.concatenate(layer1_inputs, 1)

    layer0_input = tparams['Wemb'][tensor.cast(y.flatten(),
                                               dtype='int32')].reshape(
                                                   (y.shape[0], 1, y.shape[1],
                                                    tparams['Wemb'].shape[1]))

    layer1_inputs = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(tparams,
                             layer0_input,
                             filter_shape=filter_shape,
                             pool_size=pool_size,
                             prefix=_p('cnn_encoder', i))
        layer1_input = conv_layer
        layer1_inputs.append(layer1_input)
    layer1_input_y = tensor.concatenate(layer1_inputs, 1)

    feat_x = l2norm(layer1_input_x)
    feat_y = l2norm(layer1_input_y)

    return [x, y], feat_x, feat_y
Example #40
def param_init_decoder(options, params, prefix='decoder_gru'):

    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)],
                       axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h), ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx

    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux

    params[_p(prefix, 'bx')] = zero_bias(n_h)

    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
Example #41
def decoder_layer(tparams, state_below, prefix='decoder_gru'):
    """ state_below: size of n_steps *  n_x 
    """

    n_steps = state_below.shape[0]
    n_h = tparams[_p(prefix, 'Ux')].shape[1]

    state_belowx0 = tparams[_p(prefix, 'b0')]
    h0vec = tensor.tanh(state_belowx0)
    h0 = h0vec.dimshuffle('x', 0)

    def _slice(_x, n, dim):
        return _x[n * dim:(n + 1) * dim]

    state_below_ = tensor.dot(state_below, tparams[_p(
        prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(
        prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]

    def _step_slice(x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        u = tensor.nnet.sigmoid(_slice(preact, 1, n_h))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)

        h = u * h_ + (1. - u) * h

        return h

    seqs = [state_below_[:n_steps - 1], state_belowx[:n_steps - 1]]
    _step = _step_slice

    rval, updates = theano.scan(
        _step,
        sequences=seqs,
        outputs_info=[h0vec],
        non_sequences=[tparams[_p(prefix, 'U')], tparams[_p(prefix, 'Ux')]],
        name=_p(prefix, '_layers'),
        n_steps=n_steps - 1)

    #h0x = h0.dimshuffle('x',0,1)

    return tensor.concatenate((h0, rval))
Example #44
    def __init__(self,
                 n_in,
                 n_hids,
                 table,
                 mkl,
                 name='rnn_encoder',
                 max_len=None):

        # lookup table
        self.table = table
        # embedding dimension
        self.n_in = n_in
        # hidden state dimension
        self.n_hids = n_hids
        self.mkl = mkl
        self.params = []
        self.layers = []
        self.max_len = max_len

        if self.mkl:
            print('with mkl')
            self.forward = MKL_GRU(self.n_in,
                                   self.n_hids,
                                   name=_p(name, 'forward'),
                                   max_len=max_len)
        else:
            print('with no mkl')
            self.forward = GRU(self.n_in,
                               self.n_hids,
                               name=_p(name, 'forward'))
        self.layers.append(self.forward)
        if self.mkl:
            self.backward = MKL_GRU(self.n_in,
                                    self.n_hids,
                                    name=_p(name, 'backward'),
                                    max_len=max_len)
        else:
            self.backward = GRU(self.n_in,
                                self.n_hids,
                                name=_p(name, 'backward'))
        self.layers.append(self.backward)

        for layer in self.layers:
            self.params.extend(layer.params)
Example #45
def encoder(tparams, state_below, mask, seq_output=False, prefix='gru_encoder'):
    
    """ state_below: size of n_steps * n_samples * n_x 
    """

    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]

    n_h = tparams[_p(prefix,'Ux')].shape[1]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
                    tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + \
                    tparams[_p(prefix, 'bx')]

    def _step(m_, x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        u = tensor.nnet.sigmoid(_slice(preact, 1, n_h))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)

        h = u * h_ + (1. - u) * h
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info = [tensor.alloc(numpy_floatX(0.),
                                                             n_samples, n_h)],
                                non_sequences = [tparams[_p(prefix, 'U')],
                                                 tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps,
                                strict=True)
    if seq_output:
        return rval
    else:
        # size of n_samples * n_h
        return rval[-1]  
Example #47
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # embedding to gates transformation weights, biases
    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = np.zeros((2 * dim,)).astype('float32')

    # recurrent transformation weights for gates
    U = np.concatenate([ortho_weight(dim),
                        ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # embedding to hidden state proposal weights, biases
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    params[_p(prefix, 'bx')] = np.zeros((dim,)).astype('float32')

    # recurrent transformation weights for hidden state proposal
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux

    return params
Example #48
    def _init_params(self):

        shape_xh = (self.n_in, self.n_hids)
        shape_xh4 = (self.n_in, 4*self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)
        shape_hh4 = (self.n_hids, 4*self.n_hids)

        self.W_pre_x = norm_weight(rng=self.rng, shape=shape_xh4, name=_p(self.pname, 'W_pre_x'))
        self.W_h = multi_orth(rng=self.rng, size=shape_hh4, name=_p(self.pname, 'W_h'))

        b_i = constant_weight(share=False, shape=(self.n_hids, ), name=_p(self.pname, 'b_i'))
        b_f = constant_weight(share=False, value=1., shape=(self.n_hids, ), name=_p(self.pname, 'b_f'))
        b_o = constant_weight(share=False, shape=(self.n_hids, ), name=_p(self.pname, 'b_o'))
        b_c = constant_weight(share=False, shape=(self.n_hids, ), name=_p(self.pname, 'b_c'))
        b_ifoc = numpy.concatenate([b_i, b_f, b_o, b_c], axis=0)
        self.b_pre_x = theano.shared(value=b_ifoc, borrow=True, name=_p(self.pname, 'b_pre_x'))

        self.params += [self.W_pre_x, self.W_h, self.b_pre_x]

        if self.with_context:
            raise NotImplementedError

        if self.with_begin_tag:
            self.struct_begin_tag = constant_weight(shape=(self.n_hids,), value=0., name=_p(self.pname, 'struct_begin_tag'))
            self.params += [self.struct_begin_tag]

        if self.with_end_tag:
            self.struct_end_tag = constant_weight(shape=(self.n_in,), value=0., name=_p(self.pname, 'struct_end_tag'))
            self.params += [self.struct_end_tag]

        if self.n_att_ctx:
            self.lstm_combine_ctx_h = LSTM(self.n_att_ctx, self.n_hids, rng=self.rng, name=_p(self.pname, 'lstm_combine_ctx_h'))
            self.params.extend(self.lstm_combine_ctx_h.params)
            self.attention = ATTENTION(self.n_hids, self.rng, name=_p(self.pname, 'att_ctx'))
            self.params.extend(self.attention.params)
            if self.seq_pyramid:
                self.pyramid_on_seq = LSTM(self.n_att_ctx, self.n_att_ctx, rng=self.rng, name=_p(self.pname, 'pyramid_on_seq'))
                self.params.extend(self.pyramid_on_seq.params)
                self.ff_pyramid2ctx = FeedForward(self.n_att_ctx, self.n_hids, name=_p(self.pname, 'ff_pyramid2ctx'))
                self.params.extend(self.ff_pyramid2ctx.params)
Example #49
    def apply_seq_pyramid(self, state_below,
                                state_below_p,
                                mask_below=None,
                                init_state_h=None,
                                init_state_c=None,
                                init_state_hp=None,
                                init_state_cp=None,
                                context=None):
        '''
        state_below: shape=[seq_len, batch, n_in]
        state_below_p: shape=[seq_len, batch, n_in_p]
        init_state:  shape=[batch, seq_len, hid]
        '''
        n_steps = state_below.shape[0]
        if state_below.ndim == 3:
            batch_size = state_below.shape[1]
        else:
            batch_size = 1
            state_below = state_below.reshape((n_steps, batch_size, state_below.shape[1]))
            state_below_p = state_below_p.reshape((n_steps, batch_size, state_below_p.shape[1]))

        if mask_below is None:
            mask_below = T.alloc(numpy.float32(1.), n_steps, 1)

        if self.with_context:
            raise NotImplementedError
        else:
            if init_state_h is None:
                init_state_h = T.alloc(numpy.float32(0.), batch_size, self.n_hids)
            if init_state_c is None:
                init_state_c = T.alloc(numpy.float32(0.), batch_size, self.n_hids)
            if init_state_hp is None:
                init_state_hp = T.alloc(numpy.float32(0.), batch_size, n_steps, self.n_hids)
            if init_state_cp is None:
                init_state_cp = T.alloc(numpy.float32(0.), batch_size, n_steps, self.n_hids)

            state_below_pre = T.dot(state_below, self.W_pre_x) + self.b_pre_x
            state_below_p_pre = T.dot(state_below_p, self.pyramid_on_seq.W_pre_x) + self.pyramid_on_seq.b_pre_x
            step_idx        = T.arange(n_steps)
            sequences       = [state_below_pre, state_below_p_pre, mask_below, step_idx]
            outputs_info    = [init_state_h, init_state_c, init_state_hp, init_state_cp]
            non_sequences   = []

            rval, updates = theano.scan(self._seq_pyramid_step,
                                        sequences=sequences,
                                        outputs_info=outputs_info,
                                        non_sequences=non_sequences,
                                        name=_p(self.pname, 'layers'),
                                        n_steps=n_steps)
        self.output = rval
        return self.output
Example #50
def param_init_lstm(options, params, prefix='lstm'):
    """Initialize the LSTM parameters.

    :options: model options dict; uses options['dim_proj']
    :params: OrderedDict of parameters to extend
    :prefix: name prefix for the new parameter keys
    :returns: the updated params dict

    """
    W = numpy.concatenate([ortho_weight(options['dim_proj']),
                           ortho_weight(options['dim_proj']),
                           ortho_weight(options['dim_proj']),
                           ortho_weight(options['dim_proj'])], axis=1)
    params[_p(prefix, 'W')] = W
    U = numpy.concatenate([ortho_weight(options['dim_proj']),
                           ortho_weight(options['dim_proj']),
                           ortho_weight(options['dim_proj']),
                           ortho_weight(options['dim_proj'])], axis=1)
    params[_p(prefix, 'U')] = U
    b = numpy.zeros((4 * options['dim_proj'],))
    params[_p(prefix, 'b')] = b.astype(config.floatX)

    return params
Example #51
    def _step(m_, x_, h_, c_):
        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
        c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c
Example #52
def init_params(options,W):
    
    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x'] 
    # params['Wemb'] = uniform_weight(n_words,n_x)
    
    length = len(options['filter_shapes'])
    for idx in range(length):
        params = param_init_encoder(options['filter_shapes'][idx],params,prefix=_p('cnn_encoder',idx))
    
    n_h = options['feature_maps'] * length
    params['Wy'] = uniform_weight(n_h,options['n_y'])
    params['by'] = zero_bias(options['n_y'])                                     

    return params
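A sketch of how filter_shapes and pool_sizes are typically constructed for these CNN encoders (variable names and sizes here are assumptions, following the Kim-style convolutional text classifiers this code resembles):

filter_hs = [3, 4, 5]     # filter heights (n-gram sizes); assumed
feature_maps = 100        # options['feature_maps']; assumed
img_h = 40                # padded sentence length; assumed
n_x = 300                 # word-embedding dimension; assumed

filter_shapes, pool_sizes = [], []
for h in filter_hs:
    # (number of filters, num input feature maps, filter height, filter width)
    filter_shapes.append((feature_maps, 1, h, n_x))
    # a 'valid' convolution leaves img_h - h + 1 rows; pool over all of them
    pool_sizes.append((img_h - h + 1, 1))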
Example #53
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    W = numpy.concatenate([norm_weight(nin,dim),
                           norm_weight(nin,dim)], axis=1)
    params[_p(prefix,'W')] = W
    params[_p(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix,'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix,'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix,'Ux')] = Ux
    params[_p(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
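
A quick shape check, assuming the codebase's norm_weight, ortho_weight and _p (with the usual '%s_%s' naming) are in scope; the dimension is illustrative:

options = {'dim_proj': 64}
params = param_init_gru(options, {}, prefix='encoder')
# reset/update gates are stacked along the second axis ...
assert params['encoder_W'].shape == (64, 128)
assert params['encoder_U'].shape == (64, 128)
# ... while the candidate state has its own unstacked projections
assert params['encoder_Wx'].shape == (64, 64)
assert params['encoder_Ux'].shape == (64, 64)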
Exemplo n.º 54
0
def gru_cond_layer(tparams, state_below, options, prefix='gru',
                   mask=None, context=None, one_step=False,
                   init_memory=None, init_state=None,
                   context_mask=None,
                   **kwargs):
    assert context is not None, 'Context must be provided'

    if one_step:
        assert init_state is not None, 'previous state must be provided'

    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    # mask
    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    dim = tparams[_p(prefix, 'Wcx')].shape[1]

    # initial/previous state
    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)

    # projected context
    assert context.ndim == 3, \
        'Context must be 3-d: #annotation x #sample x dim'
    pctx_ = tensor.dot(context, tparams[_p(prefix, 'Wc_att')]) + \
            tparams[_p(prefix, 'b_att')]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    # projected x
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + \
                   tparams[_p(prefix, 'bx')]
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
                   tparams[_p(prefix, 'b')]

    def _step_slice(m_, x_, xx_, h_, ctx_, alpha_, pctx_, cc_,
                    U, Wc, W_comb_att, U_att, c_tt, Ux, Wcx,
                    U_nl, Ux_nl, b_nl, bx_nl):
        # first GRU: update the hidden state from the previous target input
        preact1 = tensor.dot(h_, U)
        preact1 += x_
        preact1 = tensor.nnet.sigmoid(preact1)

        r1 = _slice(preact1, 0, dim)  # reset gate
        u1 = _slice(preact1, 1, dim)  # update gate

        preactx1 = tensor.dot(h_, Ux)
        preactx1 *= r1
        preactx1 += xx_

        h1 = tensor.tanh(preactx1)

        h1 = u1 * h_ + (1. - u1) * h1
        h1 = m_[:, None] * h1 + (1. - m_)[:, None] * h_

        # attention: score each source annotation against the new state
        pstate_ = tensor.dot(h1, W_comb_att)
        pctx__ = pctx_ + pstate_[None, :, :]
        pctx__ = tensor.tanh(pctx__)
        alpha = tensor.dot(pctx__, U_att) + c_tt
        alpha = alpha.reshape([alpha.shape[0], alpha.shape[1]])
        # softmax over the annotation axis, respecting the source mask
        alpha = tensor.exp(alpha)
        if context_mask is not None:
            alpha = alpha * context_mask
        alpha = alpha / alpha.sum(0, keepdims=True)
        ctx_ = (cc_ * alpha[:, :, None]).sum(0)  # current context

        # second GRU: fold the attention context into the state
        preact2 = tensor.dot(h1, U_nl) + b_nl
        preact2 += tensor.dot(ctx_, Wc)
        preact2 = tensor.nnet.sigmoid(preact2)

        r2 = _slice(preact2, 0, dim)
        u2 = _slice(preact2, 1, dim)

        preactx2 = tensor.dot(h1, Ux_nl) + bx_nl
        preactx2 *= r2
        preactx2 += tensor.dot(ctx_, Wcx)

        h2 = tensor.tanh(preactx2)

        h2 = u2 * h1 + (1. - u2) * h2
        h2 = m_[:, None] * h2 + (1. - m_)[:, None] * h1

        return h2, ctx_, alpha.T

    seqs = [mask, state_below_, state_belowx]
    # seqs = [mask, state_below_, state_belowx, state_belowc]
    _step = _step_slice

    shared_vars = [tparams[_p(prefix, 'U')],
                   tparams[_p(prefix, 'Wc')],
                   tparams[_p(prefix, 'W_comb_att')],
                   tparams[_p(prefix, 'U_att')],
                   tparams[_p(prefix, 'c_tt')],
                   tparams[_p(prefix, 'Ux')],
                   tparams[_p(prefix, 'Wcx')],
                   tparams[_p(prefix, 'U_nl')],
                   tparams[_p(prefix, 'Ux_nl')],
                   tparams[_p(prefix, 'b_nl')],
                   tparams[_p(prefix, 'bx_nl')]]

    if one_step:
        rval = _step(*(seqs + [init_state, None, None, pctx_, context] +
                       shared_vars))
    else:
        rval, updates = theano.scan(_step,
                                    sequences=seqs,
                                    outputs_info=[init_state,
                                                  tensor.alloc(0., n_samples,
                                                               context.shape[2]),
                                                  tensor.alloc(0., n_samples,
                                                               context.shape[0])],
                                    non_sequences=[pctx_, context] + shared_vars,
                                    name=_p(prefix, '_layers'),
                                    n_steps=nsteps,
                                    profile=profile,
                                    strict=True)
    return rval
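
The alpha computation inside _step_slice is a masked softmax over the source annotations. A standalone numpy sketch of the same normalization (shapes and mask are illustrative):

import numpy

n_annot, n_samples = 5, 2
scores = numpy.random.randn(n_annot, n_samples)  # unnormalized attention scores
context_mask = numpy.ones((n_annot, n_samples))
context_mask[3:, 1] = 0.                         # second sample is shorter

alpha = numpy.exp(scores)
alpha = alpha * context_mask                     # zero out padded positions
alpha = alpha / alpha.sum(0, keepdims=True)      # normalize over annotations
assert numpy.allclose(alpha.sum(0), 1.0)
assert (alpha[3:, 1] == 0.).all()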
Exemplo n.º 55
0
    def get_prefix(self):
        if self.prefix == '':
            return self.baseprefix
        else:
            return _p(self.baseprefix, self.prefix)
Exemplo n.º 56
0
    def _pname(self, name):
        return _p(self.get_prefix(), name)
Exemplo n.º 57
0
    def _init_params(self):
        shape_xh = (self.n_in, self.n_hids)
        shape_xh2 = (self.n_in, 2*self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)
        shape_hh2 = (self.n_hids, 2*self.n_hids)

        self.W_xzr = norm_weight(rng=self.rng, shape=shape_xh2, name=_p(self.pname, 'W_xzr'))
        self.W_xh  = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_zr  = constant_weight(shape=(2*self.n_hids, ), name=_p(self.pname, 'b_zr'))
        self.b_h   = constant_weight(shape=(self.n_hids, ), name=_p(self.pname, 'b_h'))
        self.W_hzr = multi_orth(rng=self.rng, size=shape_hh2, name=_p(self.pname, 'W_hzr'))
        self.W_hh  = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params += [self.W_xzr, self.W_xh,
                        self.W_hzr, self.W_hh,
                        self.b_zr,  self.b_h]

        if self.with_context:
            shape_ch = (self.c_hids, self.n_hids)
            self.W_cz = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cz'))
            self.W_cr = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cr'))
            self.W_ch = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_ch'))
            self.W_c_init = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_c_init'))

            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

        if self.with_begin_tag:
            self.struct_begin_tag = constant_weight(shape=(self.n_hids,), value=0., name=_p(self.pname, 'struct_begin_tag'))
            self.params += [self.struct_begin_tag]

        if self.with_end_tag:
            self.struct_end_tag = constant_weight(shape=(self.n_in,), value=0., name=_p(self.pname, 'struct_end_tag'))
            self.params += [self.struct_end_tag]

        if self.n_att_ctx:
            self.gru_combine_ctx_h = GRU(self.n_att_ctx, self.n_hids, rng=self.rng, name=_p(self.pname, 'gru_combine_ctx_h'))
            self.params.extend(self.gru_combine_ctx_h.params)
            self.attention = ATTENTION(self.n_hids, self.rng, name=_p(self.pname, 'att_ctx'))
            self.params.extend(self.attention.params)
Exemplo n.º 58
0
    def _init_params(self):
        shape_hh = (self.n_hids, self.n_hids)
        self.W_comb_att = norm_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_comb_att'))
        self.U_att = norm_weight(rng=self.rng, shape=(self.n_hids, 1), name=_p(self.pname, 'U_att'))
        self.c_att = constant_weight(shape=(1,), name=_p(self.pname, 'c_att'))
        self.params = [self.W_comb_att, self.U_att, self.c_att]
Exemplo n.º 59
0
def fflayer(tparams, state_below, options, prefix='rconv',
            activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Feedforward pass
    """
    return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                       tparams[_p(prefix, 'b')])
Exemplo n.º 60
0
def param_init_gru_cond(options, params, prefix='gru_cond',
                        nin=None, dim=None, dimctx=None,
                        nin_nonlin=None, dim_nonlin=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']
    if nin_nonlin is None:
        nin_nonlin = nin
    if dim_nonlin is None:
        dim_nonlin = dim

    W = np.concatenate([norm_weight(nin, dim),
                        norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = np.zeros((2 * dim,)).astype('float32')
    U = np.concatenate([ortho_weight(dim_nonlin),
                        ortho_weight(dim_nonlin)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin_nonlin, dim_nonlin)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim_nonlin)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = np.zeros((dim_nonlin,)).astype('float32')

    U_nl = np.concatenate([ortho_weight(dim_nonlin),
                           ortho_weight(dim_nonlin)], axis=1)
    params[_p(prefix, 'U_nl')] = U_nl
    params[_p(prefix, 'b_nl')] = np.zeros((2 * dim_nonlin,)).astype('float32')

    Ux_nl = ortho_weight(dim_nonlin)
    params[_p(prefix, 'Ux_nl')] = Ux_nl
    params[_p(prefix, 'bx_nl')] = np.zeros((dim_nonlin,)).astype('float32')

    # context to the GRU gates and candidate state
    Wc = norm_weight(dimctx, dim * 2)
    params[_p(prefix, 'Wc')] = Wc

    Wcx = norm_weight(dimctx, dim)
    params[_p(prefix, 'Wcx')] = Wcx

    # attention: combined -> hidden
    W_comb_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'W_comb_att')] = W_comb_att

    # attention: context -> hidden (norm_weight with one argument
    # yields a square dimctx x dimctx matrix)
    Wc_att = norm_weight(dimctx)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: hidden bias
    b_att = np.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention: annotation scoring vector and bias
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = np.zeros((1,)).astype('float32')
    # stored under the key 'c_tt' to match the step function's argument name
    params[_p(prefix, 'c_tt')] = c_att

    return params
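
For cross-reference with Exemplo n.º 54: the keys created here are exactly the ones gru_cond_layer reads. A hedged shape summary, assuming norm_weight (whose single-argument form is taken to return a square matrix), ortho_weight and _p ('%s_%s' naming) are in scope, with illustrative sizes:

nin = dim = dimctx = 32
params = param_init_gru_cond({'dim': 32}, {}, prefix='decoder')
for name, shape in [('W', (nin, 2 * dim)), ('U', (dim, 2 * dim)),
                    ('Wx', (nin, dim)), ('Ux', (dim, dim)),
                    ('Wc', (dimctx, 2 * dim)), ('Wcx', (dimctx, dim)),
                    ('W_comb_att', (dim, dimctx)), ('Wc_att', (dimctx, dimctx)),
                    ('U_att', (dimctx, 1)), ('c_tt', (1,))]:
    assert params['decoder_' + name].shape == shape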