Example #1
from collections import OrderedDict

import numpy
from theano import config


def init_params(options):
    """
    Initialize the global (non-LSTM) parameters: the word embedding and
    the classifier. Relies on the module-level helpers `rand_weight` and
    `get_layer`.
    """
    params = OrderedDict()
    # embedding
    if options['dataset'] != 'mnist':
        randn = rand_weight(options['n_words'], options['dim_word'])
        params['Wemb'] = randn.astype(config.floatX)

    # encoder layer
    params = get_layer(options['encoder'])[0](options,
                                              params,
                                              prefix=options['encoder'])

    # classifier, optionally preceded by an extra fully-connected layer
    if options['lastHiddenLayer'] is not None:
        params['U'] = 0.01 * numpy.random.randn(
            options['lastHiddenLayer'], options['ydim']).astype(config.floatX)
        params['b'] = numpy.zeros((options['ydim'], )).astype(config.floatX)

        params['ToLastHidden_W'] = 0.01 * numpy.random.randn(
            options['dim_proj'], options['lastHiddenLayer']).astype(
                config.floatX)
        params['ToLastHidden_b'] = numpy.zeros(
            (options['lastHiddenLayer'], )).astype(config.floatX)

    else:
        params['U'] = 0.01 * numpy.random.randn(
            options['dim_proj'], options['ydim']).astype(config.floatX)
        params['b'] = numpy.zeros((options['ydim'], )).astype(config.floatX)

    return params
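
For context, a sketch of how init_params might be called. Every value in the options dict below is an illustrative assumption (the encoder key must match a layer registered with get_layer); none of it is from the original code:

options = {
    'dataset': 'imdb',        # any non-'mnist' value enables the embedding
    'n_words': 10000,         # assumed vocabulary size
    'dim_word': 128,          # assumed embedding width
    'dim_proj': 128,          # assumed encoder hidden size
    'encoder': 'lstm',        # must be registered with get_layer
    'lastHiddenLayer': None,  # skip the extra pre-classifier layer
    'ydim': 2,                # assumed number of classes
}
params = init_params(options)  # OrderedDict of numpy arrays ('Wemb', 'U', 'b', ...)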
Example #2
def _encode(x_sub, mask_sub, proj_sub):
    # Nested helper: `tparams` and `options` are captured from the
    # enclosing scope, and the `proj_sub` argument is overwritten below.
    n_timesteps = x_sub.shape[0]
    n_samples = x_sub.shape[1]
    # embedding lookup, reshaped back to the (time, batch, dim) layout
    emb_sub = tparams['Wemb'][x_sub.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_word']])
    proj_sub = get_layer(options['encoder'])[1](tparams,
                                                emb_sub,
                                                options,
                                                prefix=options['encoder'] +
                                                '_word',
                                                mask=mask_sub)
    # return only the last hidden state of the word-level encoder
    return proj_sub[-1]
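
The flatten-then-reshape idiom used for the embedding lookup is easiest to see in plain numpy; this is just an illustrative sketch with made-up sizes, not part of the original code:

import numpy

n_timesteps, n_samples, dim_word = 3, 2, 4
Wemb = numpy.random.randn(10, dim_word)       # toy vocabulary of 10 words
x = numpy.random.randint(0, 10, size=(n_timesteps, n_samples))

# one embedding row per token, then back to the (time, batch, dim) layout
emb = Wemb[x.flatten()].reshape([n_timesteps, n_samples, dim_word])
assert emb.shape == (3, 2, 4)
assert (emb[1, 0] == Wemb[x[1, 0]]).all()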
Example #3
import sys

import theano
import theano.tensor as tensor
from theano import config
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams


def build_model(tparams, options):
    # SEED, numpy_floatX, dropout_layer and get_layer are module-level
    # helpers from the same project.
    trng = RandomStreams(SEED)

    # Used for dropout: set to 1. during training, 0. at test time.
    use_noise = theano.shared(numpy_floatX(0.))

    if options['dataset'] == 'mnist':
        print('Using mnist dataset with single number input')
        x = tensor.matrix('x', dtype='float32')
    else:
        print('Using text dataset with embedding input')
        x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # input word embedding
    if options['dataset'] == 'mnist':
        emb = x.reshape([n_timesteps, n_samples, options['dim_word']])
    else:
        emb = tparams['Wemb'][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])

    # dropout on embedding
    if options['dropout_input'] > 0:
        print('Applying drop-out on input embedding (dropout_input: %s)'
              % options['dropout_input'])
        emb = dropout_layer(emb, options['dropout_input'], use_noise, trng)

    # encoder information
    print('Using %s unit' % options['encoder'])
    if options['truncate_grad'] is not None and options['truncate_grad'] > 0:
        print('Using gradient truncation to %s steps' % options['truncate_grad'])
    else:
        options['truncate_grad'] = -1  # -1 means no truncation in theano.scan

    # encoding layer
    proj = get_layer(options['encoder'])[1](tparams,
                                            emb,
                                            options,
                                            prefix=options['encoder'],
                                            mask=mask)

    # pooling
    if options['mean_pooling']:
        print('Using mean_pooling')
        proj = (proj * mask[:, :, None]).sum(axis=0)  # masked sum over time
        proj = proj / mask.sum(axis=0)[:, None]       # divide by sequence lengths
    else:
        print('Using last hidden state')
        proj = proj[-1]  # last hidden state

    sys.stdout.flush()

    # optional extra fully-connected layer between encoder and classifier
    if options['lastHiddenLayer'] is not None:
        lastH = tensor.dot(
            proj, tparams['ToLastHidden_W']) + tparams['ToLastHidden_b']
        lastH = tensor.nnet.sigmoid(lastH)
        if options['dropout_output'] > 0:
            lastH = dropout_layer(lastH, options['dropout_output'], use_noise,
                                  trng)
        pred = tensor.nnet.softmax(
            tensor.dot(lastH, tparams['U']) + tparams['b'])
    else:
        if options['dropout_output'] > 0:
            print('Applying drop-out on hidden states (dropout_output: %s)'
                  % options['dropout_output'])
            proj = dropout_layer(proj, options['dropout_output'], use_noise,
                                 trng)

        pred = tensor.nnet.softmax(
            tensor.dot(proj, tparams['U']) + tparams['b'])

    # compiled prediction functions (class probabilities and argmax labels)
    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    f_pred = theano.function([x, mask], pred.argmax(axis=1),
                             name='f_pred')  # sample by argmax

    # small offset to avoid log(0); larger for low-precision predictions
    off = 1e-8
    if pred.dtype == 'float16':
        off = 1e-6
    # per-sample negative log-likelihood of the true class
    nlls = -tensor.log(pred[tensor.arange(n_samples), y] + off)

    return use_noise, x, mask, y, f_pred_prob, f_pred, nlls
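
build_model returns per-sample negative log-likelihoods rather than a compiled training function, so the caller is expected to reduce them to a cost and derive parameter updates itself. A minimal sketch with plain SGD, assuming tparams is the usual OrderedDict of theano shared variables; the f_train name and the SGD step are illustrative, not from the original code:

use_noise, x, mask, y, f_pred_prob, f_pred, nlls = build_model(tparams, options)

cost = nlls.mean()                                    # scalar objective
grads = tensor.grad(cost, wrt=list(tparams.values()))

lr = tensor.scalar(name='lr')                         # learning rate input
sgd_updates = [(p, p - lr * g)
               for p, g in zip(tparams.values(), grads)]
f_train = theano.function([x, mask, y, lr], cost,
                          updates=sgd_updates, name='f_train')

use_noise.set_value(1.)   # enable dropout while training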