def init_params(options):
    """
    Initialize the global (non-LSTM) parameters: the word embedding and
    the classifier.
    """
    params = OrderedDict()

    # embedding (mnist feeds raw vectors, so no embedding table is needed)
    if options['dataset'] != 'mnist':
        randn = rand_weight(options['n_words'], options['dim_word'])
        params['Wemb'] = randn.astype(config.floatX)

    # encoder layer
    params = get_layer(options['encoder'])[0](options,
                                              params,
                                              prefix=options['encoder'])

    # classifier, optionally preceded by an extra fully connected layer
    if options['lastHiddenLayer'] is not None:
        params['U'] = 0.01 * numpy.random.randn(
            options['lastHiddenLayer'], options['ydim']).astype(config.floatX)
        params['b'] = numpy.zeros((options['ydim'], )).astype(config.floatX)
        params['ToLastHidden_W'] = 0.01 * numpy.random.randn(
            options['dim_proj'], options['lastHiddenLayer']).astype(
                config.floatX)
        params['ToLastHidden_b'] = numpy.zeros(
            (options['lastHiddenLayer'], )).astype(config.floatX)
    else:
        params['U'] = 0.01 * numpy.random.randn(
            options['dim_proj'], options['ydim']).astype(config.floatX)
        params['b'] = numpy.zeros((options['ydim'], )).astype(config.floatX)

    return params

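# Example usage (a sketch; the option values below are illustrative
# assumptions, only the key names come from this file):
#
#   options = {'dataset': 'text', 'n_words': 10000, 'dim_word': 128,
#              'dim_proj': 128, 'ydim': 2, 'encoder': 'lstm',
#              'lastHiddenLayer': None}
#   params = init_params(options)   # 'Wemb', encoder weights, 'U', 'b'
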
def _encode(x_sub, mask_sub, proj_sub):
    """
    Encode one sub-sequence with the word-level encoder and return its
    last hidden state. Relies on `tparams` and `options` from the
    enclosing scope; note that the incoming `proj_sub` value is unused
    and is overwritten below.
    """
    n_timesteps = x_sub.shape[0]
    n_samples = x_sub.shape[1]

    # look up the word embeddings for the sub-sequence
    emb_sub = tparams['Wemb'][x_sub.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_word']])

    # run the word-level encoder over the embedded sub-sequence
    proj_sub = get_layer(options['encoder'])[1](tparams, emb_sub, options,
                                                prefix=options['encoder'] +
                                                '_word',
                                                mask=mask_sub)
    return proj_sub[-1]

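# Shape sketch for _encode (assuming x_sub holds int64 word indices):
#   x_sub    : (n_timesteps, n_samples)            word ids
#   mask_sub : (n_timesteps, n_samples)            1 where a token is real
#   emb_sub  : (n_timesteps, n_samples, dim_word)  looked-up embeddings
#   proj_sub : (n_timesteps, n_samples, dim_proj)  encoder hidden states
#   returned : (n_samples, dim_proj)               last hidden state
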
def build_model(tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    if options['dataset'] == 'mnist':
        print 'Using mnist dataset with single number input'
        x = tensor.matrix('x', dtype='float32')
    else:
        print 'Using text dataset with embedding input'
        x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # input word embedding
    if options['dataset'] == 'mnist':
        emb = x.reshape([n_timesteps, n_samples, options['dim_word']])
    else:
        emb = tparams['Wemb'][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])

    # dropout on embedding
    if options['dropout_input'] > 0:
        print 'Applying drop-out on input embedding (dropout_input:', \
            options['dropout_input'], ')'
        emb = dropout_layer(emb, options['dropout_input'], use_noise, trng)

    # encoder information
    print 'Using', options['encoder'], 'unit'
    if options['truncate_grad'] is not None and options['truncate_grad'] > 0:
        print 'Using gradient truncation to', options['truncate_grad'], 'steps'
    else:
        options['truncate_grad'] = -1

    # encoding layer
    proj = get_layer(options['encoder'])[1](tparams, emb, options,
                                            prefix=options['encoder'],
                                            mask=mask)

    # pooling over time
    if options['mean_pooling']:
        print 'Using mean_pooling'
        proj = (proj * mask[:, :, None]).sum(axis=0)  # mean pooling
        proj = proj / mask.sum(axis=0)[:, None]
    else:
        print 'Using last hidden state'
        proj = proj[-1]  # last hidden state
    sys.stdout.flush()

    # optional extra hidden layer before the classifier, plus dropout
    if options['lastHiddenLayer'] is not None:
        lastH = tensor.dot(proj, tparams['ToLastHidden_W']) \
            + tparams['ToLastHidden_b']
        lastH = tensor.nnet.sigmoid(lastH)
        if options['dropout_output'] > 0:
            lastH = dropout_layer(lastH, options['dropout_output'],
                                  use_noise, trng)
        pred = tensor.nnet.softmax(
            tensor.dot(lastH, tparams['U']) + tparams['b'])
    else:
        if options['dropout_output'] > 0:
            print 'Applying drop-out on hidden states (dropout_output:', \
                options['dropout_output'], ')'
            proj = dropout_layer(proj, options['dropout_output'],
                                 use_noise, trng)
        pred = tensor.nnet.softmax(
            tensor.dot(proj, tparams['U']) + tparams['b'])

    # for training
    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    f_pred = theano.function([x, mask], pred.argmax(axis=1),
                             name='f_pred')  # sample by argmax

    # small offset keeps log() away from zero probabilities
    off = 1e-8
    if pred.dtype == 'float16':
        off = 1e-6

    # per-sample negative log-likelihood of the correct class
    nlls = -tensor.log(pred[tensor.arange(n_samples), y] + off)

    return use_noise, x, mask, y, f_pred_prob, f_pred, nlls
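

# Example training wiring (a sketch, not code from this file): `nlls` is a
# per-sample negative log-likelihood vector, so a typical Theano setup
# averages it into a scalar cost and differentiates w.r.t. the shared
# parameters. `cost`, `grads` and `f_cost` are illustrative names only.
#
#   use_noise, x, mask, y, f_pred_prob, f_pred, nlls = \
#       build_model(tparams, options)
#   cost = nlls.mean()
#   grads = tensor.grad(cost, wrt=list(tparams.values()))
#   f_cost = theano.function([x, mask, y], cost, name='f_cost')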