def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
    """Compute the classification error rate of ``f_pred`` on ``data``.

    f_pred: Theano fct computing the prediction; called as ``f_pred(x, mask)``.
    prepare_data: usual prepare_data for that dataset; called with the
        selected samples, their labels and ``maxlen=None`` and expected to
        return ``(x, mask, y)``.
    data: indexable pair; ``data[0]`` holds the samples and ``data[1]`` the
        target labels.
    iterator: iterable of ``(_, index)`` pairs, where ``index`` selects the
        samples of one batch.
    verbose: when true, print the predictions and targets of every batch.

    Returns ``1 - n_correct / len(data[0])``.
    """
    # The label array does not change between batches, so build it once.
    labels = np.array(data[1])

    n_correct = 0
    for _, valid_index in iterator:
        targets = labels[valid_index]
        x, mask, _labels = prepare_data([data[0][t] for t in valid_index],
                                        targets,
                                        maxlen=None)
        preds = f_pred(x, mask)
        n_correct += (preds == targets).sum()

        if verbose:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('-' * 80)
            print(preds)
            print('-' * 40)
            print(targets)

    return 1. - utils.cast_floatX(n_correct) / len(data[0])
def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
    """Compute the classification error rate of ``f_pred`` on ``data``.

    f_pred: Theano fct computing the prediction; called as ``f_pred(x, mask)``.
    prepare_data: usual prepare_data for that dataset; called with the
        selected samples, their labels and ``maxlen=None`` and expected to
        return ``(x, mask, y)``.
    data: indexable pair; ``data[0]`` holds the samples and ``data[1]`` the
        target labels.
    iterator: iterable of ``(_, index)`` pairs, where ``index`` selects the
        samples of one batch.
    verbose: when true, print the predictions and targets of every batch.

    Returns ``1 - n_correct / len(data[0])``.
    """
    # The label array does not change between batches, so build it once.
    labels = np.array(data[1])

    n_correct = 0
    for _, valid_index in iterator:
        targets = labels[valid_index]
        x, mask, _labels = prepare_data([data[0][t] for t in valid_index],
                                        targets,
                                        maxlen=None)
        preds = f_pred(x, mask)
        n_correct += (preds == targets).sum()

        if verbose:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('-' * 80)
            print(preds)
            print('-' * 40)
            print(targets)

    return 1. - utils.cast_floatX(n_correct) / len(data[0])
def init_emb(shape):
    """Return a small random initial weight matrix.

    shape: ``(num_in, num_out)`` pair giving the matrix dimensions.

    The entries are uniform samples from ``np.random.rand`` scaled by 0.01
    and passed through ``utils.cast_floatX``.
    """
    num_in, num_out = shape
    # Use the ``np`` alias like the rest of the file; note that ``rand``
    # draws uniform samples, not Gaussian ones.
    uniform = np.random.rand(num_in, num_out)
    return utils.cast_floatX(0.01 * uniform)
def ortho_weight(shape):
    """Return a random orthogonal square matrix for weight initialisation.

    shape: ``(n, n)`` pair; both entries must be equal.

    A Gaussian random ``n x n`` matrix is decomposed with SVD and its left
    singular vectors are returned after ``utils.cast_floatX``.
    """
    side = shape[0]
    assert shape[0] == shape[1]
    gaussian = np.random.randn(side, side)
    # Only the first SVD factor (the left singular vectors) is needed.
    left_vectors = np.linalg.svd(gaussian)[0]
    return utils.cast_floatX(left_vectors)
def init_emb(shape):
    """Return a small random initial weight matrix.

    shape: ``(num_in, num_out)`` pair giving the matrix dimensions.

    The entries are uniform samples from ``np.random.rand`` scaled by 0.01
    and passed through ``utils.cast_floatX``.
    """
    rows, cols = shape
    scaled = 0.01 * np.random.rand(rows, cols)
    return utils.cast_floatX(scaled)
def build_model(n_words, encoder, dim_proj, num_hidden, p_dropout, maxlen,
                decay_c, use_dropout=True, optimizer=optimizers.sgd):
    """Build the sequence classifier graph and compile its Theano functions.

    The graph is: embedding -> recurrent encoder -> masked mean pooling over
    axis 1 -> optional dropout -> 2-way softmax.

    n_words, dim_proj: dimensions of the embedding matrix
        ``(n_words, dim_proj)``.
    encoder: ``'lstm'`` or ``'rnn'``; selects the recurrent layer.
    num_hidden: hidden size of the encoder and input size of the classifier.
    p_dropout: forwarded to ``layers.dropoutLayer``.
    maxlen: used for the declared input shape ``(1, maxlen)``.
    decay_c: L2 penalty coefficient on the classifier weights; the penalty
        is only added when ``decay_c > 0``.
    use_dropout: when false the classifier reads the mean-pooled features
        directly.
    optimizer: callable invoked as
        ``optimizer(cost, [x, mask, y], params, options=options)``.

    Returns ``(f_pred_prob, f_pred, opt, params, use_noise)``.

    Raises ValueError if ``encoder`` is not a supported name.
    """
    if encoder not in ('lstm', 'rnn'):
        raise ValueError(
            "unknown encoder %r (expected 'lstm' or 'rnn')" % (encoder,))

    trng = RandomStreams(SEED)

    # by using shared variable, we can control whether to use noise without
    # recompiling
    use_noise = theano.shared(utils.cast_floatX(0.))

    x = T.matrix('x', dtype='int64')
    xshape = (1, maxlen)
    mask = T.bmatrix('mask')
    y = T.vector('y', dtype='int64')
    lr = T.scalar()
    options = {'lr': lr}

    net = {}
    params = {}

    def init_emb(shape):
        num_in, num_out = shape
        randn = np.random.rand(num_in, num_out)
        return utils.cast_floatX(0.01 * randn)

    fc_winit = init_emb

    def ortho_weight(shape):
        ndim = shape[0]
        assert shape[0] == shape[1]
        W = np.random.randn(ndim, ndim)
        u, s, v = np.linalg.svd(W)
        return utils.cast_floatX(u)

    # Generate the initial weights. All of them are sampled regardless of
    # the chosen encoder, and in this exact order, so that the sequence of
    # random draws (and therefore a seeded run) stays reproducible.
    w_emb = init_emb((n_words, dim_proj))
    shape_x = (dim_proj, num_hidden)
    shape_h = (num_hidden, num_hidden)
    w_xi, w_hi, b_i = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    w_xf, w_hf, b_f = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    w_xo, w_ho, b_o = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    w_xc, w_hc, b_c = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    print(w_xc[10:, 0])  # debug dump of part of the sampled weights
    w_fc = fc_winit((num_hidden, 2))

    net['emb'] = layers.EmbeddingLayer((x, xshape), params, n_words,
                                       dim_proj, w=w_emb)

    if encoder == 'lstm':
        net['encoder'] = layers.LSTMLayer(
            net['emb'], 0., 0., params, num_hidden, mask,
            w_xi=w_xi, w_hi=w_hi, b_i=b_i,
            w_xf=w_xf, w_hf=w_hf, b_f=b_f,
            w_xo=w_xo, w_ho=w_ho, b_o=b_o,
            w_xc=w_xc, w_hc=w_hc, b_c=b_c,
        )
    else:  # encoder == 'rnn'
        net['encoder'] = layers.RNNLayer(net['emb'], 0., params, num_hidden,
                                         mask, w_initializer=ortho_weight)

    # mean pool. multiplied by mask to remove "EOS noise"
    pooled = ((net['encoder'][0] * mask[:, :, None]).sum(axis=1) /
              mask.sum(axis=1)[:, None].astype(theano.config.floatX))
    encoder_shape = list(net['encoder'][1])
    net['mean_pool'] = (pooled, [encoder_shape[0]] + encoder_shape[2:])

    # Without dropout the classifier must read the pooled features;
    # net['dropout'] only exists when dropout is enabled.
    if use_dropout:
        net['dropout'] = layers.dropoutLayer(net['mean_pool'], use_noise,
                                             trng, p_dropout)
        features = net['dropout'][0]
    else:
        features = net['mean_pool'][0]

    params['w_fc'] = theano.shared(w_fc)
    params['b_fc'] = theano.shared(utils.cast_floatX(np.zeros(2)))
    pred = T.nnet.softmax(T.dot(features, params['w_fc']) + params['b_fc'])

    # Small offset keeps log() finite when a predicted probability is 0.
    off = 1e-8
    n_samples = x.shape[0]
    cost = -T.log(pred[T.arange(n_samples), y] + off).mean()

    if decay_c > 0:
        # The classifier weight is registered as 'w_fc'. A parameter named
        # 'U' is still preferred if some layer registered one, so setups
        # that relied on that name keep their behaviour.
        decayed = params.get('U', params['w_fc'])
        cost += decay_c * (decayed ** 2).sum()

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob',
                                  allow_input_downcast=True)
    f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred',
                             allow_input_downcast=True)
    opt = optimizer(cost, [x, mask, y], params, options=options)

    return f_pred_prob, f_pred, opt, params, use_noise
def build_model(n_words, encoder, dim_proj, num_hidden, p_dropout, maxlen,
                decay_c, use_dropout=True, optimizer=optimizers.sgd):
    """Build the sequence classifier graph and compile its Theano functions.

    The graph is: embedding -> recurrent encoder -> masked mean pooling over
    axis 1 -> optional dropout -> 2-way softmax.

    n_words, dim_proj: dimensions of the embedding matrix
        ``(n_words, dim_proj)``.
    encoder: ``'lstm'`` or ``'rnn'``; selects the recurrent layer.
    num_hidden: hidden size of the encoder and input size of the classifier.
    p_dropout: forwarded to ``layers.dropoutLayer``.
    maxlen: used for the declared input shape ``(1, maxlen)``.
    decay_c: L2 penalty coefficient on the classifier weights; the penalty
        is only added when ``decay_c > 0``.
    use_dropout: when false the classifier reads the mean-pooled features
        directly.
    optimizer: callable invoked as
        ``optimizer(cost, [x, mask, y], params, options=options)``.

    Returns ``(f_pred_prob, f_pred, opt, params, use_noise)``.

    Raises ValueError if ``encoder`` is not a supported name.
    """
    if encoder not in ('lstm', 'rnn'):
        raise ValueError(
            "unknown encoder %r (expected 'lstm' or 'rnn')" % (encoder,))

    trng = RandomStreams(SEED)

    # by using shared variable, we can control whether to use noise without
    # recompiling
    use_noise = theano.shared(utils.cast_floatX(0.))

    x = T.matrix('x', dtype='int64')
    xshape = (1, maxlen)
    mask = T.bmatrix('mask')
    y = T.vector('y', dtype='int64')
    lr = T.scalar()
    options = {'lr': lr}

    net = {}
    params = {}

    def init_emb(shape):
        num_in, num_out = shape
        randn = np.random.rand(num_in, num_out)
        return utils.cast_floatX(0.01 * randn)

    fc_winit = init_emb

    def ortho_weight(shape):
        ndim = shape[0]
        assert shape[0] == shape[1]
        W = np.random.randn(ndim, ndim)
        u, s, v = np.linalg.svd(W)
        return utils.cast_floatX(u)

    # Generate the initial weights. All of them are sampled regardless of
    # the chosen encoder, and in this exact order, so that the sequence of
    # random draws (and therefore a seeded run) stays reproducible.
    w_emb = init_emb((n_words, dim_proj))
    shape_x = (dim_proj, num_hidden)
    shape_h = (num_hidden, num_hidden)
    w_xi, w_hi, b_i = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    w_xf, w_hf, b_f = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    w_xo, w_ho, b_o = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    w_xc, w_hc, b_c = (init.Orth().sample(shape_x),
                       init.Orth().sample(shape_h),
                       init.Const().sample(num_hidden))
    print(w_xc[10:, 0])  # debug dump of part of the sampled weights
    w_fc = fc_winit((num_hidden, 2))

    net['emb'] = layers.EmbeddingLayer((x, xshape), params, n_words,
                                       dim_proj, w=w_emb)

    if encoder == 'lstm':
        net['encoder'] = layers.LSTMLayer(
            net['emb'], 0., 0., params, num_hidden, mask,
            w_xi=w_xi, w_hi=w_hi, b_i=b_i,
            w_xf=w_xf, w_hf=w_hf, b_f=b_f,
            w_xo=w_xo, w_ho=w_ho, b_o=b_o,
            w_xc=w_xc, w_hc=w_hc, b_c=b_c,
        )
    else:  # encoder == 'rnn'
        net['encoder'] = layers.RNNLayer(net['emb'], 0., params, num_hidden,
                                         mask, w_initializer=ortho_weight)

    # mean pool. multiplied by mask to remove "EOS noise"
    pooled = ((net['encoder'][0] * mask[:, :, None]).sum(axis=1) /
              mask.sum(axis=1)[:, None].astype(theano.config.floatX))
    encoder_shape = list(net['encoder'][1])
    net['mean_pool'] = (pooled, [encoder_shape[0]] + encoder_shape[2:])

    # Without dropout the classifier must read the pooled features;
    # net['dropout'] only exists when dropout is enabled.
    if use_dropout:
        net['dropout'] = layers.dropoutLayer(net['mean_pool'], use_noise,
                                             trng, p_dropout)
        features = net['dropout'][0]
    else:
        features = net['mean_pool'][0]

    params['w_fc'] = theano.shared(w_fc)
    params['b_fc'] = theano.shared(utils.cast_floatX(np.zeros(2)))
    pred = T.nnet.softmax(T.dot(features, params['w_fc']) + params['b_fc'])

    # Small offset keeps log() finite when a predicted probability is 0.
    off = 1e-8
    n_samples = x.shape[0]
    cost = -T.log(pred[T.arange(n_samples), y] + off).mean()

    if decay_c > 0:
        # The classifier weight is registered as 'w_fc'. A parameter named
        # 'U' is still preferred if some layer registered one, so setups
        # that relied on that name keep their behaviour.
        decayed = params.get('U', params['w_fc'])
        cost += decay_c * (decayed ** 2).sum()

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob',
                                  allow_input_downcast=True)
    f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred',
                             allow_input_downcast=True)
    opt = optimizer(cost, [x, mask, y], params, options=options)

    return f_pred_prob, f_pred, opt, params, use_noise
import theano
from theano import tensor as T
from theano.tensor import nnet
import numpy as np
from neuralcraft.utils import cast_floatX
from neuralcraft.layers import LSTMLayer, RNNLayer
import lasagne

# Dimensions of the small random fixture built below.
batch_size = 3
seq_len = 4
hid_size = 2
x_size = 2

# Random weights applied to the input, one (x_size, hid_size) matrix per
# suffix i, f, o, c. Generator unpacking keeps the draw order of the
# individual assignments and adds no extra module-level names.
w_xi, w_xf, w_xo, w_xc = (
    cast_floatX(np.random.randn(x_size, hid_size)) for _ in range(4))

# Random weights applied to the hidden state, (hid_size, hid_size) each.
w_hi, w_hf, w_ho, w_hc = (
    cast_floatX(np.random.randn(hid_size, hid_size)) for _ in range(4))

# Zero biases; every name gets its own array.
bi, bf, bo, bc = (cast_floatX(np.zeros(hid_size)) for _ in range(4))

# Random input batch of shape (batch_size, seq_len, x_size) and a random
# boolean mask of shape (batch_size, seq_len).
x = cast_floatX(np.random.randn(batch_size, seq_len, x_size))
x_shape = x.shape
mask = np.random.rand(batch_size, seq_len) > 0.5