Example #1
def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
  """
  Just compute the error
  f_pred: Theano fct computing the prediction
  prepare_data: usual prepare_data for that dataset.
  """
  valid_err = 0
  for _, valid_index in iterator:
    x, mask, y = prepare_data([data[0][t] for t in valid_index],
                              np.array(data[1])[valid_index],
                              maxlen=None)
    preds = f_pred(x, mask)
    targets = np.array(data[1])[valid_index]
    valid_err += (preds == targets).sum()
  # debug output: predictions vs. targets from the last batch
  print('-' * 80)
  print(preds)
  print('-' * 40)
  print(targets)
  valid_err = 1. - utils.cast_floatX(valid_err) / len(data[0])  # error rate = 1 - accuracy

  return valid_err
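Every example on this page goes through utils.cast_floatX, whose source is not included in the listing. A minimal sketch of what it presumably does, assuming it simply casts to theano.config.floatX as its name and the surrounding Theano code suggest (the body below is an assumption, not the NeuralCraft source):

import numpy as np
import theano

def cast_floatX(x):
  # Assumed implementation: cast an array (or scalar) to Theano's configured
  # float dtype, so float32 vs. float64 follows THEANO_FLAGS, not the code.
  return np.asarray(x, dtype=theano.config.floatX)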
Example #2
File: lstm.py  Project: tyeah/NeuralCraft
def init_emb(shape):
  num_in, num_out = shape
  # small uniform initialization in [0, 0.01) for the embedding table
  randn = numpy.random.rand(num_in, num_out)
  return utils.cast_floatX(0.01 * randn)
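Reusing init_emb from this example, a hypothetical call for a 10,000-word vocabulary with 128-dimensional embeddings (the sizes are made up for illustration):

w_emb = init_emb((10000, 128))  # floatX array, values uniform in [0, 0.01)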
Example #3
def ortho_weight(shape):
  ndim = shape[0]
  assert shape[0] == shape[1]
  # the left singular vectors of a square Gaussian matrix are orthonormal,
  # a common initializer for recurrent weight matrices
  W = np.random.randn(ndim, ndim)
  u, s, v = np.linalg.svd(W)
  return utils.cast_floatX(u)
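A quick sanity check, reusing ortho_weight from above: since u from the SVD is orthonormal, W.T.dot(W) should be (numerically) the identity. The tolerance here is a guess that allows for float32 output:

W = ortho_weight((256, 256))
assert np.allclose(W.T.dot(W), np.eye(256), atol=1e-3)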
Example #4
def build_model(n_words, encoder, dim_proj, num_hidden, p_dropout, maxlen, decay_c, use_dropout=True, optimizer=optimizers.sgd):
  trng = RandomStreams(SEED)

  # by using shared variable, we can control whether to use noise without recompiling
  use_noise = theano.shared(utils.cast_floatX(0.))
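  # (flip at run time with use_noise.set_value(1.) for training and
  #  use_noise.set_value(0.) for evaluation; no recompilation needed)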

  x = T.matrix('x', dtype='int64')
  xshape = (1, maxlen)
  mask = T.bmatrix('mask')
  y = T.vector('y', dtype='int64')
  lr = T.scalar()
  options = {'lr': lr}

  net = {}
  params = {}

  def init_emb(shape):
    num_in, num_out = shape
    randn = np.random.rand(num_in, num_out)
    return utils.cast_floatX(0.01 * randn)
  fc_winit = init_emb
  def ortho_weight(shape):
    ndim = shape[0]
    assert shape[0] == shape[1]
    W = np.random.randn(ndim, ndim)
    u, s, v = np.linalg.svd(W)
    return utils.cast_floatX(u)

  # generate weights
  w_emb = init_emb((n_words, dim_proj))
  shape_x = (dim_proj, num_hidden)
  shape_h = (num_hidden, num_hidden)
  w_xi, w_hi, b_i = init.Orth().sample(shape_x), init.Orth().sample(shape_h), init.Const().sample(num_hidden)
  w_xf, w_hf, b_f = init.Orth().sample(shape_x), init.Orth().sample(shape_h), init.Const().sample(num_hidden)
  w_xo, w_ho, b_o = init.Orth().sample(shape_x), init.Orth().sample(shape_h), init.Const().sample(num_hidden)
  w_xc, w_hc, b_c = init.Orth().sample(shape_x), init.Orth().sample(shape_h), init.Const().sample(num_hidden)
  print(w_xc[10:, 0])  # debug: peek at part of the candidate-cell input weights
  w_fc = fc_winit((num_hidden, 2))

  net['emb'] = layers.EmbeddingLayer((x, xshape), params, n_words, dim_proj, w=w_emb)
  if encoder == 'lstm':
    # net['encoder'] = layers.LSTMLayer(net['emb'], 0., 0., params, num_hidden, mask, w_initializer=init.Orth())
    net['encoder'] = layers.LSTMLayer(net['emb'], 0., 0., params, num_hidden, mask,
        w_xi=w_xi, w_hi=w_hi, b_i=b_i,
        w_xf=w_xf, w_hf=w_hf, b_f=b_f,
        w_xo=w_xo, w_ho=w_ho, b_o=b_o,
        w_xc=w_xc, w_hc=w_hc, b_c=b_c,
        )
  elif encoder == 'rnn':
    net['encoder'] = layers.RNNLayer(net['emb'], 0., params, num_hidden, mask, w_initializer=ortho_weight)

  # mean pooling over time; multiplying by the mask removes "EOS noise" from padded positions
  net['mean_pool'] = (net['encoder'][0] * mask[:, :, None]).sum(axis=1) / mask.sum(axis=1)[:, None].astype(theano.config.floatX)
  encoder_shape = list(net['encoder'][1])
  net['mean_pool'] = (net['mean_pool'], [encoder_shape[0]] + encoder_shape[2:])
  if use_dropout:
    net['dropout'] = layers.dropoutLayer(net['mean_pool'], use_noise, trng, p_dropout)

  #pred, pred_shape = layers.FCLayer(net['dropout'], params, 2, activation=T.nnet.softmax, w_name='U', w=w_fc)
  params['w_fc'] = theano.shared(w_fc)
  params['b_fc'] = theano.shared(utils.cast_floatX(np.zeros(2)))
  pred = T.nnet.softmax(T.dot(net['dropout'][0], params['w_fc']) + params['b_fc'])
  f_encoder = theano.function([x, mask], net['encoder'][0], name='encoder', allow_input_downcast=True)  # for inspection; unused below
  f_mean_pool = theano.function([x, mask], net['mean_pool'][0], name='mean_pool', allow_input_downcast=True)  # for inspection; unused below

  off = 1e-8
  #off = 0
  #cost = (T.nnet.categorical_crossentropy(pred, y)).mean() #correct
  n_samples = x.shape[0]
  cost = -T.log(pred[T.arange(n_samples), y] + off).mean()
  #cost_arr0 = T.nnet.categorical_crossentropy(pred, y)
  #cost_arr1 = -T.log(pred[T.arange(n_samples), y] + off)
  #n_samples = x.shape[0]
  if decay_c > 0:
    # L2 penalty on the classifier weights, stored above as params['w_fc']
    weight_decay = decay_c * (params['w_fc'] ** 2).sum()
    cost += weight_decay


  f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob', allow_input_downcast=True)
  f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred', allow_input_downcast=True)
  opt = optimizer(cost, [x, mask, y], params, options=options)
  #opt = theano.function([x, mask, y], cost, allow_input_downcast=True)

  '''
  f_emb = theano.function([x], net['emb'][0], allow_input_downcast=True)
  f_encoder = theano.function([x, mask], net['encoder'][0], allow_input_downcast=True)
  f_dp = theano.function([x, mask], net['dropout'][0], allow_input_downcast=True)
  cost_arr0 = theano.function([x, mask, y], cost_arr0, allow_input_downcast=True)
  cost_arr1 = theano.function([x, mask, y], cost_arr1, allow_input_downcast=True)
  return f_pred_prob, f_pred, opt, params, use_noise, (f_emb, f_encoder, f_dp, cost_arr0, cost_arr1)
  '''
  return f_pred_prob, f_pred, opt, params, use_noise
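For reference, a hypothetical call of build_model; every hyperparameter value below is made up, and the optimizer defaults to optimizers.sgd per the signature above:

f_pred_prob, f_pred, opt, params, use_noise = build_model(
    n_words=10000, encoder='lstm', dim_proj=128, num_hidden=128,
    p_dropout=0.5, maxlen=100, decay_c=0.)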
Example #5
import theano
from theano import tensor as T
from theano.tensor import nnet
import numpy as np
from neuralcraft.utils import cast_floatX
from neuralcraft.layers import LSTMLayer, RNNLayer
import lasagne

batch_size = 3
seq_len = 4
hid_size = 2
x_size = 2

w_xi = cast_floatX(np.random.randn(x_size, hid_size))
w_xf = cast_floatX(np.random.randn(x_size, hid_size))
w_xo = cast_floatX(np.random.randn(x_size, hid_size))
w_xc = cast_floatX(np.random.randn(x_size, hid_size))

w_hi = cast_floatX(np.random.randn(hid_size, hid_size))
w_hf = cast_floatX(np.random.randn(hid_size, hid_size))
w_ho = cast_floatX(np.random.randn(hid_size, hid_size))
w_hc = cast_floatX(np.random.randn(hid_size, hid_size))

bi = cast_floatX(np.zeros(hid_size))
bf = cast_floatX(np.zeros(hid_size))
bo = cast_floatX(np.zeros(hid_size))
bc = cast_floatX(np.zeros(hid_size))

x = cast_floatX(np.random.randn(batch_size, seq_len, x_size))
x_shape = x.shape
mask = np.random.rand(batch_size, seq_len) > 0.5  # random boolean mask over time steps
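The listing stops here, before the weights are used. A hedged sketch of how the test might continue, assuming (from the LSTMLayer call in the build_model example above) that LSTMLayer takes an (expression, shape) pair, initial hidden/cell values, a params dict, the hidden size, and a mask, and returns a tuple whose first element is the output expression:

xt = T.tensor3('x')
maskt = T.bmatrix('mask')
params = {}
# Assumed signature, mirroring the LSTMLayer call in build_model above.
enc = LSTMLayer((xt, x_shape), 0., 0., params, hid_size, maskt,
                w_xi=w_xi, w_hi=w_hi, b_i=bi,
                w_xf=w_xf, w_hf=w_hf, b_f=bf,
                w_xo=w_xo, w_ho=w_ho, b_o=bo,
                w_xc=w_xc, w_hc=w_hc, b_c=bc)
f = theano.function([xt, maskt], enc[0], allow_input_downcast=True)
print(f(x, mask.astype('int8')))  # hidden states, shape (batch_size, seq_len, hid_size)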