def __init__(self, inputs, mask, load_from=None, rand_init_params=None):
    '''Build a sigmoid recurrent layer that scans over a masked sequence.

    inputs: symbolic tensor whose shape unpacks into
        (n_steps, n_samples, emb_dim); theano.scan iterates over its
        leading axis.
    mask: symbolic tensor scanned in lockstep with inputs. Each slice m_t
        is broadcast as m_t[:, None]: where it is 1 the new hidden state
        is kept, where it is 0 the previous hidden state is carried over.
    load_from: file-like object from which W, U and b are unpickled, in
        that order. Takes precedence over rand_init_params.
    rand_init_params: (rng, (n_in, n_out))
        n_in = emb_dim (* context window size)
        n_out = n_hidden

    Raises Exception when both load_from and rand_init_params are None.

    Sets self.W, self.U, self.b (shared variables), self.params and
    self.outputs (the hidden state at every step, as returned by scan).
    '''
    self.inputs = inputs
    self.mask = mask

    if load_from is not None:
        # NOTE: pickle.load can execute arbitrary code while unpickling;
        # load_from must come from a trusted source.
        W_values = pickle.load(load_from)
        U_values = pickle.load(load_from)
        b_values = pickle.load(load_from)
        n_out = W_values.shape[1]
    elif rand_init_params is not None:
        rng, (n_in, n_out) = rand_init_params
        # 6.0 forces true division: with integer operands Python 2 would
        # truncate 6 / (n_in + n_out) to 0 and zero out the init range.
        limS = 4 * (6.0 / (n_in + n_out)) ** 0.5
        W_values = rand_matrix(rng, limS, (n_in, n_out))
        U_values = rand_matrix(rng, limS, (n_out, n_out))
        b_values = np.zeros(n_out, dtype=theano.config.floatX)
    else:
        raise Exception('Invalid initial inputs!')

    self.W = theano.shared(value=W_values, name='rnn_W', borrow=True)
    self.U = theano.shared(value=U_values, name='rnn_U', borrow=True)
    self.b = theano.shared(value=b_values, name='rnn_b', borrow=True)
    self.params = [self.W, self.U, self.b]

    def _step(m_t, x_t, h_tm1):
        # hidden units at time t, h(t) is formed from THREE parts:
        #   input at time t, x(t)
        #   hidden units at time t-1, h(t-1)
        #   hidden layer bias, b
        h_t = T.nnet.sigmoid(
            T.dot(x_t, self.W) + T.dot(h_tm1, self.U) + self.b)
        # mask: keep the previous state wherever m_t is 0
        h_t = m_t[:, None] * h_t + (1 - m_t)[:, None] * h_tm1
        return h_t

    # Only the sample count is needed, to size the initial hidden state.
    _, n_samples, _ = inputs.shape
    initial_h = T.alloc(np.asarray(0., dtype=theano.config.floatX),
                        n_samples, n_out)
    # The updates returned by scan are not used by this layer.
    hs, _ = theano.scan(fn=_step,
                        sequences=[mask, inputs],
                        outputs_info=[initial_h])
    self.outputs = hs
def __init__(self, inputs, load_from=None, rand_init_params=None,
             gensim_w2v=None, dic=None):
    '''Create an embedding lookup table and index it with inputs.

    inputs: index expression used to select rows of the table
        (self.outputs = self.W[inputs]).
    load_from: file-like object from which the table is unpickled.
        Takes precedence over rand_init_params.
    rand_init_params: (rng, (voc_dim, emb_dim))
    gensim_w2v, dic: optional; used only with rand_init_params and only
        when both are given. Rows whose word (looked up through
        dic._idx2word) exists in gensim_w2v.wv are overwritten with the
        word2vec vector, and the number of substitutions is printed.

    Raises Exception when both load_from and rand_init_params are None.
    '''
    self.inputs = inputs

    if load_from is None and rand_init_params is None:
        raise Exception('Invalid initial inputs!')

    if load_from is not None:
        # NOTE: pickle.load must only be given trusted files.
        table = pickle.load(load_from)
    else:
        rng, (voc_dim, emb_dim) = rand_init_params
        table = rand_matrix(rng, 1, (voc_dim, emb_dim))

        use_w2v = not (gensim_w2v is None or dic is None)
        if use_w2v:
            assert gensim_w2v.vector_size == emb_dim
            replaced = 0
            for row, token in dic._idx2word.items():
                if token not in gensim_w2v.wv:
                    continue
                table[row] = gensim_w2v.wv[token]
                replaced += 1
            print('Substituted words by word2vec: %d/%d' % (replaced, voc_dim))

        # Applied to every freshly initialised table, with or without
        # word2vec substitution.
        table = normalize_matrix(table)

    self.W = theano.shared(value=table, name='emb_W', borrow=True)
    self.params = [self.W]
    self.outputs = self.W[inputs]
def __init__(self, inputs, activation=T.tanh, load_from=None,
             rand_init_params=None):
    '''Build a fully connected layer: activation(dot(inputs, W) + b).

    inputs: symbolic input multiplied on the right by W.
    activation: callable applied to the linear result; when None the
        linear result itself becomes the output.
    load_from: file-like object from which W and b are unpickled, in
        that order. Takes precedence over rand_init_params.
    rand_init_params: (rng, (n_in, n_out))

    Raises Exception when both load_from and rand_init_params are None.

    Sets self.W, self.b (shared variables), self.params and self.outputs.
    '''
    self.inputs = inputs
    self.activation = activation

    if load_from is not None:
        # NOTE: pickle.load can execute arbitrary code while unpickling;
        # load_from must come from a trusted source.
        W_values = pickle.load(load_from)
        b_values = pickle.load(load_from)
    elif rand_init_params is not None:
        rng, (n_in, n_out) = rand_init_params
        # 6.0 forces true division: with integer operands Python 2 would
        # truncate 6 / (n_in + n_out) to 0 and zero out the init range.
        limT = (6.0 / (n_in + n_out)) ** 0.5
        W_values = rand_matrix(rng, limT, (n_in, n_out))
        if activation is T.nnet.sigmoid:
            # Sigmoid units use a 4x wider initial range than the default.
            W_values *= 4
        b_values = np.zeros(n_out, dtype=theano.config.floatX)
    else:
        raise Exception('Invalid initial inputs!')

    self.W = theano.shared(value=W_values, name='hidden_W', borrow=True)
    self.b = theano.shared(value=b_values, name='hidden_b', borrow=True)
    self.params = [self.W, self.b]

    linear_out = T.dot(inputs, self.W) + self.b
    self.outputs = linear_out if activation is None else activation(linear_out)
def __init__(self, inputs, image_shape, load_from=None, rand_init_params=None):
    '''Build a 2-D convolution layer followed by tanh.

    rand_init_params: (rng, filter_shape)
    load_from: file-like object from which W and b are unpickled, in
        that order; filter_shape is then taken from W. Takes precedence
        over rand_init_params.

    inputs: (batch size, stack size, n_words/steps, emb_dim)

    filter_shape: (output stack size, input stack size,
                   filter height, filter width)
        output stack size = ?
        input stack size = 1
        filter height = ?
        filter width = emb_dim (* context window size)

    image_shape (input shape): (batch_size, input stack size,
                                input feature map height,
                                input feature map width)
        batch_size = ?
        input stack size = 1
        input feature map height = n_words/steps
        input feature map width = emb_dim (* context window size)

    output shape: (batch size, output stack size,
                   output feature map height, output feature map width)
        batch_size = ?
        output stack size = ?
        output feature map height = n_words/steps - filter height + 1
        output feature map width = 1

    Raises Exception when both load_from and rand_init_params are None.
    '''
    self.inputs = inputs

    if load_from is not None:
        # NOTE: pickle.load can execute arbitrary code while unpickling;
        # load_from must come from a trusted source.
        W_values = pickle.load(load_from)
        b_values = pickle.load(load_from)
        filter_shape = W_values.shape
    elif rand_init_params is not None:
        rng, filter_shape = rand_init_params
        fan_in = filter_shape[1] * filter_shape[2] * filter_shape[3]
        fan_out = filter_shape[0] * filter_shape[2] * filter_shape[3]
        # 6.0 forces true division: with integer operands Python 2 would
        # truncate 6 / (fan_in + fan_out) to 0 and zero out the init range.
        limT = (6.0 / (fan_in + fan_out)) ** 0.5
        W_values = rand_matrix(rng, limT, filter_shape)
        # One bias per output feature map.
        b_values = np.zeros(filter_shape[0], dtype=theano.config.floatX)
    else:
        raise Exception('Invalid initial inputs!')

    self.W = theano.shared(value=W_values, name='conv_W', borrow=True)
    self.b = theano.shared(value=b_values, name='conv_b', borrow=True)
    self.params = [self.W, self.b]

    conv_res = conv.conv2d(input=self.inputs, filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape)
    # Broadcast the per-feature-map bias across the other three axes.
    self.outputs = T.tanh(conv_res + self.b[None, :, None, None])
def __init__(self, inputs, mask, load_from=None, rand_init_params=None):
    '''Build an LSTM layer that scans over a masked sequence.

    inputs: symbolic tensor whose shape unpacks into
        (n_steps, n_samples, emb_dim); theano.scan iterates over its
        leading axis.
    mask: symbolic tensor scanned in lockstep with inputs. Each slice m_t
        is broadcast as m_t[:, None]: where it is 1 the new hidden/cell
        state is kept, where it is 0 the previous state is carried over.
    load_from: file-like object from which W, U and b are unpickled, in
        that order; n_out is then W.shape[1] // 4. Takes precedence over
        rand_init_params.
    rand_init_params: (rng, (n_in, n_out))
        n_in = emb_dim (* context window size)
        n_out = n_hidden

    W, U and b each hold four column blocks of width n_out, ordered
    [input gate, forget gate, output gate, cell candidate].

    Raises Exception when both load_from and rand_init_params are None.

    Sets self.W, self.U, self.b (shared variables), self.params and
    self.outputs (the hidden state at every step; cell states are
    computed by the scan but not stored).
    '''
    self.inputs = inputs
    self.mask = mask

    if load_from is not None:
        # NOTE: pickle.load can execute arbitrary code while unpickling;
        # load_from must come from a trusted source.
        W_values = pickle.load(load_from)
        U_values = pickle.load(load_from)
        b_values = pickle.load(load_from)
        n_out = W_values.shape[1] // 4
    elif rand_init_params is not None:
        rng, (n_in, n_out) = rand_init_params
        # 6.0 forces true division: with integer operands Python 2 would
        # truncate 6 / (n_in + n_out * 2) to 0 and zero out the init range.
        limT = (6.0 / (n_in + n_out * 2)) ** 0.5
        limS = 4 * limT
        # The last block feeds tanh rather than sigmoid, so its range is
        # scaled back from limS to limT.
        # [Wi, Wf, Wo, Wc]
        W_values = rand_matrix(rng, limS, (n_in, 4 * n_out))
        W_values[:, (3 * n_out):(4 * n_out)] /= 4
        # [Ui, Uf, Uo, Uc]
        U_values = rand_matrix(rng, limS, (n_out, 4 * n_out))
        U_values[:, (3 * n_out):(4 * n_out)] /= 4
        # [bi, bf, bo, bc]
        b_values = np.zeros(4 * n_out, dtype=theano.config.floatX)
    else:
        raise Exception('Invalid initial inputs!')

    self.W = theano.shared(value=W_values, name='lstm_W', borrow=True)
    self.U = theano.shared(value=U_values, name='lstm_U', borrow=True)
    self.b = theano.shared(value=b_values, name='lstm_b', borrow=True)
    self.params = [self.W, self.U, self.b]

    def _step(m_t, x_t, h_tm1, c_tm1):
        # x_t is a row of embeddings for several words in same position of
        # different sentences in a minibatch
        # x_t has dimension of (n_samples, n_emb), so it is a matrix
        # m_t is a row of mask matrix, so it is a vector, with dimension of
        # (n_samples, )
        # h_t and c_t are all (n_samples, n_hidden)
        linear_res = T.dot(x_t, self.W) + T.dot(h_tm1, self.U) + self.b

        i_t = T.nnet.sigmoid(linear_res[:, (0 * n_out):(1 * n_out)])
        f_t = T.nnet.sigmoid(linear_res[:, (1 * n_out):(2 * n_out)])
        o_t = T.nnet.sigmoid(linear_res[:, (2 * n_out):(3 * n_out)])
        c_t = T.tanh(linear_res[:, (3 * n_out):(4 * n_out)])

        c_t = f_t * c_tm1 + i_t * c_t
        # mask: keep the previous cell state wherever m_t is 0
        c_t = m_t[:, None] * c_t + (1 - m_t)[:, None] * c_tm1

        h_t = o_t * T.tanh(c_t)
        # mask: keep the previous hidden state wherever m_t is 0
        h_t = m_t[:, None] * h_t + (1 - m_t)[:, None] * h_tm1
        return h_t, c_t

    # Only the sample count is needed, to size the initial states.
    _, n_samples, _ = inputs.shape
    zero = np.asarray(0., dtype=theano.config.floatX)
    # The cell states and updates returned by scan are not used further.
    (hs, _), _ = theano.scan(
        fn=_step,
        sequences=[mask, inputs],
        outputs_info=[
            T.alloc(zero, n_samples, n_out),
            T.alloc(zero, n_samples, n_out),
        ])
    self.outputs = hs