def build_cnn():
    data_size = (None, 10, 100)  # Batch size x Img Channels x Height x Width
    input_var = T.tensor3(name="input", dtype='int64')
    values = np.array(np.random.randint(0, 1, (5, 10, 100)))
    input_var.tag.test_value = values

    input_layer = L.InputLayer(data_size, input_var=input_var)
    W = create_char_embedding_matrix()
    embed_layer = L.EmbeddingLayer(input_layer, input_size=102, output_size=101, W=W)
    reshape = L.reshape(embed_layer, (-1, 100, 101))
    dim_shuffle = L.dimshuffle(reshape, (0, 2, 1))
    #conv_layer_1 = L.Conv2DLayer(embed_layer, 4, (1), 1, 0)
    #pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=1)
    print L.get_output(dim_shuffle).tag.test_value.shape

    conv_layer_1 = L.Conv1DLayer(dim_shuffle, 50, 2, 1)
    print L.get_output(conv_layer_1).tag.test_value.shape
    print "TEST"
    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=99)
    print L.get_output(pool_layer_1).tag.test_value.shape
    reshape_conv_1 = L.reshape(pool_layer_1, (-1, 50))

    conv_layer_2 = L.Conv1DLayer(dim_shuffle, 50, 3, 1)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=98)
    reshape_conv_2 = L.reshape(pool_layer_2, (-1, 50))

    merge_layer = L.ConcatLayer([reshape_conv_1, reshape_conv_2], 1)
    print L.get_output(merge_layer).tag.test_value.shape
    reshape_output = L.reshape(merge_layer, (-1, 10, 100))
    print L.get_output(reshape_output).tag.test_value.shape

    x = T.tensor3(name="testname", dtype='int32')
    #x = T.imatrix()
    #output = L.get_output(conv_layer_1, x)
    #f = theano.function([x], output)
    word = unicode("Tat")
    word_index = np.array([])
    #print word_index
    #x_test = np.array([word_index]).astype('int32')
    #print f(x_test)
    return reshape_output
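For reference, the shapes the debug prints in this first build_cnn would report, worked through under the (5, 10, 100) test batch above:

# (5, 10, 100) ints -> EmbeddingLayer   -> (5, 10, 100, 101)
# -> reshape (-1, 100, 101)             -> (50, 100, 101)
# -> dimshuffle (0, 2, 1)               -> (50, 101, 100)   Conv1DLayer convolves over the last axis
# -> Conv1D(50 filters, size 2)         -> (50, 50, 99)     and the size-3 branch -> (50, 50, 98)
# -> MaxPool1D(99) / MaxPool1D(98)      -> (50, 50, 1) each -> reshape (50, 50) each
# -> ConcatLayer(axis=1)                -> (50, 100)
# -> reshape (-1, 10, 100)              -> (5, 10, 100)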
def build_cnn(input):
    #data_size = (None, 103, 130)  # Batch size x Img Channels x Height x Width
    #input_var = T.tensor3(name="input", dtype='int64')
    input_var = input
    #values = np.array(np.random.randint(0, 102, (1, 9, 50)))
    #input_var.tag.test_value = values

    # number sentences x words x characters
    input_layer = L.InputLayer((None, 9, 50), input_var=input)
    W = create_char_embedding_matrix()
    embed_layer = L.EmbeddingLayer(input_layer, input_size=103, output_size=101, W=W)
    #print "EMBED", L.get_output(embed_layer).tag.test_value.shape
    reshape_embed = L.reshape(embed_layer, (-1, 50, 101))
    #print "reshape embed", L.get_output(reshape_embed).tag.test_value.shape

    conv_layer_1 = L.Conv1DLayer(reshape_embed, 55, 2)
    conv_layer_2 = L.Conv1DLayer(reshape_embed, 55, 3)
    #print "TEST"
    #print "Convolution Layer 1", L.get_output(conv_layer_1).tag.test_value.shape
    #print "Convolution Layer 2", L.get_output(conv_layer_2).tag.test_value.shape
    #flatten_conv_1 = L.flatten(conv_layer_1, 3)
    #flatten_conv_2 = L.flatten(conv_layer_2, 3)
    #reshape_max_1 = L.reshape(flatten_conv_1, (-1, 49))
    #reshape_max_2 = L.reshape(flatten_conv_2, (-1, 48))
    #print "OUTPUT Flatten1", L.get_output(flatten_conv_1).tag.test_value.shape
    #print "OUTPUT Flatten2", L.get_output(flatten_conv_2).tag.test_value.shape
    #print "OUTPUT reshape_max_1", L.get_output(reshape_max_1).tag.test_value.shape
    #print "OUTPUT reshape_max_2", L.get_output(reshape_max_2).tag.test_value.shape

    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=54)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=53)
    #print "OUTPUT POOL1", L.get_output(pool_layer_1).tag.test_value.shape
    #print "OUTPUT POOL2", L.get_output(pool_layer_2).tag.test_value.shape

    merge_layer = L.ConcatLayer([pool_layer_1, pool_layer_2], 1)
    flatten_merge = L.flatten(merge_layer, 2)
    reshape_merge = L.reshape(flatten_merge, (1, 9, 110))
    print L.get_output(reshape_embed).shape
    #print L.get_output(reshape_merge).tag.test_value.shape
    return reshape_merge, char_index_lookup
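A minimal usage sketch for this second build_cnn variant, assuming create_char_embedding_matrix and char_index_lookup are defined alongside it; the int64 dtype and the random (1, 9, 50) batch mirror the commented-out test values.

import numpy as np
import theano
import theano.tensor as T
import lasagne.layers as L

input_var = T.tensor3(name='char_ids', dtype='int64')  # sentences x words x characters
network, _ = build_cnn(input_var)
encode = theano.function([input_var], L.get_output(network))

x = np.random.randint(0, 102, (1, 9, 50)).astype('int64')
print encode(x).shape  # (1, 9, 110), matching the reshape_merge above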
def get_context(self, conv_in, avg=False):
    suf = '_avg' if avg else ''
    conv_out = []
    # for n in [2,3,4,5,6,7,8,9]:
    # for n in [2,3,4,5]:
    for n in self.args.context_ngrams:
        conv = conv_in
        for i in range(self.args.conv_layers):
            conv = L.Conv1DLayer(
                conv, 128, n,
                name='conv_window_%d(%d)%s' % (n, i, suf),
                # W=HeNormal('relu') if not avg else Constant())  # (100, 128, 15-n+1)
                W=GlorotNormal('relu') if not avg else Constant())  # (100, 128, 15-n+1)
        conv = L.MaxPool1DLayer(
            conv,
            self.args.window_size - (n - 1) * self.args.conv_layers)  # (100, 128, 1)
        conv = L.flatten(conv, 2)  # (100, 128)
        conv_out.append(conv)
    x = L.concat(conv_out, axis=1)  # (100, 1024)
    return x
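get_context pulls its settings from self.args; a hypothetical configuration consistent with the shape comments above (batch 100, 128 filters, a window of 15, and the eight n-grams that make the concat 8 * 128 = 1024 wide) could look like this.

from argparse import Namespace

# Illustrative values only. With conv_layers=1, each n-gram branch leaves
# window_size - (n - 1) positions after the convolution, and the max-pool of
# exactly that size reduces it to one position per filter.
args = Namespace(context_ngrams=[2, 3, 4, 5, 6, 7, 8, 9],
                 conv_layers=1,
                 window_size=15)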
def set_conv_layer(self, network, layer_name, dropout=True, pad=0, bnorm=False):
    opts = self.net_opts[layer_name]
    ll = layers.Conv1DLayer(
        layers.dropout(network, p=self.net_opts['dropout_p']) if dropout else network,
        num_filters=opts['num_filters'],
        filter_size=opts['filter_size'],
        stride=opts['stride'],
        pad=pad,
        name=layer_name
    )
    return layers.batch_norm(ll) if bnorm else ll
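set_conv_layer reads its hyperparameters from self.net_opts keyed by layer name; a hypothetical configuration (only the key names come from the method above, the values are made up) might look like this.

# Hypothetical net_opts entry; key names match the lookups in set_conv_layer above.
net_opts = {
    'dropout_p': 0.5,
    'char_conv': {'num_filters': 64, 'filter_size': 5, 'stride': 1},
}
# conv = self.set_conv_layer(network, 'char_conv', dropout=True, pad='same', bnorm=True)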
def build_network(W, number_unique_tags, longest_word, longest_sentence, input_var=None):
    print("Building network ...")

    input_layer = L.InputLayer((None, longest_sentence, longest_word), input_var=input_var)
    embed_layer = L.EmbeddingLayer(input_layer, input_size=103, output_size=101, W=W)
    reshape_embed = L.reshape(embed_layer, (-1, longest_word, 101))

    conv_layer_1 = L.Conv1DLayer(reshape_embed, longest_word, 2)
    conv_layer_2 = L.Conv1DLayer(reshape_embed, longest_word, 3)
    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=longest_word - 1)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=longest_word - 2)

    merge_layer = L.ConcatLayer([pool_layer_1, pool_layer_2], 1)
    flatten_merge = L.flatten(merge_layer, 2)
    reshape_merge = L.reshape(flatten_merge, (-1, longest_sentence, int(longest_word * 2)))

    l_re = lasagne.layers.RecurrentLayer(
        reshape_merge, N_HIDDEN,
        nonlinearity=lasagne.nonlinearities.sigmoid, mask_input=None)
    l_out = lasagne.layers.DenseLayer(
        l_re, number_unique_tags, nonlinearity=lasagne.nonlinearities.softmax)

    print "DONE BUILDING NETWORK"
    return l_out
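A hedged sketch of how the returned l_out might be compiled into a training step; W, number_unique_tags, longest_word and longest_sentence are assumed to be in scope, and the target layout and Adam settings are illustrative assumptions rather than taken from the source.

import theano
import theano.tensor as T
import lasagne

input_var = T.itensor3('sentences')  # batch x longest_sentence x longest_word character ids
target_var = T.ivector('targets')    # one integer class per row of the softmax output (assumption)

l_out = build_network(W, number_unique_tags, longest_word, longest_sentence, input_var)
prediction = lasagne.layers.get_output(l_out)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
params = lasagne.layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, target_var], loss, updates=updates)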
def _build_net(self, emb_char_filter_size=5, emb_dropout=True, **kwargs):
    batch_size = self.mask_context_var.shape[0]
    context_len = self.mask_context_var.shape[1]
    question_len = self.question_var.shape[1]
    context_word_len = self.context_char_var.shape[2]
    question_word_len = self.question_char_var.shape[2]

    self.batch_size = batch_size
    self.context_len = context_len

    ''' Inputs and word embeddings'''

    l_context_char = LL.InputLayer(shape=(None, None, None),
                                   input_var=self.context_char_var)
    l_question_char = LL.InputLayer(shape=(None, None, None),
                                    input_var=self.question_char_var)

    l_c_mask = LL.InputLayer(shape=(None, None),
                             input_var=self.mask_context_var)
    l_q_mask = LL.InputLayer(shape=(None, None),
                             input_var=self.mask_question_var)

    l_c_char_mask = LL.InputLayer(shape=(None, None, None),
                                  input_var=self.mask_context_char_var)
    l_q_char_mask = LL.InputLayer(shape=(None, None, None),
                                  input_var=self.mask_question_char_var)

    l_c_emb = LL.InputLayer(shape=(None, None, self.emb_size),
                            input_var=self.context_var)
    l_q_emb = LL.InputLayer(shape=(None, None, self.emb_size),
                            input_var=self.question_var)

    if self.train_unk:
        l_c_unk_mask = LL.InputLayer(shape=(None, None),
                                     input_var=self.mask_context_unk_var)
        l_q_unk_mask = LL.InputLayer(shape=(None, None),
                                     input_var=self.mask_question_unk_var)

        l_c_emb = TrainUnkLayer(l_c_emb,
                                l_c_unk_mask,
                                output_size=self.emb_size,
                                W=self.word_embeddings[0])
        l_q_emb = TrainUnkLayer(l_q_emb,
                                l_q_unk_mask,
                                output_size=self.emb_size,
                                W=l_c_emb.W)

    if self.negative:
        l_c_emb = TrainNAWLayer(l_c_emb, l_c_mask, output_size=self.emb_size)

    ''' Char-embeddings '''

    # (batch_size x context_len x context_word_len x emb_char_size)
    l_c_char_emb = LL.EmbeddingLayer(l_context_char,
                                     input_size=self.alphabet_size,
                                     output_size=self.emb_char_size)
    l_q_char_emb = LL.EmbeddingLayer(l_question_char,
                                     input_size=self.alphabet_size,
                                     output_size=self.emb_char_size,
                                     W=l_c_char_emb.W)

    # here I do multiplication of character embeddings with masks,
    # because I want to pad them with constant zeros
    l_c_char_mask = ForgetSizeLayer(
        LL.dimshuffle(l_c_char_mask, (0, 1, 2, 'x')))
    l_q_char_mask = ForgetSizeLayer(
        LL.dimshuffle(l_q_char_mask, (0, 1, 2, 'x')))

    l_c_char_emb = LL.ElemwiseMergeLayer([l_c_char_emb, l_c_char_mask], T.mul)
    l_q_char_emb = LL.ElemwiseMergeLayer([l_q_char_emb, l_q_char_mask], T.mul)

    # convolutions
    l_c_char_emb = LL.dimshuffle(
        LL.reshape(l_c_char_emb,
                   (batch_size * context_len, context_word_len, self.emb_char_size)),
        (0, 2, 1))
    l_c_char_conv = LL.Conv1DLayer(l_c_char_emb,
                                   num_filters=self.num_emb_char_filters,
                                   filter_size=emb_char_filter_size,
                                   nonlinearity=L.nonlinearities.tanh,
                                   pad=self.conv)
    # (batch_size * context_len x num_filters x context_word_len + filter_size - 1)

    l_c_char_emb = LL.ExpressionLayer(l_c_char_conv,
                                      lambda X: X.max(2),
                                      output_shape='auto')
    l_c_char_emb = LL.reshape(
        l_c_char_emb, (batch_size, context_len, self.num_emb_char_filters))

    l_q_char_emb = LL.dimshuffle(
        LL.reshape(l_q_char_emb,
                   (batch_size * question_len, question_word_len, self.emb_char_size)),
        (0, 2, 1))
    l_q_char_conv = LL.Conv1DLayer(l_q_char_emb,
                                   num_filters=self.num_emb_char_filters,
                                   filter_size=emb_char_filter_size,
                                   nonlinearity=L.nonlinearities.tanh,
                                   W=l_c_char_conv.W,
                                   b=l_c_char_conv.b,
                                   pad=self.conv)
    # (batch_size * question_len x num_filters x question_word_len + filter_size - 1)

    l_q_char_emb = LL.ExpressionLayer(l_q_char_conv,
                                      lambda X: X.max(2),
                                      output_shape='auto')
    l_q_char_emb = LL.reshape(
        l_q_char_emb, (batch_size, question_len, self.num_emb_char_filters))

    ''' Concatenating both embeddings '''
    l_c_emb = LL.concat([l_c_emb, l_c_char_emb], axis=2)
    l_q_emb = LL.concat([l_q_emb, l_q_char_emb], axis=2)

    # originally I had dropout here

    ''' Highway layer allowing for interaction between embeddings '''

    l_c_P = LL.reshape(l_c_emb,
                       (batch_size * context_len,
                        self.emb_size + self.num_emb_char_filters))
    l_c_P = LL.DenseLayer(l_c_P,
                          num_units=self.rec_size,
                          b=None,
                          nonlinearity=None)

    l_c_high = HighwayLayer(l_c_P)
    l_c_emb = LL.reshape(l_c_high,
                         (batch_size, context_len, self.rec_size))

    l_q_P = LL.reshape(l_q_emb,
                       (batch_size * question_len,
                        self.emb_size + self.num_emb_char_filters))
    l_q_P = LL.DenseLayer(l_q_P,
                          num_units=self.rec_size,
                          W=l_c_P.W,
                          b=None,
                          nonlinearity=None)

    l_q_high = HighwayLayer(l_q_P,
                            W1=l_c_high.W1,
                            b1=l_c_high.b1,
                            W2=l_c_high.W2,
                            b2=l_c_high.b2)
    l_q_emb = LL.reshape(l_q_high,
                         (batch_size, question_len, self.rec_size))

    ''' Calculating wiq features from https://arxiv.org/abs/1703.04816 '''

    # batch_size x context_len
    l_weighted_feat = WeightedFeatureLayer(
        [l_c_emb, l_q_emb, l_c_mask, l_q_mask])
    l_weighted_feat = LL.dimshuffle(l_weighted_feat, (0, 1, 'x'))

    # batch_size x context_len
    l_bin_feat = LL.InputLayer(shape=(None, None),
                               input_var=self.bin_feat_var)
    l_bin_feat = LL.dimshuffle(l_bin_feat, (0, 1, 'x'))

    ''' Dropout at the embeddings '''

    if emb_dropout:
        print('Using dropout after wiq calculation.')
        l_c_emb = LL.dropout(l_c_emb)
        l_q_emb = LL.dropout(l_q_emb)

    ''' Here we concatenate wiq features to embeddings '''

    # both features are concatenated to the embeddings
    # for the question we fix the features to 1
    l_c_emb = LL.concat([l_c_emb, l_bin_feat, l_weighted_feat], axis=2)
    l_q_emb = LL.pad(l_q_emb, width=[(0, 2)], val=L.utils.floatX(1),
                     batch_ndim=2)

    ''' Context and question encoding using the same BiLSTM for both '''

    # output shape is (batch_size x context_len x rec_size)
    l_c_enc_forw = LL.LSTMLayer(l_c_emb,
                                num_units=self.rec_size,
                                grad_clipping=100,
                                mask_input=l_c_mask)
    l_c_enc_back = LL.LSTMLayer(l_c_emb,
                                num_units=self.rec_size,
                                grad_clipping=100,
                                mask_input=l_c_mask,
                                backwards=True)

    # output shape is (batch_size x question_len x rec_size)
    l_q_enc_forw = LL.LSTMLayer(
        l_q_emb,
        num_units=self.rec_size,
        grad_clipping=100,
        mask_input=l_q_mask,
        ingate=LL.Gate(W_in=l_c_enc_forw.W_in_to_ingate,
                       W_hid=l_c_enc_forw.W_hid_to_ingate,
                       W_cell=l_c_enc_forw.W_cell_to_ingate,
                       b=l_c_enc_forw.b_ingate),
        forgetgate=LL.Gate(W_in=l_c_enc_forw.W_in_to_forgetgate,
                           W_hid=l_c_enc_forw.W_hid_to_forgetgate,
                           W_cell=l_c_enc_forw.W_cell_to_forgetgate,
                           b=l_c_enc_forw.b_forgetgate),
        outgate=LL.Gate(W_in=l_c_enc_forw.W_in_to_outgate,
                        W_hid=l_c_enc_forw.W_hid_to_outgate,
                        W_cell=l_c_enc_forw.W_cell_to_outgate,
                        b=l_c_enc_forw.b_outgate),
        cell=LL.Gate(W_in=l_c_enc_forw.W_in_to_cell,
                     W_hid=l_c_enc_forw.W_hid_to_cell,
                     W_cell=None,
                     b=l_c_enc_forw.b_cell,
                     nonlinearity=L.nonlinearities.tanh))

    l_q_enc_back = LL.LSTMLayer(
        l_q_emb,
        num_units=self.rec_size,
        grad_clipping=100,
        mask_input=l_q_mask,
        backwards=True,
        ingate=LL.Gate(W_in=l_c_enc_back.W_in_to_ingate,
                       W_hid=l_c_enc_back.W_hid_to_ingate,
                       W_cell=l_c_enc_back.W_cell_to_ingate,
                       b=l_c_enc_back.b_ingate),
        forgetgate=LL.Gate(W_in=l_c_enc_back.W_in_to_forgetgate,
                           W_hid=l_c_enc_back.W_hid_to_forgetgate,
                           W_cell=l_c_enc_back.W_cell_to_forgetgate,
                           b=l_c_enc_back.b_forgetgate),
        outgate=LL.Gate(W_in=l_c_enc_back.W_in_to_outgate,
                        W_hid=l_c_enc_back.W_hid_to_outgate,
                        W_cell=l_c_enc_back.W_cell_to_outgate,
                        b=l_c_enc_back.b_outgate),
        cell=LL.Gate(W_in=l_c_enc_back.W_in_to_cell,
                     W_hid=l_c_enc_back.W_hid_to_cell,
                     W_cell=None,
                     b=l_c_enc_back.b_cell,
                     nonlinearity=L.nonlinearities.tanh))

    # batch_size x context_len x 2*rec_size
    l_c_enc = LL.concat([l_c_enc_forw, l_c_enc_back], axis=2)
    # batch_size x question_len x 2*rec_size
    l_q_enc = LL.concat([l_q_enc_forw, l_q_enc_back], axis=2)

    def proj_init():
        return np.vstack([
            np.eye(self.rec_size, dtype=theano.config.floatX),
            np.eye(self.rec_size, dtype=theano.config.floatX)
        ])

    # this is H from the paper, shape: (batch_size * context_len x rec_size)
    l_c_proj = LL.reshape(l_c_enc,
                          (batch_size * context_len, 2 * self.rec_size))
    l_c_proj = LL.DenseLayer(l_c_proj,
                             num_units=self.rec_size,
                             W=proj_init(),
                             b=None,
                             nonlinearity=L.nonlinearities.tanh)

    # this is Z from the paper, shape: (batch_size * question_len x rec_size)
    l_q_proj = LL.reshape(l_q_enc,
                          (batch_size * question_len, 2 * self.rec_size))
    l_q_proj = LL.DenseLayer(l_q_proj,
                             num_units=self.rec_size,
                             W=proj_init(),
                             b=None,
                             nonlinearity=L.nonlinearities.tanh)

    ''' Additional, weighted question encoding (alphas from paper) '''

    # batch_size * question_len x 1
    l_alpha = LL.DenseLayer(l_q_proj,
                            num_units=1,
                            b=None,
                            nonlinearity=None)

    # batch_size x question_len
    l_alpha = MaskedSoftmaxLayer(
        LL.reshape(l_alpha, (batch_size, question_len)),
        l_q_mask)

    # batch_size x rec_size
    l_z_hat = BatchedDotLayer([
        LL.reshape(l_q_proj, (batch_size, question_len, self.rec_size)),
        l_alpha
    ])

    return l_c_proj, l_z_hat
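For reference, a small numpy sketch of what the final MaskedSoftmaxLayer / BatchedDotLayer pair computes, under the assumption that BatchedDotLayer contracts the question axis, i.e. z_hat[b] = sum_j alpha[b, j] * Z[b, j, :] (the mask handling is omitted here):

import numpy as np

batch_size, question_len, rec_size = 2, 4, 3
Z = np.random.rand(batch_size, question_len, rec_size)               # projected question states
scores = np.random.rand(batch_size, question_len)                    # unnormalised alpha logits
alpha = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)   # softmax over question words
z_hat = np.einsum('bjr,bj->br', Z, alpha)                            # (batch_size, rec_size)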