class AttenCNN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):
        super(AttenCNN, self).__init__()
        self.embed_size = opt.word_emb_size
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        self.input_size = self.embed_size + self.char_hidden_dim
        self.W = nn.Linear(self.input_size, 1, bias=False)
        self.hidden = nn.Linear(self.input_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def forward(self, input, char_inputs):
        """
        inputs: (unpacked_padded_output: batch_size x seq_len x hidden_size, lengths: batch_size)
        """
        entity_words, _, entity_lengths, entity_seq_recover = input
        entity_words = autograd.Variable(entity_words)
        entity_words_embeds = self.embedding(entity_words)
        batch_size, max_len, _ = entity_words_embeds.size()

        char_inputs, _, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)

        input_embeds = torch.cat((entity_words_embeds, char_features), 2)

        flat_input = input_embeds.contiguous().view(-1, self.input_size)
        logits = self.W(flat_input).view(batch_size, max_len)
        alphas = functional.softmax(logits, dim=1)

        # computing mask
        tmp = torch.LongTensor(max_len)
        if opt.gpu >= 0 and torch.cuda.is_available():
            tmp = tmp.cuda(opt.gpu)
        idxes = torch.arange(0, max_len, out=tmp).unsqueeze(0)
        # idxes = torch.arange(0, max_len, out=torch.LongTensor(max_len)).unsqueeze(0).cuda(opt.gpu)
        mask = autograd.Variable((idxes < entity_lengths.unsqueeze(1)).float())

        alphas = alphas * mask
        alphas = alphas / torch.sum(alphas, 1).view(-1, 1)
        atten_input = torch.bmm(alphas.unsqueeze(1), input_embeds).squeeze(1)
        atten_input = self.dropout(atten_input)

        hidden = self.hidden(atten_input)
        output = self.out(hidden)
        return output
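# --- Illustrative sketch, not part of the original model ---
# AttenCNN.forward builds its length mask with the legacy autograd.Variable and
# torch.arange(out=...) idiom. A minimal standalone re-implementation of the same
# masked-softmax pooling in current PyTorch, assuming only `embeds` of shape
# (batch, max_len, dim), integer `lengths` of shape (batch,), and a scoring layer
# such as nn.Linear(dim, 1, bias=False):
import torch
import torch.nn.functional as F

def masked_attention_pool(embeds, lengths, score_layer):
    """Length-masked attention pooling: scores -> masked softmax -> weighted sum."""
    batch_size, max_len, dim = embeds.size()
    logits = score_layer(embeds).squeeze(-1)              # (batch, max_len)
    idxes = torch.arange(max_len, device=embeds.device)   # (max_len,)
    mask = (idxes.unsqueeze(0) < lengths.unsqueeze(1)).float()
    alphas = F.softmax(logits, dim=1) * mask
    alphas = alphas / alphas.sum(dim=1, keepdim=True)     # renormalize over real tokens
    return torch.bmm(alphas.unsqueeze(1), embeds).squeeze(1)  # (batch, dim)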
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        self.char_hidden_dim = data.char_hidden_dim  # 50
        self.char_embedding_dim = data.char_emb_dim  # 300
        self.char_feature = CharCNN(data.char_alphabet_size, data.pretrain_char_embedding,
                                    self.char_embedding_dim, self.char_hidden_dim, data.dropout)
        self.drop = nn.Dropout(data.dropout)

        self.word_embedding = nn.Embedding(data.word_alphabet_size, data.word_emb_dim)
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.word_alphabet_size, data.word_emb_dim)))

        self.feature_embedding = nn.Embedding(data.feat_alphabet_size, data.feature_emb_dim)
        self.feature_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_feature_embeddings))

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover):
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)

        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        word_list.append(self.feature_embedding(feature_inputs))

        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, sent_len, -1)
        word_list.append(char_features)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        print("build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)

        if self.use_trans:
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(), self.trans_embedding_dim,
                                             self.trans_hidden_dim, data.HP_dropout,
                                             data.pretrain_trans_embedding, self.gpu)

        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                               self.char_hidden_dim, data.HP_dropout,
                                               data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(), self.char_embedding_dim,
                                              self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                                     self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL).")
                exit(0)

        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))

        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(self.random_embedding(data.feature_alphabets[idx].size(),
                                                           self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length, trans_seq_recover):
        """
        input:
            word_inputs: (batch_size, sent_len)
            features: list [(batch_size, sent_len), (batch_size, sent_len), ...]
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            # calculate char lstm last hidden
            char_features, _ = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_list.append(char_features)
            # word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra, _ = self.char_feature_extra.get_last_hiddens(
                    char_inputs, char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)

        if self.use_trans:
            trans_features, trans_rnn_length = self.trans_feature.get_last_hiddens(
                trans_inputs, trans_seq_length.cpu().numpy())
            trans_features_wc = trans_features
            if self.gpu:
                trans_features_wc.cuda()
            trans_features_wc = trans_features_wc[trans_seq_recover]
            trans_inputs = trans_inputs[trans_seq_recover]
            word_embs_temp = word_embs.view(batch_size * sent_len, -1)
            for index, line in enumerate(trans_inputs):
                if line[0].data.cpu().numpy()[0] == 0:
                    trans_features_wc[index] = self.w(word_embs_temp[index])
            trans_features_wc_temp = trans_features_wc
            trans_features_wc = trans_features_wc.view(batch_size, sent_len, -1)
            word_list.append(trans_features_wc)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent, self.w(word_embs_temp), trans_features_wc_temp
class CNNCNN_SentLSTM(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):
        super(CNNCNN_SentLSTM, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        self.embedding_size = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size

        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]

        # mention char_cnn
        D = self.embedding_size + self.char_hidden_dim
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                      stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=False)
            for K in Ks
        ])
        self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

        # sentence lstm
        self.lstm_hidden = opt.hidden_size
        self.lstm = nn.GRU(self.embedding_size, self.lstm_hidden, num_layers=1,
                           batch_first=True, bidirectional=True)
        self.sent_hidden_size = opt.sent_hidden_size
        self.sent_hidden = nn.Linear(self.lstm_hidden * 2, self.sent_hidden_size)

        # mention_hidden_size + sentence_hidden_size
        self.hidden = nn.Linear(self.hidden_size + self.sent_hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, mention_inputs, char_inputs, sent_inputs):
        inputs, lengths, seq_recover = mention_inputs
        mention_embedding = self.embedding(inputs)  # (N, W, D)
        batch_size, max_len = inputs.size()

        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)

        mention_char = torch.cat((mention_embedding, char_features), 2)
        mention_char = mention_char.unsqueeze(1)  # (N, Ci, W, D)
        mention_char = [F.relu(conv(mention_char)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        mention_char = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in mention_char]  # [(N, Co), ...]*len(Ks)
        mention_char = torch.cat(mention_char, 1)
        mention_hidden = self.mention_hidden(mention_char)

        sent_inputs, sent_seq_lengths = sent_inputs
        sent_embedding = self.embedding(sent_inputs)
        packed_words = pack_padded_sequence(sent_embedding, sent_seq_lengths.cpu().numpy(), True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        hid_size = lstm_out.size(2) // 2
        sents_bilstm_out = torch.cat([lstm_out[0, :, :hid_size], lstm_out[-1, :, hid_size:]], dim=1)
        sent_hidden = self.sent_hidden(sents_bilstm_out)

        x = torch.cat((mention_hidden, sent_hidden), 1)
        x = self.dropout(x)  # (N, len(Ks)*Co)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output
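# --- Illustrative sketch, not part of the original model ---
# CNNCNN_SentLSTM.forward summarizes the sentence by indexing the padded BiGRU output.
# An alternative, often simpler way to obtain the same kind of fixed-size sentence vector
# is to concatenate the final hidden states h_n of the two directions; the dimensions
# below (emb=100, hidden=50, batch=4) are made-up illustration values:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

gru = nn.GRU(input_size=100, hidden_size=50, num_layers=1, batch_first=True, bidirectional=True)
sent_embedding = torch.randn(4, 12, 100)          # (batch, max_len, emb)
sent_seq_lengths = torch.tensor([12, 10, 7, 3])   # lengths sorted in decreasing order
packed = pack_padded_sequence(sent_embedding, sent_seq_lengths, batch_first=True)
_, h_n = gru(packed)                              # h_n: (num_directions, batch, hidden)
sent_vector = torch.cat([h_n[0], h_n[1]], dim=1)  # (batch, 2*hidden), ready for sent_hidden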
class WordRep(nn.Module):
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()
        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False

        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            self.char_feature = CharCNN(data.char_alphabet.size(), data.pretrain_char_embedding,
                                        self.char_embedding_dim, self.char_hidden_dim,
                                        data.HP_dropout, self.gpu)

        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))

        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()

        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.feature_alphabets[alphabet_id].size(),
                                                       self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.feature_alphabets[alphabet_id].size(),
                                                       self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        self.use_position = use_position
        if self.use_position:
            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[position_alphabet_id]
            self.position1_emb = nn.Embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                              self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                                       self.position_embedding_dim)))
            self.position2_emb = nn.Embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                              self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                                       self.position_embedding_dim)))

        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, position1_inputs, position2_inputs):
        """
        input:
            word_inputs: (batch_size, sent_len)
            features: list [(batch_size, sent_len), (batch_size, sent_len), ...]
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            # calculate char last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_list.append(char_features)
            word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra = self.char_feature_extra.get_last_hiddens(
                    char_inputs, char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)

        if self.use_position:
            position1_feature = self.position1_emb(position1_inputs)
            position2_feature = self.position2_emb(position2_inputs)
            word_list.append(position1_feature)
            word_list.append(position2_feature)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
def __init__(self, data):
    super(BiLSTM, self).__init__()
    print("build batched bilstm...")
    self.use_bigram = data.use_bigram
    self.gpu = data.HP_gpu
    self.use_char = data.HP_use_char
    self.use_gaz = data.HP_use_gaz
    self.batch_size = data.HP_batch_size
    self.char_hidden_dim = 0
    if self.use_char:
        self.char_hidden_dim = data.HP_char_hidden_dim
        self.char_embedding_dim = data.char_emb_dim
        if data.char_features == "CNN":
            self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout, self.gpu)
        elif data.char_features == "LSTM":
            self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                           self.char_hidden_dim, data.HP_dropout, self.gpu)
        else:
            print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
            exit(0)

    self.embedding_dim = data.word_emb_dim
    self.hidden_dim = data.HP_hidden_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.droplstm = nn.Dropout(data.HP_dropout)
    self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
    self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
    self.bilstm_flag = data.HP_bilstm
    # self.bilstm_flag = False
    self.lstm_layer = data.HP_lstm_layer
    if data.pretrain_word_embedding is not None:
        self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
    else:
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
    if data.pretrain_biword_embedding is not None:
        self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
    else:
        self.biword_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))

    # The LSTM takes word embeddings as inputs, and outputs hidden states
    # with dimensionality hidden_dim.
    if self.bilstm_flag:
        lstm_hidden = data.HP_hidden_dim // 2
    else:
        lstm_hidden = data.HP_hidden_dim
    lstm_input = self.embedding_dim + self.char_hidden_dim
    if self.use_bigram:
        lstm_input += data.biword_emb_dim

    print("********************use_lattice", self.use_gaz)
    if self.use_gaz:
        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout,
                                        data.gaz_alphabet.size(), data.gaz_emb_dim,
                                        data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout,
                                             data.gaz_alphabet.size(), data.gaz_emb_dim,
                                             data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
    else:
        self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer,
                            batch_first=True, bidirectional=self.bilstm_flag)

    # The linear layer that maps from hidden state space to tag space
    self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)
    self.hidden2tag_ner = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_ner)
    self.hidden2tag_general = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_general)

    if self.gpu:
        self.drop = self.drop.cuda()
        self.droplstm = self.droplstm.cuda()
        self.word_embeddings = self.word_embeddings.cuda()
        self.biword_embeddings = self.biword_embeddings.cuda()
        if self.use_gaz:
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
        else:
            self.lstm = self.lstm.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
        self.hidden2tag_ner = self.hidden2tag_ner.cuda()
        self.hidden2tag_general = self.hidden2tag_general.cuda()
def __init__(self, rnn_type, vocab_size, embedding_dim, hidden_dim, num_layers,
             tie_weights, dropout, device, pretrain_emb=None, use_ch=False,
             use_he=False, use_i=False, use_h=False, use_g=True, **kwargs):
    super(RNNModel, self).__init__()
    self.rnn_type = rnn_type
    self.n_layers = num_layers
    self.hi_dim = hidden_dim
    self.device = device
    self.use_i = use_i
    self.use_h = use_h
    self.use_g = use_g
    self.use_ch = use_ch
    self.use_he = use_he
    self.drop = nn.Dropout(dropout)

    char_hid_dim = 0
    char_len = 0
    he_dim = 0

    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    if pretrain_emb is not None:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrain_emb))
    else:
        self.embedding.weight.data.copy_(
            torch.from_numpy(self.random_embedding(vocab_size, embedding_dim)))
    self.embedding.weight.requires_grad = False

    # ch
    if use_ch:
        char_vocab_size = kwargs['char_vocab_size']
        char_emb_dim = kwargs['char_emb_dim']
        char_hid_dim = kwargs['char_hid_dim']
        char_len = kwargs['char_len']
        self.ch = CharCNN(char_vocab_size, None, char_emb_dim, char_hid_dim, dropout).to(device)

    # he
    if use_he:
        print("Build Hypernym Embeddings...")
        he_dim = embedding_dim
        self.he = Hypernym(embedding_dim, self.embedding, device)

    concat_embedding_dim = embedding_dim + char_len * char_hid_dim + he_dim

    if self.use_i:
        embedding_dim = embedding_dim + concat_embedding_dim
    if self.use_h:
        self.h_linear = nn.Linear(concat_embedding_dim + hidden_dim, hidden_dim)
    if self.use_g:
        self.zt_linear = nn.Linear(concat_embedding_dim + hidden_dim, hidden_dim)
        self.rt_linear = nn.Linear(concat_embedding_dim + hidden_dim, concat_embedding_dim)
        self.ht_linear = nn.Linear(concat_embedding_dim + hidden_dim, hidden_dim)

    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_dim, num_layers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError(
                """An invalid option for `--model` was supplied, options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers,
                          nonlinearity=nonlinearity, dropout=dropout)

    self.word2hidden = nn.Linear(concat_embedding_dim, hidden_dim)
    self.decoder = nn.Linear(hidden_dim, vocab_size)

    if tie_weights:
        if hidden_dim != embedding_dim:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.embedding.weight

    self.init_weights()
def __init__(self, vocab_size, emb_dim, hid_dim, device, pretrain_emb=None, dropout=0,
             use_i=False, use_h=False, use_g=True, use_ch=True, use_he=False, **kwargs):
    super(DefSeq, self).__init__()
    self.device = device
    self.use_i = use_i
    self.use_h = use_h
    self.use_g = use_g
    self.use_ch = use_ch
    self.use_he = use_he

    char_emb_dim = 0
    char_hid_dim = 0
    char_len = 0
    he_dim = 0

    def weight_init(m):
        if isinstance(m, nn.Embedding):
            nn.init.orthogonal_(m.weight.data)
        if isinstance(m, nn.Linear):
            nn.init.orthogonal_(m.weight.data)
            nn.init.constant_(m.bias.data, 0.5)
        if isinstance(m, nn.LSTMCell):
            nn.init.orthogonal_(m.weight_hh.data)
            nn.init.orthogonal_(m.weight_ih.data)
            nn.init.constant_(m.bias_hh.data, 0.5)
            nn.init.constant_(m.bias_ih.data, 0.5)

    self.embedding = nn.Embedding(vocab_size, emb_dim)
    if pretrain_emb is not None:
        # self.embedding.weight.data.copy_(pretrain_emb)
        # from_pretrained is a classmethod returning a new module, so reassign it,
        # otherwise the pretrained weights are never actually used.
        self.embedding = nn.Embedding.from_pretrained(pretrain_emb, freeze=True)
        # self.embedding.weight.requires_grad = False
    else:
        weight_init(self.embedding)

    if self.use_ch:
        print("build char sequence feature extractor: CNN ...")
        char_vocab_size = kwargs['char_vocab_size']
        char_emb_dim = kwargs['char_emb_dim']
        char_hid_dim = kwargs['char_hid_dim']
        char_len = kwargs['char_len']
        self.ch = CharCNN(char_vocab_size, None, char_emb_dim, char_hid_dim, dropout, device)

    if self.use_he:
        print("build Hypernym Embeddings...")
        he_dim = emb_dim
        self.he = Hypernym(emb_dim, self.embedding, device)

    final_word_dim = emb_dim + char_hid_dim * char_len + he_dim
    self.word_linear = nn.Linear(final_word_dim, hid_dim)
    weight_init(self.word_linear)

    self.s_lstm = nn.LSTMCell(emb_dim, hid_dim)
    weight_init(self.s_lstm)

    if self.use_i:
        self.i_lstm = nn.LSTMCell(final_word_dim + emb_dim, hid_dim)
        weight_init(self.i_lstm)
    if self.use_h:
        self.h_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
        weight_init(self.h_linear)
    if self.use_g:
        self.g_zt_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
        weight_init(self.g_zt_linear)
        self.g_rt_linear = nn.Linear(final_word_dim + hid_dim, final_word_dim)
        weight_init(self.g_rt_linear)
        self.g_ht_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
        weight_init(self.g_ht_linear)

    self.hidden2tag_linear = nn.Linear(hid_dim, vocab_size)
    weight_init(self.hidden2tag_linear)
    self.dropout = nn.Dropout(p=dropout)
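# --- Illustrative sketch, not part of the original model ---
# The use_g branch above only constructs the three gate linears; the gating itself
# happens in a forward step that is not shown here. One plausible reading of the
# z/r/h-named layers is a GRU-style fusion of the word feature vector v
# (final_word_dim) with the decoder state h_t (hid_dim); treat this as a hedged
# sketch of that interpretation, not the repository's actual forward pass:
import torch

def gated_update(v, h_t, g_zt_linear, g_rt_linear, g_ht_linear):
    vh = torch.cat([v, h_t], dim=-1)                    # (batch, final_word_dim + hid_dim)
    z_t = torch.sigmoid(g_zt_linear(vh))                # update gate, (batch, hid_dim)
    r_t = torch.sigmoid(g_rt_linear(vh))                # reset gate over v, (batch, final_word_dim)
    h_tilde = torch.tanh(g_ht_linear(torch.cat([r_t * v, h_t], dim=-1)))
    return (1 - z_t) * h_t + z_t * h_tilde              # fused state, (batch, hid_dim)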
class CNNCNN_SentATTEN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):
        super(CNNCNN_SentATTEN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        self.embedding_size = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size

        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]

        # mention char_cnn
        D = self.embedding_size + self.char_hidden_dim
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                      stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=False)
            for K in Ks
        ])
        self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

        # sentence atten
        self.atten_W = nn.Linear(self.embedding_size, 1, bias=False)
        self.sent_hidden_size = opt.sent_hidden_size
        self.sent_hidden = nn.Linear(self.embedding_size, self.sent_hidden_size)

        # mention_hidden_size + sentence_hidden_size
        self.hidden = nn.Linear(self.hidden_size + self.sent_hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, mention_inputs, char_inputs, sent_inputs):
        inputs, lengths, seq_recover = mention_inputs
        mention_embedding = self.embedding(inputs)  # (N, W, D)
        batch_size, max_len = inputs.size()

        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)

        mention_char = torch.cat((mention_embedding, char_features), 2)
        mention_char = mention_char.unsqueeze(1)  # (N, Ci, W, D)
        mention_char = [F.relu(conv(mention_char)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        mention_char = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in mention_char]  # [(N, Co), ...]*len(Ks)
        mention_char = torch.cat(mention_char, 1)
        mention_hidden = self.mention_hidden(mention_char)

        sent_inputs, sent_seq_lengths = sent_inputs
        sent_embedding = self.embedding(sent_inputs)
        sent_batch_size, sent_max_len, _ = sent_embedding.size()
        flat_input = sent_embedding.contiguous().view(-1, self.embedding_size)
        logits = self.atten_W(flat_input).view(sent_batch_size, sent_max_len)
        alphas = F.softmax(logits, dim=1)

        # computing mask
        idxes = torch.arange(0, sent_max_len,
                             out=torch.LongTensor(sent_max_len)).unsqueeze(0).cuda(opt.gpu)
        mask = autograd.Variable((idxes < sent_seq_lengths.unsqueeze(1)).float())

        alphas = alphas * mask
        # renormalize
        alphas = alphas / torch.sum(alphas, 1).view(-1, 1)
        sent_atten_input = torch.bmm(alphas.unsqueeze(1), sent_embedding).squeeze(1)
        sent_atten_input = self.dropout(sent_atten_input)
        sent_hidden = self.sent_hidden(sent_atten_input)

        x = torch.cat((mention_hidden, sent_hidden), 1)
        x = self.dropout(x)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output
class CNNCNN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):
        super(CNNCNN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        D = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size
        D = D + self.char_hidden_dim

        # mention cnn
        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                      stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=False)
            for K in Ks
        ])
        self.hidden = nn.Linear(len(Ks) * Co, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x, char_inputs):
        inputs, lengths, seq_recover = x
        x = self.embedding(inputs)  # (N, W, D)
        batch_size, max_len = inputs.size()

        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)

        x = torch.cat((x, char_features), 2)
        x = x.unsqueeze(1)  # (N, Ci, W, D)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...]*len(Ks)
        x = torch.cat(x, 1)
        '''
        x1 = self.conv_and_pool(x, self.conv13)  # (N, Co)
        x2 = self.conv_and_pool(x, self.conv14)  # (N, Co)
        x3 = self.conv_and_pool(x, self.conv15)  # (N, Co)
        x = torch.cat((x1, x2, x3), 1)  # (N, len(Ks)*Co)
        '''
        x = self.dropout(x)  # (N, len(Ks)*Co)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output
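# --- Illustrative sketch, not part of the original model ---
# Shape walk-through for one branch of convs1 in CNNCNN. The values Co=64, K=3, D=110,
# W=7 and batch size 2 are made-up illustration values; the (K, D) kernel spans the
# full word+char feature dimension, so convolution and max pooling act only over
# token positions:
import torch
import torch.nn as nn
import torch.nn.functional as F

Co, K, D, W = 64, 3, 110, 7
conv = nn.Conv2d(1, Co, kernel_size=(K, D), padding=(K // 2, 0), bias=False)
x = torch.randn(2, 1, W, D)                            # (N, Ci, W, D)
feat = F.relu(conv(x)).squeeze(3)                      # (N, Co, W): width collapses to 1, then is squeezed
pooled = F.max_pool1d(feat, feat.size(2)).squeeze(2)   # (N, Co): max over token positions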