class Token_Classification(nn.Module):
    def __init__(self, args, data):
        super(Token_Classification, self).__init__()
        self.pretrain_model = BertModel.from_pretrained(args.bert_file)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        # Two extra output slots, presumably for the CRF's start/stop transitions.
        self.to_crf = nn.Linear(768, data.label_alphabet.size() + 2)
        self.crf = CRF(data.label_alphabet.size(), args.use_gpu, args.average_batch)
        if args.use_gpu:
            self.to_crf = self.to_crf.cuda()
            self.pretrain_model = self.pretrain_model.cuda()
        # Track the device so get_crf_input can allocate padding on the right one.
        self.gpu = bool(args.use_gpu)

    def forward(self, input_ids, attention_mask, crf_mask, scopes):
        crf_input = self.get_crf_input(input_ids, attention_mask, scopes)
        _, best_path = self.crf._viterbi_decode(crf_input, crf_mask)
        return best_path

    def neg_log_likelihood(self, input_ids, attention_mask, batch_label, crf_mask, scopes):
        crf_input = self.get_crf_input(input_ids, attention_mask, scopes)
        total_loss = self.crf.neg_log_likelihood_loss(crf_input, crf_mask, batch_label)
        _, best_path = self.crf._viterbi_decode(crf_input, crf_mask)
        return total_loss, best_path

    def get_crf_input(self, input_ids, attention_mask, scopes):
        pretrain_model_output = self.pretrain_model(input_ids, attention_mask=attention_mask)
        hidden_repr = self.to_crf(pretrain_model_output[0])
        max_len = max(map(len, scopes))
        repr_dim = hidden_repr.size()[-1]
        crf_input = []
        for scope, repr in zip(scopes, hidden_repr):
            # Mean-pool the subword vectors inside each [start, end) scope so the
            # CRF sees one emission per original token.
            c_repr = []
            for i in range(len(scope)):
                c_repr.append(torch.mean(repr[scope[i][0]:scope[i][1]], dim=0))
            c_repr = torch.stack(c_repr)
            # Zero-pad shorter sequences up to the batch maximum.
            if max_len - len(scope) > 0:
                if self.gpu:
                    pad_repr = torch.zeros(max_len - len(scope), repr_dim).cuda()
                else:
                    pad_repr = torch.zeros(max_len - len(scope), repr_dim)
                crf_input.append(torch.cat((c_repr, pad_repr), dim=0))
            else:
                crf_input.append(c_repr)
        return torch.stack(crf_input)
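# The core of get_crf_input above is span pooling: averaging subword vectors over
# each [start, end) scope so the CRF scores one emission per original token.
# A minimal standalone sketch of that pooling on dummy tensors (shapes and scope
# values are invented for illustration only):
import torch

hidden_repr = torch.arange(24, dtype=torch.float32).view(6, 4)  # 6 subwords, dim 4
scope = [(0, 2), (2, 3), (3, 6)]                                # subword spans per token

# Mean-pool each span, mirroring the inner loop of get_crf_input.
pooled = torch.stack([hidden_repr[s:e].mean(dim=0) for s, e in scope])
print(pooled.shape)  # torch.Size([3, 4]): one vector per original token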
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.bert = BertModel.from_pretrained(model_config.pretrain_model_path)
        self.dropout = nn.Dropout(self.bert.config.hidden_dropout_prob)
        self.classifier = nn.Linear(self.bert.config.hidden_size, config.num_labels)
        self.crf = CRF(tagset_size=config.num_labels,
                       tag_dictionary=config.label2id, is_bert=True)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None,
                input_lens=None, labels=None):
        # Keyword arguments avoid depending on BertModel's positional argument order.
        outputs = self.bert(input_ids, token_type_ids=token_type_ids,
                            attention_mask=attention_mask)
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)
        outputs = (logits,)
        if labels is not None:
            loss = self.crf.calculate_loss(logits, tag_list=labels, lengths=input_lens)
            outputs = (loss,) + outputs
        return outputs  # (loss), logits
def __init__(self, config):
    super(BertCrfForNer, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.crf = CRF(num_tags=config.num_labels, batch_first=True)
    self.init_weights()
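# The snippet above only shows the constructor. Assuming the pytorch-crf package
# (whose CRF exposes a log-likelihood forward() and a Viterbi decode()), a matching
# forward pass might look like the sketch below; the method body is an illustration,
# not code from the original repository.
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    outputs = self.bert(input_ids, attention_mask=attention_mask,
                        token_type_ids=token_type_ids)
    sequence_output = self.dropout(outputs[0])
    logits = self.classifier(sequence_output)
    outputs = (logits,)
    if labels is not None:
        # CRF.forward returns the log-likelihood, so negate it to get a loss.
        mask = attention_mask.bool() if attention_mask is not None else None
        loss = -self.crf(logits, labels, mask=mask, reduction='mean')
        outputs = (loss,) + outputs
    return outputs  # (loss), logits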
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.hidden_size = model_config.hidden_size
        self.embedding = nn.Embedding(config.num_vocab, config.embed_dim)
        self.bilstm = nn.LSTM(input_size=config.embed_dim,
                              hidden_size=self.hidden_size,
                              batch_first=True,
                              num_layers=2,
                              dropout=model_config.dropout,
                              bidirectional=True)
        # self.dropout = SpatialDropout(drop_p)
        self.dropout = nn.Dropout(model_config.dropout)
        self.layer_norm = LayerNorm(self.hidden_size * 2)
        self.classifier = nn.Linear(self.hidden_size * 2, config.num_labels)
        self.crf = CRF(tagset_size=config.num_labels,
                       tag_dictionary=config.label2id, is_bert=True)

    def forward(self, input_ids, attention_mask, input_lens, labels=None):
        embs = self.embedding(input_ids)
        embs = self.dropout(embs)
        # Zero out embeddings at padding positions before the BiLSTM.
        embs = embs * attention_mask.float().unsqueeze(2)
        sequence_output, _ = self.bilstm(embs)
        sequence_output = self.layer_norm(sequence_output)
        logits = self.classifier(sequence_output)
        outputs = (logits,)
        if labels is not None:
            loss = self.crf.calculate_loss(logits, tag_list=labels, lengths=input_lens)
            outputs = (loss,) + outputs
        return outputs
def __init__(self, config):
    super().__init__()
    self.embedding = torch.nn.Embedding(len(config.tokenizer.vocab), config.emb_size)
    self.in_fc = nn.Linear(config.emb_size, config.d_model)
    self.transformer = TransformerEncoder(config)
    self.fc_dropout = nn.Dropout(config.fc_dropout)
    self.out_fc = nn.Linear(config.d_model, len(config.label2id))
    self.crf = CRF(num_tags=len(config.label2id), batch_first=True)
    self.apply(self.init_model_weights)
def __init__(self, args):
    super(BilstmCrf, self).__init__()
    self.embedding = nn.Embedding(len(args.tokenizer.vocab), args.embedding_size)
    self.bilstm = nn.LSTM(input_size=args.embedding_size,
                          hidden_size=args.hidden_size,
                          batch_first=True,
                          num_layers=2,
                          dropout=0.1,
                          bidirectional=True)
    self.dropout = SpatialDropout(0.1)
    self.layer_norm = LayerNorm(args.hidden_size * 2)
    self.classifier = nn.Linear(args.hidden_size * 2, args.num_labels)
    self.crf = CRF(num_tags=args.num_labels, batch_first=True)
    self.apply(self.init_model_weights)
class BiLSTM_CRF_CNN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size, char_size,
                 pretrained_weight_embeddings, USE_CRF, BIDIRECTIONAL=False,
                 USE_BIGRAM=False, bigram_size=0, CNN=False, use_gpu=0):
        super(BiLSTM_CRF_CNN, self).__init__()
        # include start and end tags
        self.gpu = use_gpu
        self.bidirectional = BIDIRECTIONAL
        # Note: the encoder is built with USE_CRF, USE_BIGRAM, CNN and use_gpu
        # hard-coded off/zero, regardless of the arguments passed in above.
        self.lstm_cnn = BILSTM_CNN(embedding_dim, hidden_dim, vocab_size, tagset_size,
                                   char_size, pretrained_weight_embeddings,
                                   USE_CRF=False, BIDIRECTIONAL=self.bidirectional,
                                   USE_BIGRAM=False, bigram_size=0, CNN=False, use_gpu=0)
        self.crf = CRF(tagset_size, self.gpu)

    def neg_ll_loss(self, sentence, gold_labels, chars):
        feats = self.lstm_cnn.forward(sentence, chars)
        return self.crf.neg_ll_loss(sentence, gold_labels, feats)

    def forward(self, sentence, chars):
        # Emissions from the BiLSTM-CNN encoder feed the CRF for Viterbi decoding.
        feats = self.lstm_cnn.forward(sentence, chars)
        score, tag_seq = self.crf.forward(sentence, feats)
        return score, tag_seq
def __init__(self, ner_processor, config):
    super().__init__()
    vocab_size = len(ner_processor.vocab)
    num_labels = len(ner_processor.idx2label)
    self.embedding = torch.nn.Embedding(vocab_size, config.emb_size)
    nn.init.normal_(self.embedding.weight, 0.0, 0.02)
    self.embed_size = config.emb_size
    self.in_fc = nn.Linear(config.emb_size, config.d_model)
    self.transformer = TransformerEncoder(config)
    self.fc_dropout = nn.Dropout(config.fc_dropout)
    self.out_fc = nn.Linear(config.d_model, num_labels)
    self.crf = CRF(num_tags=num_labels, batch_first=True)
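# Both transformer heads above delegate to a pytorch-crf style CRF
# (num_tags, batch_first=True). Assuming the encoder returns a
# (batch, seq_len, d_model) tensor, inference would typically call CRF.decode,
# which returns the Viterbi-best tag ids as one Python list per sequence.
# This method is an illustrative sketch, not code from the source.
def decode(self, input_ids, attention_mask):
    x = self.in_fc(self.embedding(input_ids))
    x = self.transformer(x)                      # assumed (batch, seq_len, d_model) output
    emissions = self.out_fc(self.fc_dropout(x))
    return self.crf.decode(emissions, mask=attention_mask.bool())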
class LSTM_CRF(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size, tag_to_ix,
                 USE_CRF=False, BIDIRECTIONAL=False, USE_BIGRAM=False, bigram_size=0):
        super(LSTM_CRF, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        # self.word_embeddings.weight.requires_grad = False
        print("Entered!!!!")
        # if pretrained_weight_embeddings != None:
        #     self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrained_weight_embeddings))
        if BIDIRECTIONAL:
            self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, bidirectional=BIDIRECTIONAL)
        else:
            self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.crf = CRF(tagset_size, tag_to_ix, 0)
        self.bidirectional = BIDIRECTIONAL
        self.append_bigram = USE_BIGRAM
        self.hidden = self.init_hidden()
        if self.append_bigram:
            self.hidden2tag = nn.Linear(hidden_dim + bigram_size, tagset_size)
        else:
            self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def init_hidden(self):
        if not self.bidirectional:
            return (autograd.Variable(torch.randn(1, 1, self.hidden_dim)),
                    autograd.Variable(torch.randn(1, 1, self.hidden_dim)))
        else:
            return (autograd.Variable(torch.randn(2, 1, self.hidden_dim // 2)),
                    autograd.Variable(torch.randn(2, 1, self.hidden_dim // 2)))

    def forward_lstm(self, sentence, bigram_one_hot=None):
        self.hidden = self.init_hidden()
        embeds = self.word_embeddings(sentence)  # shape seq_length * emb_size
        # print(embeds.view(len(sentence), 1, -1).shape)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
        # print("original shape before MLP " + str(lstm_out.view(len(sentence), -1).shape))
        # print("shape of onehot bigram " + str(bigram_one_hot))
        if self.append_bigram:
            # print("concatenated vector" + str(lstm_out.view(len(sentence), -1)))
            tag_space = self.hidden2tag(
                torch.cat([lstm_out.view(len(sentence), -1), bigram_one_hot], dim=1))
        else:
            tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        # tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_space

    def neg_ll_loss(self, sentence, gold_labels):
        feats = self.forward_lstm(sentence)
        return self.crf.neg_ll_loss(sentence, gold_labels, feats)

    def forward(self, sentence):
        feats = self.forward_lstm(sentence)
        score, tag_seq = self.crf.forward(sentence, feats)
        return score, tag_seq
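# A minimal training step for LSTM_CRF, using only the methods defined above:
# neg_ll_loss for the objective and forward for Viterbi decoding. The hyperparameters,
# the tag_to_ix mapping and the training_data iterable are placeholders, not values
# from the original code.
model = LSTM_CRF(embedding_dim=100, hidden_dim=200, vocab_size=5000,
                 tagset_size=len(tag_to_ix), tag_to_ix=tag_to_ix, BIDIRECTIONAL=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for sentence, gold_labels in training_data:      # placeholder: LongTensors of word/tag ids
    model.zero_grad()
    loss = model.neg_ll_loss(sentence, gold_labels)
    loss.backward()
    optimizer.step()

score, tag_seq = model(sentence)                 # best-scoring tag sequence for the last example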
class BILSTM_CNN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size, char_size,
                 pretrained_weight_embeddings, tag_to_ix, USE_CRF=False, BIDIRECTIONAL=False,
                 USE_BIGRAM=False, bigram_size=0, CNN=False, use_gpu=0):
        super(BILSTM_CNN, self).__init__()
        self.char_dim = 25
        self.char_lstm_dim = 25
        self.CNN = CNN
        self.use_gpu = use_gpu
        self.hidden_dim = hidden_dim
        self.n_cap = 4
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        # self.word_embeddings.weight.requires_grad = False
        # if pretrained_weight_embeddings != None:
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(pretrained_weight_embeddings))
        # CHAR
        self.cap_embedding_dim = 25
        self.cap_embeds = nn.Embedding(self.n_cap, self.cap_embedding_dim)
        b = np.sqrt(3.0 / self.cap_embeds.weight.size(1))
        nn.init.uniform_(self.cap_embeds.weight, -b, b)
        if self.CNN:
            print("Entered!!!!")
            self.char_embeds = nn.Embedding(char_size, self.char_dim)
            # as given in the paper, initialising
            b = np.sqrt(3.0 / self.char_embeds.weight.size(1))
            nn.init.uniform_(self.char_embeds.weight, -b, b)
            # self.init_embedding(self.char_embeds.weight)
            self.char_cnn = nn.Conv2d(in_channels=1, out_channels=self.char_lstm_dim,
                                      kernel_size=(3, self.char_dim), padding=(2, 0))
        if BIDIRECTIONAL:
            print("Bidirectional")
            self.lstm = nn.LSTM(embedding_dim + self.char_lstm_dim + self.cap_embedding_dim,
                                hidden_dim, bidirectional=BIDIRECTIONAL)
        else:
            self.lstm = nn.LSTM(embedding_dim + self.char_lstm_dim, hidden_dim)
        self.drop_probout = nn.Dropout(0.5)
        self.bidirectional = BIDIRECTIONAL
        self.append_bigram = USE_BIGRAM
        self.hidden = self.init_hidden()
        # if self.append_bigram:
        #     self.hidden2tag = nn.Linear(hidden_dim + bigram_size, tagset_size)
        # else:
        #     self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.crf = CRF(tagset_size, tag_to_ix, self.use_gpu)
        if self.use_gpu:
            self.crf = self.crf.cuda()
        if self.bidirectional:
            self.hidden2tag = nn.Linear(2 * hidden_dim, tagset_size)
            b = np.sqrt(6.0 / (self.hidden2tag.weight.size(0) + self.hidden2tag.weight.size(1)))
            nn.init.uniform_(self.hidden2tag.weight, -b, b)
            self.hidden2tag.bias.data.zero_()
        else:
            self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def init_hidden(self):
        if self.use_gpu:
            if not self.bidirectional:
                return (autograd.Variable(torch.randn(1, 1, self.hidden_dim).cuda()),
                        autograd.Variable(torch.randn(1, 1, self.hidden_dim)).cuda())
            else:
                return (autograd.Variable(torch.randn(2, 1, self.hidden_dim).cuda()),
                        autograd.Variable(torch.randn(2, 1, self.hidden_dim)).cuda())
        else:
            if not self.bidirectional:
                return (autograd.Variable(torch.randn(1, 1, self.hidden_dim)),
                        autograd.Variable(torch.randn(1, 1, self.hidden_dim)))
            else:
                return (autograd.Variable(torch.randn(2, 1, self.hidden_dim)),
                        autograd.Variable(torch.randn(2, 1, self.hidden_dim)))

    def forward_lstm(self, sentence, chars, caps, drop_prob):
        d = nn.Dropout(p=drop_prob)
        self.hidden = self.init_hidden()
        embeds = self.word_embeddings(sentence)  # shape seq_length * emb_size
        cap_embedding = self.cap_embeds(caps)
        if self.CNN:
            chars_embeds = self.char_embeds(chars).unsqueeze(1)
            cnn_output = self.char_cnn(d(chars_embeds))
            # Max-pool over the character axis to get one vector per word.
            chars_embeds = nn.functional.max_pool2d(
                cnn_output, kernel_size=(cnn_output.size(2), 1)
            ).view(cnn_output.size(0), self.char_lstm_dim)
            if self.use_gpu:
                embeds = torch.cat((embeds, chars_embeds, cap_embedding), 1).cuda()
            else:
                embeds = torch.cat((embeds, chars_embeds, cap_embedding), 1)
        # print(embeds.view(len(sentence), 1, -1).shape)
        # lstm_out, self.hidden = self.lstm(embeds.unsqueeze(1), self.hidden)
        lstm_out, _ = self.lstm(d(embeds).unsqueeze(1))
        # lstm_out, _ = self.lstm(embeds.unsqueeze(1))
        lstm_out = d(lstm_out.view(len(sentence), self.hidden_dim * 2))
        # lstm_out = lstm_out.view(len(sentence), self.hidden_dim * 2)
        # print("original shape before MLP " + str(lstm_out.view(len(sentence), -1).shape))
        # print("shape of onehot bigram " + str(bigram_one_hot))
        if self.append_bigram:
            # Note: bigram_one_hot is not a parameter of this method; this branch
            # is only usable if bigram features are supplied by the caller.
            tag_space = self.hidden2tag(
                torch.cat([lstm_out.view(len(sentence), -1), bigram_one_hot], dim=1))
        else:
            tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        # uncomment for crf:
        # return tag_space
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

    def neg_ll_loss(self, sentence, gold_labels, chars, caps, drop_prob):
        feats = self.forward_lstm(sentence, chars, caps, drop_prob)
        return self.crf.neg_ll_loss(sentence, gold_labels, feats)

    def forward(self, sentence, chars, caps, drop_prob):
        # feats = self.forward_lstm(sentence, chars, caps, drop_prob)
        # score, tag_seq = self.crf.forward(sentence, feats)
        scores = self.forward_lstm(sentence, chars, caps, drop_prob)
        # return score, tag_seq
        return scores