def __init__(self, args):
    super().__init__()
    self.args = args
    self.hidden_dim = args.hidden_dim
    self.tag_num = args.tag_num
    self.batch_size = args.batch_size
    self.bidirectional = True
    self.num_layers = args.num_layers
    self.pad_index = args.pad_index
    self.dropout = args.dropout
    vocabulary_size = args.vocabulary_size
    embedding_dimension = args.embedding_dim
    self.embedding = nn.Embedding(vocabulary_size, embedding_dimension,
                                  padding_idx=self.pad_index).to(device)
    self.lstm = nn.LSTM(embedding_dimension, self.hidden_dim // 2,
                        bidirectional=self.bidirectional,
                        num_layers=self.num_layers,
                        dropout=self.dropout).to(device)
    self.hidden2label = nn.Linear(self.hidden_dim, self.tag_num).to(device)
    self.crflayer = CRF(self.tag_num).to(device)
    self.dropoutlayer = nn.Dropout(self.dropout)
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim, embedding_mat=None):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix) - 2  # subtract the START and STOP tags
    # Note: the embedding weights remain trainable even when initialized.
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    if embedding_mat is not None:
        self.word_embeds.load_state_dict({'weight': torch.tensor(embedding_mat)})
    # num_layers: number of stacked LSTM layers
    # hidden_size: dimension of the hidden state (may differ from the input dimension!)
    # Bidirectional LSTM: the forward and backward hidden states are
    # concatenated, so each direction gets hidden_dim // 2.
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, batch_first=True,
                        num_layers=1, bidirectional=True)
    # Maps the output of the LSTM into tag space (a single linear projection).
    self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)
    # This CRF implementation adds the START and STOP tags internally.
    self.crf = CRF(len(tag_to_ix) - 2, use_gpu=False)
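# A tiny illustration (hypothetical tag map) of why tagset_size subtracts 2:
# the START and STOP entries exist only for the CRF's internal transitions
# and never appear as emission targets.
tag_to_ix = {"B": 0, "I": 1, "O": 2, "<START>": 3, "<STOP>": 4}
assert len(tag_to_ix) - 2 == 3  # three real emission tags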
def __init__(self, params): """ We define an recurrent network that predicts the NER tags for each token in the sentence. The components required are: - an embedding layer: this layer maps each index in range(params.vocab_size) to a params.embedding_dim vector - lstm: applying the LSTM on the sequential input returns an output for each token in the sentence - fc: a fully connected layer that converts the LSTM output for each token to a distribution over NER tags Args: params: (Params) contains vocab_size, embedding_dim, lstm_hidden_dim """ super(Netcppos, self).__init__() # the embedding takes as input the vocab_size and the embedding_dim self.embedding = nn.Embedding(params.vocab_size, params.embedding_dim) # the LSTM takes as input the size of its input (embedding_dim), its hidden size # for more details on how to use it, check out the documentation self.lstm = nn.LSTM(params.embedding_dim, params.lstm_hidden_dim, batch_first=True) #self.lstm = nn.LSTM(params.embedding_dim+params.pos_dim, params.lstm_hidden_dim, batch_first=True) # the fully connected layer transforms the output to give the final output layer self.fc1 = nn.Linear(params.lstm_hidden_dim, params.pos_dim) self.fc = nn.Linear(params.pos_dim + params.pos_dim, params.number_of_tags) #self.crf_model = CRF(9, batch_first=False) #num_tags=9 self.crf_model = CRF(9)
def __init__(self, args):
    super(BiLstmCrf, self).__init__(args)
    self.args = args
    self.hidden_dim = 300
    self.tag_num = args.tag_num
    self.batch_size = args.batch_size
    self.bidirectional = True
    self.num_layers = args.num_layers
    self.pad_index = args.pad_index
    self.dropout = args.dropout
    self.save_path = args.save_path
    vocabulary_size = args.vocabulary_size
    embedding_dimension = args.embedding_dim
    self.embedding = nn.Embedding(vocabulary_size, embedding_dimension).to(DEVICE)
    if args.static:
        logger.info('loading word vectors from {}/{}'.format(
            args.pretrained_path, args.pretrained_name))
        vectors = Vectors(args.pretrained_name, args.pretrained_path).vectors
        self.embedding = self.embedding.from_pretrained(
            vectors, freeze=not args.non_static).to(DEVICE)
    self.lstm = nn.LSTM(embedding_dimension, self.hidden_dim // 2,
                        bidirectional=self.bidirectional,
                        num_layers=self.num_layers,
                        dropout=self.dropout).to(DEVICE)
    self.hidden2label = nn.Linear(self.hidden_dim, self.tag_num).to(DEVICE)
    self.crflayer = CRF(self.tag_num).to(DEVICE)
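# A hedged loss sketch for the model above, assuming crflayer is kmkurn's
# torchcrf CRF with its default batch_first=False (matching the
# sequence-first LSTM); x and y are (seq_len, batch) tensors and mask is a
# (seq_len, batch) ByteTensor. The method name is hypothetical.
def loss(self, x, y, mask):
    emb = self.embedding(x)             # (seq, batch, embedding_dim)
    out, _ = self.lstm(emb)             # (seq, batch, hidden_dim)
    emissions = self.hidden2label(out)  # (seq, batch, tag_num)
    return -self.crflayer(emissions, y, mask=mask)  # negative log-likelihood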
def __init__(self, num_labels: int, rnn_hidden_size: int, word_emb_dim: int,
             char_emb_dim: int, pos_emb_dim: int, dropout_rate: float = 0.5):
    """
    Args:
        num_labels (int): number of output labels
        rnn_hidden_size (int): total BiLSTM hidden size (split across the two directions)
        word_emb_dim (int): dimension of the word embeddings
        char_emb_dim (int): dimension of the character embeddings
        pos_emb_dim (int): dimension of the POS embeddings
        dropout_rate (float, optional): dropout probability. Defaults to 0.5.
    """
    super().__init__()
    input_dim = word_emb_dim + char_emb_dim + pos_emb_dim * 2
    self.num_labels = num_labels
    # bidirectional: the two directions' outputs are concatenated, so each
    # direction gets rnn_hidden_size // 2
    self.bilstm = nn.LSTM(input_size=input_dim,
                          hidden_size=rnn_hidden_size // 2,
                          num_layers=1, bias=True, batch_first=True,
                          bidirectional=True)
    self.linear = nn.Linear(rnn_hidden_size, num_labels)
    self.dropout_layer = nn.Dropout(p=dropout_rate)
    self.crf = CRF(num_labels, 0)  # second argument: pad label index
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def __init__(
    self,
    tag_to_idx: Dict,
    embeddings_dim: int = 300,
    hidden_dim: int = 256,
    num_lstm_layers: int = 2,
    spatial_dropout: float = 0.2,
    **kwargs: Dict
):
    super().__init__()
    self.embedding_dim = embeddings_dim
    self.hidden_dim = hidden_dim
    self.num_lstm_layers = num_lstm_layers
    self.tag_to_idx = tag_to_idx
    self.tagset_size = len(tag_to_idx)
    self.crf = CRF(self.tagset_size, batch_first=True)
    self.embedding_dropout = SpatialDropout(spatial_dropout)
    self.lstm = nn.LSTM(embeddings_dim, hidden_dim // 2,
                        num_layers=self.num_lstm_layers,
                        bidirectional=True, batch_first=True)
    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim, hidden_dim // 2)
    self.hidden2tag2 = nn.Linear(hidden_dim // 2, self.tagset_size)
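# A minimal loss sketch for the module above, assuming the CRF is kmkurn's
# torchcrf (whose forward() returns the log-likelihood) and that token
# embeddings of shape (batch, seq, embeddings_dim) are computed upstream;
# the method name is hypothetical.
def neg_log_likelihood(self, embeddings, tags, mask):
    x = self.embedding_dropout(embeddings)
    lstm_out, _ = self.lstm(x)                    # (batch, seq, hidden_dim)
    emissions = self.hidden2tag2(self.hidden2tag(lstm_out))
    return -self.crf(emissions, tags, mask=mask)  # negative log-likelihood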
def test_initialize_score_when_set_padidx(self):
    crf = CRF(self.num_labels, 1)
    self.assertTrue(0.1 > torch.max(crf.trans_matrix))
    self.assertTrue(-10000.0 == torch.min(crf.trans_matrix))
    self.assertTrue(0.1 > torch.max(crf.start_trans))
    self.assertTrue(-10000.0 == torch.min(crf.start_trans))
    self.assertTrue(0.1 > torch.max(crf.end_trans))
    self.assertTrue(-0.1 < torch.min(crf.end_trans))
def __init__(self, num_classes, model_name) -> None:
    super(bertCRF, self).__init__()
    if model_name == "bert-base-cased-crf":
        self.bert = BertModel(BertConfig())
    elif model_name == "roberta-base-crf":
        self.bert = RobertaModel(RobertaConfig())
    self.dropout = nn.Dropout(0.1)
    self.position_wise_ff = nn.Linear(768, num_classes)
    self.crf = CRF(num_classes)
def setUp(self):
    self.batch_size = 2
    self.sequence_size = 3
    self.num_labels = 5
    self.crf = CRF(self.num_labels)
    self.mask = torch.FloatTensor([[1, 1, 1], [1, 1, 0]])
    self.labels = torch.LongTensor([[0, 2, 3], [1, 4, 1]])
    self.hidden = torch.randn(self.batch_size, self.sequence_size,
                              self.num_labels, requires_grad=True)
def __init__(self, config):
    super(BERT_CRF, self).__init__()
    self.num_tags = config.num_tags
    self.hidden_size = config.hidden_size
    self.bert_layer = BertModel.from_pretrained(args.model_name)
    self.dropout = nn.Dropout(args.dropout_prob)
    self.hidden_to_tag_layer = nn.Linear(self.hidden_size, self.num_tags)
    self.crf_layer = CRF(self.num_tags)
def test_initialize_variables(self):
    self.assertEqual(self.crf.num_labels, self.num_labels)
    self.assertEqual(self.crf.trans_matrix.size(),
                     (self.num_labels, self.num_labels))
    self.assertEqual(self.crf.start_trans.size(), (self.num_labels, ))
    self.assertEqual(self.crf.end_trans.size(), (self.num_labels, ))
    num_labels = -1
    with self.assertRaises(ValueError) as er:
        CRF(num_labels)
    exception = er.exception
    self.assertEqual(exception.args[0], 'invalid number of labels: -1')
def setUp(self):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    self.batch_size = 2
    self.sequence_size = 3
    self.num_labels = 5
    self.crf = CRF(self.num_labels)
    self.mask = torch.ByteTensor([[1, 1, 1], [1, 1, 0]]).to(device)
    self.labels = torch.LongTensor([[0, 2, 3], [1, 4, 1]]).to(device)
    self.hidden = torch.randn(self.batch_size, self.sequence_size,
                              self.num_labels, requires_grad=True).to(device)
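# A hedged usage sketch for the fixture above, assuming the CRF under test
# is s14t284's TorchCRF, where forward() returns the per-example
# log-likelihood and viterbi_decode() the best label sequences; the test
# name is hypothetical.
def test_forward_and_decode(self):
    log_likelihood = self.crf.forward(self.hidden, self.labels, self.mask)
    best_paths = self.crf.viterbi_decode(self.hidden, self.mask)
    self.assertEqual(len(best_paths), self.batch_size)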
def __init__(self):
    super(RNN2, self).__init__()
    self.rnn1 = nn.GRU(
        input_size=912,
        hidden_size=100,   # RNN hidden unit size
        num_layers=2,      # number of stacked RNN layers
        batch_first=True,  # input & output take batch size as the first dimension, e.g. (batch, time_step, input_size)
        dropout=0.3,
        bidirectional=True)
    self.fc = nn.Linear(200, 5)  # bidirectional GRU output: 2 * 100 = 200
    # output = nn.Softmax(fc)
    self.crf = CRF(5)
def __init__(self, embed_dim, num_layers, hidden_dim, text_vocab, bio_vocab):
    super(LSTM_CRF_Model, self).__init__()
    self.bio_vocab = bio_vocab
    self.text_vocab = text_vocab
    self.embed_dim = embed_dim
    self.num_layers = num_layers
    self.hidden_dim = hidden_dim
    self.vocab_dim = len(text_vocab)
    self.n_classes = len(bio_vocab)
    self.word_embeddings = Embeddings(self.embed_dim, self.vocab_dim)
    self.dropout = nn.Dropout(args.dropout)
    self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers=num_layers)
    self.hidden2tag = nn.Linear(hidden_dim, self.n_classes)
    self.crf = CRF(self.n_classes)
def __init__(self, num_labels: int, hidden_size: int, dropout_rate: float,
             wordemb_dim: int, charemb_dim: int):
    """
    :param num_labels: number of labels
    :param hidden_size: size of the hidden state
    :param dropout_rate: dropout rate (0.0 <= dropout_rate < 1.0)
    :param wordemb_dim: dimension of the word embeddings
    :param charemb_dim: dimension of the character embeddings
    """
    super().__init__()
    self.blstm = BLSTM(num_labels, hidden_size, dropout_rate,
                       wordemb_dim, charemb_dim)
    self.crf = CRF(num_labels)
    # .cuda() moves the parameters in place and returns self, so this
    # reassignment works even though rebinding self has no outside effect.
    self = self.cuda() if BLSTM.CUDA else self
def __init__(self, vocab_size, input_size, hidden_size, num_labels,
             n_layers, lr, dropout):
    super(BLSTM_CRF, self).__init__()
    self.name = "BLSTM_CRF"
    self.blstm = BLSTM(vocab_size, input_size, hidden_size, num_labels,
                       n_layers, dropout)
    self.crf = CRF(num_labels)
    self.blstm_optimizer = optim.Adam(self.blstm.parameters(), lr=lr,
                                      weight_decay=1e-4)
    self.crf_optimizer = optim.Adam(self.crf.parameters(), lr=lr,
                                    weight_decay=1e-4)
    self.loss = 0
    self.print_every = 1
    self.max_score = 0.
    if USE_CUDA:
        self.blstm = self.blstm.cuda()
        self.crf = self.crf.cuda()
def __init__(self, vocabs, word_dim, pos_dim, hidden_size, rnn_layers,
             dropout_rate, device, bidirectional=True, use_crf=False,
             embedding=None):
    super(Model, self).__init__()
    word2id, tag2id, label2id = vocabs
    # word embeddings, optionally initialized from a pretrained matrix
    self.word_embeddings = nn.Embedding(len(word2id), word_dim)
    if embedding is not None:
        self.word_embeddings.weight.data.copy_(torch.from_numpy(embedding))
    self.tag_embeddings = nn.Embedding(len(tag2id), pos_dim)
    # LSTM over concatenated word + POS embeddings
    self.lstm = nn.LSTM(word_dim + pos_dim, hidden_size, rnn_layers,
                        batch_first=True, bidirectional=bidirectional,
                        dropout=dropout_rate)
    # output size doubles when bidirectional
    output_size = hidden_size * 2 if bidirectional else hidden_size
    self.linear = nn.Linear(output_size, len(label2id))
    self.dropout_rate = dropout_rate
    self.use_crf = use_crf
    if use_crf:
        self.crf = CRF(len(label2id), batch_first=True)
    self.cross_entropy = nn.CrossEntropyLoss(reduction='none')
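# A minimal loss sketch for the model above (hypothetical helper, assuming
# torchcrf's API): CRF log-likelihood when use_crf is set, otherwise
# token-level cross-entropy with the padding mask applied; emissions is
# (batch, seq, num_labels), mask is (batch, seq) with 1 for real tokens.
def compute_loss(self, emissions, labels, mask):
    if self.use_crf:
        return -self.crf(emissions, labels, mask=mask.byte(), reduction='mean')
    loss = self.cross_entropy(emissions.transpose(1, 2), labels)  # (batch, seq)
    return (loss * mask).sum() / mask.sum()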
def __init__(self, config):
    super(Model, self).__init__()
    self.embedding_dim = config.embedding_dim
    self.hidden_dim = config.hidden_dim
    self.vocab_size = config.vocab_size
    self.num_tags = config.num_tags
    self.embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
    self.lstm = nn.LSTM(
        self.embedding_dim,
        self.hidden_dim // 2,
        num_layers=1,
        bidirectional=True,
        batch_first=True,
    )
    self.dropout = nn.Dropout(config.dropout)
    self.linear = nn.Linear(self.hidden_dim, self.num_tags)
    self.crf = CRF(self.num_tags)
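# Note (assumption): if the CRF above is kmkurn's torchcrf, it defaults to
# batch_first=False while the LSTM is batch_first=True, so emissions and
# tags would need transposing to (seq, batch, ...) before the CRF call, e.g.:
#     loss = -self.crf(emissions.transpose(0, 1), tags.transpose(0, 1))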
def __init__(self, num_labels: int, dropout_rate: float, word_emb_dim: int,
             char_emb_dim: int, pos_emb_dim: int, pad_idx: int = 0,
             other_idx: int = 1):
    """
    Args:
        num_labels (int): number of output labels
        dropout_rate (float): dropout probability
        word_emb_dim (int): dimension of the word embeddings
        char_emb_dim (int): dimension of the character embeddings (0 disables them)
        pos_emb_dim (int): dimension of the POS embeddings (0 disables them)
        pad_idx (int, optional): index of the padding label. Defaults to 0.
        other_idx (int, optional): index of the default "other" label. Defaults to 1.
    """
    super().__init__()
    input_dim = word_emb_dim + char_emb_dim + pos_emb_dim * 2
    self.USE_CHAR = char_emb_dim > 0
    self.USE_POS = pos_emb_dim > 0
    self.num_labels = num_labels
    # bilstm output feeds the next bilstm input, so hidden_size == input_size:
    # each direction gets input_dim // 2 and the two are concatenated.
    self.bilstm = nn.LSTM(input_size=input_dim, hidden_size=input_dim // 2,
                          num_layers=1, bias=True, batch_first=True,
                          bidirectional=True)
    self.linear = nn.Linear(input_dim, num_labels)
    self.dropout_layer = nn.Dropout(p=dropout_rate)
    self.crf = CRF(num_labels, pad_idx)
    self.pad_idx = pad_idx
    self.other_idx = other_idx
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def __init__(self, bert_config, args, intent_label_lst, slot_label_lst):
    super(JointBERT, self).__init__(bert_config)
    self.args = args
    self.num_intent_labels = len(intent_label_lst)
    self.num_slot_labels = len(slot_label_lst)
    if args.do_pred:
        # Prediction: build the model from config alone; weights are
        # presumably restored from a saved checkpoint afterwards.
        self.bert = PRETRAINED_MODEL_MAP[args.model_type](config=bert_config)
    else:
        # Training: initialize from the pretrained BERT checkpoint.
        self.bert = PRETRAINED_MODEL_MAP[args.model_type].from_pretrained(
            args.model_name_or_path, config=bert_config)
    self.intent_classifier = IntentClassifier(bert_config.hidden_size,
                                              self.num_intent_labels,
                                              args.dropout_rate)
    self.slot_classifier = SlotClassifier(bert_config.hidden_size,
                                          self.num_slot_labels,
                                          args.dropout_rate)
    if args.use_crf:
        self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
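# A hedged decoding sketch for the slot head above, assuming torchcrf's
# CRF.decode(); slot_logits is (batch, seq, num_slot_labels) and
# attention_mask is (batch, seq) with 1 for real tokens (both hypothetical
# local names):
slot_preds = self.crf.decode(slot_logits, mask=attention_mask.byte())
# -> list of per-example label-index lists, already truncated to the mask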
def __init__(self, embedding, nemb, nhid, nlayers, drop, ntags, batch_first=True):
    super(BiLstmTagger, self).__init__()
    self.embedding = embedding
    self.tagger_rnn = nn.LSTM(input_size=nemb, hidden_size=nhid,
                              num_layers=nlayers, dropout=drop,
                              bidirectional=True)
    self.projection = nn.Sequential(
        nn.Linear(in_features=nhid * 2, out_features=ntags))
    self.crf_tagger = CRF(ntags)
    self._batch_first = batch_first
def __init__(self, vocabs, word_dim, pos_dim, hidden_size, rnn_layers,
             dropout_rate, device, bidirectional=True, use_crf=False,
             embedding=None):
    super(LabelAttention, self).__init__()
    word2id, tag2id, label2id = vocabs  # vocabs == (word2id, tag2id, label2id)
    # output size doubles when bidirectional
    output_size = hidden_size * 2 if bidirectional else hidden_size
    # word embeddings (dimension == 100); optionally initialized from a
    # pretrained matrix copied into the layer's weights
    self.word_embeddings = nn.Embedding(len(word2id), word_dim)
    if embedding is not None:
        self.word_embeddings.weight.data.copy_(torch.from_numpy(embedding))
    # tag and label embeddings are not pretrained
    self.tag_embeddings = nn.Embedding(len(tag2id), pos_dim)
    # the label embeddings themselves are the same with or without label attention
    self.label_embeddings = nn.Embedding(len(label2id), output_size)
    # LSTM stack: word_dim + pos_dim == 150 input features
    self.lstm1 = nn.LSTM(word_dim + pos_dim, hidden_size, 1, batch_first=True,
                         bidirectional=bidirectional, dropout=dropout_rate)
    self.label_attn1 = MultiHeadAttention(input_size=output_size,
                                          hidden_size=hidden_size, n_head=8,
                                          dropout=dropout_rate, device=device)
    self.lstm2 = nn.LSTM(hidden_size, hidden_size, 1, batch_first=True,
                         bidirectional=bidirectional, dropout=dropout_rate)
    self.label_attn2 = MultiHeadAttention(input_size=output_size,
                                          hidden_size=hidden_size, n_head=1,
                                          dropout=dropout_rate, device=device)
    self.linear = nn.Linear(output_size, len(label2id))
    self.dropout_rate = dropout_rate
    self.use_crf = use_crf
    if use_crf:
        self.crf = CRF(len(label2id), batch_first=True)
    # loss over label indices: unreduced cross-entropy
    self.cross_entropy = nn.CrossEntropyLoss(reduction='none')
    self.label_size = len(label2id)  # total number of labels
    self.device = device
def __init__(self, tagset_size, start_tag_idx, stop_tag_idx):
    from ..models.crf import CRF
    self.crf = CRF(tagset_size, start_tag_idx, stop_tag_idx)
def __init__(self, config):
    super(BertCrfForNer, self).__init__(config)
    self.num_labels = config.num_labels
    self.crf_type = config.crf_type
    self.no_crf_loss = config.no_crf_loss
    self.loss_type = config.loss_type
    self.focalloss_gamma = config.focalloss_gamma
    self.focalloss_alpha = config.focalloss_alpha
    self.diceloss_weight = config.diceloss_weight
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.num_labels)
    label2id = config.label2id
    start_tag_idx = label2id["[CLS]"]
    stop_tag_idx = label2id["[SEP]"]
    tagset_size = len(label2id)
    if self.crf_type == 'ncrfpp':
        from ..models.ncrfpp_crf import CRF as CRFPP
        self.crf = CRFPP(tagset_size, start_tag_idx, stop_tag_idx)
    elif self.crf_type == 'pytorch-crf':
        # https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py
        from torchcrf import CRF
        # torchcrf's batch_first is a plain bool (the original passed a BoolTensor)
        self.crf = CRF(len(label2id), batch_first=True)
    elif self.crf_type == 'old_crf':
        from ..models.crf_0 import CRF
        self.crf_0 = CRF(tagset_size, start_tag_idx, stop_tag_idx)
    elif self.crf_type == 'new_crf':
        from ..models.crf import CRF
        self.crf = CRF(len(label2id), batch_first=True)
    elif self.crf_type == 'lstm_crf':
        from ..models.lstm_crf import CRF
        self.crf = CRF(tagset_size, start_tag_idx, stop_tag_idx)
    self.init_weights()
def __init__(self, tagset_size, start_tag_idx, stop_tag_idx):
    # https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py
    from torchcrf import CRF
    # torchcrf expects a plain bool for batch_first (the original passed a
    # BoolTensor); it manages start/end transitions internally, so the
    # start_tag_idx/stop_tag_idx arguments go unused here.
    self.crf = CRF(tagset_size, batch_first=True)
def __init__(self, tagset_size, start_tag_idx, stop_tag_idx):
    from TorchCRF import CRF
    self.crf = CRF(tagset_size)
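# Note: `torchcrf` (kmkurn/pytorch-crf, used in the snippet above this one)
# and `TorchCRF` (imported here) are different packages. The former exposes
# forward()/decode() and a batch_first flag; the latter (assumed to be
# s14t284/TorchCRF) exposes forward()/viterbi_decode() and takes a pad label
# index as its second argument, as exercised by the tests earlier in this
# section.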
def __init__(self, bert, hidden_size, num_tags, dropout):
    super().__init__()
    self.bert = bert
    self.crf = CRF(num_tags)
    self.fc = nn.Linear(hidden_size, num_tags)
    self.dropout = nn.Dropout(dropout)
def __init__(self, pretrained_model, freeze_bert=False, lstm_dim=-1):
    super(DeepPunctuationCRF, self).__init__()
    self.bert_lstm = DeepPunctuation(pretrained_model, freeze_bert, lstm_dim)
    self.crf = CRF(len(punctuation_dict), batch_first=True)
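# A hedged loss sketch for the wrapper above, assuming torchcrf and that
# DeepPunctuation(x, attn_masks) returns per-token emissions of shape
# (batch, seq, len(punctuation_dict)); both the call signature and the
# method name are assumptions for illustration.
def log_likelihood_loss(self, x, attn_masks, y):
    emissions = self.bert_lstm(x, attn_masks)
    return -self.crf(emissions, y, mask=attn_masks.byte(), reduction='mean')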