Example #1
    def __init__(self, char_init_embed, word_init_embed, pos_init_embed, spo_embed_dim, sentence_length, 
        hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bieso', weight=None):
        
        super().__init__()
        
        self.char_embed = nn.Embedding(char_init_embed[0], char_init_embed[1])
        self.word_embed = nn.Embedding(word_init_embed[0], word_init_embed[1])
        # word2vec
        self.word_embed.weight.data.copy_(torch.from_numpy(weight))
        self.pos_embed = nn.Embedding(pos_init_embed[0], pos_init_embed[1])
        # spo embed size: 50
        self.embed_dim = self.char_embed.embedding_dim + self.word_embed.embedding_dim + self.pos_embed.embedding_dim + spo_embed_dim
        # sentence_length is kept in the signature but currently unused

        self.norm1 = torch.nn.LayerNorm(self.embed_dim)
        self.Rnn = nn.LSTM(input_size=self.embed_dim, hidden_size=hidden_size, num_layers=2,
                            dropout=dropout, bidirectional=True, batch_first=True)
        self.Linear1 = nn.Linear(hidden_size * 2, hidden_size * 2 // 3)
        self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3)
        self.relu = torch.nn.LeakyReLU()
        self.drop = torch.nn.Dropout(dropout)
        self.Linear2 = nn.Linear(hidden_size * 2 // 3, num_classes)
        
        if id2words is None:
            self.Crf = CRF(num_classes, include_start_end_trans=False)
        else:
            self.Crf = CRF(num_classes, include_start_end_trans=False,
                            allowed_transitions=allowed_transitions(id2words, encoding_type=encoding_type))
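The model above concatenates char, word, POS, and sentence-level SPO embeddings before a two-layer BiLSTM whose output feeds CRF emission scores. A minimal, self-contained sketch of that input pipeline (hypothetical sizes, plain PyTorch, CRF omitted) might look like this:

import torch
import torch.nn as nn

# Hypothetical sizes for illustration only.
batch_size, max_len = 4, 20
char_vocab, char_dim = 100, 32
word_vocab, word_dim = 200, 64
pos_vocab, pos_dim = 30, 16
spo_dim = 50
hidden_size, num_classes = 128, 9

char_embed = nn.Embedding(char_vocab, char_dim)
word_embed = nn.Embedding(word_vocab, word_dim)
pos_embed = nn.Embedding(pos_vocab, pos_dim)
embed_dim = char_dim + word_dim + pos_dim + spo_dim

rnn = nn.LSTM(embed_dim, hidden_size, num_layers=2,
              bidirectional=True, batch_first=True)
emission = nn.Linear(hidden_size * 2, num_classes)

char_ids = torch.randint(0, char_vocab, (batch_size, max_len))
word_ids = torch.randint(0, word_vocab, (batch_size, max_len))
pos_ids = torch.randint(0, pos_vocab, (batch_size, max_len))
spo = torch.randn(batch_size, spo_dim)  # one SPO vector per sentence

# Broadcast the sentence-level SPO vector to every time step, then concatenate.
x = torch.cat([char_embed(char_ids),
               word_embed(word_ids),
               pos_embed(pos_ids),
               spo.unsqueeze(1).repeat(1, max_len, 1)], dim=2)
x, _ = rnn(x)
logits = emission(x)   # [batch_size, max_len, num_classes] emission scores for a CRF
print(logits.shape)    # torch.Size([4, 20, 9])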
Example #2
    def __init__(self, config, args):
        super(NERBert, self).__init__(config)
        self.args = args
        self.bert = BertModel(config)
        if self.args.weighted:
            self.weight = nn.Parameter(torch.Tensor(self.args.num_layers))
            self.weight.data.uniform_(0.1, 0.9)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.args.num_labels)
        if self.args.use_crf:
            self.crf = ConditionalRandomField(self.args.num_labels)

        self.apply(self.init_bert_weights)
Example #3
    def __init__(self,
                 char_init_embed,
                 word_init_embed,
                 pos_init_embed,
                 spo_embed_dim,
                 num_classes,
                 num_layers,
                 inner_size,
                 key_size,
                 value_size,
                 num_head,
                 dropout=0.1,
                 id2words=None,
                 encoding_type='bieso',
                 weight=None):
        super().__init__()

        self.char_embed = nn.Embedding(char_init_embed[0], char_init_embed[1])
        self.word_embed = nn.Embedding(word_init_embed[0], word_init_embed[1])
        self.word_embed.weight.data.copy_(torch.from_numpy(weight))
        self.pos_embed = nn.Embedding(pos_init_embed[0], pos_init_embed[1])
        # spo embed size: 50
        self.embed_dim = self.char_embed.embedding_dim + self.word_embed.embedding_dim + self.pos_embed.embedding_dim + spo_embed_dim

        self.norm1 = torch.nn.LayerNorm(self.embed_dim)
        self.transformer = encoder.TransformerEncoder(
            num_layers=num_layers,
            model_size=self.embed_dim,
            inner_size=inner_size,
            key_size=key_size,
            value_size=value_size,
            num_head=num_head,
            dropout=dropout)
        self.Linear1 = nn.Linear(self.embed_dim, self.embed_dim // 3)
        self.norm2 = torch.nn.LayerNorm(self.embed_dim // 3)
        self.relu = torch.nn.LeakyReLU()
        self.drop = torch.nn.Dropout(dropout)
        self.Linear2 = nn.Linear(self.embed_dim // 3, num_classes)
        self.Linear = nn.Linear(self.embed_dim, num_classes)

        if id2words is None:
            self.Crf = CRF(num_classes, include_start_end_trans=False)
        else:
            self.Crf = CRF(num_classes,
                           include_start_end_trans=False,
                           allowed_transitions=allowed_transitions(
                               id2words, encoding_type=encoding_type))
Example #4
    def __init__(self, args):
        super(LacNet, self).__init__()

        vocab_size = args.vocab_size
        word_dim = args.word_dim
        num_gru_layers = args.num_gru_layers
        num_labels = args.num_labels
        hidden_dim = args.hidden_dim

        self.word_emb = nn.Embedding(vocab_size, word_dim)
        self.gru_layers = nn.ModuleList(
            [BiGruLayer(args) for _ in range(num_gru_layers)])
        self.emission = nn.Linear(hidden_dim * 2, num_labels)

        self.crf = ConditionalRandomField(num_labels)
Example #5
    def __init__(self,
                 tagset_size,
                 vocab_size,
                 hidden_dim,
                 embedding_dim,
                 pretrained_embeddings,
                 dropout,
                 num_layers,
                 pad_index,
                 device,
                 fine_tune=True,
                 bidirectional=True):

        super(LSTM_CRF_Softmax, self).__init__()

        self.tagset_size = tagset_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(p=dropout)
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.pad_index = pad_index
        self.device = device

        self.embedding_layer = nn.Embedding(self.vocab_size,
                                            self.embedding_dim)

        if type(pretrained_embeddings) == torch.Tensor:
            self.embedding_layer.weight.data.copy_(pretrained_embeddings)

        if not fine_tune:
            self.embedding_layer.weight.requires_grad = False

        self.lstm = nn.LSTM(self.embedding_dim,
                            self.hidden_dim,
                            num_layers=self.num_layers,
                            bidirectional=self.bidirectional)

        self.hidden2tag = nn.Linear(2 * self.hidden_dim, self.tagset_size)

        self.crf = ConditionalRandomField(self.tagset_size, 1, 2)

        self.loss_fn = nn.CrossEntropyLoss(ignore_index=self.pad_index)
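A small sketch (hypothetical shapes) of the two embedding-initialisation patterns that appear in these snippets: copying a pretrained matrix into an existing `nn.Embedding`, as above, versus building the layer with `nn.Embedding.from_pretrained` as in the NERModel examples, plus freezing when fine-tuning is disabled:

import torch
import torch.nn as nn

vocab_size, embedding_dim = 1000, 100                 # hypothetical sizes
pretrained = torch.randn(vocab_size, embedding_dim)   # stands in for word2vec/GloVe weights

# Pattern 1: build the layer, then copy the weights in place (as in LSTM_CRF above).
emb = nn.Embedding(vocab_size, embedding_dim)
emb.weight.data.copy_(pretrained)
emb.weight.requires_grad = False                      # equivalent to fine_tune=False

# Pattern 2: construct directly from the matrix and freeze it.
emb2 = nn.Embedding.from_pretrained(pretrained, freeze=True)

assert torch.equal(emb.weight, emb2.weight)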
Example #6
class LacNet(nn.Module):
    def __init__(self, args):
        super(LacNet, self).__init__()

        vocab_size = args.vocab_size
        word_dim = args.word_dim
        num_gru_layers = args.num_gru_layers
        num_labels = args.num_labels
        hidden_dim = args.hidden_dim

        self.word_emb = nn.Embedding(vocab_size, word_dim)
        self.gru_layers = nn.ModuleList(
            [BiGruLayer(args) for _ in range(num_gru_layers)])
        self.emission = nn.Linear(hidden_dim * 2, num_labels)

        self.crf = ConditionalRandomField(num_labels)
        # self.crf_decode = crf_decoding()
        # self.crf_cost = linear_chain_crf()

    def forward(self, x, lens=None):
        if lens is None:
            # Default to full-length sequences when no lengths are provided.
            lens = torch.tensor([x.size(1)] * x.size(0), device=x.device)
        mask = sequence_mask(lens)

        feats = self.word_emb(x)
        for gru in self.gru_layers:
            feats = gru(feats)
        feats = self.emission(feats)

        # Run the emission features through the Viterbi decode algorithm.
        preds = self.crf.viterbi_tags(feats, mask)
        return preds

    def get_trainable_params(self):
        module_params = [
            self.word_emb.parameters(),
            self.gru_layers.parameters(),
            self.emission.parameters(),
            self.crf.parameters()
        ]
        return module_params
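`sequence_mask` is not defined in the snippet; a minimal implementation with the usual semantics (True for real tokens, False for padding), shown here as an assumption about what the helper does, could look like this:

import torch

def sequence_mask(lens: torch.Tensor, max_len: int = None) -> torch.Tensor:
    """Boolean mask of shape [batch_size, max_len]; True where position < length."""
    if max_len is None:
        max_len = int(lens.max())
    positions = torch.arange(max_len, device=lens.device)
    return positions.unsqueeze(0) < lens.unsqueeze(1)

lens = torch.tensor([3, 5, 1])
print(sequence_mask(lens))
# tensor([[ True,  True,  True, False, False],
#         [ True,  True,  True,  True,  True],
#         [ True, False, False, False, False]])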
Example #7
class NERBert(BertPreTrainedModel):
    def __init__(self, config, args):
        super(NERBert, self).__init__(config)
        self.args = args
        self.bert = BertModel(config)
        if self.args.weighted:
            self.weight = nn.Parameter(torch.Tensor(self.args.num_layers))
            self.weight.data.uniform_(0.1, 0.9)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.args.num_labels)
        if self.args.use_crf:
            self.crf = ConditionalRandomField(self.args.num_labels)

        self.apply(self.init_bert_weights)

    def forward(self, input_ids, labels=None):
        attention_mask = input_ids.gt(0)
        encoded_layers, _ = self.bert(input_ids,
                                      None,
                                      attention_mask,
                                      output_all_encoded_layers=True)
        if not self.args.weighted:
            sequence_output = encoded_layers[-1]
        else:
            # Stack the last num_layers encoder outputs along a trailing layer axis:
            # [batch_size, seq_len, hidden_size, num_layers].
            last_layers = torch.stack(encoded_layers[-self.args.num_layers:], dim=-1)
            soft_weight = F.softmax(self.weight, dim=0)
            # Weighted sum over the layer axis.
            sequence_output = torch.matmul(last_layers, soft_weight)
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)
        if not self.args.use_crf:
            if labels is not None:
                loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
                # Only keep active parts of the loss
                loss = loss_fct(logits.view(-1, self.args.num_labels),
                                labels.view(-1))
                return logits, loss
            else:
                return logits
        else:
            if labels is not None:
                # Only keep active parts of the loss
                if attention_mask is not None:
                    total_loss = self.crf(logits, labels, attention_mask)
                    return torch.mean(total_loss)
            else:
                max_len = logits.shape[1]

                tag_seq = self.crf.viterbi_decode(logits, attention_mask)
                for pred in tag_seq:
                    if len(pred) < max_len:
                        pred += [0] * (max_len - len(pred))
                return tag_seq
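The `weighted` branch above pools the last few encoder layers with a softmax-normalised parameter vector. A self-contained sketch of that pooling with random stand-in tensors (hypothetical sizes, `torch.stack` used to keep the hidden/layer axes explicit):

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, seq_len, hidden_size, num_layers = 2, 8, 16, 4   # hypothetical sizes
# Stand-ins for the last `num_layers` encoder outputs, each [batch, seq, hidden].
encoded_layers = [torch.randn(batch_size, seq_len, hidden_size) for _ in range(num_layers)]

weight = nn.Parameter(torch.Tensor(num_layers))
weight.data.uniform_(0.1, 0.9)

# Stack the layers along a trailing axis: [batch, seq, hidden, num_layers].
last_layers = torch.stack(encoded_layers, dim=-1)
soft_weight = F.softmax(weight, dim=0)

# Weighted sum over the layer axis: [batch, seq, hidden].
sequence_output = torch.matmul(last_layers, soft_weight)
print(sequence_output.shape)   # torch.Size([2, 8, 16])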
Example #8
    def __init__(self, args, word_emb_matrix=None):
        super(NERModel, self).__init__()
        self.args = args

        if word_emb_matrix is not None:
            self.embedding = nn.Embedding.from_pretrained(torch.tensor(word_emb_matrix, dtype=torch.float))
            self.embedding.weight.requires_grad = args.trainable_embedding
        else:
            self.embedding = nn.Embedding(args.vocab_size, args.embedding_dim)
            self.embedding.weight.requires_grad = True
        if args.model == 'cnn':
            self.encoder = CNNEncoder(args)
        elif args.model == 'rnn':
            self.encoder = DynamicRNN(args.embedding_dim, args.hidden_dim, bidirectional=True)

        self.linear = nn.Linear(args.hidden_dim*2, args.num_labels)
        self.dropout = nn.Dropout(0.2)
        if self.args.use_crf:
            self.crf = ConditionalRandomField(args.num_labels)
Example #9
class NERModel(nn.Module):

    def __init__(self, args, word_emb_matrix=None):
        super(NERModel, self).__init__()
        self.args = args

        if word_emb_matrix is not None:
            self.embedding = nn.Embedding.from_pretrained(torch.tensor(word_emb_matrix, dtype=torch.float))
            self.embedding.weight.requires_grad = args.trainable_embedding
        else:
            self.embedding = nn.Embedding(args.vocab_size, args.embedding_dim)
            self.embedding.weight.requires_grad = True
        if args.model == 'cnn':
            self.encoder = CNNEncoder(args)
        elif args.model == 'rnn':
            self.encoder = DynamicRNN(args.embedding_dim, args.hidden_dim, bidirectional=True)

        self.linear = nn.Linear(args.hidden_dim*2, args.num_labels)
        self.dropout = nn.Dropout(0.2)
        if self.args.use_crf:
            self.crf = ConditionalRandomField(args.num_labels)

    def forward(self, input_ids, labels=None):
        attention_mask = input_ids.gt(0)
        inputs = self.embedding(input_ids)
        if self.args.model == 'cnn':
            rep = self.encoder(inputs)
        elif self.args.model == 'rnn':
            x_len = torch.sum(input_ids != 0, dim=1)
            rep, _ = self.encoder(inputs, x_len)
        logits = self.linear(self.dropout(rep))

        if not self.args.use_crf:
            if labels is not None:
                loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
                # Only keep active parts of the loss
                loss = loss_fct(logits.view(-1, self.args.num_labels), labels.view(-1))
                return logits, loss
            else:
                return logits
        else:
            if labels is not None:
                # Only keep active parts of the loss
                if attention_mask is not None:
                    total_loss = self.crf(logits, labels, attention_mask)
                    return 0, torch.mean(total_loss)
            else:
                max_len = logits.shape[1]

                tag_seq = self.crf.viterbi_decode(logits, attention_mask)
                for pred in tag_seq:
                    if len(pred) < max_len:
                        pred += [0] * (max_len - len(pred))
                return tag_seq
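When the CRF is disabled, these models flatten logits and labels and rely on `ignore_index` to drop padded positions from the loss. A toy, self-contained illustration (hypothetical shapes, padding label set to -1):

import torch
import torch.nn as nn

batch_size, max_len, num_labels = 2, 5, 4      # hypothetical sizes
logits = torch.randn(batch_size, max_len, num_labels)
labels = torch.randint(0, num_labels, (batch_size, max_len))
labels[0, 3:] = -1          # mark padded positions with the ignore index
labels[1, 4:] = -1

loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
# Flatten to [batch_size * max_len, num_labels] vs. [batch_size * max_len];
# positions labelled -1 contribute nothing to the loss.
loss = loss_fct(logits.view(-1, num_labels), labels.view(-1))
print(loss.item())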
Example #10
    def __init__(
        self,
        bert_model,
        num_labels=9,
        embedding_dim=512,
        hidden_dim=512,
        rnn_layers=1,
        rnn_dropout=0.1,
        output_dropout=0.1,
        use_cuda=False,
    ):
        super(BertLstmCrf, self).__init__()
        self.bert_encoder = bert_model

        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.rnn_layers = rnn_layers

        self.lstm = None
        if rnn_layers > 0:
            self.lstm = nn.LSTM(
                embedding_dim,
                hidden_dim,
                num_layers=rnn_layers,
                bidirectional=True,
                dropout=rnn_dropout,
                batch_first=True,
            )

        # TODO: add constraints
        constraints = None
        include_start_end_transitions = False
        self.crf = ConditionalRandomField(
            num_labels,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )

        self.liner = nn.Linear(hidden_dim * 2, num_labels)
        self.num_labels = num_labels

        self.output_dropout = nn.Dropout(p=output_dropout)
Example #11
    def __init__(self, args, config, word_embedding):
        super(Transformer, self).__init__()

        n_classes = args.n_classes
        d_model = int(config['d_model'])
        h = int(config['n_head'])
        d_ff = int(config['d_ff'])
        N = int(config['n_layer'])
        dropout = float(config['dropout'])
        vocab_size = args.vocab_size
        self.use_crf = int(config['use_crf'])

        c = copy.deepcopy
        attn = MultiHeadedAttention(h, d_model)
        ffn = PositionwiseFeedForward(d_model, d_ff, dropout)
        pe = PositionalEncoding(d_model, dropout)
        layer = EncoderLayer(d_model, c(attn), c(ffn), dropout)

        self.embed = nn.Sequential(
            Embeddings(word_embedding, vocab_size, d_model), c(pe))
        self.encoder = Encoder(c(layer), N)
        self.out = nn.ModuleList([
            nn.Linear(d_model, n_classes[0]),
            nn.Linear(d_model, n_classes[1]),
            nn.Linear(d_model, n_classes[2])
        ])

        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

        if self.use_crf:
            self.crf = nn.ModuleList([
                ConditionalRandomField(n_classes[0], True),
                ConditionalRandomField(n_classes[1], True),
                ConditionalRandomField(n_classes[2], True)
            ])

        self.log_sigma_square_1 = nn.Parameter(torch.Tensor([0]))
        self.log_sigma_square_2 = nn.Parameter(torch.Tensor([0]))
        self.log_sigma_square_3 = nn.Parameter(torch.Tensor([0]))
Example #12
    def __init__(self, args, config, word_embedding):
        super(BiLSTM, self).__init__()

        self.vocab_size = args.vocab_size
        self.embed_dim = int(config['embed_dim'])
        self.hidden_size = int(config['hidden_size'])
        self.n_classes = args.n_classes
        self.dropout_p = float(config['dropout'])
        self.n_layer = int(config['n_layer'])
        self.use_crf = int(config['use_crf'])

        we = torch.from_numpy(word_embedding).float()
        self.embed = nn.Embedding(self.vocab_size, self.embed_dim, _weight=we)
        self.dropout = nn.Dropout(self.dropout_p)
        self.bilstm = nn.LSTM(input_size=self.embed_dim,
                              hidden_size=self.hidden_size,
                              num_layers=self.n_layer,
                              batch_first=True,
                              bidirectional=True)
        self.out = nn.ModuleList([
            nn.Linear(self.hidden_size * 2, self.n_classes[0]),
            nn.Linear(self.hidden_size * 2, self.n_classes[1]),
            nn.Linear(self.hidden_size * 2, self.n_classes[2])
        ])

        init_linear(self.out[0])
        init_linear(self.out[1])
        init_linear(self.out[2])

        if self.use_crf:
            self.crf = nn.ModuleList([
                ConditionalRandomField(self.n_classes[0]),
                ConditionalRandomField(self.n_classes[1]),
                ConditionalRandomField(self.n_classes[2])
            ])

        self.log_sigma_square_pos = nn.Parameter(torch.Tensor([0]))
        self.log_sigma_square_ner = nn.Parameter(torch.Tensor([0]))
        self.log_sigma_square_chunk = nn.Parameter(torch.Tensor([0]))
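The three `log_sigma_square_*` parameters suggest homoscedastic-uncertainty task weighting across the POS, NER, and chunking heads. The combination step is not shown in the snippet; a common form, sketched here purely as an assumption, is:

import torch
import torch.nn as nn

# Learnable log-variances, one per task (POS, NER, chunking), mirroring the model above.
log_sigma_sq = nn.ParameterList([nn.Parameter(torch.zeros(1)) for _ in range(3)])

def combine_losses(task_losses):
    """Weight each task loss by exp(-log sigma^2) and add log sigma^2 as a regulariser."""
    total = 0.0
    for loss, log_s2 in zip(task_losses, log_sigma_sq):
        total = total + torch.exp(-log_s2) * loss + log_s2
    return total

# Toy per-task losses standing in for the three heads' CRF / cross-entropy losses.
dummy = [torch.tensor(0.7), torch.tensor(1.2), torch.tensor(0.4)]
print(combine_losses(dummy))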
Example #13
class BertLstmCrf(nn.Module):
    """
    bert_lstm_crf model
    """
    def __init__(
        self,
        bert_model,
        num_labels=9,
        embedding_dim=512,
        hidden_dim=512,
        rnn_layers=1,
        rnn_dropout=0.1,
        output_dropout=0.1,
        use_cuda=False,
    ):
        super(BertLstmCrf, self).__init__()
        self.bert_encoder = bert_model

        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.rnn_layers = rnn_layers

        self.lstm = None
        if rnn_layers > 0:
            self.lstm = nn.LSTM(
                embedding_dim,
                hidden_dim,
                num_layers=rnn_layers,
                bidirectional=True,
                dropout=rnn_dropout,
                batch_first=True,
            )

        # TODO: add constraints
        constraints = None
        include_start_end_transitions = False
        self.crf = ConditionalRandomField(
            num_labels,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )

        self.liner = nn.Linear(hidden_dim * 2, num_labels)
        self.num_labels = num_labels

        self.output_dropout = nn.Dropout(p=output_dropout)

    def rand_init_hidden(self, batch_size):
        """
        random initialize hidden variable
        """
        return (
            torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim),
            torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim),
        )

    def forward(self, **kwargs):
        """
        args:
            sentence (word_seq_len, batch_size) : word-level representation of sentence
            hidden: initial hidden state

        return:
            crf output (word_seq_len, batch_size, tag_size, tag_size), hidden
        """

        kwargs_copy = copy.deepcopy(kwargs)
        if "labels" in kwargs_copy:
            kwargs_copy.pop("labels")

        batch_size = kwargs["input_ids"].size(0)
        seq_length = kwargs["input_ids"].size(1)

        bert_outputs = self.bert_encoder(**kwargs_copy)
        sequence_output = bert_outputs[0]

        if self.lstm is not None:
            hidden = self.rand_init_hidden(batch_size)
            if kwargs["input_ids"].is_cuda:
                hidden = [i.cuda() for i in hidden]
            sequence_output, hidden = self.lstm(sequence_output, hidden)
            sequence_output = sequence_output.contiguous().view(
                -1, self.hidden_dim * 2)
            sequence_output = self.output_dropout(sequence_output)

        out = self.liner(sequence_output)
        logits = out.contiguous().view(batch_size, seq_length, -1)

        best_paths = self.crf.viterbi_tags(logits,
                                           kwargs["attention_mask"].long(),
                                           top_k=1)
        # Just get the top tags and ignore the scores.
        predicted_tags = cast(List[List[int]], [x[0][0] for x in best_paths])

        if kwargs.get("labels") is not None:
            labels = kwargs.get("labels")

            log_likelihood = self.crf(logits, labels, kwargs["attention_mask"])
            loss = -log_likelihood
            return loss, logits, predicted_tags

        return None, logits, predicted_tags
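`rand_init_hidden` has to match `nn.LSTM`'s expected hidden shape of `(num_layers * num_directions, batch, hidden)`. A small standalone check of that contract (hypothetical sizes):

import torch
import torch.nn as nn

rnn_layers, batch_size, seq_len = 1, 2, 7          # hypothetical sizes
embedding_dim, hidden_dim = 512, 512

lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=rnn_layers,
               bidirectional=True, batch_first=True)

# Bidirectional => num_directions = 2, so the leading dim is 2 * rnn_layers.
hidden = (torch.randn(2 * rnn_layers, batch_size, hidden_dim),
          torch.randn(2 * rnn_layers, batch_size, hidden_dim))

x = torch.randn(batch_size, seq_len, embedding_dim)
out, _ = lstm(x, hidden)
print(out.shape)   # torch.Size([2, 7, 1024]) -- forward and backward states concatenated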
Example #14
class BiLSTM_CRF(nn.Module):
    """
    别名::class:`fastNLP.models.AdvSeqLabel`  :class:`fastNLP.models.sequence_labeling.AdvSeqLabel`

    更复杂的Sequence Labelling模型。结构为Embedding, LayerNorm, 双向LSTM(两层),FC,LayerNorm,DropOut,FC,CRF。
    
    :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray init_embed: Embedding的大小(传入tuple(int, int),
        第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding
    :param int hidden_size: LSTM的隐层大小
    :param int num_classes: 有多少个类
    :param float dropout: LSTM中以及DropOut层的drop概率
    :param dict id2words: tag id转为其tag word的表。用于在CRF解码时防止解出非法的顺序,比如'BMES'这个标签规范中,'S'
        不能出现在'B'之后。这里也支持类似与'B-NN',即'-'前为标签类型的指示,后面为具体的tag的情况。这里不但会保证
        'B-NN'后面不为'S-NN'还会保证'B-NN'后面不会出现'M-xx'(任何非'M-NN'和'E-NN'的情况。)
    :param str encoding_type: 支持"BIO", "BMES", "BEMSO", 只有在id2words不为None的情况有用。
    """
    
    def __init__(self, char_init_embed, word_init_embed, pos_init_embed, spo_embed_dim, sentence_length, 
        hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bieso', weight=None):
        
        super().__init__()
        
        self.char_embed = nn.Embedding(char_init_embed[0], char_init_embed[1])
        self.word_embed = nn.Embedding(word_init_embed[0], word_init_embed[1])
        # word2vec
        self.word_embed.weight.data.copy_(torch.from_numpy(weight))
        self.pos_embed = nn.Embedding(pos_init_embed[0], pos_init_embed[1])
        # spo embed size: 50
        self.embed_dim = self.char_embed.embedding_dim + self.word_embed.embedding_dim + self.pos_embed.embedding_dim + spo_embed_dim
        # sentence_length is kept in the signature but currently unused

        self.norm1 = torch.nn.LayerNorm(self.embed_dim)
        self.Rnn = nn.LSTM(input_size=self.embed_dim, hidden_size=hidden_size, num_layers=2,
                            dropout=dropout, bidirectional=True, batch_first=True)
        self.Linear1 = nn.Linear(hidden_size * 2, hidden_size * 2 // 3)
        self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3)
        self.relu = torch.nn.LeakyReLU()
        self.drop = torch.nn.Dropout(dropout)
        self.Linear2 = nn.Linear(hidden_size * 2 // 3, num_classes)
        
        if id2words is None:
            self.Crf = CRF(num_classes, include_start_end_trans=False)
        else:
            self.Crf = CRF(num_classes, include_start_end_trans=False,
                            allowed_transitions=allowed_transitions(id2words, encoding_type=encoding_type))
    
    def _decode(self, x):
        """
        :param torch.FloatTensor x: [batch_size, max_len, tag_size]
        :return torch.LongTensor, [batch_size, max_len]
        """
        tag_seq, _ = self.Crf.viterbi_decode(x, self.mask)
        return tag_seq
    
    def _internal_loss(self, x, y):
        """
        Negative log likelihood loss.
        :param x: Tensor, [batch_size, max_len, tag_size]
        :param y: Tensor, [batch_size, max_len]
        :return loss: a scalar Tensor

        """
        x = x.float()
        y = y.long()
        assert x.shape[:2] == y.shape
        assert y.shape == self.mask.shape
        total_loss = self.Crf(x, y, self.mask)
        return torch.mean(total_loss)
    
    def _make_mask(self, x, seq_len):
        batch_size, max_len = x.size(0), x.size(1)
        mask = seq_len_to_mask(seq_len, max_len)
        mask = mask.to(x).float()
        return mask

    def _forward(self, char, word, pos, spo, seq_len, tag=None):
        """
        :param torch.LongTensor words: [batch_size, max_len]
        :param torch.LongTensor seq_len:[batch_size, ]
        :param torch.LongTensor target: [batch_size, max_len]
        :return y: If truth is None, return list of [decode path(list)]. Used in testing and predicting.
                   If truth is not None, return loss, a scalar. Used in training.
        """
        
        char = char.long()
        seq_len = seq_len.long()
        self.mask = self._make_mask(char, seq_len)

        tag = tag.long() if tag is not None else None

        char = self.char_embed(char)
        word = self.word_embed(word)
        pos = self.pos_embed(pos)
        # Broadcast the sentence-level SPO embedding to every time step.
        spo = spo.unsqueeze(1).repeat(1, char.shape[1], 1).float()
        x = torch.cat((char, word, pos, spo), dim=2)

        # [batch_size, max_len, char_embed_dim + word_embed_dim + pos_embed_dim + spo_embed_dim]
        x = self.norm1(x)
        x, _ = self.Rnn(x)
        
        x = self.Linear1(x)
        x = self.norm2(x)
        x = self.relu(x)
        x = self.drop(x)
        x = self.Linear2(x)
        if tag is not None:
            return self._internal_loss(x, tag)
        else:
            return self._decode(x)

    def forward(self, char, word, pos, spo, seq_len, tag):
        """
        
        :param torch.LongTensor words: [batch_size, mex_len]
        :param torch.LongTensor seq_len: [batch_size, ]
        :param torch.LongTensor target: [batch_size, max_len], 目标
        :return torch.Tensor: a scalar loss
        """
        return self._forward(char, word, pos, spo, seq_len, tag)

    def predict(self, char, word, pos, spo, seq_len):
        """
        
        :param torch.LongTensor words: [batch_size, max_len]
        :param torch.LongTensor seq_len: [batch_size, ]
        :return torch.LongTensor: [batch_size, max_len]
        """
        return self._forward(char, word, pos, spo, seq_len)
Example #15
class LSTM_CRF(nn.Module):
    def __init__(self,
                 tagset_size,
                 vocab_size,
                 hidden_dim,
                 embedding_dim,
                 pretrained_embeddings,
                 dropout,
                 num_layers,
                 pad_index,
                 device,
                 fine_tune=True,
                 bidirectional=True):

        super(LSTM_CRF, self).__init__()

        self.tagset_size = tagset_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(p=dropout)
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.pad_index = pad_index
        self.device = device

        self.embedding_layer = nn.Embedding(self.vocab_size,
                                            self.embedding_dim)

        if type(pretrained_embeddings) == torch.Tensor:
            self.embedding_layer.weight.data.copy_(pretrained_embeddings)

        if not fine_tune:
            self.embedding_layer.weight.requires_grad = False

        self.lstm = nn.LSTM(self.embedding_dim,
                            self.hidden_dim,
                            num_layers=self.num_layers,
                            bidirectional=self.bidirectional)

        self.hidden2tag = nn.Linear(2 * self.hidden_dim, self.tagset_size)

        self.crf = ConditionalRandomField(self.tagset_size, 1, 2)

    def get_lstm_feats(self, batch):

        lens = batch['lens']
        word_sequences = batch['word_sequences']
        max_len = max(lens)
        batch_size = len(word_sequences)

        embeddings = self.embedding_layer(word_sequences)
        embeddings = self.dropout(embeddings)

        packed_input = pack_padded_sequence(embeddings, lens, batch_first=True)
        packed_hidden_states, _ = self.lstm(packed_input)
        hidden_states, _ = pad_packed_sequence(packed_hidden_states,
                                               batch_first=True)
        hidden_states = self.dropout(hidden_states)

        logits = self.hidden2tag(hidden_states)

        return logits

    def loss(self, batch):
        logits = self.get_lstm_feats(batch)
        mask = batch['mask'].squeeze(1)
        return self.crf.forward(logits, batch['tag_sequences'], mask)

    def forward(self, batch):
        logits = self.get_lstm_feats(batch)
        mask = batch['mask'].squeeze(1)
        all_tags = self.crf.viterbi_tags(logits.to('cpu'), mask.to('cpu'))
        max_len = max(batch['lens'])
        for i in range(len(all_tags)):
            # Pad each decoded tag sequence with zeros up to max_len.
            all_tags[i] += [0 for _ in range(max_len - len(all_tags[i]))]
        return None, torch.tensor(all_tags)
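The packing and unpacking done in `get_lstm_feats` can be exercised on its own. A minimal sketch with dummy data, using `enforce_sorted=False` so the batch need not be length-sorted (an assumption about how the data loader behaves):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

batch_size, max_len, embed_dim, hidden_dim = 3, 6, 8, 5   # hypothetical sizes
lens = torch.tensor([6, 4, 2])
embeddings = torch.randn(batch_size, max_len, embed_dim)

lstm = nn.LSTM(embed_dim, hidden_dim, bidirectional=True, batch_first=True)

packed = pack_padded_sequence(embeddings, lens, batch_first=True,
                              enforce_sorted=False)
packed_out, _ = lstm(packed)
out, out_lens = pad_packed_sequence(packed_out, batch_first=True)

print(out.shape)      # torch.Size([3, 6, 10]) -- padded back to max_len
print(out_lens)       # tensor([6, 4, 2])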
Example #16
class Transformer_CRF(nn.Module):
    def __init__(self,
                 char_init_embed,
                 word_init_embed,
                 pos_init_embed,
                 spo_embed_dim,
                 num_classes,
                 num_layers,
                 inner_size,
                 key_size,
                 value_size,
                 num_head,
                 dropout=0.1,
                 id2words=None,
                 encoding_type='bieso',
                 weight=None):
        super().__init__()

        self.char_embed = nn.Embedding(char_init_embed[0], char_init_embed[1])
        self.word_embed = nn.Embedding(word_init_embed[0], word_init_embed[1])
        self.word_embed.weight.data.copy_(torch.from_numpy(weight))
        self.pos_embed = nn.Embedding(pos_init_embed[0], pos_init_embed[1])
        # spo embed size: 50
        self.embed_dim = self.char_embed.embedding_dim + self.word_embed.embedding_dim + self.pos_embed.embedding_dim + spo_embed_dim

        self.norm1 = torch.nn.LayerNorm(self.embed_dim)
        self.transformer = encoder.TransformerEncoder(
            num_layers=num_layers,
            model_size=self.embed_dim,
            inner_size=inner_size,
            key_size=key_size,
            value_size=value_size,
            num_head=num_head,
            dropout=dropout)
        self.Linear1 = nn.Linear(self.embed_dim, self.embed_dim // 3)
        self.norm2 = torch.nn.LayerNorm(self.embed_dim // 3)
        self.relu = torch.nn.LeakyReLU()
        self.drop = torch.nn.Dropout(dropout)
        self.Linear2 = nn.Linear(self.embed_dim // 3, num_classes)
        self.Linear = nn.Linear(self.embed_dim, num_classes)

        if id2words is None:
            self.Crf = CRF(num_classes, include_start_end_trans=False)
        else:
            self.Crf = CRF(num_classes,
                           include_start_end_trans=False,
                           allowed_transitions=allowed_transitions(
                               id2words, encoding_type=encoding_type))

    def _decode(self, x):
        """
        :param torch.FloatTensor x: [batch_size, max_len, tag_size]
        :return torch.LongTensor, [batch_size, max_len]
        """
        tag_seq, _ = self.Crf.viterbi_decode(x, self.mask)
        return tag_seq

    def _internal_loss(self, x, y):
        """
        Negative log likelihood loss.
        :param x: Tensor, [batch_size, max_len, tag_size]
        :param y: Tensor, [batch_size, max_len]
        :return loss: a scalar Tensor

        """
        x = x.float()
        y = y.long()
        assert x.shape[:2] == y.shape
        assert y.shape == self.mask.shape
        total_loss = self.Crf(x, y, self.mask)
        return torch.mean(total_loss)

    def _make_mask(self, x, seq_len):
        batch_size, max_len = x.size(0), x.size(1)
        mask = seq_len_to_mask(seq_len)
        mask = mask.to(x).float()
        return mask

    def _forward(self, char, word, pos, spo, seq_len, tag=None):
        """
        :param torch.LongTensor words: [batch_size, max_len]
        :param torch.LongTensor seq_len:[batch_size, ]
        :param torch.LongTensor target: [batch_size, max_len]
        :return y: If truth is None, return list of [decode path(list)]. Used in testing and predicting.
                   If truth is not None, return loss, a scalar. Used in training.
        """

        char = char.long()
        seq_len = seq_len.long()
        self.mask = self._make_mask(char, seq_len)

        tag = tag.long() if tag is not None else None

        char = self.char_embed(char)
        word = self.word_embed(word)
        pos = self.pos_embed(pos)
        # Broadcast the sentence-level SPO embedding to every time step.
        spo = spo.unsqueeze(1).repeat(1, char.shape[1], 1).float()
        x = torch.cat((char, word, pos, spo), dim=2)

        # [batch_size, max_len, char_embed_dim + word_embed_dim + pos_embed_dim + spo_embed_dim]
        x = self.norm1(x)

        x = self.transformer(x, seq_mask=self.mask)

        x = self.Linear(x)
        if tag is not None:
            return self._internal_loss(x, tag)
        else:
            return self._decode(x)
        #return {"pred": self._decode(x)}

    def forward(self, char, word, pos, spo, seq_len, tag):
        """
        
        :param torch.LongTensor words: [batch_size, mex_len]
        :param torch.LongTensor seq_len: [batch_size, ]
        :param torch.LongTensor target: [batch_size, max_len], 目标
        :return torch.Tensor: a scalar loss
        """
        return self._forward(char, word, pos, spo, seq_len, tag)

    def predict(self, char, word, pos, spo, seq_len):
        """
        
        :param torch.LongTensor words: [batch_size, max_len]
        :param torch.LongTensor seq_len: [batch_size, ]
        :return torch.LongTensor: [batch_size, max_len]
        """
        return self._forward(char, word, pos, spo, seq_len)
Example #17
class Lattice_Transformer_SeqLabel(nn.Module):
    def __init__(self,
                 lattice_weight,
                 lattice_num,
                 lattice_dim,
                 bigram_weight,
                 bigram_num,
                 bigram_dim,
                 hidden_size,
                 label_size,
                 num_heads,
                 num_layers,
                 learnable_position,
                 layer_preprocess_sequence,
                 layer_postprocess_sequence,
                 ff_size=-1,
                 dropout=None,
                 max_seq_len=-1):
        super().__init__()
        self.lattice_embed = nn.Embedding(lattice_num, lattice_dim)
        self.lattice_embed.weight.data.copy_(torch.from_numpy(lattice_weight))
        self.bigram_embed = nn.Embedding(bigram_num, bigram_dim)
        self.bigram_embed.weight.data.copy_(torch.from_numpy(bigram_weight))

        pe_ss = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)
        pe_se = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)
        pe_es = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)
        pe_ee = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)

        # self.bigram_size = self.bigram_embed.embedding.weight.size(1)
        # char_input_size = self.lattice_embed.embedding.weight.size(1) + self.bigram_embed.embedding.weight.size(1)
        # lex_input_size = self.lattice_embed.embedding.weight.size(1)

        self.bigram_size = bigram_dim
        char_input_size = bigram_dim + lattice_dim
        lex_input_size = lattice_dim

        self.embed_dropout = nn.Dropout(p=dropout['embed'])
        self.gaz_dropout = nn.Dropout(p=dropout['gaz'])
        self.output_dropout = nn.Dropout(p=dropout['output'])

        self.char_proj = nn.Linear(char_input_size, hidden_size)
        self.lex_proj = nn.Linear(lex_input_size, hidden_size)

        self.encoder = Transformer_Encoder(
            hidden_size,
            num_heads,
            num_layers,
            learnable_position=learnable_position,
            layer_preprocess_sequence=layer_preprocess_sequence,
            layer_postprocess_sequence=layer_postprocess_sequence,
            dropout=dropout,
            ff_size=ff_size,
            max_seq_len=max_seq_len,
            pe_ss=pe_ss,
            pe_se=pe_se,
            pe_es=pe_es,
            pe_ee=pe_ee)
        self.output = nn.Linear(hidden_size, label_size)
        self.crf = ConditionalRandomField(label_size,
                                          include_start_end_trans=True)
        # self.crf.trans_m = nn.Parameter(torch.zeros(size=[label_size, label_size], requires_grad=True))
        self.loss_func = nn.CrossEntropyLoss(ignore_index=-100)

    # Used during training.
    # TODO: annotate parameter types
    def forward(self, lattice: torch.Tensor, bigrams: torch.Tensor,
                seq_len: torch.Tensor, lex_num: torch.Tensor,
                pos_s: torch.Tensor, pos_e: torch.Tensor,
                target: Optional[torch.Tensor]):
        batch_size = lattice.size(0)
        max_seq_len_and_lex_num = lattice.size(1)
        max_seq_len = bigrams.size(1)

        raw_embed = self.lattice_embed(lattice)
        bigrams_embed = self.bigram_embed(bigrams)
        # Zero-pad the bigram embeddings so they also cover the lexicon-word positions.
        bigrams_embed = torch.cat([
            bigrams_embed,
            torch.zeros(size=[batch_size,
                              max_seq_len_and_lex_num - max_seq_len,
                              self.bigram_size]).to(bigrams_embed)
        ], dim=1)
        raw_embed_char = torch.cat([raw_embed, bigrams_embed], dim=-1)

        raw_embed_char = self.embed_dropout(raw_embed_char)
        raw_embed = self.gaz_dropout(raw_embed)

        embed_char = self.char_proj(raw_embed_char)
        char_mask = seq_len_to_mask(seq_len, max_len=max_seq_len_and_lex_num)
        embed_char.masked_fill_(~(char_mask.unsqueeze(-1)), 0)

        embed_lex = self.lex_proj(raw_embed)
        lex_mask = (seq_len_to_mask(seq_len + lex_num) ^ char_mask)
        embed_lex.masked_fill_(~(lex_mask).unsqueeze(-1), 0)

        embedding = embed_char + embed_lex
        encoded = self.encoder(embedding,
                               seq_len,
                               lex_num=lex_num,
                               pos_s=pos_s,
                               pos_e=pos_e)
        encoded = self.output_dropout(encoded)

        # Only keep the char part of the transformer output here.
        encoded = encoded[:, :max_seq_len, :]
        pred = self.output(encoded)
        mask = seq_len_to_mask(seq_len)

        # For script (TorchScript) use:
        # pred, path = self.crf.viterbi_decode(pred, mask)
        # return pred

        if self.training:
            loss = self.crf(pred, target, mask).mean(dim=0)
            return {'loss': loss}
        else:
            pred, path = self.crf.viterbi_decode(pred, mask)
            result = {'pred': pred}
            return result
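In the lattice model the bigram embeddings only cover the character positions, so they are zero-padded up to the full lattice length (characters plus matched lexicon words) before being concatenated with the lattice embeddings. A shape-only sketch with random tensors (hypothetical sizes):

import torch

batch_size = 2
max_seq_len = 5                  # number of characters
lex_num = 3                      # matched lexicon words appended after the characters
max_seq_len_and_lex_num = max_seq_len + lex_num
lattice_dim, bigram_dim = 16, 8  # hypothetical embedding sizes

raw_embed = torch.randn(batch_size, max_seq_len_and_lex_num, lattice_dim)
bigrams_embed = torch.randn(batch_size, max_seq_len, bigram_dim)

# Pad the bigram embeddings with zeros for the lexicon-word positions.
pad = torch.zeros(batch_size, max_seq_len_and_lex_num - max_seq_len, bigram_dim)
bigrams_embed = torch.cat([bigrams_embed, pad], dim=1)

# Character input = lattice embedding and bigram embedding concatenated on the feature axis.
raw_embed_char = torch.cat([raw_embed, bigrams_embed], dim=-1)
print(raw_embed_char.shape)   # torch.Size([2, 8, 24])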
Example #18
    def __init__(self,
                 lattice_weight,
                 lattice_num,
                 lattice_dim,
                 bigram_weight,
                 bigram_num,
                 bigram_dim,
                 hidden_size,
                 label_size,
                 num_heads,
                 num_layers,
                 learnable_position,
                 layer_preprocess_sequence,
                 layer_postprocess_sequence,
                 ff_size=-1,
                 dropout=None,
                 max_seq_len=-1):
        super().__init__()
        self.lattice_embed = nn.Embedding(lattice_num, lattice_dim)
        self.lattice_embed.weight.data.copy_(torch.from_numpy(lattice_weight))
        self.bigram_embed = nn.Embedding(bigram_num, bigram_dim)
        self.bigram_embed.weight.data.copy_(torch.from_numpy(bigram_weight))

        pe_ss = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)
        pe_se = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)
        pe_es = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)
        pe_ee = nn.Parameter(get_pos_embedding(max_seq_len,
                                               hidden_size,
                                               rel_pos_init=0),
                             requires_grad=learnable_position)

        # self.bigram_size = self.bigram_embed.embedding.weight.size(1)
        # char_input_size = self.lattice_embed.embedding.weight.size(1) + self.bigram_embed.embedding.weight.size(1)
        # lex_input_size = self.lattice_embed.embedding.weight.size(1)

        self.bigram_size = bigram_dim
        char_input_size = bigram_dim + lattice_dim
        lex_input_size = lattice_dim

        self.embed_dropout = nn.Dropout(p=dropout['embed'])
        self.gaz_dropout = nn.Dropout(p=dropout['gaz'])
        self.output_dropout = nn.Dropout(p=dropout['output'])

        self.char_proj = nn.Linear(char_input_size, hidden_size)
        self.lex_proj = nn.Linear(lex_input_size, hidden_size)

        self.encoder = Transformer_Encoder(
            hidden_size,
            num_heads,
            num_layers,
            learnable_position=learnable_position,
            layer_preprocess_sequence=layer_preprocess_sequence,
            layer_postprocess_sequence=layer_postprocess_sequence,
            dropout=dropout,
            ff_size=ff_size,
            max_seq_len=max_seq_len,
            pe_ss=pe_ss,
            pe_se=pe_se,
            pe_es=pe_es,
            pe_ee=pe_ee)
        self.output = nn.Linear(hidden_size, label_size)
        self.crf = ConditionalRandomField(label_size,
                                          include_start_end_trans=True)
        # self.crf.trans_m = nn.Parameter(torch.zeros(size=[label_size, label_size], requires_grad=True))
        self.loss_func = nn.CrossEntropyLoss(ignore_index=-100)