import numpy as np
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# CRF and ScaledDotProductAttention are assumed to come from this project's own
# modules (an NCRF++-style CRF layer and a scaled dot-product attention module);
# they are not defined in this example.


class BiLstmCrf(nn.Module):
    def __init__(self, data, configs):
        super(BiLstmCrf, self).__init__()
        if configs['random_embedding']:
            self.char_embeddings = nn.Embedding(data.char_alphabet_size,
                                                configs['char_emb_dim'])
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.char_alphabet_size,
                                          configs['char_emb_dim'])))
            self.char_drop = nn.Dropout(configs['dropout'])
        else:
            # presumably pre-trained embeddings would be loaded here; not implemented in this example
            pass
        self.feature_embeddings = nn.Embedding(data.feat_alphabet_size,
                                               configs['feature_emb_dim'])
        self.input_drop = nn.Dropout(configs['dropout'])
        self.lstm = nn.LSTM(configs['char_emb_dim'] +
                            configs['feature_emb_dim'],
                            configs['hidden_dim'] // 2,
                            num_layers=configs['num_layers'],
                            batch_first=configs['batch_first'],
                            bidirectional=configs['bidirectional'])
        self.drop_lstm = nn.Dropout(configs['dropout'])
        self.hidden2tag = nn.Linear(configs['hidden_dim'],
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, configs['gpu'])

    def forward(self,
                batch_input,
                batch_feature,
                batch_len,
                batch_recover,
                mask,
                batch_label=None):
        batch_size = len(batch_input)
        char_embeds = self.char_drop(self.char_embeddings(batch_input))
        feat_embeds = self.feature_embeddings(batch_feature)
        feat_embeds = torch.repeat_interleave(feat_embeds,
                                              batch_input.size(1),
                                              dim=1)
        # alternative: element-wise product instead of concatenation
        # input_embeds = char_embeds * feat_embeds

        input_embeds = torch.cat([char_embeds, feat_embeds], 2)
        input_represent = self.input_drop(input_embeds)

        packed_words = pack_padded_sequence(input_represent,
                                            batch_len.cpu().numpy(),
                                            batch_first=True)
        # To drop feat_embeds (i.e. use only the char embeddings), use the line below instead:
        # packed_words = pack_padded_sequence(char_embeds, batch_len.cpu().numpy(), batch_first=True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))

        outputs = self.hidden2tag(lstm_out)

        if batch_label is not None:
            total_loss = self.crf.neg_log_likelihood_loss(
                outputs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return total_loss, tag_seq
        else:
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return tag_seq

    @staticmethod
    def random_embedding(vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb
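

# A minimal shape-flow sketch added for illustration; it is not part of the
# original example. It mirrors the embedding -> repeat_interleave -> cat ->
# packed BiLSTM path of BiLstmCrf.forward with made-up dimensions and stops
# before the CRF, which lives in a separate module. All sizes are assumptions.
def _bilstm_crf_shape_demo():
    batch, seq_len, char_dim, feat_dim, hidden = 2, 5, 300, 100, 200
    char_embeds = torch.randn(batch, seq_len, char_dim)       # character embeddings
    feat_embeds = torch.randn(batch, 1, feat_dim)             # one intent feature per sentence
    feat_embeds = torch.repeat_interleave(feat_embeds, seq_len, dim=1)
    inputs = torch.cat([char_embeds, feat_embeds], 2)         # (batch, seq_len, char_dim + feat_dim)
    lengths = torch.tensor([seq_len, seq_len - 2])            # lengths sorted in descending order
    packed = pack_padded_sequence(inputs, lengths.cpu().numpy(), batch_first=True)
    lstm = nn.LSTM(char_dim + feat_dim, hidden // 2, num_layers=1,
                   batch_first=True, bidirectional=True)
    lstm_out, _ = lstm(packed)
    lstm_out, _ = pad_packed_sequence(lstm_out)
    print(lstm_out.transpose(1, 0).shape)                     # torch.Size([2, 5, 200])

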
class CnnAttnLstmCRF(nn.Module):
    def __init__(self, data, configs):
        super(CnnAttnLstmCRF, self).__init__()
        if configs['random_embedding']:
            self.char_embeddings = nn.Embedding(data.char_alphabet_size,
                                                configs['char_emb_dim'])
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.char_alphabet_size,
                                          configs['char_emb_dim'])))
            self.char_drop = nn.Dropout(configs['dropout'])

            self.word_embeddings = nn.Embedding(data.word_alphabet_size,
                                                configs['word_emb_dim'])
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet_size,
                                          configs['word_emb_dim'])))
            # self.word_drop = nn.Dropout(configs['dropout'])

            self.lexi_embeddings = nn.Embedding(data.lexicon_alphabet_size,
                                                configs['lexi_emb_dim'])
            self.lexi_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.lexicon_alphabet_size,
                                          configs['lexi_emb_dim'])))
        else:
            # presumably pre-trained embeddings would be loaded here; not implemented in this example
            pass
        # self.word_feat_embeddings = nn.Embedding()
        self.intent_embeddings = nn.Embedding(data.feat_alphabet_size,
                                              configs['intent_emb_dim'])
        self.word_drop = nn.Dropout(configs['dropout'])
        self.char_cnn = nn.Conv1d(in_channels=configs['char_emb_dim'],
                                  out_channels=configs['cnn_hidden_dim'],
                                  kernel_size=3,
                                  padding=1)

        self.lstm = nn.LSTM(configs['cnn_hidden_dim'] +
                            configs['intent_emb_dim'],
                            configs['lstm_hidden_dim'] // 2,
                            num_layers=configs['num_layers'],
                            batch_first=configs['batch_first'],
                            bidirectional=configs['bidirectional'])
        self.drop_lstm = nn.Dropout(configs['dropout'])
        self.hidden2tag = nn.Linear(configs['lstm_hidden_dim'],
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, configs['gpu'])

        temperature = np.power(configs['char_emb_dim'], 0.5)
        self.attention = ScaledDotProductAttention(temperature)

    def forward(self,
                batch_word,
                batch_intents,
                batch_wordlen,
                batch_char,
                batch_charlen,
                mask,
                batch_lexi,
                batch_label=None):
        char_embeds = self.char_drop(
            self.char_embeddings(batch_char)).transpose(1, 2)
        char_cnn_out = self.char_cnn(char_embeds).transpose(1, 2)
        # char_cnn_out = torch.max_pool1d(char_cnn_out, kernel_size=char_cnn_out.size(2)).view(char_batch_size, -1)
        intent_embeds = self.intent_embeddings(batch_intents)
        char_intent_embeds = torch.repeat_interleave(intent_embeds,
                                                     batch_char.size(1),
                                                     dim=1)

        char_features = torch.cat([char_cnn_out, char_intent_embeds],
                                  2)  # (b, 14, 300)

        word_embeds = self.word_drop(
            self.word_embeddings(batch_word))  # (b, 9, 280)
        lexi_embeds = self.lexi_embeddings(batch_lexi)  # (b, 9, 20)
        word_intent_embeds = torch.repeat_interleave(intent_embeds,
                                                     batch_word.size(1),
                                                     dim=1)  # (b, 9, 100)
        word_features = torch.cat(
            [word_embeds, lexi_embeds, word_intent_embeds],
            2)  # (b, 9, 280+20+100)

        q = char_features  # (14, 400)
        k = word_features  # (9, 400)
        v = word_features  # (9, 400)
        attn_output, _ = self.attention(q, k, v)  # (14, 300)

        packed_words = pack_padded_sequence(attn_output,
                                            batch_charlen.cpu().numpy(),
                                            batch_first=True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))  # (b, 14, 200)

        outputs = self.hidden2tag(lstm_out)  # (b, 14, 30)

        if batch_label is not None:
            total_loss = self.crf.neg_log_likelihood_loss(
                outputs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return total_loss, tag_seq
        else:
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return tag_seq

    @staticmethod
    def random_embedding(vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb
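

# ScaledDotProductAttention is imported from elsewhere in the project and is not
# shown in this example. The sketch below is one common formulation,
# softmax(Q K^T / temperature) V, written only to make the attention calls above
# easier to follow; the project's actual class may differ (e.g. masking, dropout).
class _ScaledDotProductAttentionSketch(nn.Module):
    def __init__(self, temperature):
        super().__init__()
        self.temperature = temperature

    def forward(self, q, k, v):
        # q: (b, len_q, d), k and v: (b, len_k, d)
        attn = torch.bmm(q, k.transpose(1, 2)) / self.temperature  # (b, len_q, len_k)
        attn = torch.softmax(attn, dim=-1)
        output = torch.bmm(attn, v)                                 # (b, len_q, d)
        return output, attn

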
class AttnBiLstmCRF(nn.Module):
    def __init__(self, data, configs):
        super(AttnBiLstmCRF, self).__init__()
        if configs['random_embedding']:
            self.char_embeddings = nn.Embedding(data.char_alphabet_size,
                                                configs['char_emb_dim'])
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.char_alphabet_size,
                                          configs['char_emb_dim'])))
            self.char_drop = nn.Dropout(configs['dropout'])
        else:
            # presumably pre-trained embeddings would be loaded here; not implemented in this example
            pass
        self.feature_embeddings = nn.Embedding(data.feat_alphabet_size,
                                               configs['feature_emb_dim'])
        self.input_drop = nn.Dropout(configs['dropout'])
        self.lstm = nn.LSTM(configs['char_emb_dim'],
                            configs['hidden_dim'] // 2,
                            num_layers=configs['num_layers'],
                            batch_first=configs['batch_first'],
                            bidirectional=configs['bidirectional'])
        self.drop_lstm = nn.Dropout(configs['dropout'])
        self.hidden2tag = nn.Linear(configs['hidden_dim'],
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, configs['gpu'])

        temperature = np.power(configs['char_emb_dim'], 0.5)
        self.attention = ScaledDotProductAttention(temperature)

    # Attention weights over char_embeds are computed with torch.bmm; this variant
    # converges more slowly, but accuracy is decent.
    def forward(self,
                batch_input,
                batch_feature,
                batch_len,
                batch_recover,
                mask,
                batch_label=None):
        batch_size = len(batch_input)
        char_embeds = self.char_drop(
            self.char_embeddings(batch_input))  # (b,len,300)
        feat_embeds = self.feature_embeddings(batch_feature)  # (b,1,300)
        feat_embeds = feat_embeds.transpose(1, 2)
        attn_weights = torch.softmax(torch.bmm(char_embeds, feat_embeds),
                                     dim=1)  # (b,len,1)
        # weight the character embeddings by element-wise multiplication
        input_embeds = self.input_drop(char_embeds * attn_weights)
        # bilstm
        packed_words = pack_padded_sequence(input_embeds,
                                            batch_len.cpu().numpy(),
                                            batch_first=True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))
        # fc
        outputs = self.hidden2tag(lstm_out)
        # crf
        if batch_label is not None:
            total_loss = self.crf.neg_log_likelihood_loss(
                outputs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return total_loss, tag_seq
        else:
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return tag_seq

    # Self-attention variant; its results were not satisfactory.
    # def forward(self, batch_input, batch_feature, batch_len, batch_recover, mask, batch_label=None):
    # 	char_embeds = self.char_drop(self.char_embeddings(batch_input))  # (b,len,300)
    # 	feat_embeds = self.feature_embeddings(batch_feature)  # (b,1,300)
    # 	q = torch.repeat_interleave(feat_embeds, batch_input.size(1), dim=1)
    # 	k = char_embeds
    # 	v = char_embeds
    # 	attn_output, _ = self.attention(q, k, v)
    # 	packed_words = pack_padded_sequence(attn_output, batch_len.cpu().numpy(), batch_first=True)
    # 	hidden = None
    # 	lstm_out, hidden = self.lstm(packed_words, hidden)
    # 	lstm_out, _ = pad_packed_sequence(lstm_out)
    # 	lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))
    # 	outputs = self.hidden2tag(lstm_out)
    #
    # 	if batch_label is not None:
    # 		total_loss = self.crf.neg_log_likelihood_loss(outputs, mask, batch_label)
    # 		scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
    # 		return total_loss, tag_seq
    # 	else:
    # 		scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
    # 		return tag_seq

    @staticmethod
    def random_embedding(vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb
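

# A small, self-contained illustration (not from the original example) of the
# torch.bmm attention weighting used in AttnBiLstmCRF.forward: the single intent
# embedding scores every character position, and a softmax over the length
# dimension turns those scores into per-character weights. Sizes are made up.
def _attn_weight_demo():
    batch, seq_len, dim = 2, 6, 300
    char_embeds = torch.randn(batch, seq_len, dim)                 # (b, len, dim)
    feat_embeds = torch.randn(batch, 1, dim).transpose(1, 2)       # (b, dim, 1)
    attn_weights = torch.softmax(torch.bmm(char_embeds, feat_embeds), dim=1)  # (b, len, 1)
    weighted = char_embeds * attn_weights                          # broadcast over dim
    print(attn_weights.sum(dim=1).squeeze(-1))                     # each entry is 1.0
    print(weighted.shape)                                          # torch.Size([2, 6, 300])

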
class CnnLstmCrf(nn.Module):
    def __init__(self, data, config):
        super(CnnLstmCrf, self).__init__()
        self.char_embeddings = nn.Embedding(data.char_alphabet_size,
                                            config.char_emb_dim)
        self.char_embeddings.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.char_alphabet_size,
                                      config.char_emb_dim)))
        self.char_drop = nn.Dropout(config.dropout)
        self.char_cnn = nn.Conv1d(in_channels=config.char_emb_dim,
                                  out_channels=config.char_hidden_dim,
                                  kernel_size=3,
                                  padding=1)

        self.word_embeddings = nn.Embedding(data.word_alphabet_size,
                                            config.word_emb_dim)
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.word_alphabet_size,
                                      config.word_emb_dim)))
        self.word_drop = nn.Dropout(config.dropout)

        self.feature_embeddings = nn.Embedding(data.feat_alphabet_size,
                                               config.feature_emb_dim)

        self.lstm = nn.LSTM(config.char_hidden_dim + config.word_emb_dim +
                            config.feature_emb_dim,
                            config.word_hidden_dim // 2,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)
        self.drop_lstm = nn.Dropout(config.dropout)

        self.hidden2tag = nn.Linear(config.word_hidden_dim,
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, config.gpu)

    def forward(self,
                batch_word,
                batch_features,
                batch_wordlen,
                batch_char,
                batch_charlen,
                batch_charrecover,
                mask,
                batch_label=None):
        char_batch_size = batch_char.size(0)
        char_embeds = self.char_drop(
            self.char_embeddings(batch_char)).transpose(1, 2)
        char_cnn_out = self.char_cnn(char_embeds)
        char_cnn_out = torch.max_pool1d(
            char_cnn_out, kernel_size=char_cnn_out.size(2))  # max-pool each hidden channel over the character positions
        char_cnn_out = char_cnn_out.view(char_batch_size,
                                         -1)  # shape = (number of words, char_hidden_dim)
        char_cnn_out = char_cnn_out[batch_charrecover]
        char_features = char_cnn_out.view(batch_word.size(0),
                                          batch_word.size(1), -1)

        feat_embeds = self.feature_embeddings(batch_features)  # (10, 1, 50)
        feat_embeds = torch.repeat_interleave(feat_embeds,
                                              batch_word.size(1),
                                              dim=1)

        word_embeds = self.word_embeddings(batch_word)
        word_embeds = torch.cat([word_embeds, char_features, feat_embeds], 2)
        # word_embeds = word_embeds * feat_embeds  # element-wise product with the intent feature vector for every word
        word_represent = self.word_drop(word_embeds)

        packed_words = pack_padded_sequence(word_represent,
                                            batch_wordlen.cpu().numpy(),
                                            batch_first=True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))

        outputs = self.hidden2tag(lstm_out)

        if batch_label is not None:
            total_loss = self.crf.neg_log_likelihood_loss(
                outputs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return total_loss, tag_seq
        else:
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return tag_seq

    @staticmethod
    def random_embedding(vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb
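

# An isolated shape sketch (illustrative only, not part of the original example)
# of the character-CNN path in CnnLstmCrf.forward: embed characters, run a 1-D
# convolution over the character positions, then max-pool over the length so
# each word is reduced to a single char_hidden_dim vector. Sizes are assumptions.
def _char_cnn_pool_demo():
    num_words, max_word_len, char_emb_dim, char_hidden_dim = 8, 7, 30, 50
    char_embeds = torch.randn(num_words, max_word_len, char_emb_dim).transpose(1, 2)
    cnn = nn.Conv1d(char_emb_dim, char_hidden_dim, kernel_size=3, padding=1)
    cnn_out = cnn(char_embeds)                                 # (num_words, char_hidden_dim, max_word_len)
    pooled = torch.max_pool1d(cnn_out, kernel_size=cnn_out.size(2))
    print(pooled.view(num_words, -1).shape)                    # torch.Size([8, 50])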