Example #1
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()

        self.gpu = data.HP_gpu

        ## add two more labels for the down-layer LSTM; use the original label size for the CRF
        label_size = data.label_alphabet_size
        # data.label_alphabet_size += 2
        # self.word_hidden = WordSequence(data, False, True, data.use_char)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, label_size + 2)

        self.crf = CRF(label_size, self.gpu)

        if torch.cuda.is_available():
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)

    # def neg_log_likelihood_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
    # outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, None, None)
    def neg_log_likelihood_loss(self, hidden, hidden_adv, batch_label, mask):
        if hidden_adv is not None:
            hidden = hidden + hidden_adv

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)

        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, hidden, mask):

        outs = self.hidden2tag(hidden)

        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        return tag_seq

    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)

    def decode_nbest(self, hidden, mask, nbest):

        outs = self.hidden2tag(hidden)

        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
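
This variant skips the word encoder entirely: it takes a precomputed hidden-state tensor (plus an optional adversarial perturbation hidden_adv) and only applies the hidden2tag projection before the CRF. A minimal sketch of that projection step, with all sizes hypothetical and the CRF layer itself omitted:

import torch
import torch.nn as nn

# Hypothetical sizes, for shape illustration only.
batch_size, seq_len, hidden_dim, label_size = 2, 5, 8, 4

hidden = torch.randn(batch_size, seq_len, hidden_dim)  # upstream encoder output
hidden_adv = 0.01 * torch.randn_like(hidden)           # adversarial perturbation

# The projection targets label_size + 2: the two extra slots are presumably
# the START/STOP tags the CRF layer adds internally.
hidden2tag = nn.Linear(hidden_dim, label_size + 2)
emissions = hidden2tag(hidden + hidden_adv)
print(emissions.shape)  # torch.Size([2, 5, 6])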
Example #2
class SeqModel(nn.Module):
    def __init__(self, data, opt):
        super(SeqModel, self).__init__()

        self.gpu = opt.gpu

        ## add two more labels for the down-layer LSTM; use the original label size for the CRF
        self.word_hidden = WordSequence(data, opt)
        self.crf = CRF(data.label_alphabet.size(), self.gpu)

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, mask,
                                feature_inputs, text_inputs):

        outs = self.word_hidden(word_inputs, word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                feature_inputs, text_inputs)
        batch_size = word_inputs.size(0)

        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)

        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        total_loss = total_loss / batch_size

        return total_loss, tag_seq

    def forward(self, word_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask, feature_inputs,
                text_inputs):
        outs = self.word_hidden(word_inputs, word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                feature_inputs, text_inputs)

        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        return tag_seq

    def decode_nbest(self, word_inputs, word_seq_lengths, char_inputs,
                     char_seq_lengths, char_seq_recover, mask, nbest,
                     feature_inputs, text_inputs):

        outs = self.word_hidden(word_inputs, word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                feature_inputs, text_inputs)

        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
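
For orientation, crf.neg_log_likelihood_loss computes log Z(x) - score(x, y) for a linear-chain CRF. Below is a single-sequence, unbatched sketch of that quantity; NCRF++'s actual CRF additionally handles batching, masking, and START/STOP transitions, so treat this as illustrative only:

import torch

def crf_nll(emissions, tags, transitions):
    """Negative log-likelihood of one tag sequence under a linear-chain CRF.

    emissions:   [seq_len, num_tags] per-token scores
    tags:        [seq_len] gold tag indices
    transitions: [num_tags, num_tags]; transitions[i, j] scores tag i -> tag j
    """
    seq_len = emissions.size(0)

    # Score of the gold path: emission scores plus transition scores.
    gold = emissions[0, tags[0]]
    for t in range(1, seq_len):
        gold = gold + transitions[tags[t - 1], tags[t]] + emissions[t, tags[t]]

    # log Z via the forward algorithm: alpha[j] = log-sum of all paths ending in tag j.
    alpha = emissions[0]
    for t in range(1, seq_len):
        alpha = torch.logsumexp(alpha.unsqueeze(1) + transitions, dim=0) + emissions[t]
    log_z = torch.logsumexp(alpha, dim=0)

    return log_z - gold  # = -log p(tags | emissions)

emissions = torch.randn(6, 5)
tags = torch.randint(0, 5, (6,))
transitions = torch.randn(5, 5)
print(crf_nll(emissions, tags, transitions))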
Example #3
File: seqmodel.py  Project: zqma2/NCRFpp
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf
        print "build network..."
        print "use_char: ", data.use_char 
        if data.use_char:
            print "char feature extractor: ", data.char_feature_extractor
        print "word feature extractor: ", data.word_feature_extractor
        print "use crf: ", self.use_crf

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more labels for the down-layer LSTM; use the original label size for the CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.word_hidden = WordSequence(data)
        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)


    def neg_log_likelihood_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            # reduction='sum' is the modern equivalent of the deprecated size_average=False
            loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(score, batch_label.view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq


    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask):
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            outs = outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            ## zero out padded positions
            tag_seq = mask.long() * tag_seq
        return tag_seq


    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)


    def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask, nbest):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
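
When use_crf is off, both the loss and the decode reduce to plain token-level classification over the emission scores. A self-contained sketch of that path with dummy tensors (shapes hypothetical; reduction='sum' is the modern spelling of the deprecated size_average=False):

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, seq_len, label_size = 2, 4, 6
outs = torch.randn(batch_size, seq_len, label_size)  # hidden2tag output
batch_label = torch.randint(1, label_size, (batch_size, seq_len))
mask = torch.ones(batch_size, seq_len, dtype=torch.bool)

# Token-level NLL, summed over tokens; label 0 is padding and is ignored.
loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
score = F.log_softmax(outs.view(batch_size * seq_len, -1), dim=1)
total_loss = loss_function(score, batch_label.view(batch_size * seq_len))

# Greedy per-token decode; padded positions are zeroed out by the mask.
_, tag_seq = torch.max(score, 1)
tag_seq = mask.long() * tag_seq.view(batch_size, seq_len)
print(total_loss, tag_seq)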

        
Example #4
File: seqmodel.py  Project: NLP1502/NLP
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf
        self.use_trans = data.use_trans
        self.use_mapping = data.use_mapping
        print "build network..."
        print "use_char: ", data.use_char
        if data.use_char:
            print "char feature extractor: ", data.char_seq_feature

        print "use_trans: ", data.use_trans
        print "word feature extractor: ", data.word_feature_extractor
        print "use crf: ", self.use_crf

        self.gpu = data.gpu
        self.average_batch = data.average_batch_loss
        # add two more labels for the down-layer LSTM; use the original label size for the CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2

        self.word_hidden = WordSequence(data)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, word_inputs, feature_inputs,
                                word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                batch_label, mask, trans_inputs,
                                trans_seq_length, trans_seq_recover):
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        wc_loss = 0
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            # reduction='sum' is the modern equivalent of the deprecated size_average=False
            loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(
                score, batch_label.view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        # The mapping penalty applies only when both translation features
        # and the mapping are enabled.
        if self.use_trans and self.use_mapping:
            wc_loss = torch.norm(w_word_embs - trans_features_wc)

        if self.average_batch:
            total_loss = total_loss / batch_size
            if self.use_mapping:
                wc_loss = wc_loss / batch_size

        return total_loss, tag_seq, wc_loss

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask,
                trans_inputs, trans_seq_length, trans_seq_recover):
        # outs:(after hidden) [batch * seq_len * label_size]
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.use_crf:

            scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        else:
            outs = outs.view(batch_size * seq_len, -1)  # [batch_size * seq_len, label_size]
            # tag_seq: [batch_size * seq_len], values in 0..label_size-1
            _, tag_seq = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)  # [batch_size, seq_len]
            # print("before mask: {}".format(tag_seq))
            # print("mask: {}".format(mask))

            # zero out padded positions
            tag_seq = mask.long() * tag_seq

        return tag_seq  # [batch_size, seq_len]; padded positions are zero

    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)

    def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths,
                     char_inputs, char_seq_lengths, char_seq_recover, mask,
                     nbest, trans_inputs, trans_seq_length, trans_seq_recover):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq

    def decode_output_intermediate_result(self, word_inputs, feature_inputs,
                                          word_seq_lengths, char_inputs,
                                          char_seq_lengths, char_seq_recover,
                                          mask, trans_inputs, trans_seq_length,
                                          trans_seq_recover):
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        return outs, self.crf.transitions
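
The piece this variant adds on top of Example #3 is the mapping penalty: torch.norm of the difference between word embeddings and translation features is their Frobenius-norm distance, an L2 pull between the two representations. A toy illustration with hypothetical tensor shapes standing in for WordSequence's outputs:

import torch

w_word_embs = torch.randn(2, 4, 50)        # word-level embeddings
trans_features_wc = torch.randn(2, 4, 50)  # translation-side features

# Frobenius norm of the difference: an L2 penalty on the gap between them.
wc_loss = torch.norm(w_word_embs - trans_features_wc)
print(wc_loss)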
Example #5
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf
        print "build network..."
        print "use_char: ", data.use_char
        if data.use_char:
            print "char feature extractor: ", data.char_feature_extractor
        print "word feature extractor: ", data.word_feature_extractor
        print "use crf: ", self.use_crf

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more labels for the down-layer LSTM; use the original label size for the CRF
        label_size = data.label_alphabet_size
        # data.label_alphabet_size += 2
        # self.word_hidden = WordSequence(data, False, True, data.use_char)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, label_size + 2)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

        if torch.cuda.is_available():
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)

        self.frozen = False

    # def neg_log_likelihood_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
    # outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, None, None)
    def neg_log_likelihood_loss(self, hidden, hidden_adv, batch_label, mask):
        if hidden_adv is not None:
            hidden = hidden + hidden_adv

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            # reduction='sum' is the modern equivalent of the deprecated size_average=False
            loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(score,
                                       batch_label.view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, hidden, mask):

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            outs = outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            ## zero out padded positions
            tag_seq = mask.long() * tag_seq
        return tag_seq

    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)

    def decode_nbest(self, hidden, mask, nbest):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq

    def freeze_net(self):
        if self.frozen:
            return
        self.frozen = True

        for p in self.parameters():
            p.requires_grad = False

    def unfreeze_net(self):
        if not self.frozen:
            return
        self.frozen = False

        for p in self.parameters():
            p.requires_grad = True
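
freeze_net and unfreeze_net just toggle requires_grad on every parameter, the standard way to exclude a module from gradient updates (useful if the tagger is held fixed while some other component trains; that loop is not shown in this snippet). A quick demonstration on a throwaway layer:

import torch
import torch.nn as nn

layer = nn.Linear(4, 3)

# Freeze: no gradients accumulate for these parameters during backward.
for p in layer.parameters():
    p.requires_grad = False

x = torch.randn(1, 4, requires_grad=True)
layer(x).sum().backward()
print(layer.weight.grad)   # None: the frozen weight received no gradient
print(x.grad is not None)  # True: gradients still flow through the layer

# Unfreeze: parameters are trainable again.
for p in layer.parameters():
    p.requires_grad = True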