Example #1
    def __init__(self,
                 tagset_size,
                 char_size,
                 char_dim,
                 char_hidden_dim,
                 char_rnn_layers,
                 embedding_dim,
                 word_hidden_dim,
                 word_rnn_layers,
                 vocab_size,
                 dropout_ratio,
                 char_embeds,
                 large_CRF=True,
                 if_highway=False,
                 in_doc_words=2,
                 highway_layers=1):

        super(LM_LSTM_CRF, self).__init__()
        self.char_dim = char_dim
        self.char_hidden_dim = char_hidden_dim
        self.char_size = char_size
        self.word_dim = embedding_dim
        self.word_hidden_dim = word_hidden_dim
        self.word_size = vocab_size
        self.if_highway = if_highway

        # Character embeddings are passed in (e.g. pre-trained) rather than
        # created here with nn.Embedding(char_size, char_dim).
        self.char_embeds = char_embeds
        
        self.forw_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.back_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.char_rnn_layers = char_rnn_layers

        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

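        # Word-level input: the word embedding concatenated with the forward
        # and backward char-LSTM outputs (char_hidden_dim each). The
        # bidirectional word LSTM yields word_hidden_dim features
        # (word_hidden_dim // 2 per direction), matching the CRF input below.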
        self.word_lstm = nn.LSTM(embedding_dim + char_hidden_dim * 2,
                                 word_hidden_dim // 2,
                                 num_layers=word_rnn_layers,
                                 bidirectional=True,
                                 dropout=dropout_ratio)

        self.word_rnn_layers = word_rnn_layers

        self.dropout = nn.Dropout(p=dropout_ratio)

        self.tagset_size = tagset_size
        if large_CRF:
            self.crf = crf.CRF_L(word_hidden_dim, tagset_size)
        else:
            self.crf = crf.CRF_S(word_hidden_dim, tagset_size)

        if if_highway:
            self.forw2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
            self.back2char = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
            self.forw2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
            self.back2word = highway.hw(char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)
            self.fb2char = highway.hw(2 * char_hidden_dim, num_layers=highway_layers, dropout_ratio=dropout_ratio)

        self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
        self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

        self.batch_size = 1
        self.word_seq_length = 1
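
A minimal construction sketch for this variant, assuming the crf and highway
helper modules used above are importable and that the character embedding
layer is built externally. Every size below is illustrative, not prescribed
by the source.

    import torch.nn as nn

    # Hypothetical sizes, chosen only for illustration.
    char_embeds = nn.Embedding(60, 30)  # char_size=60, char_dim=30
    model = LM_LSTM_CRF(tagset_size=17, char_size=60, char_dim=30,
                        char_hidden_dim=100, char_rnn_layers=1,
                        embedding_dim=100, word_hidden_dim=200,
                        word_rnn_layers=1, vocab_size=20000,
                        dropout_ratio=0.5, char_embeds=char_embeds,
                        if_highway=True)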
Example #2

    def __init__(self,
                 tagset_sizes,
                 char_size,
                 char_dim,
                 char_hidden_dim,
                 char_rnn_layers,
                 embedding_dim,
                 word_hidden_dim,
                 word_rnn_layers,
                 vocab_size,
                 dropout_ratio,
                 file_num,
                 large_CRF=True,
                 if_highway=False,
                 in_doc_words=2,
                 highway_layers=1):

        super(LM_LSTM_CRF, self).__init__()
        self.char_dim = char_dim
        self.char_hidden_dim = char_hidden_dim
        self.char_size = char_size
        self.word_dim = embedding_dim
        self.word_hidden_dim = word_hidden_dim
        self.word_size = vocab_size
        self.if_highway = if_highway

        self.char_embeds = nn.Embedding(char_size, char_dim)
        self.forw_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.back_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.char_rnn_layers = char_rnn_layers

        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

        self.word_lstms_list = nn.ModuleList()

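        # The shared task-0 word LSTM consumes embedding_dim + 2 * char_hidden_dim
        # features; its bidirectional output (word_hidden_dim) feeds the
        # task-specific LSTMs appended below.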
        self.word_lstm_task_0 = nn.LSTM(embedding_dim + char_hidden_dim * 2,
                                        word_hidden_dim // 2,
                                        num_layers=word_rnn_layers,
                                        bidirectional=True,
                                        dropout=dropout_ratio)

        self.word_lstms_list.append(self.word_lstm_task_0)

        for i in range(file_num - 1):
            # Task-specific LSTMs stack on the shared LSTM's word_hidden_dim
            # output. Note: with num_layers=1 the dropout argument has no
            # effect (PyTorch applies it only between stacked layers).
            self.word_lstms_list.append(
                nn.LSTM(word_hidden_dim,
                        word_hidden_dim // 2,
                        num_layers=1,
                        bidirectional=True,
                        dropout=dropout_ratio))

        self.word_rnn_layers = word_rnn_layers

        self.dropout = nn.Dropout(p=dropout_ratio)

        self.tagset_sizes = tagset_sizes
        self.crflist = nn.ModuleList()
        for i in range(file_num):
            if large_CRF:
                self.crflist.append(crf.CRF_L(word_hidden_dim,
                                              tagset_sizes[i]))
            else:
                self.crflist.append(crf.CRF_S(word_hidden_dim,
                                              tagset_sizes[i]))

        if if_highway:
            self.forw2char = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.back2char = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.forw2word = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.back2word = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.fb2char = highway.hw(2 * char_hidden_dim,
                                      num_layers=highway_layers,
                                      dropout_ratio=dropout_ratio)

        self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
        self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

        self.batch_size = 1
        self.word_seq_length = 1
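
A construction sketch for this multi-task variant, under the same assumptions
about the crf and highway modules; tagset_sizes holds one tag set per training
file (file_num of them), and all sizes are illustrative.

    # Hypothetical setup: two datasets with different tag sets.
    model = LM_LSTM_CRF(tagset_sizes=[17, 9], char_size=60, char_dim=30,
                        char_hidden_dim=100, char_rnn_layers=1,
                        embedding_dim=100, word_hidden_dim=200,
                        word_rnn_layers=1, vocab_size=20000,
                        dropout_ratio=0.5, file_num=2, if_highway=True)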
Example #3
    def __init__(self,
                 tagset_size,
                 char_size,
                 char_dim,
                 char_hidden_dim,
                 embedding_dim,
                 word_hidden_dim,
                 win_size,
                 vocab_size,
                 dropout_ratio,
                 tag_dim=100,
                 segtgt_size=None,
                 enttgt_size=None,
                 if_highway=False,
                 ex_embedding_dim=None,
                 segment_loss=0,
                 entity_loss=0):

        super(LSTM_TH, self).__init__()

        # size_average=False is deprecated; reduction='sum' is the equivalent.
        self.xentropy = nn.CrossEntropyLoss(reduction='sum')

        self.char_dim = char_dim
        self.char_hidden_dim = char_hidden_dim
        self.char_size = char_size
        self.word_dim = embedding_dim
        self.ex_word_dim = ex_embedding_dim
        self.win_size = win_size
        self.word_hidden_dim = word_hidden_dim
        self.tag_dim = tag_dim
        self.word_size = vocab_size
        self.if_highway = if_highway
        self.char_embeds = nn.Embedding(char_size, char_dim)
        self.segment_loss = segment_loss
        self.entity_loss = entity_loss
        self.W1 = nn.Parameter(torch.zeros(word_hidden_dim, word_hidden_dim))
        self.W2 = nn.Parameter(torch.zeros(word_hidden_dim, word_hidden_dim))
        self.b1 = nn.Parameter(torch.zeros(word_hidden_dim))
        self.b2 = nn.Parameter(torch.zeros(word_hidden_dim))

        self.forw_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.back_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
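        # The word LSTM input concatenates a win_size window of word embeddings
        # with the two char-LSTM summaries (char_hidden_dim each); when an
        # extra embedding table is used, each window position contributes
        # word_dim + ex_word_dim features instead of word_dim.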
        if not ex_embedding_dim:
            self.word_lstm = nn.LSTM(self.word_dim * self.win_size +
                                     char_hidden_dim * 2,
                                     self.word_hidden_dim // 2,
                                     bidirectional=True,
                                     dropout=dropout_ratio)
        else:
            # use both embedding tables
            self.word_lstm = nn.LSTM(
                (self.word_dim + self.ex_word_dim) * self.win_size +
                char_hidden_dim * 2,
                self.word_hidden_dim // 2,
                bidirectional=True,
                dropout=dropout_ratio)

        self.word_embeds = nn.Embedding(vocab_size, self.word_dim)

        # Guard against ex_embedding_dim=None: `None > 0` raises a TypeError.
        if self.ex_word_dim:
            self.ex_word_embeds = nn.Embedding(vocab_size, self.ex_word_dim)
        else:
            self.ex_word_embeds = None

        self.dropout = nn.Dropout(p=dropout_ratio)
        # highway nets
        if if_highway:
            self.fbchar_highway = highway.hw(2 * char_hidden_dim,
                                             dropout_ratio=dropout_ratio)

        self.tag_size = tagset_size
        self.seg_size = segtgt_size
        self.ent_size = enttgt_size

        # The tag classifier input grows by word_hidden_dim for each auxiliary
        # representation (segment, entity) concatenated in under loss mode 2.
        if self.segment_loss != 2 and self.entity_loss != 2:
            self.hidden2tag = nn.Linear(self.word_hidden_dim, self.tag_size)
        elif self.segment_loss == 2 and self.entity_loss != 2:
            self.hidden2tag = nn.Linear(self.word_hidden_dim * 2,
                                        self.tag_size)
        elif self.segment_loss != 2 and self.entity_loss == 2:
            self.hidden2tag = nn.Linear(self.word_hidden_dim * 2,
                                        self.tag_size)
        elif self.segment_loss == 2 and self.entity_loss == 2:
            self.hidden2tag = nn.Linear(self.word_hidden_dim * 3,
                                        self.tag_size)
            # Alternative: a single bilinear combination layer.
            # self.bilinear = nn.Bilinear(self.word_hidden_dim, self.word_hidden_dim, self.tag_size)

        if self.segment_loss != 0:
            self.segtgt_size = segtgt_size
            if not ex_embedding_dim:
                self.word_lstm_seg = nn.LSTM(self.word_dim * self.win_size +
                                             char_hidden_dim * 2,
                                             self.word_hidden_dim // 2,
                                             bidirectional=True,
                                             dropout=dropout_ratio)
            else:
                # use both embedding tables
                self.word_lstm_seg = nn.LSTM(
                    (self.word_dim + self.ex_word_dim) * self.win_size +
                    char_hidden_dim * 2,
                    self.word_hidden_dim // 2,
                    bidirectional=True,
                    dropout=dropout_ratio)
            self.hidden2seg = nn.Linear(self.word_hidden_dim, self.segtgt_size)

        if self.entity_loss != 0:
            self.enttgt_size = enttgt_size
            if not ex_embedding_dim:
                self.word_lstm_ent = nn.LSTM(self.word_dim * self.win_size +
                                             char_hidden_dim * 2,
                                             self.word_hidden_dim // 2,
                                             bidirectional=True,
                                             dropout=dropout_ratio)
            else:
                # use both embedding tables
                self.word_lstm_ent = nn.LSTM(
                    (self.word_dim + self.ex_word_dim) * self.win_size +
                    char_hidden_dim * 2,
                    self.word_hidden_dim // 2,
                    bidirectional=True,
                    dropout=dropout_ratio)
            self.hidden2ent = nn.Linear(self.word_hidden_dim, self.enttgt_size)

        self.rand_init()
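
A construction sketch for LSTM_TH, assuming the highway module and the class's
rand_init() helper are defined elsewhere in the codebase; win_size is the
word-window width, and the auxiliary sizes are only required when the matching
loss mode is non-zero. All values are illustrative.

    # Hypothetical setup with both auxiliary losses in mode 2, so the
    # segment and entity representations feed the tag classifier.
    model = LSTM_TH(tagset_size=17, char_size=60, char_dim=30,
                    char_hidden_dim=100, embedding_dim=100,
                    word_hidden_dim=200, win_size=3, vocab_size=20000,
                    dropout_ratio=0.5, segtgt_size=5, enttgt_size=8,
                    segment_loss=2, entity_loss=2)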
Example #4

    def __init__(self,
                 tagset_sizes,
                 char_size,
                 char_dim,
                 char_hidden_dim,
                 char_rnn_layers,
                 embedding_dim,
                 word_hidden_dim,
                 word_rnn_layers,
                 vocab_size,
                 dropout_ratio,
                 file_num,
                 pad_value,
                 large_CRF=True,
                 if_highway=False,
                 in_doc_words=2,
                 highway_layers=1,
                 n_heads=4,
                 d_model=128):

        super(LM_LSTM_CRF, self).__init__()
        self.char_dim = char_dim
        self.char_hidden_dim = char_hidden_dim
        self.char_size = char_size
        self.word_dim = embedding_dim
        self.word_hidden_dim = word_hidden_dim
        self.word_size = vocab_size
        self.if_highway = if_highway
        # NB: the d_model argument is ignored; the attention model dimension
        # is derived from the char and word feature sizes instead.
        self.d_model = char_hidden_dim * 2 + embedding_dim
        self.n_heads = n_heads
        self.pad_value = pad_value

        self.self_attn = MultiHeadedAttention(
            n_heads, char_hidden_dim * 2 + embedding_dim)

        self.char_embeds = nn.Embedding(char_size, char_dim)
        self.forw_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.back_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.char_rnn_layers = char_rnn_layers

        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

        self.word_lstms_list = nn.ModuleList()
        self.lstm_self_attn_list = nn.ModuleList()

        word_input_size = embedding_dim + char_hidden_dim * 2 + self.d_model

        self.word_lstm_task_0 = nn.LSTM(word_input_size,
                                        word_hidden_dim // 2,
                                        num_layers=word_rnn_layers,
                                        bidirectional=True,
                                        dropout=dropout_ratio)
        self.word_lstm_task_0_self_attn = MultiHeadedAttention(
            n_heads, word_hidden_dim)

        self.word_lstms_list.append(self.word_lstm_task_0)
        self.lstm_self_attn_list.append(self.word_lstm_task_0_self_attn)

        for i in range(file_num - 1):
            # Each task LSTM's output (word_hidden_dim) and its self-attention
            # output (word_hidden_dim) are carried forward together with the
            # original word-level features, so the next LSTM consumes
            # word_input_size + 2 * word_hidden_dim inputs. With num_layers=1
            # the dropout argument has no effect.
            self.word_lstms_list.append(
                nn.LSTM(word_input_size + 2 * word_hidden_dim,
                        word_hidden_dim // 2,
                        num_layers=1,
                        bidirectional=True,
                        dropout=dropout_ratio))
            self.lstm_self_attn_list.append(
                MultiHeadedAttention(n_heads, word_hidden_dim))

        self.word_rnn_layers = word_rnn_layers

        self.dropout = nn.Dropout(p=dropout_ratio)

        self.tagset_sizes = tagset_sizes
        # Each CRF consumes 2 * word_hidden_dim features: the task LSTM output
        # concatenated with its self-attention output.
        self.crflist = nn.ModuleList()
        for i in range(file_num):
            if large_CRF:
                self.crflist.append(
                    crf.CRF_L(2 * word_hidden_dim, tagset_sizes[i]))
            else:
                self.crflist.append(
                    crf.CRF_S(2 * word_hidden_dim, tagset_sizes[i]))

        if if_highway:
            self.forw2char = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.back2char = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.forw2word = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.back2word = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.fb2char = highway.hw(2 * char_hidden_dim,
                                      num_layers=highway_layers,
                                      dropout_ratio=dropout_ratio)

        self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
        self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

        self.batch_size = 1
        self.word_seq_length = 1
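
A construction sketch for the attention-augmented multi-task variant, assuming
the crf and highway modules plus a MultiHeadedAttention implementation are
importable; pad_value presumably marks padding tokens for attention masking.
Both word_hidden_dim and char_hidden_dim * 2 + embedding_dim should be
divisible by n_heads. All sizes are illustrative.

    # Hypothetical setup: two tasks, four attention heads.
    model = LM_LSTM_CRF(tagset_sizes=[17, 9], char_size=60, char_dim=30,
                        char_hidden_dim=100, char_rnn_layers=1,
                        embedding_dim=100, word_hidden_dim=200,
                        word_rnn_layers=1, vocab_size=20000,
                        dropout_ratio=0.5, file_num=2, pad_value=0,
                        n_heads=4)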
Example #5
    def __init__(self,
                 tagset_size,
                 char_size,
                 char_dim,
                 char_hidden_dim,
                 char_rnn_layers,
                 embedding_dim,
                 word_hidden_dim,
                 vocab_size,
                 dropout_ratio,
                 repeats,
                 which_loss,
                 large_CRF=True,
                 if_highway=False,
                 in_doc_words=2,
                 highway_layers=1,
                 layer_residual=True,
                 block_residual=True):

        super(LM_LSTM_CRF, self).__init__()
        self.char_dim = char_dim
        self.char_hidden_dim = char_hidden_dim
        self.char_size = char_size
        self.word_dim = embedding_dim
        self.word_hidden_dim = word_hidden_dim
        self.word_size = vocab_size
        self.if_highway = if_highway

        # Hyperparameters for the iterated dilated-CNN block (tune as needed).
        self.initial_filter_width = 3
        self.initial_padding = 1
        self.padding = [1, 2, 1]
        self.dilation = [1, 2, 1]
        self.take_layer = [False, False, True]
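        # With kernel_size 3, padding equal to the dilation keeps the sequence
        # length unchanged at every convolution (padding = dilation * (3 - 1) / 2).
        # take_layer presumably marks which repeated layers feed the output.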
        self.repeats = int(repeats)
        self.which_loss = which_loss  # e.g. "block"
        self.layer_residual = layer_residual
        self.block_residual = block_residual

        self.char_embeds = nn.Embedding(char_size, char_dim)
        self.forw_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.back_char_lstm = nn.LSTM(char_dim,
                                      char_hidden_dim,
                                      num_layers=char_rnn_layers,
                                      bidirectional=False,
                                      dropout=dropout_ratio)
        self.char_rnn_layers = char_rnn_layers
        # Word embedding layer
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

        # Initial convolution projecting the word+char features to
        # word_hidden_dim channels (kernel 3, padding 1 preserves length).
        initial_filter_width = self.initial_filter_width
        initial_num_filters = word_hidden_dim
        self.itdicnn0 = nn.Conv1d(embedding_dim + char_hidden_dim * 2,
                                  initial_num_filters,
                                  kernel_size=initial_filter_width,
                                  padding=self.initial_padding,
                                  bias=True)
        self.itdicnn = nn.ModuleList([
            nn.Conv1d(initial_num_filters,
                      initial_num_filters,
                      kernel_size=initial_filter_width,
                      padding=self.padding[i],
                      dilation=self.dilation[i],
                      bias=True) for i in range(len(self.padding))
        ])
        self.dropout = nn.Dropout(p=dropout_ratio)
        self.tagset_size = tagset_size
        if large_CRF:
            self.crf = crf.CRF_L(word_hidden_dim, tagset_size)
        else:
            self.crf = crf.CRF_S(word_hidden_dim, tagset_size)

        if if_highway:
            self.forw2char = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.back2char = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.forw2word = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.back2word = highway.hw(char_hidden_dim,
                                        num_layers=highway_layers,
                                        dropout_ratio=dropout_ratio)
            self.fb2char = highway.hw(2 * char_hidden_dim,
                                      num_layers=highway_layers,
                                      dropout_ratio=dropout_ratio)

        self.char_pre_train_out = nn.Linear(char_hidden_dim, char_size)
        self.word_pre_train_out = nn.Linear(char_hidden_dim, in_doc_words)

        self.batch_size = 1
        self.word_seq_length = 1
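
A construction sketch for the dilated-CNN variant, under the same assumptions
about the crf and highway modules; repeats controls how many times the dilated
block is iterated and which_loss selects the loss variant ("block" is one
value the source anticipates). All sizes are illustrative.

    # Hypothetical setup for the iterated dilated-CNN tagger.
    model = LM_LSTM_CRF(tagset_size=17, char_size=60, char_dim=30,
                        char_hidden_dim=100, char_rnn_layers=1,
                        embedding_dim=100, word_hidden_dim=200,
                        vocab_size=20000, dropout_ratio=0.5,
                        repeats=3, which_loss="block")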