def forward(self, text, text_lengths):

        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)
        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text)).to(torch.float32)
        # embedded = [batch size,sent len,  emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths, batch_first=self.batch_first)

        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        # seq_output = [batch size, sent len, hidden dim * num_direction]
        seq_output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # restore the original (input) order of the sentences
        seq_output = seq_output[desorted_indices]

        output = matrix_mul(seq_output, self.word_weight, self.word_bias)
        output = matrix_mul(output, self.context_weight)
        output = F.softmax(output, dim=-1)
        output = element_wise_mul(seq_output, output)

        return output, hidden
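
Every forward in these examples calls a prepare_pack_padded_sequence helper that is not shown in the snippet. Below is a minimal sketch of what such a helper typically does, sorting the padded batch by length in descending order and returning the indices needed to undo the sort; the exact signature is an assumption inferred from the calls above.

import torch

def prepare_pack_padded_sequence(inputs, seq_lengths, descending=True):
    # Hypothetical helper: sort a padded batch by sequence length (descending)
    # and return the indices that restore the original order after the RNN.
    sorted_seq_lengths, sorted_indices = torch.sort(seq_lengths, descending=descending)
    _, desorted_indices = torch.sort(sorted_indices, descending=False)
    sorted_inputs = inputs[sorted_indices]
    return sorted_inputs, sorted_seq_lengths, desorted_indices
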
    def forward(self, sentence_tensor, text_lengths):

        # sort sentences by length in descending order
        sentence_tensor, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            sentence_tensor, text_lengths)
        # sentence_tensor = [batch size, sent len, emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            sentence_tensor, sorted_seq_lengths, batch_first=self.batch_first)

        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        # seq_output = [batch size, sent len, hidden dim * num_direction]
        seq_output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # restore the original (input) order of the sentences
        seq_output = seq_output[desorted_indices]
        output = matrix_mul(seq_output, self.sent_weight, self.sent_bias)
        output = matrix_mul(output, self.context_weight).permute(1, 0)
        output = F.softmax(output, dim=-1)
        output = element_wise_mul(seq_output, output.permute(1, 0)).squeeze(0)
        output = self.fc(output)

        return output
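
The two HAN-style forwards above also call matrix_mul and element_wise_mul, which are not defined in this snippet. A rough sketch of helpers whose shapes match the calls above follows; the behavior is inferred from usage, and the original implementation may differ (for example in whether a leading singleton dimension is kept).

import torch

def matrix_mul(inputs, weight, bias=None):
    # inputs: [batch, seq_len, dim_in]; weight: [dim_in, dim_out]; optional bias: [1, dim_out].
    # Applies tanh(x @ weight + bias) at every position; a trailing singleton dim is dropped.
    out = torch.matmul(inputs, weight)
    if bias is not None:
        out = out + bias
    return torch.tanh(out).squeeze(-1)

def element_wise_mul(inputs, weights):
    # inputs: [batch, seq_len, dim]; weights: [batch, seq_len].
    # Attention-weighted sum over the sequence dimension -> [batch, dim].
    return torch.sum(inputs * weights.unsqueeze(-1), dim=1)
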
    def forward(self, text, bert_masks, seq_lens):
        # text = [batch size, sent len]
        # context: the input sentence
        # mask: masks the padding, same size as the sentence; padding positions are 0, e.g. [1, 1, 1, 1, 0, 0]
        bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
        sentence_len = bert_sentence.shape[1]
        bert_cls = bert_cls.unsqueeze(dim=1).repeat(1, sentence_len, 1)
        bert_sentence = bert_sentence + bert_cls
        # sort sentences by length in descending order
        bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            bert_sentence, seq_lens)
        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            bert_sentence, sorted_seq_lengths, batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        output = output[desorted_indices]
        batch_size, max_seq_len, hidden_dim = output.shape
        out = torch.transpose(output.relu(), 1, 2)

        out = F.max_pool1d(out, max_seq_len).squeeze()
        out = self.fc(out)

        return out
    def forward(self, text, _, text_lengths):
        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)
        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text)).to(torch.float32)
        # embedded = [batch size, sent len, emb dim]
        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths, batch_first=self.batch_first)

        # packed_output
        # hidden [n_layers * bi_direction,batch_size,hidden_dim]
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        # output = [batch_size, sent len, hidden_dim * num_direction]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # restore the original (input) order of the sentences
        output = output[desorted_indices]
        # output = [batch_size, seq_len, hidden_dim * num_directions]
        batch_size, max_seq_len, hidden_dim = output.shape
        # concatenate the left/right context (RNN output) with the embeddings
        output = torch.tanh(self.fc_cat(torch.cat((output, embedded), dim=2)))

        output = torch.transpose(output, 1, 2)
        output = F.max_pool1d(output, max_seq_len).squeeze().contiguous()
        output = self.fc(output)

        return output
    def forward(self, input_ids, attention_masks, text_lengths):

        # text = [batch size, sent len]
        # context: the input sentence
        # attention mask: same size as the input; padding positions are 0, e.g. [1, 1, 1, 1, 0, 0]
        sentence_out = self.transformer_model(input_ids,
                                              attention_mask=attention_masks)
        # sort sentences by length in descending order
        bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            sentence_out.last_hidden_state, text_lengths)
        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            bert_sentence,
            sorted_seq_lengths.cpu(),
            batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # output = [ batch size,sent len, hidden_dim * bidirectional]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        output = output[desorted_indices]

        batch_size, max_seq_len, hidden_dim = output.shape
        hidden = torch.mean(torch.reshape(hidden,
                                          [batch_size, -1, hidden_dim]),
                            dim=1)
        output = torch.sum(output, dim=1)
        fc_input = self.dropout(output + hidden)
        out = self.fc_rnn(fc_input)
        print(out, type(out), out.shape)

        return out, fc_input
Example #6
    def forward(self, text, _, text_lengths):
        # sort
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
        # text [batch_size, seq_len]
        embedded = self.dropout(self.embedding(text)).float()
        # embedded [batch_size, seq_len, emb_dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths, batch_first=self.batch_first)
        # packed_embedded [batch_size, seq_len, emb_dim]
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # output [seq_len, batch_size, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output [batch_size, seq_len, hidden_dim * num_direction]
        output = output[desorted_indices]

        # operate hidden
        hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
        # hidden [batch_size, batch_size(-1), hidden_dim * num_direction]
        hidden = torch.mean(hidden, dim=1)
        # hidden [batch_size, hidden_dim * num_direction]

        # operate output
        # output [batch_size, seq_len, hidden_dim * num_direction]
        output = torch.mean(output, dim=1)
        # output [batch_size, hidden_dim * num_direction]

        # add
        fc_input = self.dropout(output + hidden)
        # fc_input [batch_size, hidden_dim * num_direction]
        out = self.fc(fc_input)
        # out [batch_size, output_dim]
        return out
    def forward(self, input_ids, attention_masks, text_lengths):
        # text = [batch size, sent len]
        # context: the input sentence
        # attention mask: same size as the input; padding positions are 0, e.g. [1, 1, 1, 1, 0, 0]
        sentence_out = self.transformer_model(input_ids,
                                              attention_mask=attention_masks)
        cls = sentence_out.pooler_output
        sentence_out = sentence_out.last_hidden_state
        sentence_len = sentence_out.shape[1]
        cls = cls.unsqueeze(dim=1).repeat(1, sentence_len, 1)
        sentence_out = sentence_out + cls
        # descending order
        bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            sentence_out, text_lengths)
        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            bert_sentence, sorted_seq_lengths, batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        output = output[desorted_indices]
        batch_size, max_seq_len, hidden_dim = output.shape
        out = torch.transpose(output.relu(), 1, 2)

        out_embedding = F.max_pool1d(out, int(max_seq_len)).squeeze()
        out = self.fc(out_embedding)
        print(out, type(out), out.shape)

        return out, out_embedding
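
The attribute names last_hidden_state and pooler_output used above come from the Hugging Face transformers model output. A small standalone check of those fields, assuming transformers is installed; the checkpoint name and the sample sentences are only placeholders.

import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")  # placeholder checkpoint
model = AutoModel.from_pretrained("bert-base-chinese")

batch = tokenizer(["一个例子", "another example"], padding=True, return_tensors="pt")
with torch.no_grad():
    out = model(batch["input_ids"], attention_mask=batch["attention_mask"])

print(out.last_hidden_state.shape)  # [batch_size, seq_len, hidden_size]
print(out.pooler_output.shape)      # [batch_size, hidden_size]
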
Example #8
    def forward(self, text, _, text_lengths):
        # sort
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
        # text [batch_size, seq_len]
        embedded = self.dropout(self.embedding(text)).to(torch.float32)
        # embedded [batch_size, seq_len, emb_dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths, batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # packed_output [seq_len, batch_size, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output [batch_size, seq_len, hidden_dim * num_direction]
        output = output[desorted_indices]

        _, max_seq_len, _ = output.shape
        output = torch.cat((output, embedded), dim=2)
        # output [batch_size, seq_len, hidden_dim * num_direction + emb_dim]
        output = self.tanh(self.fc_cat(output))
        # output [batch_size, seq_len, emb_dim]
        output = torch.transpose(output, 1, 2)
        # output [batch_size, emb_dim, seq_len]
        output = F.max_pool1d(output, max_seq_len).squeeze().contiguous()
        # output [batch_size, emb_dim]
        output = self.fc(output)
        # output [batch_size, output_dim]
        return output
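
The RCNN-style forward above relies on self.fc_cat, self.tanh and self.fc defined in the surrounding class. A minimal sketch of layer definitions that would match the shapes used above; the constructor arguments and module boundaries are assumptions, not the original class.

import torch
import torch.nn as nn
import torch.nn.functional as F

class RCNNHead(nn.Module):
    """Hypothetical head matching the cat -> fc_cat -> tanh -> max-pool -> fc pattern above."""
    def __init__(self, emb_dim, hidden_dim, num_directions, output_dim):
        super().__init__()
        # project [RNN output ; embedding] back to the embedding size
        self.fc_cat = nn.Linear(hidden_dim * num_directions + emb_dim, emb_dim)
        self.tanh = nn.Tanh()
        self.fc = nn.Linear(emb_dim, output_dim)

    def forward(self, rnn_output, embedded):
        # rnn_output: [batch, seq_len, hidden_dim * num_directions]; embedded: [batch, seq_len, emb_dim]
        x = self.tanh(self.fc_cat(torch.cat((rnn_output, embedded), dim=2)))
        x = x.transpose(1, 2)                        # [batch, emb_dim, seq_len]
        x = F.max_pool1d(x, x.shape[2]).squeeze(2)   # [batch, emb_dim]
        return self.fc(x)                            # [batch, output_dim]
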
    def forward(self, text, bert_masks, seq_lens):

        # text = [batch size, sent len]
        # context: the input sentence
        # mask: masks the padding, same size as the sentence; padding positions are 0, e.g. [1, 1, 1, 1, 0, 0]
        bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
        # sort sentences by length in descending order
        bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            bert_sentence, seq_lens)
        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            bert_sentence, sorted_seq_lengths, batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # output = [ batch size,sent len, hidden_dim * bidirectional]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        output = output[desorted_indices]

        batch_size, max_seq_len, hidden_dim = output.shape
        hidden = torch.mean(torch.reshape(hidden,
                                          [batch_size, -1, hidden_dim]),
                            dim=1)
        output = torch.sum(output, dim=1)
        fc_input = self.dropout(output + hidden)
        out = self.fc_rnn(fc_input)

        return out
Example #10
    def forward(self, text, _, text_lengths):
        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)
        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text))
        # embedded = [batch size, sent len, emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths, batch_first=self.batch_first)

        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            # output (seq_len, batch, num_directions * hidden_size)
            # hidden (num_layers * num_directions, batch, hidden_size)
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # restore the original (input) order of the sentences
        output = output[desorted_indices]
        # output = [batch_size, seq_len, hidden_dim * num_directions]
        batch_size, max_seq_len, hidden_dim = output.shape
        hidden = torch.mean(torch.reshape(hidden,
                                          [batch_size, -1, hidden_dim]),
                            dim=1)
        output = torch.mean(output, dim=1)
        fc_input = self.dropout(output + hidden)
        out = self.fc(fc_input)

        return out
    def forward(self, text, _, text_lengths):
        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)

        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text)).float()

        # embedded = [batch size, sent len, emb dim]
        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
        #print(packed_embedded.data.shape)
        # packed_output
        # hidden [num_layers * num_directions, batch_size, hidden_size]
        self.rnn.flatten_parameters()
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)
        #print(packed_output.data.shape)

        # unpack sequence
        # output = [batch_size, sent len, hidden_dim * num_direction]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # turn back into the original order
        output = output[desorted_indices]

        #print(output.shape)
        # output = [batch_size, seq_len, hidden_dim * num_directions]
        batch_size, max_seq_len, hidden_dim = output.shape

        # concatenating the RNN output and the embedding
        input_features = torch.cat([output, embedded], dim=2)
        #input_features.shape = [batch_size, seq_len, embed_size + 2*hidden]
        #print(input_features.shape)

        #
        output = torch.tanh(self.fc_cat(input_features))
        #linear_output.shape = (batch_size, seq_len, hidden_size)
        #print(output.shape)

        output = output.permute(0, 2, 1)  # reshape for maxpooling
        #print(output.shape[2])
        output = F.max_pool1d(output, int(output.shape[2])).squeeze(2)

        #max out features[batch_size, hidden_dim]
        output = self.dropout(output)

        return self.fc(output), output
Example #12
    def forward(self, text, _, text_lengths):
        # sort
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
        # text [batch_size, seq_len]
        embedded = self.dropout(self.embedding(text)).to(torch.float32)
        # embedded [batch_size, seq_len, emb_dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths, batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # output [seq_len, batch_size, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output [batch_size, seq_len, hidden_dim * num_direction]
        output = output[desorted_indices]

        # attention
        m = self.tanh(output)
        # m [batch_size, seq_len, hidden_dim * num_direction]

        # w [hidden_dim * num_direction], num_direction is 1 or 2
        score = torch.matmul(m, self.w)
        # score [batch_size, seq_len]

        alpha = F.softmax(score, dim=1).unsqueeze(-1)  # softmax over the sequence dimension
        # alpha [batch_size, seq_len, 1]

        output_attention = output * alpha
        # output_attention [batch_size, seq_len, hidden_dim * num_direction]

        # operate hidden
        hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
        hidden = torch.mean(hidden, dim=1)
        # hidden [batch_size, hidden_dim * num_direction]

        # operate output_attention
        output_attention = torch.sum(output_attention, dim=1)
        # output_attention [batch_size, hidden_dim * num_direction]

        # operate output
        output = torch.sum(output, dim=1)
        # output [batch_size, hidden_dim * num_direction]

        # add
        fc_input = self.dropout(output + output_attention + hidden)
        # fc_input [batch_size, hidden_dim * num_direction]
        out = self.fc(fc_input)
        # out [batch_size, output_dim]
        return out
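
The attention block above assumes a learnable vector self.w and a self.tanh module defined elsewhere in the class. A small self-contained sketch of the same computation with stand-in names and example sizes; everything here is illustrative.

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, seq_len, hidden_times_dir = 4, 10, 256
output = torch.randn(batch_size, seq_len, hidden_times_dir)            # stand-in for the RNN output
w = nn.Parameter(torch.empty(hidden_times_dir).uniform_(-0.1, 0.1))    # stand-in for self.w

m = torch.tanh(output)                          # [batch, seq_len, hidden*dir]
score = torch.matmul(m, w)                      # [batch, seq_len]
alpha = F.softmax(score, dim=1).unsqueeze(-1)   # attention weights over the sequence
context = torch.sum(output * alpha, dim=1)      # [batch, hidden*dir]
print(context.shape)                            # torch.Size([4, 256])
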
Example #13
    def forward(self, text, bert_masks, seq_lens):
        # text [batch_size, seq_len]
        bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
        # bert_sentence [batch_size, sen_len, H=768]
        # bert_cls [batch_size, H=768]

        """
        torch.randn(33, 55).repeat(2,1).size()
        --->>>torch.Size([66, 55])
        
        变化到倍数维度
        """
        bert_cls = bert_cls.unsqueeze(dim=1)
        # bert_cls [batch_size, 1, H=768]

        bert_cls = bert_cls.repeat(1, bert_sentence.shape[1], 1)
        # bert_cls [batch_size, sen_len, H=768]
        # bert_sentence [batch_size, sen_len, H=768]

        bert_sentence = bert_sentence + bert_cls
        # bert_sentence [batch_size, sen_len, H=768]

        bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(bert_sentence, seq_lens)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(bert_sentence, sorted_seq_lengths,
                                                            batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # output = [seq_len, batch_size, hidden_dim * bidirectional]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output = [batch_size, seq_len, hidden_dim * bidirectional]
        output = output[desorted_indices]
        output = output.relu()
        # output = [batch_size, seq_len, hidden_dim * bidirectional]
        _, max_seq_len, _ = output.shape
        out = torch.transpose(output, 1, 2)
        # output = [batch_size, hidden_dim * bidirectional, seq_len]

        out = F.max_pool1d(out, max_seq_len).squeeze()

        # output = [batch_size, hidden_dim * bidirectional]

        out = self.fc(out)

        # output = [batch_size, output_dim]

        return out
Example #14
    def forward(self, text, bert_masks, seq_lens):
        # text [batch_size, seq_len]
        bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
        # bert_sentence [batch_size, sen_len, H=768]
        # bert_cls [batch_size, H=768]

        # rnn
        bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(bert_sentence, seq_lens)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(bert_sentence, sorted_seq_lengths,
                                                            batch_first=self.batch_first)
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # output = [seq_len, batch_size, hidden_dim * bidirectional]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output = [batch_size, seq_len, hidden_dim * bidirectional]
        output = output[desorted_indices]

        # attention
        # operate output_attention
        m = self.tanh(output)
        # m [batch_size, seq_len, hidden_dim * bidirectional]
        score = torch.matmul(m, self.w)
        # score [batch_size, seq_len]
        alpha = F.softmax(score, dim=1).unsqueeze(-1)  # softmax over the sequence dimension
        # alpha [batch_size, seq_len, 1]
        output_attention = output * alpha
        # output_attention [batch_size, seq_len, hidden_dim * bidirectional]
        output_attention = torch.sum(output_attention, dim=1)
        # output_attention [batch_size, hidden_dim * bidirectional]
        # operate hidden
        hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
        hidden = torch.mean(hidden, dim=1)
        # hidden [batch_size, hidden_dim * bidirectional]
        # operate output
        output = torch.sum(output, dim=1)
        # output [batch_size, hidden_dim * bidirectional]
        # add
        fc_input = self.dropout(output + output_attention + hidden)
        # fc_input [batch_size, hidden_dim * bidirectional]

        out = self.fc(fc_input)
        # out [batch_size, num_classes]
        return out
Example #15
    def forward(self, text, _, text_lengths):
        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)
        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text)).to(torch.float32)
        # embedded = [batch size,sent len,  emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths, batch_first=self.batch_first)

        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        # output = [batch size, sent len, hidden dim * num_direction]
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # restore the original (input) order of the sentences
        output = output[desorted_indices]
        hidden = hidden[:, desorted_indices]
        # attention
        # M = [batch size, sent len, hidden dim * num_direction]
        # M = self.tanh1(output)
        alpha = F.softmax(torch.matmul(self.tanh1(output), self.w),
                          dim=1).unsqueeze(-1)  # softmax over the words of the sentence (sequence dimension)
        output_attention = output * alpha

        batch_size, max_seq_len, hidden_dim = output.shape
        hidden = torch.mean(torch.reshape(hidden,
                                          [batch_size, -1, hidden_dim]),
                            dim=1)

        output_attention = torch.sum(output_attention, dim=1)
        output = torch.sum(output, dim=1)

        fc_input = self.dropout(output + output_attention + hidden)
        # fc_input = self.dropout(output_attention)
        out = self.fc(fc_input)
        return out, fc_input
    def forward(self, text, _, text_lengths):
        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)
        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text)).float()
        # embedded = [batch size, sent len, emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
        self.rnn.flatten_parameters()
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            # output (seq_len, batch, num_directions * hidden_size)
            # hidden (num_layers * num_directions, batch, hidden_size)
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # turn back into the original order
        output = output[desorted_indices]
        print(output.shape)
        # output = [batch_size, seq_len, hidden_dim * num_directions]
        batch_size, max_seq_len, hidden_dim = output.shape
        hidden = torch.mean(torch.reshape(hidden,
                                          [batch_size, -1, hidden_dim]),
                            dim=1)
        #print(hidden.shape)
        output = torch.mean(output, dim=1)
        #print(output.shape)
        fc_input = self.dropout(output + hidden)
        #print(fc_input.shape)
        out = self.fc(fc_input)

        return out, fc_input
Example #17
    def forward(self, text, _, text_lengths):
        """

        :param text: 输入的句子,
        第一版,batch_first配置在config里面,为False
        第二版,batch_first配置在这个类缺省参数里面,为True
        现在它全是正常的顺序[batch_size, seq_len],
        CNN第二版不用这个参数
        :param text_lengths: 句子长度
        """

        # text sorted by descending length, sorted lengths, and the indices that restore the original order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
        # text [batch_size, seq_len]

        # embedding first, then dropout
        embedded = self.dropout(self.embedding(text)).float()
        # embedded [batch_size, seq_len, emb_dim]

        # pack
        """
        pack_padded_sequence(),Packs a Tensor containing padded sequences of variable length,
        pack一个(已经padding过的)sequence,记下了做了padding的长度的list。

        一般在处理数据时就已经将序列pad成等长了,但是LSTM需要一种方法来告知自己处理变长输入,
        一个batch里的序列不一定等长,需要pad操作,用0把它们都填充成max_length长度。

        LSTM的一次forward对应一个time step,接收的是across batches的输入,
        这就导致短序列可能在当前time step上已经结束,而你还是在给它输入东西(pad),
        这就会对结果产生影响(可以对照公式看看,即便输入全0还是会有影响),
        我们想要的效果是,LSTM知道batch中每个序列的长度,等到某个序列输入结束后下面的time step就不带它了。

        batch_first=self.batch_first,如果要保存batch_first的维度。
        
        nn.utils.rnn.pack_padded_sequence():
            Packs a Tensor containing padded sequences of variable length.
    
            :attr:`input` can be of size ``T x B x *`` where `T` is the length of the
            longest sequence (equal to ``lengths[0]``), ``B`` is the batch size, and
            ``*`` is any number of dimensions (including 0). If ``batch_first`` is
            ``True``, ``B x T x *`` :attr:`input` is expected.
        """
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths, batch_first=self.batch_first)
        # packed_embedded [batch_size, seq_len, emb_dim]

        """
        注意,
        这里batch_first=True,
        会保护batch_size的维度,
        下面的情况是官方的False情况的理解,
        区别只是pytorch会自动先把两个维度调换,
        输出的维度是一样的
        """
        # rnn、gru
        if self.rnn_type in ['rnn', 'gru']:
            # rnn
            """
            最后一层隐藏层状态集合,所有层最后一时刻隐藏层状态
            packed_output,(seq_len, batch, num_directions * hidden_size)
            hidden,(num_layers * num_directions, batch, hidden_size)
            
            Outputs: output, h_n
                - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
                  containing the output features (`h_t`) from the last layer of the RNN,
                  for each `t`.  If a :class:`torch.nn.utils.rnn.PackedSequence` has
                  been given as the input, the output will also be a packed sequence.
                  For the unpacked case, the directions can be separated
                  using ``output.view(seq_len, batch, num_directions, hidden_size)``,
                  with forward and backward being direction `0` and `1` respectively.
                  Similarly, the directions can be separated in the packed case.

                - **h_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
                  containing the hidden state for `t = seq_len`.
                  Like *output*, the layers can be separated using
                  ``h_n.view(num_layers, num_directions, batch, hidden_size)``.
            """

            # gru
            """
            最后一层隐藏层状态集合,所有层最后一时刻隐藏层状态
            packed_output,(seq_len, batch, num_directions * hidden_size)
            hidden,(num_layers * num_directions, batch, hidden_size)
            
            Outputs: output, h_n
                - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
                  containing the output features h_t from the last layer of the GRU,
                  for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
                  given as the input, the output will also be a packed sequence.
                  For the unpacked case, the directions can be separated
                  using ``output.view(seq_len, batch, num_directions, hidden_size)``,
                  with forward and backward being direction `0` and `1` respectively.
                  Similarly, the directions can be separated in the packed case.

                - **h_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
                  containing the hidden state for `t = seq_len`
                  Like *output*, the layers can be separated using
                  ``h_n.view(num_layers, num_directions, batch, hidden_size)``.
            """

            packed_output, hidden = self.rnn(packed_embedded)

        # lstm
        else:
            """
            最后一层外部状态集合,(所有层最后一个时刻外部状态,所有层最后一个时刻内部状态)

            packed_output,(seq_len, batch, num_directions * hidden_size)
            hidden,(num_layers * num_directions, batch, hidden_size)
            cell,(num_layers * num_directions, batch, hidden_size)

            output,
            最后一层,每个time_step的输出h,
            num_directions * hidden_size可以看出来,
            (双向,每个time_step的输出h = [h正向, h逆向],是同一个time_step的正向和逆向的h连接起来!)

            h_n,
            每一层,最后一个time_step的输出h,
            num_layers * num_directions可以看出来,
            (双向,单独保存前向和后向的最后一个time_step的输出h)
            (一层的话,h_n和output最后一个是一样的【应该一定】)

            c_n,
            和h_n一样意思,保存的是c
            
            
            Outputs: output, (h_n, c_n)
                - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
                  containing the output features `(h_t)` from the last layer of the LSTM,
                  for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
                  given as the input, the output will also be a packed sequence.
                  For the unpacked case, the directions can be separated
                  using ``output.view(seq_len, batch, num_directions, hidden_size)``,
                  with forward and backward being direction `0` and `1` respectively.
                  Similarly, the directions can be separated in the packed case.

                - **h_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
                  containing the hidden state for `t = seq_len`.
                  Like *output*, the layers can be separated using
                  ``h_n.view(num_layers, num_directions, batch, hidden_size)`` and similarly for *c_n*.

                - **c_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
                  containing the cell state for `t = seq_len`.
            """
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # output (every time step of the last layer), hidden (the last time step of every layer)
        # output [seq_len, batch_size, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]

        # unpack
        """
        unpack一个(经过packed的)sequence,
        output,output对应pad长度
        pad_packed_sequence(),Pads a packed batch of variable length sequences.
        
        这里的batch_first,把batch_size调回第一维
        """
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output [batch_size, seq_len, hidden_dim * num_direction]

        # restore the original (input) order of the sentences
        output = output[desorted_indices]

        # process hidden
        # output [batch_size, seq_len, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]

        # the target shape for hidden is taken directly from output's dimensions
        """
        -1 lets that dimension be inferred automatically; we write it as batch_size(-1)
        """
        hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
        # hidden [batch_size, batch_size(-1), hidden_dim * num_direction]

        # take the mean over dim 1, removing that dimension
        hidden = torch.mean(hidden, dim=1)
        # hidden [batch_size, hidden_dim * num_direction]

        # process output
        # output [batch_size, seq_len, hidden_dim * num_direction]

        # take the mean over dim 1, removing that dimension
        output = torch.mean(output, dim=1)
        # output [batch_size, hidden_dim * num_direction]

        # add
        # output [batch_size, hidden_dim * num_direction]
        # hidden [batch_size, hidden_dim * num_direction]
        # output (every time step of the last layer) and hidden (the last time step of every layer)
        # are added element-wise (not concatenated), then dropout is applied
        fc_input = self.dropout(output + hidden)

        # linear projection from hidden_dim * num_direction to output_dim
        out = self.fc(fc_input)
        # output [batch_size, output_dim]

        return out
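
Since the docstrings above describe pack_padded_sequence and pad_packed_sequence at length, here is a small self-contained round trip showing the same mechanics on a toy batch; all sizes are arbitrary examples.

import torch
import torch.nn as nn

# Toy batch: 3 sequences padded to length 5, real lengths 5, 3, 2 (already sorted descending).
batch = torch.randn(3, 5, 8)                   # [batch, seq_len, emb_dim]
lengths = torch.tensor([5, 3, 2])

packed = nn.utils.rnn.pack_padded_sequence(batch, lengths, batch_first=True)
rnn = nn.GRU(input_size=8, hidden_size=6, batch_first=True)
packed_out, h_n = rnn(packed)                  # padded time steps are skipped inside the RNN

out, out_lengths = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)
print(out.shape)        # torch.Size([3, 5, 6])
print(out_lengths)      # tensor([5, 3, 2])
print(h_n.shape)        # torch.Size([1, 3, 6])
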
Example #18
    def forward(self, text, _, text_lengths):

        # text [batch_size, seq_len]
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)

        embedded = self.dropout(self.embedding(text)).to(torch.float32)
        # embedded [batch_size, seq_len, emb_dim]

        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths, batch_first=self.batch_first)

        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # output [seq_len, batch_size, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]

        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=self.batch_first)
        # output [batch_size, seq_len, hidden_dim * num_direction]

        output = output[desorted_indices]

        # attention
        """
        Uses the attention mechanism for relation classification tasks proposed in the paper
        from the Institute of Automation, Chinese Academy of Sciences.
        """
        # output [batch_size, seq_len, hidden_dim * num_direction]
        # compute m
        m = self.tanh(output)
        # m [batch_size, seq_len, hidden_dim * num_direction]

        # w [hidden_dim * num_direction], num_direction is 1 or 2
        # compute the score (attention scoring function) via matrix multiplication
        """
        torch.matmul():
        w is 1-D and matches the last dimension of m, so the multiplication removes m's last dimension
        """
        score = torch.matmul(m, self.w)
        # score [batch_size, seq_len]

        # compute alpha (the attention distribution)
        """
        the softmax is taken over the words of the text, i.e. over the sequence dimension (dim=1 with batch_first)
        unsqueeze(-1) inserts a dimension at the end
        """
        alpha = F.softmax(score, dim=1).unsqueeze(-1)
        # alpha [batch_size, seq_len, 1]

        # output [batch_size, seq_len, hidden_dim * num_direction]
        # compute r (the weighted average) via element-wise multiplication
        output_attention = output * alpha
        # output_attention [batch_size, seq_len, hidden_dim * num_direction]

        # process hidden
        # output [batch_size, seq_len, hidden_dim * num_direction]
        # hidden [n_layers * num_direction, batch_size, hidden_dim]
        hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
        hidden = torch.mean(hidden, dim=1)
        # hidden [batch_size, hidden_dim * num_direction]

        # process output_attention
        # output_attention [batch_size, seq_len, hidden_dim * num_direction]
        # sum output_attention over dim 1, removing that dimension
        output_attention = torch.sum(output_attention, dim=1)
        # output_attention [batch_size, hidden_dim * num_direction]

        # process output
        # output [batch_size, seq_len, hidden_dim * num_direction]
        output = torch.sum(output, dim=1)
        # output [batch_size, hidden_dim * num_direction]

        # add
        # output [batch_size, hidden_dim * num_direction]
        # output_attention [batch_size, hidden_dim * num_direction]
        # hidden [batch_size, hidden_dim * num_direction]
        # add output, the attention output, and hidden, then apply dropout
        fc_input = self.dropout(output + output_attention + hidden)
        # fc_input [batch_size, hidden_dim * num_direction]

        # linear projection from hidden_dim * num_direction to output_dim
        out = self.fc(fc_input)
        # output [batch_size, output_dim]

        return out
    def forward(self, text, _, text_lengths):
        #CNN
        # text = [batch size, sent len]
        embedded = self.embedding(text)
        # embedded = [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1).float()
        # embedded = [batch size, 1, sent_len, emb_dim]
        #CNN
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        # conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]

        pooled = [
            F.max_pool1d(conv, int(conv.shape[2])).squeeze(2)
            for conv in conved
        ]
        # pooled_n = [batch size, n_filters]
        cat = torch.cat(pooled, dim=1)
        cnn_out = self.dropout(cat)
        #print(cnn_out.shape)
        # cnn_out = [batch size, n_filters * len(filter_sizes)]

        # LSTM
        # sort sentences by length in descending order
        text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
            text, text_lengths)
        # text = [batch size,sent len]
        embedded = self.dropout(self.embedding(text)).float()
        # embedded = [batch size, sent len, emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
        self.rnn.flatten_parameters()
        if self.rnn_type in ['rnn', 'gru']:
            packed_output, hidden = self.rnn(packed_embedded)
        else:
            # output (seq_len, batch, num_directions * hidden_size)
            # hidden (num_layers * num_directions, batch, hidden_size)
            packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(
            packed_output, batch_first=self.batch_first)
        # turn back into the original order
        output = output[desorted_indices]
        #

        # output = [batch_size, seq_len, hidden_dim * num_directions]
        batch_size, _, hidden_dim = output.shape
        hidden = torch.mean(torch.reshape(hidden,
                                          [batch_size, -1, hidden_dim]),
                            dim=1)
        output = torch.mean(output, dim=1)
        #print(output.shape)
        bilstm_out = output + hidden

        #print(bilstm_out.shape)
        #fc_input = self.dropout(output)# + hidden)
        # fc_input [batch_size, hidden_dim * num_directions,]

        # CNN and BiLSTM CAT

        cnn_lstm_out = torch.cat((cnn_out, bilstm_out), 1)

        #print(cnn_lstm_out.shape)

        # linear
        cnn_lstm_out = self.fc(torch.tanh(cnn_lstm_out))  # torch.tanh: F.tanh is deprecated
        #cnn_lstm_out = self.fc2(F.tanh(cnn_lstm_out))
        #print (cnn_lstm_out.shape)

        # output
        logit = cnn_lstm_out
        #print(logit.shape)
        return logit, cnn_lstm_out
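
The CNN half of the combined forward above assumes a self.convs ModuleList defined in the surrounding class. A minimal sketch of a matching CNN branch follows; the filter sizes and filter count are illustrative assumptions, not values from the original class.

import torch
import torch.nn as nn
import torch.nn.functional as F

class TextCNNBranch(nn.Module):
    """Hypothetical CNN branch matching the conv/relu/max-pool/cat usage above."""
    def __init__(self, emb_dim=300, n_filters=100, filter_sizes=(2, 3, 4)):
        super().__init__()
        # one 2-D convolution per filter size over the [1, seq_len, emb_dim] "image"
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, n_filters, (fs, emb_dim)) for fs in filter_sizes])

    def forward(self, embedded):
        # embedded: [batch, seq_len, emb_dim]
        x = embedded.unsqueeze(1)                                      # [batch, 1, seq_len, emb_dim]
        conved = [F.relu(conv(x)).squeeze(3) for conv in self.convs]   # each: [batch, n_filters, L']
        pooled = [F.max_pool1d(c, c.shape[2]).squeeze(2) for c in conved]
        return torch.cat(pooled, dim=1)                                # [batch, n_filters * len(filter_sizes)]
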