# Example 1
class RNN(nn.Module):
    """Bidirectional LSTM sentence classifier with max-over-time pooling.

    The batch is converted to a tensor by ``Vocab.to_input_tensor``, embedded,
    run through a bidirectional LSTM, rectified, max-pooled over the sequence
    axis, regularised with dropout and projected onto 5 class scores.
    """

    def __init__(self, embed_size, hidden_size, vocabList, device):
        """Create the vocabulary, the embedding and the network layers.

        :param embed_size: feature size of the vectors fed to the LSTM
        :param hidden_size: hidden size of each LSTM direction
        :param vocabList: handed to ``Vocab`` to build the vocabulary
        :param device: device handed to ``Vocab.to_input_tensor``
        """
        super().__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.device = device

        self.vocab = Vocab(vocabList)
        self.model_embeddings = loadWordEmbedding(self.vocab)

        # Both directions are concatenated, hence 2 * hidden_size features
        # reach the classifier.
        self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)
        self.classify = nn.Linear(2 * hidden_size, 5)
        self.drop = nn.Dropout(0.8)
        self.activate = nn.ReLU()

    def forward(self, data):
        """Return log-probabilities over the 5 classes for a batch.

        :param data: batch accepted by ``Vocab.to_input_tensor``
        :return: tensor of shape (batch, 5) holding log-softmax scores
        """
        token_ids = self.vocab.to_input_tensor(data, self.device, -1)
        embedded = self.model_embeddings(token_ids)

        # nn.LSTM expects sequence-first input: (seq_len, batch, input_size).
        seq_first = embedded.permute(1, 0, 2)
        outputs, _ = self.lstm(seq_first)
        outputs = self.activate(outputs)

        # Rearrange to (batch, num_directions * hidden_size, seq_len) and keep
        # the largest activation along the sequence axis.
        pooled = outputs.permute(1, 2, 0).max(dim=2).values

        # Dropout against overfitting, then the fully connected layer.
        logits = self.classify(self.drop(pooled))
        return F.log_softmax(logits, dim=1)
# Example 2
class CNN(nn.Module):
    def __init__(self, embed_size, max_sent_len, vocabList, device):

        super(CNN, self).__init__()

        self.max_sent_len = max_sent_len
        self.vocab = Vocab(vocabList)
        self.model_embeddings = loadWordEmbedding(self.vocab)

        self.feature = nn.Sequential(nn.Conv1d(embed_size, 64, 6), nn.ReLU(),
                                     nn.Dropout(0.5), nn.Conv1d(64, 16, 6),
                                     nn.Dropout(0.2), nn.ReLU(),
                                     nn.MaxPool1d(2, 2))

        self.classifier = nn.Sequential(
            nn.Linear(16 * 23, 120),
            # nn.Dropout(0.5),
            nn.Linear(120, 64),
            nn.Linear(64, 5))

        self.device = device

    def forward(self, data):
        # 批量数据进行转换,考虑到内存问题
        x = self.vocab.to_input_tensor(data, self.device, self.max_sent_len)
        x = self.model_embeddings(x)
        x = x.permute(0, 2, 1)

        x = self.feature(x)
        x = x.view(x.shape[0], -1)
        x = self.classifier(x)

        x = torch.squeeze(x)
        return F.log_softmax(x, dim=1)
# Example 3
class RNN(nn.Module):
    """LSTM with the simplest attention from the paper
    "Reasoning about Entailment with Neural Attention".

    Premise and hypothesis are embedded, concatenated along the sequence axis
    and read by one LSTM. The final hidden state attends over the LSTM outputs
    of the premise positions, and the result is classified into 3 classes.
    """

    # NOTE(review): the parameter list was truncated in the source this class
    # was recovered from. The names are the ones the body uses; their order
    # and the dropout_rate default are assumptions -- confirm against callers.
    def __init__(self, embed_size, hidden_size, vocabList, device,
                 dropout_rate=0.5):
        """Create the vocabulary, the embedding and the network layers.

        :param embed_size: feature size of the vectors fed to the LSTM
        :param hidden_size: LSTM hidden size, also the attention size
        :param vocabList: handed to ``Vocab`` to build the vocabulary
        :param device: device used for the input tensors and the mask
        :param dropout_rate: probability used for ``self.dropout``
        """
        super(RNN, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.vocab = Vocab(vocabList)
        self.model_embeddings = loadWordEmbedding(self.vocab)
        self.device = device

        self.lstm = nn.LSTM(embed_size, hidden_size)
        self.wy_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  # called Wy in the paper
        self.wh_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  # called Wh in the paper
        # The misspelled attribute name is kept on purpose: renaming it would
        # change the state_dict keys.
        self.w_projeciton = nn.Linear(hidden_size, 1,
                                      bias=False)  # called w in the paper
        self.wp_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  # called Wp in the paper
        self.wx_projection = nn.Linear(hidden_size, hidden_size,
                                       bias=False)  # called Wx in the paper

        # NOTE(review): this dropout layer is created but forward() never
        # applies it.
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, 3)

    def forward(self, premise: List[List[str]], hypothesis: List[List[str]]):
        """Return log-probabilities over the 3 classes for a batch of pairs.

        :param premise: batch of tokenised premise sentences
        :param hypothesis: batch of tokenised hypothesis sentences
        :return: tensor of shape (batch, 3) holding log-softmax scores
        """
        # Presumably (batch, seq_len) index tensors padded to the longest
        # sentence of the batch -- TODO confirm against Vocab.
        premise_padded = self.vocab.to_input_tensor(premise, self.device, -1)
        hypothesis_padded = self.vocab.to_input_tensor(
            hypothesis, self.device, -1)

        premise_emb = self.model_embeddings(premise_padded)
        hypothesis_emb = self.model_embeddings(hypothesis_padded)

        # Premise first, hypothesis after it, then sequence-first for the
        # LSTM: (seq1_len + seq2_len, batch, embed_size).
        input_data = torch.cat((premise_emb, hypothesis_emb), dim=1)
        input_data = input_data.permute(1, 0, 2)

        premise_max_len = self.get_max_seq(premise)

        premise_lengths = [len(sent) for sent in premise]
        mask = self.generate_sent_masks(
            premise_padded, premise_lengths)  # tensor, (batch, seq1_len)

        # Attention mechanism.
        r, h_n = self.attention(input_data, premise_max_len, mask)

        # Called h* in the paper.
        H = torch.tanh(self.wp_projection(r) +
                       self.wx_projection(h_n))  # tensor, (batch, hidden_size)

        output = self.fc(H)

        return F.log_softmax(output, dim=1)

    def attention(self, X, max_len, mask):
        """Attend over the premise outputs with the final hidden state.

        A row of ``mask`` that is entirely 1 (a premise with no tokens) makes
        every score of that sample -inf, so its softmax is NaN.

        :param X: tensor, (seq1_len+seq2_len, batch, embed_size)
        :param max_len: length of the longest premise sentence
        :param mask: tensor, (batch, seq1_len), 1 at padded positions
        :return: tuple ``(r, h_n)``, both tensors of (batch, hidden_size)
        """
        # Y - tensor, (seq1_len+seq2_len, batch, hidden_size).
        # (h_0, c_0) are left at their default of zeros.
        Y, (h_n, _) = self.lstm(X)
        Y = Y[:max_len, :, :]  # tensor, (seq1_len, batch, hidden_size)
        h_n = torch.squeeze(h_n, 0)  # tensor, (batch, hidden_size)

        # Wh(h_n) is (batch, hidden_size) and broadcasts over the sequence
        # axis, so the explicit all-ones tensor (eL in the paper) is not
        # needed.
        M = torch.tanh(self.wy_projection(Y) +
                       self.wh_projection(h_n))  # (seq1_len, batch, hidden)

        e_n = self.w_projeciton(M)  # tensor, (seq1_len, batch, 1)
        pad_mask = mask.permute(1, 0).unsqueeze(-1).bool()
        # Padding tokens must receive zero attention. masked_fill is NOT
        # in-place, so its result has to be assigned back.
        e_n = e_n.masked_fill(pad_mask, -float('inf'))

        alpha = F.softmax(e_n, dim=0)  # tensor, (seq1_len, batch, 1)

        Y = Y.permute(1, 2, 0)  # tensor, (batch, hidden_size, seq1_len)
        alpha = alpha.permute(1, 0, 2)  # tensor, (batch, seq1_len, 1)

        r = torch.bmm(Y, alpha).squeeze(-1)  # tensor, (batch, hidden_size)

        return r, h_n

    def get_max_seq(self, sents: List[List[str]]):
        """Return the length of the longest sentence in ``sents`` (0 if empty)."""
        return max((len(sent) for sent in sents), default=0)

    def generate_sent_masks(self, premise_padded, premise_lengths):
        """Build the padding mask: 1 wherever a padding token was used.

        :param premise_padded: tensor, (batch, seq_len)
        :param premise_lengths: list[int], length of each premise sentence
        :return: tensor, (batch, seq_len)
        """
        premise_mask = torch.zeros(premise_padded.shape[0],
                                   premise_padded.shape[1])
        for row, length in enumerate(premise_lengths):
            premise_mask[row, length:] = 1
        return premise_mask.to(self.device)
# Example 4
class Transformer(nn.Module):
    """Transformer-encoder sentence classifier producing 5 log-probabilities.

    Token and position embeddings are summed, passed through a stack of
    ``EncoderLayer`` modules, max-pooled over the sequence axis and
    classified.
    """

    # NOTE(review): the parameter list was truncated in the source this class
    # was recovered from. The names are the ones the body uses; their order
    # and the max_length default are assumptions -- confirm against callers.
    def __init__(self, input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout,
                 device, vocabList, max_length=60):
        """Create the vocabulary, the embeddings and the encoder stack.

        :param input_dim: stored as ``embed_size``
        :param hid_dim: model width used by the layers and the classifier
        :param n_layers: number of ``EncoderLayer`` modules
        :param n_heads: number of attention heads
        :param pf_dim: handed to each ``EncoderLayer``
        :param dropout: dropout probability
        :param device: device the tensors are moved to
        :param vocabList: handed to ``Vocab`` to build the vocabulary
        :param max_length: handed to ``loadPosEmbedding``
        """
        super(Transformer, self).__init__()

        self.device = device
        self.vocab = Vocab(vocabList)
        self.embed_size = input_dim
        self.n_heads = n_heads

        self.tok_embedding = loadWordEmbedding(self.vocab)
        self.pos_embedding = loadPosEmbedding(max_length, hid_dim)

        self.layers = nn.ModuleList([
            EncoderLayer(hid_dim, n_heads, pf_dim, dropout, device)
            for _ in range(n_layers)
        ])

        self.dropout = nn.Dropout(dropout)

        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)

        self.drop = nn.Dropout(dropout)
        self.classify = nn.Linear(hid_dim, 5)

    def forward(self, src, src_mask=None):
        """Return log-probabilities over the 5 classes for a batch.

        :param src: batch of sentences accepted by ``Vocab.to_input_tensor``
        :param src_mask: ignored; the mask is always rebuilt from ``src``
        :return: tensor of shape (batch size, 5) holding log-softmax scores
        """
        # NOTE(review): the length argument is hard-coded to 60 instead of
        # using the max_length given to __init__ -- confirm this is intended.
        src = self.vocab.to_input_tensor(src, None, 60)
        batch_size = src.shape[0]
        src_len = src.shape[1]
        # src = [batch size, src len]

        # 1.0 for real tokens, 0.0 where the index is 0 (treated as padding).
        keep = (src != 0).float()
        # Give every head and every query position of a sample that sample's
        # OWN key mask. Tiling the 2-D mask along dim 0 and reshaping it
        # would interleave rows that belong to different samples.
        src_mask = keep[:, None, None, :].repeat(1, self.n_heads, src_len, 1)
        # src_mask = [batch size, n heads, src len, src len]

        src = src.to(self.device)
        src_mask = src_mask.to(self.device)

        pos = torch.arange(0, src_len).unsqueeze(0).repeat(batch_size,
                                                           1).to(self.device)
        # pos = [batch size, src len]

        # NOTE(review): the second operand of this sum was truncated in the
        # recovered source; the positional embedding is the assumed term.
        src = self.dropout((self.tok_embedding(src) * self.scale) +
                           self.pos_embedding(pos))
        # src = [batch size, src len, hid dim]

        for layer in self.layers:
            src = layer(src, src_mask)
        # src = [batch size, src len, hid dim]

        # Max-pool over the sequence axis.
        src = torch.max(src, dim=1)[0]
        # src = [batch size, hid dim]

        src = self.classify(self.drop(src))

        return F.log_softmax(src, dim=1)