Example #1
 def forward(self, c, q, c_mask, q_mask):
     batch_size, c_len, _ = c.size()
     q_len = q.size(1)
     # Get the projected q
     q = self.proj(q)
     q = F.dropout(q, self.drop_prob, self.training)
     q = F.relu(q) # (batch_size, q_len, hidden_size)
     # Add sentinel vectors
     q = torch.cat([q,self.q_cent.unsqueeze(0).repeat(batch_size,1,1)], dim=1) # (batch_size, q_len+1, hidden_size)
     c = torch.cat([c,self.c_cent.unsqueeze(0).repeat(batch_size,1,1)], dim=1) # (batch_size, c_len+1, hidden_size)
     q_mask = torch.cat([q_mask, torch.ones(batch_size, 1).to(self.device)], dim=1) # (batch_size, q_len+1)
     c_mask = torch.cat([c_mask, torch.ones(batch_size, 1).to(self.device)], dim=1) # (batch_size, c_len+1)
     # Compute affinity matrix
     L = torch.bmm(c, q.transpose(1,2)) # (bs, c_len+1, hid_size) x (bs, hid_size, q_len+1) => (bs, c_len+1, q_len+1)
     # Compute C2Q attention
     alpha = masked_softmax(L, q_mask.view(batch_size, 1, q_len+1), dim=2)  # (batch_size, c_len+1, q_len+1)
     c2q_att = torch.bmm(alpha, q)  # (bs, c_len+1, q_len+1) x (bs, q_len+1, hid_size) => (bs, c_len+1, hid_size)
     # Compute Q2C attention
     beta = masked_softmax(L, c_mask.view(batch_size, c_len+1, 1), dim=1) # (batch_size, c_len+1, q_len+1)
     q2c_att = torch.bmm(beta.transpose(1,2), c) # (bs, q_len+1, c_len+1) x (bs, c_len+1, hid_size) => (bs, q_len+1, hid_size)
     # Compute second level attention
     att_lv2 = torch.bmm(alpha, q2c_att)  # (bs, c_len+1, q_len+1) x (bs, q_len+1, hid_size) => (bs, c_len+1, hid_size)
     x = torch.cat([att_lv2, c2q_att], dim=2) # (batch_size, c_len+1, hidden_size*2)
     x = self.rnn(x[:,:c_len,:], c_mask[:,:c_len].sum(-1)) # (batch_size, c_len, hidden_size*4)
     return x
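Every snippet in this listing calls a masked_softmax helper. A minimal sketch of what that helper is assumed to do (the signature and the -1e30 fill value follow the common SQuAD starter-code util and may differ from the projects above):

import torch
import torch.nn.functional as F

def masked_softmax(logits, mask, dim=-1, log_softmax=False):
    """Softmax over `dim`, assigning (near-)zero probability to masked positions.

    `mask` (bool or 0/1) is broadcast against `logits`; positions where the mask
    is 0 are pushed to a very large negative value before the softmax.
    Setting `log_softmax=True` returns log-probabilities instead.
    """
    mask = mask.type(torch.float32)
    masked_logits = mask * logits + (1 - mask) * -1e30
    softmax_fn = F.log_softmax if log_softmax else F.softmax
    return softmax_fn(masked_logits, dim)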
Example #2
    def forward(self, c, q, c_mask, q_mask):
        batch_size, c_len, _ = c.size()
        q_len = q.size(1)
        s = self.get_similarity_matrix(c, q)  # (batch_size, c_len, q_len)
        c_mask = c_mask.view(batch_size, c_len, 1)  # (batch_size, c_len, 1)
        q_mask = q_mask.view(batch_size, 1, q_len)  # (batch_size, 1, q_len)

        alpha = masked_softmax(s, q_mask, dim=2)  # (batch_size, c_len, q_len)
        beta = masked_softmax(s, c_mask, dim=1)  # (batch_size, c_len, q_len)

        # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
        A = torch.bmm(alpha, q)
        # (bs, q_len, c_len) x (bs, c_len, hid_size) => (bs, q_len, hid_size)
        B = torch.bmm(beta.transpose(1, 2), c)

        # second level cross attention matrix N × M
        # (bs, c_len, hid_size) x (bs, q_len, hid_size)^T -> (bs, c_len, q_len)
        R = torch.bmm(A, torch.transpose(B, 1, 2))  # (bs, c_len, q_len)

        gamma = F.softmax(R, dim=2)  # (bs, c_len, q_len)

        #print('gamma shape = ', gamma.shape)
        #print('B shape = ', B.shape)

        # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
        D = torch.bmm(gamma, B)  # (bs, c_len, hid_size)

        x = torch.cat([c, A, D], dim=2)

        return x
Example #3
File: layers.py  Project: mswoff/squad
    def forward(self, c, q, c_mask, q_mask, q_global, c_conv):
        batch_size, c_len, _ = c.size()
        q_len = q.size(1)
        s = self.get_similarity_matrix(c, q)        # (batch_size, c_len, q_len)
        c_mask = c_mask.view(batch_size, c_len, 1)  # (batch_size, c_len, 1)
        q_mask = q_mask.view(batch_size, 1, q_len)  # (batch_size, 1, q_len)
        s1 = masked_softmax(s, q_mask, dim=2)       # (batch_size, c_len, q_len)
        s2 = masked_softmax(s, c_mask, dim=1)       # (batch_size, c_len, q_len)

        # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
        a = torch.bmm(s1, q)
        # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
        b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), c)


        # make q_global the proper size (batch_size, 900) --> (batch_size, 1, 900)
        q_global = q_global.unsqueeze(1)        # (batch_size, 1, 900)
        # elementwise product of q_global and c_conv
        global_sim = q_global*c_conv            # (batch_size, c_len, 900)
        # project to size 200
        global_sim = self.global_proj(global_sim)       # (batch_size, c_len, 200)
        # add to vector x
        x = torch.cat([c, a, c * a, c * b, global_sim], dim=2)  # (bs, c_len, 5 * hid_size)

        return x
Example #4
    def forward(self, cw_idxs, qw_idxs, bert_embeddings, max_context_len,
                max_question_len, device):
        c_mask = torch.zeros_like(cw_idxs) != cw_idxs
        q_mask = torch.zeros_like(qw_idxs) != qw_idxs
        c_len, q_len = c_mask.sum(-1), q_mask.sum(-1)

        #c_len, q_len = max_context_len, max_question_len
        glove_c_emb = self.emb(cw_idxs)  # (batch_size, c_len, 300)
        glove_q_emb = self.emb(qw_idxs)  # (batch_size, q_len, 300)
        #print("word_vec_emb.size() before : ", word_vec_emb.size())
        #word_vec_emb = self.proj_up(word_vec_emb) # (batch_size, c_len, hidden_size)
        #print("word_vec_emb.size() after: ", word_vec_emb.size())

        c_emb = bert_embeddings[:, 0:torch.max(
            c_len), :]  # (batch_size, c_len, hidden_size)
        c_emb = self.proj_down(
            torch.nn.functional.relu(c_emb))  # (batch_size, c_len, 300)
        c_emb = c_emb + c_emb * glove_c_emb

        start_logits = self.bert_start(c_emb)  # (batch_size, c_len, 1)
        end_logits = self.bert_end(c_emb)  # (batch_size, c_len, 1)

        log_p1 = masked_softmax(start_logits.squeeze(),
                                c_mask,
                                log_softmax=True)  # (batch_size, c_len)
        log_p2 = masked_softmax(end_logits.squeeze(), c_mask,
                                log_softmax=True)  # (batch_size, c_len)

        out = log_p1, log_p2
        return out  # 2 tensors, each (batch_size, c_len)
Example #5
    def forward(self, start, end, mask):
        logits_1 = self.startFF(start)
        logits_2 = self.startFF(end)
        log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #6
    def forward(self, context, question, c_masks):
        """
        Single forward pass of the attention layer
        :param context: (Tensor) tensor of shape (batch_size, context_len, hidden_size),
                                output of encoder layer on context
        :param question: (Tensor) tensor of shape (batch_size, hidden_size), output of encoder layer on question
        :param c_masks: (Tensor) mask over context positions, shape (batch_size, context_len)
        :returns: log_p1, log_p2 (Tensor) log-probabilities over context positions, each of shape (batch_size, context_len)
        """

        # context_hidden_proj = self.attn_proj(context) # shape (batch_size, context_len, hidden_size)
        logits_1 = torch.bmm(self.attn_proj_1(context),
                             torch.unsqueeze(
                                 question,
                                 2))  # shape (batch_size, context_len, 1)
        logits_1 = torch.squeeze(logits_1, -1)

        logits_2 = torch.bmm(self.attn_proj_2(context),
                             torch.unsqueeze(
                                 question,
                                 2))  # shape (batch_size, context_len, 1)
        logits_2 = torch.squeeze(logits_2, -1)

        log_p1 = masked_softmax(logits_1, c_masks, dim=1, log_softmax=True)
        log_p2 = masked_softmax(logits_2, c_masks, dim=1, log_softmax=True)
        #
        # alpha_t = masked_softmax(scores, c_masks, dim=1) # shape (batch_size, context_len)
        # output_t = torch.mul(context, alpha_t.unsqueeze(2))
        #
        # output_t = torch.bmm(torch.unsqueeze(alpha_t, 1), context) # shape (batch_size, 1, hidden_size)
        # output_t = output_t.squeeze(1)
        return log_p1, log_p2
Example #7
    def forward(self, M0, M1, M2, mask):
        """
        Args:
            M0, M1, M2 (torch.tensor): the outputs of the three model encoders from bottom to top
            See more details in Figure1: https://arxiv.org/pdf/1804.09541.pdf

        Returns:
            log_p1, log_p2 (torch.tensor): log-probabilities for the start and end positions
        """
        X1 = torch.cat([M0, M1], dim=2)
        X2 = torch.cat([M0, M2], dim=2)
        # Shapes: (batch_size, seq_len, 1)
        logits_1 = self.linear_1(X1)
        logits_2 = self.linear_2(X2)

        # Shapes: (batch_size, seq_len)
        mask = mask.type(torch.float)
        log_p1 = masked_softmax(logits_1.squeeze(), mask,
                                log_softmax=True)  # [batch_size, seq_len]

        # modified output
        # logits_2 = torch.cat([logits_2, log_p1.unsqueeze(2)], dim=2)   # [batch_size, seq_len, 2]
        # logits_2 = self.linear_3(logits_2)                      # [batch_size, seq_len, 1]

        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #8
    def forward(self, p, q, p_mask, q_mask):
        batch_size, p_len, _ = p.size()
        q_len = q.size(1)
        # (batch_size, p_len, q_len)
        s = self.get_similarity_matrix(p, q)
        p_mask = p_mask.view(batch_size, p_len, 1)  # (batch_size, p_len, 1)
        q_mask = q_mask.view(batch_size, 1, q_len)  # (batch_size, 1, q_len)

        # approx eq(4)
        # (batch_size, p_len, q_len)
        s1 = masked_softmax(s, q_mask, dim=2)

        # approx eq(5)
        # (bs, p_len, q_len) x (bs, q_len, hid_size) => (bs, p_len, hid_size)
        p_tilde = torch.bmm(s1, q)

        # approx eq(6)
        # (batch_size, p_len, q_len)
        s2 = masked_softmax(s, p_mask, dim=1)
        # approx eq(7)
        q_tilde = torch.bmm(s2.transpose(1, 2), p)  # (bs, q_len, hid_size)

        x = (p_tilde, q_tilde)

        return x
Example #9
    def forward(self, context, question, c_masks, q_masks):
        """
        Single forward pass of attention layer
        :param context: (Tensor) tensor of shape (batch, c_len, hidden_size)
        :param question: (Tensor) tensor of shape (batch, q_len, hidden_size)
        :param c_masks:
        :param q_masks:
        :return:
        """

        batch_size, c_len, _ = context.shape
        q_len = question.shape[1]
        s = self.get_similarity_matrix(context,
                                       question)  # shape (batch, c_len, q_len)

        c_masks = c_masks.view(batch_size, c_len, 1)
        q_masks = q_masks.view(batch_size, 1, q_len)

        s1 = masked_softmax(s, q_masks, dim=2)  # shape (batch, c_len, q_len)
        s2 = masked_softmax(s, c_masks, dim=1)  # shape (batch, c_len, q_len)

        a = torch.bmm(s1, question)  # shape (batch, c_len, hidden_size)

        ss = torch.bmm(s1, s2.transpose(1, 2))  # shape (batch, c_len, c_len)
        b = torch.bmm(ss, context)  # shape (batch, c_len, hidden_size)

        x = torch.cat([context, a, context * a, context * b], dim=2)

        return x
Example #10
    def forward(self, c, q, c_mask, q_mask):
        batch_size, c_len, _ = c.size()
        q_len = q.size(1)
        s = self.get_similarity_matrix(c, q)        # (batch_size, c_len, q_len)
        c_mask = c_mask.view(batch_size, c_len, 1)  # (batch_size, c_len, 1)
        q_mask = q_mask.view(batch_size, 1, q_len)  # (batch_size, 1, q_len)
        s1 = masked_softmax(s, q_mask, dim=2)       # (batch_size, c_len, q_len)
        s2 = masked_softmax(s, c_mask, dim=1)       # (batch_size, c_len, q_len)

        # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
        a = torch.bmm(s1, q)
        # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
        b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), c)

        x = torch.cat([c, a, c * a, c * b], dim=2)  # (bs, c_len, 4 * hid_size)

        return x
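Several of the examples (e.g. #2, #3, #10, #15, #16) build the similarity matrix with self.get_similarity_matrix(c, q) before the two masked_softmax passes. A minimal sketch of a module with the usual BiDAF trilinear similarity, consistent with the shape comments above; the parameter names (c_weight, q_weight, cq_weight, bias) are illustrative and not taken from any of the projects listed:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BiDAFAttention(nn.Module):
    """Sketch of the attention module the BiDAF-style examples appear to assume."""

    def __init__(self, hidden_size, drop_prob=0.1):
        super().__init__()
        self.drop_prob = drop_prob
        self.c_weight = nn.Parameter(torch.zeros(hidden_size, 1))
        self.q_weight = nn.Parameter(torch.zeros(hidden_size, 1))
        self.cq_weight = nn.Parameter(torch.zeros(1, 1, hidden_size))
        for weight in (self.c_weight, self.q_weight, self.cq_weight):
            nn.init.xavier_uniform_(weight)
        self.bias = nn.Parameter(torch.zeros(1))

    def get_similarity_matrix(self, c, q):
        """Trilinear similarity s_ij = w^T [c_i; q_j; c_i * q_j], computed as three matmuls."""
        c_len, q_len = c.size(1), q.size(1)
        c = F.dropout(c, self.drop_prob, self.training)  # (bs, c_len, hid_size)
        q = F.dropout(q, self.drop_prob, self.training)  # (bs, q_len, hid_size)
        s0 = torch.matmul(c, self.c_weight).expand([-1, -1, q_len])                  # (bs, c_len, q_len)
        s1 = torch.matmul(q, self.q_weight).transpose(1, 2).expand([-1, c_len, -1])  # (bs, c_len, q_len)
        s2 = torch.matmul(c * self.cq_weight, q.transpose(1, 2))                     # (bs, c_len, q_len)
        return s0 + s1 + s2 + self.bias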
Example #11
    def attention_vectors(self, h, u, h_mask, u_mask):
        # h_mask (batch, h_len)
        # u_mask (batch, u_len)
        S = self.similarity_matrix(h, u)
        h_len, u_len = h.size(1), u.size(1)

        # Context-to-query Attention
        u_mask = u_mask.view(-1, 1, u_len)
        a_t = masked_softmax(S, u_mask, dim=2)               # (batch, h_len, u_len)
        U_tilde = torch.bmm(a_t, u)                 # (batch, h_len, input_size)

        STANFORD_Q2C = True
        if STANFORD_Q2C:
            # Query-to-context Attention (Stanford variant)
            h_mask = h_mask.view(-1, h_len, 1)
            b_t = masked_softmax(S, h_mask, dim=1)      # (batch, h_len, u_len)
            h_tt = torch.bmm(a_t, b_t.transpose(1, 2))   # (batch, h_len, h_len)
            H_tilde = torch.bmm(h_tt, h)                 # (batch, h_len, input_size)
        else:
            # Query-to-context Attention (original)
            b = masked_softmax(masked_max(S, u_mask, dim=2), h_mask, dim=1) # (batch, h_len)
            b = b.unsqueeze(dim=1)                      # (batch, 1, h_len)
            H_tilde = torch.bmm(b, h)                   # (batch, 1, input_size)
            H_tilde = H_tilde.repeat(1, h_len, 1)       # (batch, h_len, input_size)

        return H_tilde, U_tilde
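The non-Stanford branch above also relies on a masked_max helper. A minimal sketch under the same masking convention as masked_softmax (the -1e30 fill value is an assumption):

import torch

def masked_max(logits, mask, dim=-1):
    """Max over `dim`, ignoring positions where the (broadcastable) mask is 0."""
    mask = mask.type(torch.float32)
    masked_logits = mask * logits + (1 - mask) * -1e30
    return masked_logits.max(dim=dim)[0]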
Example #12
    def forward(self, M0, M1, M2, mask):

        p1 = self.W1(torch.cat((M0, M1), -1)).squeeze()  # (batch_size, c_len)
        p2 = self.W2(torch.cat((M0, M2), -1)).squeeze()  # (batch_size, c_len)
        log_p1 = masked_softmax(p1, mask, log_softmax=True)
        log_p2 = masked_softmax(p2, mask, log_softmax=True)
        return log_p1, log_p2
Example #13
    def forward(self, p_prime, q_prime, doc_len, query_len, doc_mask, query_mask):
        
        batch_size, _, emb_size = p_prime.size()

        d = self.bilstm1(p_prime, doc_len)
        l = self.bilinearAtt(d,d) 
        l = masked_softmax(l, doc_mask.unsqueeze(1), dim=2)
        
        d_tilde = torch.bmm(l,d)
        

        # FUSE
        concat_input_fuse = torch.cat((d,d_tilde,d*d_tilde,d-d_tilde),2)
        d_prime = self.tanh(self.linear1(concat_input_fuse))


        d_double_prime = self.bilstm2(d_prime, doc_len)
        
        # Get q_double_prime
        q_double_prime = self.bilstm3(q_prime, query_len)
        
        # Get gamma
        gamma = self.linear2(q_double_prime).permute(0,2,1)
        gamma = masked_softmax(gamma, query_mask.unsqueeze(1), dim=2)        
        q_bold = torch.bmm(gamma, q_double_prime)#.squeeze(1)

#==============================compute 1-dim self attention for P =====================        
        p_double_prime = d
        gamma_p = self.linear3(p_double_prime).permute(0,2,1)
        gamma_p = masked_softmax(gamma_p, doc_mask.unsqueeze(1), dim=2)
        p_bold = torch.bmm(gamma_p, p_double_prime)

#=======================================================================================        
        
        return d_double_prime, q_bold, p_bold
Example #14
    def forward(self, cw_idxs, qw_idxs):
        # cw_idxs (batch_size, c_len)
        # qw_idxs (batch_size, q_len)

        c_mask = torch.zeros_like(cw_idxs) != cw_idxs
        q_mask = torch.zeros_like(qw_idxs) != qw_idxs
        c_len, q_len = c_mask.sum(-1), q_mask.sum(-1)

        c_emb = self.embed(cw_idxs)  # (batch_size, c_len, input_size)
        q_emb = self.embed(qw_idxs)  # (batch_size, q_len, input_size)

        c_emb = F.dropout(c_emb, self.drop_prob, self.training)
        q_emb = F.dropout(q_emb, self.drop_prob, self.training)

        p_i = self.passage_rnn(c_emb, c_len)  # (batch_size, c_len, h)
        q = self.query_rnn(q_emb, q_len)  # (batch_size, 1, h)
        q_t = q.permute(0, 2, 1)  # (batch_size, h, 1)

        logits_start = self.att_start(p_i, q_t)
        logits_end = self.att_end(p_i, q_t)

        probs_start = masked_softmax(logits_start, c_mask,
                                     log_softmax=True)  # (batch_size, c_len)
        probs_end = masked_softmax(logits_end, c_mask,
                                   log_softmax=True)  # (batch_size, c_len)

        return probs_start, probs_end
Example #15
    def forward(self, c, q, c_mask, q_mask):
        batch_size, c_len, _ = c.size()
        """
        print("batch_size: ", batch_size)
        print("c_len: ", c_len)
        """
        q_len = q.size(1)
        """
        print("q_len: ", q_len)
        print("c.size(): ", c.size())
        print("q.size(): ", q.size())
        """
        s = self.get_similarity_matrix(c, q)  # (batch_size, c_len, q_len)
        """
        print("s.size() ", s.size())
        print("c_mask.size() :", c_mask.size())
        print("q_mask.size() :", q_mask.size())
        """
        c_mask = c_mask.view(batch_size, c_len, 1)  # (batch_size, c_len, 1)
        q_mask = q_mask.view(batch_size, 1, q_len)  # (batch_size, 1, q_len)
        s1 = masked_softmax(s, q_mask, dim=2)  # (batch_size, c_len, q_len)
        s2 = masked_softmax(s, c_mask, dim=1)  # (batch_size, c_len, q_len)

        # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
        a = torch.bmm(s1, q)
        # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
        b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), c)

        x = torch.cat([c, a, c * a, c * b], dim=2)  # (bs, c_len, 4 * hid_size)

        return x
Example #16
    def forward(self, context_hiddens, context_mask, question_hiddens,
                question_mask):
        batch_size, c_len, _ = context_hiddens.size()
        q_len = question_hiddens.size(1)
        s = self.get_similarity_matrix(
            context_hiddens, question_hiddens)  # (batch_size, c_len, q_len)
        context_mask = context_mask.view(batch_size, c_len,
                                         1)  # (batch_size, c_len, 1)
        question_mask = question_mask.view(batch_size, 1,
                                           q_len)  # (batch_size, 1, q_len)
        s1 = masked_softmax(s, question_mask,
                            dim=2)  # (batch_size, c_len, q_len)
        s2 = masked_softmax(s, context_mask,
                            dim=1)  # (batch_size, c_len, q_len)

        # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
        a = torch.bmm(s1, question_hiddens)
        # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
        b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), context_hiddens)

        x = torch.cat(
            [context_hiddens, a, context_hiddens * a, context_hiddens * b],
            dim=2)  # (bs, c_len, 4 * hid_size)

        return x
Example #17
    def __forward(self, query, passage):

        # 0.
        passage, passage_length = passage
        batch_size = passage.size(0)
        passage_length = passage.size(1)
        passage_mask = passage.eq(0)
        query_mask = query.eq(0)

        # 0.1 Encoding
        embedded_query = self.embeddings_tgt(query)  # (N, W, D)
        embedded_passage = self.embeddings_src(passage)

        # 1. Separately encoding.
        encoded_passage = self.passage_encoder(embedded_passage, passage_mask)
        encoded_query = self.query_encoder(embedded_query, query_mask)
        encoding_dim = encoded_query.size(-1)

        # 2. Interaction.

        # Shape: (batch_size, passage_length, query_length)
        passage_query_similarity = self._matrix_attention(
            encoded_passage, encoded_query)
        # Shape: (batch_size, passage_length, query_length)
        passage_query_attention = util.masked_softmax(passage_query_similarity,
                                                      query_mask)
        # Shape: (batch_size, passage_length, encoding_dim)
        passage_query_vectors = util.weighted_sum(encoded_query,
                                                  passage_query_attention)

        # We replace masked values with something really negative here, so they don't affect the
        # max below.
        masked_similarity = util.replace_masked_values(
            passage_query_similarity, query_mask.unsqueeze(1), -1e7)
        # Shape: (batch_size, passage_length)
        query_passage_similarity = masked_similarity.max(dim=-1)[0].squeeze(-1)
        # Shape: (batch_size, passage_length)
        query_passage_attention = util.masked_softmax(query_passage_similarity,
                                                      passage_mask)
        # Shape: (batch_size, encoding_dim)
        query_passage_vector = util.weighted_sum(encoded_passage,
                                                 query_passage_attention)
        # Shape: (batch_size, passage_length, encoding_dim)
        tiled_query_passage_vector = query_passage_vector.unsqueeze(1).expand(
            batch_size, passage_length, encoding_dim)

        # Shape: (batch_size, passage_length, encoding_dim * 4)
        final_merged_passage = torch.cat([
            encoded_passage, passage_query_vectors, encoded_passage *
            passage_query_vectors, encoded_passage * tiled_query_passage_vector
        ],
                                         dim=-1)

        # 3. Compress Composition Mix ... ? or just max_pooling or mean
        # output = self.combine(final_merged_passage, passage_mask)
        output = torch.mean(final_merged_passage, 1)

        prob = self.sigmoid(self.linear(output))
        return prob
Example #18
 def forward(self, m0, m1, m2, mask):
     logits_1 = self.linear1(torch.cat((m0, m1),
                                       2))  # (batch_size, n_context, 1)
     logits_2 = self.linear2(torch.cat((m0, m2),
                                       2))  # (batch_size, n_context, 1)
     log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
     log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
     return log_p1, log_p2
Example #19
 def forward(self, M1, M2, M3, mask):
     X1 = torch.cat([M1, M2], dim=2)
     X2 = torch.cat([M1, M3], dim=2)
     logits_1 = self.linear_1(X1)
     logits_2 = self.linear_2(X2)
     log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
     log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
     return log_p1, log_p2
Example #20
    def forward(self, att, mod, mask):

        logits_1 = self.mod_linear_1(mod)
        mod_2 = self.rnn(mod, mask)
        logits_2 = self.mod_linear_2(mod_2)
        log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #21
    def forward(self, pw_idxs: torch.Tensor, pc_idxs: Optional[torch.Tensor],
                qw_idxs: torch.Tensor, qc_idxs: Optional[torch.Tensor]):
        """ Run a forward step
            pw_idxs: word indices in the paragraph  64, 254
            pc_idxs: char indices in the paragraph  64, 254, 16
            qw_idxs: word indices in the question 64, 20
            qc_idx: char indices in the question  64, 20, 16
        """

        p_mask = torch.zeros_like(pw_idxs) != pw_idxs
        q_mask = torch.zeros_like(qw_idxs) != qw_idxs
        p_len, q_len = p_mask.sum(-1), q_mask.sum(-1)

        p_emb = self.emb(pw_idxs, pc_idxs)  # (batch_size, p_len, hidden_size)
        q_emb = self.emb(qw_idxs, qc_idxs)  # (batch_size, q_len, hidden_size)

        # eq (1)
        # (batch_size, p_len, 2 * hidden_size)
        p_enc = self.enc(p_emb, p_len)

        # approx eq (2)
        # (batch_size, q_len, 2 * hidden_size)
        q_enc = self.enc(q_emb, q_len)

        # approx eq (3)-(7)
        (p_tilde,
         q_tilde) = self.att(p_enc, q_enc, p_mask,
                             q_mask)  # 2 x (batch_size, p_len, 2*hidden_size)

        # eq (8) + (11)
        p_fused1 = self.p_fusion1(p_enc, p_tilde)

        # eq (9) + (12)
        q_fused1 = self.q_fusion1(q_enc, q_tilde)

        # eq (13)
        p_enc_13 = self.p_enc_eq_13(p_fused1, p_len)
        q_enc_13 = self.q_enc_eq_13(q_fused1, q_len)

        p_fused_16 = self.self_attention(p_enc_13)
        # more steps missing in here
        contextual_p = self.p_enc_eq_17(p_fused_16, p_len)

        # question partial processing
        # eq (19)
        q_enc_17 = self.q_enc_eq_17(q_enc_13, q_len)
        weighted_q = self.q_linear_align_18(q_enc_17)

        logits_start = self.bilinear_start(weighted_q, contextual_p)
        logits_end = self.bilinear_end(weighted_q, contextual_p)
        log_start = masked_softmax(logits_start, p_mask, log_softmax=True)
        log_end = masked_softmax(logits_end, p_mask, log_softmax=True)

        out = (log_start, log_end)
        return out
Example #22
    def forward(self, att, mod, mask):
        # Shapes: (batch_size, seq_len, 1)
        logits_1 = self.att_linear_1(att) + self.mod_linear_1(mod)
        mod_2 = self.rnn(mod, mask.sum(-1))
        logits_2 = self.att_linear_2(att) + self.mod_linear_2(mod_2)

        # Shapes: (batch_size, seq_len)
        log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #23
    def forward(self, M_1, M_2, M_3, mask):
        begin = torch.cat([M_1, M_2], dim=2)
        begin = self.W1(begin)

        end = torch.cat([M_1, M_3], dim=2)
        end = self.W2(end)

        log_p1 = masked_softmax(begin.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(end.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #24
    def forward(self, M_1, M_2, M_3, mask):

        y_i = self.ifv(M_1, M_2, M_3, mask)
        # y_i = None

        logits_1 = self.Ws(torch.cat((M_1, M_2), dim=1)).squeeze()
        logits_2 = self.We(torch.cat((M_1, M_3), dim=1)).squeeze()

        log_p1 = masked_softmax(logits_1, mask, dim=1, log_softmax=True)
        log_p2 = masked_softmax(logits_2, mask, dim=1, log_softmax=True)

        return y_i, log_p1, log_p2
Example #25
    def forward(self, att, mask, c_len):

        mod = self.modeling(att, c_len)  # (batch_size, c_len, 2 * hidden_size)
        # Shapes: (batch_size, seq_len, 1)
        logits_1 = self.att_linear_1(mod)
        logits_2 = self.att_linear_2(mod)

        # Shapes: (batch_size, seq_len)
        log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #26
File: Output.py  Project: hy2632/cs224n
    def forward(self, M0, M1, M2, mask):
        # 08/10: mod: (batch_size, seq_len, h)
        # mask: (batch_size, seq_len, 1)

        logits_1 = self.w1(torch.cat([M0, M1], dim=-1))
        logits_2 = self.w1(torch.cat([M0, M2], dim=-1))

        # Shapes: (batch_size, seq_len)
        log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #27
    def forward(self, m0, m1, m2, mask):
        x1 = torch.cat([m0, m1], dim=-1)
        x2 = torch.cat([m0, m2], dim=-1)

        from util import masked_softmax
        log_p1 = masked_softmax(self.span_start_linear(x1).squeeze(),
                                mask,
                                log_softmax=True)
        log_p2 = masked_softmax(self.span_end_linear(x2).squeeze(),
                                mask,
                                log_softmax=True)

        return log_p1, log_p2
Example #28
    def forward(self, att, mod, mask):
        # Shapes: (batch_size, seq_len, 1)
        logits_1 = self.att_linear_1(att) + self.mod_linear_1(mod)
        if not self.use_transformer:
            mod_2 = self.enc(mod, mask.sum(-1))
        else:
            mod_2 = self.enc(mod, mask)
        logits_2 = self.att_linear_2(att) + self.mod_linear_2(mod_2)

        # Shapes: (batch_size, seq_len)
        log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #29
    def forward(self, M1, M2, M3, mask):
        X1 = torch.cat([M1, M2], dim=1)
        X2 = torch.cat([M1, M3], dim=1)
        L1 = self.w1(X1)
        L2 = self.w2(X2)

        Y1 = mask_logits(L1.squeeze(), mask)
        Y2 = mask_logits(L2.squeeze(), mask)

        from util import masked_softmax
        log_p1 = masked_softmax(Y1.squeeze(), mask, log_softmax=True)
        log_p2 = masked_softmax(Y2.squeeze(), mask, log_softmax=True)

        return log_p1, log_p2
Example #30
    def forward(self, att, masks):
        """

        :param att: output of the attention layer, shape (batch_size, seq_len, hidden_size)
        :param masks:
        :return:
        """
        logits1 = F.dropout(self.output_proj1(att), self.drop_prob)
        logits2 = F.dropout(self.output_proj2(att), self.drop_prob)

        log_p1 = masked_softmax(logits1.squeeze(-1), masks, log_softmax=True)
        log_p2 = masked_softmax(logits2.squeeze(-1), masks, log_softmax=True)

        return log_p1, log_p2
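Most of the output layers above return a pair of log-probability tensors over context positions because masked_softmax is called with log_softmax=True. A short usage sketch with hypothetical stand-in tensors, showing how such a pair is typically trained against gold start/end indices with negative log-likelihood:

import torch
import torch.nn.functional as F

batch_size, c_len = 4, 50
log_p1 = F.log_softmax(torch.randn(batch_size, c_len), dim=-1)  # stand-in for a model's log_p1
log_p2 = F.log_softmax(torch.randn(batch_size, c_len), dim=-1)  # stand-in for a model's log_p2
y1 = torch.randint(0, c_len, (batch_size,))  # gold start indices
y2 = torch.randint(0, c_len, (batch_size,))  # gold end indices

# The layers already return log-probabilities, so NLL loss applies directly.
loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
print(loss.item())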