def forward(self, c, q, c_mask, q_mask):
    batch_size, c_len, _ = c.size()
    q_len = q.size(1)

    # Project the question, then apply dropout and ReLU
    q = self.proj(q)
    q = F.dropout(q, self.drop_prob, self.training)
    q = F.relu(q)                                                                    # (batch_size, q_len, hidden_size)

    # Add sentinel vectors
    q = torch.cat([q, self.q_cent.unsqueeze(0).repeat(batch_size, 1, 1)], dim=1)     # (batch_size, q_len + 1, hidden_size)
    c = torch.cat([c, self.c_cent.unsqueeze(0).repeat(batch_size, 1, 1)], dim=1)     # (batch_size, c_len + 1, hidden_size)
    q_mask = torch.cat([q_mask, torch.ones(batch_size, 1).to(self.device)], dim=1)   # (batch_size, q_len + 1)
    c_mask = torch.cat([c_mask, torch.ones(batch_size, 1).to(self.device)], dim=1)   # (batch_size, c_len + 1)

    # Compute affinity matrix
    # (bs, c_len + 1, hid_size) x (bs, hid_size, q_len + 1) => (bs, c_len + 1, q_len + 1)
    L = torch.bmm(c, q.transpose(1, 2))

    # Compute C2Q attention
    alpha = masked_softmax(L, q_mask.view(batch_size, 1, q_len + 1), dim=2)   # (batch_size, c_len + 1, q_len + 1)
    # (bs, c_len + 1, q_len + 1) x (bs, q_len + 1, hid_size) => (bs, c_len + 1, hid_size)
    c2q_att = torch.bmm(alpha, q)

    # Compute Q2C attention
    beta = masked_softmax(L, c_mask.view(batch_size, c_len + 1, 1), dim=1)    # (batch_size, c_len + 1, q_len + 1)
    # (bs, q_len + 1, c_len + 1) x (bs, c_len + 1, hid_size) => (bs, q_len + 1, hid_size)
    q2c_att = torch.bmm(beta.transpose(1, 2), c)

    # Compute second-level attention
    # (bs, c_len + 1, q_len + 1) x (bs, q_len + 1, hid_size) => (bs, c_len + 1, hid_size)
    att_lv2 = torch.bmm(alpha, q2c_att)

    x = torch.cat([att_lv2, c2q_att], dim=2)                    # (batch_size, c_len + 1, hidden_size * 2)
    # Drop the sentinel position before the RNN
    x = self.rnn(x[:, :c_len, :], c_mask[:, :c_len].sum(-1))    # (batch_size, c_len, hidden_size * 4)
    return x
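# Minimal sketch (an assumption, not taken from the original module) of how the
# learnable sentinel vectors q_cent / c_cent used above could be registered in
# __init__ so that unsqueeze(0).repeat(batch_size, 1, 1) yields
# (batch_size, 1, hidden_size). The class name and initializer are illustrative.
import torch
import torch.nn as nn

class CoattentionSentinels(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        # One sentinel row per side, shape (1, hidden_size)
        self.q_cent = nn.Parameter(torch.empty(1, hidden_size))
        self.c_cent = nn.Parameter(torch.empty(1, hidden_size))
        nn.init.xavier_uniform_(self.q_cent)
        nn.init.xavier_uniform_(self.c_cent)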
def forward(self, c, q, c_mask, q_mask):
    batch_size, c_len, _ = c.size()
    q_len = q.size(1)

    s = self.get_similarity_matrix(c, q)           # (batch_size, c_len, q_len)
    c_mask = c_mask.view(batch_size, c_len, 1)     # (batch_size, c_len, 1)
    q_mask = q_mask.view(batch_size, 1, q_len)     # (batch_size, 1, q_len)
    alpha = masked_softmax(s, q_mask, dim=2)       # (batch_size, c_len, q_len)
    beta = masked_softmax(s, c_mask, dim=1)        # (batch_size, c_len, q_len)

    # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
    A = torch.bmm(alpha, q)
    # (bs, q_len, c_len) x (bs, c_len, hid_size) => (bs, q_len, hid_size)
    B = torch.bmm(beta.transpose(1, 2), c)

    # Second-level cross-attention matrix (N x M)
    # (bs, c_len, hid_size) x (bs, q_len, hid_size)^T => (bs, c_len, q_len)
    R = torch.bmm(A, torch.transpose(B, 1, 2))
    gamma = F.softmax(R, dim=2)                    # (bs, c_len, q_len)

    # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
    D = torch.bmm(gamma, B)

    x = torch.cat([c, A, D], dim=2)                # (bs, c_len, 3 * hid_size)
    return x
def forward(self, c, q, c_mask, q_mask, q_global, c_conv):
    batch_size, c_len, _ = c.size()
    q_len = q.size(1)

    s = self.get_similarity_matrix(c, q)           # (batch_size, c_len, q_len)
    c_mask = c_mask.view(batch_size, c_len, 1)     # (batch_size, c_len, 1)
    q_mask = q_mask.view(batch_size, 1, q_len)     # (batch_size, 1, q_len)
    s1 = masked_softmax(s, q_mask, dim=2)          # (batch_size, c_len, q_len)
    s2 = masked_softmax(s, c_mask, dim=1)          # (batch_size, c_len, q_len)

    # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
    a = torch.bmm(s1, q)
    # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
    b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), c)

    # Reshape q_global so it broadcasts against c_conv: (batch_size, 900) -> (batch_size, 1, 900)
    q_global = q_global.unsqueeze(1)
    # Elementwise product of q_global and c_conv
    global_sim = q_global * c_conv                 # (batch_size, c_len, 900)
    # Project down to size 200
    global_sim = self.global_proj(global_sim)      # (batch_size, c_len, 200)

    # Concatenate the global-similarity feature with the BiDAF-style features
    x = torch.cat([c, a, c * a, c * b, global_sim], dim=2)   # (bs, c_len, 5 * hid_size)
    return x
def forward(self, cw_idxs, qw_idxs, bert_embeddings, max_context_len, max_question_len, device):
    c_mask = torch.zeros_like(cw_idxs) != cw_idxs
    q_mask = torch.zeros_like(qw_idxs) != qw_idxs
    c_len, q_len = c_mask.sum(-1), q_mask.sum(-1)

    glove_c_emb = self.emb(cw_idxs)                # (batch_size, c_len, 300)
    glove_q_emb = self.emb(qw_idxs)                # (batch_size, q_len, 300)

    # Truncate the BERT embeddings to the longest context in the batch
    c_emb = bert_embeddings[:, 0:torch.max(c_len), :]           # (batch_size, c_len, hidden_size)
    c_emb = self.proj_down(torch.nn.functional.relu(c_emb))     # (batch_size, c_len, 300)
    c_emb = c_emb + c_emb * glove_c_emb

    start_logits = self.bert_start(c_emb)          # (batch_size, c_len, 1)
    end_logits = self.bert_end(c_emb)              # (batch_size, c_len, 1)

    log_p1 = masked_softmax(start_logits.squeeze(), c_mask, log_softmax=True)   # (batch_size, c_len)
    log_p2 = masked_softmax(end_logits.squeeze(), c_mask, log_softmax=True)     # (batch_size, c_len)

    out = log_p1, log_p2
    return out   # 2 tensors, each (batch_size, c_len)
def forward(self, start, end, mask):
    logits_1 = self.startFF(start)
    # Note: logits_2 reuses self.startFF; if the module defines a separate
    # end-prediction head, that is likely what was intended here.
    logits_2 = self.startFF(end)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, context, question, c_masks):
    """
    Single forward pass of the attention layer.

    :param context: (Tensor) shape (batch_size, context_len, hidden_size), encoder output for the context
    :param question: (Tensor) shape (batch_size, hidden_size), encoder output for the question
    :param c_masks: (Tensor) mask over the context positions
    :returns: log_p1, log_p2, each of shape (batch_size, context_len)
    """
    # shape (batch_size, context_len, 1)
    logits_1 = torch.bmm(self.attn_proj_1(context), torch.unsqueeze(question, 2))
    logits_1 = torch.squeeze(logits_1, -1)
    # shape (batch_size, context_len, 1)
    logits_2 = torch.bmm(self.attn_proj_2(context), torch.unsqueeze(question, 2))
    logits_2 = torch.squeeze(logits_2, -1)

    log_p1 = masked_softmax(logits_1, c_masks, dim=1, log_softmax=True)
    log_p2 = masked_softmax(logits_2, c_masks, dim=1, log_softmax=True)
    return log_p1, log_p2
def forward(self, M0, M1, M2, mask):
    """
    Args:
        M0, M1, M2 (torch.Tensor): outputs of the three model encoders, from bottom to top.
            See Figure 1 of the QANet paper: https://arxiv.org/pdf/1804.09541.pdf
    Returns:
        log_p1, log_p2 (torch.Tensor): log-probabilities for the start and end positions
    """
    X1 = torch.cat([M0, M1], dim=2)
    X2 = torch.cat([M0, M2], dim=2)
    # Shapes: (batch_size, seq_len, 1)
    logits_1 = self.linear_1(X1)
    logits_2 = self.linear_2(X2)

    # Shapes: (batch_size, seq_len)
    mask = mask.type(torch.float)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)   # (batch_size, seq_len)
    # Modified output (disabled):
    # logits_2 = torch.cat([logits_2, log_p1.unsqueeze(2)], dim=2)   # (batch_size, seq_len, 2)
    # logits_2 = self.linear_3(logits_2)                             # (batch_size, seq_len, 1)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, p, q, p_mask, q_mask):
    batch_size, p_len, _ = p.size()
    q_len = q.size(1)

    # (batch_size, p_len, q_len)
    s = self.get_similarity_matrix(p, q)
    p_mask = p_mask.view(batch_size, p_len, 1)     # (batch_size, p_len, 1)
    q_mask = q_mask.view(batch_size, 1, q_len)     # (batch_size, 1, q_len)

    # approx eq (4): (batch_size, p_len, q_len)
    s1 = masked_softmax(s, q_mask, dim=2)
    # approx eq (5): (bs, p_len, q_len) x (bs, q_len, hid_size) => (bs, p_len, hid_size)
    p_tilde = torch.bmm(s1, q)
    # approx eq (6): (batch_size, p_len, q_len)
    s2 = masked_softmax(s, p_mask, dim=1)
    # approx eq (7): (bs, q_len, p_len) x (bs, p_len, hid_size) => (bs, q_len, hid_size)
    q_tilde = torch.bmm(s2.transpose(1, 2), p)

    x = (p_tilde, q_tilde)
    return x
def forward(self, context, question, c_masks, q_masks):
    """
    Single forward pass of the attention layer.

    :param context: (Tensor) shape (batch, c_len, hidden_size)
    :param question: (Tensor) shape (batch, q_len, hidden_size)
    :param c_masks: (Tensor) mask over context positions
    :param q_masks: (Tensor) mask over question positions
    :return: x (Tensor) shape (batch, c_len, 4 * hidden_size)
    """
    batch_size, c_len, _ = context.shape
    q_len = question.shape[1]

    s = self.get_similarity_matrix(context, question)   # shape (batch, c_len, q_len)
    c_masks = c_masks.view(batch_size, c_len, 1)
    q_masks = q_masks.view(batch_size, 1, q_len)

    s1 = masked_softmax(s, q_masks, dim=2)     # shape (batch, c_len, q_len)
    s2 = masked_softmax(s, c_masks, dim=1)     # shape (batch, c_len, q_len)

    a = torch.bmm(s1, question)                # shape (batch, c_len, hidden_size)
    ss = torch.bmm(s1, s2.transpose(1, 2))     # shape (batch, c_len, c_len)
    b = torch.bmm(ss, context)                 # shape (batch, c_len, hidden_size)

    x = torch.cat([context, a, context * a, context * b], dim=2)
    return x
def forward(self, c, q, c_mask, q_mask):
    batch_size, c_len, _ = c.size()
    q_len = q.size(1)

    s = self.get_similarity_matrix(c, q)           # (batch_size, c_len, q_len)
    c_mask = c_mask.view(batch_size, c_len, 1)     # (batch_size, c_len, 1)
    q_mask = q_mask.view(batch_size, 1, q_len)     # (batch_size, 1, q_len)
    s1 = masked_softmax(s, q_mask, dim=2)          # (batch_size, c_len, q_len)
    s2 = masked_softmax(s, c_mask, dim=1)          # (batch_size, c_len, q_len)

    # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
    a = torch.bmm(s1, q)
    # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
    b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), c)

    x = torch.cat([c, a, c * a, c * b], dim=2)     # (bs, c_len, 4 * hid_size)
    return x
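# Sketch of the trilinear similarity that these BiDAF-style layers appear to
# rely on via get_similarity_matrix. This is an assumption based on the standard
# BiDAF formulation s_ij = w^T [c_i; q_j; c_i * q_j]; the parameter names
# (c_weight, q_weight, cq_weight, bias) are illustrative, not taken from the
# original module.
import torch
import torch.nn.functional as F

def get_similarity_matrix(self, c, q):
    """Trilinear similarity, computed without materialising [c_i; q_j; c_i * q_j]."""
    c_len, q_len = c.size(1), q.size(1)
    c = F.dropout(c, self.drop_prob, self.training)        # (bs, c_len, hid_size)
    q = F.dropout(q, self.drop_prob, self.training)        # (bs, q_len, hid_size)
    s0 = torch.matmul(c, self.c_weight).expand([-1, -1, q_len])                  # (bs, c_len, q_len)
    s1 = torch.matmul(q, self.q_weight).transpose(1, 2).expand([-1, c_len, -1])  # (bs, c_len, q_len)
    s2 = torch.matmul(c * self.cq_weight, q.transpose(1, 2))                     # (bs, c_len, q_len)
    return s0 + s1 + s2 + self.bias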
def attention_vectors(self, h, u, h_mask, u_mask):
    # h_mask: (batch, h_len)
    # u_mask: (batch, u_len)
    S = self.similarity_matrix(h, u)
    h_len, u_len = h.size(1), u.size(1)

    # Context-to-query attention
    u_mask = u_mask.view(-1, 1, u_len)
    a_t = masked_softmax(S, u_mask, dim=2)          # (batch, h_len, u_len)
    U_tilde = torch.bmm(a_t, u)                     # (batch, h_len, input_size)

    STANFORD_Q2C = True
    if STANFORD_Q2C:
        # Query-to-context attention (Stanford variant)
        h_mask = h_mask.view(-1, h_len, 1)
        b_t = masked_softmax(S, h_mask, dim=1)      # (batch, h_len, u_len)
        h_tt = torch.bmm(a_t, b_t.transpose(1, 2))  # (batch, h_len, h_len)
        H_tilde = torch.bmm(h_tt, h)                # (batch, h_len, input_size)
    else:
        # Query-to-context attention (original BiDAF)
        b = masked_softmax(masked_max(S, u_mask, dim=2), h_mask, dim=1)   # (batch, h_len)
        b = b.unsqueeze(dim=1)                      # (batch, 1, h_len)
        H_tilde = torch.bmm(b, h)                   # (batch, 1, input_size)
        H_tilde = H_tilde.repeat(1, h_len, 1)       # (batch, h_len, input_size)

    return H_tilde, U_tilde
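# Sketch of the masked_max helper used in the original-BiDAF branch above. The
# actual utility is not shown in this section; a common implementation (assumed
# here) pushes invalid positions to a large negative value before taking the max
# along the given dimension.
import torch

def masked_max(logits, mask, dim=-1):
    """Max over `dim`, ignoring positions where `mask` is 0 / False."""
    neg_inf = -1e30
    masked_logits = logits.masked_fill(mask == 0, neg_inf)
    return masked_logits.max(dim=dim)[0]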
def forward(self, M0, M1, M2, mask):
    p1 = self.W1(torch.cat((M0, M1), -1)).squeeze()   # (batch_size, c_len)
    p2 = self.W2(torch.cat((M0, M2), -1)).squeeze()   # (batch_size, c_len)
    log_p1 = masked_softmax(p1, mask, log_softmax=True)
    log_p2 = masked_softmax(p2, mask, log_softmax=True)
    return log_p1, log_p2
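# Sketch of the masked_softmax utility that all of these layers call. The
# signature matches its usage in this section; the -1e30 masking constant is an
# assumption about the implementation, not confirmed by the source.
import torch
import torch.nn.functional as F

def masked_softmax(logits, mask, dim=-1, log_softmax=False):
    """Softmax over `dim`, assigning ~zero probability where `mask` is 0 / False."""
    mask = mask.type(torch.float32)
    masked_logits = mask * logits + (1 - mask) * -1e30
    softmax_fn = F.log_softmax if log_softmax else F.softmax
    return softmax_fn(masked_logits, dim)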
def forward(self, p_prime, q_prime, doc_len, query_len, doc_mask, query_mask):
    batch_size, _, emb_size = p_prime.size()

    d = self.bilstm1(p_prime, doc_len)
    l = self.bilinearAtt(d, d)
    l = masked_softmax(l, doc_mask.unsqueeze(1), dim=2)
    d_tilde = torch.bmm(l, d)

    # Fuse
    concat_input_fuse = torch.cat((d, d_tilde, d * d_tilde, d - d_tilde), 2)
    d_prime = self.tanh(self.linear1(concat_input_fuse))
    d_double_prime = self.bilstm2(d_prime, doc_len)

    # Get q_double_prime
    q_double_prime = self.bilstm3(q_prime, query_len)

    # Get gamma
    gamma = self.linear2(q_double_prime).permute(0, 2, 1)
    gamma = masked_softmax(gamma, query_mask.unsqueeze(1), dim=2)
    q_bold = torch.bmm(gamma, q_double_prime)

    # Compute 1-dim self-attention for P
    p_double_prime = d
    gamma_p = self.linear3(p_double_prime).permute(0, 2, 1)
    gamma_p = masked_softmax(gamma_p, doc_mask.unsqueeze(1), dim=2)
    p_bold = torch.bmm(gamma_p, p_double_prime)

    return d_double_prime, q_bold, p_bold
def forward(self, cw_idxs, qw_idxs):
    # cw_idxs: (batch_size, c_len)
    # qw_idxs: (batch_size, q_len)
    c_mask = torch.zeros_like(cw_idxs) != cw_idxs
    q_mask = torch.zeros_like(qw_idxs) != qw_idxs
    c_len, q_len = c_mask.sum(-1), q_mask.sum(-1)

    c_emb = self.embed(cw_idxs)                    # (batch_size, c_len, input_size)
    q_emb = self.embed(qw_idxs)                    # (batch_size, q_len, input_size)
    c_emb = F.dropout(c_emb, self.drop_prob, self.training)
    q_emb = F.dropout(q_emb, self.drop_prob, self.training)

    p_i = self.passage_rnn(c_emb, c_len)           # (batch_size, c_len, h)
    q = self.query_rnn(q_emb, q_len)               # (batch_size, 1, h)
    q_t = q.permute(0, 2, 1)                       # (batch_size, h, 1)

    logits_start = self.att_start(p_i, q_t)
    logits_end = self.att_end(p_i, q_t)

    probs_start = masked_softmax(logits_start, c_mask, log_softmax=True)   # (batch_size, c_len)
    probs_end = masked_softmax(logits_end, c_mask, log_softmax=True)       # (batch_size, c_len)
    return probs_start, probs_end
def forward(self, c, q, c_mask, q_mask): batch_size, c_len, _ = c.size() """ print("batch_size: ", batch_size) print("c_len: ", c_len) """ q_len = q.size(1) """ print("q_len: ", q_len) print("c.size(): ", c.size()) print("q.size(): ", q.size()) """ s = self.get_similarity_matrix(c, q) # (batch_size, c_len, q_len) """ print("s.size() ", s.size()) print("c_mask.size() :", c_mask.size()) print("q_mask.size() :", q_mask.size()) """ c_mask = c_mask.view(batch_size, c_len, 1) # (batch_size, c_len, 1) q_mask = q_mask.view(batch_size, 1, q_len) # (batch_size, 1, q_len) s1 = masked_softmax(s, q_mask, dim=2) # (batch_size, c_len, q_len) s2 = masked_softmax(s, c_mask, dim=1) # (batch_size, c_len, q_len) # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size) a = torch.bmm(s1, q) # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size) b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), c) x = torch.cat([c, a, c * a, c * b], dim=2) # (bs, c_len, 4 * hid_size) return x
def forward(self, context_hiddens, context_mask, question_hiddens, question_mask):
    batch_size, c_len, _ = context_hiddens.size()
    q_len = question_hiddens.size(1)

    s = self.get_similarity_matrix(context_hiddens, question_hiddens)   # (batch_size, c_len, q_len)
    context_mask = context_mask.view(batch_size, c_len, 1)              # (batch_size, c_len, 1)
    question_mask = question_mask.view(batch_size, 1, q_len)            # (batch_size, 1, q_len)
    s1 = masked_softmax(s, question_mask, dim=2)                        # (batch_size, c_len, q_len)
    s2 = masked_softmax(s, context_mask, dim=1)                         # (batch_size, c_len, q_len)

    # (bs, c_len, q_len) x (bs, q_len, hid_size) => (bs, c_len, hid_size)
    a = torch.bmm(s1, question_hiddens)
    # (bs, c_len, c_len) x (bs, c_len, hid_size) => (bs, c_len, hid_size)
    b = torch.bmm(torch.bmm(s1, s2.transpose(1, 2)), context_hiddens)

    x = torch.cat(
        [context_hiddens, a, context_hiddens * a, context_hiddens * b], dim=2)   # (bs, c_len, 4 * hid_size)
    return x
def __forward(self, query, passage):
    # 0. Setup
    batch_size = passage.size(0)
    passage_length = passage.size(1)
    passage_mask = passage.eq(0)
    query_mask = query.eq(0)

    # 0.1 Embedding lookup
    embedded_query = self.embeddings_tgt(query)        # (N, W, D)
    embedded_passage = self.embeddings_src(passage)

    # 1. Encode passage and query separately.
    encoded_passage = self.passage_encoder(embedded_passage, passage_mask)
    encoded_query = self.query_encoder(embedded_query, query_mask)
    encoding_dim = encoded_query.size(-1)

    # 2. Interaction.
    # Shape: (batch_size, passage_length, query_length)
    passage_query_similarity = self._matrix_attention(encoded_passage, encoded_query)
    # Shape: (batch_size, passage_length, query_length)
    passage_query_attention = util.masked_softmax(passage_query_similarity, query_mask)
    # Shape: (batch_size, passage_length, encoding_dim)
    passage_query_vectors = util.weighted_sum(encoded_query, passage_query_attention)

    # Replace masked values with something very negative so they don't affect the max below.
    masked_similarity = util.replace_masked_values(
        passage_query_similarity, query_mask.unsqueeze(1), -1e7)
    # Shape: (batch_size, passage_length)
    query_passage_similarity = masked_similarity.max(dim=-1)[0].squeeze(-1)
    # Shape: (batch_size, passage_length)
    query_passage_attention = util.masked_softmax(query_passage_similarity, passage_mask)
    # Shape: (batch_size, encoding_dim)
    query_passage_vector = util.weighted_sum(encoded_passage, query_passage_attention)
    # Shape: (batch_size, passage_length, encoding_dim)
    tiled_query_passage_vector = query_passage_vector.unsqueeze(1).expand(
        batch_size, passage_length, encoding_dim)

    # Shape: (batch_size, passage_length, encoding_dim * 4)
    final_merged_passage = torch.cat([
        encoded_passage,
        passage_query_vectors,
        encoded_passage * passage_query_vectors,
        encoded_passage * tiled_query_passage_vector
    ], dim=-1)

    # 3. Compress / compose: mean-pool over the passage dimension
    #    (max pooling or a learned combine are alternatives).
    # output = self.combine(final_merged_passage, passage_mask)
    output = torch.mean(final_merged_passage, 1)
    prob = self.sigmoid(self.linear(output))
    return prob
def forward(self, m0, m1, m2, mask):
    logits_1 = self.linear1(torch.cat((m0, m1), 2))   # (batch_size, n_context, 1)
    logits_2 = self.linear2(torch.cat((m0, m2), 2))   # (batch_size, n_context, 1)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, M1, M2, M3, mask):
    X1 = torch.cat([M1, M2], dim=2)
    X2 = torch.cat([M1, M3], dim=2)
    logits_1 = self.linear_1(X1)
    logits_2 = self.linear_2(X2)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, att, mod, mask):
    logits_1 = self.mod_linear_1(mod)
    mod_2 = self.rnn(mod, mask)
    logits_2 = self.mod_linear_2(mod_2)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, pw_idxs: torch.Tensor, pc_idxs: Optional[torch.Tensor],
            qw_idxs: torch.Tensor, qc_idxs: Optional[torch.Tensor]):
    """
    Run a forward step.

    pw_idxs: word indices in the paragraph, e.g. (64, 254)
    pc_idxs: char indices in the paragraph, e.g. (64, 254, 16)
    qw_idxs: word indices in the question, e.g. (64, 20)
    qc_idxs: char indices in the question, e.g. (64, 20, 16)
    """
    p_mask = torch.zeros_like(pw_idxs) != pw_idxs
    q_mask = torch.zeros_like(qw_idxs) != qw_idxs
    p_len, q_len = p_mask.sum(-1), q_mask.sum(-1)

    p_emb = self.emb(pw_idxs, pc_idxs)             # (batch_size, p_len, hidden_size)
    q_emb = self.emb(qw_idxs, qc_idxs)             # (batch_size, q_len, hidden_size)

    # eq (1): (batch_size, p_len, 2 * hidden_size)
    p_enc = self.enc(p_emb, p_len)
    # approx eq (2): (batch_size, q_len, 2 * hidden_size)
    q_enc = self.enc(q_emb, q_len)

    # approx eq (3)-(7): 2 x (batch_size, p_len, 2 * hidden_size)
    (p_tilde, q_tilde) = self.att(p_enc, q_enc, p_mask, q_mask)

    # eq (8) + (11)
    p_fused1 = self.p_fusion1(p_enc, p_tilde)
    # eq (9) + (12)
    q_fused1 = self.q_fusion1(q_enc, q_tilde)

    # eq (13)
    p_enc_13 = self.p_enc_eq_13(p_fused1, p_len)
    q_enc_13 = self.q_enc_eq_13(q_fused1, q_len)
    p_fused_16 = self.self_attention(p_enc_13)   # more steps missing in here
    contextual_p = self.p_enc_eq_17(p_fused_16, p_len)

    # Question partial processing
    # eq (19)
    q_enc_17 = self.q_enc_eq_17(q_enc_13, q_len)
    weighted_q = self.q_linear_align_18(q_enc_17)

    logits_start = self.bilinear_start(weighted_q, contextual_p)
    logits_end = self.bilinear_end(weighted_q, contextual_p)
    log_start = masked_softmax(logits_start, p_mask, log_softmax=True)
    log_end = masked_softmax(logits_end, p_mask, log_softmax=True)

    out = (log_start, log_end)
    return out
def forward(self, att, mod, mask):
    # Shapes: (batch_size, seq_len, 1)
    logits_1 = self.att_linear_1(att) + self.mod_linear_1(mod)
    mod_2 = self.rnn(mod, mask.sum(-1))
    logits_2 = self.att_linear_2(att) + self.mod_linear_2(mod_2)

    # Shapes: (batch_size, seq_len)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, M_1, M_2, M_3, mask):
    begin = torch.cat([M_1, M_2], dim=2)
    begin = self.W1(begin)
    end = torch.cat([M_1, M_3], dim=2)
    end = self.W2(end)
    log_p1 = masked_softmax(begin.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(end.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, M_1, M_2, M_3, mask):
    y_i = self.ifv(M_1, M_2, M_3, mask)
    # Note: these concatenations use dim=1 (the sequence dimension); the
    # otherwise-similar output layers in this file concatenate along the hidden
    # dimension (dim=2), which may be what was intended here.
    logits_1 = self.Ws(torch.cat((M_1, M_2), dim=1)).squeeze()
    logits_2 = self.We(torch.cat((M_1, M_3), dim=1)).squeeze()
    log_p1 = masked_softmax(logits_1, mask, dim=1, log_softmax=True)
    log_p2 = masked_softmax(logits_2, mask, dim=1, log_softmax=True)
    return y_i, log_p1, log_p2
def forward(self, att, mask, c_len):
    mod = self.modeling(att, c_len)                # (batch_size, c_len, 2 * hidden_size)

    # Shapes: (batch_size, seq_len, 1)
    logits_1 = self.att_linear_1(mod)
    logits_2 = self.att_linear_2(mod)

    # Shapes: (batch_size, seq_len)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, M0, M1, M2, mask):
    # 08/10: M0/M1/M2: (batch_size, seq_len, h); mask: (batch_size, seq_len, 1)
    logits_1 = self.w1(torch.cat([M0, M1], dim=-1))
    # Note: the original applies self.w1 to both concatenations; a separate
    # end-prediction projection may have been intended for logits_2.
    logits_2 = self.w1(torch.cat([M0, M2], dim=-1))

    # Shapes: (batch_size, seq_len)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, m0, m1, m2, mask):
    from util import masked_softmax

    x1 = torch.cat([m0, m1], dim=-1)
    x2 = torch.cat([m0, m2], dim=-1)
    log_p1 = masked_softmax(self.span_start_linear(x1).squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(self.span_end_linear(x2).squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, att, mod, mask):
    # Shapes: (batch_size, seq_len, 1)
    logits_1 = self.att_linear_1(att) + self.mod_linear_1(mod)
    if not self.use_transformer:
        mod_2 = self.enc(mod, mask.sum(-1))
    else:
        mod_2 = self.enc(mod, mask)
    logits_2 = self.att_linear_2(att) + self.mod_linear_2(mod_2)

    # Shapes: (batch_size, seq_len)
    log_p1 = masked_softmax(logits_1.squeeze(), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(), mask, log_softmax=True)
    return log_p1, log_p2
def forward(self, M1, M2, M3, mask):
    from util import masked_softmax

    # Note: these concatenations use dim=1 (the sequence dimension); the similar
    # output layers in this file concatenate along the hidden dimension (dim=2),
    # which may be the intent here as well.
    X1 = torch.cat([M1, M2], dim=1)
    X2 = torch.cat([M1, M3], dim=1)
    L1 = self.w1(X1)
    L2 = self.w2(X2)

    # mask_logits already masks invalid positions; masked_softmax masks them again.
    Y1 = mask_logits(L1.squeeze(), mask)
    Y2 = mask_logits(L2.squeeze(), mask)
    log_p1 = masked_softmax(Y1, mask, log_softmax=True)
    log_p2 = masked_softmax(Y2, mask, log_softmax=True)
    return log_p1, log_p2
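# Sketch of the mask_logits helper used above. It is not defined in this
# section; a common formulation (assumed here, mirroring QANet-style
# implementations) replaces masked positions with a large negative value so a
# subsequent softmax assigns them ~zero probability.
import torch

def mask_logits(target, mask, neg_inf=-1e30):
    """Keep logits where mask is 1, push them toward -inf where mask is 0."""
    mask = mask.type(torch.float32)
    return target * mask + (1 - mask) * neg_inf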
def forward(self, att, masks):
    """
    :param att: output of the attention layer, shape (batch_size, seq_len, hidden_size)
    :param masks: mask over the sequence positions
    :return: log_p1, log_p2, each of shape (batch_size, seq_len)
    """
    logits1 = F.dropout(self.output_proj1(att), self.drop_prob)
    logits2 = F.dropout(self.output_proj2(att), self.drop_prob)
    log_p1 = masked_softmax(logits1.squeeze(-1), masks, log_softmax=True)
    log_p2 = masked_softmax(logits2.squeeze(-1), masks, log_softmax=True)
    return log_p1, log_p2