def forward(self, x1, x1_c, x1_f, x1_mask, x2, x2_c, x2_f, x2_mask):
    """Score document positions as answer-span start and end.

    Inputs:
    x1 = document word indices             [batch * len_d]
    x1_f = document word features indices  [batch * len_d * nfeat]
    x1_mask = document padding mask        [batch * len_d]
    x2 = question word indices             [batch * len_q]
    x2_mask = question padding mask        [batch * len_q]
    x1_c, x2_c, x2_f = accepted but never read by this method.

    Returns:
        Tuple ``(start_scores, end_scores)`` as produced by
        ``self.start_attn`` and ``self.end_attn``.

    Raises:
        ValueError: if ``self.args.question_merge`` is neither ``'avg'``
            nor ``'self_attn'``.
    """
    # Fail fast on an unsupported merge mode. Without this check the
    # problem only surfaced after both encoders had run, as an
    # UnboundLocalError on the merge weights.
    question_merge = self.args.question_merge
    if question_merge not in ('avg', 'self_attn'):
        raise ValueError(
            "Unsupported question_merge: {!r} (expected 'avg' or "
            "'self_attn')".format(question_merge)
        )

    # Embed both document and question
    x1_emb = self.embedding(x1)
    x2_emb = self.embedding(x2)

    # Dropout on embeddings
    dropout_emb = self.args.dropout_emb
    if dropout_emb > 0:
        x1_emb = nn.functional.dropout(
            x1_emb, p=dropout_emb, training=self.training)
        x2_emb = nn.functional.dropout(
            x2_emb, p=dropout_emb, training=self.training)

    # Form document encoding inputs
    drnn_input = [x1_emb]

    # Add attention-weighted question representation
    if self.args.use_qemb:
        x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
        drnn_input.append(x2_weighted_emb)

    # Add manual features
    if self.args.num_features > 0:
        drnn_input.append(x1_f)

    # Encode document with RNN (inputs are joined along the feature axis)
    doc_hiddens = self.doc_rnn(torch.cat(drnn_input, 2), x1_mask)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(x2_emb, x2_mask)
    if question_merge == 'avg':
        q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
    else:  # 'self_attn' (validated above)
        q_merge_weights = self.self_attn(question_hiddens, x2_mask)
    question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
    end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
    return start_scores, end_scores
def forward(self, x1, x1_f, x1_pos, x1_ner, x1_mask, x2, x2_mask):
    """Score document positions as answer-span start and end.

    Inputs:
    x1 = document word indices             [batch * len_d]
    x1_f = document word features indices  [batch * len_d * nfeat]
    x1_pos = document POS tags             [batch * len_d]
    x1_ner = document entity tags          [batch * len_d]
    x1_mask = document padding mask        [batch * len_d]
    x2 = question word indices             [batch * len_q]
    x2_mask = question padding mask        [batch * len_q]

    NOTE(review): x1_pos / x1_ner are concatenated with the embeddings
    along dim 2, so they presumably arrive as 3-D feature tensors rather
    than the 2-D shape listed above -- TODO confirm against the caller.

    Returns:
        Tuple ``(start_scores, end_scores)`` as produced by
        ``self.start_attn`` and ``self.end_attn``.

    Raises:
        ValueError: if ``self.opt['question_merge']`` is neither ``'avg'``
            nor ``'self_attn'``.
    """
    # Fail fast on an unsupported merge mode. Without this check the
    # problem only surfaced after both encoders had run, as an
    # UnboundLocalError on the merge weights.
    question_merge = self.opt['question_merge']
    if question_merge not in ('avg', 'self_attn'):
        raise ValueError(
            "Unsupported question_merge: {!r} (expected 'avg' or "
            "'self_attn')".format(question_merge)
        )

    # Embed both document and question
    x1_emb = self.embedding(x1)
    x2_emb = self.embedding(x2)

    # Dropout on embeddings
    dropout_emb = self.opt['dropout_emb']
    if dropout_emb > 0:
        x1_emb = nn.functional.dropout(
            x1_emb, p=dropout_emb, training=self.training)
        x2_emb = nn.functional.dropout(
            x2_emb, p=dropout_emb, training=self.training)

    # Word embeddings and manual features are always part of the input
    drnn_input_list = [x1_emb, x1_f]

    # Add attention-weighted question representation
    if self.opt['use_qemb']:
        x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
        drnn_input_list.append(x2_weighted_emb)

    # Optional POS / NER inputs
    if self.opt['pos']:
        drnn_input_list.append(x1_pos)
    if self.opt['ner']:
        drnn_input_list.append(x1_ner)
    drnn_input = torch.cat(drnn_input_list, 2)

    # Encode document with RNN
    doc_hiddens = self.doc_rnn(drnn_input, x1_mask)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(x2_emb, x2_mask)
    if question_merge == 'avg':
        q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
    else:  # 'self_attn' (validated above)
        q_merge_weights = self.self_attn(question_hiddens, x2_mask)
    question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
    end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
    return start_scores, end_scores
def forward(self, ex):
    """Score document positions as answer-span start and end.

    Inputs (keys of ``ex``):
    xq = question word indices             (batch, max_q_len)
    xq_mask = question padding mask        (batch, max_q_len)
    xd = document word indices             (batch, max_d_len)
    xd_f = document word features indices  (batch, max_d_len, nfeat)
    xd_mask = document padding mask        (batch, max_d_len)
    targets = span targets                 (batch,)

    Returns:
        dict with ``'score_s'`` (output of ``self.start_attn``),
        ``'score_e'`` (output of ``self.end_attn``) and ``'targets'``
        (``ex['targets']`` passed through unchanged).

    Raises:
        ValueError: if ``self.config['question_merge']`` is neither
            ``'avg'`` nor ``'self_attn'``.
    """
    # Fail fast on an unsupported merge mode. Without this check the
    # problem only surfaced after both encoders had run, as an
    # UnboundLocalError on the merge weights.
    question_merge = self.config['question_merge']
    if question_merge not in ('avg', 'self_attn'):
        raise ValueError(
            "Unsupported question_merge: {!r} (expected 'avg' or "
            "'self_attn')".format(question_merge)
        )

    # Embed both document and question
    xq_emb = self.w_embedding(ex['xq'])  # (batch, max_q_len, word_embed)
    xd_emb = self.w_embedding(ex['xd'])  # (batch, max_d_len, word_embed)
    shared_axes = [2] if self.config['word_dropout'] else []
    xq_emb = dropout(xq_emb, self.config['dropout_emb'],
                     shared_axes=shared_axes, training=self.training)
    xd_emb = dropout(xd_emb, self.config['dropout_emb'],
                     shared_axes=shared_axes, training=self.training)
    xd_mask = ex['xd_mask']
    xq_mask = ex['xq_mask']

    # Add attention-weighted question representation
    if self.config['use_qemb']:
        xq_weighted_emb = self.qemb_match(xd_emb, xq_emb, xq_mask)
        drnn_input = torch.cat([xd_emb, xq_weighted_emb], 2)
    else:
        drnn_input = xd_emb
    if self.config["num_features"] > 0:
        drnn_input = torch.cat([drnn_input, ex['xd_f']], 2)

    # Project document and question to the same size as their encoders
    if self.config['resize_rnn_input']:
        drnn_input = F.relu(self.doc_linear(drnn_input))
        xq_emb = F.relu(self.q_linear(xq_emb))
        dropout_ff = self.config['dropout_ff']
        if dropout_ff > 0:
            # Pass the configured rate explicitly: F.dropout defaults to
            # p=0.5, so omitting it silently ignored the dropout_ff value.
            drnn_input = F.dropout(drnn_input, p=dropout_ff,
                                   training=self.training)
            xq_emb = F.dropout(xq_emb, p=dropout_ff,
                               training=self.training)

    # Encode document with RNN
    doc_hiddens = self.doc_rnn(drnn_input, xd_mask)  # (batch, max_d_len, hidden_size)

    # Document self attention: append the self-attended states as features
    if self.config['doc_self_attn']:
        xd_weighted_emb = self.doc_self_attn(doc_hiddens, doc_hiddens, xd_mask)
        doc_hiddens = torch.cat([doc_hiddens, xd_weighted_emb], 2)

    # Encode question with RNN + merge hiddens
    question_hiddens = self.question_rnn(xq_emb, xq_mask)
    if question_merge == 'avg':
        q_merge_weights = uniform_weights(question_hiddens, xq_mask)
    else:  # 'self_attn' (validated above)
        q_merge_weights = self.self_attn(question_hiddens.contiguous(), xq_mask)
    question_hidden = weighted_avg(question_hiddens, q_merge_weights)

    # Predict start and end positions
    start_scores = self.start_attn(doc_hiddens, question_hidden, xd_mask)
    if self.config['span_dependency']:
        # Condition the end pointer on the start prediction by appending
        # the document states weighted by exp(start_scores).
        # NOTE(review): presumably start_scores are log-probabilities, so
        # this is an expected start representation -- confirm in start_attn.
        start_summary = (doc_hiddens * start_scores.exp().unsqueeze(2)).sum(1)
        question_hidden = torch.cat([question_hidden, start_summary], 1)
    end_scores = self.end_attn(doc_hiddens, question_hidden, xd_mask)

    return {
        'score_s': start_scores,
        'score_e': end_scores,
        'targets': ex['targets'],
    }
def forward(self, x1, x2):
    """Classify a premise/hypothesis pair.

    Inputs:
    x1 = premise word indices     [batch * len_1]
    x2 = hypothesis word indices  [batch * len_2]

    NOTE: only word embeddings feed the encoders. The contextualized
    (CoVe), POS, NER and manual-feature inputs that earlier revisions of
    this method consumed are disabled, and no padding masks are used
    (``None`` is passed wherever a mask is expected).

    Returns:
        The output of ``self.classifier`` applied to the concatenated
        average- and max-pooled premise/hypothesis states (unbounded
        scores, -inf to inf).

    Raises:
        ValueError: if ``self.opt['full_att_type']`` is not 0, 1 or 2, or
            ``self.opt['final_merge']`` is neither ``'avg'`` nor
            ``'linear_self_attn'``.
    """
    # Fail fast on unsupported options. Without these checks the problem
    # only surfaced mid-way through the forward pass, as an
    # UnboundLocalError on whichever variable the missing branch left unset.
    full_att_type = self.opt['full_att_type']
    if full_att_type not in (0, 1, 2):
        raise ValueError(
            "Unsupported full_att_type: {!r} (expected 0, 1 or 2)".format(
                full_att_type)
        )
    final_merge = self.opt['final_merge']
    if final_merge not in ('avg', 'linear_self_attn'):
        raise ValueError(
            "Unsupported final_merge: {!r} (expected 'avg' or "
            "'linear_self_attn')".format(final_merge)
        )

    # Word embeddings (a separate embedding module is used outside training)
    emb = self.embedding if self.training else self.eval_embed
    x1_emb, x2_emb = emb(x1), emb(x2)

    # Dropout on embeddings
    dropout_emb = self.opt['dropout_emb']
    if dropout_emb > 0:
        x1_emb = layers.dropout(x1_emb, p=dropout_emb, training=self.training)
        x2_emb = layers.dropout(x2_emb, p=dropout_emb, training=self.training)

    # Now the features are ready
    # x1_input: [batch_size, doc_len, input_size]
    # x2_input: [batch_size, doc_len, input_size]
    x1_input = x1_emb
    x2_input = x2_emb

    # Encode premise with RNN
    P_abstr_ls = self.P_rnn(x1_input)
    # Encode hypothesis with RNN
    H_abstr_ls = self.H_rnn(x2_input)

    # Fusion.
    # Attention keys: top RNN layer only (type 0), or the input plus every
    # RNN layer (types 1 and 2).
    if full_att_type == 0:
        P_atts = P_abstr_ls[-1].contiguous()
        H_atts = H_abstr_ls[-1].contiguous()
    else:
        P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
        H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
    # Attention values: every RNN layer (type 2), or the top layer only
    # (types 0 and 1).
    if full_att_type == 2:
        P_xs = torch.cat(P_abstr_ls, 2)
        H_xs = torch.cat(H_abstr_ls, 2)
    else:
        P_xs = P_abstr_ls[-1].contiguous()
        H_xs = H_abstr_ls[-1].contiguous()

    aP_xs = self.full_attn_P(P_atts, H_atts, P_xs, H_xs, None)
    aH_xs = self.full_attn_H(H_atts, P_atts, H_xs, P_xs, None)
    P_hiddens = torch.cat([P_xs, aP_xs], 2)
    H_hiddens = torch.cat([H_xs, aH_xs], 2)

    # Inference on premise and hypothesis
    P_hiddens = torch.cat(self.P_infer_rnn(P_hiddens, None), 2)
    H_hiddens = torch.cat(self.H_infer_rnn(H_hiddens, None), 2)

    # Merge hiddens for answer classification
    if final_merge == 'avg':
        P_merge_weights = layers.uniform_weights(P_hiddens, None)
        H_merge_weights = layers.uniform_weights(H_hiddens, None)
    else:  # 'linear_self_attn' (validated above)
        P_merge_weights = self.self_attn_P(P_hiddens, None)
        H_merge_weights = self.self_attn_H(H_hiddens, None)
    P_avg_hidden = layers.weighted_avg(P_hiddens, P_merge_weights)
    H_avg_hidden = layers.weighted_avg(H_hiddens, H_merge_weights)
    # torch.max over dim 1 returns (values, indices); keep the values
    P_max_hidden = torch.max(P_hiddens, 1)[0]
    H_max_hidden = torch.max(H_hiddens, 1)[0]

    # Predict scores for different classes
    scores = self.classifier(
        torch.cat([P_avg_hidden, H_avg_hidden, P_max_hidden, H_max_hidden], 1))

    return scores  # -inf to inf