Example #1
    def forward(self, x1, x1_c, x1_f, x1_mask, x2, x2_c, x2_f, x2_mask):
        """Inputs:
        x1 = document word indices             [batch * len_d]
        x1_f = document word features indices  [batch * len_d * nfeat]
        x1_mask = document padding mask        [batch * len_d]
        x2 = question word indices             [batch * len_q]
        x2_mask = question padding mask        [batch * len_q]
        """
        # Embed both document and question
        x1_emb = self.embedding(x1)
        x2_emb = self.embedding(x2)

        # Dropout on embeddings
        if self.args.dropout_emb > 0:
            x1_emb = nn.functional.dropout(x1_emb,
                                           p=self.args.dropout_emb,
                                           training=self.training)
            x2_emb = nn.functional.dropout(x2_emb,
                                           p=self.args.dropout_emb,
                                           training=self.training)

        # Form document encoding inputs
        drnn_input = [x1_emb]

        # Add attention-weighted question representation
        if self.args.use_qemb:
            x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
            drnn_input.append(x2_weighted_emb)

        # Add manual features
        if self.args.num_features > 0:
            drnn_input.append(x1_f)

        # Encode document with RNN
        doc_hiddens = self.doc_rnn(torch.cat(drnn_input, 2), x1_mask)

        # Encode question with RNN + merge hiddens
        question_hiddens = self.question_rnn(x2_emb, x2_mask)

        if self.args.question_merge == 'avg':
            q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
        elif self.args.question_merge == 'self_attn':
            q_merge_weights = self.self_attn(question_hiddens, x2_mask)
        question_hidden = layers.weighted_avg(question_hiddens,
                                              q_merge_weights)

        # Predict start and end positions
        start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
        end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)

        return start_scores, end_scores
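
Example #1 depends on attention helpers defined elsewhere in the repository: self.qemb_match builds the attention-weighted question representation, and self.start_attn / self.end_attn score span boundaries against the merged question vector. They are not shown on this page; the following is a minimal sketch of what such modules could look like, assuming a DrQA-style design in which padding masks use 1 for padded positions. Class names and signatures are illustrative, not taken from this source.

import torch
import torch.nn as nn
import torch.nn.functional as F

class SeqAttnMatch(nn.Module):
    """For each document token, attend over the question tokens and
    return the attention-weighted sum of question embeddings (a sketch)."""
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Linear(input_size, input_size)

    def forward(self, x, y, y_mask):
        # x: batch x len_d x h, y: batch x len_q x h, y_mask: batch x len_q (1 = pad)
        x_proj = F.relu(self.linear(x))
        y_proj = F.relu(self.linear(y))
        scores = x_proj.bmm(y_proj.transpose(2, 1))           # batch x len_d x len_q
        scores = scores.masked_fill(y_mask.unsqueeze(1).bool(), -float('inf'))
        alpha = F.softmax(scores, dim=2)
        return alpha.bmm(y)                                   # batch x len_d x h

class BilinearSeqAttn(nn.Module):
    """Score each document position against the question vector:
    score_i = x_i^T W y (a sketch)."""
    def __init__(self, x_size, y_size):
        super().__init__()
        self.linear = nn.Linear(y_size, x_size)

    def forward(self, x, y, x_mask):
        # x: batch x len_d x x_size, y: batch x y_size, x_mask: batch x len_d (1 = pad)
        Wy = self.linear(y)                                   # batch x x_size
        scores = x.bmm(Wy.unsqueeze(2)).squeeze(2)            # batch x len_d
        scores = scores.masked_fill(x_mask.bool(), -float('inf'))
        # Log-probabilities in training, probabilities at eval time
        # (consistent with Example #3 below, which calls start_scores.exp()).
        return F.log_softmax(scores, dim=1) if self.training else F.softmax(scores, dim=1)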
Example #2
    def forward(self, x1, x1_f, x1_pos, x1_ner, x1_mask, x2, x2_mask):
        """Inputs:
        x1 = document word indices             [batch * len_d]
        x1_f = document word features indices  [batch * len_d * nfeat]
        x1_pos = document POS tags             [batch * len_d]
        x1_ner = document entity tags          [batch * len_d]
        x1_mask = document padding mask        [batch * len_d]
        x2 = question word indices             [batch * len_q]
        x2_mask = question padding mask        [batch * len_q]
        """
        # Embed both document and question
        x1_emb = self.embedding(x1)
        x2_emb = self.embedding(x2)

        # Dropout on embeddings
        if self.opt['dropout_emb'] > 0:
            x1_emb = nn.functional.dropout(x1_emb, p=self.opt['dropout_emb'],
                                           training=self.training)
            x2_emb = nn.functional.dropout(x2_emb, p=self.opt['dropout_emb'],
                                           training=self.training)

        drnn_input_list = [x1_emb, x1_f]
        # Add attention-weighted question representation
        if self.opt['use_qemb']:
            x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
            drnn_input_list.append(x2_weighted_emb)
        if self.opt['pos']:
            drnn_input_list.append(x1_pos)
        if self.opt['ner']:
            drnn_input_list.append(x1_ner)
        drnn_input = torch.cat(drnn_input_list, 2)
        # Encode document with RNN
        doc_hiddens = self.doc_rnn(drnn_input, x1_mask)

        # Encode question with RNN + merge hiddens
        question_hiddens = self.question_rnn(x2_emb, x2_mask)
        if self.opt['question_merge'] == 'avg':
            q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
        elif self.opt['question_merge'] == 'self_attn':
            q_merge_weights = self.self_attn(question_hiddens, x2_mask)
        question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

        # Predict start and end positions
        start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
        end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
        return start_scores, end_scores
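
Examples #1 and #2 merge the question hidden states with layers.uniform_weights and layers.weighted_avg, which are also not shown here. A minimal sketch of these two helpers, assuming the mask marks padding with 1 and each row of the weights sums to 1:

import torch

def uniform_weights(x, x_mask=None):
    # x: batch x len x h, x_mask: batch x len (1 = padding), or None for no padding
    # (Example #4 below passes None.) Equal weight on every non-padded position.
    alpha = torch.ones(x.size(0), x.size(1), device=x.device)
    if x_mask is not None:
        alpha = alpha * x_mask.eq(0).float()
    return alpha / alpha.sum(dim=1, keepdim=True)

def weighted_avg(x, weights):
    # x: batch x len x h, weights: batch x len (rows sum to 1)
    # Weighted average over the length dimension -> batch x h
    return weights.unsqueeze(1).bmm(x).squeeze(1)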
Example #3
    def forward(self, ex):
        """Inputs:
        xq = question word indices             (batch, max_q_len)
        xq_mask = question padding mask        (batch, max_q_len)
        xd = document word indices             (batch, max_d_len)
        xd_f = document word features indices  (batch, max_d_len, nfeat)
        xd_mask = document padding mask        (batch, max_d_len)
        targets = span targets                 (batch,)
        """

        # Embed both document and question
        xq_emb = self.w_embedding(ex['xq'])  # (batch, max_q_len, word_embed)
        xd_emb = self.w_embedding(ex['xd'])  # (batch, max_d_len, word_embed)

        shared_axes = [2] if self.config['word_dropout'] else []
        xq_emb = dropout(xq_emb,
                         self.config['dropout_emb'],
                         shared_axes=shared_axes,
                         training=self.training)
        xd_emb = dropout(xd_emb,
                         self.config['dropout_emb'],
                         shared_axes=shared_axes,
                         training=self.training)
        xd_mask = ex['xd_mask']
        xq_mask = ex['xq_mask']

        # Add attention-weighted question representation
        if self.config['use_qemb']:
            xq_weighted_emb = self.qemb_match(xd_emb, xq_emb, xq_mask)
            drnn_input = torch.cat([xd_emb, xq_weighted_emb], 2)
        else:
            drnn_input = xd_emb

        if self.config["num_features"] > 0:
            drnn_input = torch.cat([drnn_input, ex['xd_f']], 2)

        # Project document and question to the same size as their encoders
        if self.config['resize_rnn_input']:
            drnn_input = F.relu(self.doc_linear(drnn_input))
            xq_emb = F.relu(self.q_linear(xq_emb))
            if self.config['dropout_ff'] > 0:
                # Feed-forward dropout at the configured rate
                drnn_input = F.dropout(drnn_input, p=self.config['dropout_ff'],
                                       training=self.training)
                xq_emb = F.dropout(xq_emb, p=self.config['dropout_ff'],
                                   training=self.training)

        # Encode document with RNN
        doc_hiddens = self.doc_rnn(drnn_input,
                                   xd_mask)  # (batch, max_d_len, hidden_size)

        # Document self attention
        if self.config['doc_self_attn']:
            xd_weighted_emb = self.doc_self_attn(doc_hiddens, doc_hiddens,
                                                 xd_mask)
            doc_hiddens = torch.cat([doc_hiddens, xd_weighted_emb], 2)

        # Encode question with RNN + merge hiddens
        question_hiddens = self.question_rnn(xq_emb, xq_mask)
        if self.config['question_merge'] == 'avg':
            q_merge_weights = uniform_weights(question_hiddens, xq_mask)
        elif self.config['question_merge'] == 'self_attn':
            q_merge_weights = self.self_attn(question_hiddens.contiguous(),
                                             xq_mask)
        question_hidden = weighted_avg(question_hiddens, q_merge_weights)

        # Predict start and end positions
        start_scores = self.start_attn(doc_hiddens, question_hidden, xd_mask)
        if self.config['span_dependency']:
            question_hidden = torch.cat([
                question_hidden,
                (doc_hiddens * start_scores.exp().unsqueeze(2)).sum(1)
            ], 1)
        end_scores = self.end_attn(doc_hiddens, question_hidden, xd_mask)

        return {
            'score_s': start_scores,
            'score_e': end_scores,
            'targets': ex['targets']
        }
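
Unlike the first two examples, Example #3 calls a standalone dropout helper that takes a shared_axes argument instead of nn.functional.dropout. With shared_axes=[2] the dropout mask has size 1 along the embedding dimension, so a word's whole embedding vector is kept or dropped together (word-level dropout). A hedged sketch of such a helper under that assumption:

import torch

def dropout(x, drop_prob, shared_axes=(), training=False):
    # Dropout whose mask is broadcast along the axes listed in shared_axes.
    if drop_prob == 0 or not training:
        return x
    size = list(x.size())
    for axis in shared_axes:
        size[axis] = 1                      # share the mask along this axis
    mask = x.new_empty(size).bernoulli_(1.0 - drop_prob) / (1.0 - drop_prob)
    return x * mask                         # broadcasting expands shared axes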
Example #4
    def forward(self, x1, x2):
        """Inputs:
        x1 = premise word indices                [batch * len_1]
        x1_f = premise word features indices     [batch * len_1 * nfeat]
        x1_pos = premise POS tags                [batch * len_1]
        x1_ner = premise entity tags             [batch * len_1]
        x1_mask = premise padding mask           [batch * len_1]
        x2 = hypothesis word indices             [batch * len_2]
        x2_f = hypothesis word features indices  [batch * len_2 * nfeat]
        x2_pos = hypothesis POS tags             [batch * len_2]
        x2_ner = hypothesis entity tags          [batch * len_2]
        x2_mask = hypothesis padding mask        [batch * len_2]
        """
        # Prepare premise and hypothesis input
        Prnn_input_list = []
        Hrnn_input_list = []

        # Word embeddings
        emb = self.embedding if self.training else self.eval_embed
        x1_emb, x2_emb = emb(x1), emb(x2)
        # Dropout on embeddings
        if self.opt['dropout_emb'] > 0:
            x1_emb = layers.dropout(x1_emb,
                                    p=self.opt['dropout_emb'],
                                    training=self.training)
            x2_emb = layers.dropout(x2_emb,
                                    p=self.opt['dropout_emb'],
                                    training=self.training)
        Prnn_input_list.append(x1_emb)
        Hrnn_input_list.append(x2_emb)

        #         # Contextualized embeddings
        #         _, x1_cove = self.CoVe(x1, x1_mask)
        #         _, x2_cove = self.CoVe(x2, x2_mask)
        #         if self.opt['dropout_emb'] > 0:
        #             x1_cove = layers.dropout(x1_cove, p=self.opt['dropout_emb'], training=self.training)
        #             x2_cove = layers.dropout(x2_cove, p=self.opt['dropout_emb'], training=self.training)
        #         Prnn_input_list.append(x1_cove)
        #         Hrnn_input_list.append(x2_cove)
        #
        #         # POS embeddings
        #         x1_pos_emb = self.pos_embedding(x1_pos)
        #         x2_pos_emb = self.pos_embedding(x2_pos)
        #         Prnn_input_list.append(x1_pos_emb)
        #         Hrnn_input_list.append(x2_pos_emb)
        #
        #         # NER embeddings
        #         x1_ner_emb = self.ner_embedding(x1_ner)
        #         x2_ner_emb = self.ner_embedding(x2_ner)
        #         Prnn_input_list.append(x1_ner_emb)
        #         Hrnn_input_list.append(x2_ner_emb)
        #
        #         x1_input = torch.cat(Prnn_input_list, 2)
        #         x2_input = torch.cat(Hrnn_input_list, 2)

        # Now the features are ready
        # x1_input: [batch_size, len_1, input_size]
        # x2_input: [batch_size, len_2, input_size]

        x1_input = x1_emb
        x2_input = x2_emb

        #         if self.opt['full_att_type'] == 2:
        #             x1_f = layers.dropout(x1_f, p=self.opt['dropout_EM'], training=self.training)
        #             x2_f = layers.dropout(x2_f, p=self.opt['dropout_EM'], training=self.training)
        #             Paux_input, Haux_input = x1_f, x2_f
        #         else:
        #             Paux_input = x1_f[:, :, 0].contiguous().view(x1_f.size(0), x1_f.size(1), 1)
        #             Haux_input = x2_f[:, :, 0].contiguous().view(x2_f.size(0), x2_f.size(1), 1)

        # Encode premise with RNN
        P_abstr_ls = self.P_rnn(x1_input)
        # Encode hypothesis with RNN
        H_abstr_ls = self.H_rnn(x2_input)

        # Fusion
        if self.opt['full_att_type'] == 0:
            P_atts = P_abstr_ls[-1].contiguous()
            H_atts = H_abstr_ls[-1].contiguous()
            P_xs = P_abstr_ls[-1].contiguous()
            H_xs = H_abstr_ls[-1].contiguous()
        elif self.opt['full_att_type'] == 1:
            P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
            H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
            P_xs = P_abstr_ls[-1].contiguous()
            H_xs = H_abstr_ls[-1].contiguous()
        elif self.opt['full_att_type'] == 2:
            P_atts = torch.cat([x1_input] + P_abstr_ls, 2)
            H_atts = torch.cat([x2_input] + H_abstr_ls, 2)
            P_xs = torch.cat(P_abstr_ls, 2)
            H_xs = torch.cat(H_abstr_ls, 2)
        aP_xs = self.full_attn_P(P_atts, H_atts, P_xs, H_xs, None)
        aH_xs = self.full_attn_H(H_atts, P_atts, H_xs, P_xs, None)
        P_hiddens = torch.cat([P_xs, aP_xs], 2)
        H_hiddens = torch.cat([H_xs, aH_xs], 2)

        # Inference on premise and hypothesis
        P_hiddens = torch.cat(self.P_infer_rnn(P_hiddens, None), 2)
        H_hiddens = torch.cat(self.H_infer_rnn(H_hiddens, None), 2)

        # Merge hiddens for answer classification
        if self.opt['final_merge'] == 'avg':
            P_merge_weights = layers.uniform_weights(P_hiddens, None)
            H_merge_weights = layers.uniform_weights(H_hiddens, None)
        elif self.opt['final_merge'] == 'linear_self_attn':
            P_merge_weights = self.self_attn_P(P_hiddens, None)
            H_merge_weights = self.self_attn_H(H_hiddens, None)
        P_avg_hidden = layers.weighted_avg(P_hiddens, P_merge_weights)
        H_avg_hidden = layers.weighted_avg(H_hiddens, H_merge_weights)
        P_max_hidden = torch.max(P_hiddens, 1)[0]
        H_max_hidden = torch.max(H_hiddens, 1)[0]

        # Predict scores for different classes
        scores = self.classifier(
            torch.cat([P_avg_hidden, H_avg_hidden, P_max_hidden, H_max_hidden],
                      1))

        return scores  # -inf to inf
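
All four examples collapse a sequence of hidden states into a single vector with a learned self-attention (self.self_attn in Examples #1-#3, self.self_attn_P / self.self_attn_H here). A minimal sketch of such a linear self-attention scorer, assuming a mask convention of 1 = padding and treating a None mask (as passed in Example #4) as "no padding":

import torch
import torch.nn as nn
import torch.nn.functional as F

class LinearSeqAttn(nn.Module):
    # Self attention over a sequence: alpha_i = softmax_i(w . x_i) (a sketch).
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x, x_mask=None):
        # x: batch x len x h, x_mask: batch x len (1 = padding) or None
        scores = self.linear(x).squeeze(2)               # batch x len
        if x_mask is not None:
            scores = scores.masked_fill(x_mask.bool(), -float('inf'))
        return F.softmax(scores, dim=1)                  # rows sum to 1

With self.self_attn_P = LinearSeqAttn(hidden_size), the returned weights would feed layers.weighted_avg exactly as P_merge_weights does in the snippet above.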