Example #1
0
    def __init__(self, args):
        """Build the TriAN network: word/POS/NER/relation embeddings,
        question<->choice attention matching, and stacked BRNN encoders
        for the question and the answer choice.

        Args:
            args: hyperparameter namespace (use_elmo, elmo_num_layer,
                pos/ner/rel_emb_dim, hidden_size, rnn_type, dropout and
                rnn_padding flags).
        """
        super(TriAN, self).__init__()
        self.args = args

        # Word representation: either ELMo layer features (1024-d per
        # layer) or a trainable 300-d embedding over the module-level
        # `vocab`.
        if self.args.use_elmo:
            self.embedding_dim = self.args.elmo_num_layer * 1024
        else:
            self.embedding_dim = 300
            self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
            # Zero the table, then randomly init only the first two rows
            # (special tokens, presumably pad/unk -- TODO confirm); the
            # remaining rows are expected to be loaded from pretrained
            # vectors elsewhere.
            self.embedding.weight.data.fill_(0)
            self.embedding.weight.data[:2].normal_(0, 0.1)

        # Feature embeddings over module-level vocabularies; row 0 is padding.
        self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim) # question-aware choice representation
        self.q_c_emb_match = layers.SeqAttnMatch(self.embedding_dim) # choice-aware question representation

        # RNN question encoder: 2 * word emb + pos emb + ner emb + manual features + rel emb
        qst_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 4 + args.rel_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder; input is twice the word-embedding size
        # (presumably raw + matched embeddings -- forward() not shown).
        choice_input_size = 2 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders (x2 for bidirectional)
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging: self-attention pooling over each sequence
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Final scalar score over concatenated pooled representations
        self.project = nn.Linear(2 * question_hidden_size + choice_hidden_size, 1)
Example #2
0
    def __init__(self, args, word_dict, char_dict):
        """Build the DSSM matcher: word- and char-level embeddings, GRU
        encoders, self-attention pooling, and a two-branch MLP head that
        outputs log-probabilities over 2 classes.

        Args:
            args: hyperparameter namespace; word/char dims and hidden
                sizes are overwritten below with fixed values.
            word_dict: word vocabulary (len() gives vocab size).
            char_dict: character vocabulary (len() gives vocab size).
        """
        super(DSSM, self).__init__()
        # NOTE: these intentionally overwrite whatever the caller set.
        args.word_dim = 300
        args.char_dim = 50
        args.word_hidden = 256
        args.char_hidden = 128
        self.args = args

        # word layers (row 0 of each embedding table is padding)
        self.word_embedding = nn.Embedding(len(word_dict),
                                           args.word_dim,
                                           padding_idx=0)
        self.word_gru_bi = nn.GRU(args.word_dim,
                                  args.word_hidden,
                                  num_layers=1,
                                  batch_first=True,
                                  bidirectional=True)
        self.word_gru1 = nn.GRU(args.word_dim,
                                args.word_hidden,
                                num_layers=2,
                                batch_first=True)
        self.self_word_attn = layers.LinearSeqAttn(args.word_dim)

        # char layers
        self.char_embedding = nn.Embedding(len(char_dict),
                                           args.char_dim,
                                           padding_idx=0)
        self.self_char_attn = layers.LinearSeqAttn(args.char_dim)
        self.char_gru = nn.GRU(args.char_dim,
                               args.char_hidden,
                               bidirectional=True,
                               batch_first=True)
        # Both heads consume the same concatenated feature vector
        # (factored out here instead of duplicating the expression);
        # their outputs (32 + 48 = 80) feed linear3.
        feature_size = (6 * args.word_hidden + 2 * args.word_dim +
                        args.char_hidden * 4 + 2 * args.char_dim)
        self.linear1 = nn.Linear(feature_size, 32)
        self.act1 = nn.ReLU()
        self.linear2 = nn.Linear(feature_size, 48)
        self.act2 = nn.Sigmoid()
        self.linear3 = nn.Linear(80, 2)
        # Fix: LogSoftmax without an explicit `dim` is deprecated and
        # raises on modern PyTorch. The head produces (batch, 2) logits,
        # so normalize over dim=1 (the class dimension).
        self.act3 = nn.LogSoftmax(dim=1)
        self.dropout = nn.Dropout(0.5)
Example #3
0
File: model.py  Project: vycezhong/qa
    def __init__(self, config):
        """Build the Reader: embedding, question-aware passage attention,
        stacked BiLSTM encoders, question self-attention merging, and
        bilinear attentions predicting the answer-span start/end.

        Args:
            config: object whose `.model` attribute carries vocab_size,
                embedding_dim, num_features, hidden_size, passage/question
                layer counts and dropout_rate.
        """
        super(Reader, self).__init__()
        self.config = config.model

        # word embedding
        self.embedding = nn.Embedding(self.config.vocab_size,
                                      self.config.embedding_dim)

        # attention weighted question
        self.qemb_match = layers.SeqAttnMatch(self.config.embedding_dim)
        init.normal_(self.qemb_match.linear.weight, 1)
        # Fix: `init.constant` is the deprecated alias (removed in recent
        # PyTorch); use the in-place `init.constant_`, matching the
        # adjacent `init.normal_` calls.
        init.constant_(self.qemb_match.linear.bias, 0.1)

        # Passage input: word emb + manual features + question-matched emb
        self.passage_input_size = self.config.embedding_dim + self.config.num_features + self.config.embedding_dim
        self.question_input_size = self.config.embedding_dim
        self.passage_encoder = layers.StackedBiLSTM(
            input_size=self.passage_input_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.passage_layers,
            dropout_rate=self.config.dropout_rate)

        self.question_encoder = layers.StackedBiLSTM(
            input_size=self.question_input_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.question_layers,
            dropout_rate=self.config.dropout_rate)

        # question merging
        self.self_attn = layers.LinearSeqAttn(self.config.hidden_size)
        init.normal_(self.self_attn.linear.weight, 1)
        init.constant_(self.self_attn.linear.bias, 0.1)

        # span start/end
        self.start_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                                 self.config.hidden_size)
        # Fix: the original re-initialized self.qemb_match here (copy-paste
        # error); initialize the start/end attention layers instead.
        # Assumes BilinearSeqAttn exposes `.linear` like the other
        # attention layers in `layers` -- TODO confirm.
        init.normal_(self.start_attn.linear.weight, 1)
        init.constant_(self.start_attn.linear.bias, 0.1)

        self.end_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                               self.config.hidden_size)
        init.normal_(self.end_attn.linear.weight, 1)
        init.constant_(self.end_attn.linear.bias, 0.1)
    def __init__(self, args):
        """Build MyModel: word/POS/NER/relation embeddings, a context
        BRNN encoder, dot-attention matching, a multi-turn inference
        module, and linear layers producing a scalar choice score.

        Args:
            args: hyperparameter namespace (emb dims, hidden_size,
                doc_layers, rnn_type, matching_order, use_multiturn_infer,
                use_bilstm, dropout/padding flags).
        """
        super(MyModel, self).__init__()
        self.args = args
        self.embedding_dim = 300
        # Word embedding over module-level `vocab`; row 0 is padding.
        self.embedding = nn.Embedding(len(vocab),
                                      self.embedding_dim,
                                      padding_idx=0)
        # Zero the table, then randomly init only the first two rows
        # (special tokens); the rest presumably comes from pretrained
        # vectors loaded elsewhere -- TODO confirm.
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)

        # Feature embeddings over module-level vocabularies.
        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)

        #self.emb_match = layers.SeqAttnMatch(self.embedding_dim)
        #self.q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        #self.c_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        #self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        #self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        #self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
        # Debug trace of the configured matching order.
        print("###########self.args.matching_order:  %s " %
              (self.args.matching_order))

        # RNN context encoder
        #rnn_input_size  = self.embedding_dim+ args.pos_emb_dim + args.ner_emb_dim +5+ 2*args.rel_emb_dim
        # word emb + pos emb + ner emb + 5 manual features + 2 rel embs
        rnn_input_size = self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim
        #rnn_input_size  = self.embedding_dim+ 5
        self.context_rnn = layers.StackedBRNN(
            input_size=rnn_input_size,
            #input_size=self.embedding_dim,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        self.hidden_match = layers.SeqDotAttnMatch()
        self.mtinfer = layers.MultiTurnInference(args, self.RNN_TYPES)

        #mtinfer output size
        # NOTE: both branches currently yield the same value; the
        # commented alternatives scaled by len(matching_order).
        if args.use_multiturn_infer or args.use_bilstm:
            choice_infer_hidden_size = 2 * args.hidden_size
            #choice_infer_hidden_size = 2 * args.hidden_size * len(args.matching_order)
        else:
            #choice_infer_hidden_size = args.hidden_size * len(args.matching_order)
            choice_infer_hidden_size = 2 * args.hidden_size

        #self.c_infer_self_attn = layers.LinearSeqAttn(choice_infer_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(2 * args.hidden_size)

        # Projection from inference output down to hidden_size; input
        # width depends on whether multi-turn inference is enabled.
        if args.use_multiturn_infer == True:
            self.c_infer_linear = nn.Linear(4 * choice_infer_hidden_size,
                                            args.hidden_size)
        #elif args.use_bilstm == True:
        else:
            self.c_infer_linear = nn.Linear(
                2 * choice_infer_hidden_size + 2 * 2 * args.hidden_size,
                args.hidden_size)

        # Final scalar logit per choice.
        self.logits_linear = nn.Linear(args.hidden_size, 1)
Example #5
0
    def __init__(self, args):
        """Build TriAN: embeddings, three pairwise attention matchers
        (passage/question/choice), BRNN encoders for each sequence, and
        bilinear scoring layers.

        Args:
            args: hyperparameter namespace (emb dims, hidden_size,
                doc_layers, rnn_type, dropout/padding flags).
        """
        super(TriAN, self).__init__()
        self.args = args
        self.embedding_dim = 300
        # Word embedding over module-level `vocab`; zero-filled, then only
        # the first two rows (special tokens) randomly initialized -- the
        # rest presumably loaded from pretrained vectors elsewhere.
        self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)
        # POS/NER/relation feature embeddings; row 0 is padding.
        self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        # Pairwise attention matchers in embedding space.
        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)

        # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder; 3x emb (presumably raw + two matched
        # representations -- forward() not shown).
        choice_input_size = 3 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders (x2 for bidirectional)
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging: self-attention pooling
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Question-conditioned attention over the passage.
        self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)

        # Bilinear scoring projections (passage->choice, question->choice).
        self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
        self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
Example #6
0
    def __init__(self, args, normalize=True):
        """Build the DrQA-style RnnDocReader: word embeddings, optional
        question-attention over the document, BRNN encoders, question
        merging, and bilinear span start/end attentions.

        Args:
            args: hyperparameter namespace (vocab_size, embedding_dim,
                num_features, hidden_size, layer counts, rnn/dropout and
                merging options).
            normalize: forwarded to the start/end BilinearSeqAttn layers.

        Raises:
            NotImplementedError: if args.question_merge is not one of
                'avg' or 'self_attn'.
        """
        super(RnnDocReader, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)

        # Projection for attention weighted question
        if args.use_qemb:
            self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = args.embedding_dim + args.num_features
        if args.use_qemb:
            doc_input_size += args.embedding_dim

        # RNN document encoder
        # NOTE(review): self.RNN_TYPES is not set here; presumably a
        # class attribute -- confirm on the enclosing class.
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=args.embedding_dim,
            hidden_size=args.hidden_size,
            num_layers=args.question_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        if args.concat_rnn_layers:
            doc_hidden_size *= args.doc_layers
            question_hidden_size *= args.question_layers

        # Question merging
        if args.question_merge not in ['avg', 'self_attn']:
            # Fix: the original formatted `args.merge_mode`, an attribute
            # that is never checked or set (the option above is
            # `question_merge`), so raising would itself crash with
            # AttributeError.
            raise NotImplementedError('question_merge = %s' % args.question_merge)
        if args.question_merge == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
            normalize=normalize,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
            normalize=normalize,
        )
Example #7
0
    def __init__(self, args):
        """Build MyModel: embeddings, context BRNN, an extra Hq BiLSTM,
        dot-attention matching, multi-turn inference, a configurable
        tri-matching strategy, and channel-dependent output projections.

        Args:
            args: hyperparameter namespace (emb dims, hidden_size,
                doc_layers, rnn_type, tri_input, p/q/c_channel flags,
                use_multiturn_infer, use_bilstm, dropout/padding flags).
        """
        super(MyModel, self).__init__()
        self.args = args
        self.embedding_dim = 300
        # Word embedding over module-level `vocab`; zero-filled, then only
        # the first two rows (special tokens) randomly initialized.
        self.embedding = nn.Embedding(len(vocab),
                                      self.embedding_dim,
                                      padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)

        # POS/NER/relation feature embeddings; row 0 is padding.
        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)

        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
        # Debug trace of the configured matching order.
        print("###########self.args.matching_order:  %s " %
              (self.args.matching_order))

        # RNN context encoder
        #rnn_input_size  = self.embedding_dim+ args.pos_emb_dim + args.ner_emb_dim +5+ 2*args.rel_emb_dim
        # word emb + pos emb + ner emb + 5 manual features + 2 rel embs
        rnn_input_size = self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim
        #rnn_input_size  = self.embedding_dim+ 5
        self.context_rnn = layers.StackedBRNN(
            input_size=rnn_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn_output,  # float
            dropout_output=args.rnn_output_dropout,  #True or False
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Extra BiLSTM whose input is the raw features concatenated with a
        # hidden_size-wide vector (exact usage in forward(), not shown).
        self.Hq_BiLstm = layers.StackedBRNN(
            input_size=rnn_input_size + args.hidden_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,  # float
            dropout_output=args.rnn_output_dropout,  #True or False
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        self.hidden_match = layers.SeqDotAttnMatch()
        self.mtinfer = layers.MultiTurnInference(args, self.RNN_TYPES)

        # Select the tri-matching strategy method by configuration.
        if self.args.tri_input == 'NA':
            self.mfunction = self.NA_TriMatching

        elif self.args.tri_input == 'CA':
            self.mfunction = self.CA_TriMatching
        else:
            self.mfunction = self.NA_CA_TriMatching

        #mtinfer output size
        # NOTE: both branches currently yield the same value; the
        # commented alternatives scaled by len(matching_order).
        if args.use_multiturn_infer or args.use_bilstm:
            choice_infer_hidden_size = 2 * args.hidden_size
            #choice_infer_hidden_size = 2 * args.hidden_size * len(args.matching_order)
        else:
            #choice_infer_hidden_size = args.hidden_size * len(args.matching_order)
            choice_infer_hidden_size = 2 * args.hidden_size

        #self.c_infer_self_attn = layers.LinearSeqAttn(choice_infer_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(2 * args.hidden_size)
        '''my:'''
        self.linearlayer = nn.Linear(rnn_input_size, args.hidden_size)  ##my
        # Scalar prediction over hidden state + the raw feature widths.
        self.pre_y = nn.Linear(
            2 * args.hidden_size + args.pos_emb_dim + args.ner_emb_dim + 5 +
            2 * args.rel_emb_dim, 1)
        # Inference projection; input width depends on multi-turn mode and
        # which of the p/q/c channels are enabled.
        if args.use_multiturn_infer == True:
            #self.c_infer_linear= nn.Linear(4*choice_infer_hidden_size,args.hidden_size)
            self.c_infer_linear = nn.Linear(
                4 * choice_infer_hidden_size + 2 * 2 * args.hidden_size,
                args.hidden_size)
        #elif args.use_bilstm == True:
        else:
            infer_input_size = 2 * 2 * args.hidden_size
            if self.args.p_channel == True:
                infer_input_size += 2 * choice_infer_hidden_size

            if self.args.q_channel == True:
                infer_input_size += 2 * choice_infer_hidden_size

            if self.args.c_channel == True:
                infer_input_size += 2 * choice_infer_hidden_size

            self.c_infer_linear = nn.Linear(infer_input_size, args.hidden_size)
Example #8
0
    def __init__(self, args):
        """Build this TriAN span-prediction variant: embeddings, a
        question-aware passage matcher, BRNN encoders, fixed-size
        start/end attention stacks, bilinear scorers, and feed-forward
        refinement layers over max-length-padded passages.

        Args:
            args: hyperparameter namespace (emb dims, hidden_size,
                doc_layers, rnn_type, p_max_size, q_max_size,
                dropout/padding flags).
        """
        super(TriAN, self).__init__()
        self.args = args
        self.embedding_dim = 300
        # Word embedding over module-level `vocab`; zero-filled, then only
        # the first two rows (special tokens) randomly initialized.
        self.embedding = nn.Embedding(len(vocab),
                                      self.embedding_dim,
                                      padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)
        # POS/NER/relation feature embeddings; row 0 is padding.
        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        # Question-aware passage representation in embedding space.
        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)

        # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + args.rel_emb_dim

        # Max passage size
        p_max_size = args.p_max_size
        self.p_max_size = p_max_size

        # Max question size
        q_max_size = args.q_max_size
        self.q_max_size = q_max_size

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders (x2 for bidirectional)
        doc_hidden_size = 2 * args.hidden_size
        self.doc_hidden_size = doc_hidden_size
        question_hidden_size = 2 * args.hidden_size
        self.question_hidden_size = question_hidden_size
        # print('p_mask : ' , doc_input_size)

        # Attention over passage and question; these project-local layers
        # take max-size arguments (signatures differ from the DrQA ones).
        self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size,
                                                      q_max_size)
        self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size,
                                                     p_max_size)

        self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size,
                                                    q_max_size)
        self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size,
                                                   p_max_size)

        # Bilinear layer and sigmoid to proba
        self.p_q_bilinear_start = nn.Bilinear(question_hidden_size,
                                              question_hidden_size, 1)
        self.p_q_bilinear_end = nn.Bilinear(question_hidden_size,
                                            question_hidden_size, 1)
        self.p_linear_start = nn.Linear(question_hidden_size, 1)
        self.p_linear_end = nn.Linear(question_hidden_size, 1)
        # Attention start end
        self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
        self.end_start_attn = layers.BilinearProbaAttn(p_max_size)

        # Feed forward
        self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size,
                                                  p_max_size)
        self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size,
                                                p_max_size)
Example #9
0
    def __init__(self, args):
        """Build this TriAN variant: ELMo or trainable word embeddings,
        three pairwise attention matchers, BRNN encoders for passage,
        question and choice, and enlarged bilinear scoring heads.

        Args:
            args: hyperparameter namespace (use_elmo, elmo_num_layer,
                emb dims, hidden_size, doc_layers, rnn_type,
                dropout/padding flags).
        """
        super(TriAN, self).__init__()
        self.args = args

        # Word representation: ELMo layer features or a trainable 300-d
        # embedding over the module-level `vocab`.
        if self.args.use_elmo:
            self.embedding_dim = self.args.elmo_num_layer * 1024
        else:
            self.embedding_dim = 300
            self.embedding = nn.Embedding(
                len(vocab), self.embedding_dim,
                padding_idx=0)  # len is same as vocab size
            self.embedding.weight.data.fill_(0)
            self.embedding.weight.data[:2].normal_(0, 0.1)  # initialize

        # POS/NER/relation feature embeddings; row 0 is padding.
        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.p_q_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # question-aware passage representation
        self.c_q_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # question-aware choice representation
        self.c_p_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # passage-aware choice representation

        # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 3 + 2 * args.rel_emb_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder; 3x emb (raw + the two matched choice
        # representations, presumably -- forward() not shown).
        choice_input_size = 3 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders (x2 for bidirectional)
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging: self-attention pooling
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size +
                                                1)  # add essential term flag

        self.c_diff_attn = layers.DiffSeqAttn(choice_hidden_size)

        self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size,
                                               y_size=question_hidden_size)

        #self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
        #self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
        # Enlarged scoring heads: 2x-wide inputs, 3x-wide outputs relative
        # to the commented originals above.
        self.p_c_bilinear = nn.Linear(2 * doc_hidden_size,
                                      3 * choice_hidden_size)
        self.q_c_bilinear = nn.Linear(2 * question_hidden_size,
                                      3 * choice_hidden_size)
Example #10
0
    def __init__(self, opt, padding_idx=0, embedding=None):
        """Build the DrQA reader: (optionally pretrained) embeddings,
        optional question-attention over the document, BRNN encoders,
        question merging, and bilinear span start/end attentions.

        Args:
            opt: dict of hyperparameters ('vocab_size', 'embedding_dim',
                'hidden_size', layer counts, rnn/dropout/merge options,
                'pretrained_words', 'fix_embeddings', 'tune_partial').
            padding_idx: embedding row used for padding when randomly
                initializing.
            embedding: pretrained weight tensor; required when
                opt['pretrained_words'] is set.

        Raises:
            NotImplementedError: if opt['question_merge'] is not 'avg'
                or 'self_attn'.
        """
        super(DRQA, self).__init__()
        # Store config
        self.opt = opt

        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                self.embedding.weight.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                # Fine-tune only the first tune_partial+2 rows: the hook
                # zeroes gradients for every row past `offset` so those
                # stay at their pretrained values.
                offset = self.opt['tune_partial'] + 2

                def embedding_hook(grad, offset=offset):
                    grad[offset:] = 0
                    return grad

                self.embedding.weight.register_hook(embedding_hook)

        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(opt['embedding_dim'])

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_size']
        if opt['ner']:
            doc_input_size += opt['ner_size']

        # RNN document encoder
        # NOTE(review): self.RNN_TYPES is not set here; presumably a
        # class attribute -- confirm on the enclosing class.
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=opt['hidden_size'],
            num_layers=opt['doc_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']],
            padding=opt['rnn_padding'],
        )

        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=opt['embedding_dim'],
            hidden_size=opt['hidden_size'],
            num_layers=opt['question_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']],
            padding=opt['rnn_padding'],
        )

        # Output sizes of rnn encoders (x2 for bidirectional; scaled by
        # layer count when layer outputs are concatenated)
        doc_hidden_size = 2 * opt['hidden_size']
        question_hidden_size = 2 * opt['hidden_size']
        if opt['concat_rnn_layers']:
            doc_hidden_size *= opt['doc_layers']
            question_hidden_size *= opt['question_layers']

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' % opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )