def __init__(self, args):
    """Question/choice-only TriAN variant: embeds both sequences, encodes
    them with single-layer BRNNs and scores with a final linear projection.

    Relies on module-level ``vocab``/``pos_vocab``/``ner_vocab``/``rel_vocab``
    and the project's ``layers`` package.
    """
    super(TriAN, self).__init__()
    self.args = args
    # ELMo features, when enabled, are one 1024-wide vector per layer.
    self.embedding_dim = (self.args.elmo_num_layer * 1024
                          if self.args.use_elmo else 300)

    # Word table: all zeros, with small noise on the first two rows
    # (presumably the padding/unknown entries — TODO confirm).
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)

    def _noisy_table(vocabulary, width):
        # Feature embedding initialised to N(0, 0.1); row 0 is padding.
        table = nn.Embedding(len(vocabulary), width, padding_idx=0)
        table.weight.data.normal_(0, 0.1)
        return table

    self.pos_embedding = _noisy_table(pos_vocab, args.pos_emb_dim)
    self.ner_embedding = _noisy_table(ner_vocab, args.ner_emb_dim)
    self.rel_embedding = _noisy_table(rel_vocab, args.rel_emb_dim)

    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

    # question-aware choice / choice-aware question representations
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.q_c_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    def _encoder(in_size):
        # Single-layer bidirectional encoder shared by both sequences.
        return layers.StackedBRNN(
            input_size=in_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

    # Question input: 2 * word emb + pos emb + ner emb + 4 manual features + rel emb.
    self.question_rnn = _encoder(2 * self.embedding_dim + args.pos_emb_dim
                                 + args.ner_emb_dim + 4 + args.rel_emb_dim)
    # Choice input: word emb + matched word emb.
    self.choice_rnn = _encoder(2 * self.embedding_dim)

    # Bidirectional encoders emit 2 * hidden_size features per position.
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size

    # Sequence-merging attentions and the final scalar scorer.
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)
    self.project = nn.Linear(2 * question_hidden_size + choice_hidden_size, 1)
def __init__(self, args, word_dict, char_dict):
    """Build the DSSM matcher: word- and char-level GRU encoders plus a
    two-headed MLP classifier.

    Args:
        args: hyper-parameter namespace; word/char dims and hidden sizes
            are overwritten with fixed values below.
        word_dict: word vocabulary (only its len() is used here).
        char_dict: character vocabulary (only its len() is used here).
    """
    super(DSSM, self).__init__()
    # Fixed hyper-parameters — deliberately overwrite whatever the caller
    # passed in args so downstream code sees consistent values.
    args.word_dim = 300
    args.char_dim = 50
    args.word_hidden = 256
    args.char_hidden = 128
    self.args = args

    # word layers
    self.word_embedding = nn.Embedding(len(word_dict), args.word_dim, padding_idx=0)
    self.word_gru_bi = nn.GRU(args.word_dim, args.word_hidden, num_layers=1,
                              batch_first=True, bidirectional=True)
    self.word_gru1 = nn.GRU(args.word_dim, args.word_hidden, num_layers=2,
                            batch_first=True)
    self.self_word_attn = layers.LinearSeqAttn(args.word_dim)

    # char layers
    self.char_embedding = nn.Embedding(len(char_dict), args.char_dim, padding_idx=0)
    self.self_char_attn = layers.LinearSeqAttn(args.char_dim)
    self.char_gru = nn.GRU(args.char_dim, args.char_hidden,
                           bidirectional=True, batch_first=True)

    # Both projection heads consume the same concatenated feature vector;
    # the width was previously duplicated as two identical literals.
    feature_size = (6 * args.word_hidden + 2 * args.word_dim
                    + args.char_hidden * 4 + 2 * args.char_dim)
    self.linear1 = nn.Linear(feature_size, 32)
    self.act1 = nn.ReLU()
    self.linear2 = nn.Linear(feature_size, 48)
    self.act2 = nn.Sigmoid()
    # 80 = 32 + 48, the concatenation of the two heads above.
    self.linear3 = nn.Linear(32 + 48, 2)
    # FIX: nn.LogSoftmax() with an implicit dim is deprecated; for the 2-D
    # (batch, 2) output of linear3 the legacy implicit dim resolves to 1,
    # so dim=1 preserves the original behavior while silencing the warning.
    self.act3 = nn.LogSoftmax(dim=1)
    self.dropout = nn.Dropout(0.5)
def __init__(self, config):
    """Build the span-extraction Reader: embedding, question-aware match,
    stacked BiLSTM encoders and bilinear start/end attentions.

    Args:
        config: object whose ``.model`` attribute carries vocab_size,
            embedding_dim, num_features, hidden_size, layer counts and
            dropout_rate.
    """
    super(Reader, self).__init__()
    self.config = config.model
    # word embedding
    self.embedding = nn.Embedding(self.config.vocab_size, self.config.embedding_dim)
    # attention weighted question
    self.qemb_match = layers.SeqAttnMatch(self.config.embedding_dim)
    init.normal_(self.qemb_match.linear.weight, 1)
    # FIX: init.constant is deprecated/removed; use the in-place constant_
    # (consistent with the normal_ calls already used here).
    init.constant_(self.qemb_match.linear.bias, 0.1)

    # Passage input: word emb + manual features + question-matched emb.
    self.passage_input_size = (self.config.embedding_dim + self.config.num_features
                               + self.config.embedding_dim)
    self.question_input_size = self.config.embedding_dim
    self.passage_encoder = layers.StackedBiLSTM(
        input_size=self.passage_input_size,
        hidden_size=self.config.hidden_size,
        num_layers=self.config.passage_layers,
        dropout_rate=self.config.dropout_rate)
    self.question_encoder = layers.StackedBiLSTM(
        input_size=self.question_input_size,
        hidden_size=self.config.hidden_size,
        num_layers=self.config.question_layers,
        dropout_rate=self.config.dropout_rate)

    # question merging
    self.self_attn = layers.LinearSeqAttn(self.config.hidden_size)
    init.normal_(self.self_attn.linear.weight, 1)
    init.constant_(self.self_attn.linear.bias, 0.1)

    # span start/end
    # BUG FIX: the original re-initialized self.qemb_match.linear here
    # (copy-paste), leaving start_attn/end_attn at their default init.
    # Initialize the layers that were just created instead.
    self.start_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                             self.config.hidden_size)
    init.normal_(self.start_attn.linear.weight, 1)
    init.constant_(self.start_attn.linear.bias, 0.1)
    self.end_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                           self.config.hidden_size)
    init.normal_(self.end_attn.linear.weight, 1)
    init.constant_(self.end_attn.linear.bias, 0.1)
def __init__(self, args):
    """Build MyModel: embedding tables, a context BRNN encoder and the
    multi-turn inference head.  Relies on module-level vocab objects and
    the project's ``layers`` package.
    """
    super(MyModel, self).__init__()
    self.args = args
    self.embedding_dim = 300

    # Word table: zeros everywhere, small noise on the first two rows.
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)

    def _small_normal_table(vocabulary, width):
        # Feature embedding with N(0, 0.1) weights; row 0 is padding.
        table = nn.Embedding(len(vocabulary), width, padding_idx=0)
        table.weight.data.normal_(0, 0.1)
        return table

    self.pos_embedding = _small_normal_table(pos_vocab, args.pos_emb_dim)
    self.ner_embedding = _small_normal_table(ner_vocab, args.ner_emb_dim)
    self.rel_embedding = _small_normal_table(rel_vocab, args.rel_emb_dim)

    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    print("###########self.args.matching_order: %s " % (self.args.matching_order))

    # Context encoder input: word + POS + NER embeddings, 5 manual
    # features and two relation embeddings.
    rnn_input_size = (self.embedding_dim + args.pos_emb_dim
                      + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim)
    self.context_rnn = layers.StackedBRNN(
        input_size=rnn_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    self.hidden_match = layers.SeqDotAttnMatch()
    self.mtinfer = layers.MultiTurnInference(args, self.RNN_TYPES)

    # NOTE(review): both branches currently yield the same width; the
    # branch is kept so the flags are still consulted exactly as before.
    if args.use_multiturn_infer or args.use_bilstm:
        choice_infer_hidden_size = 2 * args.hidden_size
    else:
        choice_infer_hidden_size = 2 * args.hidden_size

    self.q_self_attn = layers.LinearSeqAttn(2 * args.hidden_size)
    if args.use_multiturn_infer == True:
        self.c_infer_linear = nn.Linear(4 * choice_infer_hidden_size,
                                        args.hidden_size)
    else:
        self.c_infer_linear = nn.Linear(
            2 * choice_infer_hidden_size + 2 * 2 * args.hidden_size,
            args.hidden_size)
    self.logits_linear = nn.Linear(args.hidden_size, 1)
def __init__(self, args):
    """Three-way TriAN reader: matches passage, question and answer choice
    against each other, encodes each with a BRNN and scores via bilinear
    projections.  Uses module-level vocab objects and ``layers``.
    """
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300

    # Word table: zeros, with noise on the first two (special) rows.
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)

    def _feature_table(vocabulary, width):
        # N(0, 0.1)-initialised embedding; row 0 is padding.
        table = nn.Embedding(len(vocabulary), width, padding_idx=0)
        table.weight.data.normal_(0, 0.1)
        return table

    self.pos_embedding = _feature_table(pos_vocab, args.pos_emb_dim)
    self.ner_embedding = _feature_table(ner_vocab, args.ner_emb_dim)
    self.rel_embedding = _feature_table(rel_vocab, args.rel_emb_dim)

    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    def _brnn(in_size, depth):
        # Bidirectional encoder; dropout here is applied to the outputs.
        return layers.StackedBRNN(
            input_size=in_size,
            hidden_size=args.hidden_size,
            num_layers=depth,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

    # Passage input: word emb + question-matched emb + POS + NER
    # + 5 manual features + two relation embeddings.
    self.doc_rnn = _brnn(2 * self.embedding_dim + args.pos_emb_dim
                         + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim,
                         args.doc_layers)
    # Question input: word emb + POS emb.
    self.question_rnn = _brnn(self.embedding_dim + args.pos_emb_dim, 1)
    # Choice input: word emb + question-matched + passage-matched embs.
    self.choice_rnn = _brnn(3 * self.embedding_dim, 1)

    # Each encoder is bidirectional → 2 * hidden_size outputs.
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size

    # Merging attentions and bilinear scorers.
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)
    self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size,
                                           y_size=question_hidden_size)
    self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
    self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
def __init__(self, args, normalize=True):
    """Build the DrQA-style RNN document reader.

    Args:
        args: hyper-parameter namespace (vocab_size, embedding_dim,
            hidden_size, doc/question layer counts, dropout and RNN
            options, use_qemb, question_merge).
        normalize: forwarded to the span-attention layers; when False they
            presumably return unnormalized scores — confirm in layers.
    """
    super(RnnDocReader, self).__init__()
    # Store config
    self.args = args

    # Word embeddings (+1 for padding)
    self.embedding = nn.Embedding(args.vocab_size,
                                  args.embedding_dim,
                                  padding_idx=0)

    # Projection for attention weighted question
    if args.use_qemb:
        self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

    # Input size to RNN: word emb + question emb + manual features
    doc_input_size = args.embedding_dim + args.num_features
    if args.use_qemb:
        doc_input_size += args.embedding_dim

    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )

    # RNN question encoder
    self.question_rnn = layers.StackedBRNN(
        input_size=args.embedding_dim,
        hidden_size=args.hidden_size,
        num_layers=args.question_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )

    # Output sizes of rnn encoders (bidirectional → 2x)
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    if args.concat_rnn_layers:
        doc_hidden_size *= args.doc_layers
        question_hidden_size *= args.question_layers

    # Question merging
    if args.question_merge not in ['avg', 'self_attn']:
        # BUG FIX: the message previously read args.merge_mode, an
        # attribute that does not exist on args, so the intended
        # NotImplementedError was masked by an AttributeError.
        raise NotImplementedError('question_merge = %s' % args.question_merge)
    if args.question_merge == 'self_attn':
        self.self_attn = layers.LinearSeqAttn(question_hidden_size)

    # Bilinear attention for span start/end
    self.start_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
        normalize=normalize,
    )
    self.end_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
        normalize=normalize,
    )
def __init__(self, args):
    """MyModel with a tri-matching strategy selected by ``args.tri_input``
    and configurable p/q/c inference channels.  Uses module-level vocab
    objects and the project's ``layers`` package.
    """
    super(MyModel, self).__init__()
    self.args = args
    self.embedding_dim = 300

    # Word table: zeros everywhere, small noise on the first two rows.
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)

    def _small_normal_table(vocabulary, width):
        # Feature embedding with N(0, 0.1) weights; row 0 is padding.
        table = nn.Embedding(len(vocabulary), width, padding_idx=0)
        table.weight.data.normal_(0, 0.1)
        return table

    self.pos_embedding = _small_normal_table(pos_vocab, args.pos_emb_dim)
    self.ner_embedding = _small_normal_table(ner_vocab, args.ner_emb_dim)
    self.rel_embedding = _small_normal_table(rel_vocab, args.rel_emb_dim)

    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    print("###########self.args.matching_order: %s " % (self.args.matching_order))

    # Context encoder input: word + POS + NER embeddings, 5 manual
    # features and two relation embeddings.
    rnn_input_size = (self.embedding_dim + args.pos_emb_dim
                      + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim)
    self.context_rnn = layers.StackedBRNN(
        input_size=rnn_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn_output,    # float: dropout probability
        dropout_output=args.rnn_output_dropout,  # bool: drop on outputs
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    self.Hq_BiLstm = layers.StackedBRNN(
        input_size=rnn_input_size + args.hidden_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn_output,    # float: dropout probability
        dropout_output=args.rnn_output_dropout,  # bool: drop on outputs
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    self.hidden_match = layers.SeqDotAttnMatch()
    self.mtinfer = layers.MultiTurnInference(args, self.RNN_TYPES)

    # Pick the tri-matching strategy; these are methods on this class.
    if self.args.tri_input == 'NA':
        self.mfunction = self.NA_TriMatching
    elif self.args.tri_input == 'CA':
        self.mfunction = self.CA_TriMatching
    else:
        self.mfunction = self.NA_CA_TriMatching

    # NOTE(review): both branches currently yield the same width; the
    # branch is kept so the flags are still consulted exactly as before.
    if args.use_multiturn_infer or args.use_bilstm:
        choice_infer_hidden_size = 2 * args.hidden_size
    else:
        choice_infer_hidden_size = 2 * args.hidden_size

    self.q_self_attn = layers.LinearSeqAttn(2 * args.hidden_size)
    self.linearlayer = nn.Linear(rnn_input_size, args.hidden_size)
    self.pre_y = nn.Linear(
        2 * args.hidden_size + args.pos_emb_dim + args.ner_emb_dim
        + 5 + 2 * args.rel_emb_dim,
        1)

    if args.use_multiturn_infer == True:
        self.c_infer_linear = nn.Linear(
            4 * choice_infer_hidden_size + 2 * 2 * args.hidden_size,
            args.hidden_size)
    else:
        # Base width, plus one slice per enabled inference channel.
        infer_input_size = 2 * 2 * args.hidden_size
        if self.args.p_channel == True:
            infer_input_size += 2 * choice_infer_hidden_size
        if self.args.q_channel == True:
            infer_input_size += 2 * choice_infer_hidden_size
        if self.args.c_channel == True:
            infer_input_size += 2 * choice_infer_hidden_size
        self.c_infer_linear = nn.Linear(infer_input_size, args.hidden_size)
def __init__(self, args):
    """Span-prediction TriAN variant: encodes passage and question and
    builds start/end attention and scoring heads sized by the fixed
    maximum passage/question lengths carried in ``args``.
    """
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300

    # Word table: zeros, with noise on the first two (special) rows.
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)

    def _feature_table(vocabulary, width):
        # N(0, 0.1)-initialised embedding; row 0 is padding.
        table = nn.Embedding(len(vocabulary), width, padding_idx=0)
        table.weight.data.normal_(0, 0.1)
        return table

    self.pos_embedding = _feature_table(pos_vocab, args.pos_emb_dim)
    self.ner_embedding = _feature_table(ner_vocab, args.ner_emb_dim)
    self.rel_embedding = _feature_table(rel_vocab, args.rel_emb_dim)

    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    # Passage encoder input: word emb + question-matched emb + POS + NER
    # + 5 manual features + relation emb.
    doc_input_size = (2 * self.embedding_dim + args.pos_emb_dim
                      + args.ner_emb_dim + 5 + args.rel_emb_dim)

    # Fixed maximum passage/question lengths, used to size the heads below.
    p_max_size = args.p_max_size
    self.p_max_size = p_max_size
    q_max_size = args.q_max_size
    self.q_max_size = q_max_size

    # RNN document encoder.
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN question encoder: word emb + POS emb.
    self.question_rnn = layers.StackedBRNN(
        input_size=self.embedding_dim + args.pos_emb_dim,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # Bidirectional encoders emit 2 * hidden_size features.
    doc_hidden_size = 2 * args.hidden_size
    self.doc_hidden_size = doc_hidden_size
    question_hidden_size = 2 * args.hidden_size
    self.question_hidden_size = question_hidden_size

    # Attention over passage and question — one pair per span endpoint.
    self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)
    self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)

    # Bilinear/linear scorers producing per-position logits.
    self.p_q_bilinear_start = nn.Bilinear(question_hidden_size, question_hidden_size, 1)
    self.p_q_bilinear_end = nn.Bilinear(question_hidden_size, question_hidden_size, 1)
    self.p_linear_start = nn.Linear(question_hidden_size, 1)
    self.p_linear_end = nn.Linear(question_hidden_size, 1)

    # Cross-attention between start and end probability vectors.
    self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
    self.end_start_attn = layers.BilinearProbaAttn(p_max_size)

    # Feed-forward refinement of each endpoint distribution.
    self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
    self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
def __init__(self, args):
    """TriAN variant with ELMo support, an essential-term flag on the
    question attention and a choice-difference attention head.
    """
    super(TriAN, self).__init__()
    self.args = args
    # ELMo features, when enabled, are one 1024-wide vector per layer.
    self.embedding_dim = (self.args.elmo_num_layer * 1024
                          if self.args.use_elmo else 300)

    # Word table (len(vocab) == vocab size): zeros, then small noise on
    # the first two rows.
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)

    def _feature_table(vocabulary, width):
        # N(0, 0.1)-initialised embedding; row 0 is padding.
        table = nn.Embedding(len(vocabulary), width, padding_idx=0)
        table.weight.data.normal_(0, 0.1)
        return table

    self.pos_embedding = _feature_table(pos_vocab, args.pos_emb_dim)
    self.ner_embedding = _feature_table(ner_vocab, args.ner_emb_dim)
    self.rel_embedding = _feature_table(rel_vocab, args.rel_emb_dim)

    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    # question-aware passage representation
    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    # question-aware choice representation
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    # passage-aware choice representation
    self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    def _brnn(in_size, depth):
        # Bidirectional encoder with output dropout.
        return layers.StackedBRNN(
            input_size=in_size,
            hidden_size=args.hidden_size,
            num_layers=depth,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

    # Passage input: word emb + question-matched emb + POS + NER
    # + 3 manual features + two relation embeddings.
    self.doc_rnn = _brnn(2 * self.embedding_dim + args.pos_emb_dim
                         + args.ner_emb_dim + 3 + 2 * args.rel_emb_dim,
                         args.doc_layers)
    # Question input: word emb + POS emb.
    self.question_rnn = _brnn(self.embedding_dim + args.pos_emb_dim, 1)
    # Choice input: word emb + question-matched + passage-matched embs.
    self.choice_rnn = _brnn(3 * self.embedding_dim, 1)

    # Bidirectional encoders emit 2 * hidden_size features.
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size

    # Answer merging.
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    # +1 widens the attention input by the essential-term flag.
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size + 1)
    self.c_diff_attn = layers.DiffSeqAttn(choice_hidden_size)
    self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size,
                                           y_size=question_hidden_size)
    # Doubled inputs / tripled outputs versus the plain TriAN bilinears.
    self.p_c_bilinear = nn.Linear(2 * doc_hidden_size, 3 * choice_hidden_size)
    self.q_c_bilinear = nn.Linear(2 * question_hidden_size, 3 * choice_hidden_size)
def __init__(self, opt, padding_idx=0, embedding=None):
    """DrQA reader.

    Args:
        opt: dict of hyper-parameters and feature flags.
        padding_idx: padding row for the randomly initialised embedding.
        embedding: pretrained weight tensor; required when
            opt['pretrained_words'] is set.
    """
    super(DRQA, self).__init__()
    # Store config
    self.opt = opt

    # --- word embeddings -------------------------------------------------
    if opt['pretrained_words']:
        assert embedding is not None
        self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
        if opt['fix_embeddings']:
            # A fully frozen table is incompatible with partial tuning.
            assert opt['tune_partial'] == 0
            self.embedding.weight.requires_grad = False
        elif opt['tune_partial'] > 0:
            # Only the first tune_partial (+2 special) rows are tuned;
            # a backward hook zeroes gradients for all later rows.
            assert opt['tune_partial'] + 2 < embedding.size(0)
            frozen_from = self.opt['tune_partial'] + 2

            def embedding_hook(grad, offset=frozen_from):
                grad[offset:] = 0
                return grad

            self.embedding.weight.register_hook(embedding_hook)
    else:
        # Randomly initialised table.
        self.embedding = nn.Embedding(opt['vocab_size'],
                                      opt['embedding_dim'],
                                      padding_idx=padding_idx)

    # --- attention-weighted question projection --------------------------
    if opt['use_qemb']:
        self.qemb_match = layers.SeqAttnMatch(opt['embedding_dim'])

    # --- document encoder input width: word emb + manual features,
    # optionally question emb, POS and NER features ------------------------
    widths = [opt['embedding_dim'], opt['num_features']]
    if opt['use_qemb']:
        widths.append(opt['embedding_dim'])
    if opt['pos']:
        widths.append(opt['pos_size'])
    if opt['ner']:
        widths.append(opt['ner_size'])
    doc_input_size = sum(widths)

    # --- RNN encoders -----------------------------------------------------
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=opt['hidden_size'],
        num_layers=opt['doc_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']],
        padding=opt['rnn_padding'],
    )
    self.question_rnn = layers.StackedBRNN(
        input_size=opt['embedding_dim'],
        hidden_size=opt['hidden_size'],
        num_layers=opt['question_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']],
        padding=opt['rnn_padding'],
    )

    # Bidirectional outputs; widened further when layers are concatenated.
    doc_hidden_size = 2 * opt['hidden_size']
    question_hidden_size = 2 * opt['hidden_size']
    if opt['concat_rnn_layers']:
        doc_hidden_size *= opt['doc_layers']
        question_hidden_size *= opt['question_layers']

    # --- question merging -------------------------------------------------
    if opt['question_merge'] not in ['avg', 'self_attn']:
        raise NotImplementedError('question_merge = %s' % opt['question_merge'])
    if opt['question_merge'] == 'self_attn':
        self.self_attn = layers.LinearSeqAttn(question_hidden_size)

    # --- bilinear attention for span start/end ----------------------------
    self.start_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
    )
    self.end_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
    )