# Constructor snippets from several question-answering reader models. Each
# assumes the usual PyTorch imports plus a project-local `layers` module that
# provides the referenced building blocks (SeqAttnMatch, StackedBiLSTM,
# StackedBRNN, LinearSeqAttn, BilinearSeqAttn, ...).
import torch.nn as nn
import torch.nn.init as init

import layers


def __init__(self, config):
    super(Reader, self).__init__()
    self.config = config.model

    # Word embedding
    self.embedding = nn.Embedding(self.config.vocab_size, self.config.embedding_dim)

    # Attention-weighted question
    self.qemb_match = layers.SeqAttnMatch(self.config.embedding_dim)
    init.normal_(self.qemb_match.linear.weight, 1)
    init.constant_(self.qemb_match.linear.bias, 0.1)

    # Encoder input sizes: word emb + manual features + question-weighted emb
    self.passage_input_size = (self.config.embedding_dim + self.config.num_features
                               + self.config.embedding_dim)
    self.question_input_size = self.config.embedding_dim

    self.passage_encoder = layers.StackedBiLSTM(
        input_size=self.passage_input_size,
        hidden_size=self.config.hidden_size,
        num_layers=self.config.passage_layers,
        dropout_rate=self.config.dropout_rate)
    self.question_encoder = layers.StackedBiLSTM(
        input_size=self.question_input_size,
        hidden_size=self.config.hidden_size,
        num_layers=self.config.question_layers,
        dropout_rate=self.config.dropout_rate)

    # Question merging
    self.self_attn = layers.LinearSeqAttn(self.config.hidden_size)
    init.normal_(self.self_attn.linear.weight, 1)
    init.constant_(self.self_attn.linear.bias, 0.1)

    # Bilinear attention for span start/end; initialize each attention's own linear layer
    self.start_attn = layers.BilinearSeqAttn(self.config.hidden_size, self.config.hidden_size)
    init.normal_(self.start_attn.linear.weight, 1)
    init.constant_(self.start_attn.linear.bias, 0.1)
    self.end_attn = layers.BilinearSeqAttn(self.config.hidden_size, self.config.hidden_size)
    init.normal_(self.end_attn.linear.weight, 1)
    init.constant_(self.end_attn.linear.bias, 0.1)
def __init__(self, word_vectors, hidden_size, drop_prob=0.,
             attention_type="BiDAF", train_embeddings=False):
    super(QA_Model, self).__init__()
    ATTENTION_TYPES = {
        'DotProduct': layers.DotProductAttention,
        'Bilinear': layers.BilinearSeqAttn,
        'BiDAF': layers.BiDAFAttention,
    }
    self.attention_type = ATTENTION_TYPES[attention_type]

    # Embedding layer initialized from pretrained vectors; optionally fine-tuned
    self.embedding_size = word_vectors.shape[1]
    self.embedding = nn.Embedding(word_vectors.shape[0], self.embedding_size)
    self.embedding.weight.data.copy_(word_vectors)
    self.embedding.weight.requires_grad = train_embeddings

    self.encoder = layers.LSTMEncoder(input_size=self.embedding_size,
                                      hidden_size=hidden_size,
                                      num_layers=1,
                                      drop_prob=drop_prob)

    attention_output_size = hidden_size
    if self.attention_type == ATTENTION_TYPES['DotProduct']:
        self.att = layers.DotProductAttention(2 * hidden_size)
    elif self.attention_type == ATTENTION_TYPES['Bilinear']:
        self.att = layers.BilinearSeqAttn(2 * hidden_size, 2 * hidden_size)
    elif self.attention_type == ATTENTION_TYPES['BiDAF']:
        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
        attention_output_size *= 2  # BiDAFAttention output is larger

    self.out = layers.SoftmaxOutputLayer(hidden_size=attention_output_size,
                                         drop_prob=drop_prob)
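# A minimal, self-contained sketch (illustration only, random stand-in values)
# of the embedding setup used in QA_Model above: copy pretrained vectors into
# an nn.Embedding and control fine-tuning via requires_grad.
import torch

_word_vectors = torch.randn(5000, 100)                 # (vocab_size, embedding_dim)
_emb = nn.Embedding(_word_vectors.shape[0], _word_vectors.shape[1])
_emb.weight.data.copy_(_word_vectors)                  # load the pretrained values
_emb.weight.requires_grad = False                      # train_embeddings=False: keep them frozen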
def __init__(self, args):
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300

    # Word/POS/NER/relation embeddings (vocab objects are module-level globals)
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

    # Sequence attention matches: question-aware passage, question-aware choice,
    # passage-aware choice
    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    # Input size to RNN: word emb + question emb + pos emb + ner emb
    #                    + 5 manual features + 2 relation embs
    doc_input_size = (2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim
                      + 5 + 2 * args.rel_emb_dim)

    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn_output,
        dropout_output=True,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn_output,
        dropout_output=True,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN answer (choice) encoder
    choice_input_size = 3 * self.embedding_dim
    self.choice_rnn = layers.StackedBRNN(
        input_size=choice_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn_output,
        dropout_output=True,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # Output sizes of the bidirectional RNN encoders
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size

    # Answer merging
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)
    self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)
    self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
    self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
def __init__(self, args, normalize=True):
    super(RnnDocReader, self).__init__()
    # Store config
    self.args = args

    # Word embeddings (+1 for padding)
    self.embedding = nn.Embedding(args.vocab_size,
                                  args.embedding_dim,
                                  padding_idx=0)

    # Projection for attention-weighted question
    if args.use_qemb:
        self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

    # Input size to RNN: word emb + question emb + manual features
    doc_input_size = args.embedding_dim + args.num_features
    if args.use_qemb:
        doc_input_size += args.embedding_dim

    # RNN document encoder
    # (self.RNN_TYPES is expected as a class attribute, e.g. {'lstm': nn.LSTM, 'gru': nn.GRU})
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )

    # RNN question encoder
    self.question_rnn = layers.StackedBRNN(
        input_size=args.embedding_dim,
        hidden_size=args.hidden_size,
        num_layers=args.question_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )

    # Output sizes of rnn encoders
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    if args.concat_rnn_layers:
        doc_hidden_size *= args.doc_layers
        question_hidden_size *= args.question_layers

    # Question merging
    if args.question_merge not in ['avg', 'self_attn']:
        raise NotImplementedError('question_merge = %s' % args.question_merge)
    if args.question_merge == 'self_attn':
        self.self_attn = layers.LinearSeqAttn(question_hidden_size)

    # Bilinear attention for span start/end
    self.start_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
        normalize=normalize,
    )
    self.end_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
        normalize=normalize,
    )
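# Worked example (assumed values, for illustration only) of the encoder-size
# bookkeeping in RnnDocReader above: each StackedBRNN layer is bidirectional,
# so it emits 2 * hidden_size features per token, and with concat_rnn_layers
# the per-layer outputs are concatenated, multiplying the width by the layer
# count.
_hidden_size, _doc_layers, _question_layers = 128, 3, 3
_doc_hidden_size = 2 * _hidden_size * _doc_layers            # 768 when concat_rnn_layers=True
_question_hidden_size = 2 * _hidden_size * _question_layers  # 768 when concat_rnn_layers=True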
def __init__(self, args):
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300

    # Word/POS/NER/relation embeddings (vocab objects are module-level globals)
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    # Input size to RNN: word emb + question emb + pos emb + ner emb
    #                    + 5 manual features + relation emb
    doc_input_size = (2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim
                      + 5 + args.rel_emb_dim)

    # Max passage size
    p_max_size = args.p_max_size
    self.p_max_size = p_max_size
    # Max question size
    q_max_size = args.q_max_size
    self.q_max_size = q_max_size

    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # Output sizes of rnn encoders
    doc_hidden_size = 2 * args.hidden_size
    self.doc_hidden_size = doc_hidden_size
    question_hidden_size = 2 * args.hidden_size
    self.question_hidden_size = question_hidden_size

    # Attention over passage and question
    self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)
    self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)

    # Bilinear layer and sigmoid to proba
    self.p_q_bilinear_start = nn.Bilinear(question_hidden_size, question_hidden_size, 1)
    self.p_q_bilinear_end = nn.Bilinear(question_hidden_size, question_hidden_size, 1)
    self.p_linear_start = nn.Linear(question_hidden_size, 1)
    self.p_linear_end = nn.Linear(question_hidden_size, 1)

    # Attention between start and end probabilities
    self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
    self.end_start_attn = layers.BilinearProbaAttn(p_max_size)

    # Feed-forward heads
    self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
    self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
def __init__(self, args):
    super(TriAN, self).__init__()
    self.args = args
    if self.args.use_elmo:
        self.embedding_dim = self.args.elmo_num_layer * 1024
    else:
        self.embedding_dim = 300

    # Word embedding table (len(vocab) is the vocabulary size)
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)  # initialize
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)  # question-aware passage representation
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)  # question-aware choice representation
    self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)  # passage-aware choice representation

    # Input size to RNN: word emb + question emb + pos emb + ner emb
    #                    + 3 manual features + 2 relation embs
    doc_input_size = (2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim
                      + 3 + 2 * args.rel_emb_dim)

    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN answer (choice) encoder
    choice_input_size = 3 * self.embedding_dim
    self.choice_rnn = layers.StackedBRNN(
        input_size=choice_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # Output sizes of rnn encoders
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size

    # Answer merging
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size + 1)  # add essential term flag
    self.c_diff_attn = layers.DiffSeqAttn(choice_hidden_size)
    self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)

    # self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
    # self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
    self.p_c_bilinear = nn.Linear(2 * doc_hidden_size, 3 * choice_hidden_size)
    self.q_c_bilinear = nn.Linear(2 * question_hidden_size, 3 * choice_hidden_size)
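# Worked example (illustrative values, not required defaults): with ELMo
# disabled (embedding_dim = 300), pos_emb_dim = 12, ner_emb_dim = 8,
# rel_emb_dim = 10 and the 3 manual features above, each document token feeds
# the encoder 2*300 + 12 + 8 + 3 + 2*10 = 643 input features.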
def __init__(self, opt, padding_idx=0, embedding=None):
    super(DRQA, self).__init__()
    # Store config
    self.opt = opt

    # Word embeddings
    if opt['pretrained_words']:
        assert embedding is not None
        self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
        if opt['fix_embeddings']:
            assert opt['tune_partial'] == 0
            self.embedding.weight.requires_grad = False
        elif opt['tune_partial'] > 0:
            assert opt['tune_partial'] + 2 < embedding.size(0)
            offset = self.opt['tune_partial'] + 2

            def embedding_hook(grad, offset=offset):
                # Zero the gradient for all rows past `offset`, so only the
                # first `tune_partial` + 2 embedding rows are fine-tuned.
                grad[offset:] = 0
                return grad

            self.embedding.weight.register_hook(embedding_hook)
    else:  # randomly initialized
        self.embedding = nn.Embedding(opt['vocab_size'],
                                      opt['embedding_dim'],
                                      padding_idx=padding_idx)

    # Projection for attention-weighted question
    if opt['use_qemb']:
        self.qemb_match = layers.SeqAttnMatch(opt['embedding_dim'])

    # Input size to RNN: word emb + question emb + manual features
    doc_input_size = opt['embedding_dim'] + opt['num_features']
    if opt['use_qemb']:
        doc_input_size += opt['embedding_dim']
    if opt['pos']:
        doc_input_size += opt['pos_size']
    if opt['ner']:
        doc_input_size += opt['ner_size']

    # RNN document encoder
    # (self.RNN_TYPES is expected as a class attribute, e.g. {'lstm': nn.LSTM, 'gru': nn.GRU})
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=opt['hidden_size'],
        num_layers=opt['doc_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']],
        padding=opt['rnn_padding'],
    )

    # RNN question encoder
    self.question_rnn = layers.StackedBRNN(
        input_size=opt['embedding_dim'],
        hidden_size=opt['hidden_size'],
        num_layers=opt['question_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']],
        padding=opt['rnn_padding'],
    )

    # Output sizes of rnn encoders
    doc_hidden_size = 2 * opt['hidden_size']
    question_hidden_size = 2 * opt['hidden_size']
    if opt['concat_rnn_layers']:
        doc_hidden_size *= opt['doc_layers']
        question_hidden_size *= opt['question_layers']

    # Question merging
    if opt['question_merge'] not in ['avg', 'self_attn']:
        raise NotImplementedError('question_merge = %s' % opt['question_merge'])
    if opt['question_merge'] == 'self_attn':
        self.self_attn = layers.LinearSeqAttn(question_hidden_size)

    # Bilinear attention for span start/end
    self.start_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
    )
    self.end_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
    )
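# A minimal, self-contained sketch (illustration only, tiny made-up sizes) of
# the partial-tuning hook used in DRQA above: a gradient hook on the embedding
# weight zeroes the rows past `offset`, so only the most frequent words are
# fine-tuned. The hook here clones the gradient instead of editing it in place.
import torch
import torch.nn as nn

_emb = nn.Embedding(10, 4)   # toy vocabulary of 10 words, dim 4
_offset = 3                  # rows >= 3 stay frozen

def _zero_tail(grad, offset=_offset):
    grad = grad.clone()
    grad[offset:] = 0
    return grad

_emb.weight.register_hook(_zero_tail)

_loss = _emb(torch.arange(10)).sum()   # touch every row so every row gets a gradient
_loss.backward()
print(_emb.weight.grad[:_offset].abs().sum() > 0)    # tensor(True): tuned rows get gradient
print(_emb.weight.grad[_offset:].abs().sum() == 0)   # tensor(True): frozen rows get zero gradient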