def get_context_to_question_model(rnn_dim: int, q2c: bool, res_rnn: bool, res_self_att: bool):
    """Build a ``SingleContextToQuestionModel`` with context-to-question attention.

    Args:
        rnn_dim: Size handed to the ``CudnnGru`` and to ``get_res_fc_seq_fc``.
        q2c: When true, a question-to-context attention is built as well;
            otherwise ``None`` is passed for it.
        res_rnn: Forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
        res_self_att: Forwarded to ``get_res_fc_seq_fc`` as ``self_att``.

    Returns:
        The configured ``SingleContextToQuestionModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()
    post_attention = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)

    # Both attentions use the same post-mapper object.
    if q2c:
        q2c_attention = AttentionWithPostMapper(
            BiAttention(TriLinear(bias=True), True), post_mapper=post_attention)
    else:
        q2c_attention = None
    c2q_attention = AttentionWithPostMapper(
        BiAttention(TriLinear(bias=True), True), post_mapper=post_attention)

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    # Dropout is applied both before and after the recurrent layer.
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(0.8),
        gru,
        VariationalDropoutLayer(0.8),
    )

    return SingleContextToQuestionModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        embed_mapper=embed_mapper,
        question_to_context_attention=q2c_attention,
        context_to_question_attention=c2q_attention,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor(),
    )
def get_multi_encode_softmax_weighting_model(rnn_dim, multi_rnn_dim, num_encodings, keep_rate=0.8, map_embed=True):
    """Build a ``SingleContextMultipleEncodingWeightedSoftmaxModel``.

    Args:
        rnn_dim: Size of the ``CudnnGru`` used in the embedding mapper.
        multi_rnn_dim: Size of the ``CudnnGru`` used inside ``MultiMapThenEncode``.
        num_encodings: Forwarded to ``MultiMapThenEncode`` as ``num_encodings``.
        keep_rate: Value given to every ``VariationalDropoutLayer`` built here.
        map_embed: When false, no embedding mapper is built and ``None`` is
            passed as ``embed_mapper``.

    Returns:
        The configured model.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    multi_gru = CudnnGru(multi_rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    if map_embed:
        embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)
    else:
        embed_mapper = None

    multi_encoder = MultiMapThenEncode(
        mapper=SequenceMapperSeq(VariationalDropoutLayer(keep_rate), multi_gru),
        encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        num_encodings=num_encodings,
    )

    return SingleContextMultipleEncodingWeightedSoftmaxModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        embed_mapper=embed_mapper,
        sequence_multi_encoder=multi_encoder,
        weight_layer=MultiEncodingWeights(weight_mode='mlp'),
        merger=ConcatWithProduct(),
        post_merger=None,
        predictor=BinaryWeightedMultipleFixedPredictor(),
    )
def get_sentences_model(rnn_dim, use_elmo, keep_rate=0.8):
    """Build a ``SingleContextMaxSentenceModel``.

    Args:
        rnn_dim: Size of the ``CudnnGru`` in the embedding mapper.
        use_elmo: When truthy, the model from ``get_wiki_elmo()`` is used and
            the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Value given to the ``VariationalDropoutLayer`` that
            precedes the recurrent layer.

    Returns:
        The configured ``SingleContextMaxSentenceModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()
    base_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    if not use_elmo:
        elmo_model = None
        embed_mapper = base_mapper
    else:
        print("Using Elmo!")
        elmo_model = get_wiki_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False,
                                   rnn_layer=base_mapper, lm_reduce=lm_reduce)

    input_encoder = QuestionsAndParagraphsEncoder(
        binary_answers, use_sentence_segments=True, paragraph_as_sentence=True)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return SingleContextMaxSentenceModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        max_batch_size=256,
    )
def get_model(rnn_dim):
    """Build a ``ContextPairRelevanceModel`` with every attention disabled.

    Args:
        rnn_dim: Size of the ``CudnnGru`` in the embedding mapper.

    Returns:
        The configured ``ContextPairRelevanceModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(0.8),
        gru,
        # VariationalDropoutLayer(0.8),  # fixme probably doesn't belong here
    )

    return ContextPairRelevanceModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        embed_mapper=embed_mapper,
        question_to_context_attention=None,
        context_to_context_attention=None,
        context_to_question_attention=None,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        merger=MergeTwoContextsConcatQuestion(),
        predictor=BinaryFixedPredictor(),
    )
def get_multi_hop_model(rnn_dim, c2c: bool, q2c: bool, res_rnn: bool, res_self_att: bool,
                        post_merge: bool, encoder: str, merge_type: str, num_c2c_hops: int):
    """Build a ``MultiHopContextsToQuestionModel``.

    Args:
        rnn_dim: Size of the recurrent layers built here.
        c2c: When true, a context-to-context attention is built; else ``None``.
        q2c: When true, a question-to-context attention is built; else ``None``.
        res_rnn: Forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
        res_self_att: Forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
        post_merge: When true, the residual model is also used as the
            ``post_map_layer`` of the attention merger.
        encoder: ``'max'`` selects a ``MaxPool`` sequence encoder, ``'rnn'`` a
            ``CudnnGruEncoder``.
        merge_type: ``'max'`` selects ``MaxMerge``; any other value is passed
            to ``WeightedMerge`` as ``weight_type``.
        num_c2c_hops: Forwarded to the model as ``c2c_hops``.

    Returns:
        The configured ``MultiHopContextsToQuestionModel``.

    Raises:
        NotImplementedError: If ``encoder`` is neither ``'max'`` nor ``'rnn'``.
    """
    # Fail fast, before any component is built, and say what was wrong.
    if encoder not in ('max', 'rnn'):
        raise NotImplementedError(
            "Unsupported encoder {!r}; expected 'max' or 'rnn'".format(encoder))

    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()
    res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)

    context_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if c2c else None
    question_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if q2c else None

    if encoder == 'max':
        sequence_encoder = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    else:  # encoder == 'rnn', guaranteed by the validation above
        sequence_encoder = CudnnGruEncoder(rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    post_map_layer = res_model if post_merge else None
    if merge_type == 'max':
        attention_merger = MaxMerge(pre_map_layer=None, post_map_layer=post_map_layer)
    else:
        attention_merger = WeightedMerge(pre_map_layer=None, post_map_layer=post_map_layer,
                                         weight_type=merge_type)

    return MultiHopContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        question_to_context_attention=question_to_context,
        context_to_context_attention=context_to_context,
        c2c_hops=num_c2c_hops,
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        attention_merger=attention_merger,
        sequence_encoder=sequence_encoder,
        predictor=BinaryFixedPredictor())
def get_reread_model(rnn_dim, use_elmo, encoder_keep_rate=0.8, reread_keep_rate=0.8, two_phase_att=False,
                     res_rnn=True, res_self_att=False, multiply_iteration_probs=False,
                     reformulate_by_context=False, rank_first=False, rank_second=False,
                     reread_rnn_dim=None, first_rank_lambda=1.0, second_rank_lambda=1.0,
                     ranking_gamma=1.0):
    """Build an ``IterativeContextReReadModel``.

    Args:
        rnn_dim: Size of the embedding ``CudnnGru``; also sizes the post-attention mapper.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is used and
            the embedding mapper is wrapped in an ``ElmoWrapper``.
        encoder_keep_rate: Value for the dropout layer preceding the embedding GRU.
        reread_keep_rate: Forwarded to ``get_res_fc_seq_fc`` as ``keep_rate``.
        two_phase_att: When true, both attention directions are enabled.
        res_rnn, res_self_att: If either is true the post-attention mapper comes
            from ``get_res_fc_seq_fc``; otherwise it is a single ``FullyConnected``.
        multiply_iteration_probs: Forwarded to the model unchanged.
        reformulate_by_context: Forwarded to the model; also selects which single
            attention direction is enabled when ``two_phase_att`` is false.
        rank_first, rank_second: First positional argument of the first/second
            ``BinaryNullPredictor``; the answer encoder's ``group`` is their OR.
        reread_rnn_dim: When not ``None``, a ``CudnnGru`` of this size is
            passed as ``reread_mapper``.
        first_rank_lambda, second_rank_lambda: ``ranking_lambda`` of each predictor.
        ranking_gamma: ``gamma`` of both predictors.

    Returns:
        The configured ``IterativeContextReReadModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder(group=rank_first or rank_second)
    base_mapper = SequenceMapperSeq(VariationalDropoutLayer(encoder_keep_rate), gru)

    if not (res_rnn or res_self_att):
        post_attention = FullyConnected(rnn_dim * 2, activation="relu")
    else:
        post_attention = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att,
                                           keep_rate=reread_keep_rate)

    # One attention object serves whichever directions are enabled.
    attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=post_attention)
    use_c2q = two_phase_att or not reformulate_by_context
    use_q2c = two_phase_att or reformulate_by_context

    if not use_elmo:
        elmo_model = None
        embed_mapper = base_mapper
    else:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False,
                                   rnn_layer=base_mapper, lm_reduce=lm_reduce)

    input_encoder = QuestionsAndParagraphsEncoder(
        iterative_answers, use_sentence_segments=True, paragraph_as_sentence=True)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    sequence_encoder = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)
    sentences_encoder = SentenceMaxEncoder()
    merger = WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True)

    if reread_rnn_dim is None:
        reread_mapper = None
    else:
        reread_mapper = CudnnGru(reread_rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    return IterativeContextReReadModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=sequence_encoder,
        sentences_encoder=sentences_encoder,
        sentence_mapper=None,
        post_merger=None,
        merger=merger,
        reread_mapper=reread_mapper,
        pre_attention_mapper=None,  # VariationalDropoutLayer(reread_keep_rate),
        context_to_question_attention=attention if use_c2q else None,
        question_to_context_attention=attention if use_q2c else None,
        reformulate_by_context=reformulate_by_context,
        multiply_iteration_probs=multiply_iteration_probs,
        first_predictor=BinaryNullPredictor(rank_first, ranking_lambda=first_rank_lambda,
                                            gamma=ranking_gamma),
        second_predictor=BinaryNullPredictor(rank_second, ranking_lambda=second_rank_lambda,
                                             gamma=ranking_gamma),
        max_batch_size=512,
    )
def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    """Build an ``AttentionQAWithYesNo`` model.

    Args:
        rnn_dim: Size of the ``CudnnGru``; fully connected layers use ``rnn_dim * 2``.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is used and
            the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Value given to every ``VariationalDropoutLayer`` built here.

    Returns:
        The configured ``AttentionQAWithYesNo``.
    """
    # The same GRU object is reused by the embed mapper, the match encoder
    # and both layers of the predictor's ChainBiMapper.
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    base_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        gru,
        VariationalDropoutLayer(keep_rate),
    )

    if not use_elmo:
        elmo_model = None
        embed_mapper = base_mapper
    else:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False,
                                   rnn_layer=base_mapper, lm_reduce=lm_reduce)

    span_answers = GroupedSpanAnswerEncoderWithYesNo(group=True)
    bounds_predictor = BoundsPredictor(
        ChainBiMapper(first_layer=gru, second_layer=gru),
        span_predictor=IndependentBoundsGroupedWithYesNo(),
    )

    input_encoder = QuestionsAndParagraphsEncoder(span_answers, use_sentence_segments=False)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    memory_builder = NullBiMapper()
    attention = BiAttention(TriLinear(bias=True), True)
    match_encoder = SequenceMapperSeq(
        FullyConnected(rnn_dim * 2, activation="relu"),
        ResidualLayer(SequenceMapperSeq(
            VariationalDropoutLayer(keep_rate),
            gru,
            VariationalDropoutLayer(keep_rate),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(rnn_dim * 2, activation="relu"),
        )),
        VariationalDropoutLayer(keep_rate),
    )

    return AttentionQAWithYesNo(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=memory_builder,
        attention=attention,
        match_encoder=match_encoder,
        predictor=bounds_predictor,
        yes_no_question_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        yes_no_context_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
    )
def __init__(self,
             encoder: QuestionsAndParagraphsEncoder,
             word_embed: Optional[WordEmbedder],
             char_embed: Optional[CharWordEmbedder],
             embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
             sequence_encoder: SequenceEncoder,
             sentences_encoder: SentencesEncoder,
             sentence_mapper: Optional[SequenceMapper],
             merger: FixedMergeLayer,
             post_merger: Optional[Mapper],
             reformulation_layer: ReformulationLayer,
             max_batch_size: Optional[int] = None,
             elmo_model: Optional[LanguageModel] = None):
    """Store the model's components.

    ``encoder``, ``word_embed``, ``char_embed``, ``max_batch_size`` and
    ``elmo_model`` go to the parent constructor; the remaining arguments are
    kept as same-named attributes. A default ``BinaryNullPredictor``, a
    ``MaxPool`` with ``VERY_NEGATIVE_NUMBER`` as ``min_val``, and a
    ``MeanPool`` are created here rather than injected.
    """
    parent_kwargs = dict(
        encoder=encoder,
        word_embed=word_embed,
        char_embed=char_embed,
        max_batch_size=max_batch_size,
        elmo_model=elmo_model,
    )
    super().__init__(**parent_kwargs)

    # Injected components.
    self.embed_mapper = embed_mapper
    self.sequence_encoder = sequence_encoder
    self.sentences_encoder = sentences_encoder
    self.sentence_mapper = sentence_mapper
    self.merger = merger
    self.post_merger = post_merger
    self.reformulation_layer = reformulation_layer

    # Fixed components created locally.
    self.predictor = BinaryNullPredictor()
    self.max_pool = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)
    self.mean_pool = MeanPool()
def get_reread_merge_model(rnn_dim, use_elmo, keep_rate=0.8, res_rnn=True, res_self_att=False,
                           multiply_iteration_probs=False):
    """Build an ``IterativeContextReReadMergeModel``.

    Args:
        rnn_dim: Size of the embedding ``CudnnGru``; also sizes the post-attention mapper.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is used and
            the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Value for the dropout layer preceding the embedding GRU.
        res_rnn, res_self_att: If either is true the post-attention mapper comes
            from ``get_res_fc_seq_fc``; otherwise it is a single ``FullyConnected``.
        multiply_iteration_probs: Forwarded to the model unchanged.

    Returns:
        The configured ``IterativeContextReReadMergeModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder()
    base_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    if not (res_rnn or res_self_att):
        post_attention = FullyConnected(rnn_dim * 2, activation="relu")
    else:
        post_attention = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)

    # The same attention object is passed for both directions.
    attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=post_attention)

    if not use_elmo:
        elmo_model = None
        embed_mapper = base_mapper
    else:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False,
                                   rnn_layer=base_mapper, lm_reduce=lm_reduce)

    input_encoder = QuestionsAndParagraphsEncoder(iterative_answers, use_sentence_segments=True)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return IterativeContextReReadMergeModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=False, raw=True),
        context_to_question_attention=attention,
        question_to_context_attention=attention,
        reread_merger=ConcatWithProduct(),
        multiply_iteration_probs=multiply_iteration_probs,
        max_batch_size=128,
    )
def __init__(self,
             encoder: QuestionsAndParagraphsEncoder,
             word_embed: Optional[WordEmbedder],
             char_embed: Optional[CharWordEmbedder],
             embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
             sequence_encoder: SequenceEncoder,
             sentences_encoder: SentencesEncoder,
             sentence_mapper: Optional[SequenceMapper],
             reread_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
             pre_attention_mapper: Optional[SequenceMapper],
             context_to_question_attention: Optional[AttentionWithPostMapper],
             question_to_context_attention: Optional[AttentionWithPostMapper],
             first_predictor: BinaryNullPredictor,
             second_predictor: BinaryNullPredictor,
             reformulate_by_context: bool,
             max_batch_size: Optional[int] = None,
             elmo_model: Optional[LanguageModel] = None):
    """Store the model's components and check the attention configuration.

    ``encoder``, ``word_embed``, ``char_embed``, ``max_batch_size`` and
    ``elmo_model`` go to the parent constructor; the remaining arguments are
    kept as same-named attributes.

    Raises:
        ValueError: If the attention required by ``reformulate_by_context``
            is ``None``: question-to-context when the flag is set,
            context-to-question when it is not.
    """
    parent_kwargs = dict(
        encoder=encoder,
        word_embed=word_embed,
        char_embed=char_embed,
        max_batch_size=max_batch_size,
        elmo_model=elmo_model,
    )
    super().__init__(**parent_kwargs)

    # Embedding and encoding components.
    self.embed_mapper = embed_mapper
    self.sequence_encoder = sequence_encoder
    self.sentences_encoder = sentences_encoder
    self.sentence_mapper = sentence_mapper

    # Reformulation components.
    self.reread_mapper = reread_mapper
    self.pre_attention_mapper = pre_attention_mapper
    self.question_to_context_attention = question_to_context_attention
    self.context_to_question_attention = context_to_question_attention
    self.reformulate_by_context = reformulate_by_context

    # One predictor per iteration.
    self.first_predictor = first_predictor
    self.second_predictor = second_predictor
    self.max_pool = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)

    # The attention that is applied last depends on the reformulation mode.
    if self.reformulate_by_context:
        last_attention = question_to_context_attention
    else:
        last_attention = context_to_question_attention
    if last_attention is None:
        raise ValueError("The last attention must be defined")
def get_model(rnn_dim, use_elmo, keep_rate=0.8):
    """Build an ``IterativeContextMaxSentenceModel``.

    Args:
        rnn_dim: Size of the ``CudnnGru`` and of the reformulation's
            ``CudnnGruEncoder``.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is used and
            the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Value for the dropout layer preceding the embedding GRU.

    Returns:
        The configured ``IterativeContextMaxSentenceModel``.
    """
    # The same GRU object is used in the embed mapper and as sentence_mapper.
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder()
    base_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    if not use_elmo:
        elmo_model = None
        embed_mapper = base_mapper
    else:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False,
                                   rnn_layer=base_mapper, lm_reduce=lm_reduce)

    reformulation = ProjectMapEncodeReformulation(
        project_layer=None,
        sequence_mapper=None,
        encoder=CudnnGruEncoder(rnn_dim, w_init=TruncatedNormal(stddev=0.05)),
    )
    # reformulation = WeightedSumThenProjectReformulation(rnn_dim*2, activation='relu')
    # reformulation = ProjectThenWeightedSumReformulation(rnn_dim*2, activation='relu')

    input_encoder = QuestionsAndParagraphsEncoder(iterative_answers, use_sentence_segments=True)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return IterativeContextMaxSentenceModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=gru,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        reformulation_layer=reformulation,
        max_batch_size=128,
    )
def get_fixed_context_to_question(rnn_dim):
    """Build a ``SingleFixedContextToQuestionModel``.

    Args:
        rnn_dim: Size of the ``CudnnGru``; fully connected layers use ``rnn_dim * 2``.

    Returns:
        The configured ``SingleFixedContextToQuestionModel``.
    """
    # The same GRU object appears in every recurrent stage below.
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    # One plain dropout+GRU stage followed by two residual ones.
    embed_mapper = SequenceMapperSeq(
        SequenceMapperSeq(VariationalDropoutLayer(0.8), gru),
        ResidualLayer(SequenceMapperSeq(VariationalDropoutLayer(0.8), gru)),
        ResidualLayer(SequenceMapperSeq(VariationalDropoutLayer(0.8), gru)),
    )

    # Disabled alternative for context_mapper / question_mapper:
    # ResidualLayer(
    #     SequenceMapperSeq(
    #         VariationalDropoutLayer(0.8),
    #         StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
    #         FullyConnected(rnn_dim*2, activation=None)))
    context_mapper = None
    context_encoder = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    question_mapper = None

    merger = WithConcatOptions(dot=True, sub=True, hadamard=True, raw=True, project=False)
    post_merger = SequenceMapperSeq(
        FullyConnected(rnn_dim * 2, activation='relu'),
        ResidualLayer(SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            gru,
            FullyConnected(rnn_dim * 2, activation='relu'),
        )),
    )

    return SingleFixedContextToQuestionModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        embed_mapper=embed_mapper,
        context_mapper=context_mapper,
        context_encoder=context_encoder,
        question_mapper=question_mapper,
        merger=merger,
        post_merger=post_merger,
        final_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor(),
    )
def get_basic_model(rnn_dim, post_merger_params: Optional[dict] = None, use_elmo=False, keep_rate=0.8):
    """Build a ``BasicSingleContextAndQuestionIndependentModel``.

    Args:
        rnn_dim: Size of the ``CudnnGru`` in the embedding mapper.
        post_merger_params: Keyword arguments for ``get_mlp``; when ``None``
            no post-merger is built.
        use_elmo: When truthy, the model from ``get_squad_elmo()`` is used and
            the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Value for the dropout layer preceding the embedding GRU.

    Returns:
        The configured ``BasicSingleContextAndQuestionIndependentModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()
    base_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)
    # Disabled deeper alternative:
    # embed_mapper = SequenceMapperSeq(
    #     SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer),
    #     ResidualLayer(SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer)),
    #     ResidualLayer(SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer))
    # )

    if not use_elmo:
        elmo_model = None
        embed_mapper = base_mapper
    else:
        print("Using Elmo!")
        elmo_model = get_squad_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=True,
                                   rnn_layer=base_mapper, lm_reduce=lm_reduce)

    if post_merger_params is None:
        post_merger = None
    else:
        post_merger = get_mlp(**post_merger_params)

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return BasicSingleContextAndQuestionIndependentModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        merger=ConcatWithProductSub(),
        post_merger=post_merger,
        predictor=BinaryFixedPredictor(sigmoid=True),
        max_batch_size=128,
    )
def get_bottleneck_to_seq_model(rnn_dim, q2c: bool, res_rnn: bool, res_self_att: bool, seq_len=50):
    """Build a ``SingleContextBottleneckToSeqQuestionModel``.

    Args:
        rnn_dim: Size of the ``CudnnGru`` and of the generator's ``LSTMCell``;
            fully connected layers use ``rnn_dim * 2``.
        q2c: When true, a question-to-context attention is built as well;
            otherwise ``None`` is passed for it.
        res_rnn: Forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
        res_self_att: Forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
        seq_len: Forwarded to ``GenerativeRNN`` as ``seq_len``.

    Returns:
        The configured ``SingleContextBottleneckToSeqQuestionModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()
    post_attention = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)

    if q2c:
        q2c_attention = AttentionWithPostMapper(
            BiAttention(TriLinear(bias=True), True), post_mapper=post_attention)
    else:
        q2c_attention = None
    c2q_attention = AttentionWithPostMapper(
        BiAttention(TriLinear(bias=True), True), post_mapper=post_attention)

    lstm_init = tf.initializers.truncated_normal(stddev=0.05)
    lstm_cell = tf.contrib.rnn.LSTMCell(num_units=rnn_dim, initializer=lstm_init)
    sequence_generator = GenerativeRNN(
        lstm_cell,
        output_layer=FullyConnected(rnn_dim * 2, activation='relu'),
        vec_to_in=FullyConnected(rnn_dim * 2, activation='relu'),
        seq_len=seq_len,
        include_original_vec=False,
    )

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                            init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(0.8),
        gru,
    )

    return SingleContextBottleneckToSeqQuestionModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        embed_mapper=embed_mapper,
        sequence_generator=sequence_generator,
        pre_attention=VariationalDropoutLayer(0.8),
        question_to_context_attention=q2c_attention,
        context_to_question_attention=c2q_attention,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor(),
    )
def get_contexts_to_question_model(rnn_dim, post_merge):
    """Build a ``ContextsToQuestionModel`` with a ``MaxMerge`` attention merger.

    Args:
        rnn_dim: Size of the ``CudnnGru``; fully connected layers use ``rnn_dim * 2``.
        post_merge: Selects the residual block used as the merger's
            ``post_map_layer``. One of ``'res_rnn_self_att'`` (dropout, GRU,
            dropout, self-attention, projection), ``'res_rnn'`` (dropout, GRU,
            projection) or ``'res_self_att'`` (dropout, self-attention,
            projection).

    Returns:
        The configured ``ContextsToQuestionModel``.

    Raises:
        NotImplementedError: If ``post_merge`` is not one of the values above.
    """
    supported = ('res_rnn_self_att', 'res_rnn', 'res_self_att')
    # Fail fast, before any component is built, and say what was wrong.
    if post_merge not in supported:
        raise NotImplementedError(
            "Unsupported post_merge {!r}; expected one of {}".format(post_merge, ", ".join(supported)))

    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    # Every variant is: projection -> residual(variant-specific steps).
    projection = FullyConnected(rnn_dim * 2, activation="relu")
    if post_merge == 'res_rnn_self_att':
        residual_steps = [
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(rnn_dim * 2, activation="relu"),
        ]
    elif post_merge == 'res_rnn':
        residual_steps = [
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            FullyConnected(rnn_dim * 2, activation="relu"),
        ]
    else:  # 'res_self_att', guaranteed by the validation above
        residual_steps = [
            VariationalDropoutLayer(0.8),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(rnn_dim * 2, activation="relu"),
        ]
    post_map_layer = SequenceMapperSeq(
        projection,
        ResidualLayer(SequenceMapperSeq(*residual_steps)),
    )

    return ContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        attention_merger=MaxMerge(pre_map_layer=None, post_map_layer=post_map_layer),
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor())
class IterativeContextReReadSimpleScoreModel(MultipleContextModel):
    """
    Calculating the similarities by a simple dot product between question and paragraph representations.
    This is a more neat model which we should check to see if on par with the more complicated one above.
    """

    def __init__(
            self,
            encoder: QuestionsAndParagraphsEncoder,
            word_embed: Optional[WordEmbedder],
            char_embed: Optional[CharWordEmbedder],
            embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
            sequence_encoder: SequenceEncoder,
            sentences_encoder: SentencesEncoder,
            sentence_mapper: Optional[SequenceMapper],
            reread_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
            pre_attention_mapper: Optional[SequenceMapper],
            context_to_question_attention: Optional[AttentionWithPostMapper],
            question_to_context_attention: Optional[AttentionWithPostMapper],
            first_predictor: BinaryNullPredictor,
            second_predictor: BinaryNullPredictor,
            reformulate_by_context: bool,
            max_batch_size: Optional[int] = None,
            elmo_model: Optional[LanguageModel] = None):
        """Store the model's components and check the attention configuration.

        ``encoder``, ``word_embed``, ``char_embed``, ``max_batch_size`` and
        ``elmo_model`` are passed to the parent constructor; all other
        arguments are stored as same-named attributes.

        Raises:
            ValueError: If the attention applied last is ``None``:
                question-to-context when ``reformulate_by_context`` is true,
                context-to-question otherwise.
        """
        super().__init__(encoder=encoder, word_embed=word_embed, char_embed=char_embed,
                         max_batch_size=max_batch_size, elmo_model=elmo_model)
        self.embed_mapper = embed_mapper
        self.sequence_encoder = sequence_encoder
        self.sentences_encoder = sentences_encoder
        self.sentence_mapper = sentence_mapper
        self.reread_mapper = reread_mapper
        self.pre_attention_mapper = pre_attention_mapper
        self.question_to_context_attention = question_to_context_attention
        self.context_to_question_attention = context_to_question_attention
        self.reformulate_by_context = reformulate_by_context
        self.first_predictor = first_predictor
        self.second_predictor = second_predictor
        # Pools the per-sentence scores into a single score per paragraph.
        self.max_pool = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)
        # The attention that runs last in the reformulation step is applied
        # unconditionally there, so it must be present.
        if (self.reformulate_by_context and question_to_context_attention is None) or \
                (not self.reformulate_by_context and context_to_question_attention is None):
            raise ValueError("The last attention must be defined")

    def _get_predictions_for(self,
                             is_train,
                             question_embed, question_mask,
                             context_embed, context_mask,
                             answer,
                             question_lm, context_lm,
                             sentence_segments, sentence_mask):
        """Build the two-iteration prediction graph.

        The context inputs are unstacked into exactly two paragraphs along
        axis 1. The first paragraph is scored against the encoded question;
        the question is then reformulated using the first paragraph, and the
        second paragraph is scored against the reformulated question.

        Returns:
            ``MultipleBinaryPredictions`` holding the first and second
            paragraph predictions, in that order.
        """
        # NOTE(review): the nesting of the variable scopes below determines
        # variable and tensor names - confirm it matches existing checkpoints.
        question_rep, context_rep = question_embed, context_embed
        # Split the per-paragraph inputs (two paragraphs stacked on axis 1).
        context1_rep, context2_rep = tf.unstack(context_rep, axis=1, num=2)
        context1_mask, context2_mask = tf.unstack(context_mask, axis=1, num=2)
        context1_sentence_segments, context2_sentence_segments = tf.unstack(
            sentence_segments, axis=1, num=2)
        context1_sentence_mask, context2_sentence_mask = tf.unstack(
            sentence_mask, axis=1, num=2)

        # Extra positional inputs for the mappers; left empty without ELMo.
        q_lm_in, c1_lm_in, c2_lm_in = [], [], []
        if self.use_elmo:
            context1_lm, context2_lm = tf.unstack(context_lm, axis=1, num=2)
            q_lm_in = [question_lm]
            c1_lm_in = [context1_lm]
            c2_lm_in = [context2_lm]

        if self.embed_mapper is not None:
            # The scope is created for the first paragraph and reused for the
            # second paragraph and the question.
            with tf.variable_scope("map_embed"):
                context1_rep = self.embed_mapper.apply(is_train, context1_rep, context1_mask, *c1_lm_in)
            with tf.variable_scope("map_embed", reuse=True):
                context2_rep = self.embed_mapper.apply(is_train, context2_rep, context2_mask, *c2_lm_in)
                question_rep = self.embed_mapper.apply(is_train, question_rep, question_mask, *q_lm_in)

        with tf.variable_scope("seq_enc"):
            question_enc = self.sequence_encoder.apply(is_train, question_rep, question_mask)
            # Named and registered so the encoding can be looked up later.
            question_enc = tf.identity(question_enc, name='encode_question')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, question_enc)

        def encode_sentences(context, sentence_segs, sentence_mask, rep_name):
            # Encodes a paragraph's sentences, optionally maps them, and
            # registers the result under ``rep_name``.
            context = self.sentences_encoder.apply(context, sentence_segs, sentence_mask)
            if self.sentence_mapper is not None:
                with tf.variable_scope('sentence_mapper'):
                    context = self.sentence_mapper.apply(is_train, context, mask=sentence_mask)
            context = tf.identity(context, name=rep_name)
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, context)
            return context

        with tf.variable_scope('sentences_enc'):
            context1_sent_rep = encode_sentences(context1_rep, context1_sentence_segments,
                                                 context1_sentence_mask, 'encode_context1')
        with tf.variable_scope('sentences_enc', reuse=True):
            context2_sent_rep = encode_sentences(context2_rep, context2_sentence_segments,
                                                 context2_sentence_mask, 'encode_context2')

        # First Iteration (same as in the single context model)
        with tf.variable_scope("context1_relevance"):
            # Score of each sentence: product of its representation with the
            # question encoding; then max-pooled over the sentences.
            sentence_logits = tf.matmul(context1_sent_rep, tf.expand_dims(question_enc, axis=2))
            max_logits = self.max_pool.apply(is_train, sentence_logits, context1_sentence_mask)
            with tf.variable_scope("predictor"):
                # The first predictor gets answer[0] plus everything from answer[2:].
                context1_pred = self.first_predictor.apply(
                    is_train, max_logits, [answer[0]] + answer[2:])

        # Question Reformulation
        with tf.variable_scope("reformulation"):
            if self.reread_mapper is not None:
                # Restart from the raw embeddings and re-map them with the
                # dedicated reread mapper (first paragraph and question only).
                question_rep, context_rep = question_embed, context_embed
                context1_rep, _ = tf.unstack(context_rep, axis=1, num=2)
                context1_mask, _ = tf.unstack(context_mask, axis=1, num=2)
                if not isinstance(self.reread_mapper, ElmoWrapper):
                    # Mappers other than ElmoWrapper get no extra inputs.
                    c1_lm_in, q_lm_in = [], []
                with tf.variable_scope("reread_map_embed"):
                    context1_rep = self.reread_mapper.apply(
                        is_train, context1_rep, context1_mask, *c1_lm_in)
                with tf.variable_scope("reread_map_embed", reuse=True):
                    question_rep = self.reread_mapper.apply(
                        is_train, question_rep, question_mask, *q_lm_in)

            if self.pre_attention_mapper is not None:
                with tf.variable_scope("pre_att"):
                    question_rep = self.pre_attention_mapper.apply(
                        is_train, question_rep, question_mask)
                with tf.variable_scope("pre_att", reuse=True):
                    context1_rep = self.pre_attention_mapper.apply(
                        is_train, context1_rep, context1_mask)

            if not self.reformulate_by_context:
                # Optional q2c first, then the mandatory c2q; the new question
                # is the encoding of the attended question sequence.
                if self.question_to_context_attention is not None:
                    with tf.variable_scope('q2c'):
                        context1_rep = self.question_to_context_attention.apply(
                            is_train, x=context1_rep, keys=question_rep, memories=question_rep,
                            x_mask=context1_mask, memory_mask=question_mask)
                    if self.pre_attention_mapper is not None:
                        with tf.variable_scope("pre_att", reuse=True):
                            context1_rep = self.pre_attention_mapper.apply(
                                is_train, context1_rep, context1_mask)
                with tf.variable_scope('c2q'):
                    question_rep = self.context_to_question_attention.apply(
                        is_train, x=question_rep, keys=context1_rep, memories=context1_rep,
                        x_mask=question_mask, memory_mask=context1_mask)
                reformulated_q = self.sequence_encoder.apply(
                    is_train, question_rep, question_mask)
            else:
                # Mirror image: optional c2q first, then the mandatory q2c; the
                # new question is the encoding of the attended first paragraph.
                if self.context_to_question_attention is not None:
                    with tf.variable_scope('c2q'):
                        question_rep = self.context_to_question_attention.apply(
                            is_train, x=question_rep, keys=context1_rep, memories=context1_rep,
                            x_mask=question_mask, memory_mask=context1_mask)
                    if self.pre_attention_mapper is not None:
                        with tf.variable_scope("pre_att", reuse=True):
                            question_rep = self.pre_attention_mapper.apply(
                                is_train, question_rep, question_mask)
                with tf.variable_scope('q2c'):
                    context1_rep = self.question_to_context_attention.apply(
                        is_train, x=context1_rep, keys=question_rep, memories=question_rep,
                        x_mask=context1_mask, memory_mask=question_mask)
                reformulated_q = self.sequence_encoder.apply(
                    is_train, context1_rep, context1_mask)
            reformulated_q = tf.identity(reformulated_q, name='reformulated_question')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, reformulated_q)

        # Second Iteration
        with tf.variable_scope("context2_relevance"):
            # Same scoring as the first iteration, against the reformulated question.
            sentence_logits = tf.matmul(context2_sent_rep, tf.expand_dims(reformulated_q, axis=2))
            max_logits = self.max_pool.apply(is_train, sentence_logits, context2_sentence_mask)
            with tf.variable_scope("predictor"):
                # The second predictor gets answer[1] plus everything from answer[2:].
                context2_pred = self.second_predictor.apply(
                    is_train, max_logits, [answer[1]] + answer[2:])

        return MultipleBinaryPredictions([context1_pred, context2_pred])
class SingleContextMaxSentenceModel(MultipleContextModel):
    """
    Model for a question and a single paragraph which takes into account the sentences.
    This model first creates an encoding for each sentence, and then performs a fully connected layer
    on the encodings to get each sentence's prediction. It then gets the maximum value and predicts with it.
    """

    def __init__(self,
                 encoder: QuestionsAndParagraphsEncoder,
                 word_embed: Optional[WordEmbedder],
                 char_embed: Optional[CharWordEmbedder],
                 embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
                 sequence_encoder: SequenceEncoder,
                 sentences_encoder: SentencesEncoder,
                 merger: FixedMergeLayer,
                 post_merger: Optional[Mapper],
                 max_batch_size: Optional[int] = None,
                 elmo_model: Optional[LanguageModel] = None
                 ):
        """Store the model's components.

        ``encoder``, ``word_embed``, ``char_embed``, ``max_batch_size`` and
        ``elmo_model`` are passed to the parent constructor; the remaining
        arguments are stored as same-named attributes. The predictor and the
        pooling layers are created here rather than injected.
        """
        super().__init__(encoder=encoder, word_embed=word_embed, char_embed=char_embed,
                         max_batch_size=max_batch_size, elmo_model=elmo_model)
        self.embed_mapper = embed_mapper
        self.sequence_encoder = sequence_encoder
        self.sentences_encoder = sentences_encoder
        self.merger = merger
        self.post_merger = post_merger
        self.predictor = BinaryNullPredictor()
        # Pools the per-sentence logits into a single logit per paragraph.
        self.max_pool = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)
        self.mean_pool = MeanPool()

    def _get_predictions_for(self,
                             is_train,
                             question_embed, question_mask,
                             context_embed, context_mask,
                             answer,
                             question_lm, context_lm,
                             sentence_segments, sentence_mask):
        """Build the prediction graph for one question and one paragraph.

        The context inputs are unstacked along axis 1 and must hold exactly
        one paragraph. Each sentence encoding is merged with the question
        encoding and projected to one logit; the logits are max-pooled and
        the result is handed to the predictor.

        Returns:
            Whatever ``self.predictor.apply`` returns for the pooled logits.
        """
        question_rep, context_rep = question_embed, context_embed
        # Drop the paragraph axis (exactly one paragraph is expected).
        context1_rep, = tf.unstack(context_rep, axis=1, num=1)
        context1_mask, = tf.unstack(context_mask, axis=1, num=1)
        sentence_segments, = tf.unstack(sentence_segments, axis=1, num=1)
        sentence_mask, = tf.unstack(sentence_mask, axis=1, num=1)

        # Extra positional inputs for the mapper; left empty without ELMo.
        q_lm_in, c1_lm_in = [], []
        if self.use_elmo:
            context1_lm, = tf.unstack(context_lm, axis=1, num=1)
            q_lm_in = [question_lm]
            c1_lm_in = [context1_lm]

        if self.embed_mapper is not None:
            # The scope is created for the paragraph and reused for the question.
            with tf.variable_scope("map_embed"):
                context1_rep = self.embed_mapper.apply(is_train, context1_rep, context1_mask, *c1_lm_in)
            with tf.variable_scope("map_embed", reuse=True):
                question_rep = self.embed_mapper.apply(is_train, question_rep, question_mask, *q_lm_in)

        with tf.variable_scope("seq_enc"):
            question_rep = self.sequence_encoder.apply(is_train, question_rep, question_mask)

        with tf.variable_scope("sentences_enc"):
            context1_rep = self.sentences_encoder.apply(context1_rep, sentence_segments, sentence_mask)
            # Named and registered so the sentence encodings can be looked up later.
            context1_rep = tf.identity(context1_rep, name='encode_context')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, context1_rep)

        with tf.variable_scope("merger"):
            merged_rep = self.merger.apply(is_train, tensor=context1_rep, fixed_tensor=question_rep,
                                           mask=sentence_mask)

        if self.post_merger is not None:
            with tf.variable_scope("post_merger"):
                merged_rep = self.post_merger.apply(is_train, merged_rep, mask=sentence_mask)

        with tf.variable_scope("sentence_level_predictions"):
            # One logit per sentence, then the maximum over the sentences.
            sentences_logits = fully_connected(merged_rep, 1, use_bias=True, activation=None,
                                               kernel_initializer=get_keras_initialization('glorot_uniform'))
            max_logits = self.max_pool.apply(is_train, sentences_logits, sentence_mask)

        with tf.variable_scope("predictor"):
            return self.predictor.apply(is_train, max_logits, answer)

    def __setstate__(self, state):
        """Restore state, defaulting ``post_merger`` to ``None`` when the key is absent."""
        if "post_merger" not in state:
            state["post_merger"] = None
        super().__setstate__(state)