def __init__(self,
             vocab,
             text_field_embedder,
             num_highway_layers,
             phrase_layer,
             attention_similarity_function,
             modeling_layer,
             cove_layer=None,
             elmo_layer=None,
             deep_elmo=False,
             dropout=0.2,
             mask_lstms=True,
             initializer=InitializerApplicator(),
             regularizer=None):
    super(HeadlessPairAttnEncoder, self).__init__(vocab)  # , regularizer)
    if text_field_embedder is None:  # just using ELMo embeddings
        self._text_field_embedder = lambda x: x
        d_emb = 0
        self._highway_layer = lambda x: x
    else:
        self._text_field_embedder = text_field_embedder
        d_emb = text_field_embedder.get_output_dim()
        self._highway_layer = TimeDistributed(Highway(d_emb, num_highway_layers))

    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._cove = cove_layer
    self._elmo = elmo_layer
    self._deep_elmo = deep_elmo
    self.pad_idx = vocab.get_token_index(vocab._padding_token)

    d_inp_phrase = phrase_layer.get_input_dim()
    d_out_phrase = phrase_layer.get_output_dim()
    d_out_model = modeling_layer.get_output_dim()
    d_inp_model = modeling_layer.get_input_dim()
    self.output_dim = d_out_model

    if (elmo_layer is None and d_inp_model != 2 * d_out_phrase) or \
            (elmo_layer is not None and not deep_elmo and d_inp_model != 2 * d_out_phrase) or \
            (elmo_layer is not None and deep_elmo and d_inp_model != 2 * d_out_phrase + 1024):
        raise ConfigurationError(
            "The input dimension to the modeling_layer must be "
            "equal to 2 times the encoding dimension of the phrase_layer "
            "(plus 1024 when deep ELMo is used). "
            "Found {} and 2 * {} respectively.".format(d_inp_model, d_out_phrase))
    if (cove_layer is None and elmo_layer is None and d_emb != d_inp_phrase) \
            or (cove_layer is not None and d_emb + 600 != d_inp_phrase) \
            or (elmo_layer is not None and d_emb + 1024 != d_inp_phrase):
        raise ConfigurationError(
            "The output dimension of the text_field_embedder "
            "(plus 600 for CoVe or 1024 for ELMo, if used) must match the "
            "input dimension of the phrase_encoder. Found {} and {} "
            "respectively.".format(d_emb, d_inp_phrase))

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
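# A minimal sketch (not part of the original code) of the dimension bookkeeping
# that the two ConfigurationError checks above enforce. The helper name
# `expected_dims` is hypothetical; the constants come from the checks themselves:
# CoVe vectors are 600-d and ELMo vectors are 1024-d.
def expected_dims(d_emb, d_out_phrase, use_cove=False, use_elmo=False, deep_elmo=False):
    d_inp_phrase = d_emb + (600 if use_cove else 0) + (1024 if use_elmo else 0)
    d_inp_model = 2 * d_out_phrase + (1024 if use_elmo and deep_elmo else 0)
    return d_inp_phrase, d_inp_model

# e.g. 300-d GloVe embeddings with ELMo and a phrase layer with 512-d output:
assert expected_dims(300, 512, use_elmo=True) == (300 + 1024, 2 * 512)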
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    span_end_encoder: Seq2SeqEncoder,
    dropout: float = 0.2,
    mask_lstms: bool = True,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't
    # necessarily obvious from the configuration files, so we check here.
    check_dimensions_match(
        modeling_layer.get_input_dim(),
        4 * encoding_dim,
        "modeling layer input dim",
        "4 * encoding dim",
    )
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        phrase_layer.get_input_dim(),
        "text field embedder output dim",
        "phrase layer input dim",
    )
    check_dimensions_match(
        span_end_encoder.get_input_dim(),
        4 * encoding_dim + 3 * modeling_dim,
        "span end encoder input dim",
        "4 * encoding dim + 3 * modeling dim",
    )

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
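# For reference, `check_dimensions_match` is a small guard from
# `allennlp.common.checks`. A minimal sketch of its behavior (illustrative,
# not the library source): it raises at construction time instead of letting
# the model fail later with an opaque shape error.
from allennlp.common.checks import ConfigurationError

def check_dimensions_match_sketch(dimension_1, dimension_2, dim_1_name, dim_2_name):
    if dimension_1 != dimension_2:
        raise ConfigurationError(f"{dim_1_name} must match {dim_2_name}, "
                                 f"but got {dimension_1} and {dimension_2} instead")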
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             modeling_layer_memory: Seq2SeqEncoder,
             margin: float,
             max: float,
             dropout: float = 0.2,
             mask_lstms: bool = False,
             memory_enabled: bool = False,
             memory_update: bool = True,
             memory_concat: bool = False,
             save_memory_snapshots: bool = False,
             save_entity_embeddings: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             answer_layer_image: Seq2SeqEncoder = None,
             answer_layer_text: Seq2SeqEncoder = None,
             question_image_encoder: Seq2SeqEncoder = None,
             step_layer: Seq2SeqEncoder = None,
             num_heads: int = 2,
             num_slots: int = 61,  # Maximum number of entities in the training set.
             last_layer_hidden_dims: List[int] = None,
             last_layer_num_layers: int = 4,
             projection_input_dim: int = 2048,
             projection_hidden_dims: List[int] = None,
             save_step_wise_attentions: bool = False) -> None:
    super(ProceduralReasoningNetworksforRecipeQA, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._modeling_layer_memory = modeling_layer_memory
    self.margin = torch.FloatTensor([margin]).cuda()
    self.cos = nn.CosineSimilarity(dim=-1, eps=1e-6).cuda()
    self.for_max = torch.FloatTensor([max]).cuda()
    self._memory_enabled = memory_enabled
    self._memory_update = memory_update
    self._memory_concat = memory_concat
    self._save_memory_snapshots = save_memory_snapshots
    self._save_entity_embeddings = save_entity_embeddings
    self._step_layer = step_layer
    self._label_acc = CategoricalAccuracy()
    self.save_step_wise_attentions = save_step_wise_attentions

    if self._memory_enabled:
        head_size = int(step_layer.get_output_dim() / num_heads)
        self.mem_module = RelationalMemory(
            mem_slots=num_slots,
            head_size=head_size,
            input_size=head_size * num_heads,
            num_heads=num_heads,
            num_blocks=1,
            forget_bias=1.,
            input_bias=0.,
        ).cuda(0)
        last_layer_input_dim = 10 * modeling_layer.get_output_dim()
    else:
        last_layer_input_dim = 5 * modeling_layer.get_output_dim()

    self._activation = torch.nn.Tanh()
    self._last_layer = FeedForward(last_layer_input_dim,
                                   last_layer_num_layers,
                                   last_layer_hidden_dims,
                                   self._activation,
                                   dropout)
    self._answer_layer_image = answer_layer_image  # uses image encoder for image input
    self._answer_layer_text = answer_layer_text  # uses text encoder for text input
    self._question_image_encoder = question_image_encoder  # converts question image inputs to encoding dim
    self._vocab = vocab

    # TODO: Replace hard-coded parameters with config parameters.
    self._mlp_projector = TimeDistributed(
        torch.nn.Sequential(
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_input_dim, projection_hidden_dims[0]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[0], projection_hidden_dims[1]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[1], projection_hidden_dims[2]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[2], projection_hidden_dims[3]),
        ))

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    if self._save_memory_snapshots:
        if os.path.isfile('memory_snapshots_by_recipe.pkl'):
            # make sure we start with a clean file
            os.remove('memory_snapshots_by_recipe.pkl')
    if self._save_entity_embeddings:
        if os.path.isfile('entity_embeddings_final.pkl'):
            # make sure we start with a clean file
            os.remove('entity_embeddings_final.pkl')

    initializer(self)
def test_forward_works_on_nd_input(self):
    highway = Highway(2, 2)
    input_tensor = Variable(torch.ones(2, 2, 2))
    output = highway(input_tensor)
    assert output.size() == (2, 2, 2)
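# The shape assertion above holds because each highway layer is a gated mix of
# a transformed input and the identity path, so output dim equals input dim.
# A minimal single-layer sketch (assumed shape-equivalent to
# allennlp.modules.Highway; not the library source):
import torch

class TinyHighway(torch.nn.Module):
    def __init__(self, input_dim: int):
        super().__init__()
        # One linear produces both the projection and the gate.
        self._linear = torch.nn.Linear(input_dim, input_dim * 2)

    def forward(self, x):
        projection, gate = self._linear(x).chunk(2, dim=-1)
        gate = torch.sigmoid(gate)
        # y = g * f(Wx + b) + (1 - g) * x preserves the last dimension.
        return gate * torch.relu(projection) + (1 - gate) * x

out = TinyHighway(2)(torch.ones(2, 2, 2))
assert out.size() == (2, 2, 2)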
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             mask_lstms: bool = True) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))

    initializer(self)

    # Bidaf has lots of layer dimensions which need to match up - these
    # aren't necessarily obvious from the configuration files, so we check
    # here.
    if modeling_layer.get_input_dim() != 4 * encoding_dim:
        raise ConfigurationError(
            "The input dimension to the modeling_layer must be "
            "equal to 4 times the encoding dimension of the phrase_layer. "
            "Found {} and 4 * {} respectively.".format(
                modeling_layer.get_input_dim(), encoding_dim))
    if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder (embedding_dim + "
            "char_cnn) must match the input dimension of the phrase_encoder. "
            "Found {} and {}, respectively.".format(
                text_field_embedder.get_output_dim(), phrase_layer.get_input_dim()))
    if span_end_encoder.get_input_dim() != encoding_dim * 4 + modeling_dim * 3:
        raise ConfigurationError(
            "The input dimension of the span_end_encoder should be equal to "
            "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. "
            "Found {} and (4 * {} + 3 * {}) respectively.".format(
                span_end_encoder.get_input_dim(), encoding_dim, modeling_dim))

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention_layer: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    dropout_prob: float = 0.1,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    answering_abilities: List[str] = None,
) -> None:
    super().__init__(vocab, regularizer)

    if answering_abilities is None:
        self.answering_abilities = [
            "passage_span_extraction",
            "question_span_extraction",
            "addition_subtraction",
            "counting",
        ]
    else:
        self.answering_abilities = answering_abilities

    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()

    self._text_field_embedder = text_field_embedder
    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim)
    self._modeling_layer = modeling_layer
    self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1)
    self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1)

    if len(self.answering_abilities) > 1:
        self._answer_ability_predictor = FeedForward(
            modeling_out_dim + encoding_out_dim,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, len(self.answering_abilities)],
            num_layers=2,
            dropout=dropout_prob,
        )

    if "passage_span_extraction" in self.answering_abilities:
        self._passage_span_extraction_index = self.answering_abilities.index(
            "passage_span_extraction")
        self._passage_span_start_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )
        self._passage_span_end_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )

    if "question_span_extraction" in self.answering_abilities:
        self._question_span_extraction_index = self.answering_abilities.index(
            "question_span_extraction")
        self._question_span_start_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )
        self._question_span_end_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )

    if "addition_subtraction" in self.answering_abilities:
        self._addition_subtraction_index = self.answering_abilities.index(
            "addition_subtraction")
        self._number_sign_predictor = FeedForward(
            modeling_out_dim * 3,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 3],
            num_layers=2,
        )

    if "counting" in self.answering_abilities:
        self._counting_index = self.answering_abilities.index("counting")
        self._count_number_predictor = FeedForward(
            modeling_out_dim,
            activations=[Activation.by_name("relu")(),
                         Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 10],
            num_layers=2,
        )

    self._drop_metrics = DropEmAndF1()
    self._dropout = torch.nn.Dropout(p=dropout_prob)

    initializer(self)
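# Each predictor above is a two-layer MLP. A plain-PyTorch sketch of what one
# such FeedForward head is assumed to expand to (illustrative, not the AllenNLP
# implementation; `modeling_out_dim` is given a made-up value here):
import torch

modeling_out_dim = 128  # hypothetical size for illustration

# FeedForward(modeling_out_dim * 2, num_layers=2,
#             hidden_dims=[modeling_out_dim, 1],
#             activations=[relu, linear]) roughly corresponds to:
span_start_head = torch.nn.Sequential(
    torch.nn.Linear(modeling_out_dim * 2, modeling_out_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(modeling_out_dim, 1),  # "linear" activation = identity
)

scores = span_start_head(torch.randn(4, 50, modeling_out_dim * 2))
assert scores.shape == (4, 50, 1)  # one logit per passage position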
print("Word representation dimensionality: ", embeddings_batch_question.shape[2]) batch_size = embeddings_batch_question.size(0) passage_length = embeddings_batch_passage.size(1) """ ################### Highway LAYER ######################################### The number of highway layers to use in between embedding the input and passing it through the phrase layer. """ print("-------------- HIGHWAY LAYER ---------------") num_highway_layers = 2 highway_layer = TimeDistributed( Highway(text_field_embedder.get_output_dim(), num_highway_layers)) highway_batch_question = highway_layer(embeddings_batch_question) highway_batch_passage = highway_layer(embeddings_batch_passage) print("Question representations: ", highway_batch_question.shape) print("Passage representations: ", highway_batch_passage.shape) print("Maximum num words in question: ", highway_batch_question.shape[1]) print("Word representation dimensionality: ", highway_batch_question.shape[2]) """ ################### phrase_layer LAYER ######################################### NOTE: Since the LSTM implementation of PyTorch cannot apply dropout in the last layer, we just apply ourselves later """ print("-------------- PHRASE LAYER ---------------")
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             self_attention_layer: StackedSelfAttentionEncoder,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    # New self-attention layer
    self._self_attention_layer = self_attention_layer
    self._sa_matrix_attention = LegacyMatrixAttention(similarity_function)
    selfattent_dim = self_attention_layer.get_output_dim()  # 200 in our setup

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    # Original BiDAF: span_start_input_dim = encoding_dim * 4 + modeling_dim
    span_start_input_dim = encoding_dim * 4 + modeling_dim + 2 * selfattent_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    # Original BiDAF: span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim + 2 * selfattent_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't
    # necessarily obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(),
                           4 * encoding_dim + 2 * selfattent_dim,
                           "modeling layer input dim",
                           "4 * encoding dim + 2 * self attention dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim",
                           "phrase layer input dim")
    check_dimensions_match(
        span_end_encoder.get_input_dim(),
        4 * encoding_dim + 3 * modeling_dim + 2 * selfattent_dim,
        "span end encoder input dim",
        "4 * encoding dim + 3 * modeling dim + 2 * self attention dim")

    self._na_accuracy = CategoricalAccuracy()
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._na_dense = lambda in_dim: torch.nn.Linear(in_dim, 2).cuda()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
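# A worked example (with assumed sizes) of how the self-attention term widens
# the dimension checks relative to vanilla BiDAF. The 200-d figure comes from
# the comment above; all other values are hypothetical.
encoding_dim = 200      # phrase_layer.get_output_dim()
modeling_dim = 200      # modeling_layer.get_output_dim()
selfattent_dim = 200    # self_attention_layer.get_output_dim()

# Vanilla BiDAF needs a 4 * 200 = 800-d modeling input; the self-attended
# representation adds 2 * selfattent_dim on top of that.
assert 4 * encoding_dim + 2 * selfattent_dim == 1200
# Likewise the span-end encoder input grows from 4e + 3m = 1400 to:
assert 4 * encoding_dim + 3 * modeling_dim + 2 * selfattent_dim == 1800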