Example #1
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 num_highway_layers,
                 phrase_layer,
                 attention_similarity_function,
                 modeling_layer,
                 cove_layer=None,
                 elmo_layer=None,
                 deep_elmo=False,
                 dropout=0.2,
                 mask_lstms=True,
                 initializer=InitializerApplicator(),
                 regularizer=None):
        super(HeadlessPairAttnEncoder, self).__init__(vocab)  # regularizer deliberately not passed to super

        if text_field_embedder is None:  # just using ELMo embeddings
            self._text_field_embedder = lambda x: x
            d_emb = 0
            self._highway_layer = lambda x: x
        else:
            self._text_field_embedder = text_field_embedder
            d_emb = text_field_embedder.get_output_dim()
            self._highway_layer = TimeDistributed(
                Highway(d_emb, num_highway_layers))

        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._cove = cove_layer
        self._elmo = elmo_layer
        self._deep_elmo = deep_elmo
        self.pad_idx = vocab.get_token_index(vocab._padding_token)

        d_inp_phrase = phrase_layer.get_input_dim()
        d_out_phrase = phrase_layer.get_output_dim()
        d_out_model = modeling_layer.get_output_dim()
        d_inp_model = modeling_layer.get_input_dim()
        self.output_dim = d_out_model

        if (elmo_layer is None and d_inp_model != 2 * d_out_phrase) or \
            (elmo_layer is not None and not deep_elmo and d_inp_model != 2 * d_out_phrase) or \
            (elmo_layer is not None and deep_elmo and d_inp_model != 2 * d_out_phrase + 1024):
            raise ConfigurationError(
                "The input dimension to the modeling_layer must be "
                "equal to 2 times the encoding dimension of the phrase_layer "
                "(plus 1024 when deep ELMo is used). "
                "Found {} and 2 * {} respectively.".format(
                    d_inp_model, d_out_phrase))
        if (cove_layer is None and elmo_layer is None and d_emb != d_inp_phrase) \
            or (cove_layer is not None and d_emb + 600 != d_inp_phrase) \
            or (elmo_layer is not None and d_emb + 1024 != d_inp_phrase):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder "
                "(plus 600 for CoVe or 1024 for ELMo, when enabled) must "
                "match the input dimension of the phrase_encoder. Found {} "
                "and {} respectively.".format(d_emb, d_inp_phrase))
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
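The magic numbers in the checks above come from fixed embedding sizes: CoVe vectors are 600-dimensional and the original ELMo representations are 1024-dimensional. A minimal sketch of the expected phrase-layer input dimension (the helper name is ours, for illustration):

def expected_phrase_input_dim(d_emb: int, use_cove: bool = False,
                              use_elmo: bool = False) -> int:
    # Mirrors the checks above: CoVe appends a 600-d vector per token and
    # ELMo a 1024-d vector; otherwise the embedder output is used as-is.
    if use_cove:
        return d_emb + 600
    if use_elmo:
        return d_emb + 1024
    return d_emb

assert expected_phrase_input_dim(300, use_cove=True) == 900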
Example #2
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        matrix_attention: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        span_end_encoder: Seq2SeqEncoder,
        dropout: float = 0.2,
        mask_lstms: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = matrix_attention
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(
            modeling_layer.get_input_dim(),
            4 * encoding_dim,
            "modeling layer input dim",
            "4 * encoding dim",
        )
        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            phrase_layer.get_input_dim(),
            "text field embedder output dim",
            "phrase layer input dim",
        )
        check_dimensions_match(
            span_end_encoder.get_input_dim(),
            4 * encoding_dim + 3 * modeling_dim,
            "span end encoder input dim",
            "4 * encoding dim + 3 * modeling dim",
        )

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
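To make the dimension checks above concrete, here is the arithmetic for a hypothetical configuration (the sizes are ours, for illustration): a bidirectional phrase-layer LSTM and a bidirectional modeling LSTM, each with hidden size 100.

encoding_dim = 200   # hypothetical: bi-LSTM phrase layer, hidden size 100
modeling_dim = 200   # hypothetical: bi-LSTM modeling layer, hidden size 100

assert 4 * encoding_dim == 800                      # modeling layer input dim
assert 4 * encoding_dim + modeling_dim == 1000      # span-start predictor input
assert 4 * encoding_dim + 3 * modeling_dim == 1400  # span-end encoder input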
Example #3
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            num_highway_layers: int,
            phrase_layer: Seq2SeqEncoder,
            similarity_function: SimilarityFunction,
            modeling_layer: Seq2SeqEncoder,
            modeling_layer_memory: Seq2SeqEncoder,
            margin: float,
            max: float,
            dropout: float = 0.2,
            mask_lstms: bool = False,
            memory_enabled: bool = False,
            memory_update: bool = True,
            memory_concat: bool = False,
            save_memory_snapshots: bool = False,
            save_entity_embeddings: bool = False,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None,
            answer_layer_image: Seq2SeqEncoder = None,
            answer_layer_text: Seq2SeqEncoder = None,
            question_image_encoder: Seq2SeqEncoder = None,
            step_layer: Seq2SeqEncoder = None,
            num_heads: int = 2,
            num_slots: int = 61,  # Maximum number of entities in the training set.
            last_layer_hidden_dims: List[int] = None,
            last_layer_num_layers: int = 4,
            projection_input_dim: int = 2048,
            projection_hidden_dims: List[int] = None,
            save_step_wise_attentions: bool = False) -> None:

        super(ProceduralReasoningNetworksforRecipeQA,
              self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._modeling_layer_memory = modeling_layer_memory
        self.margin = torch.FloatTensor([margin]).cuda()
        self.cos = nn.CosineSimilarity(dim=-1, eps=1e-6).cuda()
        self.for_max = torch.FloatTensor([max]).cuda()
        self._memory_enabled = memory_enabled
        self._memory_update = memory_update
        self._memory_concat = memory_concat
        self._save_memory_snapshots = save_memory_snapshots
        self._save_entity_embeddings = save_entity_embeddings
        self._step_layer = step_layer
        self._label_acc = CategoricalAccuracy()
        self.save_step_wise_attentions = save_step_wise_attentions

        if self._memory_enabled:
            head_size = int(step_layer.get_output_dim() / num_heads)
            self.mem_module = RelationalMemory(
                mem_slots=num_slots,
                head_size=head_size,
                input_size=head_size * num_heads,
                num_heads=num_heads,
                num_blocks=1,
                forget_bias=1.,
                input_bias=0.,
            ).cuda(0)

            last_layer_input_dim = 10 * modeling_layer.get_output_dim()
        else:
            last_layer_input_dim = 5 * modeling_layer.get_output_dim()
        self._activation = torch.nn.Tanh()
        self._last_layer = FeedForward(last_layer_input_dim,
                                       last_layer_num_layers,
                                       last_layer_hidden_dims,
                                       self._activation, dropout)
        self._answer_layer_image = answer_layer_image  # uses image encoder for image input
        self._answer_layer_text = answer_layer_text  # uses text encoder for text input
        self._question_image_encoder = question_image_encoder  # converts question image inputs to encoding dim
        self._vocab = vocab
        # TODO: Replace hard coded parameters with config parameters
        self._mlp_projector = TimeDistributed(
            torch.nn.Sequential(
                torch.nn.Dropout(0.1, inplace=False),
                torch.nn.Linear(projection_input_dim,
                                projection_hidden_dims[0]),
                torch.nn.Tanh(),
                torch.nn.Dropout(0.1, inplace=False),
                torch.nn.Linear(projection_hidden_dims[0],
                                projection_hidden_dims[1]),
                torch.nn.Tanh(),
                torch.nn.Dropout(0.1, inplace=False),
                torch.nn.Linear(projection_hidden_dims[1],
                                projection_hidden_dims[2]),
                torch.nn.Tanh(),
                torch.nn.Dropout(0.1, inplace=False),
                torch.nn.Linear(projection_hidden_dims[2],
                                projection_hidden_dims[3]),
            ))
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        self._mask_lstms = mask_lstms

        if self._save_memory_snapshots:
            # Make sure we start with a clean file.
            if os.path.isfile('memory_snapshots_by_recipe.pkl'):
                os.remove('memory_snapshots_by_recipe.pkl')

        if self._save_entity_embeddings:
            # Make sure we start with a clean file.
            if os.path.isfile('entity_embeddings_final.pkl'):
                os.remove('entity_embeddings_final.pkl')
        initializer(self)
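The head size above is chosen so that num_heads attention heads tile the step-layer output exactly; worked through for a hypothetical 200-dimensional step layer with 2 heads:

step_output_dim = 200                      # hypothetical step_layer output dim
num_heads = 2
head_size = step_output_dim // num_heads   # 100
# input_size = head_size * num_heads recovers the full step-layer width.
assert head_size * num_heads == step_output_dim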
Example #4
    def test_forward_works_on_nd_input(self):
        highway = Highway(2, 2)
        input_tensor = torch.ones(2, 2, 2)
        output = highway(input_tensor)
        assert output.size() == (2, 2, 2)
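What the test exercises: a highway layer preserves the input shape because its gate mixes a nonlinear transform of the input with the input itself, dimension by dimension. A minimal sketch of that computation (MinimalHighway is ours, assuming the standard single-layer highway formulation; it is not AllenNLP's class):

import torch

class MinimalHighway(torch.nn.Module):
    # y = g * relu(W1 x) + (1 - g) * x, with gate g = sigmoid(W2 x).
    def __init__(self, input_dim: int):
        super().__init__()
        self._transform = torch.nn.Linear(input_dim, input_dim)
        self._gate = torch.nn.Linear(input_dim, input_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gate = torch.sigmoid(self._gate(x))
        return gate * torch.relu(self._transform(x)) + (1 - gate) * x

# Shape is preserved on N-d input, which is what the test asserts.
assert MinimalHighway(2)(torch.ones(2, 2, 2)).shape == (2, 2, 2)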
Example #5
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator,
                 dropout: float = 0.2,
                 mask_lstms: bool = True) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                      num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))
        initializer(self)

        # Bidaf has lots of layer dimensions which need to match up - these
        # aren't necessarily obvious from the configuration files, so we check
        # here.
        if modeling_layer.get_input_dim() != 4 * encoding_dim:
            raise ConfigurationError("The input dimension to the modeling_layer must be "
                                     "equal to 4 times the encoding dimension of the phrase_layer. "
                                     "Found {} and 4 * {} respectively.".format(modeling_layer.get_input_dim(),
                                                                                encoding_dim))
        if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder (embedding_dim + "
                                     "char_cnn) must match the input dimension of the phrase_encoder. "
                                     "Found {} and {}, respectively.".format(text_field_embedder.get_output_dim(),
                                                                             phrase_layer.get_input_dim()))

        if span_end_encoder.get_input_dim() != encoding_dim * 4 + modeling_dim * 3:
            raise ConfigurationError("The input dimension of the span_end_encoder should be equal to "
                                     "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. "
                                     "Found {} and (4 * {} + 3 * {}) "
                                     "respectively.".format(span_end_encoder.get_input_dim(),
                                                            encoding_dim,
                                                            modeling_dim))

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms
Example #6
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        matrix_attention_layer: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        dropout_prob: float = 0.1,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        answering_abilities: List[str] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        if answering_abilities is None:
            self.answering_abilities = [
                "passage_span_extraction",
                "question_span_extraction",
                "addition_subtraction",
                "counting",
            ]
        else:
            self.answering_abilities = answering_abilities

        text_embed_dim = text_field_embedder.get_output_dim()
        encoding_in_dim = phrase_layer.get_input_dim()
        encoding_out_dim = phrase_layer.get_output_dim()
        modeling_in_dim = modeling_layer.get_input_dim()
        modeling_out_dim = modeling_layer.get_output_dim()

        self._text_field_embedder = text_field_embedder

        self._embedding_proj_layer = torch.nn.Linear(text_embed_dim,
                                                     encoding_in_dim)
        self._highway_layer = Highway(encoding_in_dim, num_highway_layers)

        self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim,
                                                    encoding_in_dim)
        self._phrase_layer = phrase_layer

        self._matrix_attention = matrix_attention_layer

        self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4,
                                                    modeling_in_dim)
        self._modeling_layer = modeling_layer

        self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1)
        self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1)

        if len(self.answering_abilities) > 1:
            self._answer_ability_predictor = FeedForward(
                modeling_out_dim + encoding_out_dim,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim,
                             len(self.answering_abilities)],
                num_layers=2,
                dropout=dropout_prob,
            )

        if "passage_span_extraction" in self.answering_abilities:
            self._passage_span_extraction_index = self.answering_abilities.index(
                "passage_span_extraction")
            self._passage_span_start_predictor = FeedForward(
                modeling_out_dim * 2,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim, 1],
                num_layers=2,
            )
            self._passage_span_end_predictor = FeedForward(
                modeling_out_dim * 2,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim, 1],
                num_layers=2,
            )

        if "question_span_extraction" in self.answering_abilities:
            self._question_span_extraction_index = self.answering_abilities.index(
                "question_span_extraction")
            self._question_span_start_predictor = FeedForward(
                modeling_out_dim * 2,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim, 1],
                num_layers=2,
            )
            self._question_span_end_predictor = FeedForward(
                modeling_out_dim * 2,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim, 1],
                num_layers=2,
            )

        if "addition_subtraction" in self.answering_abilities:
            self._addition_subtraction_index = self.answering_abilities.index(
                "addition_subtraction")
            self._number_sign_predictor = FeedForward(
                modeling_out_dim * 3,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim, 3],
                num_layers=2,
            )

        if "counting" in self.answering_abilities:
            self._counting_index = self.answering_abilities.index("counting")
            self._count_number_predictor = FeedForward(
                modeling_out_dim,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[modeling_out_dim, 10],
                num_layers=2,
            )

        self._drop_metrics = DropEmAndF1()
        self._dropout = torch.nn.Dropout(p=dropout_prob)

        initializer(self)
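All of the FeedForward predictors above share one pattern: a ReLU hidden layer of width modeling_out_dim followed by a linear output layer. A standalone sketch of that pattern (the concrete dimensions are ours, for illustration):

import torch
from allennlp.modules import FeedForward
from allennlp.nn import Activation

predictor = FeedForward(
    input_dim=100,          # e.g. modeling_out_dim
    num_layers=2,
    hidden_dims=[100, 1],   # ReLU hidden layer, then a single logit
    activations=[Activation.by_name("relu")(),
                 Activation.by_name("linear")()],
)
logits = predictor(torch.randn(4, 100))
assert logits.shape == (4, 1)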
print("Word representation dimensionality: ",
      embeddings_batch_question.shape[2])

batch_size = embeddings_batch_question.size(0)
passage_length = embeddings_batch_passage.size(1)
"""
################### Highway LAYER  #########################################
The number of highway layers to use in between embedding the input and passing it through
the phrase layer.
"""
print("-------------- HIGHWAY LAYER ---------------")

num_highway_layers = 2

highway_layer = TimeDistributed(
    Highway(text_field_embedder.get_output_dim(), num_highway_layers))

highway_batch_question = highway_layer(embeddings_batch_question)
highway_batch_passage = highway_layer(embeddings_batch_passage)

print("Question representations: ", highway_batch_question.shape)
print("Passage representations: ", highway_batch_passage.shape)
print("Maximum num words in question: ", highway_batch_question.shape[1])
print("Word representation dimensionality: ", highway_batch_question.shape[2])
"""
################### phrase_layer LAYER  #########################################
NOTE: Since the LSTM implementation of PyTorch cannot apply dropout in the last layer, 
we just apply ourselves later
"""

print("-------------- PHRASE LAYER ---------------")
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 self_attention_layer: StackedSelfAttentionEncoder,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        # New self-attention layer
        self._self_attention_layer = self_attention_layer
        self._sa_matrix_attention = LegacyMatrixAttention(similarity_function)
        selfattent_dim = self_attention_layer.get_output_dim()  # 200 in this configuration

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        # Original BiDAF uses encoding_dim * 4 + modeling_dim; the extra term
        # accounts for the concatenated self-attention output.
        span_start_input_dim = encoding_dim * 4 + modeling_dim + 2 * selfattent_dim

        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        # Original BiDAF uses encoding_dim * 4 + span_end_encoding_dim.
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim + 2 * selfattent_dim

        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim + 2 * selfattent_dim,
                               "modeling layer input dim",
                               "4 * encoding dim + 2 * self attention dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(
            span_end_encoder.get_input_dim(),
            4 * encoding_dim + 3 * modeling_dim + 2 * selfattent_dim,
            "span end encoder input dim",
            "4 * encoding dim + 3 * modeling dim + 2 * self attention dim")

        self._na_accuracy = CategoricalAccuracy()
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # NOTE: this builds a fresh, randomly initialized Linear on every call;
        # its parameters are not registered with (or trained as part of) the model.
        self._na_dense = lambda in_dim: torch.nn.Linear(in_dim, 2).cuda()

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)