def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_probability: FeedForward,
             edge_probability: FeedForward,
             premise_encoder: Seq2SeqEncoder,
             edge_embedding: Embedding,
             use_encoding_for_node: bool,
             ignore_edges: bool,
             attention_similarity: SimilarityFunction,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(TreeAttention, self).__init__(vocab)
    self._text_field_embedder = text_field_embedder
    self._premise_encoder = premise_encoder
    self._nodes_attention = SingleTimeDistributed(LegacyMatrixAttention(attention_similarity), 0)
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    self._phrase_probability = TimeDistributed(phrase_probability)
    self._ignore_edges = ignore_edges
    if not self._ignore_edges:
        self._num_edges = vocab.get_vocab_size(namespace="edges")
        self._edge_probability = TimeDistributed(edge_probability)
        self._edge_embedding = edge_embedding
    self._use_encoding_for_node = use_encoding_for_node
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             similarity_function: SimilarityFunction,
             response_projection_feedforward: FeedForward,
             response_inference_encoder: Seq2SeqEncoder,
             response_input_feedforward: Optional[FeedForward] = None,
             source_input_feedforward: Optional[FeedForward] = None,
             source_projection_feedforward: Optional[FeedForward] = None,
             source_inference_encoder: Optional[Seq2SeqEncoder] = None,
             dropout: float = 0.5,
             # Whether to only consider the response and the alignments from the source to the response.
             response_only: bool = False) -> None:
    super().__init__()
    self._response_input_feedforward = response_input_feedforward
    self._response_projection_feedforward = response_projection_feedforward
    self._response_inference_encoder = response_inference_encoder
    # Fall back to the response modules when no source-specific modules are given.
    self._source_input_feedforward = source_input_feedforward or response_input_feedforward
    self._source_projection_feedforward = source_projection_feedforward or response_projection_feedforward
    self._source_inference_encoder = source_inference_encoder or response_inference_encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._response_only = response_only
def __init__(self,
             input_size: int,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True) -> None:
    super(BidafInteractionEncoder, self).__init__()
    self._highway_layer = TimeDistributed(Highway(input_size, num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(input_size, phrase_layer.get_input_dim(),
                           "input_size", "phrase layer input dim")

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
def __init__(self,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             document_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(DecomposableAttentionSentenceScorer, self).__init__()
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._document_encoder = document_encoder
    d_dim = self._document_encoder.get_output_dim()
    self._scorer = FeedForward(input_dim=2 * d_dim,
                               num_layers=1,
                               hidden_dims=1,
                               activations=lambda x: x,
                               dropout=0.)
    initializer(self)
def test_forward_works_on_simple_input(self):
    attention = LegacyMatrixAttention(DotProductSimilarity())
    sentence_1_tensor = Variable(torch.FloatTensor([[[1, 1, 1], [-1, 0, 1]]]))
    sentence_2_tensor = Variable(torch.FloatTensor([[[1, 1, 1], [-1, 0, 1], [-1, -1, -1]]]))
    result = attention(sentence_1_tensor, sentence_2_tensor).data.numpy()
    assert result.shape == (1, 2, 3)
    assert_allclose(result, [[[3, 0, -3], [0, 2, 0]]])
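In the models above, the similarity matrix returned by LegacyMatrixAttention is rarely used raw; it is normalized against a padding mask and turned into attended representations. A minimal sketch of that pattern, assuming the standard allennlp.nn.util helpers; the function and tensor names are illustrative, not taken from any snippet here:

from allennlp.nn import util

def attend_over_hypothesis(matrix_attention, premise, hypothesis, hypothesis_mask):
    # Shape: (batch_size, premise_length, hypothesis_length)
    similarity_matrix = matrix_attention(premise, hypothesis)
    # Normalize over the hypothesis dimension, ignoring padded positions.
    p2h_attention = util.masked_softmax(similarity_matrix, hypothesis_mask)
    # Shape: (batch_size, premise_length, embedding_dim)
    return util.weighted_sum(hypothesis, p2h_attention)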
def __init__(self,
             input_dim: int,
             projection_dim: Optional[int] = None,
             similarity_function: SimilarityFunction = DotProductSimilarity(),
             num_attention_heads: int = 1,
             combination: str = '1,2',
             output_dim: Optional[int] = None) -> None:
    super().__init__()
    self._input_dim = input_dim
    if projection_dim:
        self._projection = torch.nn.Linear(input_dim, projection_dim)
    else:
        self._projection = lambda x: x
        projection_dim = input_dim
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._num_attention_heads = num_attention_heads
    if isinstance(similarity_function, MultiHeadedSimilarity):
        if num_attention_heads == 1:
            raise ConfigurationError("Similarity function has multiple heads but encoder doesn't")
        if num_attention_heads != similarity_function.num_heads:
            raise ConfigurationError("Number of heads don't match between similarity function "
                                     "and encoder: %d, %d" % (num_attention_heads,
                                                              similarity_function.num_heads))
    elif num_attention_heads > 1:
        raise ConfigurationError("Encoder has multiple heads but similarity function doesn't")
    self._combination = combination

    combined_dim = util.get_combined_dim(combination, [input_dim, projection_dim])
    if output_dim:
        self._output_projection = Linear(combined_dim, output_dim)
        self._output_dim = output_dim
    else:
        self._output_projection = lambda x: x
        self._output_dim = combined_dim
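The `combination` string above controls how the original and attended representations are merged. A small illustration, assuming allennlp's util helpers and made-up tensor names: '1,2' simply concatenates the two inputs ('1,2,1*2' would also append their elementwise product), and get_combined_dim computes the matching width used for `combined_dim`:

from allennlp.nn import util

# tokens: (batch, length, input_dim); attended_tokens: (batch, length, projection_dim)
combined = util.combine_tensors('1,2', [tokens, attended_tokens])
assert combined.size(-1) == util.get_combined_dim('1,2', [tokens.size(-1), attended_tokens.size(-1)])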
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DecomposableAttention, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
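For context, the forward pass of the decomposable-attention models here applies the attend feedforward before the matrix attention and then attends in both directions. A compressed sketch under the same attribute names, assumed from the standard AllenNLP implementation rather than copied from these snippets:

projected_premise = self._attend_feedforward(embedded_premise)
projected_hypothesis = self._attend_feedforward(embedded_hypothesis)
# Shape: (batch_size, premise_length, hypothesis_length)
similarity_matrix = self._matrix_attention(projected_premise, projected_hypothesis)
# Premise -> hypothesis direction.
p2h_attention = util.masked_softmax(similarity_matrix, hypothesis_mask)
attended_hypothesis = util.weighted_sum(embedded_hypothesis, p2h_attention)
# Hypothesis -> premise direction uses the transposed matrix.
h2p_attention = util.masked_softmax(similarity_matrix.transpose(1, 2).contiguous(), premise_mask)
attended_premise = util.weighted_sum(embedded_premise, h2p_attention)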
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlowFT, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._action_predictor = torch.nn.Linear(modeling_dim, 4)
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._action_accuracy = CategoricalAccuracy()
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             use_sentiment: bool,
             use_tfidf: bool,
             headline_encoder: Optional[Seq2SeqEncoder] = None,
             body_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DecomposableAttentionModel, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._headline_encoder = headline_encoder
    self._body_encoder = body_encoder or headline_encoder
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    self.use_sentiment = use_sentiment
    self.use_tfidf = use_tfidf
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
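The `encoder.get_output_dim() * 4` check above reflects ESIM's enhanced representation, which concatenates each encoded sequence with its attended counterpart plus their difference and elementwise product. A sketch with assumed tensor names:

# Each tensor: (batch_size, premise_length, encoding_dim), so the result
# is 4 * encoding_dim wide, matching the projection feedforward's input.
enhanced_premise = torch.cat([encoded_premise,
                              attended_hypothesis,
                              encoded_premise - attended_hypothesis,
                              encoded_premise * attended_hypothesis],
                             dim=-1)
projected_premise = self._projection_feedforward(enhanced_premise)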
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             parser_model_path: str,
             parser_cuda_device: int,
             freeze_parser: bool,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    self._parser = load_archive(parser_model_path, cuda_device=parser_cuda_device).model
    self._parser._head_sentinel.requires_grad = False
    for child in self._parser.children():
        for param in child.parameters():
            param.requires_grad = False
    if not freeze_parser:
        for param in self._parser.encoder.parameters():
            param.requires_grad = True

    initializer(self)
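A quick sanity check (not part of the original model) to confirm what the freezing logic above leaves trainable; plain PyTorch, with no assumptions beyond the attributes already used:

trainable = [name for name, p in self._parser.named_parameters() if p.requires_grad]
# With freeze_parser=True this should be empty; with freeze_parser=False
# it should contain only encoder parameters.
print(len(trainable), trainable[:5])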
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2VecEncoder,
             answers_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._classifier_feedforward = classifier_feedforward
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer

    encoding_dim = phrase_layer.get_output_dim()
    self._time_distributed_highway_layer = TimeDistributed(self._highway_layer)
    self._answers_encoder = TimeDistributed(answers_encoder)

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    self.loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             parser_model_path: str,
             parser_cuda_device: int,
             freeze_parser: bool,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SyntacticEntailment, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    self._parser = load_archive(parser_model_path, cuda_device=parser_cuda_device).model
    self._parser._head_sentinel.requires_grad = False
    for child in self._parser.children():
        for param in child.parameters():
            param.requires_grad = False
    if not freeze_parser:
        for param in self._parser.encoder.parameters():
            param.requires_grad = True

    initializer(self)
class EsimComparatorLayer2(torch.nn.Module):
    def __init__(self, similarity_function: SimilarityFunction = None) -> None:
        super().__init__()
        # Don't use DotProductMatrixAttention() if the model wasn't trained exactly with it.
        self._matrix_attention = LegacyMatrixAttention(similarity_function)

    @overrides
    def forward(self,  # pylint: disable=arguments-differ
                encoded_premise: torch.Tensor,
                encoded_hypothesis: torch.Tensor) -> torch.Tensor:
        # Shape: (batch_size, premise_length, hypothesis_length)
        similarity_matrix = self._matrix_attention(encoded_premise, encoded_hypothesis)
        return similarity_matrix

    def get_output_dim(self):
        return self._matrix_attention.get_output_dim()
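A hedged usage sketch for the layer above, assuming DotProductSimilarity as the similarity function (any SimilarityFunction works) and random inputs:

import torch
from allennlp.modules.similarity_functions import DotProductSimilarity

layer = EsimComparatorLayer2(DotProductSimilarity())
premise = torch.randn(2, 7, 300)     # (batch, premise_length, dim)
hypothesis = torch.randn(2, 5, 300)  # (batch, hypothesis_length, dim)
similarity = layer(premise, hypothesis)
assert similarity.shape == (2, 7, 5)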
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder1: Seq2VecEncoder,
             encoder2: Seq2VecEncoder,
             similarity_function: SimilarityFunction,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embedding = word_embeddings
    self.enc_turn1and2 = encoder1
    self.enc_turn3 = encoder2
    self.matrix_attention = LegacyMatrixAttention(similarity_function)
    self.accuracy = MicroMetrics(vocab)
    self.label_index_to_label = self.vocab.get_index_to_token_vocabulary('labels')
    final_concatenated_dimension = 4 * self.enc_turn1and2.get_output_dim()
    self.hidden2out = torch.nn.Linear(in_features=final_concatenated_dimension,
                                      out_features=vocab.get_vocab_size("labels"))
    self.lexicon_embedding = LexiconEmbedder(LEXICON_PATH, self.vocab)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             similarity_weight: int = 30) -> None:
    super(DecomposableAttentionModified, self).__init__(vocab, regularizer)
    # Invert the label -> index mapping into an index -> label list.
    self.label_map = vocab.get_token_to_index_vocabulary('labels')
    label_map = [None] * len(self.label_map)
    for lb, lb_idx in self.label_map.items():
        label_map[lb_idx] = lb
    self.label_map = label_map

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)

    self.lambda_layer = nn.Sequential(nn.Linear(16, 1, bias=False), MyActivationFunction())
    self.lambda_layer[0].weight.data = torch.tensor([[0.1, 0.5, 0.5, 0.5,
                                                      0.5, 0.1, 0.5, 0.5,
                                                      0.5, 0.5, 0.1, 0.5,
                                                      0.5, 0.5, 0.5, 0.9]])
    self.similarity_weight = similarity_weight
def __init__(self,
             encoder: Seq2SeqEncoder,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             similarity_function: SimilarityFunction = None,
             dropout: float = 0.5) -> None:
    super().__init__()
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
def __init__(self,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             query_encoder: Optional[Seq2SeqEncoder] = None,
             document_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(DecomposableAttentionScorer, self).__init__()
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._query_encoder = query_encoder
    self._document_encoder = document_encoder or query_encoder
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             aggr_type: str = "both",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = 1

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    # check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
    #                        "encoder output dim", "projection feedforward input")
    # check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
    #                        "proj feedforward output dim", "inference lstm input dim")

    self._aggr_type = aggr_type
    self._metric = PearsonCorrelation()
    self._loss = torch.nn.MSELoss()

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForwardPair,
             dropout: float = 0.5,
             margin: float = 1.25,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._margin = margin
    self._accuracy = BooleanAccuracy()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             similarity_weight: int = 30) -> None:
    super().__init__(vocab, regularizer)
    # Invert the label -> index mapping into an index -> label list.
    self.label_map = vocab.get_token_to_index_vocabulary('labels')
    label_map = [None] * len(self.label_map)
    for lb, lb_idx in self.label_map.items():
        label_map[lb_idx] = lb
    self.label_map = label_map

    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    print(similarity_function)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)

    self.lambda_layer = nn.Sequential(nn.Linear(16, 1, bias=False), MyActivationFunction())
    self.lambda_layer[0].weight.data = torch.tensor([[0.1, 0.5, 0.5, 0.5,
                                                      0.5, 0.1, 0.5, 0.5,
                                                      0.5, 0.5, 0.1, 0.5,
                                                      0.5, 0.5, 0.5, 0.9]])
    self.similarity_weight = similarity_weight
    print("SIMILARITY WEIGHT BEING USED IS : {0}".format(self.similarity_weight))
def __init__(self, vocab: Vocabulary, cf_a, preloaded_elmo=None) -> None:
    super(BidirectionalAttentionFlow_1, self).__init__(vocab, cf_a.regularizer)

    """
    Initialize some data structures
    """
    self.cf_a = cf_a
    # Bayesian data models
    self.VBmodels = []
    self.LinearModels = []

    """
    ############## TEXT FIELD EMBEDDER with ELMO ####################
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``question`` and ``passage`` ``TextFields`` we get as input to the model.
    """
    if cf_a.use_ELMO:
        if preloaded_elmo is not None:
            text_field_embedder = preloaded_elmo
        else:
            text_field_embedder = bidut.download_Elmo(cf_a.ELMO_num_layers, cf_a.ELMO_droput)
            print("ELMO loaded from disk or downloaded")
    else:
        text_field_embedder = None

    # embedder_out_dim = text_field_embedder.get_output_dim()
    self._text_field_embedder = text_field_embedder

    if cf_a.Add_Linear_projection_ELMO:
        if self.cf_a.VB_Linear_projection_ELMO:
            prior = Vil.Prior(**(cf_a.VB_Linear_projection_ELMO_prior))
            print("----------------- Bayesian Linear Projection ELMO --------------")
            linear_projection_ELMO = LinearVB(text_field_embedder.get_output_dim(), 200, prior=prior)
            self.VBmodels.append(linear_projection_ELMO)
        else:
            linear_projection_ELMO = torch.nn.Linear(text_field_embedder.get_output_dim(), 200)
        self._linear_projection_ELMO = linear_projection_ELMO

    """
    ############## Highway layers ####################
    num_highway_layers : ``int``
        The number of highway layers to use in between embedding the input and passing it through
        the phrase layer.
    """
    if cf_a.Add_Linear_projection_ELMO:
        Input_dimension_highway = 200
    else:
        Input_dimension_highway = text_field_embedder.get_output_dim()
    num_highway_layers = cf_a.num_highway_layers

    if self.cf_a.VB_highway_layers:
        print("----------------- Bayesian Highway network --------------")
        prior = Vil.Prior(**(cf_a.VB_highway_layers_prior))
        highway_layer = HighwayVB(Input_dimension_highway, num_highway_layers, prior=prior)
        self.VBmodels.append(highway_layer)
    else:
        highway_layer = Highway(Input_dimension_highway, num_highway_layers)
    highway_layer = TimeDistributed(highway_layer)
    self._highway_layer = highway_layer

    """
    ############## Phrase layer ####################
    phrase_layer : ``Seq2SeqEncoder``
        The encoder (with its own internal stacking) that we will use in between embedding tokens
        and doing the bidirectional attention.
    """
    if cf_a.phrase_layer_dropout > 0:
        dropout_phrase_layer = torch.nn.Dropout(p=cf_a.phrase_layer_dropout)
    else:
        dropout_phrase_layer = lambda x: x
    phrase_layer = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(Input_dimension_highway,
                      hidden_size=cf_a.phrase_layer_hidden_size,
                      batch_first=True,
                      bidirectional=True,
                      num_layers=cf_a.phrase_layer_num_layers,
                      dropout=cf_a.phrase_layer_dropout))
    phrase_encoding_out_dim = cf_a.phrase_layer_hidden_size * 2
    self._phrase_layer = phrase_layer
    self._dropout_phrase_layer = dropout_phrase_layer

    """
    ############## Matrix attention layer ####################
    similarity_function : ``SimilarityFunction``
        The similarity function that we will use when comparing encoded passage and question
        representations.
""" # Linear later to compute the start if (self.cf_a.VB_similarity_function): prior = Vil.Prior(**(cf_a.VB_similarity_function_prior)) print( "----------------- Bayesian Similarity matrix --------------") similarity_function = LinearSimilarityVB( combination="x,y,x*y", tensor_1_dim=phrase_encoding_out_dim, tensor_2_dim=phrase_encoding_out_dim, prior=prior) self.VBmodels.append(similarity_function) else: similarity_function = LinearSimilarity( combination="x,y,x*y", tensor_1_dim=phrase_encoding_out_dim, tensor_2_dim=phrase_encoding_out_dim) matrix_attention = LegacyMatrixAttention(similarity_function) self._matrix_attention = matrix_attention """ ############## Modelling Layer #################### modeling_layer : ``Seq2SeqEncoder`` The encoder (with its own internal stacking) that we will use in between the bidirectional attention and predicting span start and end. """ ## Create dropout layer if cf_a.modeling_passage_dropout > 0: ## Create dropout layer dropout_modeling_passage = torch.nn.Dropout( p=cf_a.modeling_passage_dropout) else: dropout_modeling_passage = lambda x: x modeling_layer = PytorchSeq2SeqWrapper( torch.nn.LSTM(phrase_encoding_out_dim * 4, hidden_size=cf_a.modeling_passage_hidden_size, batch_first=True, bidirectional=True, num_layers=cf_a.modeling_passage_num_layers, dropout=cf_a.modeling_passage_dropout)) self._modeling_layer = modeling_layer self._dropout_modeling_passage = dropout_modeling_passage """ ############## Span Start Representation ##################### span_end_encoder : ``Seq2SeqEncoder`` The encoder that we will use to incorporate span start predictions into the passage state before predicting span end. """ encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim # Linear later to compute the start if (self.cf_a.VB_span_start_predictor_linear): prior = Vil.Prior(**(cf_a.VB_span_start_predictor_linear_prior)) print( "----------------- Bayesian Span Start Predictor--------------" ) span_start_predictor_linear = LinearVB(span_start_input_dim, 1, prior=prior) self.VBmodels.append(span_start_predictor_linear) else: span_start_predictor_linear = torch.nn.Linear( span_start_input_dim, 1) self._span_start_predictor_linear = span_start_predictor_linear self._span_start_predictor = TimeDistributed( span_start_predictor_linear) """ ############## Span End Representation ##################### """ ## Create dropout layer if cf_a.span_end_encoder_dropout > 0: dropout_span_end_encode = torch.nn.Dropout( p=cf_a.span_end_encoder_dropout) else: dropout_span_end_encode = lambda x: x span_end_encoder = PytorchSeq2SeqWrapper( torch.nn.LSTM(encoding_dim * 4 + modeling_dim * 3, hidden_size=cf_a.modeling_span_end_hidden_size, batch_first=True, bidirectional=True, num_layers=cf_a.modeling_span_end_num_layers, dropout=cf_a.span_end_encoder_dropout)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_encoder = span_end_encoder self._dropout_span_end_encode = dropout_span_end_encode if (self.cf_a.VB_span_end_predictor_linear): print( "----------------- Bayesian Span End Predictor--------------") prior = Vil.Prior(**(cf_a.VB_span_end_predictor_linear_prior)) span_end_predictor_linear = LinearVB(span_end_input_dim, 1, prior=prior) self.VBmodels.append(span_end_predictor_linear) else: span_end_predictor_linear = torch.nn.Linear(span_end_input_dim, 1) self._span_end_predictor_linear = span_end_predictor_linear 
    self._span_end_predictor = TimeDistributed(span_end_predictor_linear)

    """
    Dropout of the last layers
    """
    if cf_a.spans_output_dropout > 0:
        dropout_spans_output = torch.nn.Dropout(p=cf_a.spans_output_dropout)
    else:
        dropout_spans_output = lambda x: x
    self._dropout_spans_output = dropout_spans_output

    """
    Checks and accuracy
    """
    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(Input_dimension_highway, phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    """
    mask_lstms : ``bool``, optional (default=True)
        If ``False``, we will skip passing the mask to the LSTM layers. This gives a ~2x speedup,
        with only a slight performance decrease, if any. We haven't experimented much with this
        yet, but have confirmed that we still get very similar performance with much faster
        training times. We still use the mask for all softmaxes, but avoid the shuffling that's
        required when using masking with pytorch LSTMs.
    """
    self._mask_lstms = cf_a.mask_lstms

    """
    ################### Initialize parameters ##############################
    """
    # They are all initialized when instantiating the components.

    """
    ####################### OPTIMIZER ################
    """
    optimizer = pytut.get_optimizers(self, cf_a)
    self._optimizer = optimizer
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             modeling_layer_memory: Seq2SeqEncoder,
             margin: float,
             max: float,
             dropout: float = 0.2,
             mask_lstms: bool = False,
             memory_enabled: bool = False,
             memory_update: bool = True,
             memory_concat: bool = False,
             save_memory_snapshots: bool = False,
             save_entity_embeddings: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             answer_layer_image: Seq2SeqEncoder = None,
             answer_layer_text: Seq2SeqEncoder = None,
             question_image_encoder: Seq2SeqEncoder = None,
             step_layer: Seq2SeqEncoder = None,
             num_heads: int = 2,
             num_slots: int = 61,  # Maximum number of entities in the training set.
             last_layer_hidden_dims: List[int] = None,
             last_layer_num_layers: int = 4,
             projection_input_dim: int = 2048,
             projection_hidden_dims: List[int] = None,
             save_step_wise_attentions: bool = False) -> None:
    super(ProceduralReasoningNetworksforRecipeQA, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._modeling_layer_memory = modeling_layer_memory
    self.margin = torch.FloatTensor([margin]).cuda()
    self.cos = nn.CosineSimilarity(dim=-1, eps=1e-6).cuda()
    self.for_max = torch.FloatTensor([max]).cuda()
    self._memory_enabled = memory_enabled
    self._memory_update = memory_update
    self._memory_concat = memory_concat
    self._save_memory_snapshots = save_memory_snapshots
    self._save_entity_embeddings = save_entity_embeddings
    self._step_layer = step_layer
    self._label_acc = CategoricalAccuracy()
    self.save_step_wise_attentions = save_step_wise_attentions

    if self._memory_enabled:
        head_size = int(step_layer.get_output_dim() / num_heads)
        self.mem_module = RelationalMemory(
            mem_slots=num_slots,
            head_size=head_size,
            input_size=head_size * num_heads,
            num_heads=num_heads,
            num_blocks=1,
            forget_bias=1.,
            input_bias=0.,
        ).cuda(0)
        last_layer_input_dim = 10 * modeling_layer.get_output_dim()
    else:
        last_layer_input_dim = 5 * modeling_layer.get_output_dim()

    self._activation = torch.nn.Tanh()
    self._last_layer = FeedForward(last_layer_input_dim, last_layer_num_layers,
                                   last_layer_hidden_dims, self._activation, dropout)
    self._answer_layer_image = answer_layer_image  # Uses an image encoder for image input.
    self._answer_layer_text = answer_layer_text  # Uses a text encoder for text input.
    self._question_image_encoder = question_image_encoder  # Converts question image inputs to the encoding dim.
    self._vocab = vocab
    # TODO: Replace hard-coded parameters with config parameters.
    self._mlp_projector = TimeDistributed(
        torch.nn.Sequential(
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_input_dim, projection_hidden_dims[0]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[0], projection_hidden_dims[1]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[1], projection_hidden_dims[2]),
            torch.nn.Tanh(),
            torch.nn.Dropout(0.1, inplace=False),
            torch.nn.Linear(projection_hidden_dims[2], projection_hidden_dims[3]),
        ))
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    if self._save_memory_snapshots:
        # Make sure we start with a clean file.
        if os.path.isfile('memory_snapshots_by_recipe.pkl'):
            os.remove('memory_snapshots_by_recipe.pkl')
    if self._save_entity_embeddings:
        # Make sure we start with a clean file.
        if os.path.isfile('entity_embeddings_final.pkl'):
            os.remove('entity_embeddings_final.pkl')

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             class_weights: Optional[List[float]] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             encode_together: bool = False) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self.encode_together = encode_together
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    if class_weights:
        self.class_weights = class_weights
    else:
        # Default to a uniform weight per output class.
        self.class_weights = [1.] * output_feedforward.get_output_dim()
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self.metrics = {"accuracy": CategoricalAccuracy()}
    for _class in range(len(self.class_weights)):
        self.metrics.update({f"f1_rel{_class}": F1Measure(_class)})
    self._loss = torch.nn.CrossEntropyLoss(weight=torch.FloatTensor(self.class_weights))

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlowBasic, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    # Evaluation metrics.
    # BLEU
    self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    self._bleu_scores = {x: Average() for x in self._bleu_score_types_to_use}

    # ROUGE using pyrouge
    self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']

    # If we have rouge-n as a metric we actually get n scores like rouge-1, rouge-2, ..., rouge-n.
    max_rouge_n = 4
    rouge_n_metrics = []
    if "rouge-n" in self._rouge_score_types_to_use:
        rouge_n_metrics = ["rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)]
    rouge_scores_names = rouge_n_metrics + [y for y in self._rouge_score_types_to_use if y != 'rouge-n']
    self._rouge_scores = {x: Average() for x in rouge_scores_names}
    self._rouge_evaluator = rouge.Rouge(metrics=self._rouge_score_types_to_use,
                                        max_n=max_rouge_n,
                                        limit_length=True,
                                        length_limit=100,
                                        length_limit_type='words',
                                        apply_avg=False,
                                        apply_best=False,
                                        alpha=0.5,  # Default F1 score weighting.
                                        weight_factor=1.2,
                                        stemming=True)

    initializer(self)
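A sketch of how the ROUGE evaluator and the Average metrics above might be updated during evaluation; this assumes py-rouge's get_scores output format (with apply_avg=False and apply_best=False it returns, per metric name, a list of per-pair dicts whose 'f' entry is a list), and the strings are placeholders:

# Placeholder hypothesis/reference; in the model these come from predictions and gold answers.
scores = self._rouge_evaluator.get_scores(["a cat sat on the mat"], ["the cat sat on a mat"])
for metric_name, results in scores.items():  # e.g. 'rouge-1', ..., 'rouge-l', 'rouge-w'
    if metric_name in self._rouge_scores:
        self._rouge_scores[metric_name](results[0]['f'][0])  # track the F1 value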
def __init__(self,
             vocab: Vocabulary,
             training_tasks: Any,
             validation_tasks: Any,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             langs_print_train: List[str] = None,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DecomposableAttentionMultiling, self).__init__(vocab, regularizer=regularizer)

    if isinstance(training_tasks, dict):
        self._training_tasks = list(training_tasks.keys())
    else:
        self._training_tasks = training_tasks

    if isinstance(validation_tasks, dict):
        self._validation_tasks = list(validation_tasks.keys())
    else:
        self._validation_tasks = validation_tasks

    self._label_namespace = "labels"
    self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)

    # elmo / bert
    self._text_field_embedder = text_field_embedder

    # decomposable attention stuff
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._dropout = torch.nn.Dropout(p=dropout)
    self._loss = torch.nn.CrossEntropyLoss()
    # initializer(self._nli_projection_layer)

    self._nli_per_lang_acc: Dict[str, CategoricalAccuracy] = dict()
    for taskname in self._validation_tasks:
        # This will hide some metrics from tqdm, but they will still be computed.
        self._nli_per_lang_acc[taskname] = CategoricalAccuracy()
    self._nli_avg_acc = Average()

    self._langs_print_train = langs_print_train or ["en"]
    if '*' in self._langs_print_train:
        # Assumes task names of the form "<prefix>_<lang>".
        self._langs_print_train = [t.split("_")[-1] for t in training_tasks]
def __init__(self, similarity_function: SimilarityFunction = None) -> None:
    super().__init__()
    # Don't use DotProductMatrixAttention() if the model wasn't trained exactly with it.
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
print("encoding_dim: ", encoding_dim) print("Question encoding: ", encoded_question.shape) print("Passage encoding: ", encoded_passage.shape) """ ################### SIMILARITY FUNCTION LAYER ######################################### NOTE: Since the LSTM implementation of PyTorch cannot apply dropout in the last layer, we just apply ourselves later """ print("-------------- SIMILARITY LAYER ---------------") similarity_function = LinearSimilarity(combination="x,y,x*y", tensor_1_dim=200, tensor_2_dim=200) matrix_attention = LegacyMatrixAttention(similarity_function) passage_question_similarity = matrix_attention(encoded_passage, encoded_question) # Shape: (batch_size, passage_length, question_length) print("passage question similarity: ", passage_question_similarity.shape) # Shape: (batch_size, passage_length, question_length) passage_question_attention = util.masked_softmax(passage_question_similarity, question_mask) # Shape: (batch_size, passage_length, encoding_dim) passage_question_vectors = util.weighted_sum(encoded_question, passage_question_attention) # We replace masked values with something really negative here, so they don't affect the # max below.
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             judge: Model = None,
             update_judge: bool = False,
             reward_method: str = None,
             detach_value_head: bool = False,
             qa_loss_weight: float = 0.,
             influence_reward: bool = False,
             dataset_name: str = 'squad') -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self.judge = judge
    self.is_judge = self.judge is None
    self.reward_method = None if self.is_judge else reward_method
    self.update_judge = update_judge and (self.judge is not None)
    self._detach_value_head = detach_value_head
    self._qa_loss_weight = qa_loss_weight
    self.influence_reward = influence_reward
    self.answer_type = 'mc' if dataset_name == 'race' else 'span'
    self.output_type = 'span'  # The actual way the output is given (here it's as a pointer to input)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    if not self.is_judge:
        self._turn_film_gen = torch.nn.Linear(1, 2 * modeling_layer.get_input_dim())
        self._film = FiLM()
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    if not self.is_judge:
        self._value_head = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))  # Can make MLP
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)