def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             feed_forward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Assemble the sub-modules, prediction heads and metrics of the model.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    The embedder, the encoders and ``feed_forward`` are stored as passed in;
    the highway layer, both attention modules and all linear heads are
    created here.  When ``dropout`` is not positive, an identity function is
    used in place of a dropout module.  ``initializer`` is applied to the
    fully built model as the final step.
    """
    super(ModelV21, self).__init__(vocab, regularizer)

    # Sub-modules are created in a fixed order; do not reorder them.
    self._text_field_embedder = text_field_embedder
    embedding_dim = text_field_embedder.get_output_dim()
    self._highway_layer = TimeDistributed(Highway(embedding_dim, num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder
    self._feed_forward = feed_forward

    encoding_dim = phrase_layer.get_output_dim()
    # Looked up but not consumed by any layer built below.
    modeling_dim = modeling_layer.get_output_dim()

    # All three heads map an encoding_dim-sized vector to a single score.
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    # Looked up but not consumed by any layer built below.
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

    # TODO:
    self._self_matrix_attention = MatrixAttention(attention_similarity_function)
    self._linear_layer = TimeDistributed(
        torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(
        torch.nn.Linear(3 * encoding_dim, encoding_dim))

    # Metric trackers.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Assemble the sub-modules, prediction heads and metrics of the model.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    The embedder and encoders are stored as passed in; the attention modules
    and linear heads are created here, all sized from the output dimension
    of ``phrase_layer``.  When ``dropout`` is not positive, an identity
    function is used in place of a dropout module.  ``initializer`` is
    applied to the fully built model as the final step.
    """
    super(ModelSQUAD, self).__init__(vocab, regularizer)

    # Sub-modules are created in a fixed order; do not reorder them.
    # (This variant has no highway layer.)
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._residual_encoder = residual_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder

    encoding_dim = phrase_layer.get_output_dim()

    # All three heads map an encoding_dim-sized vector to a single score.
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    # Looked up but not consumed by any layer built below.
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

    self._self_matrix_attention = MatrixAttention(attention_similarity_function)
    self._linear_layer = TimeDistributed(
        torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(
        torch.nn.Linear(3 * encoding_dim, encoding_dim))
    self._self_atten = TriLinearAttention(encoding_dim)

    # Metric trackers.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Store the model components and validate that their dimensions agree.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    When ``hypothesis_encoder`` is falsy, ``premise_encoder`` is reused in
    its place.  ``initializer`` is applied to the model as the final step.

    Raises
    ------
    ConfigurationError
        If the embedder output dimension differs from the input dimension
        of ``attend_feedforward``, or if the output dimension of
        ``aggregate_feedforward`` differs from the size of the ``"labels"``
        vocabulary namespace.
    """
    super(DecomposableAttention, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    # Fall back to the premise encoder when no hypothesis encoder is given.
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    embedding_dim = text_field_embedder.get_output_dim()
    attend_input_dim = attend_feedforward.get_input_dim()
    if embedding_dim != attend_input_dim:
        raise ConfigurationError("Output dimension of the text_field_embedder (dim: {}), "
                                 "must match the input_dim of the FeedForward layer "
                                 "attend_feedforward, (dim: {}). ".format(embedding_dim,
                                                                          attend_input_dim))

    final_dim = aggregate_feedforward.get_output_dim()
    if final_dim != self._num_labels:
        raise ConfigurationError("Final output dimension (%d) must equal num labels (%d)" %
                                 (final_dim, self._num_labels))

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Store the model components and validate that their dimensions agree.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    When ``hypothesis_encoder`` is falsy, ``premise_encoder`` is reused in
    its place.  Two dimension pairs are passed to ``check_dimensions_match``:
    embedder output vs. ``attend_feedforward`` input, and
    ``aggregate_feedforward`` output vs. the size of the ``"labels"``
    vocabulary namespace.  ``initializer`` is applied to the model last.
    """
    super(DecomposableAttention, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    # Fall back to the premise encoder when no hypothesis encoder is given.
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    embedding_dim = text_field_embedder.get_output_dim()
    attend_input_dim = attend_feedforward.get_input_dim()
    check_dimensions_match(embedding_dim, attend_input_dim,
                           "text field embedding dim", "attend feedforward input dim")

    final_dim = aggregate_feedforward.get_output_dim()
    check_dimensions_match(final_dim, self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def __init__(self,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             premise_composer: Optional[Seq2SeqEncoder] = None,
             hypothesis_composer: Optional[Seq2SeqEncoder] = None,
             combine_feedforward: Optional[FeedForward] = None,
             aggregate_feedforward: Optional[FeedForward] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             num_wrapping_dims=0,
             vocab=None) -> None:
    """Store the components of the module and apply the initializer.

    The parent constructor is called without arguments; ``vocab`` is simply
    kept on the instance.  When ``hypothesis_encoder`` or
    ``hypothesis_composer`` is falsy, the corresponding premise component is
    reused in its place.  ``num_wrapping_dims`` is stored unchanged.

    ``initializer`` is applied to the module once all components are
    attached, matching the other model constructors; previously the
    argument was accepted but silently ignored.
    """
    super(ESIM, self).__init__()
    self.vocab = vocab

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)

    self._premise_encoder = premise_encoder
    # Share the premise encoder/composer when no hypothesis-specific one is given.
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder
    self._premise_composer = premise_composer
    self._hypothesis_composer = hypothesis_composer or premise_composer

    self._combine_feedforward = combine_feedforward
    self._aggregate_feedforward = aggregate_feedforward
    self._num_wrapping_dims = num_wrapping_dims

    # NOTE(review): assumes this class is a torch module that the
    # InitializerApplicator can traverse, like its siblings — confirm.
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             classifier_feedforward: FeedForward,
             context_encoder: Optional[Seq2SeqEncoder] = None,
             response_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Store the classifier components and validate their dimensions.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    The number of classes is the size of the ``"labels"`` vocabulary
    namespace.  Two dimension pairs are passed to
    ``check_dimensions_match``: embedder output vs. ``attend_feedforward``
    input, and ``classifier_feedforward`` output vs. the number of classes.
    ``initializer`` is applied to the model as the final step.

    The constructor no longer looks up the index of a ``'neg'`` label: the
    value was unused (its only consumer, an F1 metric, is disabled) and the
    lookup raised ``ValueError`` for label sets without a ``'neg'`` entry.
    """
    super(DialogueContextCoherenceAttentionClassifier, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = vocab.get_vocab_size("labels")
    self.context_encoder = context_encoder
    self.response_encoder = response_encoder
    self.attend_feedforward = TimeDistributed(attend_feedforward)
    self.matrix_attention = MatrixAttention(similarity_function)
    self.compare_feedforward = TimeDistributed(compare_feedforward)
    self.classifier_feedforward = classifier_feedforward

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(classifier_feedforward.get_output_dim(), self.num_classes,
                           "final output dimension", "number of labels")

    # Only accuracy is tracked. If an F1 metric is re-enabled, resolve the
    # positive label index from the vocabulary at that point.
    self.metrics = {
        "accuracy": CategoricalAccuracy()
    }
    self.loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the layers, span predictors and metrics, checking dimensions.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    The span-start head takes ``4 * encoding_dim + modeling_dim`` inputs and
    the span-end head ``4 * encoding_dim + span_end_encoder output dim``,
    where ``encoding_dim`` and ``modeling_dim`` are the output dimensions of
    ``phrase_layer`` and ``modeling_layer``.  Three dimension pairs are
    passed to ``check_dimensions_match``.  When ``dropout`` is not positive,
    an identity function is used in place of a dropout module.
    ``initializer`` is applied to the model as the final step.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    # Sub-modules are created in a fixed order; do not reorder them.
    self._text_field_embedder = text_field_embedder
    embedding_dim = text_field_embedder.get_output_dim()
    self._highway_layer = TimeDistributed(Highway(embedding_dim, num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()

    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't
    # necessarily obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim")

    # Metric trackers.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)
def test_forward_works_on_simple_input(self):
    """Check the output shape and values of a default ``MatrixAttention``.

    The first input has two rows and the second has three; the expected
    ``(1, 2, 3)`` result holds the pairwise dot products of those rows.
    """
    attention = MatrixAttention()
    first_rows = [[1, 1, 1], [-1, 0, 1]]
    second_rows = [[1, 1, 1], [-1, 0, 1], [-1, -1, -1]]
    sentence_1_tensor = Variable(torch.FloatTensor([first_rows]))
    sentence_2_tensor = Variable(torch.FloatTensor([second_rows]))

    similarities = attention(sentence_1_tensor, sentence_2_tensor)
    result = similarities.data.numpy()

    expected = [[[3, 0, -3], [0, 2, 0]]]
    assert result.shape == (1, 2, 3)
    assert_allclose(result, expected)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             evaluation_json_file: str = None) -> None:
    """Build the layers, span predictors and metrics of the model.

    ``vocab`` is forwarded to the parent constructor.  The span-start head
    takes ``4 * encoding_dim + modeling_dim`` inputs and the span-end head
    ``4 * encoding_dim + span_end_encoder output dim``.  ``initializer`` is
    applied right after the span predictors are created.  When ``dropout``
    is not positive, an identity function is used in place of a dropout
    module.

    If ``evaluation_json_file`` is truthy, the file is parsed as JSON and a
    mapping from each question ``id`` to the list of its answer texts is
    stored in ``self._official_eval_dataset``; otherwise that attribute is
    ``None``.  The file is read as UTF-8 explicitly so that parsing does not
    depend on the platform's default encoding.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab)

    # Sub-modules are created in a fixed order; do not reorder them.
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()

    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    initializer(self)

    # Metric trackers.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    if evaluation_json_file:
        logger.info("Prepping official evaluation dataset from %s", evaluation_json_file)
        # Explicit UTF-8: the platform default encoding may garble or
        # reject non-ASCII text in the dataset.
        with open(evaluation_json_file, encoding="utf-8") as dataset_file:
            dataset_json = json.load(dataset_file)
        question_to_answers = {}
        for article in dataset_json['data']:
            for paragraph in article['paragraphs']:
                for question in paragraph['qas']:
                    question_id = question['id']
                    answers = [answer['text'] for answer in question['answers']]
                    question_to_answers[question_id] = answers
        self._official_eval_dataset = question_to_answers
    else:
        self._official_eval_dataset = None
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             preload_path: Optional[str] = None) -> None:
    """Store the model components and optionally preload matching weights.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    When ``hypothesis_encoder`` is falsy, ``premise_encoder`` is reused in
    its place.  Only the embedder output vs. ``attend_feedforward`` input
    dimensions are checked; the label-count check is disabled in this
    variant.  ``initializer`` is applied before any preloading.

    If ``preload_path`` is not ``None``, the object stored there is loaded
    with ``torch.load`` and iterated as a name-to-tensor mapping.  Entries
    whose name and size match this model's ``state_dict`` are copied in;
    unknown names, size mismatches and names missing from the checkpoint
    are only logged.  Storages are mapped to CPU while loading so that a
    checkpoint saved on a GPU can be read on a CPU-only machine.
    """
    super(DecomposableAttention, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    # Fall back to the premise encoder when no hypothesis encoder is given.
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)

    # Do we want to initialize with the SNLI stuff? let's say yes.
    # 'snli-decomposable-attention/weights.th'
    if preload_path is not None:
        logger.info("Preloading!")
        # NOTE(review): torch.load unpickles the file - only point this at
        # trusted checkpoints.
        preload = torch.load(preload_path, map_location=lambda storage, loc: storage)
        own_state = self.state_dict()
        for name, param in preload.items():
            if name not in own_state:
                logger.info("Unexpected key {} in state_dict with size {}".format(name, param.size()))
            elif param.size() == own_state[name].size():
                own_state[name].copy_(param)
            else:
                logger.info("Network has {} with size {}, ckpt has {}".format(name,
                                                                              own_state[name].size(),
                                                                              param.size()))

        missing = set(own_state.keys()) - set(preload.keys())
        if missing:
            # Sorted so the log line is stable across runs.
            logger.info("We couldn't find {}".format(','.join(sorted(missing))))
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             dropout: float = 0.5,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Store the model components and validate that their dimensions agree.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    A truthy ``dropout`` creates both a plain and a variational dropout
    module with that rate; otherwise both attributes are ``None``.  Three
    dimension pairs are passed to ``check_dimensions_match``: embedder
    output vs. encoder input, four times the encoder output vs.
    ``projection_feedforward`` input, and ``projection_feedforward`` output
    vs. ``inference_encoder`` input.  ``initializer`` is applied last.
    """
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = MatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder

    # Both dropout variants are enabled or disabled together.
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.rnn_input_dropout = VariationalDropout(dropout) if dropout else None

    self._output_feedforward = output_feedforward
    self._output_logit = output_logit

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    embedding_dim = text_field_embedder.get_output_dim()
    check_dimensions_match(embedding_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4,
                           projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(),
                           inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def from_params(cls, vocab: Vocabulary,
                params: Params) -> 'DialogueContextHierarchicalCoherenceAttentionClassifier':
    """Build the classifier from a ``Params`` configuration object.

    Each component is created by popping its configuration from ``params``
    and handing it to that component's own ``from_params`` factory.
    ``"response_encoder"`` is optional and yields ``None`` when absent;
    ``"initializer"`` and ``"regularizer"`` default to an empty list.  The
    ``"similarity_function"`` entry is passed to
    ``MatrixAttention.from_params`` to build the attention module.

    The attention factory is called on the class itself, so no throwaway
    ``MatrixAttention`` instance is created just to reach it.
    """
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)

    utterance_encoder = Seq2VecEncoder.from_params(params.pop("utterance_encoder"))
    context_encoder = Seq2SeqEncoder.from_params(params.pop("context_encoder"))

    response_encoder_params = params.pop("response_encoder", None)
    if response_encoder_params is not None:
        response_encoder = Seq2SeqEncoder.from_params(response_encoder_params)
    else:
        response_encoder = None

    attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward'))
    compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward'))
    classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
    final_classifier_feedforward = FeedForward.from_params(
        params.pop("final_classifier_feedforward"))

    initializer = InitializerApplicator.from_params(params.pop("initializer", []))
    regularizer = RegularizerApplicator.from_params(params.pop("regularizer", []))

    # NOTE(review): assumes MatrixAttention.from_params is a classmethod, as
    # in AllenNLP - confirm against the installed version.
    matrix_attention = MatrixAttention.from_params(params.pop("similarity_function"))

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               attend_feedforward=attend_feedforward,
               matrix_attention=matrix_attention,
               compare_feedforward=compare_feedforward,
               classifier_feedforward=classifier_feedforward,
               final_classifier_feedforward=final_classifier_feedforward,
               utterance_encoder=utterance_encoder,
               context_encoder=context_encoder,
               response_encoder=response_encoder,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the layers, span/compat predictors and metrics of the model.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    The compat layers and the span-start head all take
    ``4 * encoding_dim + modeling_dim`` inputs; the span-end head takes
    ``4 * encoding_dim + span_end_encoder output dim``.  When ``dropout`` is
    not positive, an identity function is used in place of a dropout
    module.  ``initializer`` is applied to the model as the final step.

    Raises
    ------
    ConfigurationError
        If the modeling layer input is not ``4 * encoding_dim``, if the
        embedder output differs from the phrase layer input, or if the span
        end encoder input is not ``4 * encoding_dim + 3 * modeling_dim``.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    # Sub-modules are created in a fixed order; do not reorder them.
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()

    # Shared input width of the compat layers and the span-start head.
    merged_dim = encoding_dim * 4 + modeling_dim
    self._compat_layer = FC3(merged_dim)
    self._compat_pred_layer = Linear(merged_dim, 2)

    span_start_input_dim = merged_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these
    # aren't necessarily obvious from the configuration files, so we check
    # here.
    modeling_input_dim = modeling_layer.get_input_dim()
    if modeling_input_dim != 4 * encoding_dim:
        raise ConfigurationError(
            "The input dimension to the modeling_layer must be "
            "equal to 4 times the encoding dimension of the phrase_layer. "
            "Found {} and 4 * {} respectively.".format(modeling_input_dim, encoding_dim))

    embedder_output_dim = text_field_embedder.get_output_dim()
    phrase_input_dim = phrase_layer.get_input_dim()
    if embedder_output_dim != phrase_input_dim:
        raise ConfigurationError(
            "The output dimension of the text_field_embedder (embedding_dim + "
            "char_cnn) must match the input dimension of the phrase_encoder. "
            "Found {} and {}, respectively.".format(embedder_output_dim, phrase_input_dim))

    span_end_encoder_input_dim = span_end_encoder.get_input_dim()
    if span_end_encoder_input_dim != encoding_dim * 4 + modeling_dim * 3:
        raise ConfigurationError(
            "The input dimension of the span_end_encoder should be equal to "
            "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. "
            "Found {} and (4 * {} + 3 * {}) "
            "respectively.".format(span_end_encoder_input_dim, encoding_dim, modeling_dim))

    # Metric trackers.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._compat_accuracy = BooleanAccuracy()

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self, vocab, text_field_embedder, num_highway_layers, phrase_layer,
             attention_similarity_function, modeling_layer,
             cove_layer=None, elmo_layer=None, deep_elmo=False,
             dropout=0.2, mask_lstms=True,
             initializer=InitializerApplicator(), regularizer=None):
    """Build the encoder's layers and validate that their dimensions agree.

    Only ``vocab`` is forwarded to the parent constructor; ``regularizer``
    is accepted but not used.  When ``text_field_embedder`` is ``None``,
    identity functions replace the embedder and highway layer and the
    embedding dimension is taken to be 0.  ``self.output_dim`` is the output
    dimension of ``modeling_layer``.  When ``dropout`` is not positive, an
    identity function is used in place of a dropout module.
    ``initializer`` is applied to the model as the final step.

    Raises
    ------
    ConfigurationError
        If the modeling layer input is not ``2 * phrase_layer output``
        (plus 1024 when an ELMo layer is given and ``deep_elmo`` is truthy),
        or if the phrase layer input does not equal the embedding dimension
        (plus 600 with a CoVe layer, plus 1024 with an ELMo layer).
    """
    super(HeadlessPairAttnEncoder, self).__init__(vocab)  # , regularizer)

    if text_field_embedder is None:  # just using ELMo embeddings
        self._text_field_embedder = lambda x: x
        d_emb = 0
        self._highway_layer = lambda x: x
    else:
        self._text_field_embedder = text_field_embedder
        d_emb = text_field_embedder.get_output_dim()
        self._highway_layer = TimeDistributed(Highway(d_emb, num_highway_layers))

    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._cove = cove_layer
    self._elmo = elmo_layer
    self._deep_elmo = deep_elmo
    self.pad_idx = vocab.get_token_index(vocab._padding_token)

    d_inp_phrase = phrase_layer.get_input_dim()
    d_out_phrase = phrase_layer.get_output_dim()
    d_out_model = modeling_layer.get_output_dim()
    d_inp_model = modeling_layer.get_input_dim()
    self.output_dim = d_out_model

    # The modeling layer takes 2 * phrase output, widened by 1024 only when
    # an ELMo layer is present AND deep_elmo is set.
    uses_deep_elmo = elmo_layer is not None and deep_elmo
    expected_d_inp_model = 2 * d_out_phrase + (1024 if uses_deep_elmo else 0)
    if d_inp_model != expected_d_inp_model:
        # The previous message claimed "4 times", contradicting the check.
        raise ConfigurationError(
            "The input dimension to the modeling_layer must be "
            "equal to 2 times the encoding dimension of the phrase_layer "
            "(plus 1024 when deep ELMo is used). "
            "Found {} but expected {} (phrase_layer output dim {}).".format(
                d_inp_model, expected_d_inp_model, d_out_phrase))

    # NOTE(review): with both a CoVe and an ELMo layer this check can never
    # pass (it requires d_emb + 600 and d_emb + 1024 to both equal the
    # phrase input) - confirm whether that combination is meant to work.
    if (cove_layer is None and elmo_layer is None and d_emb != d_inp_phrase) \
            or (cove_layer is not None and d_emb + 600 != d_inp_phrase) \
            or (elmo_layer is not None and d_emb + 1024 != d_inp_phrase):
        raise ConfigurationError(
            "The output dimension of the text_field_embedder "
            "must match the input "
            "dimension of the phrase_encoder. Found {} and {} "
            "respectively.".format(d_emb, d_inp_phrase))

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self, vocab, text_field_embedder, num_highway_layers, phrase_layer,
             attention_similarity_function, modeling_layer, span_end_encoder,
             dropout=0.2, mask_lstms=True,
             initializer=InitializerApplicator(), regularizer=None):
    """Build the layers, span predictors and metrics, checking dimensions.

    ``vocab`` and ``regularizer`` are forwarded to the parent constructor.
    The span-start head takes ``4 * encoding_dim + modeling_dim`` inputs and
    the span-end head ``4 * encoding_dim + span_end_encoder output dim``.
    Three dimension pairs are passed to ``check_dimensions_match``.  When
    ``dropout`` is not positive, an identity function is used in place of a
    dropout module.  ``initializer`` is applied to the model last.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    # Layers, created in a fixed order; do not reorder them.
    self._text_field_embedder = text_field_embedder
    embedding_dim = text_field_embedder.get_output_dim()
    self._highway_layer = TimeDistributed(Highway(embedding_dim, num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    # Span start/end predictors.
    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()

    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))

    # Dimension checks.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim")

    # Metric trackers.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity stand-in keeps call sites uniform when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)