def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None) -> None:
    super(SemanticRoleLabeler, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")

    # For the span based evaluation, we don't want to consider labels
    # for the verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace="labels",
                                          ignore_classes=["V"])

    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing

    check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                           encoder.get_input_dim(),
                           "text embedding dim + verb indicator embedding dim",
                           "encoder input dim")
    initializer(self)
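# A minimal construction sketch for the dimension check above, assuming the
# AllenNLP 0.x API these snippets come from; the component choices and sizes
# (100-dim embeddings, a 300-dim LSTM) are illustrative assumptions, not taken
# from the original. The constraint is: token embedding dim + verb-indicator
# embedding dim must equal the encoder's input dim.
import torch
from allennlp.data import Vocabulary
from allennlp.models import SemanticRoleLabeler
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

vocab = Vocabulary()
vocab.add_token_to_namespace("ARG0", "labels")
embedder = BasicTextFieldEmbedder(
        {"tokens": Embedding(num_embeddings=vocab.get_vocab_size("tokens"), embedding_dim=100)})
# 100 (tokens) + 100 (verb indicator) == 200 == LSTM input size, so the check passes.
encoder = PytorchSeq2SeqWrapper(torch.nn.LSTM(200, 300, batch_first=True))
model = SemanticRoleLabeler(vocab, embedder, encoder, binary_feature_dim=100)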
def test_regex_matches_are_initialized_correctly(self):
    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear_1_with_funky_name = torch.nn.Linear(5, 10)
            self.linear_2 = torch.nn.Linear(10, 5)
            self.conv = torch.nn.Conv1d(5, 5, 5)

        def forward(self, inputs):  # pylint: disable=arguments-differ
            pass

    # Make sure we handle regexes properly
    json_params = """{"initializer": [
        ["conv", {"type": "constant", "val": 5}],
        ["funky_na.*bi", {"type": "constant", "val": 7}]
    ]}
    """
    params = Params(json.loads(_jsonnet.evaluate_snippet("", json_params)))
    initializers = InitializerApplicator.from_params(params['initializer'])
    model = Net()
    initializers(model)

    for parameter in model.conv.parameters():
        assert torch.equal(parameter.data, torch.ones(parameter.size()) * 5)

    parameter = model.linear_1_with_funky_name.bias
    assert torch.equal(parameter.data, torch.ones(parameter.size()) * 7)
def test_forward_gives_correct_output(self):
    params = Params({
        'input_dim': 2,
        'output_dims': 3,
        'pool_sizes': 4,
        'dropout': 0.0,
        'num_layers': 2
    })
    maxout = Maxout.from_params(params)

    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(maxout)

    input_tensor = torch.FloatTensor([[-3, 1]])
    output = maxout(input_tensor).data.numpy()
    assert output.shape == (1, 3)

    # This output was checked by hand.
    # The output of the first maxout layer is [-1, -1, -1], since the
    # matrix multiply gives us [-2] * 12. Reshaping and maxing
    # produces [-2, -2, -2] and the bias increments these values.
    # The second layer output is [-2, -2, -2], since the matrix
    # multiply gives us [-3] * 12. Reshaping and maxing
    # produces [-3, -3, -3] and the bias increments these values.
    assert_almost_equal(output, [[-2, -2, -2]])
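# Standalone arithmetic mirroring the hand-checked comment above (my own
# working, not part of the original test). With every weight and bias set to
# 1, each maxout unit takes a max over four identical pre-activations, so the
# model reduces to simple integer arithmetic per layer.
first_layer = max([(-3 + 1) + 1] * 4)           # matmul gives -2, bias gives -1
second_layer = max([3 * first_layer + 1] * 4)   # three -1 inputs sum to -3, bias gives -2
assert (first_layer, second_layer) == (-1, -2)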
def setUp(self):
    super(TestTokenCharactersEncoder, self).setUp()
    self.vocab = Vocabulary()
    self.vocab.add_token_to_namespace("1", "token_characters")
    self.vocab.add_token_to_namespace("2", "token_characters")
    self.vocab.add_token_to_namespace("3", "token_characters")
    self.vocab.add_token_to_namespace("4", "token_characters")
    params = Params({
        "embedding": {
            "embedding_dim": 2,
            "vocab_namespace": "token_characters"
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 2,
            "num_filters": 4,
            "ngram_filter_sizes": [1, 2],
            "output_dim": 3
        }
    })
    self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab,
                                                      params=deepcopy(params))
    self.embedding = Embedding.from_params(vocab=self.vocab,
                                           params=params["embedding"])
    self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(self.encoder)
    initializer(self.embedding)
    initializer(self.inner_encoder)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DecomposableAttention, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def test_l2_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10),
                                torch.nn.Linear(10, 5))
    initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 0.5))])
    initializer(model)
    value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
def test_l1_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10),
                                torch.nn.Linear(10, 5))
    initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, -1))])
    initializer(model)
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 115 because the count includes biases as well as weights.
    assert value.data.numpy() == 115.0
def test_regularizer_applicator_respects_regex_matching(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10),
                                torch.nn.Linear(10, 5))
    initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 1.))])
    initializer(model)
    value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                   ("bias", L1Regularizer(1.0))])(model)
    assert value.data.numpy() == 65.0
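# The expected values in the three regularizer tests above are plain parameter
# counts; a sketch of the arithmetic (my own working, not test code). The two
# Linear layers hold 5*10 + 10*5 = 100 weights and 10 + 5 = 15 biases.
weights, biases = 5 * 10 + 10 * 5, 10 + 5
assert 1.0 * (weights + biases) * 0.5 ** 2 == 28.75     # L2 over all-0.5 parameters
assert 1.0 * (weights + biases) * abs(-1.0) == 115.0    # L1 over all -1 parameters
assert 0.5 * weights + 1.0 * biases == 65.0             # "weight" L2 + "bias" L1, all ones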
def test_forward_does_correct_computation(self):
    encoder = CnnEncoder(embedding_dim=2, num_filters=1, ngram_filter_sizes=(1, 2))
    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(encoder)
    input_tensor = torch.FloatTensor([[[.7, .8], [.1, 1.5]]])
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(encoder_output.data.numpy(),
                        numpy.asarray([[1.6 + 1.0, 3.1 + 1.0]]),
                        decimal=6)
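# The expected output above, as standalone arithmetic (my own working, not
# test code). With all filter weights and biases set to 1, each filter output
# is the sum of the covered embedding values plus the bias, and max pooling
# keeps the largest window.
unigram = max(0.7 + 0.8, 0.1 + 1.5) + 1.0   # best single token -> 1.6 + 1.0
bigram = (0.7 + 0.8 + 0.1 + 1.5) + 1.0      # only one 2-token window -> 3.1 + 1.0
assert abs(unigram - 2.6) < 1e-6 and abs(bigram - 4.1) < 1e-6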
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             feedforward: FeedForward = None,
             include_start_end_transitions: bool = True,
             dropout: float = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward

    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))

    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    else:
        constraints = None

    self.crf = ConditionalRandomField(
            self.num_tags, constraints,
            include_start_end_transitions=include_start_end_transitions
    )

    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CoreferenceResolver, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = SpanPruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))

    self._endpoint_span_extractor = EndpointSpanExtractor(context_layer.get_output_dim(),
                                                          combination="x,y",
                                                          num_width_embeddings=max_span_width,
                                                          span_width_embedding_dim=feature_size,
                                                          bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=text_field_embedder.get_output_dim())

    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)

    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents

    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             span_extractor: SpanExtractor,
             encoder: Seq2SeqEncoder,
             feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             evalb_directory_path: str = DEFAULT_EVALB_DIR) -> None:
    super(SpanConstituencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward) if feedforward else None
    self.pos_tag_embedding = pos_tag_embedding or None
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()

    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim,
                           encoder.get_input_dim(),
                           "representation dim (tokens + optional POS tags)",
                           "encoder input dim")
    check_dimensions_match(encoder.get_output_dim(),
                           span_extractor.get_input_dim(),
                           "encoder output dim",
                           "span extractor input dim")
    if feedforward is not None:
        check_dimensions_match(span_extractor.get_output_dim(),
                               feedforward.get_input_dim(),
                               "span extractor output dim",
                               "feedforward input dim")

    self.tag_accuracy = CategoricalAccuracy()

    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._encoder = encoder

    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward

    self._inference_encoder = inference_encoder

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None

    self._output_feedforward = output_feedforward
    self._output_logit = output_logit

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim * 4", "projection feedforward input dim")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "projection feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
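# A worked instance of the BiDAF dimension constraints above, with illustrative
# sizes (a bidirectional phrase layer and modeling layer with hidden size 100
# are my assumptions, not values from the original):
encoding_dim = 200                                                # 2 * 100
modeling_dim = 200
span_start_input_dim = 4 * encoding_dim + modeling_dim            # 1000
span_end_encoder_input_dim = 4 * encoding_dim + 3 * modeling_dim  # 1400
assert (span_start_input_dim, span_end_encoder_input_dim) == (1000, 1400)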
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             matcher_word: BiMpmMatching,
             encoder1: Seq2SeqEncoder,
             matcher_forward1: BiMpmMatching,
             matcher_backward1: BiMpmMatching,
             encoder2: Seq2SeqEncoder,
             matcher_forward2: BiMpmMatching,
             matcher_backward2: BiMpmMatching,
             aggregator: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             dropout: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiMpm, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder

    self.matcher_word = matcher_word

    self.encoder1 = encoder1
    self.matcher_forward1 = matcher_forward1
    self.matcher_backward1 = matcher_backward1

    self.encoder2 = encoder2
    self.matcher_forward2 = matcher_forward2
    self.matcher_backward2 = matcher_backward2

    self.aggregator = aggregator

    matching_dim = (self.matcher_word.get_output_dim() +
                    self.matcher_forward1.get_output_dim() +
                    self.matcher_backward1.get_output_dim() +
                    self.matcher_forward2.get_output_dim() +
                    self.matcher_backward2.get_output_dim())

    check_dimensions_match(matching_dim, self.aggregator.get_input_dim(),
                           "sum of dim of all matching layers", "aggregator input dim")

    self.classifier_feedforward = classifier_feedforward

    self.dropout = torch.nn.Dropout(dropout)

    self.metrics = {"accuracy": CategoricalAccuracy()}

    self.loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def test_forward_gives_correct_output(self):
    params = Params({
        'input_dim': 2,
        'hidden_dims': 3,
        'activations': 'relu',
        'num_layers': 2
    })
    feedforward = FeedForward.from_params(params)

    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(feedforward)

    input_tensor = torch.FloatTensor([[-3, 1]])
    output = feedforward(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand - ReLU makes output after first hidden layer [0, 0, 0],
    # which then gets a bias added in the second layer to be [1, 1, 1].
    assert_almost_equal(output, [[1, 1, 1]])
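# Standalone arithmetic for the hand check above (my own working, not part of
# the original test): every weight and bias is 1, so each of the three hidden
# units sees (-3) + 1 = -2 from the matmul plus a bias of 1.
hidden = max(0, (-3 + 1) + 1)    # ReLU(-1) -> 0 for each hidden unit
output = max(0, 3 * hidden + 1)  # second layer sums three 0s, bias gives 1
assert (hidden, output) == (0, 1)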
def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)
    # Initialize all weights to be == 1.
    initializer = InitializerApplicator([(".*", lambda tensor: torch.nn.init.constant_(tensor, 1.))])
    initializer(augmented_lstm)
    initializer(pytorch_lstm)

    initial_state = torch.zeros([1, 5, 11])
    initial_memory = torch.zeros([1, 5, 11])

    # Use bigger numbers to avoid floating point instability.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor * 5.,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor,
                                      sorted_sequence.data.tolist(),
                                      batch_first=True)

    augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output, pytorch_state = pytorch_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    numpy.testing.assert_array_almost_equal(pytorch_output_sequence.data.numpy(),
                                            augmented_output_sequence.data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[0].data.numpy(),
                                            augmented_state[0].data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[1].data.numpy(),
                                            augmented_state[1].data.numpy(), decimal=4)
def test_regex_match_prevention_prevents_and_overrides(self):
    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear_1 = torch.nn.Linear(5, 10)
            self.linear_2 = torch.nn.Linear(10, 5)
            # Typical actual usage: modules loaded via allennlp's Model.load(..).
            self.linear_3_transfer = torch.nn.Linear(5, 10)
            self.linear_4_transfer = torch.nn.Linear(10, 5)
            self.pretrained_conv = torch.nn.Conv1d(5, 5, 5)

        def forward(self, inputs):  # pylint: disable=arguments-differ
            pass

    json_params = """{"initializer": [
        [".*linear.*", {"type": "constant", "val": 10}],
        [".*conv.*", {"type": "constant", "val": 10}],
        [".*_transfer.*", "prevent"],
        [".*pretrained.*", {"type": "prevent"}]
    ]}
    """
    params = Params(json.loads(_jsonnet.evaluate_snippet("", json_params)))
    initializers = InitializerApplicator.from_params(params['initializer'])
    model = Net()
    initializers(model)

    for module in [model.linear_1, model.linear_2]:
        for parameter in module.parameters():
            assert torch.equal(parameter.data, torch.ones(parameter.size()) * 10)

    transferred_modules = [model.linear_3_transfer,
                           model.linear_4_transfer,
                           model.pretrained_conv]

    for module in transferred_modules:
        for parameter in module.parameters():
            assert not torch.equal(parameter.data, torch.ones(parameter.size()) * 10)
@classmethod
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BiattentiveClassificationNetwork':  # type: ignore
    # pylint: disable=arguments-differ
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab=vocab, params=embedder_params)
    embedding_dropout = params.pop("embedding_dropout")
    pre_encode_feedforward = FeedForward.from_params(params.pop("pre_encode_feedforward"))
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    integrator = Seq2SeqEncoder.from_params(params.pop("integrator"))
    integrator_dropout = params.pop("integrator_dropout")

    output_layer_params = params.pop("output_layer")
    if "activations" in output_layer_params:
        output_layer = FeedForward.from_params(output_layer_params)
    else:
        output_layer = Maxout.from_params(output_layer_params)

    elmo = params.pop("elmo", None)
    if elmo is not None:
        elmo = Elmo.from_params(elmo)
    use_input_elmo = params.pop_bool("use_input_elmo", False)
    use_integrator_output_elmo = params.pop_bool("use_integrator_output_elmo", False)

    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               embedding_dropout=embedding_dropout,
               pre_encode_feedforward=pre_encode_feedforward,
               encoder=encoder,
               integrator=integrator,
               integrator_dropout=integrator_dropout,
               output_layer=output_layer,
               elmo=elmo,
               use_input_elmo=use_input_elmo,
               use_integrator_output_elmo=use_integrator_output_elmo,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }

    initializer(self)
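# A minimal end-to-end sketch for SimpleTagger (AllenNLP 0.x API assumed); the
# component choices, sizes, and the single "NN" label are illustrative, not
# from the original. The only dimension constraint checked above is that the
# embedder's output dim equals the encoder's input dim.
import torch
from allennlp.data import Vocabulary
from allennlp.models import SimpleTagger
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

vocab = Vocabulary()
vocab.add_token_to_namespace("NN", "labels")
embedder = BasicTextFieldEmbedder(
        {"tokens": Embedding(num_embeddings=vocab.get_vocab_size("tokens"), embedding_dim=50)})
encoder = PytorchSeq2SeqWrapper(torch.nn.LSTM(50, 25, batch_first=True))  # input dim 50 matches
tagger = SimpleTagger(vocab, embedder, encoder)
# One unlabeled 3-token batch; logits come back as (batch, sequence, num_labels).
output = tagger({"tokens": torch.tensor([[1, 1, 1]])})
assert output["logits"].shape == (1, 3, vocab.get_vocab_size("labels"))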
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
                                    FeedForward(encoder_dim, 1,
                                                arc_representation_dim,
                                                Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")

    self.head_tag_feedforward = tag_feedforward or \
                                    FeedForward(encoder_dim, 1,
                                                tag_representation_dim,
                                                Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items()
                               if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation: {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")

    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             embedding_dropout: float,
             pre_encode_feedforward: FeedForward,
             encoder: Seq2SeqEncoder,
             integrator: Seq2SeqEncoder,
             integrator_dropout: float,
             output_layer: Union[FeedForward, Maxout],
             elmo: Elmo,
             use_input_elmo: bool = False,
             use_integrator_output_elmo: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiattentiveClassificationNetwork, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    if "elmo" in self._text_field_embedder._token_embedders.keys():  # pylint: disable=protected-access
        raise ConfigurationError("To use ELMo in the BiattentiveClassificationNetwork input, "
                                 "remove elmo from the text_field_embedder and pass an "
                                 "Elmo object to the BiattentiveClassificationNetwork and set the "
                                 "'use_input_elmo' and 'use_integrator_output_elmo' flags accordingly.")
    self._embedding_dropout = nn.Dropout(embedding_dropout)
    self._num_classes = self.vocab.get_vocab_size("labels")

    self._pre_encode_feedforward = pre_encode_feedforward
    self._encoder = encoder
    self._integrator = integrator
    self._integrator_dropout = nn.Dropout(integrator_dropout)

    self._elmo = elmo
    self._use_input_elmo = use_input_elmo
    self._use_integrator_output_elmo = use_integrator_output_elmo
    self._num_elmo_layers = int(self._use_input_elmo) + int(self._use_integrator_output_elmo)
    # Check that, if elmo is None, none of the elmo flags are set.
    if self._elmo is None and self._num_elmo_layers != 0:
        raise ConfigurationError("One of 'use_input_elmo' or 'use_integrator_output_elmo' is True, "
                                 "but no Elmo object was provided upon construction. Pass in an Elmo "
                                 "object to use Elmo.")

    if self._elmo is not None:
        # Check that, if elmo is not None, we use it somewhere.
        if self._num_elmo_layers == 0:
            raise ConfigurationError("Elmo object provided upon construction, but both 'use_input_elmo' "
                                     "and 'use_integrator_output_elmo' are 'False'. Set one of them to "
                                     "'True' to use Elmo, or do not provide an Elmo object upon construction.")
        # Check that the number of flags set is equal to the num_output_representations of the Elmo object.
        if len(self._elmo._scalar_mixes) != self._num_elmo_layers:  # pylint: disable=protected-access
            raise ConfigurationError("Elmo object has num_output_representations={}, but this does not "
                                     "match the number of use_*_elmo flags set to true. use_input_elmo "
                                     "is {}, and use_integrator_output_elmo is {}".format(
                                             len(self._elmo._scalar_mixes),  # pylint: disable=protected-access
                                             self._use_input_elmo,
                                             self._use_integrator_output_elmo))

    # Calculate combined integrator output dim, taking into account elmo.
    if self._use_integrator_output_elmo:
        self._combined_integrator_output_dim = (self._integrator.get_output_dim() +
                                                self._elmo.get_output_dim())
    else:
        self._combined_integrator_output_dim = self._integrator.get_output_dim()

    self._self_attentive_pooling_projection = nn.Linear(self._combined_integrator_output_dim, 1)
    self._output_layer = output_layer

    if self._use_input_elmo:
        check_dimensions_match(text_field_embedder.get_output_dim() + self._elmo.get_output_dim(),
                               self._pre_encode_feedforward.get_input_dim(),
                               "text field embedder output dim + ELMo output dim",
                               "Pre-encoder feedforward input dim")
    else:
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               self._pre_encode_feedforward.get_input_dim(),
                               "text field embedder output dim",
                               "Pre-encoder feedforward input dim")
    check_dimensions_match(self._pre_encode_feedforward.get_output_dim(),
                           self._encoder.get_input_dim(),
                           "Pre-encoder feedforward output dim",
                           "Encoder input dim")
    check_dimensions_match(self._encoder.get_output_dim() * 3,
                           self._integrator.get_input_dim(),
                           "Encoder output dim * 3",
                           "Integrator input dim")
    if self._use_integrator_output_elmo:
        check_dimensions_match(self._combined_integrator_output_dim * 4,
                               self._output_layer.get_input_dim(),
                               "(Integrator output dim + ELMo output dim) * 4",
                               "Output layer input dim")
    else:
        check_dimensions_match(self._integrator.get_output_dim() * 4,
                               self._output_layer.get_input_dim(),
                               "Integrator output dim * 4",
                               "Output layer input dim")
    check_dimensions_match(self._output_layer.get_output_dim(),
                           self._num_classes,
                           "Output layer output dim",
                           "Number of classes.")

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)