def test_precomputed_contextualizer_scalar_mix(self):
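        # With scalar weights [0.0, 0.0, 1.0] all of the mixing mass sits on the
        # top ELMo layer, so after applying gamma=0.5 the mixed representations
        # should equal 0.5 times the top-layer-only representations.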
        all_elmo_layers_path = self.model_paths / "elmo_layers_all.hdf5"
        all_elmo_layers_params = Params({
            "type": "precomputed_contextualizer",
            "representations_path": all_elmo_layers_path,
            "scalar_weights": [0.0, 0.0, 1.0],
            "gamma": 0.5
        })
        all_elmo_layers = Contextualizer.from_params(all_elmo_layers_params)

        top_elmo_layers_path = self.model_paths / "elmo_layers_top.hdf5"
        top_elmo_layers_params = Params({
            "type": "precomputed_contextualizer",
            "representations_path": top_elmo_layers_path
        })
        top_elmo_layers = Contextualizer.from_params(top_elmo_layers_params)

        rep_dim = 1024

        top_layers_representations = top_elmo_layers(
            [self.sentence_1, self.sentence_2, self.sentence_3])
        assert len(top_layers_representations) == 3
        all_layers_representations = all_elmo_layers(
            [self.sentence_1, self.sentence_2, self.sentence_3])
        assert len(all_layers_representations) == 3

        first_sentence_representation = all_layers_representations[0]
        seq_len = 16
        assert first_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(first_sentence_representation.cpu().numpy(),
                        (top_layers_representations[0] * 0.5).cpu().numpy(),
                        rtol=1e-5)

        second_sentence_representation = all_layers_representations[1]
        seq_len = 11
        assert second_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(second_sentence_representation.cpu().numpy(),
                        (top_layers_representations[1] * 0.5).cpu().numpy(),
                        rtol=1e-5)

        third_sentence_representation = all_layers_representations[2]
        seq_len = 11
        assert third_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(third_sentence_representation.cpu().numpy(),
                        (top_layers_representations[2] * 0.5).cpu().numpy(),
                        rtol=1e-5)

    def test_elmo_contextualizer_with_grad(self):
        weights_path = self.model_paths / "lm_weights.hdf5"
        options_path = self.model_paths / "options.json"

        params = Params({
            "type": "elmo_contextualizer",
            "batch_size": 2,
            "elmo": {
                "options_file": options_path,
                "weight_file": weights_path,
                "dropout": 0.0,
                "num_output_representations": 1,
                "requires_grad": True,
            }
        })
        elmo_contextualizer = Contextualizer.from_params(params)
        unpadded_representations = elmo_contextualizer([
            self.sentence_1, self.sentence_2, self.sentence_3])
        token_representations, mask = pad_contextualizer_output(
            unpadded_representations)
        loss = token_representations.sum()
        loss.backward()
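        # requires_grad=True means backprop through the dummy loss should have
        # populated a gradient for every parameter inside the ELMo LSTM.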
        elmo_grads = [param.grad for name, param in
                      elmo_contextualizer.named_parameters() if '_elmo_lstm' in name]
        assert all([grad is not None for grad in elmo_grads])

    def test_elmo_contextualizer_raises_error_2_output_reps(self):
        weights_path = self.model_paths / "lm_weights.hdf5"
        options_path = self.model_paths / "options.json"

        params = Params({
            "type": "elmo_contextualizer",
            "batch_size": 2,
            "elmo": {
                "options_file": options_path,
                "weight_file": weights_path,
                "dropout": 0.0,
                "num_output_representations": 2
            }
        })
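        # The contextualizer expects ELMo to produce a single output
        # representation, so asking for two should fail at construction time.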
        with pytest.raises(ConfigurationError):
            Contextualizer.from_params(params)

    def test_elmo_contextualizer_without_grad_frozen_scalar_mix(self):
        weights_path = self.model_paths / "lm_weights.hdf5"
        options_path = self.model_paths / "options.json"

        params = Params({
            "type": "elmo_contextualizer",
            "batch_size": 2,
            "layer_num": 1,
            "freeze_scalar_mix": True,
            "elmo": {
                "options_file": options_path,
                "weight_file": weights_path,
                "dropout": 0.0,
                "num_output_representations": 1,
                "requires_grad": False,
            }
        })
        elmo_contextualizer = Contextualizer.from_params(params)
        unpadded_representations = elmo_contextualizer([
            self.sentence_1, self.sentence_2, self.sentence_3])
        token_representations, mask = pad_contextualizer_output(
            unpadded_representations)
        loss = token_representations.sum()
        # Nothing in the contextualizer is requires_grad=True, so this
        # should be requires_grad=False and grad_fn should be None
        assert loss.grad_fn is None
        assert loss.requires_grad is False

    def test_elmo_contextualizer_with_grad_frozen_scalar_mix(self):
        weights_path = self.model_paths / "lm_weights.hdf5"
        options_path = self.model_paths / "options.json"

        params = Params({
            "type": "elmo_contextualizer",
            "batch_size": 2,
            "layer_num": 1,
            "freeze_scalar_mix": True,
            "elmo": {
                "options_file": options_path,
                "weight_file": weights_path,
                "dropout": 0.0,
                "num_output_representations": 1,
                "requires_grad": True,
            }
        })
        elmo_contextualizer = Contextualizer.from_params(params)
        unpadded_representations = elmo_contextualizer([
            self.sentence_1, self.sentence_2, self.sentence_3])
        token_representations, mask = pad_contextualizer_output(
            unpadded_representations)
        loss = token_representations.sum()
        loss.backward()
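        # The ELMo weights are trainable but the scalar mix is frozen, so only
        # the scalar mix parameters should be left without gradients.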
        for name, param in elmo_contextualizer.named_parameters():
            if "scalar_mix" in name:
                assert param.grad is None, "Parameter {} should not have grad.".format(name)
            else:
                assert param.grad is not None, "Parameter {} should have grad.".format(name)

    def test_precomputed_contextualizer_top_elmo_layers(self):
        top_elmo_layers_path = self.model_paths / "elmo_layers_top.hdf5"
        params = Params({
            "type": "precomputed_contextualizer",
            "representations_path": top_elmo_layers_path
        })
        top_elmo_layers = Contextualizer.from_params(params)
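        # The precomputed top-layer ELMo vectors in the fixture file are
        # 1024-dimensional.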
        rep_dim = 1024

        representations = top_elmo_layers(
            [self.sentence_1, self.sentence_2, self.sentence_3])
        assert len(representations) == 3

        first_sentence_representation = representations[0]
        seq_len = 16
        assert first_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(first_sentence_representation[:, :1].cpu().numpy()[:4],
                        np.array([[0.28029996], [-1.1247723], [-0.45496008],
                                  [-0.25592047]]),
                        rtol=1e-5)
        second_sentence_representation = representations[1]
        seq_len = 11
        assert second_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(
            second_sentence_representation[:, :1].cpu().numpy()[:4],
            np.array([[-0.12891075], [-0.67801315], [0.021882683],
                      [0.03998524]]),
            rtol=1e-5)
        third_sentence_representation = representations[2]
        seq_len = 11
        assert third_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(third_sentence_representation[:, :1].cpu().numpy()[:4],
                        np.array([[0.17843074], [0.49779615], [0.36996722],
                                  [-1.154212]]),
                        rtol=1e-5)

    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'SelectiveRegressor':
        token_representation_dim = params.pop_int("token_representation_dim")

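        # The encoder, decoder, and contextualizer are all optional; each is
        # built from its sub-configuration only if one was supplied.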
        encoder = params.pop("encoder", None)
        if encoder is not None:
            encoder = Seq2SeqEncoder.from_params(encoder)
        decoder = params.pop("decoder", None)
        if decoder is not None and not isinstance(decoder, str):
            decoder = FeedForward.from_params(decoder)
        contextualizer = params.pop('contextualizer', None)
        if contextualizer:
            contextualizer = Contextualizer.from_params(contextualizer)

        pretrained_file = params.pop("pretrained_file", None)
        transfer_contextualizer_from_pretrained_file = params.pop_bool(
            "transfer_contextualizer_from_pretrained_file", False)
        transfer_encoder_from_pretrained_file = params.pop_bool(
            "transfer_encoder_from_pretrained_file", False)
        freeze_encoder = params.pop_bool("freeze_encoder", False)

        initializer = InitializerApplicator.from_params(params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
        params.assert_empty(cls.__name__)
        return cls(vocab=vocab,
                   token_representation_dim=token_representation_dim,
                   encoder=encoder,
                   decoder=decoder,
                   contextualizer=contextualizer,
                   pretrained_file=pretrained_file,
                   transfer_contextualizer_from_pretrained_file=transfer_contextualizer_from_pretrained_file,
                   transfer_encoder_from_pretrained_file=transfer_encoder_from_pretrained_file,
                   freeze_encoder=freeze_encoder,
                   initializer=initializer,
                   regularizer=regularizer)

    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'Tagger':
        token_representation_dim = params.pop_int("token_representation_dim")

        encoder = params.pop("encoder", None)
        if encoder is not None:
            encoder = Seq2SeqEncoder.from_params(encoder)
        decoder = params.pop("decoder", None)
        if decoder is not None and not isinstance(decoder, str):
            decoder = FeedForward.from_params(decoder)

        use_crf = params.pop_bool("use_crf", False)
        constrain_crf_decoding = params.pop_bool("constrain_crf_decoding", False)
        include_start_end_transitions = params.pop_bool("include_start_end_transitions", True)

        contextualizer = params.pop('contextualizer', None)
        if contextualizer:
            contextualizer = Contextualizer.from_params(contextualizer)
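        # Optional evaluation settings: per-label F1, span F1, and perplexity.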
        calculate_per_label_f1 = params.pop_bool("calculate_per_label_f1", False)
        calculate_span_f1 = params.pop_bool("calculate_span_f1", False)
        calculate_perplexity = params.pop_bool("calculate_perplexity", False)
        loss_average = params.pop("loss_average", "batch")
        label_encoding = params.pop_choice("label_encoding", [None, "BIO", "BIOUL", "IOB1"],
                                           default_to_first_choice=True)

        pretrained_file = params.pop("pretrained_file", None)
        transfer_contextualizer_from_pretrained_file = params.pop_bool(
            "transfer_contextualizer_from_pretrained_file", False)
        transfer_encoder_from_pretrained_file = params.pop_bool(
            "transfer_encoder_from_pretrained_file", False)
        freeze_encoder = params.pop_bool("freeze_encoder", False)

        initializer = InitializerApplicator.from_params(params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
        params.assert_empty(cls.__name__)
        return cls(vocab=vocab,
                   token_representation_dim=token_representation_dim,
                   encoder=encoder,
                   decoder=decoder,
                   use_crf=use_crf,
                   constrain_crf_decoding=constrain_crf_decoding,
                   include_start_end_transitions=include_start_end_transitions,
                   label_encoding=label_encoding,
                   contextualizer=contextualizer,
                   calculate_per_label_f1=calculate_per_label_f1,
                   calculate_span_f1=calculate_span_f1,
                   calculate_perplexity=calculate_perplexity,
                   loss_average=loss_average,
                   pretrained_file=pretrained_file,
                   transfer_contextualizer_from_pretrained_file=transfer_contextualizer_from_pretrained_file,
                   transfer_encoder_from_pretrained_file=transfer_encoder_from_pretrained_file,
                   freeze_encoder=freeze_encoder,
                   initializer=initializer,
                   regularizer=regularizer)

    def test_elmo_contextualizer_normal(self):
        weights_path = self.model_paths / "lm_weights.hdf5"
        options_path = self.model_paths / "options.json"
        rep_dim = 32
        num_sentences = 3

        # Test the first layer (index 0)
        params = Params({
            "type": "elmo_contextualizer",
            "batch_size": 2,
            "elmo": {
                "options_file": options_path,
                "weight_file": weights_path,
                "dropout": 0.0,
                "num_output_representations": 1
            }
        })
        elmo_contextualizer = Contextualizer.from_params(params)
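        # The contextualizer returns one unpadded (seq_len, rep_dim) tensor per
        # input sentence.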
        representations = elmo_contextualizer([
            self.sentence_1, self.sentence_2, self.sentence_3])
        assert len(representations) == num_sentences

        first_sentence_representation = representations[0]
        seq_len = 16
        assert first_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(
            torch.sum(first_sentence_representation, dim=-1).detach().cpu().numpy(),
            np.array([-5.4501357, 0.57151437, -1.9986794, -1.9020741, -1.6883984,
                      0.46092677, -2.0832047, -2.045756, -2.660774, -5.4992304,
                      -3.6687968, -3.4485395, -1.9255438, -0.92559034, -1.7234659,
                      -4.93639]),
            rtol=1e-5)

        second_sentence_representation = representations[1]
        seq_len = 11
        assert second_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(
            torch.sum(second_sentence_representation, dim=-1).detach().cpu().numpy(),
            np.array([-0.51167095, -0.61811006, -2.8013024, -3.7508147, -1.6987357,
                      -1.1114583, -3.6302583, -3.3409853, -1.3613609, -3.6760461,
                      -5.137144]),
            rtol=1e-5)
        third_sentence_representation = representations[2]
        seq_len = 11
        assert third_sentence_representation.size() == (seq_len, rep_dim)
        assert_allclose(
            torch.sum(third_sentence_representation, dim=-1).detach().cpu().numpy(),
            np.array([-1.5057361, -2.6824353, -4.1259403, -3.4485295, -1.3296673,
                      -4.5548496, -6.077871, -3.4515395, -3.8405519, -4.3518186,
                      -4.8782477]),
            rtol=1e-5)

    def test_glove_contextualizer_frozen(self):
        params = Params({
            "type": "glove_contextualizer",
            "glove_path": self.glove_path,
            "embedding_dim": self.representation_dim,
            "trainable": False
        })
        glove_contextualizer = Contextualizer.from_params(params)
        unpadded_representations = glove_contextualizer(
            [self.sentence_1, self.sentence_2, self.sentence_3])
        token_representations, mask = pad_contextualizer_output(
            unpadded_representations)
        loss = token_representations.sum()
        # Nothing in the contextualizer is requires_grad=True, so this
        # should be requires_grad=False and grad_fn should be None
        assert loss.grad_fn is None
        assert loss.requires_grad is False

    @classmethod
    def from_params(cls, vocab: Vocabulary,
                    params: Params) -> 'PairwiseTagger':
        token_representation_dim = params.pop_int("token_representation_dim")

        encoder = params.pop("encoder", None)
        if encoder is not None:
            encoder = Seq2SeqEncoder.from_params(encoder)
        decoder = params.pop("decoder", None)
        if decoder is not None and not isinstance(decoder, str):
            decoder = FeedForward.from_params(decoder)
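        # The combination string presumably follows the AllenNLP combine_tensors
        # convention: concatenate x, y, and their elementwise product x*y.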
        combination = params.pop("combination", "x,y,x*y")
        contextualizer = params.pop('contextualizer', None)
        if contextualizer:
            contextualizer = Contextualizer.from_params(contextualizer)
        calculate_per_label_f1 = params.pop_bool("calculate_per_label_f1",
                                                 False)
        loss_average = params.pop("loss_average", "batch")
        pretrained_file = params.pop("pretrained_file", None)
        transfer_contextualizer_from_pretrained_file = params.pop_bool(
            "transfer_contextualizer_from_pretrained_file", False)
        transfer_encoder_from_pretrained_file = params.pop_bool(
            "transfer_encoder_from_pretrained_file", False)
        freeze_encoder = params.pop_bool("freeze_encoder", False)

        initializer = InitializerApplicator.from_params(
            params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(
            params.pop('regularizer', []))
        params.assert_empty(cls.__name__)
        return cls(vocab=vocab,
                   token_representation_dim=token_representation_dim,
                   encoder=encoder,
                   decoder=decoder,
                   combination=combination,
                   contextualizer=contextualizer,
                   calculate_per_label_f1=calculate_per_label_f1,
                   loss_average=loss_average,
                   pretrained_file=pretrained_file,
                   transfer_contextualizer_from_pretrained_file=transfer_contextualizer_from_pretrained_file,
                   transfer_encoder_from_pretrained_file=transfer_encoder_from_pretrained_file,
                   freeze_encoder=freeze_encoder,
                   initializer=initializer,
                   regularizer=regularizer)

    def test_glove_contextualizer_default(self):
        params = Params({
            "type": "glove_contextualizer",
            "glove_path": self.glove_path,
            "embedding_dim": self.representation_dim
        })
        glove_contextualizer = Contextualizer.from_params(params)
        representations = glove_contextualizer(
            [self.sentence_1, self.sentence_2, self.sentence_3])
        assert len(representations) == self.num_sentences

        first_sentence_representation = representations[0]
        seq_len = 16
        assert first_sentence_representation.size() == (
            seq_len, self.representation_dim)
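        # Tokens that are missing from the GloVe file presumably fall back to the
        # embedding at index 0, so the expected value for those positions is read
        # straight from the contextualizer's weight matrix.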
        assert_allclose(
            first_sentence_representation[:, :1].detach().cpu().numpy(),
            np.array([[0.464], [0.246], [0.458], [0.649], [0.273], [0.465],
                      [0.012], [0.19], [0.219], [0.199], [0.944], [0.432],
                      [0.28], [glove_contextualizer.weight[0, 0].item()], [0.083],
                      [0.681]]),
            rtol=1e-5)

        second_sentence_representation = representations[1]
        seq_len = 11
        assert second_sentence_representation.size() == (
            seq_len, self.representation_dim)
        assert_allclose(
            second_sentence_representation[:, :1].detach().cpu().numpy(),
            np.array([[glove_contextualizer.weight[0, 0].item()], [0.761], [0.249],
                      [0.571], [0.952], [0.41], [0.791], [0.063], [0.555],
                      [0.432], [0.681]]),
            rtol=1e-5)

        third_sentence_representation = representations[2]
        seq_len = 11
        assert third_sentence_representation.size() == (
            seq_len, self.representation_dim)
        assert_allclose(
            third_sentence_representation[:, :1].detach().cpu().numpy(),
            np.array([[glove_contextualizer.weight[0, 0].item()], [0.249], [0.56],
                      [0.591], [0.739], [0.222], [0.439], [0.308], [0.793],
                      [0.118], [0.681]]),
            rtol=1e-5)

    def test_glove_contextualizer_trainable(self):
        params = Params({
            "type": "glove_contextualizer",
            "glove_path": self.glove_path,
            "embedding_dim": self.representation_dim,
            "trainable": True
        })
        glove_contextualizer = Contextualizer.from_params(params)
        unpadded_representations = glove_contextualizer(
            [self.sentence_1, self.sentence_2, self.sentence_3])
        token_representations, mask = pad_contextualizer_output(
            unpadded_representations)
        loss = token_representations.sum()
        loss.backward()
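        # With trainable=True, every parameter of the GloVe embedding should
        # have received a gradient from the dummy loss.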
        glove_grads = [
            param.grad
            for name, param in glove_contextualizer.named_parameters()
        ]
        assert all([grad is not None for grad in glove_grads])