def test_precomputed_contextualizer_scalar_mix(self):
    all_elmo_layers_path = self.model_paths / "elmo_layers_all.hdf5"
    all_elmo_layers_params = Params({
        "type": "precomputed_contextualizer",
        "representations_path": all_elmo_layers_path,
        "scalar_weights": [0.0, 0.0, 1.0],
        "gamma": 0.5})
    all_elmo_layers = Contextualizer.from_params(all_elmo_layers_params)

    top_elmo_layers_path = self.model_paths / "elmo_layers_top.hdf5"
    top_elmo_layers_params = Params({
        "type": "precomputed_contextualizer",
        "representations_path": top_elmo_layers_path})
    top_elmo_layers = Contextualizer.from_params(top_elmo_layers_params)

    rep_dim = 1024
    top_layers_representations = top_elmo_layers(
        [self.sentence_1, self.sentence_2, self.sentence_3])
    assert len(top_layers_representations) == 3
    all_layers_representations = all_elmo_layers(
        [self.sentence_1, self.sentence_2, self.sentence_3])
    assert len(all_layers_representations) == 3

    first_sentence_representation = all_layers_representations[0]
    seq_len = 16
    assert first_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(first_sentence_representation.cpu().numpy(),
                    (top_layers_representations[0] * 0.5).cpu().numpy(),
                    rtol=1e-5)

    second_sentence_representation = all_layers_representations[1]
    seq_len = 11
    assert second_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(second_sentence_representation.cpu().numpy(),
                    (top_layers_representations[1] * 0.5).cpu().numpy(),
                    rtol=1e-5)

    third_sentence_representation = all_layers_representations[2]
    seq_len = 11
    assert third_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(third_sentence_representation.cpu().numpy(),
                    (top_layers_representations[2] * 0.5).cpu().numpy(),
                    rtol=1e-5)
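# The expectation above (scalar_weights [0.0, 0.0, 1.0] with gamma 0.5 yielding
# half of the top ELMo layer) follows from the weighted-sum arithmetic
# mixed = gamma * sum_i(w_i * layer_i). The standalone sketch below illustrates
# only that arithmetic; it is not the PrecomputedContextualizer implementation,
# and the shapes and helper name are made up for the example.
def _scalar_mix_arithmetic_sketch():
    layers = [torch.randn(16, 1024) for _ in range(3)]  # per-layer representations
    scalar_weights = [0.0, 0.0, 1.0]
    gamma = 0.5
    mixed = gamma * sum(weight * layer
                        for weight, layer in zip(scalar_weights, layers))
    # With all weight on the last layer, the mix is exactly gamma * top layer.
    assert torch.allclose(mixed, 0.5 * layers[-1])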
def test_elmo_contextualizer_with_grad(self):
    weights_path = self.model_paths / "lm_weights.hdf5"
    options_path = self.model_paths / "options.json"
    params = Params({
        "type": "elmo_contextualizer",
        "batch_size": 2,
        "elmo": {
            "options_file": options_path,
            "weight_file": weights_path,
            "dropout": 0.0,
            "num_output_representations": 1,
            "requires_grad": True}})
    elmo_contextualizer = Contextualizer.from_params(params)
    unpadded_representations = elmo_contextualizer([
        self.sentence_1, self.sentence_2, self.sentence_3])
    token_representations, mask = pad_contextualizer_output(
        unpadded_representations)
    loss = token_representations.sum()
    loss.backward()
    elmo_grads = [param.grad for name, param in
                  elmo_contextualizer.named_parameters()
                  if '_elmo_lstm' in name]
    assert all([grad is not None for grad in elmo_grads])
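# pad_contextualizer_output is used throughout these tests to turn a list of
# variable-length (seq_len, rep_dim) tensors into a padded batch plus a mask.
# The sketch below shows roughly the behavior the tests assume; it is an
# illustrative stand-in built on torch.nn.utils.rnn.pad_sequence, not the
# project's actual helper, and the function name is hypothetical.
def _pad_contextualizer_output_sketch(unpadded_representations):
    from torch.nn.utils.rnn import pad_sequence
    # Pad every sentence to the length of the longest: (batch, max_len, rep_dim).
    token_representations = pad_sequence(unpadded_representations, batch_first=True)
    lengths = torch.tensor([rep.size(0) for rep in unpadded_representations])
    max_len = token_representations.size(1)
    # Mask is 1 for real tokens and 0 for padding positions.
    mask = (torch.arange(max_len).unsqueeze(0) < lengths.unsqueeze(1)).long()
    return token_representations, mask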
def test_elmo_contextualizer_raises_error_2_output_reps(self):
    weights_path = self.model_paths / "lm_weights.hdf5"
    options_path = self.model_paths / "options.json"
    params = Params({
        "type": "elmo_contextualizer",
        "batch_size": 2,
        "elmo": {
            "options_file": options_path,
            "weight_file": weights_path,
            "dropout": 0.0,
            "num_output_representations": 2}})
    with pytest.raises(ConfigurationError):
        Contextualizer.from_params(params)
def test_elmo_contextualizer_without_grad_frozen_scalar_mix(self):
    weights_path = self.model_paths / "lm_weights.hdf5"
    options_path = self.model_paths / "options.json"
    params = Params({
        "type": "elmo_contextualizer",
        "batch_size": 2,
        "layer_num": 1,
        "freeze_scalar_mix": True,
        "elmo": {
            "options_file": options_path,
            "weight_file": weights_path,
            "dropout": 0.0,
            "num_output_representations": 1,
            "requires_grad": False}})
    elmo_contextualizer = Contextualizer.from_params(params)
    unpadded_representations = elmo_contextualizer([
        self.sentence_1, self.sentence_2, self.sentence_3])
    token_representations, mask = pad_contextualizer_output(
        unpadded_representations)
    loss = token_representations.sum()
    # Nothing in the contextualizer is requires_grad=True, so this
    # should be requires_grad=False and grad_fn should be None
    assert loss.grad_fn is None
    assert loss.requires_grad is False
def test_elmo_contextualizer_with_grad_frozen_scalar_mix(self):
    weights_path = self.model_paths / "lm_weights.hdf5"
    options_path = self.model_paths / "options.json"
    params = Params({
        "type": "elmo_contextualizer",
        "batch_size": 2,
        "layer_num": 1,
        "freeze_scalar_mix": True,
        "elmo": {
            "options_file": options_path,
            "weight_file": weights_path,
            "dropout": 0.0,
            "num_output_representations": 1,
            "requires_grad": True}})
    elmo_contextualizer = Contextualizer.from_params(params)
    unpadded_representations = elmo_contextualizer([
        self.sentence_1, self.sentence_2, self.sentence_3])
    token_representations, mask = pad_contextualizer_output(
        unpadded_representations)
    loss = token_representations.sum()
    loss.backward()
    for name, param in elmo_contextualizer.named_parameters():
        if "scalar_mix" in name:
            assert param.grad is None, (
                "Parameter {} should not have grad.".format(name))
        else:
            assert param.grad is not None, (
                "Parameter {} should have grad.".format(name))
def test_precomputed_contextualizer_top_elmo_layers(self):
    top_elmo_layers_path = self.model_paths / "elmo_layers_top.hdf5"
    params = Params({
        "type": "precomputed_contextualizer",
        "representations_path": top_elmo_layers_path})
    top_elmo_layers = Contextualizer.from_params(params)
    rep_dim = 1024

    representations = top_elmo_layers(
        [self.sentence_1, self.sentence_2, self.sentence_3])
    assert len(representations) == 3

    first_sentence_representation = representations[0]
    seq_len = 16
    assert first_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(first_sentence_representation[:, :1].cpu().numpy()[:4],
                    np.array([[0.28029996], [-1.1247723],
                              [-0.45496008], [-0.25592047]]),
                    rtol=1e-5)
    second_sentence_representation = representations[1]
    seq_len = 11
    assert second_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(
        second_sentence_representation[:, :1].cpu().numpy()[:4],
        np.array([[-0.12891075], [-0.67801315],
                  [0.021882683], [0.03998524]]),
        rtol=1e-5)
    third_sentence_representation = representations[2]
    seq_len = 11
    assert third_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(third_sentence_representation[:, :1].cpu().numpy()[:4],
                    np.array([[0.17843074], [0.49779615],
                              [0.36996722], [-1.154212]]),
                    rtol=1e-5)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SelectiveRegressor':
    token_representation_dim = params.pop_int("token_representation_dim")

    encoder = params.pop("encoder", None)
    if encoder is not None:
        encoder = Seq2SeqEncoder.from_params(encoder)
    decoder = params.pop("decoder", None)
    if decoder is not None and not isinstance(decoder, str):
        decoder = FeedForward.from_params(decoder)
    contextualizer = params.pop('contextualizer', None)
    if contextualizer:
        contextualizer = Contextualizer.from_params(contextualizer)
    pretrained_file = params.pop("pretrained_file", None)
    transfer_contextualizer_from_pretrained_file = params.pop_bool(
        "transfer_contextualizer_from_pretrained_file", False)
    transfer_encoder_from_pretrained_file = params.pop_bool(
        "transfer_encoder_from_pretrained_file", False)
    freeze_encoder = params.pop_bool("freeze_encoder", False)

    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))

    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               token_representation_dim=token_representation_dim,
               encoder=encoder,
               decoder=decoder,
               contextualizer=contextualizer,
               pretrained_file=pretrained_file,
               transfer_contextualizer_from_pretrained_file=transfer_contextualizer_from_pretrained_file,
               transfer_encoder_from_pretrained_file=transfer_encoder_from_pretrained_file,
               freeze_encoder=freeze_encoder,
               initializer=initializer,
               regularizer=regularizer)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Tagger':
    token_representation_dim = params.pop_int("token_representation_dim")

    encoder = params.pop("encoder", None)
    if encoder is not None:
        encoder = Seq2SeqEncoder.from_params(encoder)
    decoder = params.pop("decoder", None)
    if decoder is not None and not isinstance(decoder, str):
        decoder = FeedForward.from_params(decoder)

    use_crf = params.pop_bool("use_crf", False)
    constrain_crf_decoding = params.pop_bool("constrain_crf_decoding", False)
    include_start_end_transitions = params.pop_bool("include_start_end_transitions", True)

    contextualizer = params.pop('contextualizer', None)
    if contextualizer:
        contextualizer = Contextualizer.from_params(contextualizer)
    calculate_per_label_f1 = params.pop_bool("calculate_per_label_f1", False)
    calculate_span_f1 = params.pop_bool("calculate_span_f1", False)
    calculate_perplexity = params.pop_bool("calculate_perplexity", False)
    loss_average = params.pop("loss_average", "batch")
    label_encoding = params.pop_choice("label_encoding",
                                       [None, "BIO", "BIOUL", "IOB1"],
                                       default_to_first_choice=True)
    pretrained_file = params.pop("pretrained_file", None)
    transfer_contextualizer_from_pretrained_file = params.pop_bool(
        "transfer_contextualizer_from_pretrained_file", False)
    transfer_encoder_from_pretrained_file = params.pop_bool(
        "transfer_encoder_from_pretrained_file", False)
    freeze_encoder = params.pop_bool("freeze_encoder", False)

    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))

    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               token_representation_dim=token_representation_dim,
               encoder=encoder,
               decoder=decoder,
               use_crf=use_crf,
               constrain_crf_decoding=constrain_crf_decoding,
               include_start_end_transitions=include_start_end_transitions,
               label_encoding=label_encoding,
               contextualizer=contextualizer,
               calculate_per_label_f1=calculate_per_label_f1,
               calculate_span_f1=calculate_span_f1,
               calculate_perplexity=calculate_perplexity,
               loss_average=loss_average,
               pretrained_file=pretrained_file,
               transfer_contextualizer_from_pretrained_file=transfer_contextualizer_from_pretrained_file,
               transfer_encoder_from_pretrained_file=transfer_encoder_from_pretrained_file,
               freeze_encoder=freeze_encoder,
               initializer=initializer,
               regularizer=regularizer)
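# For reference, a hypothetical configuration that the Tagger from_params above
# would accept. The keys mirror the values popped in the method, but the nested
# sub-configurations, paths, and dimensions are illustrative placeholders, not
# settings shipped with the project; omitted keys fall back to their defaults.
_EXAMPLE_TAGGER_PARAMS = Params({
    "token_representation_dim": 1024,
    "encoder": {
        "type": "lstm",
        "input_size": 1024,
        "hidden_size": 512,
        "bidirectional": True},
    "use_crf": True,
    "constrain_crf_decoding": True,
    "label_encoding": "BIO",
    "contextualizer": {
        "type": "precomputed_contextualizer",
        "representations_path": "/path/to/elmo_layers_top.hdf5"},
    "calculate_span_f1": True,
    "loss_average": "batch",
    "freeze_encoder": False})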
def test_elmo_contextualizer_normal(self):
    weights_path = self.model_paths / "lm_weights.hdf5"
    options_path = self.model_paths / "options.json"
    rep_dim = 32
    num_sentences = 3

    # Test the first layer (index 0)
    params = Params({
        "type": "elmo_contextualizer",
        "batch_size": 2,
        "elmo": {
            "options_file": options_path,
            "weight_file": weights_path,
            "dropout": 0.0,
            "num_output_representations": 1}})
    elmo_contextualizer = Contextualizer.from_params(params)
    representations = elmo_contextualizer([
        self.sentence_1, self.sentence_2, self.sentence_3])
    assert len(representations) == num_sentences

    first_sentence_representation = representations[0]
    seq_len = 16
    assert first_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(
        torch.sum(first_sentence_representation, dim=-1).detach().cpu().numpy(),
        np.array([-5.4501357, 0.57151437, -1.9986794, -1.9020741, -1.6883984,
                  0.46092677, -2.0832047, -2.045756, -2.660774, -5.4992304,
                  -3.6687968, -3.4485395, -1.9255438, -0.92559034, -1.7234659,
                  -4.93639]),
        rtol=1e-5)
    second_sentence_representation = representations[1]
    seq_len = 11
    assert second_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(
        torch.sum(second_sentence_representation, dim=-1).detach().cpu().numpy(),
        np.array([-0.51167095, -0.61811006, -2.8013024, -3.7508147, -1.6987357,
                  -1.1114583, -3.6302583, -3.3409853, -1.3613609, -3.6760461,
                  -5.137144]),
        rtol=1e-5)
    third_sentence_representation = representations[2]
    seq_len = 11
    assert third_sentence_representation.size() == (seq_len, rep_dim)
    assert_allclose(
        torch.sum(third_sentence_representation, dim=-1).detach().cpu().numpy(),
        np.array([-1.5057361, -2.6824353, -4.1259403, -3.4485295, -1.3296673,
                  -4.5548496, -6.077871, -3.4515395, -3.8405519, -4.3518186,
                  -4.8782477]),
        rtol=1e-5)
def test_glove_contextualizer_frozen(self):
    params = Params({
        "type": "glove_contextualizer",
        "glove_path": self.glove_path,
        "embedding_dim": self.representation_dim,
        "trainable": False})
    glove_contextualizer = Contextualizer.from_params(params)
    unpadded_representations = glove_contextualizer(
        [self.sentence_1, self.sentence_2, self.sentence_3])
    token_representations, mask = pad_contextualizer_output(
        unpadded_representations)
    loss = token_representations.sum()
    # Nothing in the contextualizer is requires_grad=True, so this
    # should be requires_grad=False and grad_fn should be None
    assert loss.grad_fn is None
    assert loss.requires_grad is False
def from_params(cls, vocab: Vocabulary, params: Params) -> 'PairwiseTagger':
    token_representation_dim = params.pop_int("token_representation_dim")

    encoder = params.pop("encoder", None)
    if encoder is not None:
        encoder = Seq2SeqEncoder.from_params(encoder)
    decoder = params.pop("decoder", None)
    if decoder is not None and not isinstance(decoder, str):
        decoder = FeedForward.from_params(decoder)
    combination = params.pop("combination", "x,y,x*y")
    contextualizer = params.pop('contextualizer', None)
    if contextualizer:
        contextualizer = Contextualizer.from_params(contextualizer)
    calculate_per_label_f1 = params.pop_bool("calculate_per_label_f1", False)
    loss_average = params.pop("loss_average", "batch")
    pretrained_file = params.pop("pretrained_file", None)
    transfer_contextualizer_from_pretrained_file = params.pop_bool(
        "transfer_contextualizer_from_pretrained_file", False)
    transfer_encoder_from_pretrained_file = params.pop_bool(
        "transfer_encoder_from_pretrained_file", False)
    freeze_encoder = params.pop_bool("freeze_encoder", False)

    initializer = InitializerApplicator.from_params(
        params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(
        params.pop('regularizer', []))

    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               token_representation_dim=token_representation_dim,
               encoder=encoder,
               decoder=decoder,
               combination=combination,
               contextualizer=contextualizer,
               calculate_per_label_f1=calculate_per_label_f1,
               loss_average=loss_average,
               pretrained_file=pretrained_file,
               transfer_contextualizer_from_pretrained_file=transfer_contextualizer_from_pretrained_file,
               transfer_encoder_from_pretrained_file=transfer_encoder_from_pretrained_file,
               freeze_encoder=freeze_encoder,
               initializer=initializer,
               regularizer=regularizer)
def test_glove_contextualizer_default(self):
    params = Params({
        "type": "glove_contextualizer",
        "glove_path": self.glove_path,
        "embedding_dim": self.representation_dim})
    glove_contextualizer = Contextualizer.from_params(params)
    representations = glove_contextualizer(
        [self.sentence_1, self.sentence_2, self.sentence_3])
    assert len(representations) == self.num_sentences

    first_sentence_representation = representations[0]
    seq_len = 16
    assert first_sentence_representation.size() == (
        seq_len, self.representation_dim)
    assert_allclose(
        first_sentence_representation[:, :1].detach().cpu().numpy(),
        np.array([[0.464], [0.246], [0.458], [0.649], [0.273], [0.465],
                  [0.012], [0.19], [0.219], [0.199], [0.944], [0.432],
                  [0.28], [glove_contextualizer.weight[0, :1]], [0.083],
                  [0.681]]),
        rtol=1e-5)
    second_sentence_representation = representations[1]
    seq_len = 11
    assert second_sentence_representation.size() == (
        seq_len, self.representation_dim)
    assert_allclose(
        second_sentence_representation[:, :1].detach().cpu().numpy(),
        np.array([[glove_contextualizer.weight[0, :1]], [0.761], [0.249],
                  [0.571], [0.952], [0.41], [0.791], [0.063], [0.555],
                  [0.432], [0.681]]),
        rtol=1e-5)
    third_sentence_representation = representations[2]
    seq_len = 11
    assert third_sentence_representation.size() == (
        seq_len, self.representation_dim)
    assert_allclose(
        third_sentence_representation[:, :1].detach().cpu().numpy(),
        np.array([[glove_contextualizer.weight[0, :1]], [0.249], [0.56],
                  [0.591], [0.739], [0.222], [0.439], [0.308], [0.793],
                  [0.118], [0.681]]),
        rtol=1e-5)
def test_glove_contextualizer_trainable(self):
    params = Params({
        "type": "glove_contextualizer",
        "glove_path": self.glove_path,
        "embedding_dim": self.representation_dim,
        "trainable": True})
    glove_contextualizer = Contextualizer.from_params(params)
    unpadded_representations = glove_contextualizer(
        [self.sentence_1, self.sentence_2, self.sentence_3])
    token_representations, mask = pad_contextualizer_output(
        unpadded_representations)
    loss = token_representations.sum()
    loss.backward()
    glove_grads = [param.grad for name, param in
                   glove_contextualizer.named_parameters()]
    assert all([grad is not None for grad in glove_grads])