def test_from_params(self): params = Params({"type": "pretrained", "weights_file_path": self.temp_file}) initializer = Initializer.from_params(params) assert initializer.weights assert initializer.parameter_name_overrides == {} name_overrides = {"a": "b", "c": "d"} params = Params({ "type": "pretrained", "weights_file_path": self.temp_file, "parameter_name_overrides": name_overrides }) initializer = Initializer.from_params(params) assert initializer.weights assert initializer.parameter_name_overrides == name_overrides
def test_forward_gives_correct_output(self):
    params = Params({
        'input_dim': 2,
        'num_classes': 1,
    })
    simrel = SimRel.from_params(params)
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(simrel)

    input_tensor = torch.FloatTensor([[[-3, 1]]])
    labels = torch.Tensor([[0]])
    class_avgs = [torch.FloatTensor([5, 5])]
    output = simrel(input_tensor, labels, class_avgs).data.numpy()
    assert output.shape == (1, 1, 1)
    # This output was checked by hand.
    assert_almost_equal(output, torch.FloatTensor([[[-0.44721356]]]))

    params = Params({
        'input_dim': 5,
        'num_classes': 3,
    })
    simrel = SimRel.from_params(params)
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(simrel)

    input_tensor = torch.FloatTensor([[[1, 2, 3, 4, 5]]])
    print("Input tensor:", input_tensor)
    print("Input shape:", input_tensor.shape)
    labels = torch.Tensor([[0, 1, 2]])
    class_avgs = [
        torch.FloatTensor([3, 4, 5, 6, 7]),
        torch.FloatTensor([25, 63, 55, 8, 2.4]),
        torch.FloatTensor([1.003, 1.005, 6.578, 3.4, 9.999])
    ]
    output = simrel(input_tensor, labels, class_avgs).data.numpy()
    assert output.shape == (1, 1, 3)
    # This output was checked via WolframAlpha:
    # `N[1 - CosineDistance[(1,2,3,4,5),(3,4,5,6,7)], 10]`
    assert_almost_equal(output,
                        torch.FloatTensor([[[0.9864400504, 0.5535960766, 0.9296758768]]]))
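# A minimal sketch (not part of the test suite) of the hand/WolframAlpha check above,
# assuming SimRel scores each class as the cosine similarity between the input vector
# and the corresponding class average. `cosine` is a hypothetical helper introduced
# here only for illustration.
import numpy


def cosine(a, b):
    a, b = numpy.asarray(a, dtype=float), numpy.asarray(b, dtype=float)
    return float(a @ b / (numpy.linalg.norm(a) * numpy.linalg.norm(b)))


assert abs(cosine([-3, 1], [5, 5]) - (-0.44721356)) < 1e-6
assert abs(cosine([1, 2, 3, 4, 5], [3, 4, 5, 6, 7]) - 0.9864400504) < 1e-6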
def test_forward_gives_correct_output(self):
    params = Params({
        'input_dim': 2,
        'output_dims': 3,
        'pool_sizes': 4,
        'dropout': 0.0,
        'num_layers': 2
    })
    maxout = Maxout.from_params(params)
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(maxout)

    input_tensor = torch.FloatTensor([[-3, 1]])
    output = maxout(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand.
    # The output of the first maxout layer is [-1, -1, -1], since the matrix multiply
    # gives us [-2]*12; reshaping and maxing produces [-2, -2, -2] and the bias
    # increments these values.
    # The second layer output is [-2, -2, -2], since the matrix multiply gives us
    # [-3]*12; reshaping and maxing produces [-3, -3, -3] and the bias increments
    # these values.
    assert_almost_equal(output, [[-2, -2, -2]])
def setUp(self):
    super(TestTokenCharactersEncoder, self).setUp()
    self.vocab = Vocabulary()
    self.vocab.add_token_to_namespace("1", "token_characters")
    self.vocab.add_token_to_namespace("2", "token_characters")
    self.vocab.add_token_to_namespace("3", "token_characters")
    self.vocab.add_token_to_namespace("4", "token_characters")
    params = Params({
        "embedding": {
            "embedding_dim": 2,
            "vocab_namespace": "token_characters"
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 2,
            "num_filters": 4,
            "ngram_filter_sizes": [1, 2],
            "output_dim": 3
        }
    })
    self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params))
    self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"])
    self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(self.encoder)
    initializer(self.embedding)
    initializer(self.inner_encoder)
def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)
    # Initialize all weights to be == 1.
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(augmented_lstm)
    initializer(pytorch_lstm)

    initial_state = torch.zeros([1, 5, 11])
    initial_memory = torch.zeros([1, 5, 11])

    # Use bigger numbers to avoid floating point instability.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor * 5.,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(),
                                      batch_first=True)

    augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output, pytorch_state = pytorch_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    numpy.testing.assert_array_almost_equal(pytorch_output_sequence.data.numpy(),
                                            augmented_output_sequence.data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[0].data.numpy(),
                                            augmented_state[0].data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[1].data.numpy(),
                                            augmented_state[1].data.numpy(), decimal=4)
def test_forward_gives_correct_output(self):
    params = Params({
        'input_dim': 2,
        'output_dims': 3,
        'pool_sizes': 4,
        'dropout': 0.0,
        'num_layers': 2
    })
    maxout = Maxout.from_params(params)
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(maxout)

    input_tensor = torch.FloatTensor([[-3, 1]])
    output = maxout(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand.
    # The output of the first maxout layer is [-1, -1, -1], since the matrix multiply
    # gives us [-2]*12; reshaping and maxing produces [-2, -2, -2] and the bias
    # increments these values.
    # The second layer output is [-2, -2, -2], since the matrix multiply gives us
    # [-3]*12; reshaping and maxing produces [-3, -3, -3] and the bias increments
    # these values.
    assert_almost_equal(output, [[-2, -2, -2]])
def test_forward_respects_masking(self):
    # seed 1 fails on the old cnn encoder code
    torch.manual_seed(1)
    encoder = CnnEncoder(embedding_dim=7, num_filters=13, ngram_filter_sizes=(1, 2, 3, 4, 5))
    init = Initializer.from_params(Params({"type": "normal", "mean": 0.0, "std": 10}))
    initializer = InitializerApplicator([(".*", init)])
    initializer(encoder)

    tokens = torch.ones(4, 8, 7)
    padded_tokens = torch.nn.functional.pad(tokens.transpose(1, 2), (0, 2), value=5).transpose(1, 2)
    mask = (torch.where(padded_tokens == 5,
                        torch.zeros_like(padded_tokens),
                        torch.ones_like(padded_tokens)).bool().any(dim=2))

    regular_output = encoder.forward(tokens=tokens, mask=None)
    masked_output = encoder.forward(tokens=padded_tokens, mask=mask)
    assert_almost_equal(regular_output.data.numpy(), masked_output.data.numpy(), decimal=6)
def setUp(self):
    super().setUp()
    self.vocab = Vocabulary()
    self.vocab.add_token_to_namespace("1", "token_characters")
    self.vocab.add_token_to_namespace("2", "token_characters")
    self.vocab.add_token_to_namespace("3", "token_characters")
    self.vocab.add_token_to_namespace("4", "token_characters")
    params = Params({
        "embedding": {
            "embedding_dim": 2,
            "vocab_namespace": "token_characters"
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 2,
            "num_filters": 4,
            "ngram_filter_sizes": [1, 2],
            "output_dim": 3
        }
    })
    self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params))
    self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"])
    self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(self.encoder)
    initializer(self.embedding)
    initializer(self.inner_encoder)
def test_from_params(self): params = Params({"type": "pretrained", "weights_file_path": self.temp_file}) initializer = Initializer.from_params(params) assert initializer.weights assert initializer.parameter_name_overrides == {} name_overrides = {"a": "b", "c": "d"} params = Params( { "type": "pretrained", "weights_file_path": self.temp_file, "parameter_name_overrides": name_overrides, } ) initializer = Initializer.from_params(params) assert initializer.weights assert initializer.parameter_name_overrides == name_overrides
def test_forward_does_correct_computation(self):
    encoder = CnnEncoder(embedding_dim=2, num_filters=1, ngram_filter_sizes=(1, 2))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(encoder)
    input_tensor = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(encoder_output.data.numpy(),
                        numpy.asarray([[1.6 + 1.0, 3.1 + 1.0]]),
                        decimal=6)
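# A minimal sketch (assumptions labeled) of where [1.6 + 1.0, 3.1 + 1.0] comes from,
# assuming CnnEncoder applies its activation and then max-pools over time: with every
# weight and bias set to 1, the size-1 filter yields sum(token) + 1 at each position
# and the single size-2 window yields the sum of both tokens + 1.
unigram = max(0.7 + 0.8, 0.1 + 1.5) + 1.0   # best size-1 window, plus bias
bigram = (0.7 + 0.8 + 0.1 + 1.5) + 1.0      # the only size-2 window, plus bias
assert abs(unigram - 2.6) < 1e-6 and abs(bigram - 4.1) < 1e-6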
def test_forward_does_correct_computation(self):
    encoder = CnnEncoder(embedding_dim=2, num_filters=1, ngram_filter_sizes=(1, 2))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(encoder)
    input_tensor = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(encoder_output.data.numpy(),
                        numpy.asarray([[1.6 + 1.0, 3.1 + 1.0]]),
                        decimal=6)
def test_l2_regularization(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
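# A minimal sketch of where 28.75 comes from, assuming L2Regularizer returns
# alpha * sum(param ** 2) over every parameter: the two Linear layers hold
# 5 * 10 + 10 + 10 * 5 + 5 = 115 scalars, each initialized to 0.5.
num_params = 5 * 10 + 10 + 10 * 5 + 5
assert 1.0 * num_params * 0.5 ** 2 == 28.75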
def test_l1_regularization(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 115 because of biases.
    assert value.data.numpy() == 115.0
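# A minimal sketch of where 115 comes from, assuming L1Regularizer returns
# alpha * sum(abs(param)): 100 weight entries plus 15 biases, all equal to -1.
num_params = 5 * 10 + 10 + 10 * 5 + 5
assert 1.0 * num_params * abs(-1) == 115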
def test_regularizer_applicator_respects_regex_matching(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                   ("bias", L1Regularizer(1.0))])(model)
    assert value.data.numpy() == 65.0
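# A minimal sketch of where 65.0 comes from, assuming the same regularizer formulas as
# above: the "weight" regex matches the 100 weight entries (L2, alpha 0.5, each value 1)
# and the "bias" regex matches the 15 biases (L1, alpha 1.0, each value 1).
weights, biases = 5 * 10 + 10 * 5, 10 + 5
assert 0.5 * weights * 1.0 ** 2 + 1.0 * biases * abs(1.0) == 65.0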
def test_l2_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
def get_loss(params: Params) -> float:
    # Use a constant initializer so the loss does not depend on random initialization.
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    model = Model.from_params(vocab=vocab, params=params.get('model'))
    initializer(model)
    training_tensors = dataset.as_tensor_dict()
    output_dict = model(**training_tensors)
    return output_dict['loss'].cpu().data.numpy()
def test_l1_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 115 because of biases.
    assert value.data.numpy() == 115.0
def test_regularizer_applicator_respects_regex_matching(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                   ("bias", L1Regularizer(1.0))])(model)
    assert value.data.numpy() == 65.0
def test_forward_gives_correct_output(self): params = Params({"input_dim": 2, "hidden_dims": 3, "activations": "relu", "num_layers": 2}) feedforward = FeedForward.from_params(params) constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0})) initializer = InitializerApplicator([(".*", constant_init)]) initializer(feedforward) input_tensor = torch.FloatTensor([[-3, 1]]) output = feedforward(input_tensor).data.numpy() assert output.shape == (1, 3) # This output was checked by hand - ReLU makes output after first hidden layer [0, 0, 0], # which then gets a bias added in the second layer to be [1, 1, 1]. assert_almost_equal(output, [[1, 1, 1]])
def test_dropout_version_is_different_to_no_dropout(self):
    augmented_lstm = AugmentedLstm(10, 11)
    dropped_augmented_lstm = AugmentedLstm(10, 11, recurrent_dropout_probability=0.9)
    # Initialize all weights to be == 0.5.
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(augmented_lstm)
    initializer(dropped_augmented_lstm)

    initial_state = torch.randn([1, 5, 11])
    initial_memory = torch.randn([1, 5, 11])

    # If we use too big a number here, as in the PyTorch test, the dropout has no effect.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(),
                                      batch_first=True)

    augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
    dropped_output, dropped_state = dropped_augmented_lstm(lstm_input,
                                                           (initial_state, initial_memory))
    dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    with pytest.raises(AssertionError):
        numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                augmented_output_sequence.data.numpy(),
                                                decimal=4)
    with pytest.raises(AssertionError):
        numpy.testing.assert_array_almost_equal(dropped_state[0].data.numpy(),
                                                augmented_state[0].data.numpy(),
                                                decimal=4)
    with pytest.raises(AssertionError):
        numpy.testing.assert_array_almost_equal(dropped_state[1].data.numpy(),
                                                augmented_state[1].data.numpy(),
                                                decimal=4)
def test_frozen_params(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    # Freeze the parameters of the first linear layer.
    for name, param in model.named_parameters():
        if re.search(r"0.*$", name):
            param.requires_grad = False
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 55 = 10 * 5 weights + 5 biases of the second, unfrozen linear layer.
    assert value.data.numpy() == 55
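# A minimal sketch of the expected 55, assuming frozen (requires_grad=False) parameters
# are skipped by the RegularizerApplicator: only the second Linear(10, 5) is regularized,
# contributing 10 * 5 weight entries plus 5 biases, all with absolute value 1.
assert 1.0 * (10 * 5 + 5) * abs(-1) == 55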
def test_from_params_tar_gz(self): with tempfile.NamedTemporaryFile(suffix=".tar.gz") as f: with tarfile.open(fileobj=f, mode="w:gz") as archive: archive.add(self.temp_file, arcname=os.path.basename(self.temp_file)) f.flush() params = Params({ "type": "pretrained", "weights_file_path": f.name }) initializer = Initializer.from_params(params) assert initializer.weights assert initializer.parameter_name_overrides == {} for name, parameter in self.net2.state_dict().items(): assert torch.equal(parameter, initializer.weights[name])
def test_stacked_bidirectional_lstm_dropout_version_is_different(self, dropout_name: str):
    stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11, num_layers=3)
    if dropout_name == 'layer_dropout_probability':
        dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                        num_layers=3,
                                                        layer_dropout_probability=0.9)
    elif dropout_name == 'recurrent_dropout_probability':
        dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                        num_layers=3,
                                                        recurrent_dropout_probability=0.9)
    else:
        raise ValueError('Do not recognise the following dropout name '
                         f'{dropout_name}')
    # Initialize all weights to be == 0.5.
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(stacked_lstm)
    initializer(dropped_stacked_lstm)

    initial_state = torch.randn([3, 5, 11])
    initial_memory = torch.randn([3, 5, 11])

    tensor = torch.rand([5, 7, 10])
    sequence_lengths = torch.LongTensor([7, 7, 7, 7, 7])

    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(tensor, sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(),
                                      batch_first=True)

    stacked_output, stacked_state = stacked_lstm(lstm_input, (initial_state, initial_memory))
    dropped_output, dropped_state = dropped_stacked_lstm(lstm_input,
                                                         (initial_state, initial_memory))
    dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
    stacked_output_sequence, _ = pad_packed_sequence(stacked_output, batch_first=True)

    if dropout_name == 'layer_dropout_probability':
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                    stacked_output_sequence.data.numpy(),
                                                    decimal=4)
    if dropout_name == 'recurrent_dropout_probability':
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_state[0].data.numpy(),
                                                    stacked_state[0].data.numpy(),
                                                    decimal=4)
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_state[1].data.numpy(),
                                                    stacked_state[1].data.numpy(),
                                                    decimal=4)
def test_from_params_none(self):
    Initializer.from_params(params=None)
def test_from_params_string(self): Initializer.from_params(params="eye")