def test_as_tensor_handles_words(self):
    """A word-indexed field should tensorize to the expected id sequence."""
    tokens = [Token(t) for t in ["This", "is", "a", "sentence", "."]]
    field = TextField(tokens, token_indexers={"words": SingleIdTokenIndexer("words")})
    field.index(self.vocab)
    lengths = field.get_padding_lengths()
    tensors = field.as_tensor(lengths)
    numpy.testing.assert_array_almost_equal(
        tensors["words"].detach().cpu().numpy(), numpy.array([1, 1, 1, 2, 1])
    )
def test_padding_lengths_are_computed_correctly(self):
    """Padding lengths should reflect every indexer attached to the field."""
    sentence = ["This", "is", "a", "sentence", "."]

    # Word ids alone: only the token count matters.
    field = TextField(
        [Token(t) for t in sentence],
        token_indexers={"words": SingleIdTokenIndexer("words")},
    )
    field.index(self.vocab)
    assert field.get_padding_lengths() == {"num_tokens": 5}

    # Character indexing adds the longest-token character count.
    field = TextField(
        [Token(t) for t in sentence],
        token_indexers={"characters": TokenCharactersIndexer("characters")},
    )
    field.index(self.vocab)
    assert field.get_padding_lengths() == {"num_tokens": 5, "num_token_characters": 8}

    # Both indexers together: the lengths are the union of the two.
    field = TextField(
        [Token(t) for t in sentence],
        token_indexers={
            "characters": TokenCharactersIndexer("characters"),
            "words": SingleIdTokenIndexer("words"),
        },
    )
    field.index(self.vocab)
    assert field.get_padding_lengths() == {"num_tokens": 5, "num_token_characters": 8}
def test_as_tensor_handles_characters(self):
    """Character indexing should produce a (num_tokens, num_chars) id matrix."""
    field = TextField(
        [Token(t) for t in ["This", "is", "a", "sentence", "."]],
        token_indexers={"characters": TokenCharactersIndexer("characters")},
    )
    field.index(self.vocab)
    tensors = field.as_tensor(field.get_padding_lengths())
    expected = numpy.array(
        [
            [1, 1, 1, 3, 0, 0, 0, 0],
            [1, 3, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0],
            [3, 4, 5, 6, 4, 5, 7, 4],
            [1, 0, 0, 0, 0, 0, 0, 0],
        ]
    )
    numpy.testing.assert_array_almost_equal(
        tensors["characters"].detach().cpu().numpy(), expected
    )
def test_as_tensor_handles_words_and_characters_with_longer_lengths(self):
    """Requesting padding beyond the natural sizes should zero-pad both outputs."""
    field = TextField(
        [Token(t) for t in ["a", "sentence", "."]],
        token_indexers={
            "words": SingleIdTokenIndexer("words"),
            "characters": TokenCharactersIndexer("characters"),
        },
    )
    field.index(self.vocab)
    lengths = field.get_padding_lengths()
    # Pad past the actual 3 tokens / 8 characters to exercise the padding path.
    lengths["num_tokens"] = 5
    lengths["num_token_characters"] = 10
    tensors = field.as_tensor(lengths)
    numpy.testing.assert_array_almost_equal(
        tensors["words"].detach().cpu().numpy(), numpy.array([1, 2, 1, 0, 0])
    )
    numpy.testing.assert_array_almost_equal(
        tensors["characters"].detach().cpu().numpy(),
        numpy.array(
            [
                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                [3, 4, 5, 6, 4, 5, 7, 4, 0, 0],
                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            ]
        ),
    )
def test_token_embedder_returns_dict(self):
    """An indexer that returns several keys should surface each key independently."""
    field = TextField(
        [Token(t) for t in ["A", "sentence"]],
        token_indexers={
            "field_with_dict": DictReturningTokenIndexer(),
            "words": SingleIdTokenIndexer("words"),
            "characters": TokenCharactersIndexer("characters"),
        },
    )
    field.index(self.vocab)
    lengths = field.get_padding_lengths()
    assert lengths == {
        "token_ids": 5,
        "additional_key": 2,
        "words": 2,
        "characters": 2,
        "num_token_characters": 8,
    }
    # Stretch each key separately; every tensor should honor its own length.
    lengths["token_ids"] = 7
    lengths["additional_key"] = 3
    lengths["words"] = 4
    lengths["characters"] = 4
    tensors = field.as_tensor(lengths)
    assert list(tensors["token_ids"].shape) == [7]
    assert list(tensors["additional_key"].shape) == [3]
    assert list(tensors["words"].shape) == [4]
    assert list(tensors["characters"].shape) == [4, 8]
def test_get_padding_lengths_raises_if_no_indexed_tokens(self):
    """Asking for padding lengths before calling index() should be an error."""
    field = TextField(
        [Token(t) for t in ["This", "is", "a", "sentence", "."]],
        token_indexers={"words": SingleIdTokenIndexer("words")},
    )
    with pytest.raises(ConfigurationError):
        field.get_padding_lengths()