def test_all_fields_padded_to_max_length(self):
    """Index a three-field ``ListField`` and check each row of its "words" array.

    Every expected row has five entries; two of them end in a ``0``.
    """
    # NOTE(review): a second method with this exact name appears later in the
    # visible source. If both live in the same class, the later definition
    # replaces this one and this test never runs -- confirm.
    fields = ListField([self.field1, self.field2, self.field3])
    fields.index(self.vocab)
    word_rows = fields.as_array(fields.get_padding_lengths())["words"]
    expected_rows = (
        [2, 3, 4, 5, 0],
        [2, 3, 4, 1, 5],
        [2, 3, 1, 5, 0],
    )
    for position, expected in enumerate(expected_rows):
        numpy.testing.assert_array_almost_equal(word_rows[position], numpy.array(expected))
def test_list_field_can_handle_empty_text_fields(self):
    """Check the "words" array when the last member is ``self.empty_text_field``.

    The expected third row is all zeros.
    """
    fields = ListField([self.field1, self.field2, self.empty_text_field])
    fields.index(self.vocab)
    arrays = fields.as_array(fields.get_padding_lengths())
    actual_words = arrays["words"]
    expected_words = numpy.array(
        [
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
            [0, 0, 0, 0, 0],
        ]
    )
    numpy.testing.assert_array_equal(actual_words, expected_words)
def test_as_array_can_handle_multiple_token_indexers_and_empty_fields(self):
    """Check "words" and "characters" arrays when the first member is an empty field.

    The three fixture text fields are switched to
    ``self.words_and_characters_indexers`` before the list is built from
    ``self.field1.empty_field()``, ``self.field1`` and ``self.field2``.
    """
    # pylint: disable=protected-access
    for text_field in (self.field1, self.field2, self.field3):
        text_field._token_indexers = self.words_and_characters_indexers

    fields = ListField([self.field1.empty_field(), self.field1, self.field2])
    fields.index(self.vocab)
    lengths = fields.get_padding_lengths()
    arrays = fields.as_array(lengths)
    words = arrays["words"]
    characters = arrays["characters"]

    expected_words = numpy.array(
        [
            [0, 0, 0, 0, 0],
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
        ]
    )
    numpy.testing.assert_array_almost_equal(words, expected_words)

    # The empty field is expected to yield an all-zero 5 x 9 character block.
    numpy.testing.assert_array_almost_equal(characters[0], numpy.zeros([5, 9]))

    expected_second = numpy.array(
        [
            [5, 1, 1, 2, 0, 0, 0, 0, 0],
            [1, 2, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0],
            [2, 3, 4, 5, 3, 4, 6, 3, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]
    )
    numpy.testing.assert_array_almost_equal(characters[1], expected_second)

    expected_third = numpy.array(
        [
            [5, 1, 1, 2, 0, 0, 0, 0, 0],
            [1, 2, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 1, 1, 1, 3, 1, 3, 4, 5],
            [2, 3, 4, 5, 3, 4, 6, 3, 0],
        ]
    )
    numpy.testing.assert_array_almost_equal(characters[2], expected_third)
def test_pad_can_handle_multiple_token_indexers(self):
    """Check "words" and "characters" arrays for three freshly built text fields.

    Each ``TextField`` is created with ``self.words_and_characters_indexer``.
    """
    sentences = [
        ["this", "is", "a", "sentence"],
        ["this", "is", "a", "different", "sentence"],
        ["this", "is", "another", "sentence"],
    ]
    fields = ListField(
        [TextField(tokens, self.words_and_characters_indexer) for tokens in sentences]
    )
    fields.index(self.vocab)
    lengths = fields.get_padding_lengths()
    arrays = fields.as_array(lengths)
    words = arrays["words"]
    characters = arrays["characters"]

    expected_words = numpy.array(
        [
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
            [2, 3, 1, 5, 0],
        ]
    )
    numpy.testing.assert_array_almost_equal(words, expected_words)

    # Expected character rows, named after the token that presumably sits at
    # the same position in ``sentences`` (non-zero counts match token lengths).
    this_row = [5, 1, 1, 2, 0, 0, 0, 0, 0]
    is_row = [1, 2, 0, 0, 0, 0, 0, 0, 0]
    a_row = [1, 0, 0, 0, 0, 0, 0, 0, 0]
    sentence_row = [2, 3, 4, 5, 3, 4, 6, 3, 0]
    different_row = [1, 1, 1, 1, 3, 1, 3, 4, 5]
    another_row = [1, 4, 1, 5, 1, 3, 1, 0, 0]
    blank_row = [0, 0, 0, 0, 0, 0, 0, 0, 0]

    numpy.testing.assert_array_almost_equal(
        characters[0],
        numpy.array([this_row, is_row, a_row, sentence_row, blank_row]),
    )
    numpy.testing.assert_array_almost_equal(
        characters[1],
        numpy.array([this_row, is_row, a_row, different_row, sentence_row]),
    )
    numpy.testing.assert_array_almost_equal(
        characters[2],
        numpy.array([this_row, is_row, another_row, sentence_row, blank_row]),
    )
def test_list_field_can_handle_empty_sequence_label_fields(self):
    """Check the array built from two sequence-label fields plus an empty one.

    The expected last row is all zeros.
    """
    members = [
        self.sequence_label_field,
        self.sequence_label_field,
        self.empty_sequence_label_field,
    ]
    fields = ListField(members)
    fields.index(self.vocab)
    lengths = fields.get_padding_lengths()
    actual = fields.as_array(lengths)
    expected = numpy.array(
        [
            [1, 1, 0, 1],
            [1, 1, 0, 1],
            [0, 0, 0, 0],
        ]
    )
    numpy.testing.assert_array_equal(actual, expected)
def test_nested_list_fields_are_padded_correctly(self):
    """Check padding lengths and the array for a ``ListField`` of ``ListField``s.

    The inner lists hold five and six ``LabelField``s; the outer list also
    starts with an empty copy of the five-label list.
    """
    short_labels = ['a', 'b', 'c', 'd', 'e']
    long_labels = ['f', 'g', 'h', 'i', 'j', 'k']
    short_inner = ListField([LabelField(label) for label in short_labels])
    long_inner = ListField([LabelField(label) for label in long_labels])

    outer = ListField([short_inner.empty_field(), short_inner, long_inner])
    outer.index(self.vocab)

    lengths = outer.get_padding_lengths()
    assert lengths == {'num_fields': 3, 'list_num_fields': 6}

    actual = outer.as_array(lengths)
    expected = [
        [[-1], [-1], [-1], [-1], [-1], [-1]],
        [[0], [1], [2], [3], [4], [-1]],
        [[5], [6], [7], [8], [9], [10]],
    ]
    numpy.testing.assert_almost_equal(actual, expected)
def test_fields_can_pad_to_greater_than_max_length(self):
    """Override the computed padding lengths and check the enlarged "words" array.

    ``"list_num_tokens"`` is set to 7 and ``"num_fields"`` to 5, so five rows
    of seven entries are expected, the last two rows being all zeros.
    """
    # NOTE(review): a second method with this exact name appears later in the
    # visible source. If both live in the same class, the later definition
    # replaces this one and this test never runs -- confirm.
    fields = ListField([self.field1, self.field2, self.field3])
    fields.index(self.vocab)

    lengths = fields.get_padding_lengths()
    lengths["list_num_tokens"] = 7
    lengths["num_fields"] = 5
    arrays = fields.as_array(lengths)

    expected_rows = (
        [2, 3, 4, 5, 0, 0, 0],
        [2, 3, 4, 1, 5, 0, 0],
        [2, 3, 1, 5, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
    )
    for position, expected in enumerate(expected_rows):
        numpy.testing.assert_array_almost_equal(
            arrays["words"][position], numpy.array(expected)
        )
def test_all_fields_padded_to_max_length(self):
    """Build three text fields with ``self.word_indexer`` and check the "words" rows.

    Every expected row has five entries; two of them end in a ``0``.
    """
    # NOTE(review): an earlier method in the visible source has this exact
    # name. If both live in the same class, only this later one runs -- confirm.
    sentences = [
        ["this", "is", "a", "sentence"],
        ["this", "is", "a", "different", "sentence"],
        ["this", "is", "another", "sentence"],
    ]
    fields = ListField([TextField(tokens, self.word_indexer) for tokens in sentences])
    fields.index(self.vocab)
    arrays = fields.as_array(fields.get_padding_lengths())

    expected_rows = (
        [2, 3, 4, 5, 0],
        [2, 3, 4, 1, 5],
        [2, 3, 1, 5, 0],
    )
    for position, expected in enumerate(expected_rows):
        numpy.testing.assert_array_almost_equal(
            arrays["words"][position], numpy.array(expected)
        )
def test_padding_handles_list_fields(self):
    """Check the array for a ``ListField`` of differently shaped ``ArrayField``s.

    The members are a 2 x 3 array of ones, a 1 x 5 array of ones, and an
    empty copy of the first; the expected result is a 3 x 2 x 5 array.
    """
    two_by_three = ArrayField(numpy.ones([2, 3]))
    one_by_five = ArrayField(numpy.ones([1, 5]))
    blank = two_by_three.empty_field()

    fields = ListField([two_by_three, one_by_five, blank])
    actual = fields.as_array(fields.get_padding_lengths())

    # Start from zeros and write ones where each source array is expected to land.
    expected = numpy.zeros([3, 2, 5])
    expected[0, :, :3] = 1.  # both rows of the 2 x 3 array, first three columns
    expected[1, 0, :] = 1.   # the single row of the 1 x 5 array
    numpy.testing.assert_array_equal(actual, expected)
def test_fields_can_pad_to_greater_than_max_length(self):
    """Override the computed padding lengths and check the enlarged "words" array.

    ``"num_tokens"`` is set to 7 and ``"num_fields"`` to 5, so five rows of
    seven entries are expected, the last two rows being all zeros.
    """
    # NOTE(review): an earlier method in the visible source has this exact
    # name. If both live in the same class, only this later one runs -- confirm.
    # NOTE(review): that earlier method overrides "list_num_tokens" rather than
    # "num_tokens"; verify which key this version of ListField actually reads.
    sentences = [
        ["this", "is", "a", "sentence"],
        ["this", "is", "a", "different", "sentence"],
        ["this", "is", "another", "sentence"],
    ]
    fields = ListField([TextField(tokens, self.word_indexer) for tokens in sentences])
    fields.index(self.vocab)

    lengths = fields.get_padding_lengths()
    lengths["num_tokens"] = 7
    lengths["num_fields"] = 5
    arrays = fields.as_array(lengths)

    expected_rows = (
        [2, 3, 4, 5, 0, 0, 0],
        [2, 3, 4, 1, 5, 0, 0],
        [2, 3, 1, 5, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
    )
    for position, expected in enumerate(expected_rows):
        numpy.testing.assert_array_almost_equal(
            arrays["words"][position], numpy.array(expected)
        )