def testMultipleComponents(self):
  """Random access on a dataset sliced from a 3-tuple of tensors."""
  components = ([1, 2], [3, 4], [5, 6])
  dataset = dataset_ops.Dataset.from_tensor_slices(components)
  # Slicing transposes: element i is the i-th entry of each component.
  for index, expected in enumerate([(1, 3, 5), (2, 4, 6)]):
    self.assertEqual(expected,
                     self.evaluate(random_access.at(dataset, index)))
def testBasic(self, count, batch_size, drop_remainder):
  """Tests the batch dataset logic for various input configurations.

  Args:
    count: the number of input elements
    batch_size: the batch size
    drop_remainder: whether a smaller batch size should be produced if batch
      size does not divide number of inputs evenly
  """
  dataset = dataset_ops.Dataset.from_tensor_slices(list(range(count))).batch(
      batch_size=batch_size, drop_remainder=drop_remainder)
  num_full_batches = count // batch_size
  # Each full batch is a contiguous slice of the input range.
  for batch_index in range(num_full_batches):
    start = batch_index * batch_size
    expected = np.arange(start, start + batch_size, 1, dtype=np.int32)
    self.assertAllEqual(
        expected, self.evaluate(random_access.at(dataset, batch_index)))
  has_remainder = not drop_remainder and count % batch_size != 0
  if has_remainder:
    # The final, smaller batch holds the leftover elements.
    expected = np.arange(num_full_batches * batch_size, count, 1)
    self.assertAllEqual(
        expected,
        self.evaluate(random_access.at(dataset, num_full_batches)))
  num_batches = num_full_batches + int(has_remainder)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=num_batches))
def testInvalidIndex(self):
  """Negative and too-large indices on a shuffled dataset raise."""
  dataset = dataset_ops.Dataset.from_tensor_slices(
      [1, 2, 3]).shuffle(buffer_size=100)
  for bad_index in (-1, 4):
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(random_access.at(dataset, bad_index))
def testRandomAccessBatchWithShuffle(self):
  """Batching a seeded shuffle stays consistent with the shuffle order."""
  base = dataset_ops.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7])
  shuffle_dataset = base.shuffle(buffer_size=10, seed=2)
  batch_dataset = shuffle_dataset.batch(2)
  # The deterministic order produced by seed=2.
  expected_output = [
      np.array([5, 2], dtype=np.int32),
      np.array([4, 7], dtype=np.int32),
      np.array([1, 3], dtype=np.int32),
      np.array([6], dtype=np.int32),
  ]
  for batch_index, batch in enumerate(expected_output):
    self.assertAllEqual(
        batch, self.evaluate(random_access.at(batch_dataset, batch_index)))
  # Checks the order is consistent with shuffle dataset.
  for pair_index in range(3):
    for offset in range(2):
      self.assertAllEqual(
          expected_output[pair_index][offset],
          self.evaluate(
              random_access.at(shuffle_dataset, pair_index * 2 + offset)))
  # Checks the remainder is the last element in shuffled dataset.
  self.assertAllEqual(
      expected_output[3][0],
      self.evaluate(random_access.at(shuffle_dataset, 6)))
def testBasic(self, count):
  """Random access on take() yields the first min(count, 10) elements."""
  dataset = dataset_ops.Dataset.range(10).take(count)
  # take(-1) means "take everything", i.e. all 10 elements.
  num_output = 10 if count == -1 else min(count, 10)
  for index in range(num_output):
    self.assertEqual(
        self.evaluate(random_access.at(dataset, index=index)), index)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=num_output))
def testConcatenateTwoNonEmptyDatasets(self):
  """Random access spans both halves of a concatenated dataset."""
  input_dataset = dataset_ops.Dataset.from_tensor_slices([0, 1, 2])
  concatenate_dataset = dataset_ops.Dataset.from_tensor_slices([3, 4])
  concatenated = input_dataset.concatenate(concatenate_dataset)
  for i in range(5):
    # Evaluate the tensor explicitly, for consistency with the other tests
    # in this file (the original compared an unevaluated tensor).
    self.assertAllEqual(
        self.evaluate(random_access.at(concatenated, index=i)), i)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(concatenated, index=5))
def testNumpyArray(self):
  """from_tensors wraps a tuple of numpy arrays as a single element."""
  components = (np.array(1), np.array([1, 2, 3]), np.array(37.0))
  dataset = dataset_ops.Dataset.from_tensors(components)
  result = self.evaluate(random_access.at(dataset, 0))
  for actual, expected in zip(result, components):
    self.assertAllEqual(actual, expected)
  # A from_tensors dataset holds exactly one element.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, 1))
def testConcatenateAnEmptyDataset(self):
  """Concatenating an empty dataset leaves the elements unchanged."""
  nonempty = dataset_ops.Dataset.from_tensor_slices([1.0])
  empty = dataset_ops.Dataset.from_tensor_slices([])
  concatenated = nonempty.concatenate(empty)
  self.assertAllEqual(
      self.evaluate(random_access.at(concatenated, index=0)), 1.0)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(concatenated, index=1))
def testZipUnequal(self):
  """Zipping unequal components truncates to the shortest (length 2)."""
  components = [[1, 2, 3, 4], [1, 2, 3, 4, 5], [1.0, 2.0]]
  dataset = _dataset_factory(components)
  for index in range(2):
    element = self.evaluate(random_access.at(dataset, index=index))
    for expected_column, actual in zip(components, element):
      self.assertAllEqual(expected_column[index], actual)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=2))
def testMultipleCombinations(self, elements, buffer_size):
  """Prefetching does not affect random-access results."""
  dataset = dataset_ops.Dataset.range(elements).prefetch(
      buffer_size=buffer_size)
  cardinality = self.evaluate(dataset.cardinality())
  expected_values = np.arange(elements)
  for index in range(cardinality):
    self.assertEqual(
        self.evaluate(random_access.at(dataset, index=index)),
        expected_values[index])
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=cardinality))
def testDictionary(self):
  """Random access preserves dict-structured elements built via from_list."""
  elements = [{"a": 1, "b": 3}, {"a": 2, "b": 4}]
  dataset = from_list.from_list(elements)
  for index, expected in enumerate(elements):
    self.assertEqual(expected,
                     self.evaluate(random_access.at(dataset, index)))
def testDictionary(self):
  """Slicing a dict of tensors yields a dict per index."""
  dataset = dataset_ops.Dataset.from_tensor_slices(
      {"a": [1, 2], "b": [3, 4]})
  expected_elements = [{"a": 1, "b": 3}, {"a": 2, "b": 4}]
  for index, expected in enumerate(expected_elements):
    self.assertEqual(expected,
                     self.evaluate(random_access.at(dataset, index)))
def testMultipleCombinations(self, elements, num_shards, index):
  """Sharding range(): shard element i equals index + num_shards * i."""
  components = range(elements)
  dataset = dataset_ops.Dataset.range(elements).shard(
      num_shards=num_shards, index=index)
  len_dataset = self.evaluate(dataset.cardinality())
  # Reuse the already-evaluated cardinality; the original re-ran the
  # cardinality op a second time inside the loop header.
  for i in range(len_dataset):
    self.assertAllEqual(components[index + (num_shards * i)],
                        self.evaluate(random_access.at(dataset, i)))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=len_dataset))
def testWithOptions(self):
  """Random access works on a dataset carrying explicit Options."""
  options = options_lib.Options()
  options.experimental_optimization.map_and_batch_fusion = True
  dataset = dataset_ops.Dataset.from_tensors(range(4)).with_options(options)
  self.assertAllEqual(self.evaluate(random_access.at(dataset, 0)), range(4))
  # from_tensors yields a single element, so index 1 is out of range.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, 1))
def testNamedTuple(self):
  """Zipping a namedtuple of datasets yields namedtuple elements."""
  Foo = collections.namedtuple("Foo", ["x", "y"])
  x = Foo(x=dataset_ops.Dataset.range(3), y=dataset_ops.Dataset.range(3, 6))
  dataset = dataset_ops.Dataset.zip(x)
  expected = [Foo(x=0, y=3), Foo(x=1, y=4), Foo(x=2, y=5)]
  for i in range(3):
    self.assertAllEqual(
        self.evaluate(random_access.at(dataset, index=i)), expected[i])
  # Index 3 is the first out-of-range index for this 3-element dataset;
  # the original checked index=4, which skipped the boundary.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=3))
def testBasicWithoutSeedEager(self):
  """An unseeded shuffle is a permutation of the input elements."""
  dataset = dataset_ops.Dataset.from_tensor_slices([1, 2, 3, 4, 5])
  shuffled_dataset = dataset.shuffle(buffer_size=100)
  original = [self.evaluate(random_access.at(dataset, i)) for i in range(5)]
  shuffled = [
      self.evaluate(random_access.at(shuffled_dataset, i)) for i in range(5)
  ]
  # Same multiset of values, possibly different order.
  self.assertAllEqual(sorted(original), sorted(shuffled))
def testZipEqual(self):
  """Zipping equal-length components yields aligned tuples."""
  components = [
      np.tile(np.array([[1], [2], [3], [4]]), 20),
      np.tile(np.array([[12], [13], [14], [15]]), 22),
      np.array([37.0, 38.0, 39.0, 40.0]),
  ]
  dataset = _dataset_factory(components)
  for index in range(4):
    element = self.evaluate(random_access.at(dataset, index=index))
    for expected_column, actual in zip(components, element):
      self.assertAllEqual(expected_column[index], actual)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=4))
def testSameSeedReturnsSameSequence(self):
  """Two shuffles with the same seed produce identical element orders."""
  dataset = dataset_ops.Dataset.from_tensor_slices([1, 2, 3, 4, 5])
  first = dataset.shuffle(buffer_size=100, seed=5)
  second = dataset.shuffle(buffer_size=100, seed=5)
  first_elements = [
      self.evaluate(random_access.at(first, i)) for i in range(5)
  ]
  second_elements = [
      self.evaluate(random_access.at(second, i)) for i in range(5)
  ]
  self.assertAllEqual(first_elements, second_elements)
def verifyRandomAccessInfiniteCardinality(self, dataset, expected):
  """Tests randomly accessing elements of a dataset.

  Args:
    dataset: the dataset under test.
    expected: the expected element value for each index in
      [0, len(expected)).
  """
  # Tests accessing the elements in a shuffled order with repeats.
  len_expected = len(expected)
  indices = list(range(len_expected)) * 2
  random.shuffle(indices)
  for i in indices:
    self.assertAllEqual(expected[i],
                        self.evaluate(random_access.at(dataset, i)))
  # Tests accessing the elements in order. The original computed
  # `set(sorted(indices))`, which throws the ordering away again because
  # set iteration order is unspecified; dedupe first, then sort.
  for i in sorted(set(indices)):
    self.assertAllEqual(expected[i],
                        self.evaluate(random_access.at(dataset, i)))
def testMultipleCombinations(self, start, stop, step, output_type):
  """Random access on range() matches np.arange for every index."""
  dataset = dataset_ops.Dataset.range(
      start, stop, step, output_type=output_type)
  expected = np.arange(start, stop, step, dtype=output_type.as_numpy_dtype)
  cardinality = self.evaluate(dataset.cardinality())
  for index in range(cardinality):
    self.assertEqual(
        self.evaluate(random_access.at(dataset, index=index)),
        expected[index])
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=cardinality))
def testMultipleCombinations(self, elements, num_shards, index):
  """Sharding tensor slices: shard element i equals index + num_shards*i."""
  components = range(elements)
  dataset = dataset_ops.Dataset.from_tensor_slices(components).shard(
      num_shards=num_shards, index=index)
  shard_size = self.evaluate(dataset.cardinality())
  for position in range(shard_size):
    expected = components[index + num_shards * position]
    self.assertAllEqual(
        expected, self.evaluate(random_access.at(dataset, position)))
def testZipBasic(self):
  """Zipping two range datasets yields aligned pairs."""
  dataset = dataset_ops.Dataset.zip(
      (dataset_ops.Dataset.range(1, 4), dataset_ops.Dataset.range(4, 7)))
  for index, pair in enumerate([(1, 4), (2, 5), (3, 6)]):
    self.assertEqual(
        self.evaluate(random_access.at(dataset, index=index)), pair)
def testDifferentSeedDifferentSequence(self):
  """Different seeds produce different orders over the same elements."""
  components = list(range(1000))
  dataset = dataset_ops.Dataset.from_tensor_slices(components)
  first = dataset.shuffle(buffer_size=1000, seed=124)
  second = dataset.shuffle(buffer_size=1000, seed=51)
  first_elements = []
  second_elements = []
  for i in range(1000):
    first_elements.append(self.evaluate(random_access.at(first, i)))
    second_elements.append(self.evaluate(random_access.at(second, i)))
  # Orders differ, but both are permutations of the same values.
  self.assertNotEqual(first_elements, second_elements)
  self.assertAllEqual(sorted(first_elements), sorted(second_elements))
def testAttrs(self):
  """Zipping an attrs-decorated structure of datasets yields attrs elements."""
  if attr is None:
    self.skipTest("attr module is not available.")

  @attr.s
  class Foo:
    x = attr.ib()
    y = attr.ib()

  x = Foo(x=dataset_ops.Dataset.range(3), y=dataset_ops.Dataset.range(3, 6))
  dataset = dataset_ops.Dataset.zip(x)
  expected = [Foo(x=0, y=3), Foo(x=1, y=4), Foo(x=2, y=5)]
  for i in range(3):
    self.assertAllEqual(
        self.evaluate(random_access.at(dataset, index=i)), expected[i])
  # Index 3 is the first out-of-range index for this 3-element dataset;
  # the original checked index=4, which skipped the boundary.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=3))
def testFiniteRepeat(self, elements, count):
  """repeat(count) tiles the base range dataset count times."""
  dataset = dataset_ops.Dataset.range(elements).repeat(count)
  expected = np.tile(
      np.arange(elements, dtype=dtypes.int64.as_numpy_dtype), count)
  for index in range(elements * count):
    self.assertEqual(
        self.evaluate(random_access.at(dataset, index=index)),
        expected[index])
def testNumpy(self):
  """from_list preserves uint64 numpy arrays exactly."""
  elements = [
      np.tile(np.array([[0], [1]], dtype=np.uint64), 2),
      np.tile(np.array([[2], [256]], dtype=np.uint64), 2),
      np.tile(np.array([[4], [65536]], dtype=np.uint64), 2),
      np.tile(np.array([[8], [4294967296]], dtype=np.uint64), 2),
  ]
  dataset = from_list.from_list(elements)
  for index, expected in enumerate(elements):
    self.assertAllEqual(
        expected, self.evaluate(random_access.at(dataset, index)))
def testNested(self):
  """Random access preserves a nested zip structure."""
  components = [
      np.tile(np.array([[1], [2], [3], [4]]), 20),
      np.tile(np.array([[12], [13], [14], [15]]), 22),
      np.array([37.0, 38.0, 39.0, 40.0]),
  ]
  first, second, third = (
      dataset_ops.Dataset.from_tensor_slices(c) for c in components)
  dataset = dataset_ops.Dataset.zip((first, (second, third)))
  for index in range(4):
    value1, (value2, value3) = self.evaluate(
        random_access.at(dataset, index=index))
    self.assertAllEqual(components[0][index], value1)
    self.assertAllEqual(components[1][index], value2)
    self.assertAllEqual(components[2][index], value3)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(dataset, index=4))
def testNumpy(self):
  """Slicing a tuple of unsigned-int arrays of varying bit widths."""
  components = (
      np.tile(np.array([[0], [1]], dtype=np.uint8), 2),
      np.tile(np.array([[2], [256]], dtype=np.uint16), 2),
      np.tile(np.array([[4], [65536]], dtype=np.uint32), 2),
      np.tile(np.array([[8], [4294967296]], dtype=np.uint64), 2),
  )
  dataset = dataset_ops.Dataset.from_tensor_slices(components)
  for index in range(2):
    expected = tuple(component[index] for component in components)
    self.assertAllEqual(
        expected, self.evaluate(random_access.at(dataset, index)))
def testEmptyDataset(self):
  """Any index into an empty shuffled dataset is out of range."""
  empty = dataset_ops.Dataset.from_tensor_slices([]).shuffle(
      buffer_size=100)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(empty, 0))
def testEmptyDataset(self, index):
  """Any index into an empty prefetched dataset is out of range."""
  empty = dataset_ops.Dataset.from_tensor_slices([]).prefetch(buffer_size=5)
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(random_access.at(empty, index=index))