def testErrors(self):
  """Checks error reporting for invalid sampling/choosing arguments.

  Covers mismatched weight lengths, non-float weights, heterogeneous
  dataset element types, and invalid `choice_dataset` elements.
  """
  # NOTE: `assertRaisesRegexp` is a deprecated alias (removed in
  # Python 3.12); use `assertRaisesRegex` instead.
  # The `weights` vector length must match the number of datasets.
  with self.assertRaisesRegex(ValueError,
                              r"vector of length `len\(datasets\)`"):
    interleave_ops.sample_from_datasets(
        [dataset_ops.Dataset.range(10),
         dataset_ops.Dataset.range(20)],
        weights=[0.25, 0.25, 0.25, 0.25])
  # Weights must be floating point.
  with self.assertRaisesRegex(TypeError, "`tf.float32` or `tf.float64`"):
    interleave_ops.sample_from_datasets(
        [dataset_ops.Dataset.range(10),
         dataset_ops.Dataset.range(20)],
        weights=[1, 1])
  # All sampled datasets must produce elements of the same type.
  with self.assertRaisesRegex(TypeError, "must have the same type"):
    interleave_ops.sample_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(0.0)
    ])
  # `choice_dataset` must yield `tf.int64` values.
  with self.assertRaisesRegex(TypeError, "tf.int64"):
    interleave_ops.choose_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(1)
    ], choice_dataset=dataset_ops.Dataset.from_tensors(1.0))
  # `choice_dataset` must yield scalar values.
  with self.assertRaisesRegex(TypeError, "scalar"):
    interleave_ops.choose_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(1)
    ], choice_dataset=dataset_ops.Dataset.from_tensors([1.0]))
def testErrors(self):
  """Exercises argument validation of sample/choose_from_datasets."""
  # Length of `weights` must match the number of datasets.
  with self.assertRaisesRegex(ValueError, r"must have the same length"):
    interleave_ops.sample_from_datasets(
        [dataset_ops.Dataset.range(10),
         dataset_ops.Dataset.range(20)],
        weights=[0.25, 0.25, 0.25, 0.25])
  # Weights must be float32/float64.
  with self.assertRaisesRegex(TypeError, "`tf.float32` or `tf.float64`"):
    interleave_ops.sample_from_datasets(
        [dataset_ops.Dataset.range(10),
         dataset_ops.Dataset.range(20)],
        weights=[1, 1])
  # Element types of the sampled datasets must agree.
  with self.assertRaisesRegex(TypeError, "must have the same type"):
    interleave_ops.sample_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(0.0)
    ])
  # An empty list of datasets is rejected up front.
  with self.assertRaisesRegex(
      ValueError, r"`datasets` must be a non-empty list of datasets."):
    interleave_ops.sample_from_datasets(datasets=[], weights=[])
  # `choice_dataset` elements must be tf.int64.
  with self.assertRaisesRegex(TypeError, "tf.int64"):
    interleave_ops.choose_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(1)
    ], choice_dataset=dataset_ops.Dataset.from_tensors(1.0))
  # `choice_dataset` elements must be scalars.
  with self.assertRaisesRegex(TypeError, "scalar"):
    interleave_ops.choose_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(1)
    ], choice_dataset=dataset_ops.Dataset.from_tensors([1.0]))
  # A choice index outside [0, len(datasets)) fails at iteration time.
  with self.assertRaisesRegex(errors.InvalidArgumentError, "out of range"):
    ds = interleave_ops.choose_from_datasets(
        [dataset_ops.Dataset.from_tensors(0)],
        choice_dataset=dataset_ops.Dataset.from_tensors(
            constant_op.constant(1, dtype=dtypes.int64)))
    get_next = self.getNext(ds)
    self.evaluate(get_next())
  # The empty dataset list is also rejected for choose_from_datasets.
  with self.assertRaisesRegex(
      ValueError, r"`datasets` must be a non-empty list of datasets."):
    interleave_ops.choose_from_datasets(
        datasets=[], choice_dataset=dataset_ops.Dataset.from_tensors(1.0))
  # `choice_dataset` must actually be a dataset.
  with self.assertRaisesRegex(
      TypeError, r"`choice_dataset` must be a dataset of scalar"):
    interleave_ops.choose_from_datasets([
        dataset_ops.Dataset.from_tensors(0),
        dataset_ops.Dataset.from_tensors(1)
    ], choice_dataset=None)
def testChooseFromDatasetsChoiceDatasetIsEmpty(self):
  """An empty choice dataset yields an empty result dataset."""
  candidates = [
      dataset_ops.Dataset.from_tensors(b"foo").repeat(),
      dataset_ops.Dataset.from_tensors(b"bar").repeat(),
      dataset_ops.Dataset.from_tensors(b"baz").repeat(),
  ]
  # `range(0)` produces no choice indices at all.
  result = interleave_ops.choose_from_datasets(
      candidates,
      choice_dataset=dataset_ops.Dataset.range(0),
      stop_on_empty_dataset=False)
  self.assertDatasetProduces(result, [])
def testChooseFromDatasetsNested(self):
  """Choosing works when the inputs are datasets of (window) datasets."""
  first = dataset_ops.Dataset.range(10).window(2)
  second = dataset_ops.Dataset.range(10, 20).window(2)
  # Alternate between the two windowed datasets five times.
  selector = dataset_ops.Dataset.range(2).repeat(5)
  result = interleave_ops.choose_from_datasets([first, second], selector)
  result = result.flat_map(lambda window: window)
  # Each round pulls one 2-element window from each source in turn.
  expected = [
      10 * source + 2 * round_index + offset
      for round_index in range(5)
      for source in range(2)
      for offset in (0, 1)
  ]
  self.assertDatasetProduces(result, expected)
def testChooseFromDatasetsStoppingOnEmptyDataset(self):
  """stop_on_empty_dataset=True ends at the first exhausted input."""
  candidates = [
      dataset_ops.Dataset.from_tensors(b"foo").repeat(2),
      dataset_ops.Dataset.from_tensors(b"bar").repeat(),
      dataset_ops.Dataset.from_tensors(b"baz").repeat(),
  ]
  indices = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=np.int64)
  selector = dataset_ops.Dataset.from_tensor_slices(indices)
  # The first dataset has only 2 elements but is selected 3 times, so the
  # result stops after those 2 elements.
  result = interleave_ops.choose_from_datasets(
      candidates, selector, stop_on_empty_dataset=True)
  self.assertDatasetProduces(result, [b"foo", b"foo"])
def testChooseFromDatasets(self):
  """Elements follow the index sequence given by the choice dataset."""
  words = [b"foo", b"bar", b"baz"]
  sources = [dataset_ops.Dataset.from_tensors(w).repeat() for w in words]
  indices = np.random.randint(3, size=(15,), dtype=np.int64)
  selector = dataset_ops.Dataset.from_tensor_slices(indices)
  result = interleave_ops.choose_from_datasets(sources, selector)
  get_next = self.getNext(result)
  for index in indices:
    self.assertEqual(words[index], self.evaluate(get_next()))
  # The result is exhausted once the choice dataset is.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def testSelectFromDatasets(self):
  """Selection mirrors the random index sequence of the choice dataset."""
  words = [b"foo", b"bar", b"baz"]
  repeated = [
      dataset_ops.Dataset.from_tensors(word).repeat() for word in words
  ]
  choices = np.random.randint(3, size=(15,), dtype=np.int64)
  chooser = dataset_ops.Dataset.from_tensor_slices(choices)
  chosen = interleave_ops.choose_from_datasets(repeated, chooser)
  element_fn = self.getNext(chosen)
  for choice in choices:
    self.assertEqual(words[choice], self.evaluate(element_fn()))
  # Only 15 choices were provided, so a 16th pull must fail.
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(element_fn())
def testChooseFromDatasets(self, num_workers):
  """choose_from_datasets composes with dynamic sharding over a cluster."""
  cluster = data_service_test_base.TestCluster(num_workers=num_workers)
  words = [b"foo", b"bar", b"baz"]
  sources = [dataset_ops.Dataset.from_tensors(w).repeat() for w in words]
  indices = np.random.randint(3, size=(15,), dtype=np.int64)
  selector = dataset_ops.Dataset.from_tensor_slices(indices)
  ds = interleave_ops.choose_from_datasets(sources, selector)
  ds = self._make_dynamic_sharding_dataset(ds, cluster)
  expected = [words[i] for i in indices]
  # With more than one worker, output order is nondeterministic, so only
  # compare as multisets in that case.
  self.assertDatasetProduces(
      ds, expected, assert_items_equal=num_workers > 1)
def testChooseFromDatasetsSkippingEmptyDatasets(self):
  """stop_on_empty_dataset=False skips exhausted inputs and continues."""
  candidates = [
      dataset_ops.Dataset.from_tensors(b"foo").repeat(2),
      dataset_ops.Dataset.from_tensors(b"bar").repeat(),
      dataset_ops.Dataset.from_tensors(b"baz").repeat(),
  ]
  indices = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=np.int64)
  selector = dataset_ops.Dataset.from_tensor_slices(indices)
  result = interleave_ops.choose_from_datasets(
      candidates, selector, stop_on_empty_dataset=False)
  # Chooses 2 elements from the first dataset while the selector specifies 3.
  self.assertDatasetProduces(
      result,
      [b"foo", b"foo", b"bar", b"bar", b"bar", b"baz", b"baz", b"baz"])
def testSelectFromDatasets(self):
  """Graph-mode selection follows the choice indices (v1 one-shot iterator)."""
  words = [b"foo", b"bar", b"baz"]
  datasets = [dataset_ops.Dataset.from_tensors(w).repeat() for w in words]
  choice_array = np.random.randint(3, size=(15,), dtype=np.int64)
  choice_dataset = dataset_ops.Dataset.from_tensor_slices(choice_array)
  dataset = interleave_ops.choose_from_datasets(datasets, choice_dataset)
  iterator = dataset.make_one_shot_iterator()
  next_element = iterator.get_next()
  # Use `self.evaluate` consistently; the original mixed it with `sess.run`,
  # which left the `sess` binding otherwise unused.
  with self.cached_session():
    for i in choice_array:
      self.assertEqual(words[i], self.evaluate(next_element))
    # Only 15 choices were made, so the 16th element is out of range.
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
def testSelectFromDatasets(self):
  """Graph-mode selection follows the choice indices."""
  words = [b"foo", b"bar", b"baz"]
  datasets = [dataset_ops.Dataset.from_tensors(w).repeat() for w in words]
  choice_array = np.random.randint(3, size=(15,), dtype=np.int64)
  choice_dataset = dataset_ops.Dataset.from_tensor_slices(choice_array)
  dataset = interleave_ops.choose_from_datasets(datasets, choice_dataset)
  iterator = dataset_ops.make_one_shot_iterator(dataset)
  next_element = iterator.get_next()
  # `sess` was bound but never used: every evaluation goes through
  # `self.evaluate`, which picks up the cached session automatically.
  with self.cached_session():
    for i in choice_array:
      self.assertEqual(words[i], self.evaluate(next_element))
    # Only 15 choices were made, so the 16th element is out of range.
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
def choose_from_datasets(datasets, choice_dataset):
  """Creates a dataset that deterministically chooses elements from `datasets`.

  Each element of `choice_dataset` names the input dataset from which the
  next output element is drawn. For example:

  ```python
  datasets = [tf.data.Dataset.from_tensors("foo").repeat(),
              tf.data.Dataset.from_tensors("bar").repeat(),
              tf.data.Dataset.from_tensors("baz").repeat()]

  # Define a dataset containing `[0, 1, 2, 0, 1, 2, 0, 1, 2]`.
  choice_dataset = tf.data.Dataset.range(3).repeat(3)
  result = tf.contrib.data.choose_from_datasets(datasets, choice_dataset)
  ```

  The elements of `result` will be:

  ```
  "foo", "bar", "baz", "foo", "bar", "baz", "foo", "bar", "baz"
  ```

  Args:
    datasets: A list of `tf.data.Dataset` objects with compatible structure.
    choice_dataset: A `tf.data.Dataset` of scalar `tf.int64` tensors between
      `0` and `len(datasets) - 1`.

  Returns:
    A dataset that interleaves elements from `datasets` according to the
    values of `choice_dataset`.

  Raises:
    TypeError: If the `datasets` or `choice_dataset` arguments have the wrong
      type.
  """
  # Thin compatibility shim: all of the work happens in the core op.
  return interleave_ops.choose_from_datasets(
      datasets=datasets, choice_dataset=choice_dataset)
def choose_from_datasets(datasets, choice_dataset):
  """Creates a dataset that deterministically chooses elements from `datasets`.

  The n-th element of `choice_dataset` selects which of `datasets` supplies
  the n-th output element. For example:

  ```python
  datasets = [tf.data.Dataset.from_tensors("foo").repeat(),
              tf.data.Dataset.from_tensors("bar").repeat(),
              tf.data.Dataset.from_tensors("baz").repeat()]

  # Define a dataset containing `[0, 1, 2, 0, 1, 2, 0, 1, 2]`.
  choice_dataset = tf.data.Dataset.range(3).repeat(3)
  result = tf.data.experimental.choose_from_datasets(datasets, choice_dataset)
  ```

  The elements of `result` will be:

  ```
  "foo", "bar", "baz", "foo", "bar", "baz", "foo", "bar", "baz"
  ```

  Args:
    datasets: A list of `tf.data.Dataset` objects with compatible structure.
    choice_dataset: A `tf.data.Dataset` of scalar `tf.int64` tensors between
      `0` and `len(datasets) - 1`.

  Returns:
    A dataset that interleaves elements from `datasets` according to the
    values of `choice_dataset`.

  Raises:
    TypeError: If the `datasets` or `choice_dataset` arguments have the wrong
      type.
  """
  # Forward to the core implementation; this wrapper only exposes the
  # experimental endpoint.
  return interleave_ops.choose_from_datasets(
      datasets=datasets, choice_dataset=choice_dataset)