def testSaveRestoreMultipleIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    iterator_1 = datasets.Iterator(dataset)
    iterator_2 = datasets.Iterator(dataset)
    dataset_2 = Dataset.range(10)
    iterator_3 = datasets.Iterator(dataset_2)
    checkpoint = checkpointable_utils.Checkpoint(
        iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3)
    self.assertAllEqual([1, 4], iterator_1.get_next().numpy())
    self.assertEqual(0, iterator_3.get_next().numpy())
    self.assertEqual(1, iterator_3.get_next().numpy())
    self.assertEqual(2, iterator_3.get_next().numpy())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([1, 4], iterator_2.get_next().numpy())
    self.assertAllEqual([9, 16], iterator_2.get_next().numpy())
    self.assertEqual(3, iterator_3.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertAllEqual([9, 16], iterator_1.get_next().numpy())
    self.assertAllEqual([1, 4], iterator_2.get_next().numpy())
    self.assertEqual(3, iterator_3.get_next().numpy())
def testNestedOutputs(self):
    ds = Dataset.zip(
        (Dataset.range(4), Dataset.zip((Dataset.range(4), Dataset.range(4)))))
    total = 0
    # The Iterator will return a nested structure of Tensor objects.
    # Some funkiness to compare against simple integers.
    for (i, x) in enumerate(datasets.Iterator(ds)):
        want = (i, (i, i))
        got = (x[0].numpy(), (x[1][0].numpy(), x[1][1].numpy()))
        self.assertEqual(got, want)
        total += 1
    self.assertEqual(4, total)
def testOverrideThreadPool(self):

    def get_thread_id(_):
        # Python creates a dummy thread object to represent the current
        # thread when called from an "alien" thread (such as a
        # `PrivateThreadPool` thread in this case). It does not include
        # the TensorFlow-given display name, but it has a unique
        # identifier that maps one-to-one with the underlying OS thread.
        return np.array(threading.current_thread().ident).astype(np.int64)

    for num_threads in [1, 2, 4, 8, 16]:
        dataset = (
            Dataset.range(1000).map(
                lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
                num_parallel_calls=32).apply(unique.unique()))
        dataset = threadpool.override_threadpool(
            dataset,
            threadpool.PrivateThreadPool(
                num_threads,
                display_name='private_thread_pool_%d' % num_threads))
        thread_ids = []
        for next_element in datasets.Iterator(dataset):
            thread_ids.append(next_element)
        self.assertEqual(len(thread_ids), len(set(thread_ids)))
        self.assertGreater(len(thread_ids), 0)
        # NOTE(mrry): We don't control the thread pool scheduling, and
        # so cannot guarantee that all of the threads in the pool will
        # perform work.
        self.assertLessEqual(len(thread_ids), num_threads)
def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
    ds = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6]).map(math_ops.square)
    got1 = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual([1, 4, 9, 16, 25, 36], got1)
    got2 = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual(got1, got2)
def testMapAndFilter(self):

    def even(x):
        return math_ops.equal(math_ops.mod(x, 2), 0)

    it = datasets.Iterator(Dataset.range(8).map(math_ops.square).filter(even))
    got = [x.numpy() for x in it]
    self.assertAllEqual([0, 4, 16, 36], got)
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Feeds data to a linear regression model of one feature.

    Args:
        features: pandas DataFrame of features.
        targets: pandas DataFrame of targets.
        batch_size: size of batches to be passed to the model.
        shuffle: whether to shuffle the data.
        num_epochs: number of epochs for which data should be repeated.
            None = repeat indefinitely.

    Returns:
        Tuple of (features, labels) for the next data batch.
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified. Note that `shuffle` returns a new
    # dataset; the result must be reassigned for the shuffle to take effect.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
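# Hedged usage sketch for my_input_fn above; the DataFrame, column name, and
# target values are invented placeholders, and TF 1.x graph mode is assumed
# (where make_one_shot_iterator().get_next() yields symbolic tensors).
import numpy as np
import pandas as pd

example_features = pd.DataFrame({'total_rooms': [5612.0, 7650.0, 720.0]})
example_targets = pd.Series([66.9, 80.1, 85.7])
feature_batch, label_batch = my_input_fn(
    example_features, example_targets, batch_size=2, shuffle=False)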
def benchmarkSliceBatchCacheRepeatCallable(self):
    input_size = 10000
    batch_size = 100
    num_epochs = 100

    input_data = np.random.randn(input_size)

    dataset = (
        Dataset.from_tensor_slices(input_data).batch(batch_size).cache()
        .repeat(num_epochs))
    iterator = datasets.Iterator(dataset)

    ends = [time.time()]
    for _ in iterator:
        ends.append(time.time())

    deltas = np.ediff1d(ends)
    median_wall_time = np.median(deltas)
    print('Slice/batch/cache/repeat eager input size: %d batch size: %d Median '
          'wall time per element: %f' %
          (input_size, batch_size, median_wall_time))
    self.report_benchmark(
        iters=len(deltas),
        wall_time=median_wall_time,
        name='benchmark_slice_batch_cache_repeat_eager_input_%d_batch_%d' %
        (input_size, batch_size))
def testGetNextOneShotIterator(self):
    iterator = Dataset.range(4).make_one_shot_iterator()
    self.assertEqual(0, iterator.get_next().numpy())
    self.assertEqual(1, iterator.get_next().numpy())
    self.assertEqual(2, iterator.get_next().numpy())
    self.assertEqual(3, iterator.get_next().numpy())
    with self.assertRaises(errors.OutOfRangeError):
        iterator.get_next()
def testGetNext(self):
    iterator = datasets.Iterator(Dataset.range(4))
    self.assertEqual(0, iterator.get_next().numpy())
    self.assertEqual(1, iterator.get_next().numpy())
    self.assertEqual(2, iterator.get_next().numpy())
    self.assertEqual(3, iterator.get_next().numpy())
    with self.assertRaises(errors.OutOfRangeError):
        iterator.get_next()
def testPyFunc(self):

    def my_map(inp):
        return [[x + 1 for x in inp]]

    ds = Dataset.range(4).map(
        lambda x: script_ops.py_func(my_map, [[x]], dtypes.int64))
    got = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual([[1], [2], [3], [4]], got)
def _input_fn():
    # raw_features = {"MFCCs": features.values}
    ds = Dataset.from_tensor_slices(features.to_dict('list'))  # warning: 2GB limit
    ds = ds.batch(batch_size)

    # Return the next batch of data.
    feature_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch
def _input_fn():
    raw_features = {"MFCCs": features.values}
    raw_targets = np.array(labels)

    ds = Dataset.from_tensor_slices((raw_features, raw_targets))  # warning: 2GB limit
    ds = ds.batch(batch_size)

    # Return the next batch of data.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    features = {key: np.array(val) for key, val in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
def train(self, input_language: arraylike, output_language: arraylike,
          number_epochs: int = 20):
    assert len(input_language) == len(output_language), \
        "input and output language must be equal length, " \
        "since the observations must be matching translations"
    input_word_index, input_sentence, input_tokenizer = tokenize_sentences(
        input_language, num_words=self.num_words)
    output_word_index, output_sentence, output_tokenizer = tokenize_sentences(
        output_language, num_words=self.num_words)
    self.input_tokenizer, self.output_tokenizer = input_tokenizer, output_tokenizer
    self.input_word_index, self.output_word_index = input_word_index, output_word_index
    input_splits, output_splits = create_train_test_splits(
        input_sentence, output_sentence, ratio=self.test_ratio)
    buffer_size = input_splits[0].shape[0]
    dataset = Dataset.from_tensor_slices(
        (input_splits[0], output_splits[0])).shuffle(buffer_size=buffer_size)
    dataset = dataset.batch(self.batch_size)
    steps_per_epoch = max(buffer_size // self.batch_size, 1)
    for epoch in range(number_epochs):
        try:
            epoch_start = time.time()
            loss_per_batch = []
            # `inp` rather than `input`, to avoid shadowing the builtin.
            for (batch, (inp, target)) in enumerate(dataset.take(steps_per_epoch)):
                batch_start = time.time()
                batch_loss = self._train_step(inp, target)
                if batch % max(int(steps_per_epoch / 5), 1) == 0:
                    logging.warning(
                        "Epoch: {} \t Batch: {} \t Rel Loss: {:.4f} \t Time taken: {}"
                        .format(epoch + 1, batch, batch_loss,
                                time.time() - batch_start))
                loss_per_batch.append(batch_loss)
            epoch_loss = sum(loss_per_batch) / steps_per_epoch
            self.loss_per_epoch.append(epoch_loss)
            logging_string = "Epoch: {} \t Rel Loss: {:.4f} \t Time taken: {}".format(
                epoch + 1, epoch_loss, time.time() - epoch_start)
            if self.test_ratio > 0.0:
                val_loss = self._forward_pass(input_splits[1], output_splits[1])
                rel_val_loss = val_loss / input_splits[1].shape[1]
                logging_string += " \t Rel Val Loss: {:.4f}".format(rel_val_loss)
            logging.warning(logging_string)
            if (epoch + 1) % self.checkpoint_steps == 0:
                self.checkpoint_manager.save()
        except KeyboardInterrupt:
            # Save progress before letting the interrupt end training.
            self.checkpoint_manager.save()
def get_dataset_encoded(dir='train', batch_size=32):
    # Load encoder.
    encoder = tfds.deprecated.text.SubwordTextEncoder.load_from_file('vocab')
    print('Vocab size is', encoder.vocab_size)

    # Load data, dropping the trailing empty line of each file.
    with open('dataset/' + dir + '/original.txt') as original:
        data_orig = original.readlines()[:-1]
    with open('dataset/' + dir + '/shuffled.txt') as shuffled:
        data_shuffled = shuffled.readlines()[:-1]
    data = data_orig + data_shuffled

    # Get song with max length to know the size for padding.
    max_len = 0
    longest_song = ''
    count = 0
    for i in range(len(data)):
        count += 1
        data[i] = data[i].strip()
        song = data[i]
        data[i] = encoder.encode(data[i])
        if len(data[i]) > max_len:
            max_len = len(data[i])
            longest_song = song
    print('max len is', max_len)
    print('longest song:', longest_song)

    # Create labels.
    labels = [1] * len(data_orig) + [0] * len(data_shuffled)

    # Shuffle data and labels with the same seed so they stay aligned.
    random.seed(42)
    random.shuffle(data)
    random.seed(42)
    random.shuffle(labels)

    # Create Dataset objects from generators.
    data_gen = lambda: (d for d in data)
    label_gen = lambda: ([l] for l in labels)
    dataset_data = tf.data.Dataset.from_generator(
        data_gen, output_types=tf.int32, output_shapes=tf.TensorShape([None]))
    dataset_labels = tf.data.Dataset.from_generator(
        label_gen, output_types=tf.int32, output_shapes=tf.TensorShape([1]))
    dataset = Dataset.zip((dataset_data, dataset_labels))

    # Every batch is padded to max_len (the longest song overall), since
    # padded_shapes is given explicitly rather than left to per-batch maxima.
    dataset_batched = dataset.padded_batch(
        batch_size, padding_values=0, padded_shapes=(max_len, 1))

    # Debug prints:
    print('{0} dataset: {1}'.format(dir, dataset_batched.cardinality()))
    # for element in dataset:
    #     print(element)
    for text_batch, label_batch in dataset_batched.take(1):
        print(text_batch.shape)
        print(label_batch.shape)
        for i in range(5):
            print(text_batch[i])
            print(label_batch[i])
    return dataset_batched
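# Minimal self-contained padded_batch sketch (an illustrative addition, not
# part of the original snippet): each variable-length row is filled with
# `padding_values` up to `padded_shapes` before batching. Assumes TF 2.x.
import tensorflow as tf

ds = tf.data.Dataset.from_generator(
    lambda: iter([[1, 2], [3], [4, 5, 6]]),
    output_types=tf.int32,
    output_shapes=tf.TensorShape([None]))
batched = ds.padded_batch(2, padded_shapes=[3], padding_values=0)
for batch in batched:
    print(batch.numpy())  # [[1 2 0] [3 0 0]], then [[4 5 6]]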
def testMultipleIteratorsOnTheSameDataset(self):
    ds = Dataset.range(4)
    it1 = datasets.Iterator(ds)
    it2 = datasets.Iterator(ds)
    got = [x.numpy() for x in it1]
    self.assertAllEqual([0, 1, 2, 3], got)
    got = [x.numpy() for x in it2]
    self.assertAllEqual([0, 1, 2, 3], got)
def _input_fn():
    raw_features = {'pixels': features.values}
    raw_targets = np.array(labels)
    ds = Dataset.from_tensor_slices((raw_features, raw_targets))
    ds = ds.batch(batch_size)
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
def _input_fn():
    feature_new = {'pixels': feature.values}
    label_new = np.array(label)
    ds = Dataset.from_tensor_slices((feature_new, label_new))
    ds = ds.batch(batch_size)
    feature_next, label_next = ds.make_one_shot_iterator().get_next()
    return feature_next, label_next
def my_training_input_fn(features, targets):
    features = {key: np.array(value) for key, value in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(1)
    (features, targets) = ds.make_one_shot_iterator().get_next()
    return (features, targets)
def _dataset_without_targets(self, Xs, train):
    if not callable(Xs):
        Xs_fn = lambda: self.wrap_tqdm(Xs, train)
    else:
        Xs_fn = lambda: self.wrap_tqdm(Xs(), train)
    dataset_encoded = lambda: itertools.chain.from_iterable(
        map(self.text_to_tokens_mask, Xs_fn()))
    types, shapes = self.feed_shape_type_def()
    return Dataset.from_generator(dataset_encoded, types[0], shapes[0])  # 0s cut out the targets
def _input_fn(num_epochs=None, shuffle=True):
    idx = np.random.permutation(features.index)
    raw_features = {'pixels': features.reindex(idx)}
    # Apply the same permutation to the labels, so they stay aligned with
    # the permuted features.
    raw_targets = np.array(labels[idx])
    ds = Dataset.from_tensor_slices((raw_features, raw_targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
def _input_fn(num_epochs=None, shuffle=True):
    idx = np.random.permutation(features.index)
    raw_features = {
        "Pclass": features["Pclass"].values,
        "Age": features["Age"].values,
        "SibSp": features["SibSp"].values,
        "Parch": features["Parch"].values,
        "Fare": features["Fare"].values,
        "SexCode": features["SexCode"].values,
        "EmbarkCode": features["EmbarkCode"].values,
    }
    raw_targets = np.array(labels)
    ds = Dataset.from_tensor_slices((raw_features, raw_targets))
    ds = ds.batch(batch_size)
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
def test_smart_resize_tf_dataset(self, size):
    test_input_np = np.random.random((2, 20, 40, 3))
    test_ds = Dataset.from_tensor_slices(test_input_np)

    resize = lambda img: preprocessing_image.smart_resize(img, size=size)
    test_ds = test_ds.map(resize)
    for sample in test_ds.as_numpy_iterator():
        self.assertIsInstance(sample, np.ndarray)
        self.assertListEqual(list(sample.shape), [size[0], size[1], 3])
def input_fn(X_data, y_data, batch_size=1, repeat=1, shuffle=True):
    X_tensor = {key: np.array(value) for key, value in dict(X_data).items()}
    ds = Dataset.from_tensor_slices((X_tensor, y_data))
    ds = ds.batch(int(batch_size)).repeat(repeat)
    X, y = ds.make_one_shot_iterator().get_next()
    return X, y
def inputFunction():
    raw_features = {"audioFeatures": features.values}
    raw_labels = np.array(labels)
    datatens = Dataset.from_tensor_slices((raw_features, raw_labels))
    datatens = datatens.batch(batch_size)
    # Returns the next batch of data.
    feature_batch, label_batch = datatens.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
def input_fn(features, label, batch_size=10, epochs=None, shuffle=True):
    if label is None:
        inputs = dict(features)
    else:
        inputs = (dict(features), label)
    dataset = Dataset.from_tensor_slices(inputs)
    if shuffle:
        dataset = dataset.shuffle(1000)
    dataset = dataset.repeat(epochs).batch(batch_size)
    return dataset.make_one_shot_iterator().get_next()
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    # Convert pandas data to np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    # Shuffle if specified.
    if shuffle:
        ds = ds.shuffle(1000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
def my_input_fn(feature_dataframe, target_serie, batch_size, num_epochs,
                shuffle, buffer_size):
    ds = Dataset.from_tensor_slices((dict(feature_dataframe), target_serie))
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(buffer_size)
    return ds.make_one_shot_iterator().get_next()
def _get_file_list_dataset(self, directory: str,
                           shuffle: bool = False) -> Dataset:
    # Get the file list from directory.
    ds = Dataset.list_files(str(Path(directory) / "*/*"), shuffle=False)
    if shuffle:
        # Shuffle the filenames.
        ds = ds.shuffle(len(ds), reshuffle_each_iteration=False)
    return ds
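# Hedged standalone sketch of the list_files + shuffle pattern above; assumes
# TF 2.x and that an invented "data/train/<class>/<file>" layout exists, so it
# only runs against such a directory.
import tensorflow as tf

files = tf.data.Dataset.list_files('data/train/*/*', shuffle=False)
# A fixed buffer size is used here; `reshuffle_each_iteration=False` keeps a
# single fixed order across epochs, e.g. so a later take/skip split is stable.
files = files.shuffle(buffer_size=1000, reshuffle_each_iteration=False)
for f in files.take(3):
    print(f.numpy())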
def _input_fn(directory, config, mode):
    print("Fetching {} data...".format(mode))
    all_files = os.listdir(directory)
    all_features = []
    all_labels = []
    for file in all_files:
        features, labels = _load_json_file(os.path.join(directory, file), config)
        all_features += features
        all_labels += labels
    num_data_points = len(all_features)
    num_batches = math.ceil(num_data_points / config["batch_size"])
    dataset = Dataset.from_tensor_slices((all_features, all_labels))
    if mode == "train":
        dataset = dataset.batch(config["batch_size"]).shuffle(
            10000, seed=12345).repeat(config["num_epoch"])
    if mode in ("validation", "eval"):
        dataset = dataset.batch(config["batch_size"]).repeat(config["num_epoch"])
    iterator = dataset.make_one_shot_iterator()
    dataset_features, dataset_labels = iterator.get_next()
    return [{config["input_tensor_name"]: dataset_features}, dataset_labels,
            {"num_data_point": num_data_points, "num_batches": num_batches}]
def testTensorsExplicitPrefetchToDevice(self):
    ds = Dataset.from_tensor_slices([0., 1.])
    ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name()))

    with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'):
        datasets.Iterator(ds)

    for i, x in enumerate(ds):
        with ops.device(test.gpu_device_name()):
            x = math_ops.add(x, x)
        self.assertEqual(float(i) + float(i), x.numpy())
def testMapCaptureLookupTable(self):
    default_val = -1
    keys = constant_op.constant(['brain', 'salad', 'surgery'])
    values = constant_op.constant([0, 1, 2], dtypes.int64)
    table = lookup.HashTable(
        lookup.KeyValueTensorInitializer(keys, values), default_val)
    dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
    dataset = dataset.map(table.lookup)
    it = datasets.Iterator(dataset)
    got = [x.numpy() for x in it]
    self.assertAllEqual([0, 1, 2], got)
def input_fn(self, mode: ModeKeys):
    return {
        ModeKeys.TRAIN:
            lambda: self.train_ds.repeat(self.params.num_epoch).padded_batch(
                self.params.batch_size, padded_shapes=([None], [], [], [])),
        ModeKeys.EVAL:
            lambda: self.eval_ds.padded_batch(
                self.params.batch_size, padded_shapes=([None], [], [], [])),
        ModeKeys.INFER:
            lambda: Dataset.range(1),
    }[mode]().make_one_shot_iterator().get_next(), None
def input_template(feature, label, batch_size=1, epoch_num=None, shuffle=True):
    """Returns a (feature, label) tuple for the next data batch."""
    # Convert pandas data into a dict of np arrays.
    feature = {key: np.array(value) for key, value in dict(feature).items()}

    # Construct a dataset with the configured batch size, epoch count, and shuffling.
    ds = Dataset.from_tensor_slices((feature, label))
    ds = ds.batch(batch_size).repeat(epoch_num)
    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data.
    feature, label = ds.make_one_shot_iterator().get_next()
    return feature, label
def my_input_fn(x, y, batch_size=1, shuffle=True, num_epochs=None):
    x = {key: np.array(value) for key, value in dict(x).items()}
    ds = Dataset.from_tensor_slices((x, y))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(100)
    x, labels = ds.make_one_shot_iterator().get_next()
    return x, labels
def testRestoreInReconstructedIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.range(10)
    for i in range(5):
        iterator = datasets.Iterator(dataset)
        checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
        checkpoint.restore(saver.latest_checkpoint(checkpoint_directory))
        for j in range(2):
            self.assertEqual(i * 2 + j, iterator.get_next().numpy())
        checkpoint.save(file_prefix=checkpoint_prefix)
def testRestoreInReconstructedIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.range(10)
    for i in range(5):
        iterator = datasets.Iterator(dataset)
        checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
        checkpoint.restore(
            checkpoint_management.latest_checkpoint(checkpoint_directory))
        for j in range(2):
            self.assertEqual(i * 2 + j, iterator.get_next().numpy())
        checkpoint.save(file_prefix=checkpoint_prefix)
def testRestoreExhaustedIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.range(3)
    iterator = datasets.Iterator(dataset)
    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
    self.assertEqual(0, iterator.get_next().numpy())
    self.assertEqual(1, iterator.get_next().numpy())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertEqual(2, iterator.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertEqual(2, iterator.get_next().numpy())
def testSaveRestore(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    iterator = datasets.Iterator(dataset)
    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
    self.assertAllEqual([1, 4], iterator.get_next().numpy())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([9, 16], iterator.get_next().numpy())
    self.assertAllEqual([25, 36], iterator.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertAllEqual([9, 16], iterator.get_next().numpy())
    self.assertAllEqual([25, 36], iterator.get_next().numpy())
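# The save/restore tests above rely on TF-internal helpers (datasets.Iterator,
# checkpointable_utils). A minimal sketch of the same idea with public TF 2.x
# APIs (assumed, not from the original): tf.train.Checkpoint can track a
# tf.data iterator's position.
import tempfile

import tensorflow as tf

ds = tf.data.Dataset.range(5).map(lambda x: x * x)
it = iter(ds)
ckpt = tf.train.Checkpoint(iterator=it)

assert next(it).numpy() == 0
path = ckpt.save(tempfile.mkdtemp() + '/ckpt')  # snapshot after one element
assert next(it).numpy() == 1
ckpt.restore(path)                              # rewind to the snapshot
assert next(it).numpy() == 1                    # same element again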
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    # Convert the pandas data into a dict of numpy arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a TensorFlow Dataset, and configure batching and repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data if requested.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
def testSparseTensorElements(self):
    components = (sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0], [1, 0], [2, 0]]),
        values=np.array([0, 0, 0]),
        dense_shape=np.array([3, 1])),
                  sparse_tensor.SparseTensorValue(
                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
                      values=np.array([1, 2, 3]),
                      dense_shape=np.array([3, 3])))
    expected = [
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[0]]),
             values=np.array([1]),
             dense_shape=np.array([3]))),
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[1]]),
             values=np.array([2]),
             dense_shape=np.array([3]))),
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[2]]),
             values=np.array([3]),
             dense_shape=np.array([3]))),
    ]
    for i, result in enumerate(
            datasets.Iterator(Dataset.from_tensor_slices(components))):
        self.assertSparseValuesEqual(expected[i][0], result[0])
        self.assertSparseValuesEqual(expected[i][1], result[1])
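# Hedged TF 2.x sketch (an illustrative addition) of the behavior the test
# above checks: slicing a SparseTensor along its first dimension yields one
# per-row SparseTensor per element.
import tensorflow as tf

st = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 1], [2, 2]], values=[1, 2, 3], dense_shape=[3, 3])
for row in tf.data.Dataset.from_tensor_slices(st):
    print(tf.sparse.to_dense(row).numpy())  # [1 0 0], [0 2 0], [0 0 3]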
def testBasicImplicitIterator(self):
    got = []
    for t in Dataset.range(4):
        got.append(t.numpy())
    self.assertAllEqual([0, 1, 2, 3], got)
def testTensorsPlacedOnDevice(self):
    ds = Dataset.from_tensors([0., 1.])
    with ops.device(test.gpu_device_name()):
        x = datasets.Iterator(ds).next()
        x = math_ops.add(x, x)
    self.assertAllEqual([0., 2.], x.numpy())
def testGpuDefinedDataset(self):
    with ops.device(test.gpu_device_name()):
        ds = Dataset.from_tensors([0., 1.])
        for x in ds:
            y = math_ops.add(x, x)
    self.assertAllEqual([0., 2.], y.numpy())
def testBasic(self):
    got = []
    for t in datasets.Iterator(Dataset.range(4)):
        got.append(t.numpy())
    self.assertAllEqual([0, 1, 2, 3], got)
def testBasicOneShotIterator(self):
    got = []
    for t in Dataset.range(4).make_one_shot_iterator():
        got.append(t.numpy())
    self.assertAllEqual([0, 1, 2, 3], got)