Example #1
  def testSaveRestoreMultipleIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    iterator_1 = datasets.Iterator(dataset)
    iterator_2 = datasets.Iterator(dataset)
    dataset_2 = Dataset.range(10)
    iterator_3 = datasets.Iterator(dataset_2)

    checkpoint = checkpointable_utils.Checkpoint(
        iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3)
    self.assertAllEqual([1, 4], iterator_1.get_next().numpy())
    self.assertEqual(0, iterator_3.get_next().numpy())
    self.assertEqual(1, iterator_3.get_next().numpy())
    self.assertEqual(2, iterator_3.get_next().numpy())

    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([1, 4], iterator_2.get_next().numpy())
    self.assertAllEqual([9, 16], iterator_2.get_next().numpy())
    self.assertEqual(3, iterator_3.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertAllEqual([9, 16], iterator_1.get_next().numpy())
    self.assertAllEqual([1, 4], iterator_2.get_next().numpy())
    self.assertEqual(3, iterator_3.get_next().numpy())
Example #2
 def testNestedOutputs(self):
   ds = Dataset.zip((Dataset.range(4), Dataset.zip((Dataset.range(4),
                                                    Dataset.range(4)))))
   total = 0
   # The Iterator will return a nested structure of Tensor objects.
   # Some funkiness to compare against simple integers.
   for (i, x) in enumerate(datasets.Iterator(ds)):
     want = (i, (i, i))
     got = (x[0].numpy(), (x[1][0].numpy(), x[1][1].numpy()))
     self.assertEqual(got, want)
     total += 1
   self.assertEqual(4, total)
Example #3
  def testOverrideThreadPool(self):

    def get_thread_id(_):
      # Python creates a dummy thread object to represent the current
      # thread when called from an "alien" thread (such as a
      # `PrivateThreadPool` thread in this case). It does not include
      # the TensorFlow-given display name, but it has a unique
      # identifier that maps one-to-one with the underlying OS thread.
      return np.array(threading.current_thread().ident).astype(np.int64)

    for num_threads in [1, 2, 4, 8, 16]:

      dataset = (
          Dataset.range(1000).map(
              lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
              num_parallel_calls=32).apply(unique.unique()))

      dataset = threadpool.override_threadpool(
          dataset,
          threadpool.PrivateThreadPool(
              num_threads, display_name='private_thread_pool_%d' % num_threads))

      thread_ids = []
      for next_element in datasets.Iterator(dataset):
        thread_ids.append(next_element)
      self.assertEqual(len(thread_ids), len(set(thread_ids)))
      self.assertGreater(len(thread_ids), 0)
      # NOTE(mrry): We don't control the thread pool scheduling, and
      # so cannot guarantee that all of the threads in the pool will
      # perform work.
      self.assertLessEqual(len(thread_ids), num_threads)
Example #4
  def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
    ds = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6]).map(math_ops.square)

    got1 = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual([1, 4, 9, 16, 25, 36], got1)
    got2 = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual(got1, got2)
Example #5
  def testMapAndFilter(self):
    def even(x):
      return math_ops.equal(math_ops.mod(x, 2), 0)

    it = datasets.Iterator(Dataset.range(8).map(math_ops.square).filter(even))
    got = [x.numpy() for x in it]
    self.assertAllEqual([0, 4, 16, 36], got)
Example #6
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """ Trains a linear regression model of one feature.
    Args:
        :param features: pandas DataFrame of features
        :param targets: pandas DataFrame of targets
        :param batch_size: size of batches to be passed to the model
        :param shuffle: weather to shuffle the data
        :param num_epochs: number of epochs for which data should be repeated. None = repeat indefinitely
    :return:
        Tuple of (features, labels) for next data batch
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
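For context, an input function with this signature is usually handed to a tf.estimator model through a lambda, so the data frames and hyperparameters are bound at call time while the estimator decides when to invoke it. Below is a minimal usage sketch under TF 1.x that reuses my_input_fn from above; the LinearRegressor, the feature column, and the my_feature/my_targets names are illustrative assumptions, not taken from the example itself.

import numpy as np
import pandas as pd
import tensorflow as tf

# Hypothetical single-feature training data; any numeric pandas column works here.
my_feature = pd.DataFrame({"total_rooms": np.random.rand(100)})
my_targets = pd.Series(np.random.rand(100))

feature_columns = [tf.feature_column.numeric_column("total_rooms")]
linear_regressor = tf.estimator.LinearRegressor(feature_columns=feature_columns)

# The lambda defers the call so the estimator can build the input pipeline inside its own graph.
linear_regressor.train(
    input_fn=lambda: my_input_fn(my_feature, my_targets, batch_size=5),
    steps=100)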
Example #7
  def benchmarkSliceBatchCacheRepeatCallable(self):
    input_size = 10000
    batch_size = 100
    num_epochs = 100

    input_data = np.random.randn(input_size)

    dataset = (
        Dataset.from_tensor_slices(input_data).batch(batch_size).cache()
        .repeat(num_epochs))
    iterator = datasets.Iterator(dataset)

    ends = [time.time()]
    for _ in iterator:
      ends.append(time.time())

    deltas = np.ediff1d(ends)
    median_wall_time = np.median(deltas)
    print(
        'Slice/batch/cache/repeat eager input size: %d batch size: %d Median '
        'wall time per element: %f'
        % (input_size, batch_size, median_wall_time))
    self.report_benchmark(
        iters=len(deltas),
        wall_time=median_wall_time,
        name='benchmark_slice_batch_cache_repeat_eager_input_%d_batch_%d' %
        (input_size, batch_size))
Example #8
    def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
        ds = Dataset.from_tensor_slices([1, 2, 3, 4, 5,
                                         6]).map(math_ops.square)

        got1 = [x.numpy() for x in datasets.Iterator(ds)]
        self.assertAllEqual([1, 4, 9, 16, 25, 36], got1)
        got2 = [x.numpy() for x in datasets.Iterator(ds)]
        self.assertAllEqual(got1, got2)
Example #9
 def testGetNextOneShotIterator(self):
     iterator = Dataset.range(4).make_one_shot_iterator()
     self.assertEqual(0, iterator.get_next().numpy())
     self.assertEqual(1, iterator.get_next().numpy())
     self.assertEqual(2, iterator.get_next().numpy())
     self.assertEqual(3, iterator.get_next().numpy())
     with self.assertRaises(errors.OutOfRangeError):
         iterator.get_next()
Example #10
 def testGetNext(self):
     iterator = datasets.Iterator(Dataset.range(4))
     self.assertEqual(0, iterator.get_next().numpy())
     self.assertEqual(1, iterator.get_next().numpy())
     self.assertEqual(2, iterator.get_next().numpy())
     self.assertEqual(3, iterator.get_next().numpy())
     with self.assertRaises(errors.OutOfRangeError):
         iterator.get_next()
Example #11
    def testPyFunc(self):
        def my_map(inp):
            return [[x + 1 for x in inp]]

        ds = Dataset.range(4).map(
            lambda x: script_ops.py_func(my_map, [[x]], dtypes.int64))
        got = [x.numpy() for x in datasets.Iterator(ds)]
        self.assertAllEqual([[1], [2], [3], [4]], got)
Example #12
 def _input_fn():
     # raw_features = {"MFCCs": features.values}
     ds = Dataset.from_tensor_slices(
         (features.to_dict('list')))  # warning: 2GB limit
     ds = ds.batch(batch_size)
     # Return the next batch of data.
     feature_batch = ds.make_one_shot_iterator().get_next()
     return feature_batch
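The "2GB limit" warning above refers to from_tensor_slices embedding the data as constants in the graph, which runs into the 2GB GraphDef protobuf cap. One way around it (not part of the original snippet) is to stream rows through Dataset.from_generator instead; a minimal sketch, assuming a hypothetical numpy feature matrix named features_np:

import numpy as np
import tensorflow as tf

# Hypothetical MFCC-like feature matrix that would be too large to embed as a graph constant.
features_np = np.random.rand(1000, 13).astype(np.float32)

def row_gen():
    # Yield one row at a time, so nothing is serialized into the GraphDef.
    for row in features_np:
        yield row

ds = tf.data.Dataset.from_generator(
    row_gen, output_types=tf.float32, output_shapes=tf.TensorShape([13]))
ds = ds.batch(32)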
Example #13
 def testGetNextOneShotIterator(self):
   iterator = Dataset.range(4).make_one_shot_iterator()
   self.assertEqual(0, iterator.get_next().numpy())
   self.assertEqual(1, iterator.get_next().numpy())
   self.assertEqual(2, iterator.get_next().numpy())
   self.assertEqual(3, iterator.get_next().numpy())
   with self.assertRaises(errors.OutOfRangeError):
     iterator.get_next()
Example #14
 def _input_fn():
     raw_features = {"MFCCs": features.values}
     raw_targets = np.array(labels)
     ds = Dataset.from_tensor_slices((raw_features, raw_targets))  # warning: 2GB limit
     ds = ds.batch(batch_size)
     # Return the next batch of data.
     feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
     return feature_batch, label_batch
Example #15
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    features = {key: np.array(val) for key, val in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #16
    def testMapAndFilter(self):
        def even(x):
            return math_ops.equal(math_ops.mod(x, 2), 0)

        it = datasets.Iterator(
            Dataset.range(8).map(math_ops.square).filter(even))
        got = [x.numpy() for x in it]
        self.assertAllEqual([0, 4, 16, 36], got)
Example #17
 def testGetNext(self):
   iterator = datasets.Iterator(Dataset.range(4))
   self.assertEqual(0, iterator.get_next().numpy())
   self.assertEqual(1, iterator.get_next().numpy())
   self.assertEqual(2, iterator.get_next().numpy())
   self.assertEqual(3, iterator.get_next().numpy())
   with self.assertRaises(errors.OutOfRangeError):
     iterator.get_next()
Example #18
    def train(self,
              input_language: arraylike,
              output_language: arraylike,
              number_epochs: int = 20):
        assert len(input_language) == len(output_language), "input and output language must be equal length, " \
                                                            "since the observations must be matching translations"
        input_word_index, input_sentence, input_tokenizer = tokenize_sentences(
            input_language, num_words=self.num_words)
        output_word_index, output_sentence, output_tokenizer = tokenize_sentences(
            output_language, num_words=self.num_words)
        self.input_tokenizer, self.output_tokenizer = input_tokenizer, output_tokenizer
        self.input_word_index, self.output_word_index = input_word_index, output_word_index

        input_splits, output_splits = create_train_test_splits(
            input_sentence, output_sentence, ratio=self.test_ratio)
        buffer_size = input_splits[0].shape[0]
        dataset = Dataset.from_tensor_slices(
            (input_splits[0],
             output_splits[0])).shuffle(buffer_size=buffer_size)
        dataset = dataset.batch(self.batch_size)
        steps_per_epoch = max(buffer_size // self.batch_size, 1)

        for epoch in range(number_epochs):
            try:
                epoch_start = time.time()

                loss_per_batch = []
                for (batch,
                     (input,
                      target)) in enumerate(dataset.take(steps_per_epoch)):
                    batch_start = time.time()
                    batch_loss = self._train_step(input, target)
                    if batch % max(int(steps_per_epoch / 5), 1) == 0:
                        logging.warning(
                            "Epoch: {} \t Batch: {} \t Rel Loss: {:.4f} \t Time taken: {}"
                            .format(epoch + 1, batch, batch_loss,
                                    time.time() - batch_start))

                    loss_per_batch.append(batch_loss)

                epoch_loss = sum(loss_per_batch) / steps_per_epoch
                self.loss_per_epoch.append(epoch_loss)
                logging_string = "Epoch: {} \t Rel Loss: {:.4f} \t Time taken: {}".format(
                    epoch + 1, epoch_loss,
                    time.time() - epoch_start)
                if self.test_ratio > 0.0:
                    val_loss = self._forward_pass(input_splits[1],
                                                  output_splits[1])
                    rel_val_loss = val_loss / input_splits[1].shape[1]
                    logging_string += " \t Rel Val Loss: {:.4f}".format(
                        rel_val_loss)
                logging.warning(logging_string)
                if (epoch + 1) % self.checkpoint_steps == 0:
                    self.checkpoint_manager.save()
            except KeyboardInterrupt:
                self.checkpoint_manager.save()
            except Exception as e:
                raise Exception(e)
Example #19
def get_dataset_encoded(dir='train', batch_size=32):
    # Load encoder.
    encoder = tfds.deprecated.text.SubwordTextEncoder.load_from_file('vocab')
    print('Vocab size is', encoder.vocab_size)
    # Load data.
    with open('dataset/' + dir + '/original.txt') as original:
        # Remove newline at the end.
        data_orig = original.readlines()[:-1]
    with open('dataset/' + dir + '/shuffled.txt') as shuffled:
        data_shuffled = shuffled.readlines()[:-1]
    data = data_orig + data_shuffled
    # Get song with max length to know the size for padding.
    max_len = 0
    longest_song = ''
    count = 0
    for i in range(len(data)):
        count += 1
        data[i] = data[i].strip()
        song = data[i]
        data[i] = encoder.encode(data[i])
        if len(data[i]) > max_len:
            max_len = len(data[i])
            longest_song = song
    print('max len is', max_len)
    print('longest song:', longest_song)
    # Create labels.
    labels = [1] * len(data_orig) + [0] * len(data_shuffled)
    # Shuffle.
    random.seed(42)
    random.shuffle(data)
    random.seed(42)
    random.shuffle(labels)
    # Create Dataset objects from generators.
    data_gen = lambda: (d for d in data)
    label_gen = lambda: ([l] for l in labels)
    dataset_data = tf.data.Dataset.from_generator(data_gen,
                                                  output_types=tf.int32,
                                                  output_shapes=tf.TensorShape(
                                                      [None]))
    dataset_labels = tf.data.Dataset.from_generator(
        label_gen, output_types=tf.int32, output_shapes=tf.TensorShape([1]))
    dataset = Dataset.zip((dataset_data, dataset_labels))
    # Each batch is padded to the size of the longest element in that batch.
    dataset_batched = dataset.padded_batch(batch_size,
                                           padding_values=0,
                                           padded_shapes=(max_len, 1))
    # Debug prints:
    print('{0} dataset: {1}'.format(dir, dataset_batched.cardinality()))
    # for element in dataset:
    #   print(element)
    for text_batch, label_batch in dataset_batched.take(1):
        print(text_batch.shape)
        print(label_batch.shape)
        for i in range(5):
            print(text_batch[i])
            print(label_batch[i])
    return dataset
Example #20
  def testMultipleIteratorsOnTheSameDataset(self):
    ds = Dataset.range(4)
    it1 = datasets.Iterator(ds)
    it2 = datasets.Iterator(ds)
    got = [x.numpy() for x in it1]
    self.assertAllEqual([0, 1, 2, 3], got)

    got = [x.numpy() for x in it2]
    self.assertAllEqual([0, 1, 2, 3], got)
Example #21
        def _input_fn():
            raw_features = {'pixels': features.values}
            raw_targets = np.array(labels)

            ds = Dataset.from_tensor_slices((raw_features, raw_targets))
            ds = ds.batch(batch_size)

            feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
            return feature_batch, label_batch
Example #22
 def _input_fn():
     feature_new = {'pixels': feature.values}
     label_new = np.array(label)
     
     ds = Dataset.from_tensor_slices((feature_new, label_new))
     ds = ds.batch(batch_size)
 
     feature_next, label_next = ds.make_one_shot_iterator().get_next()
     return feature_next, label_next
Example #23
def my_training_input_fn(features, targets):
    features = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(1)

    (features, targets) = ds.make_one_shot_iterator().get_next()

    return (features, targets)
Example #24
    def _dataset_without_targets(self, Xs, train):
        if not callable(Xs):
            Xs_fn = lambda: self.wrap_tqdm(Xs, train)
        else:
            Xs_fn = lambda: self.wrap_tqdm(Xs(), train)

        dataset_encoded = lambda: itertools.chain.from_iterable(map(self.text_to_tokens_mask, Xs_fn()))
        types, shapes = self.feed_shape_type_def()
        return Dataset.from_generator(dataset_encoded, types[0], shapes[0])  # 0s cut out the targets
Example #25
    def _input_fn(num_epochs=None, shuffle=True):
        idx = np.random.permutation(features.index)
        raw_features = {'pixels': features.reindex(idx)}
        raw_targets = np.array(labels)

        ds = Dataset.from_tensor_slices((raw_features, raw_targets))
        ds = ds.batch(batch_size).repeat(num_epochs)
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
Example #26
	def _input_fn(num_epochs=None, shuffle=True):
		idx = np.random.permutation(features.index)
		raw_features = {
			"Pclass": features["Pclass"].values,
			"Age": features["Age"].values,
			"SibSp": features["SibSp"].values,
			"Parch": features["Parch"].values,
			"Fare": features["Fare"].values,
			"SexCode": features["SexCode"].values,
			"EmbarkCode": features["EmbarkCode"].values,
		}
		raw_targets = np.array(labels)

		ds = Dataset.from_tensor_slices((raw_features, raw_targets))
		ds = ds.batch(batch_size)
		feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
		return feature_batch, label_batch
Example #27
  def test_smart_resize_tf_dataset(self, size):
    test_input_np = np.random.random((2, 20, 40, 3))
    test_ds = Dataset.from_tensor_slices(test_input_np)

    resize = lambda img: preprocessing_image.smart_resize(img, size=size)
    test_ds = test_ds.map(resize)
    for sample in test_ds.as_numpy_iterator():
      self.assertIsInstance(sample, np.ndarray)
      self.assertListEqual(list(sample.shape), [size[0], size[1], 3])
Example #28
  def testPyFunc(self):

    def my_map(inp):
      return [[x + 1 for x in inp]]

    ds = Dataset.range(4).map(
        lambda x: script_ops.py_func(my_map, [[x]], dtypes.int64))
    got = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual([[1], [2], [3], [4]], got)
Example #29
 def input_fn(X_data, y_data, batch_size=1, repeat=1, shuffle=True):
     X_tensor = {
         key: np.array(value)
         for key, value in dict(X_data).items()
     }
     ds = Dataset.from_tensor_slices((X_tensor, y_data))
     ds = ds.batch(int(batch_size)).repeat(repeat)
     X, y = ds.make_one_shot_iterator().get_next()
     return X, y
Example #30
    def inputFunction():
        raw_features = {"audioFeatures": features.values}
        raw_labels = np.array(labels)

        datatens = Dataset.from_tensor_slices((raw_features, raw_labels))
        datatens = datatens.batch(batch_size)

        # Returns the next batch of data.
        feature_batch, label_batch = datatens.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
Example #31
def input_fn(features, label, batch_size=10, epochs=None, shuffle=True):
    if label is None:
        inputs = dict(features)
    else:
        inputs = (dict(features), label)
    dataset = Dataset.from_tensor_slices(inputs)
    if shuffle:
        dataset = dataset.shuffle(1000)
    dataset = dataset.repeat(epochs).batch(batch_size)
    return dataset.make_one_shot_iterator().get_next()
Example #32
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    # Convert pandas data to a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    # Shuffle if specified.
    if shuffle:
        ds = ds.shuffle(1000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #33
def my_input_fn(feature_dataframe, target_serie, batch_size, num_epochs,
                shuffle, buffer_size):

    ds = Dataset.from_tensor_slices((dict(feature_dataframe), target_serie))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(buffer_size)

    return ds.make_one_shot_iterator().get_next()
Example #34
    def _get_file_list_dataset(self,
                               directory: str,
                               shuffle: bool = False) -> Dataset:
        # Get the file list from directory
        ds = Dataset.list_files(str(Path(directory) / "*/*"), shuffle=False)
        if shuffle:
            # Shuffle the filenames
            ds = ds.shuffle(len(ds), reshuffle_each_iteration=False)

        return ds
Example #35
def _input_fn(directory, config, mode):

    print("Fetching {} data...".format(mode))

    all_files = os.listdir(directory)

    all_features = []
    all_labels = []

    for file in all_files:
        features, labels = _load_json_file(os.path.join(directory, file),
                                           config)
        all_features += features
        all_labels += labels

    num_data_points = len(all_features)
    num_batches = math.ceil(len(all_features) / config["batch_size"])

    dataset = Dataset.from_tensor_slices((all_features, all_labels))

    if mode == "train":

        dataset = Dataset.from_tensor_slices((all_features, all_labels))
        dataset = dataset.batch(config["batch_size"]).shuffle(
            10000, seed=12345).repeat(config["num_epoch"])
        num_batches = math.ceil(len(all_features) / config["batch_size"])

    if mode in ("validation", "eval"):

        dataset = dataset.batch(config["batch_size"]).repeat(
            config["num_epoch"])
        num_batches = math.ceil(len(all_features) / config["batch_size"])

    iterator = dataset.make_one_shot_iterator()
    dataset_features, dataset_labels = iterator.get_next()

    return [{
        config["input_tensor_name"]: dataset_features
    }, dataset_labels, {
        "num_data_point": num_data_points,
        "num_batches": num_batches
    }]
Example #36
  def testTensorsExplicitPrefetchToDevice(self):
    ds = Dataset.from_tensor_slices([0., 1.])
    ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name()))

    with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'):
      datasets.Iterator(ds)

    for i, x in enumerate(ds):
      with ops.device(test.gpu_device_name()):
        x = math_ops.add(x, x)
        self.assertEqual(float(i) + float(i), x.numpy())
Example #37
 def testMapCaptureLookupTable(self):
     default_val = -1
     keys = constant_op.constant(['brain', 'salad', 'surgery'])
     values = constant_op.constant([0, 1, 2], dtypes.int64)
     table = lookup.HashTable(
         lookup.KeyValueTensorInitializer(keys, values), default_val)
     dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
     dataset = dataset.map(table.lookup)
     it = datasets.Iterator(dataset)
     got = [x.numpy() for x in it]
     self.assertAllEqual([0, 1, 2], got)
Example #38
 def input_fn(self, mode: ModeKeys):
     return {
         ModeKeys.TRAIN:
         lambda: self.train_ds.repeat(self.params.num_epoch).padded_batch(
             self.params.batch_size, padded_shapes=([None], [], [], [])),
         ModeKeys.EVAL:
         lambda: self.eval_ds.padded_batch(
             self.params.batch_size, padded_shapes=([None], [], [], [])),
         ModeKeys.INFER:
         lambda: Dataset.range(1)
     }[mode]().make_one_shot_iterator().get_next(), None
Example #39
def input_template(feature, label, batch_size=1, epoch_num=None, shuffle=True):
    """Return: A Tuple (feature, label) for next data batch"""
    # Convert pandas data into a dict of np arrays
    feature = {key: np.array(value) for key, value in dict(feature).items()}
    # Construct a dataset with configured Batch, Epoch, and Shuffle
    ds = Dataset.from_tensor_slices((feature, label))
    ds = ds.batch(batch_size).repeat(epoch_num)
    if shuffle: ds = ds.shuffle(10000)
    # Return the next batch of data
    feature, label = ds.make_one_shot_iterator().get_next()
    return feature, label
Example #40
def my_input_fn(x, y, batch_size=1, shuffle=True, num_epochs=None):

    x = {key: np.array(value) for key, value in dict(x).items()}
    ds = Dataset.from_tensor_slices((x, y))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(100)

    x, labels = ds.make_one_shot_iterator().get_next()
    return x, labels
Example #41
 def testRestoreInReconstructedIterator(self):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
     dataset = Dataset.range(10)
     for i in range(5):
         iterator = datasets.Iterator(dataset)
         checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
         checkpoint.restore(saver.latest_checkpoint(checkpoint_directory))
         for j in range(2):
             self.assertEqual(i * 2 + j, iterator.get_next().numpy())
         checkpoint.save(file_prefix=checkpoint_prefix)
Example #42
 def testMapCaptureLookupTable(self):
   default_val = -1
   keys = constant_op.constant(['brain', 'salad', 'surgery'])
   values = constant_op.constant([0, 1, 2], dtypes.int64)
   table = lookup.HashTable(
       lookup.KeyValueTensorInitializer(keys, values), default_val)
   dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
   dataset = dataset.map(table.lookup)
   it = datasets.Iterator(dataset)
   got = [x.numpy() for x in it]
   self.assertAllEqual([0, 1, 2], got)
Example #43
 def testRestoreInReconstructedIterator(self):
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
   dataset = Dataset.range(10)
   for i in range(5):
     iterator = datasets.Iterator(dataset)
     checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
     checkpoint.restore(checkpoint_management.latest_checkpoint(
         checkpoint_directory))
     for j in range(2):
       self.assertEqual(i * 2 + j, iterator.get_next().numpy())
     checkpoint.save(file_prefix=checkpoint_prefix)
Example #44
  def testRestoreExhaustedIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.range(3)
    iterator = datasets.Iterator(dataset)

    checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
    self.assertEqual(0, iterator.get_next().numpy())
    self.assertEqual(1, iterator.get_next().numpy())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertEqual(2, iterator.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertEqual(2, iterator.get_next().numpy())
Example #45
 def testSaveRestore(self):
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
   dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
   dataset = dataset.map(math_ops.square).batch(2)
   iterator = datasets.Iterator(dataset)
   checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
   self.assertAllEqual([1, 4], iterator.get_next().numpy())
   save_path = checkpoint.save(checkpoint_prefix)
   self.assertAllEqual([9, 16], iterator.get_next().numpy())
   self.assertAllEqual([25, 36], iterator.get_next().numpy())
   checkpoint.restore(save_path)
   self.assertAllEqual([9, 16], iterator.get_next().numpy())
   self.assertAllEqual([25, 36], iterator.get_next().numpy())
Example #46
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    # Convert the pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a TensorFlow Dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #47
  def testSparseTensorElements(self):
    components = (sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0], [1, 0], [2, 0]]),
        values=np.array([0, 0, 0]),
        dense_shape=np.array([3, 1])),
                  sparse_tensor.SparseTensorValue(
                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
                      values=np.array([1, 2, 3]),
                      dense_shape=np.array([3, 3])))

    expected = [
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[0]]),
             values=np.array([1]),
             dense_shape=np.array([3]))),
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[1]]),
             values=np.array([2]),
             dense_shape=np.array([3]))),
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[2]]),
             values=np.array([3]),
             dense_shape=np.array([3]))),
    ]

    for i, result in enumerate(
        datasets.Iterator(Dataset.from_tensor_slices(components))):
      self.assertSparseValuesEqual(expected[i][0], result[0])
      self.assertSparseValuesEqual(expected[i][1], result[1])
Example #48
 def testBasicImplicitIterator(self):
   got = []
   for t in Dataset.range(4):
     got.append(t.numpy())
   self.assertAllEqual([0, 1, 2, 3], got)
Example #49
 def testTensorsPlacedOnDevice(self):
   ds = Dataset.from_tensors([0., 1.])
   with ops.device(test.gpu_device_name()):
     x = datasets.Iterator(ds).next()
     x = math_ops.add(x, x)
   self.assertAllEqual([0., 2.], x.numpy())
Example #50
 def testGpuDefinedDataset(self):
   with ops.device(test.gpu_device_name()):
     ds = Dataset.from_tensors([0., 1.])
     for x in ds:
       y = math_ops.add(x, x)
   self.assertAllEqual([0., 2.], y.numpy())
Example #51
 def testBasic(self):
   got = []
   for t in datasets.Iterator(Dataset.range(4)):
     got.append(t.numpy())
   self.assertAllEqual([0, 1, 2, 3], got)
Example #52
 def testBasicOneShotIterator(self):
   got = []
   for t in Dataset.range(4).make_one_shot_iterator():
     got.append(t.numpy())
   self.assertAllEqual([0, 1, 2, 3], got)