Example #1
  def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
    ds = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6]).map(math_ops.square)

    got1 = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual([1, 4, 9, 16, 25, 36], got1)
    got2 = [x.numpy() for x in datasets.Iterator(ds)]
    self.assertAllEqual(got1, got2)
Example #2
  def benchmarkSliceBatchCacheRepeatCallable(self):
    input_size = 10000
    batch_size = 100
    num_epochs = 100

    input_data = np.random.randn(input_size)

    dataset = (
        Dataset.from_tensor_slices(input_data).batch(batch_size).cache()
        .repeat(num_epochs))
    iterator = datasets.Iterator(dataset)

    ends = [time.time()]
    for _ in iterator:
      ends.append(time.time())

    deltas = np.ediff1d(ends)
    median_wall_time = np.median(deltas)
    print(
        'Slice/batch/cache/repeat eager input size: %d batch size: %d Median '
        'wall time per element: %f'
        % (input_size, batch_size, median_wall_time))
    self.report_benchmark(
        iters=len(deltas),
        wall_time=median_wall_time,
        name='benchmark_slice_batch_cache_repeat_eager_input_%d_batch_%d' %
        (input_size, batch_size))
Example #3
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """ Trains a linear regression model of one feature.
    Args:
        :param features: pandas DataFrame of features
        :param targets: pandas DataFrame of targets
        :param batch_size: size of batches to be passed to the model
        :param shuffle: weather to shuffle the data
        :param num_epochs: number of epochs for which data should be repeated. None = repeat indefinitely
    :return:
        Tuple of (features, labels) for next data batch
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
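An input function like this is handed to an estimator wrapped in a zero-argument lambda, so the estimator can invoke it itself. A minimal usage sketch under that assumption; the toy data, column name, and regressor configuration below are illustrative, not from the original source:

import pandas as pd
import tensorflow as tf

# Hypothetical toy data; the column name is illustrative.
my_feature = pd.DataFrame({"total_rooms": [1.0, 2.0, 3.0, 4.0]})
targets = pd.Series([2.0, 4.0, 6.0, 8.0])

feature_columns = [tf.feature_column.numeric_column("total_rooms")]
linear_regressor = tf.estimator.LinearRegressor(feature_columns=feature_columns)

# The estimator calls the zero-argument wrapper itself during training.
linear_regressor.train(
    input_fn=lambda: my_input_fn(my_feature, targets, batch_size=2),
    steps=10)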
Example #4
  def testSaveRestoreMultipleIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
    dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    iterator_1 = datasets.Iterator(dataset)
    iterator_2 = datasets.Iterator(dataset)
    dataset_2 = Dataset.range(10)
    iterator_3 = datasets.Iterator(dataset_2)

    checkpoint = checkpointable_utils.Checkpoint(
        iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3)
    self.assertAllEqual([1, 4], iterator_1.get_next().numpy())
    self.assertEqual(0, iterator_3.get_next().numpy())
    self.assertEqual(1, iterator_3.get_next().numpy())
    self.assertEqual(2, iterator_3.get_next().numpy())

    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([1, 4], iterator_2.get_next().numpy())
    self.assertAllEqual([9, 16], iterator_2.get_next().numpy())
    self.assertEqual(3, iterator_3.get_next().numpy())
    checkpoint.restore(save_path)
    self.assertAllEqual([9, 16], iterator_1.get_next().numpy())
    self.assertAllEqual([1, 4], iterator_2.get_next().numpy())
    self.assertEqual(3, iterator_3.get_next().numpy())
Example #5
  def testTensorsExplicitPrefetchToDevice(self):
    ds = Dataset.from_tensor_slices([0., 1.])
    ds = ds.apply(prefetching_ops.prefetch_to_device(test.gpu_device_name()))

    with self.assertRaisesRegexp(TypeError, 'prefetch_to_device'):
      datasets.Iterator(ds)

    for i, x in enumerate(ds):
      with ops.device(test.gpu_device_name()):
        x = math_ops.add(x, x)
        self.assertEqual(float(i) + float(i), x.numpy())
Example #6
 def testMapCaptureLookupTable(self):
   default_val = -1
   keys = constant_op.constant(['brain', 'salad', 'surgery'])
   values = constant_op.constant([0, 1, 2], dtypes.int64)
   table = lookup.HashTable(
       lookup.KeyValueTensorInitializer(keys, values), default_val)
   dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
   dataset = dataset.map(table.lookup)
   it = datasets.Iterator(dataset)
   got = [x.numpy() for x in it]
   self.assertAllEqual([0, 1, 2], got)
Example #7
 def testSaveRestore(self):
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
   dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
   dataset = dataset.map(math_ops.square).batch(2)
   iterator = datasets.Iterator(dataset)
   checkpoint = checkpointable_utils.Checkpoint(iterator=iterator)
   self.assertAllEqual([1, 4], iterator.get_next().numpy())
   save_path = checkpoint.save(checkpoint_prefix)
   self.assertAllEqual([9, 16], iterator.get_next().numpy())
   self.assertAllEqual([25, 36], iterator.get_next().numpy())
   checkpoint.restore(save_path)
   self.assertAllEqual([9, 16], iterator.get_next().numpy())
   self.assertAllEqual([25, 36], iterator.get_next().numpy())
Example #8
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    # Convert the pandas data into numpy arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a TensorFlow Dataset, and configure batching and repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #9
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):

    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
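The recurring "warning: 2GB limit" refers to the protobuf size cap on a TF 1.x GraphDef: `from_tensor_slices` on real NumPy arrays embeds the data in the graph as constants, so very large arrays overflow it. A minimal sketch of the commonly recommended workaround, feeding the data through placeholders and an initializable iterator; the array names and shapes here are illustrative:

import numpy as np
import tensorflow as tf

features_arr = np.random.randn(100, 4).astype(np.float32)
labels_arr = np.random.randn(100).astype(np.float32)

# Placeholders keep the large arrays out of the serialized graph.
features_ph = tf.placeholder(features_arr.dtype, features_arr.shape)
labels_ph = tf.placeholder(labels_arr.dtype, labels_arr.shape)

ds = tf.data.Dataset.from_tensor_slices((features_ph, labels_ph)).batch(10)
iterator = ds.make_initializable_iterator()
next_batch = iterator.get_next()

with tf.Session() as sess:
    # The real data is fed once, when the iterator is initialized.
    sess.run(iterator.initializer,
             feed_dict={features_ph: features_arr, labels_ph: labels_arr})
    print(sess.run(next_batch))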
Example #10
def my_input_fn(features,
                targets,
                batch_size=5,
                shuffle=True,
                num_epochs=None):
    # Convert the CSV data into np arrays; an np array guarantees that every
    # element in the list has the same dtype.
    # Then slice the data; otherwise the input would be the entire dataset
    # at once. After slicing, batch the examples: batch_size controls how
    # many are fed per step. repeat sets how many times the data is repeated,
    # i.e. the number of epochs; None repeats indefinitely.
    features = {key: np.array(value) for key, value in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    # Shuffle the data at random.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
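The role of `repeat()` is easy to see on a tiny dataset: it concatenates `num_epochs` passes over the data into a single stream, and `repeat(None)` streams indefinitely. A minimal sketch, assuming a TF 1.x build with eager execution enabled:

import tensorflow as tf
tf.enable_eager_execution()

ds = tf.data.Dataset.range(3).repeat(2)  # two epochs over {0, 1, 2}
print([int(x) for x in ds])              # [0, 1, 2, 0, 1, 2]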
Example #11
 def _input_fn(num_epochs=None, shuffle=True):
   # Input pipelines are reset with each call to .train(). To ensure model
   # gets a good sampling of data, even when number of steps is small, we 
   # shuffle all the data before creating the Dataset object
   idx = np.random.permutation(features.index)
   raw_features = {"pixels":features.reindex(idx)}
   raw_targets = np.array(labels[idx])
  
   ds = Dataset.from_tensor_slices((raw_features,raw_targets)) # warning: 2GB limit
   ds = ds.batch(batch_size).repeat(num_epochs)
   
   if shuffle:
     ds = ds.shuffle(10000)
   
   # Return the next batch of data.
   feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
   return feature_batch, label_batch
Example #12
    def _input_fn(num_epochs=None, shuffle=True):
        idx = np.random.permutation(features.index)
        raw_features = {
            "Pclass": features["Pclass"].values,
            "Age": features["Age"].values,
            "SibSp": features["SibSp"].values,
            "Parch": features["Parch"].values,
            "Fare": features["Fare"].values,
            "SexCode": features["SexCode"].values,
            "EmbarkCode": features["EmbarkCode"].values
        }
        raw_targets = np.array(labels)

        ds = Dataset.from_tensor_slices((raw_features, raw_targets))
        ds = ds.batch(batch_size)
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
Example #13
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):

    # Convert the pandas feature data into a dict of NumPy arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct the dataset.
    ds = Dataset.from_tensor_slices((features, targets))
    # Configure the number of examples per step: each batch passed to the
    # model holds batch_size examples. Passing the default num_epochs=None
    # to repeat() makes the input data repeat indefinitely, with no cap on
    # the number of passes.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data with a buffer of 10000 so it reaches the model in
    # random order during training.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Build an iterator over the dataset and return the next batch of data
    # to the LinearRegressor.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #14
    def create_predict_fn():
        # Same as above: convert the data into a dictionary of feature arrays.
        featureDictionary = dict()
        for i in range(0, 27):
            tempArray = []
            for j in range(0, len(bodyPartFeatures)):
                tempArray.append(bodyPartFeatures[j][i])
            tempArray = np.asarray(tempArray)
            featureDictionary[bodyParts[i]] = tempArray

        # Same as above, except there is no epoch mechanic because this is
        # used for predictions (we don't want to repeat the data being predicted).
        ds = Dataset.from_tensor_slices((featureDictionary, labels))
        ds = ds.batch(batch_size)
        ds = ds.shuffle(int(numberTests))
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()

        return feature_batch, label_batch
Example #15
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):

    features = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #16
def _input_function_delegate(input_features,
                             targets,
                             batch_size=1,
                             is_shuffle=True,
                             epoch_count=None):
    features = {
        key: numpy.array(value)
        for key, value in dict(input_features).items()
    }

    dataset = Dataset.from_tensor_slices(
        (features, targets)).batch(batch_size).repeat(epoch_count)

    if is_shuffle:
        dataset = dataset.shuffle(10000)

    return dataset.make_one_shot_iterator().get_next()
Example #17
def tf_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct the dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(10000)

    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #18
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    features = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((features, targets))
    # See https://www.tensorflow.org/get_started/datasets_quickstart
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    features, labels = ds.make_one_shot_iterator().get_next()
    # print(features, labels)
    return features, labels
Example #19
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    # Convert to dict of numpy arrays
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct the dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))  # !! 2GB limit!!
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(buffer_size=10_000)

    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #20
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    # features here is a dict such as {'total_rooms': array([ 5039., 1840., ..., 705.])};
    # it holds a single {key: value} pair, and the value is an np array.
    features = {key: np.array(value) for key, value in dict(features).items()}
    # Construct a dataset.
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    # If shuffle is true, randomize the data.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)
    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #21
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=False,
                num_epochs=None):
    # targets: pd.Series

    features = {key: np.array(value) for key, value in dict(features).items()}
    ds = Dataset.from_tensor_slices((features, targets))  # features is a dict of np arrays
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(10000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #22
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    # Convert the pandas data into a dict of NumPy arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct the dataset.
    ds = Dataset.from_tensor_slices((features, targets))  # 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #23
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):

    # Convert the pandas data into np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #24
    def _input_fn(num_epochs=None, shuffle=True):

        # randomize all the data
        idx = np.random.permutation(features.index)

        # create dataset object
        raw_features = {"pixels": features.reindex(idx)}
        raw_targets = np.array(labels[idx])
        ds = Dataset.from_tensor_slices((raw_features, raw_targets))
        ds = ds.batch(batch_size).repeat(num_epochs)

        # shuffle data if requested
        if shuffle:
            ds = ds.shuffle(10000)

        # return the next batch of data.
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
Example #25
  def _input_fn(num_epochs=None, shuffle=True):
    # Input pipelines are reset with each call to .train(). To ensure model
    # gets a good sampling of data, even when number of steps is small, we 
    # shuffle all the data before creating the Dataset object

    raw_features = {key:np.array(value) for key, value in dict(features).items()}
    # print(raw_features)
    raw_targets = np.array(labels)
   
    ds = Dataset.from_tensor_slices((raw_features,raw_targets)) # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)
    
    if shuffle:
      ds = ds.shuffle(10000)
    
    # Return the next batch of data.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
Example #26
    def my_input_fn(features,
                    targets,
                    batch_size=1,
                    shuffle=True,
                    num_epochs=None):
        """Trains a linear regression model of one feature.

        Args:
          features: pandas DataFrame of features
          targets: pandas DataFrame of targets
          batch_size: Size of batches to be passed to the model
          shuffle: True or False. Whether to shuffle the data.
          num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
        Returns:
          Tuple of (features, labels) for next data batch
        """
        print("The Input Function was Called")
        # Convert pandas data into a dict of np arrays.
        # dict() converts the DataFrame into a mapping of column name to
        # column values; dictionary.items() returns (key, value) tuples,
        # which the dict comprehension below consumes.
        features = {
            key: np.array(value)
            for key, value in dict(features).items()
        }

        # Construct a dataset, and configure batching/repeating.
        # This feeds the full data that was passed to the function.
        ds = Dataset.from_tensor_slices(
            (features, targets))  # warning: 2GB limit
        # Just setting the batch size and num_epochs variables.
        # num_epochs is not important here: 1000 steps at batch size 1 means
        # only 1000 samples are used, which is not even one epoch, so setting
        # an epoch cap here has no effect.
        ds = ds.batch(batch_size).repeat(num_epochs)

        # Shuffle the data, if specified.
        if shuffle:
            ds = ds.shuffle(buffer_size=10000)

        # Return the next batch of data.
        features, labels = ds.make_one_shot_iterator().get_next()

        return features, labels
Example #27
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    # Convert the pandas DataFrame into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Build the dataset.
    # Each element of the dataset is one example of the input data, in the
    # form (features, targets).
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
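A note on the buffer sizes used throughout these examples: `shuffle(buffer_size)` draws each output uniformly from a buffer of the next `buffer_size` pending elements, so a small buffer yields only a weak, local shuffle, while a buffer at least as large as the dataset gives a full uniform shuffle. A minimal sketch, again assuming a TF 1.x build with eager execution enabled:

import tensorflow as tf
tf.enable_eager_execution()

ds = tf.data.Dataset.range(10)
print([int(x) for x in ds.shuffle(buffer_size=10)])  # fully shuffled
print([int(x) for x in ds.shuffle(buffer_size=2)])   # mostly in order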
Example #28
    def data_func(self,
                  features,
                  targets,
                  batch_size=1,
                  shuffle=True,
                  num_epochs=None):
        features = {
            key: np.array(value)
            for key, value in dict(features).items()
        }
        ds = Dataset.from_tensor_slices((features, targets))
        ds = ds.batch(batch_size).repeat(num_epochs)

        if shuffle:
            ds = ds.shuffle(100)

        features, labels = ds.make_one_shot_iterator().get_next()

        return (features, labels)
Example #29
def my_input_fn(features,
                targets,
                batch_size=1,
                num_epochs=None,
                shuffle=True):

    #convert features into a dict of np arrays
    features = {key: np.array(val) for key, val in dict(features).items()}

    #construct dataset, configure batching/repeating
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(1000)

    features, labels = ds.make_one_shot_iterator().get_next()

    return features, labels
Example #30
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    #Convert pandas data into a dict of NumPy arrays
    features = {key: np.array(value) for key, value in dict(features).items()}

    #Create a dataset from features and configure batching and repeating
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    #Shuffle data if specified
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    #Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #31
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):

    features = {key: np.array(value) for key, value in dict(features).items()}

    # Use the Dataset API to construct a dataset, and configure
    # batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle if shuffle is set to True.
    if shuffle:
        ds = ds.shuffle(10000)

    # Return batches of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #32
def my_input_fn(features,
                targets,
                batch_size=1,
                shuffle=True,
                num_epochs=None):
    # Convert the content read by pandas into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct the dataset, then configure batching, e.g. the repeat count.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB size limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
Example #33
def predict_input_fn(features):
    """Trains model of multiple features.
  
  Args:
    features: pandas DataFrame of features
  Returns:
    features for data batch
  """

    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices(features)  # warning: 2GB limit
    ds = ds.batch(1).repeat(1)

    # Return the next batch of data.
    features = ds.make_one_shot_iterator().get_next()
    return features
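A prediction input function is wired up the same way as a training one, but through `predict`, and the single pass (`batch(1).repeat(1)`) ends the generator after one epoch. A hypothetical usage sketch; `linear_regressor` and `test_features` are illustrative names, reusing the kind of trained estimator sketched after Example #3:

# Hypothetical usage: the estimator drains the one-shot iterator until the
# single pass over the data is exhausted.
predictions = linear_regressor.predict(
    input_fn=lambda: predict_input_fn(test_features))
for p in predictions:
    print(p)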
Example #34
def create_training_input_fn(features,
                             labels,
                             batch_size,
                             num_epochs=None,
                             shuffle=True):

    idx = np.random.permutation(features.index)
    raw_features = {"pixels": features.reindex(idx)}
    raw_targets = np.array(labels[idx])

    ds = Dataset.from_tensor_slices((raw_features, raw_targets))
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch
Example #35
    def _input_fn(num_epochs=None, shuffle=True):
        # Input pipelines are reset with each call to .train().
        # To get a good sample of the data even when the number of steps is
        # small, shuffle all the data before creating the Dataset; the
        # shuffling here is the random permutation, not reindex itself.
        idx = np.random.permutation(features.index)  # random permutation
        raw_features = {
            "pixels": features.reindex(idx)
        }  # reindex the features by the permuted index and build a dict
        raw_targets = np.array(labels[idx])  # labels in the same permuted order

        ds = Dataset.from_tensor_slices(
            (raw_features, raw_targets))  # build the Dataset
        ds = ds.batch(batch_size).repeat(num_epochs)

        if shuffle:
            ds = ds.shuffle(10000)

        # Return the next batch of data
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
Example #36
    def _input_fn(num_epochs=None, shuffle=True):

        # Permute the data to get a good sample.
        indeks = np.random.permutation(features.index)
        # Copy the feature and label fields we need.
        _features = {"pixels": features.reindex(indeks)}
        _targets = np.array(labels[indeks])

        # Convert the permuted data into tensor slices, the form
        # TensorFlow works with.
        ds = Dataset.from_tensor_slices((_features, _targets))  # 2GB limit
        # Build the batched dataset the classifier will later be trained on.
        ds = ds.batch(batch_size).repeat(num_epochs)

        # Shuffle the training data.
        if shuffle:
            ds = ds.shuffle(10000)

        # Return the features and labels of the next batch of training data.
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
Example #37
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Builds the training input pipeline.

    :param features: pandas DataFrame of features
    :param targets: pandas DataFrame of targets
    :param batch_size: number of examples per batch
    :param shuffle: whether to shuffle the data
    :param num_epochs: number of epochs; None repeats indefinitely
    :return: tuple of (features, labels) for the next data batch
    """

    # Use tf.data to prepare the input: batch_size sets how many examples
    # each gradient-descent step sees, and num_epochs sets how many times
    # the dataset is repeated.
    dataset = Dataset.from_tensor_slices((features, targets))
    dataset = dataset.batch(batch_size).repeat(num_epochs)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=10000)

    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels
Example #38
def my_input_fn(features,
                targets=None,
                batch_size_val=1,
                shuffle=True,
                num_epochs=None):
    features = {key: np.array(value) for key, value in dict(features).items()}

    if targets is None:
        # No labels, use only features.
        inputs = features
    else:
        inputs = (features, targets)

    ds = Dataset.from_tensor_slices(inputs)
    ds = ds.batch(batch_size_val).repeat(num_epochs)

    if shuffle:
        ds = ds.shuffle(10000)

    return ds.make_one_shot_iterator().get_next()
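Because `targets` is optional here, the same function serves both training and prediction. A hypothetical usage sketch with illustrative toy data (the DataFrame and Series below are assumptions, not from the original source):

import numpy as np
import pandas as pd

df = pd.DataFrame({"x": np.arange(10.0)})
labels = pd.Series(np.arange(10.0) * 2.0)

train_batch = my_input_fn(df, labels, batch_size_val=4)  # (features, targets)
pred_batch = my_input_fn(df, shuffle=False)              # features only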
Example #39
  def testSparseTensorElements(self):
    components = (sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0], [1, 0], [2, 0]]),
        values=np.array([0, 0, 0]),
        dense_shape=np.array([3, 1])),
                  sparse_tensor.SparseTensorValue(
                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
                      values=np.array([1, 2, 3]),
                      dense_shape=np.array([3, 3])))

    expected = [
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[0]]),
             values=np.array([1]),
             dense_shape=np.array([3]))),
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[1]]),
             values=np.array([2]),
             dense_shape=np.array([3]))),
        (sparse_tensor.SparseTensorValue(
            indices=np.array([[0]]),
            values=np.array([0]),
            dense_shape=np.array([1])),
         sparse_tensor.SparseTensorValue(
             indices=np.array([[2]]),
             values=np.array([3]),
             dense_shape=np.array([3]))),
    ]

    for i, result in enumerate(
        datasets.Iterator(Dataset.from_tensor_slices(components))):
      self.assertSparseValuesEqual(expected[i][0], result[0])
      self.assertSparseValuesEqual(expected[i][1], result[1])