def testPandasFeeding(self):
  """Checks that a DataFrame is dequeued cyclically, index tensor first."""
  if not HAS_PANDAS:
    return
  with tf.Graph().as_default():
    col_a = np.arange(32)
    col_b = np.arange(32, 64)
    frame = pd.DataFrame({"a": col_a, "b": col_b}, index=np.arange(64, 96))
    feed_queue = ff.enqueue_data(frame, capacity=100)
    batch_size = 5
    dequeue_op = feed_queue.dequeue_many(5)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      for step in range(100):
        # Rows wrap around modulo the frame length.
        positions = [
            pos % col_a.shape[0]
            for pos in range(batch_size * step, batch_size * (step + 1))
        ]
        wanted_index = frame.index[positions]
        wanted_rows = frame.iloc[positions]
        result = sess.run(dequeue_op)
        np.testing.assert_array_equal(wanted_index, result[0])
        for num, name in enumerate(frame.columns):
          np.testing.assert_array_equal(wanted_rows[name].values,
                                        result[num + 1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function.

  Enqueues the pandas DataFrame `x` (captured from the enclosing scope) and
  dequeues batches of features. When a target series `y` is supplied, it is
  appended to `x` under `target_column` and returned separately.

  Returns:
    A dict of feature tensors keyed by `[index_column] + x.columns`; when `y`
    is not None, a `(features, target)` tuple instead.

  Raises:
    ValueError: if `target_column` is already a column of `x`, or if the
      indices of `x` and `y` do not match.
  """
  if y is not None:
    if target_column in x:
      # Interpolate eagerly: ValueError does not apply logging-style
      # %-formatting to extra positional arguments, so the original
      # two-argument form left the message uninterpolated.
      raise ValueError(
          'Found already column \'%s\' in x, please change '
          'target_column to something else. Current columns '
          'in x: %s' % (target_column, x.columns))
    if not np.array_equal(x.index, y.index):
      raise ValueError(
          'Index for x and y are mismatch, this will lead '
          'to missing values. Please make sure they match or '
          'use .reset_index() method.\n'
          'Index for x: %s\n'
          'Index for y: %s\n' % (x.index, y.index))
    x[target_column] = y
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  # With a bounded number of epochs the final batch may be short, so use
  # dequeue_up_to instead of dequeue_many.
  if num_epochs is None:
    features = queue.dequeue_many(batch_size)
  else:
    features = queue.dequeue_up_to(batch_size)
  features = dict(zip([index_column] + list(x.columns), features))
  if y is not None:
    target = features.pop(target_column)
    return features, target
  return features
def testShuffle(self):
  """Verifies shuffled dequeues draw roughly uniformly from the array."""
  array_size = 7
  batch_size = 3
  iterations = 1000
  expected_mean = batch_size * iterations * 1.0 / array_size
  tolerance = 3
  with tf.Graph().as_default():
    source = np.arange(array_size)
    feed_queue = ff.enqueue_data(source, capacity=100, shuffle=True,
                                 seed=1234)
    dequeue_op = feed_queue.dequeue_many(batch_size)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      occurrences = dict.fromkeys(source, 0)
      for _ in range(iterations):
        batch = sess.run(dequeue_op)
        # batch[1] holds the dequeued values; batch[0] is the row index.
        for value in batch[1]:
          self.assertIn(value, source)
          occurrences[value] += 1
      template = "Value {} occurred {} times, expected {:.2f} +/- {}"
      for value, seen in occurrences.items():
        self.assertGreater(expected_mean + tolerance, seen,
                           template.format(value, seen, expected_mean,
                                           tolerance))
        self.assertLess(expected_mean - tolerance, seen,
                        template.format(value, seen, expected_mean,
                                        tolerance))
      coord.request_stop()
      coord.join(threads)
def testShuffle(self):
  """Shuffled dequeue should sample every array element near-uniformly."""
  array_size = 7
  batch_size = 3
  iterations = 1000
  mean = batch_size * iterations * 1.0 / array_size
  tolerance = 3
  with tf.Graph().as_default():
    array = np.arange(array_size)
    q = ff.enqueue_data(array, capacity=100, shuffle=True, seed=1234)
    dq_op = q.dequeue_many(batch_size)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      counts = {}
      for value in array:
        counts[value] = 0
      for _ in range(iterations):
        # Dequeue yields (row indices, values); only the values matter here.
        _, values = sess.run(dq_op)
        for value in values:
          self.assertIn(value, array)
          counts[value] += 1
      for value, seen in counts.items():
        detail = ("Value {} occurred {} times, expected {:.2f} +/- {}"
                  .format(value, seen, mean, tolerance))
        self.assertGreater(mean + tolerance, seen, detail)
        self.assertLess(mean - tolerance, seen, detail)
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function.

  Feeds the pandas DataFrame `x` from the enclosing scope through a queue.
  If a target series `y` is given it is inserted as column `target_column`
  and popped back out of the dequeued features.

  Returns:
    A dict of feature tensors keyed by `[index_column] + x.columns`; when `y`
    is not None, a `(features, target)` tuple instead.

  Raises:
    ValueError: if `target_column` already exists in `x`, or if the indices
      of `x` and `y` differ.
  """
  if y is not None:
    if target_column in x:
      # Bug fix: the message must be %-interpolated before raising —
      # ValueError stores extra positional args verbatim and never formats.
      raise ValueError('Found already column \'%s\' in x, please change '
                       'target_column to something else. Current columns '
                       'in x: %s' % (target_column, x.columns))
    if not np.array_equal(x.index, y.index):
      raise ValueError('Index for x and y are mismatch, this will lead '
                       'to missing values. Please make sure they match or '
                       'use .reset_index() method.\n'
                       'Index for x: %s\n'
                       'Index for y: %s\n' % (x.index, y.index))
    x[target_column] = y
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  # dequeue_up_to tolerates a short final batch when epochs are bounded.
  if num_epochs is None:
    features = queue.dequeue_many(batch_size)
  else:
    features = queue.dequeue_up_to(batch_size)
  features = dict(zip([index_column] + list(x.columns), features))
  if y is not None:
    target = features.pop(target_column)
    return features, target
  return features
def testPandasFeeding(self):
  """DataFrame feeding yields rows in order along with the frame index."""
  if not HAS_PANDAS:
    return
  with tf.Graph().as_default():
    first = np.arange(32)
    second = np.arange(32, 64)
    frame = pd.DataFrame({"a": first, "b": second}, index=np.arange(64, 96))
    feed_queue = ff.enqueue_data(frame, capacity=100)
    batch_size = 5
    dequeue_op = feed_queue.dequeue_many(5)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      num_rows = first.shape[0]
      for step in range(100):
        start = batch_size * step
        # Feeding cycles through the frame, so positions wrap modulo length.
        positions = [(start + offset) % num_rows
                     for offset in range(batch_size)]
        result = sess.run(dequeue_op)
        np.testing.assert_array_equal(frame.index[positions], result[0])
        rows = frame.iloc[positions]
        for num, name in enumerate(frame.columns):
          np.testing.assert_array_equal(rows[name].values, result[num + 1])
      coord.request_stop()
      coord.join(threads)
def _apply_transform(self, transform_input):
  """Enqueues `self.data` and returns one dequeued batch as the output."""
  feed_queue = feeding_functions.enqueue_data(self.data,
                                              self.queue_capacity,
                                              self.shuffle,
                                              self.min_after_dequeue)
  batch = feed_queue.dequeue_many(self.batch_size)
  # TODO(jamieas): dequeue and dequeue_many will soon return a list
  # regardless of the number of enqueued tensors. Remove the following once
  # that change is in place.
  if not isinstance(batch, (tuple, list)):
    batch = (batch,)
  # pylint: disable=not-callable
  return self.return_type(*batch)
def _apply_transform(self, transform_input):
  """Dequeues one batch of `self.data` wrapped in `self.return_type`."""
  queue = feeding_functions.enqueue_data(self.data, self.queue_capacity,
                                         self.shuffle,
                                         self.min_after_dequeue)
  dequeued = queue.dequeue_many(self.batch_size)
  # TODO(jamieas): dequeue and dequeue_many will soon return a list
  # regardless of the number of enqueued tensors. Remove the following once
  # that change is in place.
  dequeued = dequeued if isinstance(dequeued, (tuple, list)) else (dequeued,)
  # pylint: disable=not-callable
  return self.return_type(*dequeued)
def testArrayFeedingMultiThread(self):
  """With 8 threads and shuffle, dequeued rows must match their indices."""
  with tf.Graph().as_default():
    data = np.arange(256).reshape([128, 2])
    feed_queue = ff.enqueue_data(data, capacity=128, num_threads=8,
                                 shuffle=True)
    batch_size = 3
    dequeue_op = feed_queue.dequeue_many(batch_size)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        # Order is nondeterministic under shuffle; only index/row pairing
        # can be checked.
        indices, rows = sess.run(dequeue_op)
        np.testing.assert_array_equal(get_rows(data, indices), rows)
      coord.request_stop()
      coord.join(threads)
def testArrayFeeding(self):
  """Unshuffled array feeding dequeues rows in cyclic order."""
  with tf.Graph().as_default():
    data = np.arange(32).reshape([16, 2])
    feed_queue = ff.enqueue_data(data, capacity=100)
    batch_size = 3
    dequeue_op = feed_queue.dequeue_many(batch_size)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      num_rows = data.shape[0]
      for step in range(100):
        start = batch_size * step
        expected_indices = [(start + offset) % num_rows
                            for offset in range(batch_size)]
        expected_rows = get_rows(data, expected_indices)
        actual_indices, actual_rows = sess.run(dequeue_op)
        np.testing.assert_array_equal(expected_indices, actual_indices)
        np.testing.assert_array_equal(expected_rows, actual_rows)
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function."""
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      min_after_dequeue=min_after_dequeue,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  # Bounded epochs may end on a short batch, hence dequeue_up_to.
  features = (queue.dequeue_many(batch_size) if num_epochs is None
              else queue.dequeue_up_to(batch_size))
  features = dict(zip([index_column] + list(x.columns), features))
  if y is None:
    return features
  target = features.pop(target_column)
  return features, target
def input_fn():
  """Numpy input function.

  Feeds the dict of numpy arrays `x` (from the enclosing scope) through a
  queue. When `y` is supplied it rides along under a synthetic unique key
  and is split back out of the dequeued features.

  Returns:
    A dict of feature tensors keyed like `x`; when `y` is not None, a
    `(features, target)` tuple instead.

  Raises:
    TypeError: if `x` is not a dict.
    ValueError: if the values of `x` (and `y`) do not share one shape.
  """
  if not isinstance(x, dict):
    raise TypeError('x must be dict; got {}'.format(type(x).__name__))

  # Bug fix: work on a shallow copy so the caller's dict is never mutated —
  # previously the synthetic target key was inserted into `x` itself, so a
  # second invocation saw the target column already present.
  x_copy = dict(x)
  unique_target_key = _get_unique_target_key(x_copy)
  if y is not None:
    x_copy[unique_target_key] = y

  if len(set(v.shape for v in x_copy.values())) != 1:
    shape_dict_of_x = {k: x_copy[k].shape for k in x_copy.keys()}
    shape_of_y = None if y is None else y.shape
    raise ValueError('Shape of x and y are mismatch, this will lead to '
                     'missing values. Please make sure each value in x have '
                     'the same shape as y.\n'
                     'Shape for x: {}\n'
                     'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

  # Ensure the order of iteration is consistent.
  ordered_dict_x = collections.OrderedDict(
      sorted(x_copy.items(), key=lambda t: t[0]))

  queue = feeding_functions.enqueue_data(
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)

  features = (queue.dequeue_many(batch_size) if num_epochs is None
              else queue.dequeue_up_to(batch_size))

  # Remove the first `Tensor` in `features`, which is the row number.
  if features:
    features.pop(0)

  features = dict(zip(ordered_dict_x.keys(), features))
  if y is not None:
    target = features.pop(unique_target_key)
    return features, target
  return features
def input_fn():
  """Numpy input function.

  Queues the dict of numpy arrays `x` (captured from the enclosing scope)
  and yields dequeued feature batches; a supplied `y` is threaded through
  under a generated unique key and returned as the target.

  Returns:
    A dict of feature tensors keyed like `x`; when `y` is not None, a
    `(features, target)` tuple instead.

  Raises:
    TypeError: if `x` is not a dict.
    ValueError: if the values of `x` (and `y`) do not share one shape.
  """
  if not isinstance(x, dict):
    raise TypeError('x must be dict; got {}'.format(type(x).__name__))

  # Bug fix: copy before inserting the target so the enclosing `x` is left
  # untouched; the original mutated `x`, breaking repeated invocations.
  x_copy = dict(x)
  unique_target_key = _get_unique_target_key(x_copy)
  if y is not None:
    x_copy[unique_target_key] = y

  if len(set(v.shape for v in x_copy.values())) != 1:
    shape_dict_of_x = {k: x_copy[k].shape for k in x_copy.keys()}
    shape_of_y = None if y is None else y.shape
    raise ValueError('Shape of x and y are mismatch, this will lead to '
                     'missing values. Please make sure each value in x have '
                     'the same shape as y.\n'
                     'Shape for x: {}\n'
                     'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

  # Ensure the order of iteration is consistent.
  ordered_dict_x = collections.OrderedDict(
      sorted(x_copy.items(), key=lambda t: t[0]))

  queue = feeding_functions.enqueue_data(
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)

  features = (queue.dequeue_many(batch_size) if num_epochs is None
              else queue.dequeue_up_to(batch_size))

  # Remove the first `Tensor` in `features`, which is the row number.
  if features:
    features.pop(0)

  features = dict(zip(ordered_dict_x.keys(), features))
  if y is not None:
    target = features.pop(unique_target_key)
    return features, target
  return features
def _generator_input_fn():
  """generator input function."""
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    features = queue.dequeue_many(batch_size)
  else:
    features = queue.dequeue_up_to(batch_size)
  # A single enqueued tensor comes back bare; normalize to a list.
  if not isinstance(features, list):
    features = [features]
  features = dict(zip(input_keys, features))
  if target_key is None:
    return features
  if len(target_key) > 1:
    target = {key: features.pop(key) for key in target_key}
  else:
    target = features.pop(target_key[0])
  return features, target
def _generator_input_fn():
  """generator input function."""
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  dequeued = (queue.dequeue_many(batch_size) if num_epochs is None
              else queue.dequeue_up_to(batch_size))
  # Normalize the single-tensor case so zip below always works.
  if not isinstance(dequeued, list):
    dequeued = [dequeued]
  features = dict(zip(input_keys, dequeued))
  if target_key is not None:
    if len(target_key) == 1:
      target = features.pop(target_key[0])
    else:
      target = {key: features.pop(key) for key in target_key}
    return features, target
  return features
def testPandasFeedingMultiThread(self):
  """Multi-threaded DataFrame feeding: rows must match dequeued indices."""
  if not HAS_PANDAS:
    return
  with tf.Graph().as_default():
    first = np.arange(128, 256)
    frame = pd.DataFrame({"a": first, "b": 2 * first}, index=np.arange(128))
    feed_queue = ff.enqueue_data(frame, capacity=128, num_threads=8,
                                 shuffle=True)
    batch_size = 5
    dequeue_op = feed_queue.dequeue_many(batch_size)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        result = sess.run(dequeue_op)
        # result[0] carries the positional indices of the dequeued rows.
        rows = frame.iloc[result[0]]
        for num, name in enumerate(frame.columns):
          np.testing.assert_array_equal(rows[name].values, result[num + 1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function."""
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      min_after_dequeue=min_after_dequeue,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  # A bounded number of epochs can end with a short batch.
  features = (queue.dequeue_many(batch_size) if num_epochs is None
              else queue.dequeue_up_to(batch_size))
  assert len(features) == len(x.columns) + 1, ('Features should have one '
                                               'extra element for the index.')
  # Drop the leading index tensor before keying by column name.
  features = features[1:]
  features = dict(zip(list(x.columns), features))
  if y is None:
    return features
  target = features.pop(target_column)
  return features, target
def testPandasFeedingMultiThread(self):
  """Shuffled multi-threaded feeding still pairs each row with its index."""
  if not HAS_PANDAS:
    return
  with tf.Graph().as_default():
    col_a = np.arange(128, 256)
    col_b = 2 * col_a
    df = pd.DataFrame({"a": col_a, "b": col_b}, index=np.arange(128))
    q = ff.enqueue_data(df, capacity=128, num_threads=8, shuffle=True)
    batch_size = 5
    dq_op = q.dequeue_many(batch_size)
    with tf.Session() as sess:
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        dequeued = sess.run(dq_op)
        # First element is the row index; the rest are column batches.
        indices, columns = dequeued[0], dequeued[1:]
        expected = df.iloc[indices]
        for actual, name in zip(columns, df.columns):
          np.testing.assert_array_equal(expected[name].values, actual)
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function."""
  queue = feeding_functions.enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      min_after_dequeue=min_after_dequeue,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    batch = queue.dequeue_many(batch_size)
  else:
    batch = queue.dequeue_up_to(batch_size)
  assert len(batch) == len(x.columns) + 1, ('Features should have one '
                                            'extra element for the index.')
  # Skip the leading row-index tensor; callers only receive data columns.
  features = dict(zip(list(x.columns), batch[1:]))
  if y is not None:
    target = features.pop(target_column)
    return features, target
  return features