def testPandasFeeding(self):
  """Checks single-threaded DataFrame feeding against the source frame.

  Enqueues a two-column DataFrame with a non-default index, dequeues 100
  batches, and verifies that the first dequeued tensor carries the frame's
  index values and the remaining tensors match each column, with row
  positions wrapping modulo the frame length.
  """
  if not HAS_PANDAS:
    return
  with ops.Graph().as_default():
    array1 = np.arange(32)
    array2 = np.arange(32, 64)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
    q = ff._enqueue_data(df, capacity=100)
    batch_size = 5
    # Use batch_size instead of repeating the literal 5 so the dequeue size
    # cannot drift out of sync with the expected-index computation below.
    dq_op = q.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for i in range(100):
        # Feeding is sequential here (no shuffle), so batch i holds rows
        # [batch_size*i, batch_size*(i+1)) wrapped around the frame.
        indices = [
            j % array1.shape[0]
            for j in range(batch_size * i, batch_size * (i + 1))
        ]
        expected_df_indices = df.index[indices]
        expected_rows = df.iloc[indices]
        dq = sess.run(dq_op)
        np.testing.assert_array_equal(expected_df_indices, dq[0])
        for col_num, col in enumerate(df.columns):
          np.testing.assert_array_equal(expected_rows[col].values,
                                        dq[col_num + 1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function."""
  # pylint: disable=protected-access
  queue = feeding_functions._enqueue_data(
      x,
      queue_capacity,
      shuffle=shuffle,
      min_after_dequeue=min_after_dequeue,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  # With unlimited epochs every dequeue yields a full batch; with a fixed
  # epoch count the last batch may be short, so dequeue "up to" instead.
  if num_epochs is None:
    dequeued = queue.dequeue_many(batch_size)
  else:
    dequeued = queue.dequeue_up_to(batch_size)
  assert len(dequeued) == len(x.columns) + 1, ('Features should have one '
                                               'extra element for the index.')
  # Drop the leading index tensor; the remainder line up with the columns.
  features = dict(zip(list(x.columns), dequeued[1:]))
  if y is not None:
    if isinstance(target_column, list):
      # Multiple targets: pop each target column out of the feature dict
      # and re-key the results by the names recorded in y_columns.
      keys = [k for k, _ in y_columns]
      values = [features.pop(column) for column in target_column]
      target = {k: v for k, v in zip(keys, values)}
      return features, target
    target = features.pop(target_column)
    return features, target
  return features
def testArrayFeedingMultiThread(self):
  """Checks shuffled, multi-threaded array feeding.

  With shuffling, row order is unpredictable; the first dequeued tensor
  carries the row indices, so expected rows are looked up from those.
  """
  with ops.Graph().as_default():
    data = np.arange(256).reshape([128, 2])
    queue = ff._enqueue_data(data, capacity=128, num_threads=8, shuffle=True)
    batch_size = 3
    dequeue_op = queue.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        batch = sess.run(dequeue_op)
        shuffled_indices = batch[0]
        expected_rows = get_rows(data, shuffled_indices)
        np.testing.assert_array_equal(expected_rows, batch[1])
      coord.request_stop()
      coord.join(threads)
def testArrayFeeding(self):
  """Checks sequential (unshuffled) array feeding.

  Without shuffling, batch i must contain rows [3*i, 3*(i+1)) of the array,
  wrapping around modulo the number of rows; both the emitted indices and
  the row contents are verified.
  """
  with ops.Graph().as_default():
    data = np.arange(32).reshape([16, 2])
    queue = ff._enqueue_data(data, capacity=100)
    batch_size = 3
    dequeue_op = queue.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for step in range(100):
        start = batch_size * step
        expected_indices = [
            j % data.shape[0] for j in range(start, start + batch_size)
        ]
        expected_rows = get_rows(data, expected_indices)
        batch = sess.run(dequeue_op)
        np.testing.assert_array_equal(expected_indices, batch[0])
        np.testing.assert_array_equal(expected_rows, batch[1])
      coord.request_stop()
      coord.join(threads)
def testPandasFeedingMultiThread(self):
  """Checks shuffled, multi-threaded DataFrame feeding.

  Shuffling makes row order unpredictable, so the dequeued index tensor is
  used to look up the expected rows, whose columns are then compared
  against the remaining dequeued tensors.
  """
  if not HAS_PANDAS:
    return
  with ops.Graph().as_default():
    array1 = np.arange(128, 256)
    array2 = 2 * array1
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(128))
    queue = ff._enqueue_data(df, capacity=128, num_threads=8, shuffle=True)
    batch_size = 5
    dequeue_op = queue.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        batch = sess.run(dequeue_op)
        shuffled_indices = batch[0]
        expected_rows = df.iloc[shuffled_indices]
        for offset, column in enumerate(df.columns):
          np.testing.assert_array_equal(expected_rows[column].values,
                                        batch[offset + 1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Numpy input function."""
  # `x` may be a dict or a bare array; after this call, read only from
  # data_dict (the normalized OrderedDict form), never from `x` directly.
  data_dict = _validate_and_convert_features(x)
  # Materialize the keys view so it survives later mutation of data_dict
  # (dict views are live in Python 3).
  feature_names = list(data_dict.keys())

  if y is None:
    target_keys = None
  elif isinstance(y, dict):
    if not y:
      raise ValueError('y cannot be empty dict, use None instead.')
    sorted_y = collections.OrderedDict(sorted(y.items(), key=lambda t: t[0]))
    target_keys = list(sorted_y.keys())
    overlap = set(feature_names).intersection(set(target_keys))
    if overlap:
      raise ValueError('{} duplicate keys are found in both x and y: '
                       '{}'.format(len(overlap), overlap))
    data_dict.update(sorted_y)
  else:
    # y is a single array: store it under a generated key that cannot
    # collide with any feature name. target_keys is a plain string here.
    target_keys = _get_unique_target_key(data_dict)
    data_dict[target_keys] = y

  # Every tensor must share the same leading (example-count) dimension.
  if len(set(v.shape[0] for v in data_dict.values())) != 1:
    x_shapes = {name: data_dict[name].shape for name in feature_names}
    if target_keys is None:
      y_shapes = None
    elif isinstance(target_keys, string_types):
      y_shapes = y.shape
    else:
      y_shapes = {name: data_dict[name].shape for name in target_keys}
    raise ValueError('Length of tensors in x and y is mismatched. All '
                     'elements in x and y must have the same length.\n'
                     'Shapes in x: {}\n'
                     'Shapes in y: {}\n'.format(x_shapes, y_shapes))

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      data_dict,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)

  # With unlimited epochs every dequeue yields a full batch; with a fixed
  # epoch count the last batch may be short, so dequeue "up to" instead.
  if num_epochs is None:
    batch = queue.dequeue_many(batch_size)
  else:
    batch = queue.dequeue_up_to(batch_size)

  # The first tensor in the batch is the row number; discard it.
  if batch:
    batch.pop(0)

  if isinstance(x, np.ndarray):
    # Mirror the caller's input type: a bare array in, a bare tensor out.
    features = batch[0]
  else:
    features = dict(zip(feature_names, batch[:len(feature_names)]))

  if target_keys is None:
    # TODO(martinwicke), return consistent result
    return features
  if isinstance(target_keys, string_types):
    return features, batch[-1]
  return features, dict(zip(target_keys, batch[-len(target_keys):]))