def input_fn():
  """Builds queue-fed batch tensors from the enclosing-scope `data` dict.

  Features are taken from `data['data']` (entries 'features', 'lengths' and
  'lbl_weights') and the target from `data['data']['labels']`. The shuffle
  flag and the number of epochs are read from `hypers`.

  Returns:
    `(features, target)` when the labels are not None, where `features` maps
    'INPUT', 'INPUTLEN' and 'LBLWEIGHTS' to dequeued tensors; otherwise only
    `features`.
  """
  payload = data['data']
  x = {
      'INPUT': payload['features'],
      'INPUTLEN': payload['lengths'],
      'LBLWEIGHTS': payload['lbl_weights'],
  }
  y = payload['labels']

  # Shallow copy with keys in sorted order, so iteration order is stable.
  ordered_dict_x = collections.OrderedDict(
      (name, x[name]) for name in sorted(x))
  unique_target_key = _get_unique_target_key(ordered_dict_x)
  has_target = y is not None
  if has_target:
    ordered_dict_x[unique_target_key] = y

  shuffle = hypers.get_param('th')
  batch_size = data['batch_size']
  num_epochs = hypers.get_param('ts')[hypers.get_param('ti')]
  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_x,
      1000,
      shuffle=shuffle,
      num_threads=1,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  tensors = queue.dequeue_many(batch_size)

  # The first dequeued `Tensor` is the row number; discard it.
  if len(tensors) > 0:
    tensors.pop(0)
  features = dict(zip(ordered_dict_x.keys(), tensors))
  if not has_target:
    return features
  target = features.pop(unique_target_key)
  return features, target
def testPandasFeeding(self):
  # Feeds a 32-row DataFrame through the queue and checks that 100 consecutive
  # batches come back in row order, wrapping around the end of the frame.
  if not HAS_PANDAS:
    return
  with ops.Graph().as_default():
    array1 = np.arange(32)
    array2 = np.arange(32, 64)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
    q = ff._enqueue_data(df, capacity=100)
    batch_size = 5
    # Dequeue exactly `batch_size` rows so the op and the expected indices
    # computed below cannot drift apart.
    dq_op = q.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for i in range(100):
        # Positional row numbers expected in batch `i`, modulo the row count.
        indices = [
            j % array1.shape[0]
            for j in range(batch_size * i, batch_size * (i + 1))
        ]
        expected_df_indices = df.index[indices]
        expected_rows = df.iloc[indices]
        dq = sess.run(dq_op)
        # Component 0 is the DataFrame index; the columns follow in order.
        np.testing.assert_array_equal(expected_df_indices, dq[0])
        for col_num, col in enumerate(df.columns):
          np.testing.assert_array_equal(expected_rows[col].values,
                                        dq[col_num + 1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Pandas input function.

  Reads `x`, `y`, `target_column`, `y_columns`, `batch_size`, `num_epochs`,
  `shuffle`, `queue_capacity`, `min_after_dequeue` and `num_threads` from the
  enclosing scope.

  Returns:
    A dict mapping the column names of `x` to dequeued tensors, or a
    `(features, target)` tuple when `y` is not None. `target` is a dict when
    `target_column` is a list, otherwise the single popped tensor.
  """
  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      x,
      queue_capacity,
      shuffle=shuffle,
      min_after_dequeue=min_after_dequeue,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  # With a bounded number of epochs the final batch may be short.
  dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
  tensors = dequeue(batch_size)
  assert len(tensors) == len(x.columns) + 1, ('Features should have one '
                                              'extra element for the index.')
  # Skip the leading index tensor and key the rest by column name.
  features = dict(zip(list(x.columns), tensors[1:]))
  if y is None:
    return features

  if isinstance(target_column, list):
    label_names = [name for name, _ in y_columns]
    label_tensors = [features.pop(column) for column in target_column]
    target = dict(zip(label_names, label_tensors))
  else:
    target = features.pop(target_column)
  return features, target
def input_fn():
  """Pandas input function.

  Uses `x`, `y`, `target_column`, `y_columns` and the queue settings
  (`batch_size`, `num_epochs`, `shuffle`, `queue_capacity`,
  `min_after_dequeue`, `num_threads`) captured from the enclosing scope.

  Returns:
    The feature dict keyed by the columns of `x`; when `y` is not None, a
    `(features, target)` tuple instead, with `target` being a dict if
    `target_column` is a list and a single tensor otherwise.
  """
  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      x,
      queue_capacity,
      shuffle=shuffle,
      min_after_dequeue=min_after_dequeue,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    dequeued = queue.dequeue_many(batch_size)
  else:
    # A finite number of epochs can leave a smaller last batch.
    dequeued = queue.dequeue_up_to(batch_size)
  assert len(dequeued) == len(x.columns) + 1, (
      'Features should have one '
      'extra element for the index.')

  # Element 0 is the index; the remaining elements line up with the columns.
  column_tensors = dequeued[1:]
  features = dict(zip(list(x.columns), column_tensors))
  if y is None:
    return features

  if not isinstance(target_column, list):
    target = features.pop(target_column)
    return features, target

  target_names = [name for name, _ in y_columns]
  target_tensors = [features.pop(column) for column in target_column]
  target = dict(zip(target_names, target_tensors))
  return features, target
def testArrayFeedingMultiThread(self):
  # Shuffled, 8-thread feeding: the order is not fixed, so each dequeued batch
  # is only checked to hold the array rows named by its own index component.
  with ops.Graph().as_default():
    array = np.arange(256).reshape([128, 2])
    q = ff._enqueue_data(array, capacity=128, num_threads=8, shuffle=True)
    batch_size = 3
    dequeue_op = q.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        batch = sess.run(dequeue_op)
        row_ids = batch[0]
        np.testing.assert_array_equal(get_rows(array, row_ids), batch[1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Numpy input function.

  Reads `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `seed`,
  `queue_capacity` and `num_threads` from the enclosing scope.

  Returns:
    A dict of dequeued tensors keyed like `x`, or a `(features, target)`
    tuple when `y` is not None.

  Raises:
    TypeError: if `x` is not a dict.
    ValueError: if the entries of `x` and `y` differ in their first dimension.
  """
  if not isinstance(x, dict):
    raise TypeError('x must be dict; got {}'.format(type(x).__name__))

  # Shallow copy with keys in sorted order, so iteration order is stable.
  ordered_dict_x = collections.OrderedDict(
      (name, x[name]) for name in sorted(x))
  unique_target_key = _get_unique_target_key(ordered_dict_x)
  has_target = y is not None
  if has_target:
    ordered_dict_x[unique_target_key] = y

  leading_dims = {array.shape[0] for array in ordered_dict_x.values()}
  if len(leading_dims) != 1:
    shape_dict_of_x = {
        name: array.shape for name, array in ordered_dict_x.items()
    }
    shape_of_y = y.shape if has_target else None
    message = ('Length of tensors in x and y is mismatched. All '
               'elements in x and y must have the same length.\n'
               'Shapes in x: {}\n'
               'Shape for y: {}\n')
    raise ValueError(message.format(shape_dict_of_x, shape_of_y))

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      seed=seed,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    tensors = queue.dequeue_many(batch_size)
  else:
    tensors = queue.dequeue_up_to(batch_size)

  # The first dequeued `Tensor` is the row number; discard it.
  if len(tensors) > 0:
    tensors.pop(0)
  features = dict(zip(ordered_dict_x.keys(), tensors))
  if not has_target:
    return features
  target = features.pop(unique_target_key)
  return features, target
def input_fn():
  """Numpy input function.

  Uses `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` captured from the enclosing scope.

  Returns:
    The dict of dequeued feature tensors, or `(features, target)` when `y` is
    not None.

  Raises:
    TypeError: if `x` is not a dict.
    ValueError: if the first dimensions of the entries in `x` and `y` are not
      all equal.
  """
  if not isinstance(x, dict):
    raise TypeError('x must be dict; got {}'.format(type(x).__name__))

  # Copy the (key, array) pairs into a key-sorted OrderedDict so the order of
  # the queue components is deterministic.
  sorted_items = sorted(x.items(), key=lambda item: item[0])
  ordered_dict_x = collections.OrderedDict(sorted_items)
  unique_target_key = _get_unique_target_key(ordered_dict_x)
  if y is not None:
    ordered_dict_x[unique_target_key] = y

  row_counts = set(array.shape[0] for array in ordered_dict_x.values())
  if len(row_counts) != 1:
    shape_dict_of_x = {}
    for name in ordered_dict_x.keys():
      shape_dict_of_x[name] = ordered_dict_x[name].shape
    if y is None:
      shape_of_y = None
    else:
      shape_of_y = y.shape
    raise ValueError('Length of tensors in x and y is mismatched. All '
                     'elements in x and y must have the same length.\n'
                     'Shapes in x: {}\n'
                     'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
  dequeued = dequeue(batch_size)

  # Drop the leading row-number `Tensor`.
  if len(dequeued) > 0:
    dequeued.pop(0)
  features = dict(zip(ordered_dict_x.keys(), dequeued))
  if y is None:
    return features
  target = features.pop(unique_target_key)
  return features, target
def testArrayFeeding(self):
  # Feeds a 16-row array through the queue and checks that 100 consecutive
  # batches of 3 come back in row order, wrapping around the end of the array.
  with ops.Graph().as_default():
    array = np.arange(32).reshape([16, 2])
    q = ff._enqueue_data(array, capacity=100)
    batch_size = 3
    dequeue_op = q.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for step in range(100):
        first = batch_size * step
        # Row numbers expected in this batch, modulo the number of rows.
        indices = [j % array.shape[0] for j in range(first, first + batch_size)]
        expected_rows = get_rows(array, indices)
        batch = sess.run(dequeue_op)
        np.testing.assert_array_equal(indices, batch[0])
        np.testing.assert_array_equal(expected_rows, batch[1])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Builds shuffled batches of integerized color names with RGB labels.

  Reads `df`, `epochs`, `batch_size` and `CHARACTERS` from the enclosing
  scope.

  Returns:
    A `(features, label)` tuple. `features` is the batched dict holding the
    entries under `COLOR_NAME_KEY` and `SEQUENCE_LENGTH_KEY`; `label` is the
    entry popped from under `RGB_KEY`.
  """
  # Feed the DataFrame through a multi-threaded queue so the input scales.
  pandas_queue = ff._enqueue_data(
      df,
      capacity=1024,
      shuffle=True,
      min_after_dequeue=256,
      num_threads=4,
      enqueue_size=16,
      num_epochs=epochs)
  dequeued = pandas_queue.dequeue_up_to(batch_size)
  # The first component is discarded; the rest are unpacked by position.
  _, color_name, red, green, blue, seq_len = dequeued

  # Split every name into its individual characters.
  name_chars = tf.string_split(color_name, delimiter='')

  # Constant holding the char -> index map. It is needed to build the sparse
  # tensor that is one-hot encoded later.
  mapping = tf.constant(CHARACTERS, name="mapping")

  # Names as a sparse tensor of character indices.
  integerized_color_name = _sparse_string_to_index(name_chars, mapping)

  # Stack the channels per row and divide by 255.0 to normalize.
  stacked_channels = tf.stack([red, green, blue], axis=1)
  rgb = tf.to_float(stacked_channels) / 255.0

  # Generate the shuffled batches.
  tensors_to_batch = {
      COLOR_NAME_KEY: integerized_color_name,
      SEQUENCE_LENGTH_KEY: seq_len,
      RGB_KEY: rgb
  }
  batched = tf.train.shuffle_batch(
      tensors_to_batch,
      batch_size,
      min_after_dequeue=100,
      num_threads=4,
      capacity=1000,
      enqueue_many=True,
      allow_smaller_final_batch=True)
  label = batched.pop(RGB_KEY)
  return batched, label
def testPandasFeedingMultiThread(self):
  # Shuffled, 8-thread feeding of a DataFrame: the order is not fixed, so each
  # batch is only checked to hold the rows named by its own index component.
  if not HAS_PANDAS:
    return
  with ops.Graph().as_default():
    array1 = np.arange(128, 256)
    array2 = 2 * array1
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(128))
    q = ff._enqueue_data(df, capacity=128, num_threads=8, shuffle=True)
    batch_size = 5
    dequeue_op = q.dequeue_many(batch_size)
    with session.Session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      for _ in range(100):
        batch = sess.run(dequeue_op)
        row_ids = batch[0]
        expected_rows = df.iloc[row_ids]
        # Column tensors follow the index component, in column order.
        for position, column in enumerate(df.columns, 1):
          np.testing.assert_array_equal(expected_rows[column].values,
                                        batch[position])
      coord.request_stop()
      coord.join(threads)
def input_fn():
  """Feeds the dict `x` and the keyed targets `y` through one queue.

  Reads `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` from the enclosing scope.

  Returns:
    A `(features, target)` tuple of dicts: `target` holds the dequeued
    tensors for the keys of `y`, `features` holds the remaining ones.
  """
  # Key-sorted copy of `x`, so the queue components have a stable order.
  sorted_items = sorted(x.items(), key=lambda item: item[0])
  ordered_dict_x = collections.OrderedDict(sorted_items)

  # Append every target under its own key, remembering which keys are targets.
  target_keys = list(y)
  for key in target_keys:
    ordered_dict_x[key] = y[key]

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    dequeued = queue.dequeue_many(batch_size)
  else:
    dequeued = queue.dequeue_up_to(batch_size)

  # Drop the leading element before pairing the rest with the keys.
  if dequeued:
    dequeued.pop(0)
  features = dict(zip(ordered_dict_x.keys(), dequeued))
  target = {key: features.pop(key) for key in target_keys}
  return features, target
def input_fn():
  """Queues the entries of `x` together with the keyed targets in `y`.

  Uses `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` captured from the enclosing scope.

  Returns:
    `(features, target)`, both dicts of dequeued tensors; the entries for the
    keys of `y` are moved out of `features` and into `target`.
  """
  # Build the queue input: features in key-sorted order, then the targets.
  ordered_dict_x = collections.OrderedDict(
      (name, x[name]) for name in sorted(x))
  target_keys = list(y)
  for name in target_keys:
    ordered_dict_x[name] = y[name]

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
  tensors = dequeue(batch_size)

  # Remove the first element so the rest line up with the dict keys.
  if tensors:
    tensors.pop(0)
  features = dict(zip(ordered_dict_x.keys(), tensors))
  target = {}
  for name in target_keys:
    target[name] = features.pop(name)
  return features, target
def input_fn():
  """Feeds `x` (and optionally `y`) through a queue and decodes the features.

  Reads `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` from the enclosing scope.

  Returns:
    `read_images(features)` when `y` is None, otherwise a
    `(read_images(features), target)` tuple.

  Raises:
    ValueError: if the entries of `x` and `y` differ in their first dimension.
  """
  # Key-sorted copy of `x`, so the queue components have a stable order.
  ordered_dict_x = collections.OrderedDict(
      sorted(x.items(), key=lambda t: t[0]))
  unique_target_key = _get_unique_target_key(ordered_dict_x)
  if y is not None:
    ordered_dict_x[unique_target_key] = y

  if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1:
    shape_dict_of_x = {k: ordered_dict_x[k].shape
                       for k in ordered_dict_x.keys()}
    shape_of_y = None if y is None else y.shape
    raise ValueError('Length of tensors in x and y is mismatched. All '
                     'elements in x and y must have the same length.\n'
                     'Shapes in x: {}\n'
                     'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

  # Written as a call so it parses on Python 3 as well; with one argument it
  # prints the same text as the Python 2 print statement did.
  print(ordered_dict_x)

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_x,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  features = (queue.dequeue_many(batch_size) if num_epochs is None
              else queue.dequeue_up_to(batch_size))

  # Drop the leading element before pairing the rest with the keys.
  if len(features) > 0:
    features.pop(0)
  features = dict(zip(ordered_dict_x.keys(), features))
  if y is not None:
    target = features.pop(unique_target_key)
    return read_images(features), target
  return read_images(features)
def input_fn():
  """Numpy input function.

  Reads `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` from the enclosing scope. `y` may be None, a dict, or a
  single object with a `shape`.

  Returns:
    The feature dict when `y` is None; otherwise `(features, target)`, where
    `target` is a dict when `y` is a dict and a single tensor otherwise.

  Raises:
    TypeError: if `x` is not a dict.
    ValueError: if `x` is empty, `y` is an empty dict, `x` and `y` share
      keys, or the first dimensions of the entries are not all equal.
  """
  if not isinstance(x, dict):
    raise TypeError('x must be dict; got {}'.format(type(x).__name__))
  if not x:
    raise ValueError('x cannot be empty')

  # Shallow copy with keys in sorted order, so iteration order is stable.
  ordered_dict_data = collections.OrderedDict(
      (name, x[name]) for name in sorted(x))

  # Snapshot the keys as a list; `keys()` is a live view in Python 3.
  feature_keys = list(ordered_dict_data.keys())

  if y is None:
    target_keys = None
  elif isinstance(y, dict):
    if not y:
      raise ValueError('y cannot be empty dict, use None instead.')

    ordered_dict_y = collections.OrderedDict(
        (name, y[name]) for name in sorted(y))
    target_keys = list(ordered_dict_y.keys())

    duplicate_keys = set(feature_keys).intersection(set(target_keys))
    if duplicate_keys:
      raise ValueError('{} duplicate keys are found in both x and y: '
                       '{}'.format(len(duplicate_keys), duplicate_keys))

    ordered_dict_data.update(ordered_dict_y)
  else:
    # A single target is stored under one generated key (a string).
    target_keys = _get_unique_target_key(ordered_dict_data)
    ordered_dict_data[target_keys] = y

  row_counts = {array.shape[0] for array in ordered_dict_data.values()}
  if len(row_counts) != 1:
    shape_dict_of_x = {
        name: ordered_dict_data[name].shape for name in feature_keys
    }

    if target_keys is None:
      shape_of_y = None
    elif isinstance(target_keys, string_types):
      shape_of_y = y.shape
    else:
      shape_of_y = {
          name: ordered_dict_data[name].shape for name in target_keys
      }

    message = ('Length of tensors in x and y is mismatched. All '
               'elements in x and y must have the same length.\n'
               'Shapes in x: {}\n'
               'Shapes in y: {}\n')
    raise ValueError(message.format(shape_dict_of_x, shape_of_y))

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_data,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    batch = queue.dequeue_many(batch_size)
  else:
    batch = queue.dequeue_up_to(batch_size)

  # The first `Tensor` in `batch` is the row number; discard it.
  if len(batch) > 0:
    batch.pop(0)

  num_features = len(feature_keys)
  features = dict(zip(feature_keys, batch[:num_features]))
  if target_keys is None:
    # TODO(martinwicke), return consistent result
    return features
  if isinstance(target_keys, string_types):
    # The single target was added last, so it is the last tensor.
    return features, batch[-1]
  target = dict(zip(target_keys, batch[-len(target_keys):]))
  return features, target
def enqueue_data(*args, **kwargs):
  """Forwards all positional and keyword arguments to `_enqueue_data`.

  Returns:
    Whatever `_enqueue_data` returns for the given arguments.
  """
  result = _enqueue_data(*args, **kwargs)
  return result
def input_fn():
  """Numpy input function.

  Reads `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` from the enclosing scope. `y` may be None, a dict, or a
  single object with a `shape`.

  Returns:
    `features` when `y` is None, otherwise `(features, target)`. `features`
    is a single tensor when `x` is an `np.ndarray` and a dict otherwise;
    `target` is a dict when `y` is a dict and a single tensor otherwise.

  Raises:
    ValueError: if `y` is an empty dict, `x` and `y` share keys, or the first
      dimensions of the entries are not all equal.
  """
  # `x` may be a dict or an array, so after this conversion only
  # `ordered_dict_data` is used, apart from the return-type check below.
  ordered_dict_data = _validate_and_convert_features(x)

  # Snapshot the keys as a list; `keys()` is a live view in Python 3.
  feature_keys = list(ordered_dict_data.keys())

  if y is None:
    target_keys = None
  elif isinstance(y, dict):
    if not y:
      raise ValueError('y cannot be empty dict, use None instead.')

    ordered_dict_y = collections.OrderedDict(
        (name, y[name]) for name in sorted(y))
    target_keys = list(ordered_dict_y.keys())

    duplicate_keys = set(feature_keys).intersection(set(target_keys))
    if duplicate_keys:
      raise ValueError('{} duplicate keys are found in both x and y: '
                       '{}'.format(len(duplicate_keys), duplicate_keys))

    ordered_dict_data.update(ordered_dict_y)
  else:
    # A single target is stored under one generated key (a string).
    target_keys = _get_unique_target_key(ordered_dict_data)
    ordered_dict_data[target_keys] = y

  row_counts = {array.shape[0] for array in ordered_dict_data.values()}
  if len(row_counts) != 1:
    shape_dict_of_x = {
        name: ordered_dict_data[name].shape for name in feature_keys
    }

    if target_keys is None:
      shape_of_y = None
    elif isinstance(target_keys, string_types):
      shape_of_y = y.shape
    else:
      shape_of_y = {
          name: ordered_dict_data[name].shape for name in target_keys
      }

    message = ('Length of tensors in x and y is mismatched. All '
               'elements in x and y must have the same length.\n'
               'Shapes in x: {}\n'
               'Shapes in y: {}\n')
    raise ValueError(message.format(shape_dict_of_x, shape_of_y))

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_data,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  if num_epochs is None:
    batch = queue.dequeue_many(batch_size)
  else:
    batch = queue.dequeue_up_to(batch_size)

  # The first `Tensor` in `batch` is the row number; discard it.
  if batch:
    batch.pop(0)

  if isinstance(x, np.ndarray):
    # Return the features in the same form as the original array input.
    features = batch[0]
  else:
    # Return the features as a dict, like the original input.
    features = dict(zip(feature_keys, batch[:len(feature_keys)]))

  if target_keys is None:
    # TODO(martinwicke), return consistent result
    return features
  if isinstance(target_keys, string_types):
    # The single target was added last, so it is the last tensor.
    return features, batch[-1]
  target = dict(zip(target_keys, batch[-len(target_keys):]))
  return features, target
def input_fn():
  """Numpy input function.

  Uses `x`, `y`, `batch_size`, `num_epochs`, `shuffle`, `queue_capacity` and
  `num_threads` captured from the enclosing scope. `y` may be None, a dict,
  or a single object with a `shape`.

  Returns:
    The feature dict when `y` is None; otherwise `(features, target)`, with
    `target` a dict when `y` is a dict and a single tensor otherwise.

  Raises:
    TypeError: if `x` is not a dict.
    ValueError: if `x` is empty, `y` is an empty dict, `x` and `y` share
      keys, or the first dimensions of the entries are not all equal.
  """
  if not isinstance(x, dict):
    raise TypeError('x must be dict; got {}'.format(type(x).__name__))
  if not x:
    raise ValueError('x cannot be empty')

  # Copy the pairs into a key-sorted OrderedDict so the order of the queue
  # components is deterministic.
  sorted_features = sorted(x.items(), key=lambda item: item[0])
  ordered_dict_data = collections.OrderedDict(sorted_features)

  # Snapshot the keys as a list; `keys()` is a live view in Python 3.
  feature_keys = list(ordered_dict_data.keys())

  if y is None:
    target_keys = None
  elif isinstance(y, dict):
    if not y:
      raise ValueError('y cannot be empty dict, use None instead.')

    sorted_targets = sorted(y.items(), key=lambda item: item[0])
    ordered_dict_y = collections.OrderedDict(sorted_targets)
    target_keys = list(ordered_dict_y.keys())

    duplicate_keys = set(feature_keys).intersection(set(target_keys))
    if duplicate_keys:
      raise ValueError(
          '{} duplicate keys are found in both x and y: '
          '{}'.format(len(duplicate_keys), duplicate_keys))

    ordered_dict_data.update(ordered_dict_y)
  else:
    # A single target is stored under one generated key (a string).
    target_keys = _get_unique_target_key(ordered_dict_data)
    ordered_dict_data[target_keys] = y

  row_counts = set(array.shape[0] for array in ordered_dict_data.values())
  if len(row_counts) != 1:
    shape_dict_of_x = {}
    for name in feature_keys:
      shape_dict_of_x[name] = ordered_dict_data[name].shape

    if target_keys is None:
      shape_of_y = None
    elif isinstance(target_keys, string_types):
      shape_of_y = y.shape
    else:
      shape_of_y = {}
      for name in target_keys:
        shape_of_y[name] = ordered_dict_data[name].shape

    raise ValueError('Length of tensors in x and y is mismatched. All '
                     'elements in x and y must have the same length.\n'
                     'Shapes in x: {}\n'
                     'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))

  queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
      ordered_dict_data,
      queue_capacity,
      shuffle=shuffle,
      num_threads=num_threads,
      enqueue_size=batch_size,
      num_epochs=num_epochs)
  dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
  batch = dequeue(batch_size)

  # Drop the leading row-number `Tensor`.
  if batch:
    batch.pop(0)

  feature_tensors = batch[:len(feature_keys)]
  features = dict(zip(feature_keys, feature_tensors))
  if target_keys is None:
    # TODO(martinwicke), return consistent result
    return features
  if isinstance(target_keys, string_types):
    # The single target was added last, so it is the last tensor.
    target = batch[-1]
  else:
    target = dict(zip(target_keys, batch[-len(target_keys):]))
  return features, target
def enqueue_data(*args, **kwargs):
  """Calls `_enqueue_data` with exactly the arguments it was given.

  Returns:
    The value returned by `_enqueue_data`.
  """
  outcome = _enqueue_data(*args, **kwargs)
  return outcome