def preprocessing(dsData: tf.data.Dataset, window_size, batch_size, shuffle_buffer_size=1000):
    """Turn a flat sequence dataset into shuffled (window, next-value) batches.

    Each resulting element pairs a window of ``window_size`` consecutive
    values with the value that immediately follows it (next-step prediction).

    :param dsData: Flat dataset of values to window over.
    :param window_size: Number of consecutive values used as model input.
    :param batch_size: Number of (window, target) pairs per output batch.
    :param shuffle_buffer_size: Buffer size passed to ``shuffle()``.
        Defaults to 1000, matching the previously hard-coded value.
    :return: Batched, prefetched ``tf.data.Dataset`` of (inputs, target) pairs.
    """
    # window_size + 1 so the last element of each window can serve as the label.
    dsData = dsData.window(window_size + 1, shift=1, drop_remainder=True)
    # Each window is itself a nested dataset; batch it into a single tensor
    # and flatten back into one stream of elements.
    dsData = dsData.flat_map(lambda w: w.batch(window_size + 1))
    # Split each window into (first window_size values, final value).
    dsData = dsData.map(lambda x: (x[:-1], x[-1]))
    dsData = dsData.shuffle(shuffle_buffer_size)
    # prefetch(1) overlaps preparation of the next batch with consumption.
    dsData = dsData.batch(batch_size).prefetch(1)
    return dsData
def batch_dataset(dataset: tf.data.Dataset, feature_type2name, feature_name2num, batch_size):
    """Batch a ranking dataset.

    Plain ``padded_batch()`` is sufficient when the dataset carries no sparse
    features; when sparse features are present, a sparse-aware batching
    function is applied window by window instead.
    """
    padded_shapes, padded_values = _get_padded_shapes_and_values(
        feature_type2name, feature_name2num)

    sparse_ftr_types = (InputFtrType.SPARSE_FTRS_COLUMN_NAMES,
                        InputFtrType.SHALLOW_TOWER_SPARSE_FTRS_COLUMN_NAMES)
    has_sparse_ftrs = any(ftr_type in feature_type2name for ftr_type in sparse_ftr_types)

    if not has_sparse_ftrs:
        # drop_remainder=True to avoid input batch_size=0 issue in evaluation
        # mode in multi gpu training
        return dataset.padded_batch(batch_size,
                                    padded_shapes=padded_shapes,
                                    padding_values=padded_values,
                                    drop_remainder=True)

    batch_sparse = partial(_sparse_batch_fn,
                           feature_type2name=feature_type2name,
                           padded_shapes=padded_shapes,
                           padded_values=padded_values,
                           batch_size=batch_size)
    # drop_remainder=True to avoid input batch_size=0 issue in evaluation
    # mode in multi gpu training
    return dataset.window(batch_size, drop_remainder=True).flat_map(batch_sparse)
def processing(dataset: tf.data.Dataset, window_size, batch_size, shuffle_buffer_size=10000):
    """Map raw values through a lookup table, then build shuffled
    (window, next-value) training batches.

    NOTE(review): this function reads a free variable ``table`` (presumably a
    module-level lookup table such as ``tf.lookup.StaticHashTable``) that is
    defined elsewhere in the file — confirm it exists, or consider injecting
    it as a parameter in a follow-up.

    :param dataset: Input dataset whose elements are fed through ``table.lookup``.
    :param window_size: Number of consecutive ids used as model input.
    :param batch_size: Number of (window, target) pairs per output batch.
    :param shuffle_buffer_size: Buffer size passed to ``shuffle()``.
        Defaults to 10000, matching the previously hard-coded value.
    :return: Batched, prefetched ``tf.data.Dataset`` of (inputs, target) pairs.
    """
    # Translate raw elements to integer ids via the module-level lookup table.
    dataset = dataset.map(lambda x: table.lookup(x))
    dataset = dataset.unbatch()
    # window_size + 1 so the trailing element can serve as the label.
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda ds: ds.batch(window_size + 1))
    # Label is the final id minus one (ids appear to be 1-based — TODO confirm).
    dataset = dataset.map(lambda x: (x[:-1], x[-1] - 1))
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset
def flat_window_zipped_example_and_label_dataset(self, dataset: tf.data.Dataset,
                                                 batch_size: int,
                                                 window_shift: int,
                                                 ) -> tf.data.Dataset:
    """
    Takes a zipped example and label dataset and repeats examples in a windowed
    fashion of a given batch size. It is expected that the resulting dataset
    will subsequently be batched in some fashion by the given batch size.

    :param dataset: The zipped example and label dataset.
    :param batch_size: The size of the batches to produce.
    :param window_shift: The shift of the moving window between batches.
    :return: The flattened window dataset.
    """
    # A zero shift means no windowing is requested: pass the dataset through.
    if window_shift == 0:
        return dataset
    windowed = dataset.window(batch_size, shift=window_shift)
    # Each windowed element is a tuple of per-component window datasets;
    # zip them back together and splice into a single flat dataset.
    return windowed.flat_map(lambda *windows: tf.data.Dataset.zip(windows))