Example #1
import tensorflow as tf


def preprocessing(dsData: tf.data.Dataset, window_size, batch_size):
    # Slide a window of window_size + 1 elements over the series, one step at a time.
    dsData = dsData.window(window_size + 1, shift=1, drop_remainder=True)
    # Each window is itself a dataset; batch it into a single tensor.
    dsData = dsData.flat_map(lambda w: w.batch(window_size + 1))
    # Split each window into (inputs, target): all but the last element vs. the last.
    dsData = dsData.map(lambda x: (x[:-1], x[-1]))
    dsData = dsData.shuffle(1000)
    dsData = dsData.batch(batch_size).prefetch(1)
    return dsData
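
A minimal usage sketch, reusing the import above and assuming a toy integer series (not part of the original example):

ds = tf.data.Dataset.range(10)
ds = preprocessing(ds, window_size=4, batch_size=2)
for x, y in ds:
    print(x.shape, y.shape)  # (2, 4) (2,)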
Example #2
from functools import partial

import tensorflow as tf


def batch_dataset(dataset: tf.data.Dataset, feature_type2name,
                  feature_name2num, batch_size):
    """Performs batching on a ranking dataset.

    When there are no sparse features, padded_batch() is enough. When there
    are sparse features, we use a batching function specific to sparse
    features.
    """
    padded_shapes, padded_values = _get_padded_shapes_and_values(
        feature_type2name, feature_name2num)

    # Use padded_batch() if there are no sparse features.
    if (InputFtrType.SPARSE_FTRS_COLUMN_NAMES not in feature_type2name
            and InputFtrType.SHALLOW_TOWER_SPARSE_FTRS_COLUMN_NAMES not in feature_type2name):
        # drop_remainder=True avoids a batch_size=0 input in evaluation mode
        # during multi-GPU training.
        return dataset.padded_batch(batch_size,
                                    padded_shapes=padded_shapes,
                                    padding_values=padded_values,
                                    drop_remainder=True)

    # Sparse features cannot go through padded_batch(); window the dataset
    # into groups of batch_size and batch each group with a sparse-aware fn.
    sparse_batch_fn = partial(_sparse_batch_fn,
                              feature_type2name=feature_type2name,
                              padded_shapes=padded_shapes,
                              padded_values=padded_values,
                              batch_size=batch_size)
    # drop_remainder=True avoids a batch_size=0 input in evaluation mode
    # during multi-GPU training.
    return dataset.window(batch_size,
                          drop_remainder=True).flat_map(sparse_batch_fn)
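
_get_padded_shapes_and_values, _sparse_batch_fn, and InputFtrType are project-specific helpers not shown above, so the full function cannot run standalone. A minimal sketch of the padded_batch() path alone, reusing the import above and assuming toy variable-length features:

ragged = tf.data.Dataset.from_generator(
    lambda: [[1, 2], [3, 4, 5], [6]],
    output_signature=tf.TensorSpec(shape=[None], dtype=tf.int64))
batched = ragged.padded_batch(2,
                              padded_shapes=[None],  # pad the ragged dimension
                              padding_values=tf.constant(0, tf.int64),
                              drop_remainder=True)
for batch in batched:
    print(batch.numpy())  # [[1 2 0] [3 4 5]]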
Example #3
import tensorflow as tf


def processing(dataset: tf.data.Dataset, window_size, batch_size):
    # `table` is a lookup table (e.g. a tf.lookup.StaticHashTable) expected
    # to be defined in the enclosing scope.
    dataset = dataset.map(lambda x: table.lookup(x))
    # Flatten batched elements into a stream of scalars before windowing.
    dataset = dataset.unbatch()
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda ds: ds.batch(window_size + 1))
    # Inputs are all but the last id; the target is the last id, shifted to
    # a zero-based label.
    dataset = dataset.map(lambda x: (x[:-1], x[-1] - 1))
    dataset = dataset.shuffle(10000)
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset
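
A minimal usage sketch, reusing the import above and assuming a small character vocabulary to define the `table` the function expects in its enclosing scope:

keys = tf.constant(['a', 'b', 'c'])
values = tf.constant([1, 2, 3], dtype=tf.int64)
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys, values), default_value=0)

ds = tf.data.Dataset.from_tensor_slices([['a', 'b', 'c', 'a', 'b', 'c']])
ds = processing(ds, window_size=3, batch_size=2)
for x, y in ds:
    print(x.numpy(), y.numpy())  # e.g. [[1 2 3] [2 3 1]] [0 1]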
Example #4
    def flat_window_zipped_example_and_label_dataset(self, dataset: tf.data.Dataset, batch_size: int,
                                                     window_shift: int) -> tf.data.Dataset:
        """
        Takes a zipped example and label dataset and repeats examples in a windowed fashion of a given batch size.
        It is expected that the resulting dataset will subsequently be batched in some fashion by the given batch size.

        :param dataset: The zipped example and label dataset.
        :param batch_size: The size of the batches to produce.
        :param window_shift: The shift of the moving window between batches.
        :return: The flattened window dataset.
        """
        if window_shift != 0:
            # `window` on a zipped dataset yields a tuple of per-component
            # window datasets; re-zip each tuple and flatten the windows
            # back into a single stream of (example, label) pairs.
            windowed_dataset = dataset.window(batch_size, shift=window_shift)
            unbatched_window_dataset = windowed_dataset.flat_map(
                lambda *sample: tf.data.Dataset.zip(sample))
            return unbatched_window_dataset
        else:
            return dataset
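
A minimal sketch of the same transformation applied standalone (the host class is omitted and the toy data is assumed): with a window of 3 and a shift of 1, overlapping (example, label) pairs are repeated in the output stream.

import tensorflow as tf

examples = tf.data.Dataset.range(5)
labels = examples.map(lambda x: x * 10)
zipped = tf.data.Dataset.zip((examples, labels))

windowed = zipped.window(3, shift=1)
flat = windowed.flat_map(lambda *w: tf.data.Dataset.zip(w))
for example, label in flat:
    print(example.numpy(), label.numpy())  # 0 0, 1 10, 2 20, 1 10, 2 20, ...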