Пример #1
0
def encode_string_categorical_feature(feature, name, dataset):
    # Create a StringLookup layer which will turn strings into integer indices
    index = StringLookup()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    index.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = index(feature)

    # Create a CategoryEncoding for our integer indices
    encoder = CategoryEncoding(output_mode="binary")

    # Prepare a dataset of indices
    feature_ds = feature_ds.map(index)

    # Learn the space of possible indices
    encoder.adapt(feature_ds)

    # Apply one-hot encoding to our indices
    encoded_feature = encoder(encoded_feature)
    return encoded_feature
Пример #2
0
def encode_string_categorical_feature(feature, name, dataset):
    index = StringLookup()

    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    index.adapt(feature_ds)

    encoded_feature = index(feature)
    encoder = CategoryEncoding(output_mode="binary")
    feature_ds = feature_ds.map(index)
    encoder.adapt(feature_ds)
    encoded_feature = encoder(encoded_feature)
    return encoded_feature
Пример #3
0
    def _encode_categorical_feature(
        feature: KerasTensor,
        name: str,
        dataset: Optional[BatchDataset],
    ) -> KerasTensor:
        """One-hot encode categorical features.

        Args:
            - feature: The input layer of the feature.
            - name: The feature's name (its column name in the original dataframe).
            - dataset: The training data, if not specified, return a no-op layer.

        Returns:
            The one-hot encoded tensor of the input feature.

        """
        # Return generic layer for the tuner initialization
        if not dataset:
            return KerasTensor(type_spec=TensorSpec(
                shape=(None, 1), dtype=tf.float32, name=None))

        # Create a StringLookup layer which will turn strings into integer indices
        index = StringLookup()

        # Prepare a Dataset that only yields our feature
        feature_ds = dataset.map(lambda x, y: x[name])
        feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

        # Learn the set of possible string values and assign them a fixed integer index
        index.adapt(feature_ds)

        # Turn the string input into integer indices
        encoded_feature = index(feature)

        # Create a CategoryEncoding for our integer indices
        encoder = CategoryEncoding(output_mode="binary")

        # Learn the space of possible indices
        encoder.adapt(np.arange(index.vocab_size()))

        # Apply one-hot encoding to our indices{split + 1} / {n_splits}
        encoded_feature = encoder(encoded_feature)

        return encoded_feature