Example #1
    def test_call_column(self):
        user_id = tf.feature_column.categorical_column_with_identity(
            "user_id", num_buckets=32
        )

        item_id = tf.feature_column.categorical_column_with_identity(
            "item_id", num_buckets=128
        )

        item_id_user_id_concat = concatenated_categorical_column(
            [user_id, item_id]
        )

        concat_indicator = tf.feature_column.indicator_column(
            item_id_user_id_concat
        )

        output = call_feature_columns(
            [concat_indicator], {"user_id": [10, 20], "item_id": [1, 120]},
        )

        # user_id values land in buckets [0, 32); item_id values are shifted
        # by user_id's 32 buckets into [32, 160), so depth is 32 + 128 = 160.
        expected_output = tf.one_hot(indices=[10, 20], depth=160) + tf.one_hot(
            indices=[1 + 32, 120 + 32], depth=160
        )

        self.assertTrue(
            np.array_equal(output.numpy(), expected_output.numpy())
        )
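
Note: call_feature_columns is a helper defined elsewhere in the test suite, not a TensorFlow API. A minimal sketch of what it plausibly does, assuming the columns are dense-compatible (as they are here via indicator_column):

import tensorflow as tf

def call_feature_columns(feature_columns, inputs):
    # Hypothetical helper: build a DenseFeatures layer from the given
    # columns and apply it to the raw input dict, yielding a dense tensor.
    return tf.keras.layers.DenseFeatures(feature_columns)(inputs)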
Example #2
 def test_num_buckets(self):
     a = tf.feature_column.categorical_column_with_hash_bucket(
         "aaa", hash_bucket_size=1024)
     b = tf.feature_column.categorical_column_with_identity("bbb",
                                                            num_buckets=32)
     concat = concatenated_categorical_column([a, b])
     self.assertEqual(1056, concat.num_buckets)
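
The 1056 is simply the sum of the children's bucket counts: the concatenated column lays the child vocabularies end to end, offsetting each child's ids by the buckets that precede it. A standalone check of that arithmetic (the num_buckets properties are standard TF API; the summing rule is inferred from this test):

import tensorflow as tf

a = tf.feature_column.categorical_column_with_hash_bucket(
    "aaa", hash_bucket_size=1024)
b = tf.feature_column.categorical_column_with_identity("bbb", num_buckets=32)

# 1024 hash buckets + 32 identity buckets = 1056 concatenated buckets.
assert sum(col.num_buckets for col in (a, b)) == 1056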
Example #3
 def test_is_v2_column(self):
     a = tf.feature_column.categorical_column_with_hash_bucket(
         "aaa", hash_bucket_size=1024)
     b = tf.feature_column.categorical_column_with_identity("bbb",
                                                            num_buckets=32)
     concat = concatenated_categorical_column([a, b])
     self.assertTrue(concat._is_v2_column)
Example #4
 def test_deep_copy(self):
     a = tf.feature_column.categorical_column_with_hash_bucket(
         "aaa", hash_bucket_size=1024)
     b = tf.feature_column.categorical_column_with_identity("bbb",
                                                            num_buckets=32)
     concat = concatenated_categorical_column([a, b])
     concat_copy = copy.deepcopy(concat)
     self.assertEqual("aaa_C_bbb", concat_copy.name)
     self.assertEqual(1056, concat_copy.num_buckets)
Example #5
 def test_name(self):
     a = tf.feature_column.categorical_column_with_hash_bucket(
         "aaa", hash_bucket_size=1024)
     b = tf.feature_column.categorical_column_with_identity("bbb",
                                                            num_buckets=32)
     c = tf.feature_column.bucketized_column(
         tf.feature_column.numeric_column("ccc"),
         boundaries=[1, 2, 3, 4, 5])
     concat = concatenated_categorical_column([a, b, c])
     self.assertEqual("aaa_C_bbb_C_ccc_bucketized", concat.name)
Example #6
def transform(inputs):
    feature_column_dict = {}

    for feature_transform_info in FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY:
        op_type = feature_transform_info.op_type
        output_name = feature_transform_info.output

        if op_type == TransformOpType.HASH:
            feature_column_dict[output_name] = (
                tf.feature_column.categorical_column_with_hash_bucket(
                    feature_transform_info.input,
                    hash_bucket_size=feature_transform_info.hash_bucket_size,
                ))
        elif op_type == TransformOpType.BUCKETIZE:
            feature_column_dict[output_name] = (
                tf.feature_column.bucketized_column(
                    tf.feature_column.numeric_column(
                        feature_transform_info.input),
                    boundaries=feature_transform_info.boundaries,
                ))
        elif op_type == TransformOpType.LOOKUP:
            # NOTE: the vocabulary list is taken from the workclass
            # lookup config regardless of the input feature.
            feature_column_dict[output_name] = (
                tf.feature_column.categorical_column_with_vocabulary_list(
                    feature_transform_info.input,
                    vocabulary_list=workclass_lookup.vocabulary_list,
                ))
        elif op_type == TransformOpType.CONCAT:
            # Fuse several categorical columns into one column whose
            # vocabulary is the children's vocabularies laid end to end.
            concat_inputs = [
                feature_column_dict[name]
                for name in feature_transform_info.input
            ]
            feature_column_dict[output_name] = (
                edl_fc.concatenated_categorical_column(concat_inputs))
        elif op_type == TransformOpType.EMBEDDING:
            feature_column_dict[output_name] = (
                tf.feature_column.embedding_column(
                    feature_column_dict[feature_transform_info.input],
                    dimension=feature_transform_info.output_dim,
                ))
        elif op_type == TransformOpType.ARRAY:
            feature_column_dict[output_name] = [
                feature_column_dict[name]
                for name in feature_transform_info.input
            ]

    return tuple(
        tf.keras.layers.DenseFeatures(feature_column_dict[name])(inputs)
        for name in TRANSFORM_OUTPUTS)
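
transform iterates over FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY, whose element type is not shown. A purely illustrative shape for those records, matching only what the loop above reads (the real definitions live in the surrounding project's code-generation layer and may differ):

from collections import namedtuple
from enum import Enum

class TransformOpType(Enum):
    # One member per branch handled in transform() above.
    HASH = "hash"
    BUCKETIZE = "bucketize"
    LOOKUP = "lookup"
    CONCAT = "concat"
    EMBEDDING = "embedding"
    ARRAY = "array"

# Hypothetical record; only the fields read by a given op_type matter.
# For CONCAT and ARRAY, input is a list of prior output names.
FeatureTransformInfo = namedtuple(
    "FeatureTransformInfo",
    ["op_type", "input", "output",
     "hash_bucket_size", "boundaries", "output_dim"],
)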
Example #7
 def test_parse_spec(self):
     a = tf.feature_column.categorical_column_with_hash_bucket(
         "aaa", hash_bucket_size=1024, dtype=tf.string)
     b = tf.feature_column.bucketized_column(
         tf.feature_column.numeric_column("bbb", dtype=tf.int32),
         boundaries=[1, 2, 3, 4, 5],
     )
     concat = concatenated_categorical_column([a, b])
     self.assertEqual(
         {
             "aaa": tf.io.VarLenFeature(dtype=tf.string),
             "bbb": tf.io.FixedLenFeature(shape=(1, ), dtype=tf.int32),
         },
         concat.parse_example_spec,
     )
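
The assertion implies that the concatenated column's parse_example_spec is the union of its children's specs, keyed by raw feature name: the hashed string column parses as a VarLenFeature and the bucketized int column as a FixedLenFeature. A sketch of that merge, assuming the children declare no conflicting keys:

def merged_parse_example_spec(categorical_columns):
    # Union of the children's tf.Example parsing specs; each raw feature
    # keeps whatever spec its own column declares.
    spec = {}
    for column in categorical_columns:
        spec.update(column.parse_example_spec)
    return spec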
Example #8
def transform_from_code_gen(source_inputs):
    education_hash_fc = fc.categorical_column_with_hash_bucket(
        "education", hash_bucket_size=education_hash.hash_bucket_size)

    occupation_hash_fc = fc.categorical_column_with_hash_bucket(
        "occupation", hash_bucket_size=occupation_hash.hash_bucket_size)

    native_country_hash_fc = fc.categorical_column_with_hash_bucket(
        "native_country",
        hash_bucket_size=native_country_hash.hash_bucket_size)

    workclass_lookup_fc = fc.categorical_column_with_vocabulary_list(
        "workclass", vocabulary_list=workclass_lookup.vocabulary_list)

    marital_status_lookup_fc = fc.categorical_column_with_vocabulary_list(
        "marital_status",
        vocabulary_list=marital_status_lookup.vocabulary_list)

    relationship_lookup_fc = fc.categorical_column_with_vocabulary_list(
        "relationship", vocabulary_list=relationship_lookup.vocabulary_list)

    race_lookup_fc = fc.categorical_column_with_vocabulary_list(
        "race", vocabulary_list=race_lookup.vocabulary_list)

    sex_lookup_fc = fc.categorical_column_with_vocabulary_list(
        "sex", vocabulary_list=sex_lookup.vocabulary_list)

    age_bucketize_fc = fc.bucketized_column(
        fc.numeric_column("age"), boundaries=age_bucketize.boundaries)

    capital_gain_bucketize_fc = fc.bucketized_column(
        fc.numeric_column("capital_gain"),
        boundaries=capital_gain_bucketize.boundaries,
    )

    capital_loss_bucketize_fc = fc.bucketized_column(
        fc.numeric_column("capital_loss"),
        boundaries=capital_loss_bucketize.boundaries,
    )

    hours_per_week_bucketize_fc = fc.bucketized_column(
        fc.numeric_column("hours_per_week"),
        boundaries=hours_per_week_bucketize.boundaries,
    )

    group1_fc = edl_fc.concatenated_categorical_column(categorical_columns=[
        workclass_lookup_fc,
        hours_per_week_bucketize_fc,
        capital_gain_bucketize_fc,
        capital_loss_bucketize_fc,
    ])

    group2_fc = edl_fc.concatenated_categorical_column(categorical_columns=[
        education_hash_fc,
        marital_status_lookup_fc,
        relationship_lookup_fc,
        occupation_hash_fc,
    ])

    group3_fc = edl_fc.concatenated_categorical_column(categorical_columns=[
        age_bucketize_fc,
        sex_lookup_fc,
        race_lookup_fc,
        native_country_hash_fc,
    ])

    group1_wide_embedding_fc = fc.embedding_column(
        group1_fc,
        dimension=group1_embedding_wide.output_dim,
    )

    group2_wide_embedding_fc = fc.embedding_column(
        group2_fc,
        dimension=group2_embedding_wide.output_dim,
    )

    group1_deep_embedding_fc = fc.embedding_column(
        group1_fc,
        dimension=group1_embedding_deep.output_dim,
    )

    group2_deep_embedding_fc = fc.embedding_column(
        group2_fc,
        dimension=group2_embedding_deep.output_dim,
    )

    group3_deep_embedding_fc = fc.embedding_column(
        group3_fc,
        dimension=group3_embedding_deep.output_dim,
    )

    wide_feature_columns = [
        group1_wide_embedding_fc,
        group2_wide_embedding_fc,
    ]

    deep_feature_columns = [
        group1_deep_embedding_fc,
        group2_deep_embedding_fc,
        group3_deep_embedding_fc,
    ]

    return (
        tf.keras.layers.DenseFeatures(wide_feature_columns)(source_inputs),
        tf.keras.layers.DenseFeatures(deep_feature_columns)(source_inputs),
    )
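
The two returned tensors are the wide and deep halves of a wide-and-deep model. A hypothetical way to wire them into a Keras head; the Input specs and layer sizes below are illustrative assumptions, not part of the generated code (and the *_hash, *_lookup, and *_bucketize configs referenced above must already be in scope):

import tensorflow as tf

string_features = ["education", "occupation", "native_country", "workclass",
                   "marital_status", "relationship", "race", "sex"]
numeric_features = ["age", "capital_gain", "capital_loss", "hours_per_week"]

# Assumed raw-feature signature for the census-style inputs.
source_inputs = {
    name: tf.keras.Input(shape=(1,), name=name, dtype=tf.string)
    for name in string_features
}
source_inputs.update({
    name: tf.keras.Input(shape=(1,), name=name, dtype=tf.float32)
    for name in numeric_features
})

wide, deep = transform_from_code_gen(source_inputs)
deep = tf.keras.layers.Dense(64, activation="relu")(deep)
logits = tf.keras.layers.Dense(1)(tf.keras.layers.concatenate([wide, deep]))
model = tf.keras.Model(inputs=source_inputs, outputs=logits)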