Example #1
0
 def _replace_attr_with_keras_embedding(model):
     """Replace the elasticdl.layers.Embedding attributes in the model
     with `tf.keras.layers.Embedding` or `SparseEmbedding` layers.

     Each `Embedding` attribute becomes a `SparseEmbedding` when a
     combiner is configured, otherwise a plain
     `tf.keras.layers.Embedding`; `DenseFeatures` attributes are rewritten
     via `_replace_edl_embedding_column_with_tf`.

     Args:
         model: a model instance whose attributes are inspected and
             replaced in place.

     Returns:
         The same `model` instance, mutated in place.
     """
     for name, value in model.__dict__.items():
         # Exact type check on purpose: only elasticdl's own Embedding
         # instances are replaced, not arbitrary subclasses.
         if type(value) == Embedding:
             # The combiner is not None only for SparseEmbedding.
             if value.combiner is not None:
                 logger.info("Replace elasticdl with SparseEmbedding")
                 embedding_layer = SparseEmbedding(
                     output_dim=value.output_dim,
                     input_dim=value.input_dim,
                     embeddings_initializer=value.embeddings_initializer,
                     combiner=value.combiner,
                 )
             else:
                 # Fixed typo in log message: was "tf.kerasl.layers.Embedding".
                 logger.info(
                     "Replace elasticdl with tf.keras.layers.Embedding")
                 embedding_layer = tf.keras.layers.Embedding(
                     output_dim=value.output_dim,
                     input_dim=value.input_dim,
                     embeddings_initializer=value.embeddings_initializer,
                     mask_zero=value.mask_zero,
                     input_length=value.input_length,
                 )
             setattr(model, name, embedding_layer)
         elif type(value) == tf.keras.layers.DenseFeatures:
             feature_layer = _replace_edl_embedding_column_with_tf(value)
             setattr(model, name, feature_layer)
     return model
Example #2
0
 def _clone_function(layer):
     """Map a layer to its replacement when cloning a model.

     elasticdl `Embedding` layers are rebuilt as `SparseEmbedding` (when a
     combiner is configured) or as `tf.keras.layers.Embedding`;
     `DenseFeatures` layers are rewritten via
     `_replace_edl_embedding_column_with_tf`. Every other layer is
     returned unchanged.
     """
     if type(layer) == tf.keras.layers.DenseFeatures:
         return _replace_edl_embedding_column_with_tf(layer)
     if type(layer) != Embedding:
         return layer

     logger.info("Replace embedding layer with "
                 "elasticdl.layers.Embedding")
     # A non-None combiner identifies SparseEmbedding.
     if layer.combiner is None:
         return tf.keras.layers.Embedding(
             output_dim=layer.output_dim,
             input_dim=layer.input_dim,
             embeddings_initializer=layer.embeddings_initializer,
             mask_zero=layer.mask_zero,
             input_length=layer.input_length,
             name=layer.name,
         )
     return SparseEmbedding(
         output_dim=layer.output_dim,
         input_dim=layer.input_dim,
         embeddings_initializer=layer.embeddings_initializer,
         name=layer.name,
         combiner=layer.combiner,
     )
Example #3
0
def custom_model_with_sparse_embedding():
    """Build a tiny functional model: sparse input -> SparseEmbedding -> Dense(1)."""
    feature = tf.keras.layers.Input(
        shape=(4,), dtype="int64", sparse=True, name="sparse_feature"
    )
    embedded = SparseEmbedding(
        EMBEDDING_INPUT_DIM, 2, combiner="sum", name="embedding"
    )(feature)
    prediction = tf.keras.layers.Dense(1)(embedded)
    return tf.keras.models.Model(feature, prediction)
def transform(inputs):
    """Apply the generated feature-transform plan to `inputs`.

    Walks FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY in order, writing each
    op's result into a working copy of `inputs`, then returns the
    tensors named by TRANSFORM_OUTPUTS as a tuple.
    """
    transformed = inputs.copy()

    for info in FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY:
        op = info.op_type
        if op == TransformOpType.HASH:
            sparse = ToSparse()(transformed[info.input])
            transformed[info.input] = sparse
            transformed[info.output] = Hashing(info.hash_bucket_size)(sparse)
        elif op == TransformOpType.BUCKETIZE:
            sparse = ToSparse()(transformed[info.input])
            transformed[info.input] = sparse
            transformed[info.output] = Discretization(info.boundaries)(sparse)
        elif op == TransformOpType.LOOKUP:
            sparse = ToSparse()(transformed[info.input])
            transformed[info.input] = sparse
            transformed[info.output] = IndexLookup(info.vocabulary_list)(
                sparse
            )
        elif op == TransformOpType.CONCAT:
            to_concat = [transformed[name] for name in info.input]
            transformed[info.output] = ConcatenateWithOffset(
                info.id_offsets
            )(to_concat)
        elif op == TransformOpType.EMBEDDING:
            transformed[info.output] = SparseEmbedding(
                input_dim=info.input_dim,
                output_dim=info.output_dim,
            )(transformed[info.input])
        elif op == TransformOpType.ARRAY:
            transformed[info.output] = [
                transformed[name] for name in info.input
            ]

    return tuple(transformed[name] for name in TRANSFORM_OUTPUTS)
def transform_from_code_gen(source_inputs):
    """Code-generated feature transform for the wide-and-deep model.

    Hashes / looks up / bucketizes the raw columns, concatenates them
    into three id groups, and embeds each group for the wide and deep
    towers.

    Returns:
        A pair ``(wide_embeddings, deep_embeddings)`` of lists of
        embedded group tensors.
    """
    inputs = source_inputs.copy()

    # Per-column helpers: each converts the raw column to sparse and
    # applies the configured transform op.
    def _hash(column, op):
        return Hashing(op.hash_bucket_size)(ToSparse()(inputs[column]))

    def _lookup(column, op):
        return IndexLookup(op.vocabulary_list)(ToSparse()(inputs[column]))

    def _bucketize(column, op):
        return Discretization(op.boundaries)(ToSparse()(inputs[column]))

    def _embed(op, ids):
        return SparseEmbedding(
            input_dim=op.input_dim, output_dim=op.output_dim
        )(ids)

    education_hash_out = _hash("education", education_hash)
    occupation_hash_out = _hash("occupation", occupation_hash)
    native_country_hash_out = _hash("native_country", native_country_hash)
    workclass_lookup_out = _lookup("workclass", workclass_lookup)
    marital_status_lookup_out = _lookup(
        "marital_status", marital_status_lookup
    )
    relationship_lookup_out = _lookup("relationship", relationship_lookup)
    race_lookup_out = _lookup("race", race_lookup)
    sex_lookup_out = _lookup("sex", sex_lookup)
    age_bucketize_out = _bucketize("age", age_bucketize)
    capital_gain_bucketize_out = _bucketize(
        "capital_gain", capital_gain_bucketize
    )
    capital_loss_bucketize_out = _bucketize(
        "capital_loss", capital_loss_bucketize
    )
    hours_per_week_bucketize_out = _bucketize(
        "hours_per_week", hours_per_week_bucketize
    )

    # Concatenate the transformed ids into three groups with offsets.
    group1_out = ConcatenateWithOffset(group1.id_offsets)(
        [
            workclass_lookup_out,
            hours_per_week_bucketize_out,
            capital_gain_bucketize_out,
            capital_loss_bucketize_out,
        ]
    )
    group2_out = ConcatenateWithOffset(group2.id_offsets)(
        [
            education_hash_out,
            marital_status_lookup_out,
            relationship_lookup_out,
            occupation_hash_out,
        ]
    )
    group3_out = ConcatenateWithOffset(group3.id_offsets)(
        [
            age_bucketize_out,
            sex_lookup_out,
            race_lookup_out,
            native_country_hash_out,
        ]
    )

    # Wide tower embeds groups 1-2; deep tower embeds all three.
    wide_embeddings_out = [
        _embed(group1_embedding_wide, group1_out),
        _embed(group2_embedding_wide, group2_out),
    ]
    deep_embeddings_out = [
        _embed(group1_embedding_deep, group1_out),
        _embed(group2_embedding_deep, group2_out),
        _embed(group3_embedding_deep, group3_out),
    ]

    return wide_embeddings_out, deep_embeddings_out