def _replace_attr_with_keras_embedding(model):
    """Replace the elasticdl.layers.Embedding attributes in the model
    with `tf.keras.layers.Embedding` or `SparseEmbedding` layers.

    Args:
        model: A keras model instance whose attributes may be
            `elasticdl.layers.Embedding` layers or
            `tf.keras.layers.DenseFeatures` layers.

    Returns:
        The same model instance, with matching attributes replaced
        in place via `setattr`.
    """
    for name, value in model.__dict__.items():
        if type(value) == Embedding:
            # The combiner is not None only for SparseEmbedding.
            if value.combiner is not None:
                logger.info("Replace elasticdl with SparseEmbedding")
                embedding_layer = SparseEmbedding(
                    output_dim=value.output_dim,
                    input_dim=value.input_dim,
                    embeddings_initializer=value.embeddings_initializer,
                    combiner=value.combiner,
                )
            else:
                # Fix: log message previously said "tf.kerasl.layers..."
                # (typo) instead of "tf.keras.layers...".
                logger.info(
                    "Replace elasticdl with tf.keras.layers.Embedding"
                )
                embedding_layer = tf.keras.layers.Embedding(
                    output_dim=value.output_dim,
                    input_dim=value.input_dim,
                    embeddings_initializer=value.embeddings_initializer,
                    mask_zero=value.mask_zero,
                    input_length=value.input_length,
                )
            setattr(model, name, embedding_layer)
        elif type(value) == tf.keras.layers.DenseFeatures:
            feature_layer = _replace_edl_embedding_column_with_tf(value)
            setattr(model, name, feature_layer)
    return model
def _clone_function(layer):
    """Clone-function hook for `tf.keras.models.clone_model`.

    Converts `elasticdl.layers.Embedding` layers to `SparseEmbedding`
    (when a combiner is configured) or `tf.keras.layers.Embedding`,
    and rewrites the embedding columns inside `DenseFeatures` layers.
    Any other layer is returned unchanged.

    Args:
        layer: The keras layer being cloned.

    Returns:
        The replacement layer, or `layer` itself when no replacement
        applies.
    """
    if type(layer) == Embedding:
        # Fix: the old message stated the replacement direction
        # backwards ("Replace embedding layer with
        # elasticdl.layers.Embedding") — this code replaces an
        # elasticdl Embedding with a keras embedding layer.
        logger.info(
            "Replace elasticdl.layers.Embedding with "
            "a keras embedding layer"
        )
        # The combiner is not None only for SparseEmbedding.
        if layer.combiner is not None:
            embedding_layer = SparseEmbedding(
                output_dim=layer.output_dim,
                input_dim=layer.input_dim,
                embeddings_initializer=layer.embeddings_initializer,
                name=layer.name,
                combiner=layer.combiner,
            )
        else:
            embedding_layer = tf.keras.layers.Embedding(
                output_dim=layer.output_dim,
                input_dim=layer.input_dim,
                embeddings_initializer=layer.embeddings_initializer,
                mask_zero=layer.mask_zero,
                input_length=layer.input_length,
                name=layer.name,
            )
        return embedding_layer
    elif type(layer) == tf.keras.layers.DenseFeatures:
        return _replace_edl_embedding_column_with_tf(layer)
    return layer
def custom_model_with_sparse_embedding():
    """Build a small functional model:

    sparse int64 input -> SparseEmbedding(sum combiner) -> Dense(1).
    """
    feature = tf.keras.layers.Input(
        shape=(4,), dtype="int64", sparse=True, name="sparse_feature"
    )
    embedded = SparseEmbedding(
        EMBEDDING_INPUT_DIM, 2, combiner="sum", name="embedding"
    )(feature)
    prediction = tf.keras.layers.Dense(1)(embedded)
    return tf.keras.models.Model(feature, prediction)
def transform(inputs):
    """Apply the generated feature-transform plan to `inputs`.

    Walks FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY in order, applying each
    op (hash, bucketize, lookup, concat, embedding, array) to a copy
    of the inputs dict, then returns the tensors named in
    TRANSFORM_OUTPUTS as a tuple.
    """
    transformed = inputs.copy()
    for info in FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY:
        op = info.op_type
        if op == TransformOpType.HASH:
            # Sparsify in place, then hash ids into fixed buckets.
            transformed[info.input] = ToSparse()(transformed[info.input])
            transformed[info.output] = Hashing(info.hash_bucket_size)(
                transformed[info.input]
            )
        elif op == TransformOpType.BUCKETIZE:
            # Sparsify in place, then discretize by boundaries.
            transformed[info.input] = ToSparse()(transformed[info.input])
            transformed[info.output] = Discretization(info.boundaries)(
                transformed[info.input]
            )
        elif op == TransformOpType.LOOKUP:
            # Sparsify in place, then map values to vocabulary indices.
            transformed[info.input] = ToSparse()(transformed[info.input])
            transformed[info.output] = IndexLookup(info.vocabulary_list)(
                transformed[info.input]
            )
        elif op == TransformOpType.CONCAT:
            # Concatenate several id tensors with per-source offsets.
            concat_inputs = [transformed[name] for name in info.input]
            transformed[info.output] = ConcatenateWithOffset(
                info.id_offsets
            )(concat_inputs)
        elif op == TransformOpType.EMBEDDING:
            # Embed the sparse id tensor into a dense vector.
            transformed[info.output] = SparseEmbedding(
                input_dim=info.input_dim,
                output_dim=info.output_dim,
            )(transformed[info.input])
        elif op == TransformOpType.ARRAY:
            # Collect several tensors into a list output.
            transformed[info.output] = [
                transformed[name] for name in info.input
            ]
    return tuple(transformed[name] for name in TRANSFORM_OUTPUTS)
def transform_from_code_gen(source_inputs):
    """Code-generated feature transform for the census-style inputs.

    Hashes, looks up, and bucketizes the raw features, concatenates
    them into three id groups, and embeds the groups.

    Returns:
        A pair `(wide_embeddings_out, deep_embeddings_out)` of lists
        of SparseEmbedding outputs.
    """
    inputs = source_inputs.copy()

    def hashed(name, config):
        # Sparsify the raw feature, then hash into config's buckets.
        return Hashing(config.hash_bucket_size)(ToSparse()(inputs[name]))

    def looked_up(name, config):
        # Sparsify the raw feature, then map to vocabulary indices.
        return IndexLookup(config.vocabulary_list)(ToSparse()(inputs[name]))

    def bucketized(name, config):
        # Sparsify the raw feature, then discretize by boundaries.
        return Discretization(config.boundaries)(ToSparse()(inputs[name]))

    education_hash_out = hashed("education", education_hash)
    occupation_hash_out = hashed("occupation", occupation_hash)
    native_country_hash_out = hashed("native_country", native_country_hash)

    workclass_lookup_out = looked_up("workclass", workclass_lookup)
    marital_status_lookup_out = looked_up(
        "marital_status", marital_status_lookup
    )
    relationship_lookup_out = looked_up("relationship", relationship_lookup)
    race_lookup_out = looked_up("race", race_lookup)
    sex_lookup_out = looked_up("sex", sex_lookup)

    age_bucketize_out = bucketized("age", age_bucketize)
    capital_gain_bucketize_out = bucketized(
        "capital_gain", capital_gain_bucketize
    )
    capital_loss_bucketize_out = bucketized(
        "capital_loss", capital_loss_bucketize
    )
    hours_per_week_bucketize_out = bucketized(
        "hours_per_week", hours_per_week_bucketize
    )

    # Merge related id tensors into groups, offsetting each source's
    # id range so the ids stay distinct after concatenation.
    group1_out = ConcatenateWithOffset(group1.id_offsets)(
        [
            workclass_lookup_out,
            hours_per_week_bucketize_out,
            capital_gain_bucketize_out,
            capital_loss_bucketize_out,
        ]
    )
    group2_out = ConcatenateWithOffset(group2.id_offsets)(
        [
            education_hash_out,
            marital_status_lookup_out,
            relationship_lookup_out,
            occupation_hash_out,
        ]
    )
    group3_out = ConcatenateWithOffset(group3.id_offsets)(
        [
            age_bucketize_out,
            sex_lookup_out,
            race_lookup_out,
            native_country_hash_out,
        ]
    )

    def embedded(config, group_out):
        # Embed a group's sparse ids into a dense vector.
        return SparseEmbedding(
            input_dim=config.input_dim, output_dim=config.output_dim
        )(group_out)

    wide_embeddings_out = [
        embedded(group1_embedding_wide, group1_out),
        embedded(group2_embedding_wide, group2_out),
    ]
    deep_embeddings_out = [
        embedded(group1_embedding_deep, group1_out),
        embedded(group2_embedding_deep, group2_out),
        embedded(group3_embedding_deep, group3_out),
    ]
    return wide_embeddings_out, deep_embeddings_out