Exemplo n.º 1
0
def dnn_model(hidden_units: List[int], dropout: float, activation_fn=tf.keras.activations.relu):
    """Build a feed-forward model over categorically encoded features.

    Each raw input is converted to a fixed-width categorical encoding, the
    encodings are concatenated, and the result is passed through a stack of
    ``Dense`` + ``Dropout`` layers followed by a single sigmoid unit.

    Args:
        hidden_units: Width of each hidden ``Dense`` layer, in order.
        dropout: Dropout rate applied after every hidden layer.
        activation_fn: Activation used by the hidden ``Dense`` layers.

    Returns:
        An uncompiled ``tf.keras.Model`` that takes the eight named inputs
        and emits one sigmoid value per example.
    """
    layers = tf.keras.layers
    preprocessing = layers.experimental.preprocessing

    timestamp = layers.Input(shape=(1,), name="timestamp", dtype=tf.int64)
    gender = layers.Input(shape=(1,), name="gender", dtype=tf.string)
    age = layers.Input(shape=(1,), name="age", dtype=tf.int64)
    occupation = layers.Input(shape=(1,), name="occupation", dtype=tf.string)
    zip_code = layers.Input(shape=(1,), name="zip_code", dtype=tf.string)
    keywords = layers.Input(shape=(None,), name="keywords", dtype=tf.string, ragged=True)
    publish_year = layers.Input(shape=(1,), name="publishYear", dtype=tf.int64)
    categories = layers.Input(shape=(None,), name="categories", dtype=tf.string, ragged=True)

    # NOTE(review): the arithmetic below assumes `timestamp` is in seconds -- confirm.
    # Discretization places a value equal to a boundary in the bucket above it,
    # so boundaries 1..23 map the hours 0..23 onto ids 0..23. Boundaries 0..23
    # would produce ids 1..24 and overflow the 24-wide encoding.
    timestamp_hour_layer = preprocessing.CategoryEncoding(max_tokens=24)(
        preprocessing.Discretization(bins=list(range(1, 24)))(
            layers.Lambda(function=lambda tensor: tensor // 3600 % 24)(timestamp)))
    # Same reasoning for the position in the 7-day cycle: boundaries 1..6 -> ids 0..6.
    timestamp_week_layer = preprocessing.CategoryEncoding(max_tokens=7)(
        preprocessing.Discretization(bins=list(range(1, 7)))(
            layers.Lambda(function=lambda tensor: tensor // 86400 % 7)(timestamp)))

    # Every lookup below uses num_oov_indices=0 so that its ids span exactly
    # 0..len(vocabulary)-1, which is the range the following CategoryEncoding
    # (max_tokens == len(vocabulary)) accepts. With the default single OOV
    # slot the largest id would equal max_tokens and fall outside the encoding.
    gender_layer = preprocessing.CategoryEncoding(max_tokens=2, output_mode="binary")(
        preprocessing.StringLookup(
            vocabulary=["F", "M"], mask_token=None, num_oov_indices=0)(gender))
    age_layer = preprocessing.CategoryEncoding(max_tokens=7)(
        preprocessing.IntegerLookup(
            vocabulary=[1, 18, 25, 35, 45, 50, 56], mask_value=None, num_oov_indices=0)(age))
    # NOTE(review): max_tokens=21 assumes `user_occupation` has 21 entries -- confirm.
    occupation_layer = preprocessing.CategoryEncoding(max_tokens=21)(
        preprocessing.StringLookup(
            vocabulary=list(user_occupation.values()), mask_token=None, num_oov_indices=0)(occupation))
    # 3439 (original note; presumably the number of distinct zip codes)
    zip_code_layer = preprocessing.CategoryEncoding(max_tokens=10000)(
        preprocessing.Hashing(num_bins=10000)(zip_code))
    # 4862 (original note; presumably the number of distinct keywords)
    keywords_layer = preprocessing.CategoryEncoding(max_tokens=10000)(
        preprocessing.Hashing(num_bins=10000)(keywords))
    # Years 1919 ~ 2000 inclusive: 82 values.
    publish_year_layer = preprocessing.CategoryEncoding(max_tokens=82)(
        preprocessing.IntegerLookup(
            vocabulary=list(range(1919, 2001)), mask_value=None, num_oov_indices=0)(publish_year))
    categories_layer = preprocessing.CategoryEncoding(max_tokens=18)(
        preprocessing.StringLookup(
            vocabulary=["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary", "Drama",
                        "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War",
                        "Western"], mask_token=None, num_oov_indices=0)(categories))

    input_layer = layers.Concatenate(axis=1)(inputs=[
        timestamp_hour_layer, timestamp_week_layer, gender_layer, age_layer, occupation_layer, zip_code_layer,
        keywords_layer, publish_year_layer, categories_layer
    ])

    # Hidden stack: Dense followed by Dropout for every requested width.
    for hidden_unit in hidden_units:
        input_layer = layers.Dense(units=hidden_unit, activation=activation_fn)(input_layer)
        input_layer = layers.Dropout(rate=dropout)(input_layer)

    # The logits get their own named layer; the sigmoid is applied separately.
    logits_layer = layers.Dense(units=1, activation=None, name="LogitsLayer")(input_layer)
    predict = layers.Lambda(function=lambda tensor: tf.nn.sigmoid(tensor), name="Sigmoid")(logits_layer)

    model = tf.keras.Model(inputs=[
        timestamp, gender, age, occupation, zip_code, keywords, publish_year, categories
    ], outputs=predict)

    return model
Exemplo n.º 2
0
def youtube_net_model(embedding_size,
                      hidden_units: List[int],
                      dropout: float,
                      activation_fn=tf.keras.activations.relu,
                      num_sampled: int = 100,
                      movie_id_size=3953):
    """Build the user tower plus candidate loss layer of the retrieval model.

    User profile features and the summed embeddings of the user's high- and
    low-score movie histories are concatenated and passed through a stack of
    ``Dense`` + ``Dropout`` layers. The result, together with the target
    ``movie_id``, is handed to ``CandidateSampledLossLayer``, which also
    receives the shared movie-id embedding layer and ``num_sampled``.

    Args:
        embedding_size: Output dimension of the movie-id embedding; must
            equal ``hidden_units[-1]``.
        hidden_units: Width of each hidden ``Dense`` layer, in order.
        dropout: Dropout rate applied after every hidden layer.
        activation_fn: Activation used by the hidden ``Dense`` layers.
        num_sampled: Forwarded to ``CandidateSampledLossLayer``.
        movie_id_size: ``input_dim`` of the movie-id embedding.

    Returns:
        An uncompiled Keras model whose single output is the output of
        ``CandidateSampledLossLayer``.

    Raises:
        AssertionError: If ``embedding_size`` differs from the last entry of
            ``hidden_units``.
    """
    assert embedding_size == hidden_units[-1], "隐含层最后一层需要和嵌入向量维度保持一致"

    keras_layers = tf.keras.layers
    preprocessing = keras_layers.experimental.preprocessing

    # Candidate movie input.
    movie_id = keras_layers.Input(shape=(1,), name="movie_id", dtype=tf.int64)
    # User profile inputs.
    gender = keras_layers.Input(shape=(1,), name="gender", dtype=tf.string)
    age = keras_layers.Input(shape=(1,), name="age", dtype=tf.int64)
    occupation = keras_layers.Input(shape=(1,), name="occupation", dtype=tf.string)
    zip_code = keras_layers.Input(shape=(1,), name="zip_code", dtype=tf.string)
    # Variable-length movie-id histories.
    user_history_high_score_movies = keras_layers.Input(
        shape=(None,), name="user_history_high_score_movies", dtype=tf.int64, ragged=True)
    user_history_low_score_movies = keras_layers.Input(
        shape=(None,), name="user_history_low_score_movies", dtype=tf.int64, ragged=True)

    # One embedding table shared by both histories and by the loss layer.
    shared_movie_embedding = keras_layers.Embedding(
        input_dim=movie_id_size, output_dim=embedding_size, name="MovieIdEmbedding")

    # Gender: lookup without an OOV slot, then a 2-wide binary encoding.
    gender_encoder = preprocessing.CategoryEncoding(max_tokens=2, output_mode="binary")
    gender_lookup = preprocessing.StringLookup(
        vocabulary=["F", "M"], num_oov_indices=0, mask_token=None)
    gender_vec = gender_encoder(gender_lookup(gender))

    # Age: 7 vocabulary entries plus the default OOV slot -> input_dim=8.
    age_flatten = keras_layers.Flatten()
    age_embedding = keras_layers.Embedding(input_dim=8, output_dim=3, name="AgeEmbedding")
    age_ids = preprocessing.IntegerLookup(
        vocabulary=[1, 18, 25, 35, 45, 50, 56], mask_value=None)(age)
    age_vec = age_flatten(age_embedding(age_ids))

    # Occupation: vocabulary comes from the module-level `user_occupation`.
    occupation_flatten = keras_layers.Flatten()
    occupation_embedding = keras_layers.Embedding(
        input_dim=22, output_dim=5, name="OccupationEmbedding")
    occupation_ids = preprocessing.StringLookup(
        vocabulary=list(user_occupation.values()), mask_token=None)(occupation)
    occupation_vec = occupation_flatten(occupation_embedding(occupation_ids))

    # Zip code: hashed into 10000 bins before embedding.
    zip_code_flatten = keras_layers.Flatten()
    zip_code_embedding = keras_layers.Embedding(
        input_dim=10000, output_dim=13, name="ZipCodeEmbedding")
    zip_code_ids = preprocessing.Hashing(num_bins=10000)(zip_code)
    zip_code_vec = zip_code_flatten(zip_code_embedding(zip_code_ids))

    # Histories: embed every movie id, then sum along axis 1.
    high_score_sum = keras_layers.Lambda(
        function=lambda embedded: tf.math.reduce_sum(embedded, axis=1),
        name="UserHistoryHighScoreMovieReduceSum")
    high_score_vec = high_score_sum(shared_movie_embedding(user_history_high_score_movies))
    low_score_sum = keras_layers.Lambda(
        function=lambda embedded: tf.math.reduce_sum(embedded, axis=1),
        name="UserHistoryLowScoreMovieReduceSum")
    low_score_vec = low_score_sum(shared_movie_embedding(user_history_low_score_movies))

    user_vec = keras_layers.Concatenate()([
        gender_vec, age_vec, occupation_vec, zip_code_vec, high_score_vec, low_score_vec
    ])

    # Hidden stack: Dense followed by Dropout for every requested width.
    for width in hidden_units:
        dense = keras_layers.Dense(units=width, activation=activation_fn)
        user_vec = keras_layers.Dropout(rate=dropout)(dense(user_vec))

    loss_layer = CandidateSampledLossLayer(shared_movie_embedding, num_sampled)
    loss = loss_layer(inputs=[user_vec, movie_id])

    model_inputs = [
        movie_id, gender, age, occupation, zip_code,
        user_history_high_score_movies, user_history_low_score_movies
    ]
    return tf.keras.models.Model(inputs=model_inputs, outputs=[loss])
Exemplo n.º 3
0
def fmm_model(embedding_size):
    """Build a model that adds a linear term to an ``FMMCrossLayer`` term.

    Each raw input is first converted to integer ids. The ids feed two
    branches: a linear branch (categorical encodings concatenated into a
    one-unit ``Dense`` layer) and ``FMMCrossLayer``, which receives the id
    tensors directly. The two branch outputs are summed.

    Args:
        embedding_size: Forwarded to ``FMMCrossLayer``.

    Returns:
        An uncompiled Keras model whose output is the sum of the two
        branches; no activation is applied to that sum here.
    """
    layers = tf.keras.layers
    preprocessing = layers.experimental.preprocessing

    timestamp = layers.Input(shape=(1,), name="timestamp", dtype=tf.int64)
    gender = layers.Input(shape=(1,), name="gender", dtype=tf.string)
    age = layers.Input(shape=(1,), name="age", dtype=tf.int64)
    occupation = layers.Input(shape=(1,), name="occupation", dtype=tf.string)
    zip_code = layers.Input(shape=(1,), name="zip_code", dtype=tf.string)
    keywords = layers.Input(shape=(None,), name="keywords", dtype=tf.string, ragged=True)
    publish_year = layers.Input(shape=(1,), name="publishYear", dtype=tf.int64)
    categories = layers.Input(shape=(None,), name="categories", dtype=tf.string, ragged=True)

    # NOTE(review): the arithmetic below assumes `timestamp` is in seconds -- confirm.
    # Discretization places a value equal to a boundary in the bucket above it,
    # so boundaries 1..23 map the hours 0..23 onto ids 0..23. Boundaries 0..23
    # would produce ids 1..24 and overflow the 24-wide encoding used below.
    timestamp_hour_layer = preprocessing.Discretization(bins=list(range(1, 24)))(
        layers.Lambda(function=lambda tensor: tensor // 3600 % 24)(timestamp))
    # Same reasoning for the position in the 7-day cycle: boundaries 1..6 -> ids 0..6.
    timestamp_week_layer = preprocessing.Discretization(bins=list(range(1, 7)))(
        layers.Lambda(function=lambda tensor: tensor // 86400 % 7)(timestamp))
    # Lookups use num_oov_indices=0 so ids span exactly 0..len(vocabulary)-1.
    gender_layer = preprocessing.StringLookup(
        vocabulary=["F", "M"], mask_token=None, num_oov_indices=0)(gender)
    age_layer = preprocessing.IntegerLookup(
        vocabulary=[1, 18, 25, 35, 45, 50, 56], mask_value=None, num_oov_indices=0)(age)
    occupation_layer = preprocessing.StringLookup(
        vocabulary=list(user_occupation.values()), mask_token=None, num_oov_indices=0)(occupation)
    # 3439 (original note; presumably the number of distinct zip codes)
    zip_code_layer = preprocessing.Hashing(num_bins=10000)(zip_code)
    # 4862 (original note; presumably the number of distinct keywords)
    keywords_layer = preprocessing.Hashing(num_bins=10000)(keywords)
    # Years 1919 ~ 2000 inclusive: 82 values.
    publish_year_layer = preprocessing.IntegerLookup(
        vocabulary=list(range(1919, 2001)), mask_value=None, num_oov_indices=0)(publish_year)
    categories_layer = preprocessing.StringLookup(
        vocabulary=["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary", "Drama",
                    "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War",
                    "Western"], mask_token=None, num_oov_indices=0)(categories)

    # Linear branch: encode every id tensor, concatenate, project to one unit.
    timestamp_hour_lr_layer = preprocessing.CategoryEncoding(max_tokens=24)(timestamp_hour_layer)
    timestamp_week_lr_layer = preprocessing.CategoryEncoding(max_tokens=7)(timestamp_week_layer)
    gender_lr_layer = preprocessing.CategoryEncoding(max_tokens=2, output_mode="binary")(gender_layer)
    age_lr_layer = preprocessing.CategoryEncoding(max_tokens=7)(age_layer)
    # NOTE(review): max_tokens=21 assumes `user_occupation` has 21 entries -- confirm.
    occupation_lr_layer = preprocessing.CategoryEncoding(max_tokens=21)(occupation_layer)
    zip_code_lr_layer = preprocessing.CategoryEncoding(max_tokens=10000)(zip_code_layer)
    keywords_lr_layer = preprocessing.CategoryEncoding(max_tokens=10000)(keywords_layer)
    publish_year_lr_layer = preprocessing.CategoryEncoding(max_tokens=82)(publish_year_layer)
    categories_lr_layer = preprocessing.CategoryEncoding(max_tokens=18)(categories_layer)

    lr_input_layer = layers.Concatenate()(inputs=[
        timestamp_hour_lr_layer, timestamp_week_lr_layer, gender_lr_layer, age_lr_layer, occupation_lr_layer,
        zip_code_lr_layer, keywords_lr_layer, publish_year_lr_layer, categories_lr_layer
    ])
    lr_input_layer = layers.Dense(units=1)(lr_input_layer)

    # Cross branch: FMMCrossLayer consumes the raw id tensors.
    # NOTE(review): FMMCrossLayer is defined elsewhere; confirm it expects
    # 0-based hour / day ids (24 and 7 distinct values respectively).
    fmm_input_layer = FMMCrossLayer(embedding_size=embedding_size)(inputs=[
        timestamp_hour_layer, timestamp_week_layer, gender_layer, age_layer, occupation_layer, zip_code_layer,
        keywords_layer, publish_year_layer, categories_layer
    ])

    predict = layers.Add()(inputs=[lr_input_layer, fmm_input_layer])

    model = tf.keras.models.Model(inputs=[
        timestamp, gender, age, occupation, zip_code, keywords, publish_year, categories
    ], outputs=[predict])

    return model
Exemplo n.º 4
0
def poly2_model(output_dim):
    """Build a single-layer model over encoded features and hashed crosses.

    Every raw input is converted to a fixed-width categorical encoding. Two
    feature crosses (gender x age, age x publish year) are produced by
    hashing the paired raw inputs. All encodings are concatenated and fed to
    one sigmoid ``Dense`` layer.

    Args:
        output_dim: Number of units of the output ``Dense`` layer.

    Returns:
        An uncompiled Keras model taking the eight named inputs.
    """
    layers = tf.keras.layers
    preprocessing = layers.experimental.preprocessing

    timestamp = layers.Input(shape=(1,), name="timestamp", dtype=tf.int64)
    gender = layers.Input(shape=(1,), name="gender", dtype=tf.string)
    age = layers.Input(shape=(1,), name="age", dtype=tf.int64)
    occupation = layers.Input(shape=(1,), name="occupation", dtype=tf.string)
    zip_code = layers.Input(shape=(1,), name="zip_code", dtype=tf.string)
    keywords = layers.Input(shape=(None,), name="keywords", dtype=tf.string, ragged=True)
    publish_year = layers.Input(shape=(1,), name="publishYear", dtype=tf.int64)
    categories = layers.Input(shape=(None,), name="categories", dtype=tf.string, ragged=True)

    # NOTE(review): the arithmetic below assumes `timestamp` is in seconds -- confirm.
    # Discretization places a value equal to a boundary in the bucket above it,
    # so boundaries 1..23 map the hours 0..23 onto ids 0..23. Boundaries 0..23
    # would produce ids 1..24 and overflow the 24-wide encoding.
    timestamp_hour_layer = preprocessing.CategoryEncoding(max_tokens=24)(
        preprocessing.Discretization(bins=list(range(1, 24)))(
            layers.Lambda(function=lambda tensor: tensor // 3600 % 24)(timestamp)))
    # Same reasoning for the position in the 7-day cycle: boundaries 1..6 -> ids 0..6.
    timestamp_week_layer = preprocessing.CategoryEncoding(max_tokens=7)(
        preprocessing.Discretization(bins=list(range(1, 7)))(
            layers.Lambda(function=lambda tensor: tensor // 86400 % 7)(timestamp)))

    # Every lookup below uses num_oov_indices=0 so that its ids span exactly
    # 0..len(vocabulary)-1, which is the range the following CategoryEncoding
    # (max_tokens == len(vocabulary)) accepts. With the default single OOV
    # slot the largest id would equal max_tokens and fall outside the encoding.
    gender_layer = preprocessing.CategoryEncoding(max_tokens=2, output_mode="binary")(
        preprocessing.StringLookup(
            vocabulary=["F", "M"], mask_token=None, num_oov_indices=0)(gender))
    age_layer = preprocessing.CategoryEncoding(max_tokens=7)(
        preprocessing.IntegerLookup(
            vocabulary=[1, 18, 25, 35, 45, 50, 56], mask_value=None, num_oov_indices=0)(age))
    # NOTE(review): max_tokens=21 assumes `user_occupation` has 21 entries -- confirm.
    occupation_layer = preprocessing.CategoryEncoding(max_tokens=21)(
        preprocessing.StringLookup(
            vocabulary=list(user_occupation.values()), mask_token=None, num_oov_indices=0)(occupation))
    # 3439 (original note; presumably the number of distinct zip codes)
    zip_code_layer = preprocessing.CategoryEncoding(max_tokens=10000)(
        preprocessing.Hashing(num_bins=10000)(zip_code))
    # 4862 (original note; presumably the number of distinct keywords)
    keywords_layer = preprocessing.CategoryEncoding(max_tokens=10000)(
        preprocessing.Hashing(num_bins=10000)(keywords))
    # Years 1919 ~ 2000 inclusive: 82 values.
    publish_year_layer = preprocessing.CategoryEncoding(max_tokens=82)(
        preprocessing.IntegerLookup(
            vocabulary=list(range(1919, 2001)), mask_value=None, num_oov_indices=0)(publish_year))
    categories_layer = preprocessing.CategoryEncoding(max_tokens=18)(
        preprocessing.StringLookup(
            vocabulary=[
                "Action", "Adventure", "Animation", "Children's", "Comedy",
                "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir",
                "Horror", "Musical", "Mystery", "Romance", "Sci-Fi",
                "Thriller", "War", "Western"
            ],
            mask_token=None, num_oov_indices=0)(categories))

    # Feature crosses: hash the paired raw inputs into a fixed number of bins.
    gender_x_age_layer = preprocessing.CategoryEncoding(max_tokens=50)(
        preprocessing.Hashing(num_bins=50)(inputs=[gender, age]))
    age_x_publish_year_layer = preprocessing.CategoryEncoding(max_tokens=100)(
        preprocessing.Hashing(num_bins=100)(inputs=[age, publish_year]))
    # NOTE: the age x keywords and gender x categories crosses are left out
    # because Hashing did not support ragged inputs when this was written.

    inputs = layers.Concatenate(axis=1)(inputs=[
        timestamp_hour_layer,
        timestamp_week_layer,
        gender_layer,
        age_layer,
        occupation_layer,
        zip_code_layer,
        keywords_layer,
        publish_year_layer,
        categories_layer,
        gender_x_age_layer,
        age_x_publish_year_layer,
    ])
    predict = layers.Dense(units=output_dim, activation=tf.keras.activations.sigmoid)(inputs)
    model = tf.keras.models.Model(inputs=[
        timestamp, gender, age, occupation, zip_code, keywords, publish_year, categories
    ], outputs=predict)
    return model
Exemplo n.º 5
0
def afm_model(embedding_size, l2_factor, hidden_unit, dropout):
    """Build a model combining a linear term with an attention-based FM term.

    Each raw input is first converted to integer ids. The ids feed two
    branches:

    * a linear branch: categorical encodings concatenated into a one-unit
      ``Dense`` layer named ``LinearLayer``;
    * an FM branch: one embedding per feature, pooled with
      ``embedding_pooling``, combined by ``pair_wise_interaction_layer_fn``,
      passed through dropout and then ``AttentionLayer``.

    The two branch outputs are added and squashed with a sigmoid.

    Args:
        embedding_size: Output dimension of every FM embedding.
        l2_factor: Forwarded to ``AttentionLayer``.
        hidden_unit: Forwarded to ``AttentionLayer``.
        dropout: Dropout rate applied to the pair-wise interaction output.

    Returns:
        An uncompiled Keras model taking the eight named inputs and emitting
        the sigmoid of the summed branches.
    """
    layers = tf.keras.layers
    preprocessing = layers.experimental.preprocessing

    timestamp = layers.Input(shape=(1,), name="timestamp", dtype=tf.int64)
    gender = layers.Input(shape=(1,), name="gender", dtype=tf.string)
    age = layers.Input(shape=(1,), name="age", dtype=tf.int64)
    occupation = layers.Input(shape=(1,), name="occupation", dtype=tf.string)
    zip_code = layers.Input(shape=(1,), name="zip_code", dtype=tf.string)
    keywords = layers.Input(shape=(None,), name="keywords", dtype=tf.string, ragged=True)
    publish_year = layers.Input(shape=(1,), name="publishYear", dtype=tf.int64)
    categories = layers.Input(shape=(None,), name="categories", dtype=tf.string, ragged=True)

    # NOTE(review): the arithmetic below assumes `timestamp` is in seconds -- confirm.
    # Discretization places a value equal to a boundary in the bucket above it,
    # so boundaries 1..23 map the hours 0..23 onto ids 0..23. Boundaries 0..23
    # would produce ids 1..24, which overflow both the 24-wide encoding and
    # the Embedding(input_dim=24) used below.
    timestamp_hour_layer = preprocessing.Discretization(bins=list(range(1, 24)))(
        layers.Lambda(function=lambda tensor: tensor // 3600 % 24)(timestamp))
    # Same reasoning for the position in the 7-day cycle: boundaries 1..6 -> ids 0..6.
    timestamp_week_layer = preprocessing.Discretization(bins=list(range(1, 7)))(
        layers.Lambda(function=lambda tensor: tensor // 86400 % 7)(timestamp))
    # Lookups use num_oov_indices=0 so ids span exactly 0..len(vocabulary)-1.
    gender_layer = preprocessing.StringLookup(
        vocabulary=["F", "M"], mask_token=None, num_oov_indices=0)(gender)
    age_layer = preprocessing.IntegerLookup(
        vocabulary=[1, 18, 25, 35, 45, 50, 56], mask_value=None, num_oov_indices=0)(age)
    occupation_layer = preprocessing.StringLookup(
        vocabulary=list(user_occupation.values()), mask_token=None, num_oov_indices=0)(occupation)
    # 3439 (original note; presumably the number of distinct zip codes)
    zip_code_layer = preprocessing.Hashing(num_bins=10000)(zip_code)
    # 4862 (original note; presumably the number of distinct keywords)
    keywords_layer = preprocessing.Hashing(num_bins=10000)(keywords)
    # Years 1919 ~ 2000 inclusive: 82 values.
    publish_year_layer = preprocessing.IntegerLookup(
        vocabulary=list(range(1919, 2001)), mask_value=None, num_oov_indices=0)(publish_year)
    categories_layer = preprocessing.StringLookup(
        vocabulary=["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary", "Drama",
                    "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War",
                    "Western"], mask_token=None, num_oov_indices=0)(categories)

    # Linear branch: encode every id tensor, concatenate, project to one unit.
    timestamp_hour_lr_layer = preprocessing.CategoryEncoding(max_tokens=24)(timestamp_hour_layer)
    timestamp_week_lr_layer = preprocessing.CategoryEncoding(max_tokens=7)(timestamp_week_layer)
    gender_lr_layer = preprocessing.CategoryEncoding(max_tokens=2, output_mode="binary")(gender_layer)
    age_lr_layer = preprocessing.CategoryEncoding(max_tokens=7)(age_layer)
    # NOTE(review): 21 assumes `user_occupation` has 21 entries -- confirm.
    occupation_lr_layer = preprocessing.CategoryEncoding(max_tokens=21)(occupation_layer)
    zip_code_lr_layer = preprocessing.CategoryEncoding(max_tokens=10000)(zip_code_layer)
    keywords_lr_layer = preprocessing.CategoryEncoding(max_tokens=10000)(keywords_layer)
    publish_year_lr_layer = preprocessing.CategoryEncoding(max_tokens=82)(publish_year_layer)
    categories_lr_layer = preprocessing.CategoryEncoding(max_tokens=18)(categories_layer)

    lr_input_layer = layers.Concatenate()(inputs=[
        timestamp_hour_lr_layer, timestamp_week_lr_layer, gender_lr_layer, age_lr_layer, occupation_lr_layer,
        zip_code_lr_layer, keywords_lr_layer, publish_year_lr_layer, categories_lr_layer
    ])
    lr_input_layer = layers.Dense(units=1, name="LinearLayer", use_bias=True)(lr_input_layer)

    # FM branch: one embedding table per feature, each reduced by the shared
    # pooling layer. (vocabulary size, id tensor) pairs are listed in the
    # order in which the pooled embeddings are handed to the interaction layer.
    embedding_pooling_layer = layers.Lambda(lambda tensor: embedding_pooling(tensor), name="EmbeddingPooling")
    fm_features = [
        (24, timestamp_hour_layer),
        (7, timestamp_week_layer),
        (2, gender_layer),
        (7, age_layer),
        (21, occupation_layer),
        (10000, zip_code_layer),
        (10000, keywords_layer),
        (82, publish_year_layer),
        (18, categories_layer),
    ]
    fm_embedding_layers = []
    for vocabulary_size, feature_ids in fm_features:
        embedded = layers.Embedding(input_dim=vocabulary_size, output_dim=embedding_size)(feature_ids)
        fm_embedding_layers.append(embedding_pooling_layer(embedded))

    # Pair-wise interaction, then dropout, then attention.
    pair_wise_interaction_layer = layers.Lambda(
        function=pair_wise_interaction_layer_fn, name="PairWiseInteractionLayer")(inputs=fm_embedding_layers)
    pair_wise_interaction_layer = layers.Dropout(rate=dropout)(pair_wise_interaction_layer)
    attention_fm_layer = AttentionLayer(hidden_unit=hidden_unit, l2_factor=l2_factor)(pair_wise_interaction_layer)

    logits = layers.Add(name="LogitsLayer")(inputs=[lr_input_layer, attention_fm_layer])
    predict = layers.Lambda(function=lambda tensor: tf.nn.sigmoid(tensor), name="SigmoidLayer")(logits)

    model = tf.keras.models.Model(inputs=[
        timestamp, gender, age, occupation, zip_code, keywords, publish_year, categories
    ], outputs=[predict])

    return model
Exemplo n.º 6
0
def din_model(hidden_units: List[int], dropout: float,
              attention_hidden_unit: int):
    """Build a model that attends over user behaviors w.r.t. a candidate item.

    Four groups of embeddings are produced: context (hour and 7-day cycle
    position of the timestamp), user profile, candidate item profile, and
    user behavior profile. Candidate and behavior features share the same
    embedding tables. ``AttentionUnitLayer`` receives the candidate and
    behavior profiles; its output is concatenated with the other groups and
    passed through ``Dense`` + ``DiceActivation`` + ``Dropout`` blocks and a
    final two-unit softmax layer.

    Args:
        hidden_units: Width of each hidden ``Dense`` layer, in order.
        dropout: Dropout rate applied after every hidden block.
        attention_hidden_unit: Forwarded to ``AttentionUnitLayer`` as
            ``hidden_unit``.

    Returns:
        An uncompiled ``tf.keras.Model`` taking the eleven named inputs and
        emitting a two-way softmax.
    """
    layers = tf.keras.layers
    preprocessing = layers.experimental.preprocessing

    # context inputs
    timestamp = layers.Input(shape=(1,), name="timestamp", dtype=tf.int64)
    # user inputs
    gender = layers.Input(shape=(1,), name="gender", dtype=tf.string)
    age = layers.Input(shape=(1,), name="age", dtype=tf.int64)
    occupation = layers.Input(shape=(1,), name="occupation", dtype=tf.string)
    zip_code = layers.Input(shape=(1,), name="zip_code", dtype=tf.string)
    # item inputs
    movie_id = layers.Input(shape=(1,), name="movie_id", dtype=tf.int64)
    keywords = layers.Input(shape=(None,), name="keywords", dtype=tf.string, ragged=True)
    categories = layers.Input(shape=(None,), name="categories", dtype=tf.string, ragged=True)
    # behavior inputs
    user_history_high_score_movies = layers.Input(
        shape=(None,), name="user_history_high_score_movies", dtype=tf.int64, ragged=True)
    user_history_high_score_movie_keywords = layers.Input(
        shape=(None,), name="user_history_high_score_movie_keywords", dtype=tf.string, ragged=True)
    user_history_high_score_movie_categories = layers.Input(
        shape=(None,), name="user_history_high_score_movie_categories", dtype=tf.string, ragged=True)

    # Pooling helpers are defined elsewhere in the project; each Lambda is
    # created once and reused for every feature of its kind.
    embedding_pooling_layer = layers.Lambda(
        lambda tensor: embedding_pooling(tensor), name="EmbeddingPooling")
    multi_behavior_pooling_layer = layers.Lambda(
        lambda tensor: multi_behavior_embedding_pooling(tensor),
        name="MultiEmbeddingPooling")

    # context features
    # NOTE(review): the arithmetic below assumes `timestamp` is in seconds -- confirm.
    # Discretization places a value equal to a boundary in the bucket above it,
    # so boundaries 1..23 map the hours 0..23 onto ids 0..23. Boundaries 0..23
    # would produce ids 1..24, out of range for Embedding(input_dim=24).
    timestamp_hour_layer = layers.Embedding(input_dim=24, output_dim=5)(
        preprocessing.Discretization(bins=list(range(1, 24)))(
            layers.Lambda(function=lambda tensor: tensor // 3600 % 24)(timestamp)))
    timestamp_hour_layer = embedding_pooling_layer(timestamp_hour_layer)
    # Same reasoning for the position in the 7-day cycle: boundaries 1..6 -> ids 0..6.
    timestamp_week_layer = layers.Embedding(input_dim=7, output_dim=3)(
        preprocessing.Discretization(bins=list(range(1, 7)))(
            layers.Lambda(function=lambda tensor: tensor // 86400 % 7)(timestamp)))
    timestamp_week_layer = embedding_pooling_layer(timestamp_week_layer)
    context_profile_embed_layer = layers.Concatenate(name="ContextProfile")(
        inputs=[timestamp_hour_layer, timestamp_week_layer])

    # user profile features
    # Lookups use num_oov_indices=0 so ids span exactly 0..len(vocabulary)-1,
    # matching the input_dim of the embedding that follows.
    gender_embed_layer = layers.Embedding(input_dim=2, output_dim=5)(
        preprocessing.StringLookup(
            vocabulary=["F", "M"], mask_token=None, num_oov_indices=0)(gender))
    gender_embed_layer = embedding_pooling_layer(gender_embed_layer)
    age_embed_layer = layers.Embedding(input_dim=7, output_dim=5)(
        preprocessing.IntegerLookup(
            vocabulary=[1, 18, 25, 35, 45, 50, 56],
            mask_value=None,
            num_oov_indices=0)(age))
    age_embed_layer = embedding_pooling_layer(age_embed_layer)
    # NOTE(review): input_dim=21 assumes `user_occupation` has 21 entries -- confirm.
    occupation_embed_layer = layers.Embedding(input_dim=21, output_dim=10)(
        preprocessing.StringLookup(
            vocabulary=list(user_occupation.values()),
            mask_token=None,
            num_oov_indices=0)(occupation))
    occupation_embed_layer = embedding_pooling_layer(occupation_embed_layer)
    # 3439 (original note; presumably the number of distinct zip codes)
    zip_code_embed_layer = layers.Embedding(input_dim=10000, output_dim=15)(
        preprocessing.Hashing(num_bins=10000)(zip_code))
    zip_code_embed_layer = embedding_pooling_layer(zip_code_embed_layer)
    user_profile_embed_layer = layers.Concatenate(name="UserProfile")(
        inputs=[
            gender_embed_layer, age_embed_layer, occupation_embed_layer,
            zip_code_embed_layer
        ])

    # Layers shared by the candidate item and the behavior history, so both
    # are embedded with the same tables.
    # -- movie id
    movie_id_embedding_size = 16
    id_embed_layer = layers.Embedding(
        input_dim=3953, output_dim=movie_id_embedding_size)
    # -- keyword
    movie_keyword_embedding_size = 16
    keywords_layer = preprocessing.Hashing(num_bins=10000)
    keyword_embed_layer = layers.Embedding(
        input_dim=10000, output_dim=movie_keyword_embedding_size)
    # -- category
    category_embedding_size = 3
    categories_layer = preprocessing.StringLookup(
        vocabulary=[
            "Action", "Adventure", "Animation", "Children's", "Comedy",
            "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
            "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War",
            "Western"
        ],
        mask_token=None,
        num_oov_indices=0)
    categories_embed_layer = layers.Embedding(
        input_dim=18, output_dim=category_embedding_size)

    # candidate item profile features
    candidate_id_embed_layer = id_embed_layer(movie_id)
    candidate_id_embed_layer = embedding_pooling_layer(
        candidate_id_embed_layer)
    candidate_keywords_embed_layer = keyword_embed_layer(
        keywords_layer(keywords))
    candidate_keywords_embed_layer = embedding_pooling_layer(
        candidate_keywords_embed_layer)
    candidate_categories_embed_layer = categories_embed_layer(
        categories_layer(categories))
    candidate_categories_embed_layer = embedding_pooling_layer(
        candidate_categories_embed_layer)
    # Expected shape per the original note: batch_size * embedding_size.
    candidate_profile_embed_layer = layers.Concatenate(
        name="CandidateProfile")(inputs=[
            candidate_id_embed_layer, candidate_keywords_embed_layer,
            candidate_categories_embed_layer
        ])

    # behavior features
    behavior_id_embed_layer = id_embed_layer(user_history_high_score_movies)
    behavior_id_embed_layer = multi_behavior_pooling_layer(
        behavior_id_embed_layer)
    behavior_keywords_embed_layer = keyword_embed_layer(
        keywords_layer(user_history_high_score_movie_keywords))
    behavior_keywords_embed_layer = multi_behavior_pooling_layer(
        behavior_keywords_embed_layer)
    behavior_categories_embed_layer = categories_embed_layer(
        categories_layer(user_history_high_score_movie_categories))
    behavior_categories_embed_layer = multi_behavior_pooling_layer(
        behavior_categories_embed_layer)
    # Expected shape per the original note:
    # batch_size * behavior_count * embedding_size.
    behavior_profile_embed_layer = layers.Concatenate(
        name="UserBehaviorProfile")(inputs=[
            behavior_id_embed_layer, behavior_keywords_embed_layer,
            behavior_categories_embed_layer
        ])

    # attention over behaviors, given the candidate profile
    attention_behavior_profile_embed_layer = AttentionUnitLayer(
        hidden_unit=attention_hidden_unit
    )(inputs=[candidate_profile_embed_layer, behavior_profile_embed_layer])

    all_inputs_layer = layers.Concatenate(name="ConcatProfiles")(
        inputs=[
            context_profile_embed_layer, user_profile_embed_layer,
            candidate_profile_embed_layer,
            attention_behavior_profile_embed_layer
        ])

    # Hidden stack: Dense without activation, then Dice, then Dropout.
    for hidden_unit in hidden_units:
        all_inputs_layer = layers.Dense(units=hidden_unit)(all_inputs_layer)
        all_inputs_layer = DiceActivation()(all_inputs_layer)
        all_inputs_layer = layers.Dropout(rate=dropout)(all_inputs_layer)

    predict = layers.Dense(
        units=2, activation=tf.keras.activations.softmax)(all_inputs_layer)

    model = tf.keras.Model(inputs=[
        timestamp, gender, age, occupation, zip_code, movie_id, keywords,
        categories, user_history_high_score_movies,
        user_history_high_score_movie_keywords,
        user_history_high_score_movie_categories
    ],
                           outputs=[predict])

    return model