Example #1
    def build_graph(self, **kwargs):
        user_sparse_inputs = {uf['feat']: Input(shape=(1,), dtype=tf.float32) for uf in
                              self.user_sparse_feature_columns}
        item_sparse_inputs = {uf['feat']: Input(shape=(1,), dtype=tf.float32) for uf in
                              self.item_sparse_feature_columns}

        seq_inputs = Input(shape=(self.seq_len,), dtype=tf.int32)
        pos_inputs = Input(shape=(1,), dtype=tf.int32)
        neg_inputs = Input(shape=(self.neg_len,), dtype=tf.int32)

        model = Model(inputs=[seq_inputs, pos_inputs, neg_inputs],
                      outputs=self.call([seq_inputs, pos_inputs, neg_inputs]))

        # dict.update() returns None, so merge first and then assign
        user_sparse_inputs.update({'seq_item': seq_inputs})
        user_input = user_sparse_inputs

        model.__setattr__("user_input", user_input)
        model.__setattr__("item_input", item_sparse_inputs)
        model.__setattr__("embed", self.embed)
        return model



# def model_test():
#     user_features = [{'feat': 'user_id', 'feat_num': 100, 'feat_len': 1, 'embed_dim': 8},
#                      {'feat': 'seq_item', 'feat_num': 100, 'feat_len': 10, 'embed_dim': 64},
#                      {'feat': 'pos_item', 'feat_num': 100, 'feat_len': 1, 'embed_dim': 64},
#                      {'feat': 'neg_item', 'feat_num': 100, 'feat_len': 100, 'embed_dim': 64}]
#     item_features = [{'feat': 'item_id', 'feat_num': 100, 'feat_len': 1, 'embed_dim': 32}]
#     model = SASRec(user_features, item_features, att_hidden_unit=64)
#     m = model.build_graph()
#     m.summary()
#
#
# model_test()
Example #2
def DSSM(user_feature_columns,
         item_feature_columns,
         user_dnn_hidden_units=(256, 256, 128),
         item_dnn_hidden_units=(256, 256, 128),
         user_dnn_dropout=(0, 0, 0),
         item_dnn_dropout=(0, 0, 0),
         out_dnn_activation='tanh',
         gamma=20,
         dnn_use_bn=False,
         seed=1024,
         metric='cos'):
    features_columns = user_feature_columns + item_feature_columns
    # build the embedding_dict
    embedding_dict = build_embedding_dict(features_columns)

    # user feature processing
    user_features = build_input_features(user_feature_columns)
    user_inputs_list = list(user_features.values())
    user_sparse_embedding_list, user_dense_value_list = input_from_feature_columns(
        user_features, user_feature_columns, embedding_dict)
    user_dnn_input = combined_dnn_input(user_sparse_embedding_list,
                                        user_dense_value_list)

    # item feature processing
    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())
    item_sparse_embedding_list, item_dense_value_list = input_from_feature_columns(
        item_features, item_feature_columns, embedding_dict)
    item_dnn_input = combined_dnn_input(item_sparse_embedding_list,
                                        item_dense_value_list)

    # user tower
    for i in range(len(user_dnn_hidden_units)):
        if i == len(user_dnn_hidden_units) - 1:
            user_dnn_out = Dense(units=user_dnn_hidden_units[i],
                                 activation=out_dnn_activation,
                                 name='user_embed_out')(user_dnn_input)
            break
        user_dnn_input = Dense(units=user_dnn_hidden_units[i],
                               activation=out_dnn_activation,
                               name='dnn_user_' + str(i))(user_dnn_input)

    # item tower
    for i in range(len(item_dnn_hidden_units)):
        if i == len(item_dnn_hidden_units) - 1:
            item_dnn_out = Dense(units=item_dnn_hidden_units[i],
                                 activation=out_dnn_activation,
                                 name='item_embed_out')(item_dnn_input)
            break
        item_dnn_input = Dense(units=item_dnn_hidden_units[i],
                               activation=out_dnn_activation,
                               name='dnn_item_' + str(i))(item_dnn_input)

    score = Similarity(type_sim=metric, gamma=gamma,
                       name='dssm_out')([user_dnn_out, item_dnn_out])

    output = score

    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)
    model.__setattr__("user_input", user_inputs_list)
    model.__setattr__("item_input", item_inputs_list)
    model.__setattr__("user_embedding", user_dnn_out)
    model.__setattr__("item_embedding", item_dnn_out)

    return model
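
# The Similarity layer that produces dssm_out is not defined in this example. Below is a
# minimal sketch of what a cosine-metric version with a gamma scaling factor might look
# like; the class name CosineSimilarityScore and the sigmoid squashing are assumptions,
# not the actual implementation used above.

import tensorflow as tf
from tensorflow.keras.layers import Layer


class CosineSimilarityScore(Layer):
    # Hypothetical stand-in for Similarity(type_sim='cos', gamma=...): cosine
    # similarity between the two tower outputs, sharpened by the factor gamma.
    def __init__(self, gamma=20, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.gamma = gamma
        self.axis = axis

    def call(self, inputs):
        user_emb, item_emb = inputs
        user_norm = tf.nn.l2_normalize(user_emb, axis=self.axis)
        item_norm = tf.nn.l2_normalize(item_emb, axis=self.axis)
        cos = tf.reduce_sum(user_norm * item_norm, axis=self.axis, keepdims=True)
        # squash the scaled cosine score to (0, 1); the real layer may differ here
        return tf.sigmoid(self.gamma * cos)
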
Example #3
def YouTubeNet(sparse_input_length=1,
               dense_input_length=1,
               sparse_seq_input_length=50,
               embedding_dim=64,
               neg_sample_num=10,
               user_hidden_unit_list=[128, 64]):

    # 1. Input layer
    user_id_input_layer = Input(shape=(sparse_input_length, ),
                                name="user_id_input_layer")
    gender_input_layer = Input(shape=(sparse_input_length, ),
                               name="gender_input_layer")
    age_input_layer = Input(shape=(sparse_input_length, ),
                            name="age_input_layer")
    occupation_input_layer = Input(shape=(sparse_input_length, ),
                                   name="occupation_input_layer")
    zip_input_layer = Input(shape=(sparse_input_length, ),
                            name="zip_input_layer")

    user_click_item_seq_input_layer = Input(
        shape=(sparse_seq_input_length, ),
        name="user_click_item_seq_input_layer")
    user_click_item_seq_length_input_layer = Input(
        shape=(sparse_input_length, ),
        name="user_click_item_seq_length_input_layer")

    pos_item_sample_input_layer = Input(shape=(sparse_input_length, ),
                                        name="pos_item_sample_input_layer")
    neg_item_sample_input_layer = Input(shape=(neg_sample_num, ),
                                        name="neg_item_sample_input_layer")

    # 2. Embedding layer
    user_id_embedding_layer = Embedding(
        6040 + 1,
        embedding_dim,
        mask_zero=True,
        name='user_id_embedding_layer')(user_id_input_layer)
    gender_embedding_layer = Embedding(
        2 + 1, embedding_dim, mask_zero=True,
        name='gender_embedding_layer')(gender_input_layer)
    age_embedding_layer = Embedding(
        7 + 1, embedding_dim, mask_zero=True,
        name='age_embedding_layer')(age_input_layer)
    occupation_embedding_layer = Embedding(
        21 + 1,
        embedding_dim,
        mask_zero=True,
        name='occupation_embedding_layer')(occupation_input_layer)
    zip_embedding_layer = Embedding(
        3439 + 1, embedding_dim, mask_zero=True,
        name='zip_embedding_layer')(zip_input_layer)

    item_id_embedding_layer = Embedding(3706 + 1,
                                        embedding_dim,
                                        mask_zero=True,
                                        name='item_id_embedding_layer')
    pos_item_sample_embedding_layer = item_id_embedding_layer(
        pos_item_sample_input_layer)
    neg_item_sample_embedding_layer = item_id_embedding_layer(
        neg_item_sample_input_layer)

    user_click_item_seq_embedding_layer = item_id_embedding_layer(
        user_click_item_seq_input_layer)
    user_click_item_seq_embedding_layer = SequencePoolingLayer(sequence_mask_length=sparse_seq_input_length)\
        ([user_click_item_seq_embedding_layer, user_click_item_seq_length_input_layer])

    ### ********** ###
    # user part
    ### ********** ###

    # 3. Concat "sparse" embedding & "sparse_seq" embedding
    user_embedding_layer = concatenate([
        user_id_embedding_layer, gender_embedding_layer, age_embedding_layer,
        occupation_embedding_layer, zip_embedding_layer,
        user_click_item_seq_embedding_layer
    ],
                                       axis=-1)

    for i, u in enumerate(user_hidden_unit_list):
        user_embedding_layer = Dense(
            u, activation="relu",
            name="FC_{0}".format(i + 1))(user_embedding_layer)
        #user_embedding_layer = Dropout(0.3)(user_embedding_layer)

    ### ********** ###
    # item part
    ### ********** ###

    item_embedding_layer = concatenate([pos_item_sample_embedding_layer, neg_item_sample_embedding_layer], \
                                       axis=1)

    item_embedding_layer = tf.transpose(item_embedding_layer, [0, 2, 1])

    # Output
    dot_output = tf.matmul(user_embedding_layer, item_embedding_layer)
    dot_output = tf.nn.softmax(
        dot_output)  # outputs 11 values: index 0 is the positive sample, indices 1-10 are the negatives

    user_inputs_list = [user_id_input_layer, gender_input_layer, age_input_layer, \
                        occupation_input_layer, zip_input_layer, \
                        user_click_item_seq_input_layer, user_click_item_seq_length_input_layer]

    item_inputs_list = [
        pos_item_sample_input_layer, neg_item_sample_input_layer
    ]

    model = Model(inputs=user_inputs_list + item_inputs_list,
                  outputs=dot_output)

    #print(model.summary())
    #tf.keras.utils.plot_model(model, to_file='YouTubeNet_model.png', show_shapes=True)

    model.__setattr__("user_input", user_inputs_list)
    model.__setattr__("user_embedding", user_embedding_layer)

    model.__setattr__("item_input", pos_item_sample_input_layer)
    model.__setattr__("item_embedding", pos_item_sample_embedding_layer)

    return model
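
# Because the softmax output always puts the positive sample in column 0, one plausible
# training setup uses all-zero sparse labels. The sketch below is an assumption, not part
# of the original code: it presumes the custom SequencePoolingLayer is importable and uses
# placeholder names (train_inputs, batch_size) for the real data pipeline.

import numpy as np
import tensorflow as tf

model = YouTubeNet(neg_sample_num=10)
model.compile(optimizer="adam",
              loss=tf.keras.losses.SparseCategoricalCrossentropy())

batch_size = 256
# The output has shape (batch, 1, 1 + neg_sample_num); the positive item is
# always the first column, so every label is class 0.
labels = np.zeros((batch_size, 1), dtype=np.int64)
# model.fit(train_inputs, labels, batch_size=batch_size, epochs=5)
# train_inputs must match the nine Input layers above, in the same order.
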
Example #4
def mind(sparse_input_length=1,
         dense_input_length=1,
         sparse_seq_input_length=50,
         embedding_dim=64,
         neg_sample_num=10,
         user_hidden_unit_list=[128, 64],
         k_max=5,
         p=1,
         dynamic_k=True):

    # 1. Input layer
    user_id_input_layer = Input(shape=(sparse_input_length, ),
                                name="user_id_input_layer")
    gender_input_layer = Input(shape=(sparse_input_length, ),
                               name="gender_input_layer")
    age_input_layer = Input(shape=(sparse_input_length, ),
                            name="age_input_layer")
    occupation_input_layer = Input(shape=(sparse_input_length, ),
                                   name="occupation_input_layer")
    zip_input_layer = Input(shape=(sparse_input_length, ),
                            name="zip_input_layer")

    user_click_item_seq_input_layer = Input(
        shape=(sparse_seq_input_length, ),
        name="user_click_item_seq_input_layer")
    user_click_item_seq_length_input_layer = Input(
        shape=(sparse_input_length, ),
        name="user_click_item_seq_length_input_layer")

    pos_item_sample_input_layer = Input(shape=(sparse_input_length, ),
                                        name="pos_item_sample_input_layer")
    neg_item_sample_input_layer = Input(shape=(neg_sample_num, ),
                                        name="neg_item_sample_input_layer")

    # 2. Embedding layer
    user_id_embedding_layer = Embedding(
        6040 + 1,
        embedding_dim,
        mask_zero=True,
        name='user_id_embedding_layer')(user_id_input_layer)
    gender_embedding_layer = Embedding(
        2 + 1, embedding_dim, mask_zero=True,
        name='gender_embedding_layer')(gender_input_layer)
    age_embedding_layer = Embedding(
        7 + 1, embedding_dim, mask_zero=True,
        name='age_embedding_layer')(age_input_layer)
    occupation_embedding_layer = Embedding(
        21 + 1,
        embedding_dim,
        mask_zero=True,
        name='occupation_embedding_layer')(occupation_input_layer)
    zip_embedding_layer = Embedding(
        3439 + 1, embedding_dim, mask_zero=True,
        name='zip_embedding_layer')(zip_input_layer)

    item_id_embedding_layer = Embedding(3706 + 1,
                                        embedding_dim,
                                        mask_zero=True,
                                        name='item_id_embedding_layer')
    pos_item_sample_embedding_layer = item_id_embedding_layer(
        pos_item_sample_input_layer)
    neg_item_sample_embedding_layer = item_id_embedding_layer(
        neg_item_sample_input_layer)

    user_click_item_seq_embedding_layer = item_id_embedding_layer(
        user_click_item_seq_input_layer)

    ### ********** ###
    # 3. user part
    ### ********** ###

    # 3.1 pooling layer
    user_click_item_seq_embedding_layer_pooling = SequencePoolingLayer()\
        ([user_click_item_seq_embedding_layer, user_click_item_seq_length_input_layer])

    print("user_click_item_seq_embedding_layer_pooling",
          user_click_item_seq_embedding_layer_pooling)

    # 3.2 capsule layer
    high_capsule = CapsuleLayer(input_units=embedding_dim,
                                out_units=embedding_dim, max_len=sparse_seq_input_length,
                                k_max=k_max)\
                        ([user_click_item_seq_embedding_layer, user_click_item_seq_length_input_layer])

    print("high_capsule: ", high_capsule)

    # 3.3 Concat "sparse" embedding & "sparse_seq" embedding, and tile embedding
    other_user_embedding_layer = concatenate([user_id_embedding_layer, gender_embedding_layer, \
                                                        age_embedding_layer, occupation_embedding_layer, \
                                                        zip_embedding_layer, user_click_item_seq_embedding_layer_pooling],
                                       axis=-1)

    other_user_embedding_layer = tf.tile(other_user_embedding_layer,
                                         [1, k_max, 1])

    print("other_user_embedding_layer: ", other_user_embedding_layer)

    # 3.4 user dnn part
    user_deep_input = concatenate([other_user_embedding_layer, high_capsule],
                                  axis=-1)
    print("user_deep_input: ", user_deep_input)

    for i, u in enumerate(user_hidden_unit_list):
        user_deep_input = Dense(u,
                                activation="relu",
                                name="FC_{0}".format(i + 1))(user_deep_input)
        #user_deep_input = Dropout(0.3)(user_deep_input)

    print("user_deep_input: ", user_deep_input)

    if dynamic_k:
        user_embedding_final = LabelAwareAttention(k_max=k_max, pow_p=p, )(\
                                    [user_deep_input, pos_item_sample_embedding_layer, user_click_item_seq_length_input_layer])
    else:
        user_embedding_final = LabelAwareAttention(k_max=k_max, pow_p=p, )(\
                                    [user_deep_input, pos_item_sample_embedding_layer])

    user_embedding_final = tf.expand_dims(user_embedding_final, 1)
    print("user_embedding_final: ", user_embedding_final)

    ### ********** ###
    # 4. item part
    ### ********** ###

    item_embedding_layer = concatenate([pos_item_sample_embedding_layer, neg_item_sample_embedding_layer], \
                                       axis=1)

    item_embedding_layer = tf.transpose(item_embedding_layer, [0, 2, 1])

    print("item_embedding_layer: ", item_embedding_layer)

    ### ********** ###
    # 5. Output
    ### ********** ###

    dot_output = tf.matmul(user_embedding_final, item_embedding_layer)
    dot_output = tf.nn.softmax(
        dot_output)  # outputs 11 values: index 0 is the positive sample, indices 1-10 are the negatives

    print(dot_output)

    user_inputs_list = [user_id_input_layer, gender_input_layer, age_input_layer, \
                        occupation_input_layer, zip_input_layer, \
                        user_click_item_seq_input_layer, user_click_item_seq_length_input_layer]

    item_inputs_list = [
        pos_item_sample_input_layer, neg_item_sample_input_layer
    ]

    model = Model(inputs=user_inputs_list + item_inputs_list,
                  outputs=dot_output)

    #print(model.summary())
    #tf.keras.utils.plot_model(model, to_file='MIND_model.png', show_shapes=True)

    model.__setattr__("user_input", user_inputs_list)
    model.__setattr__("user_embedding", user_deep_input)

    model.__setattr__("item_input", pos_item_sample_input_layer)
    model.__setattr__("item_embedding", pos_item_sample_embedding_layer)

    return model
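
# The attributes attached with __setattr__ are convenient for splitting the trained graph
# into separate towers at serving time. A minimal sketch follows, assuming a model built
# by mind() above and that its custom layers (SequencePoolingLayer, CapsuleLayer,
# LabelAwareAttention) are available; the predict calls are placeholders.

from tensorflow.keras.models import Model

model = mind()

# User tower: raw user features -> k_max interest vectors (shape: batch x k_max x units).
user_tower = Model(inputs=model.user_input, outputs=model.user_embedding)

# Item tower: item id -> item embedding, e.g. for building an ANN index.
item_tower = Model(inputs=model.item_input, outputs=model.item_embedding)

# user_vectors = user_tower.predict(user_feature_batch)
# item_vectors = item_tower.predict(item_id_batch)
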