def DCN(
    cross_num,
    input_shape,
    cross_parameterization="vector",
    dnn_hidden_units=(
        128,
        128,
    ),
    l2_reg_cross=1e-5,
    l2_reg_dnn=0,
    seed=1024,
    dnn_dropout=0,
    dnn_use_bn=False,
    dnn_activation="relu",
):
    """Build a Deep & Cross style Keras model over one flat float32 input.

    :param cross_num: number of cross layers; ``0`` disables the cross branch.
    :param input_shape: integer width of the input vector; also the number of
        units of the linear ``Dense`` projection appended to the output.
    :param cross_parameterization: forwarded to ``CrossNet`` as ``parameterization``.
    :param dnn_hidden_units: sequence of layer widths for the deep branch; an
        empty sequence disables the deep branch.
    :param l2_reg_cross: forwarded to ``CrossNet`` as ``l2_reg``.
    :param l2_reg_dnn: L2 strength forwarded to ``DNN``.
    :param seed: random seed forwarded to ``DNN``.
    :param dnn_dropout: dropout rate forwarded to ``DNN``.
    :param dnn_use_bn: batch-normalisation switch forwarded to ``DNN``.
    :param dnn_activation: activation forwarded to ``DNN``.
    :return: A Keras model whose output is the concatenation of the enabled
        branch output(s) and the linear projection of the input.
    :raises ValueError: if there are no hidden units and ``cross_num == 0``.
    :raises NotImplementedError: if neither branch can be built for any other
        reason (e.g. no hidden units and a negative ``cross_num``).
    """
    if len(dnn_hidden_units) == 0 and cross_num == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")

    inputs = tf.keras.layers.Input(shape=(input_shape, ), name="input", dtype=tf.float32)
    linear_logit = tf.keras.layers.Dense(input_shape)(inputs)
    dnn_input = inputs

    deep_enabled = len(dnn_hidden_units) > 0
    cross_enabled = cross_num > 0
    if not (deep_enabled or cross_enabled):
        raise NotImplementedError

    # The deep branch is always instantiated before the cross branch.
    deep_out = None
    if deep_enabled:
        deep_layer = DNN(
            dnn_hidden_units,
            dnn_activation,
            l2_reg_dnn,
            dnn_dropout,
            dnn_use_bn,
            seed=seed,
        )
        deep_out = deep_layer(dnn_input)

    cross_out = None
    if cross_enabled:
        cross_layer = CrossNet(cross_num, parameterization=cross_parameterization, l2_reg=l2_reg_cross)
        cross_out = cross_layer(dnn_input)

    if deep_enabled and cross_enabled:
        # Deep & Cross: cross output first, deep output second.
        branch_out = tf.keras.layers.Concatenate()([cross_out, deep_out])
    elif deep_enabled:
        branch_out = deep_out
    else:
        branch_out = cross_out

    final_logit = tf.keras.layers.Concatenate()([branch_out, linear_logit])
    return tf.keras.models.Model(inputs=inputs, outputs=final_logit)
def MMOE(dnn_feature_columns, num_tasks, tasks, num_experts=4, expert_dim=8, dnn_hidden_units=(128, 128),
         l2_reg_embedding=1e-5, l2_reg_dnn=0, task_dnn_units=None, seed=1024, dnn_dropout=0,
         dnn_activation='relu'):
    """Instantiates the Multi-gate Mixture-of-Experts architecture.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param num_tasks: integer, number of tasks, equal to number of outputs, must be greater than 1.
    :param tasks: list of str, indicating the loss of each tasks, ``"binary"`` for binary logloss,
        ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param num_experts: integer, number of experts.
    :param expert_dim: integer, the hidden units of each expert.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of shared-bottom DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param task_dnn_units: list,list of positive integer or empty list, the layer number and units in
        each layer of task-specific DNN; ``None`` skips the task-specific DNNs entirely.
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :return: a Keras model instance with one output per task.
    :raises ValueError: if ``num_tasks <= 1``, if ``len(tasks) != num_tasks``, or if any task is
        neither ``"binary"`` nor ``"regression"``.
    """
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(tasks) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of tasks")
    for task in tasks:
        if task not in ['binary', 'regression']:
            raise ValueError("task must be binary or regression, {} is illegal".format(task))

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Shared-bottom DNN followed by the multi-gate expert layer (one output per task).
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    mmoe_outs = MMOELayer(num_tasks, num_experts, expert_dim)(dnn_out)

    if task_dnn_units is not None:
        # ``seed`` is passed by keyword, exactly like the shared-bottom DNN above, so it cannot be
        # bound to a different positional parameter of ``DNN``.
        # NOTE(review): DNN's positional order is not visible here — confirm against its signature.
        mmoe_outs = [DNN(task_dnn_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(mmoe_out)
                     for mmoe_out in mmoe_outs]

    task_outputs = []
    for mmoe_out, task in zip(mmoe_outs, tasks):
        logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(mmoe_out)
        task_outputs.append(PredictionLayer(task)(logit))

    model = tf.keras.models.Model(inputs=inputs_list, outputs=task_outputs)
    return model
def DeepFM2(linear_feature_columns, dnn_feature_columns, fm_group=[DEFAULT_GROUP_NAME],
            dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
            seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary',
            use_attention=True, attention_factor=8, l2_reg_att=1e-5, afm_dropout=0):
    """Instantiates a DeepFM network whose second-order term can be attention based.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param use_attention: bool. When true each selected group goes through ``AFMLayer``; otherwise
        through ``FM`` on the group's embeddings concatenated along axis 1.
    :param attention_factor: first positional argument forwarded to ``AFMLayer``.
    :param l2_reg_att: second positional argument forwarded to ``AFMLayer``.
    :param afm_dropout: third positional argument forwarded to ``AFMLayer``.
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # One interaction term per embedding group named in ``fm_group``, in dict order.
    interaction_terms = []
    for group_name, group_embeddings in group_embedding_dict.items():
        if group_name not in fm_group:
            continue
        if use_attention:
            term = AFMLayer(attention_factor, l2_reg_att, afm_dropout, seed)(list(group_embeddings))
        else:
            term = FM()(concat_func(group_embeddings, axis=1))
        interaction_terms.append(term)
    fm_logit = add_func(interaction_terms)

    # The deep part consumes every group's embeddings, not only those in ``fm_group``.
    all_embeddings = list(chain.from_iterable(group_embedding_dict.values()))
    dnn_input = combined_dnn_input(all_embeddings, dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                     seed=seed)(dnn_input)
    logit_initializer = tf.keras.initializers.glorot_normal(seed=seed)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)
    return tf.keras.models.Model(inputs=inputs_list, outputs=output)
def xDeepFM_MTL(
    linear_feature_columns,
    dnn_feature_columns,
    gate_feature_columns,
    embedding_size=8,
    dnn_hidden_units=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    init_std=0.0001,
    l2_reg_dnn=0,
    dnn_dropout=0,
    dnn_activation='relu',
    dnn_use_bn=False,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """Build a two-task (``finish`` / ``like``) xDeepFM with gate-blended task heads.

    Each task has two independent heads on top of the shared DNN; the first tensor
    returned by ``get_dense_input`` for ``gate_feature_columns`` blends them as
    ``gate * head1 + (1 - gate) * head2``.

    :param linear_feature_columns: features used by the linear part of the model.
    :param dnn_feature_columns: features whose embeddings feed the CIN and the shared DNN.
    :param gate_feature_columns: features from which the blending gate is read.
    :param embedding_size: forwarded to ``input_from_feature_columns``.
    :param dnn_hidden_units: layer widths of the shared DNN.
    :param cin_layer_size: feature maps per CIN layer; an empty sequence leaves the CIN term
        out of both task logits.
    :param cin_split_half: forwarded to ``CIN``.
    :param init_std: forwarded to the embedding and linear builders.
    :param l2_reg_dnn: L2 strength forwarded to the shared DNN.
    :param dnn_dropout: dropout rate forwarded to the shared DNN.
    :param dnn_activation: activation forwarded to the shared DNN.
    :param dnn_use_bn: batch-normalisation switch forwarded to the shared DNN.
    :param task_net_size: layer widths of every task head; must not be empty.
    :param l2_reg_linear: L2 strength forwarded to the linear part.
    :param l2_reg_embedding: L2 strength forwarded to the embedding builder.
    :param seed: integer, to use as random seed.
    :return: A Keras model with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    features = build_input_features(linear_feature_columns + dnn_feature_columns + gate_feature_columns)
    inputs_list = list(features.values())

    # Dense values are unpacked but not used: the DNN below only sees the sparse embeddings.
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)
    gate = get_dense_input(features, gate_feature_columns)[0]

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear,
                                    init_std=init_std, seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    # The CIN term is optional; previously an empty ``cin_layer_size`` left ``exFM_logit``
    # unbound and the final ``add`` calls crashed.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, 0, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    dnn_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                   seed)(dnn_input)

    def _task_head():
        """Return a fresh one-unit logit head on top of the shared DNN output."""
        head_out = DNN(task_net_size)(deep_out)
        return tf.keras.layers.Dense(1, use_bias=False, activation=None)(head_out)

    # Creation order (finish1, like1, finish2, like2) matches the original layout.
    finish_logit1 = _task_head()
    like_logit1 = _task_head()
    finish_logit2 = _task_head()
    like_logit2 = _task_head()

    finish_logit = gate * finish_logit1 + (1.0 - gate) * finish_logit2
    like_logit = gate * like_logit1 + (1.0 - gate) * like_logit2

    shared_tail = [] if exFM_logit is None else [exFM_logit]
    finish_logit = tf.keras.layers.add([linear_logit, finish_logit] + shared_tail)
    like_logit = tf.keras.layers.add([linear_logit, like_logit] + shared_tail)

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=[output_finish, output_like])
    return model
def _side_input_embedding(input_name, embedding_size, l2_reg_embedding, input_dim=128):
    """Create a dense side input and project it to a ``(1, embedding_size)`` embedding."""
    side_input = tf.keras.layers.Input(shape=(input_dim, ), name=input_name)
    side_emb = tf.keras.layers.Dense(
        embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding))(side_input)
    side_emb = tf.keras.layers.Reshape(
        (1, embedding_size), input_shape=(embedding_size, ))(side_emb)
    return side_input, side_emb


def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=[DEFAULT_GROUP_NAME],
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
           init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary', use_image=False, use_text=False, embedding_size=128):
    """Instantiates the DeepFM Network architecture with optional image/text side inputs.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param use_image: bool. Adds a 128-wide model input named ``'image'`` whose projection joins the
        default embedding group.
    :param use_text: bool. Adds a 128-wide model input named ``'text'`` whose projection joins the
        default embedding group.
    :param embedding_size: integer, output width of the image/text projections.
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, init_std, seed, support_group=True)

    # Image is handled before text so layer creation and model input order are unchanged.
    for enabled, input_name in ((use_image, 'image'), (use_text, 'text')):
        if enabled:
            side_input, side_emb = _side_input_embedding(input_name, embedding_size, l2_reg_embedding)
            group_embedding_dict[DEFAULT_GROUP_NAME].append(side_emb)
            inputs_list.append(side_input)

    linear_logit = get_linear_logit(features, linear_feature_columns, init_std=init_std, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)

    fm_logit = add_func([
        FM()(concat_func(v, axis=1)) for k, v in group_embedding_dict.items()
        if k in fm_group
    ])

    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                     seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def BST(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16, use_bn=False,
        dnn_hidden_units=(200, 80), dnn_activation='relu', att_embedding_size=1, att_head_num=8,
        l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the BST architecture (presumably "Behavior Sequence Transformer").

    Every behaviour-sequence embedding is passed through one shared ``Transformer`` layer; the
    results are concatenated with the target-item embeddings, flattened and fed to a DNN.

    Side effects: prints two debug lines, and runs ``tf.global_variables_initializer()`` in the
    Keras backend session before returning.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like
        {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list,to indicate sequence sparse field (**now only support sparse
        feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units
        in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_embedding_size: first positional argument forwarded to ``Transformer``.
    :param att_head_num: second positional argument forwarded to ``Transformer``.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One Embedding per sparse feature, shared by the query, behaviour-sequence and deep-input
    # lookups below. ``mask_zero`` is enabled only for features listed in ``seq_feature_list``.
    # NOTE(review): the vocabulary size is passed as ``tf.cast(feat.dimension, tf.int32)`` instead
    # of a plain int — confirm that Embedding accepts this in the targeted TF version.
    sparse_embedding_dict = {feat.name: Embedding(tf.cast(feat.dimension, tf.int32), embedding_size,
                                                  embeddings_initializer=RandomNormal(
                                                      mean=0.0, stddev=init_std, seed=seed),
                                                  embeddings_regularizer=l2(
                                                      l2_reg_embedding),
                                                  name='sparse_emb_' + str(i) + '-' + feat.name,
                                                  mask_zero=(feat.name in seq_feature_list)) for i, feat in
                             enumerate(feature_dim_dict["sparse"])}

    # Target-item embeddings restricted to the sequence features.
    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    # Behaviour-history embeddings for the same features.
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    # Embeddings of every sparse input, used as the non-sequence part of the DNN input.
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                                 feature_dim_dict['sparse'])

    # NOTE(review): ``query_emb`` is not referenced again in this function, and ``keys_emb`` is
    # only used by the debug print below.
    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    # Debug output: prints the symbolic tensor, not its values.
    print("prev: {0}".format(keys_emb))

    deep_input_emb = concat_fun(deep_input_emb_list)
    # Debug output, as above.
    print("deep input emb: ", deep_input_emb)

    # A single Transformer instance is created here and called once per sequence feature in the
    # loop below, so all calls go through the same layer object.
    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=True, seed=seed, supports_masking=False,
                                 blinding=True)
    hists = []
    for key_emb in keys_emb_list:
        # Self-attention: the sequence is both query and key, and the same length tensor is
        # passed for both.
        hist = Self_Attention([key_emb, key_emb, user_behavior_length, user_behavior_length])
        hists.append(hist)
    hist = concat_fun(hists)

    # Example shapes recorded by the original author for the two operands of the concatenation:
    #   Tensor("concatenate_2/concat:0", shape=(?, 50, 8), dtype=float32)
    #   <tf.Tensor 'concatenate_3/concat:0' shape=(?, 4, 8) dtype=float32>
    deep_input_emb = Concatenate()([deep_input_emb, hist])
    # NoMask is applied before Flatten — presumably to drop the mask from the mask_zero
    # embeddings; verify against the NoMask implementation.
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    # Model inputs: sparse, dense and behaviour-sequence inputs, followed by the length input.
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    # Initializes all global variables in the backend session as a side effect of building.
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True, use_only_dnn=False,
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
           init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_fm: bool,use FM part or not
    :param use_only_dnn: bool. When true the output is built from the DNN logit alone; the linear
        and FM logits are ignored regardless of ``use_fm``.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear,
                                    init_std=init_std, seed=seed, prefix='linear')

    # All three component logits are always built; the flags only decide which ones are summed.
    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(dnn_out)

    if use_only_dnn:
        final_logit = dnn_logit
    else:
        # linear [+ FM] [+ Deep], in that order.
        selected_logits = [linear_logit]
        if use_fm:
            selected_logits.append(fm_logit)
        if len(dnn_hidden_units) > 0:
            selected_logits.append(dnn_logit)
        if len(selected_logits) == 1:
            final_logit = linear_logit
        else:
            final_logit = tf.keras.layers.add(selected_logits)

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True, only_dnn=False,
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
           init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary'):
    """Instantiates a two-task (``finish`` / ``like``) DeepFM with an MMoE layer over the shared DNN.

    Each task logit is ``linear + FM + task tower``; both outputs use a ``'binary'``
    ``PredictionLayer``. ``use_fm``, ``only_dnn`` and ``task`` are accepted but not read by
    this function.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_fm: unused; the FM term is always added.
    :param only_dnn: unused.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of the shared DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to the shared DNN and the task towers
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the shared DNN and the task towers
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: unused; both outputs are binary.
    :return: A Keras model instance with outputs ``[finish, like]``.
    """
    # One Input per feature: {'feature1': Input, ...}
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    # Embedding outputs for the sparse features and the raw tensors for the dense features.
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)

    # Linear (first-order) term shared by both tasks.
    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear,
                                    init_std=init_std, seed=seed, prefix='linear')

    # Second-order FM term over the concatenated sparse embeddings, shared by both tasks.
    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(dnn_input)

    # The MMoE layer must yield exactly two task-specific tensors.
    mmoe_out = MMoE(units=16, num_experts=8, num_tasks=2)(dnn_out)
    finish_in, like_in = mmoe_out

    def _task_tower(task_input):
        """Return the one-unit logit of a two-layer, 128-unit tower over ``task_input``."""
        hidden = Dense(128, dnn_activation, kernel_regularizer=l2(l2_reg_dnn))(task_input)
        hidden = Dense(128, dnn_activation, kernel_regularizer=l2(l2_reg_dnn))(hidden)
        return tf.keras.layers.Dense(1, use_bias=False, activation=None)(hidden)

    # The previous version also built a one-unit Dense head directly on ``dnn_out`` that was
    # never connected to any output; that dead layer has been removed.
    finish_logit = _task_tower(finish_in)
    like_logit = _task_tower(like_in)

    finish_logit = tf.keras.layers.add([linear_logit, fm_logit, finish_logit])
    like_logit = tf.keras.layers.add([linear_logit, fm_logit, like_logit])

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=[output_finish, output_like])
    return model
def xDeepFM_MTL(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 256),
                task_net_size=(128, ), cin_layer_size=(128, 128, ), cin_split_half=True,
                cin_activation='relu', l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
                l2_reg_cin=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
                task='binary'):
    """Instantiates a two-task (``finish`` / ``like``) xDeepFM architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of the shared deep net
    :param task_net_size: layer widths of each task-specific DNN stacked on the shared deep net.
    :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden
        layer of Compressed Interaction Network; when empty the CIN term is left out of both logits.
    :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to
        output unit
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: L2 regularizer strength applied to deep net
    :param l2_reg_cin: L2 regularizer strength applied to CIN.
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: not read by this function; both outputs use a ``'binary'`` prediction layer.
    :return: A Keras model instance with outputs ``[finish, like]``.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    fm_input = concat_func(sparse_embedding_list, axis=1)

    # The CIN term is optional; previously an empty ``cin_layer_size`` left ``exFM_logit``
    # unbound and the final ``add`` calls crashed.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half, l2_reg_cin, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(exFM_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                     seed=seed)(dnn_input)

    # Task-specific towers on top of the shared deep net (finish first, then like).
    finish_out = DNN(task_net_size)(dnn_output)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(finish_out)

    like_out = DNN(task_net_size)(dnn_output)
    like_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(like_out)

    shared_tail = [] if exFM_logit is None else [exFM_logit]
    finish_logit = tf.keras.layers.add([linear_logit, finish_logit] + shared_tail)
    like_logit = tf.keras.layers.add([linear_logit, like_logit] + shared_tail)

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=[output_finish, output_like])
    return model
def MT_xDeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidden_units=(256, 256),
               cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu',
               l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, l2_reg_cin=0,
               init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
               task='binary'):
    """Instantiates an xDeepFM with four output units gated by the ``u_region_id`` input.

    The network emits four prediction units. ``finish`` selects unit 0 or 1 and ``like`` selects
    unit 2 or 3, using ``features['u_region_id']`` as the selector (the ``1 - id`` weight goes to
    the first unit of each pair, the ``id`` weight to the second).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in
        each layer of deep net
    :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden
        layer of Compressed Interaction Network
    :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to
        output unit
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: L2 regularizer strength applied to deep net
    :param l2_reg_cin: L2 regularizer strength applied to CIN.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance with outputs ``[finish, like]``.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear,
                                    init_std=init_std, seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    use_cin = len(cin_layer_size) > 0
    use_deep = len(dnn_hidden_units) > 0

    if use_cin:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half, l2_reg_cin, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(4, activation=None, )(exFM_out)

    # The deep head is always built, even when it is not part of the final sum.
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                   seed)(dnn_input)
    deep_logit = tf.keras.layers.Dense(4, use_bias=False, activation=None)(deep_out)

    if use_deep and use_cin:  # linear + CIN + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit, exFM_logit])
    elif use_deep:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit])
    elif use_cin:  # linear + CIN
        final_logit = tf.keras.layers.add([linear_logit, exFM_logit])
    else:  # only linear
        final_logit = linear_logit

    output_units = PredictionLayer(task)(final_logit)

    # Selector weights derived from the region id input.
    # NOTE(review): assumes ``u_region_id`` only takes the values 0 and 1 — confirm with the data.
    # NOTE(review): ``output_units[:, k]`` drops the last axis; verify that it broadcasts against
    # the region input the way the training code expects.
    region_off = tf.cast(1 - features['u_region_id'], dtype=tf.float32)
    region_on = tf.cast(features['u_region_id'], dtype=tf.float32)

    finish = region_off * output_units[:, 0] + region_on * output_units[:, 1]
    # Fixed: the second term used the ``1 - u_region_id`` weight as well, so unit 3 was never
    # selected for region 1 and ``like`` collapsed to zero there. It now mirrors ``finish``.
    like = region_off * output_units[:, 2] + region_on * output_units[:, 3]

    model = tf.keras.models.Model(inputs=inputs_list, outputs=[finish, like])
    return model
def NCF(user_feature_columns, item_feature_columns, user_gmf_embedding_dim=20, item_gmf_embedding_dim=20,
        user_mlp_embedding_dim=20, item_mlp_embedding_dim=20, dnn_use_bn=False,
        dnn_hidden_units=(64, 16), dnn_activation='relu', l2_reg_dnn=0, l2_reg_embedding=1e-6,
        dnn_dropout=0, init_std=0.0001, seed=1024):
    """Instantiates the NCF Model architecture.

    :param user_feature_columns: A dict containing user's features and features'dim.
    :param item_feature_columns: A dict containing item's features and features'dim.
    :param user_gmf_embedding_dim: int. Requested per-feature GMF embedding size for users; it is
        re-derived below so that the user and item GMF vectors end up with the same total width.
    :param item_gmf_embedding_dim: int. Requested per-feature GMF embedding size for items; re-derived
        in the same way.
    :param user_mlp_embedding_dim: int.
    :param item_mlp_embedding_dim: int.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance.
    :raises ZeroDivisionError: if ``user_feature_columns`` or ``item_feature_columns`` is empty.
    """
    # The GMF branch multiplies the user and item vectors element-wise, so both sides are brought
    # to a common total width: the least common multiple of the two requested widths. Integer
    # arithmetic keeps the result exact (the LCM is always divisible by both feature counts).
    user_dim = len(user_feature_columns) * user_gmf_embedding_dim
    item_dim = len(item_feature_columns) * item_gmf_embedding_dim
    dim = user_dim * item_dim // math.gcd(user_dim, item_dim)
    user_gmf_embedding_dim = dim // len(user_feature_columns)
    item_gmf_embedding_dim = dim // len(item_feature_columns)

    # Generalized Matrix Factorization (GMF) Part
    user_gmf_feature_columns = [
        SparseFeat(feat, vocabulary_size=size, embedding_dim=user_gmf_embedding_dim)
        for feat, size in user_feature_columns.items()
    ]
    user_features = build_input_features(user_gmf_feature_columns)
    user_inputs_list = list(user_features.values())
    # Only the sparse embeddings feed the GMF branch; the dense values are not used here.
    user_gmf_sparse_embedding_list, _ = input_from_feature_columns(
        user_features, user_gmf_feature_columns, l2_reg_embedding, init_std, seed, prefix='gmf_')
    user_gmf_input = combined_dnn_input(user_gmf_sparse_embedding_list, [])
    # Identity layers only attach a stable name to the tensor.
    user_gmf_out = Lambda(lambda x: x, name="user_gmf_embedding")(user_gmf_input)

    item_gmf_feature_columns = [
        SparseFeat(feat, vocabulary_size=size, embedding_dim=item_gmf_embedding_dim)
        for feat, size in item_feature_columns.items()
    ]
    item_features = build_input_features(item_gmf_feature_columns)
    item_inputs_list = list(item_features.values())
    item_gmf_sparse_embedding_list, _ = input_from_feature_columns(
        item_features, item_gmf_feature_columns, l2_reg_embedding, init_std, seed, prefix='gmf_')
    item_gmf_input = combined_dnn_input(item_gmf_sparse_embedding_list, [])
    item_gmf_out = Lambda(lambda x: x, name="item_gmf_embedding")(item_gmf_input)

    gmf_out = Multiply()([user_gmf_out, item_gmf_out])

    # Multi-Layer Perceptron (MLP) Part
    # The MLP branch reuses the input layers built above but looks up its own 'mlp_' embeddings.
    user_mlp_feature_columns = [
        SparseFeat(feat, vocabulary_size=size, embedding_dim=user_mlp_embedding_dim)
        for feat, size in user_feature_columns.items()
    ]
    user_mlp_sparse_embedding_list, user_mlp_dense_value_list = input_from_feature_columns(
        user_features, user_mlp_feature_columns, l2_reg_embedding, init_std, seed, prefix='mlp_')
    user_mlp_input = combined_dnn_input(user_mlp_sparse_embedding_list, user_mlp_dense_value_list)
    user_mlp_out = Lambda(lambda x: x, name="user_mlp_embedding")(user_mlp_input)

    item_mlp_feature_columns = [
        SparseFeat(feat, vocabulary_size=size, embedding_dim=item_mlp_embedding_dim)
        for feat, size in item_feature_columns.items()
    ]
    item_mlp_sparse_embedding_list, item_mlp_dense_value_list = input_from_feature_columns(
        item_features, item_mlp_feature_columns, l2_reg_embedding, init_std, seed, prefix='mlp_')
    item_mlp_input = combined_dnn_input(item_mlp_sparse_embedding_list, item_mlp_dense_value_list)
    item_mlp_out = Lambda(lambda x: x, name="item_mlp_embedding")(item_mlp_input)

    mlp_input = Concatenate(axis=1)([user_mlp_out, item_mlp_out])
    mlp_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed, name="mlp_embedding")(mlp_input)

    # Fusion of GMF and MLP
    neumf_input = Concatenate(axis=1)([gmf_out, mlp_out])
    neumf_out = DNN(hidden_units=[1], activation='sigmoid')(neumf_input)
    output = Lambda(lambda x: x, name='neumf_out')(neumf_out)

    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    return model
def DSSM(user_feature_columns, item_feature_columns, user_dnn_hidden_units=(64, 32),
         item_dnn_hidden_units=(64, 32), dnn_activation='tanh', dnn_use_bn=False,
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024,
         metric='cos'):
    """Build a two-tower DSSM model and plot it to ``dnn.png``.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param user_dnn_hidden_units: layer number and units in each layer of the user tower
    :param item_dnn_hidden_units: layer number and units in each layer of the item tower
    :param dnn_activation: Activation function to use in both towers
    :param dnn_use_bn: bool, forwarded to both tower DNNs
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float, dropout setting forwarded to both tower DNNs
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param metric: str, passed to ``Similarity(type=...)``
    :return: the model, with ``user_input``, ``item_input``, ``user_embedding`` and
        ``item_embedding`` attached as attributes.
    """
    # One embedding table set is created over user + item columns and handed to both towers.
    shared_embeddings = create_embedding_matrix(
        user_feature_columns + item_feature_columns,
        l2_reg_embedding, init_std, seed, seq_mask_zero=True)

    def _tower_input(feature_columns):
        # Build the input layers for one tower and the tensor fed to its DNN.
        tower_features = build_input_features(feature_columns)
        tower_inputs = list(tower_features.values())
        sparse_embeddings, dense_values = input_from_feature_columns(
            tower_features, feature_columns, l2_reg_embedding, init_std, seed,
            embedding_matrix_dict=shared_embeddings)
        return tower_inputs, combined_dnn_input(sparse_embeddings, dense_values)

    user_inputs_list, user_dnn_input = _tower_input(user_feature_columns)
    item_inputs_list, item_dnn_input = _tower_input(item_feature_columns)

    user_tower = DNN(user_dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)
    user_dnn_out = user_tower(user_dnn_input)

    item_tower = DNN(item_dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)
    item_dnn_out = item_tower(item_dnn_input)

    score = Similarity(type=metric)([user_dnn_out, item_dnn_out])
    output = PredictionLayer("binary", False)(score)

    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    # NOTE(review): the plot file and the "go" print look like debugging leftovers;
    # they run on every call. Confirm whether callers rely on them.
    plot_model(model, to_file='dnn.png', show_shapes=True)
    print("go")

    # Expose the tower inputs and outputs on the model object.
    model.user_input = user_inputs_list
    model.item_input = item_inputs_list
    model.user_embedding = user_dnn_out
    model.item_embedding = item_dnn_out

    return model
def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10,
         bias_encoding=False, att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80),
         dnn_activation='sigmoid', dnn_dropout=0, dnn_use_bn=False, l2_reg_dnn=0,
         l2_reg_embedding=1e-6, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Session Interest Network architecture.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param sess_feature_list: list,to indicate session feature sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param sess_len_max: positive int, to indicate the max length of each session
    :param bias_encoding: bool. Whether use bias encoding or positional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``att_embedding_size * att_head_num`` differs from
        ``len(sess_feature_list) * embedding_size``.
    """
    check_feature_config_dict(feature_dim_dict)

    # Width of one concatenated session-feature embedding; must match the attention output width.
    session_width = len(sess_feature_list) * embedding_size
    if att_embedding_size * att_head_num != session_width:
        raise ValueError(
            "len(session_feature_lsit) * embedding_size must equal to att_embedding_size * att_head_num ,got %d * %d != %d *%d"
            % (len(sess_feature_list), embedding_size, att_embedding_size, att_head_num))

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    # One embedding table per sparse feature; session features mask index 0.
    sparse_embedding_dict = {}
    for idx, sparse_feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding_dict[sparse_feat.name] = Embedding(
            sparse_feat.dimension, embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + sparse_feat.name,
            mask_zero=(sparse_feat.name in sess_feature_list))

    query_emb = concat_fun(get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        sess_feature_list, sess_feature_list))

    deep_input_emb = concat_fun(get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list))
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    tr_input = sess_interest_division(
        sparse_embedding_dict, user_behavior_input_dict, feature_dim_dict['sparse'],
        sess_feature_list, sess_max_count, bias_encoding=bias_encoding)

    # Positional encoding inside the Transformer is enabled only when bias encoding is off.
    Self_Attention = Transformer(
        att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
        use_positional_encoding=(not bias_encoding), seed=seed,
        supports_masking=True, blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention)

    # Attention pooling over the session interests, queried by the target item embedding.
    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    # The same pooling applied to the Bi-LSTM outputs over the session interests.
    lstm_outputs = BiLSTM(session_width, layers=2, res_layers=0, dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer),
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, dnn_use_bn, seed)(deep_input_emb)
    logit = Dense(1, use_bias=False, activation=None)(hidden)
    output = PredictionLayer(task)(logit)

    # Model inputs: sparse + dense inputs, then every session's inputs, then the session count.
    sess_input_list = [
        sess_input
        for sess_idx in range(sess_max_count)
        for sess_input in get_inputs_list([user_behavior_input_dict["sess_" + str(sess_idx)]])
    ]
    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    model = Model(inputs=model_input_list, outputs=output)

    return model
def DSSM(user_dnn_feature_columns, item_dnn_feature_columns, gamma=1, dnn_use_bn=True,
         dnn_hidden_units=(300, 300, 128), dnn_activation='tanh', l2_reg_dnn=0,
         l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Structured Semantic Model architecture.

    :param user_dnn_feature_columns: An iterable containing user's features used by deep part of the model.
    :param item_dnn_feature_columns: An iterable containing item's features used by deep part of the model.
    :param gamma: smoothing factor passed to ``Cosine_Similarity``
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net (shared by both towers)
    :param dnn_activation: Activation function to use in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    def _tower_input(feature_columns):
        # Build the input layers for one tower and the tensor fed to its DNN.
        tower_features = build_input_features(feature_columns)
        tower_inputs = list(tower_features.values())
        sparse_embeddings, dense_values = input_from_feature_columns(
            tower_features, feature_columns, l2_reg_embedding, init_std, seed)
        return tower_inputs, combined_dnn_input(sparse_embeddings, dense_values)

    user_inputs_list, user_dnn_input = _tower_input(user_dnn_feature_columns)
    item_inputs_list, item_dnn_input = _tower_input(item_dnn_feature_columns)

    # Both towers use the same hyper-parameters but are separate, individually named DNNs.
    user_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                     dnn_use_bn, seed, name="user_embedding")
    item_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                     dnn_use_bn, seed, name="item_embedding")
    user_dnn_out = user_tower(user_dnn_input)
    item_dnn_out = item_tower(item_dnn_input)

    score = Cosine_Similarity(user_dnn_out, item_dnn_out, gamma=gamma)
    output = PredictionLayer(task, False)(score)

    return Model(inputs=user_inputs_list + item_inputs_list, outputs=output)
def xDeepFM_MTL(linear_feature_columns, dnn_feature_columns, embedding_size=8,
                dnn_hidden_units=(256, 256), cin_layer_size=(256, 256,), cin_split_half=True,
                init_std=0.0001, l2_reg_dnn=0, dnn_dropout=0, dnn_activation='relu',
                dnn_use_bn=False, task_net_size=(128,), l2_reg_linear=0.00001,
                l2_reg_embedding=0.00001, seed=1024):
    """Build an xDeepFM-style model with a task-specific DNN head named ``like``.

    The final logit is the sum of the linear logit, the logit of the task DNN stacked on the
    shared deep network and, when ``cin_layer_size`` is non-empty, the CIN logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, forwarded to ``input_from_feature_columns``
    :param dnn_hidden_units: layer number and units in each layer of the shared deep net
    :param cin_layer_size: units of each CIN layer; an empty sequence disables the CIN branch
    :param cin_split_half: bool, forwarded to ``CIN``
    :param init_std: float,to use as the initialize std of embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the shared deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param task_net_size: units of each layer of the task-specific DNN; needs at least one layer
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear,
                                    init_std=init_std, seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    # The CIN branch is optional. Track its logit explicitly so the final sum does not
    # reference an unassigned name when cin_layer_size is empty.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        # CIN activation ('relu') and L2 strength (0) are fixed here, not configurable.
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, 0, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)

    # Task-specific head on top of the shared deep representation.
    like_out = DNN(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(like_out)

    logits = [linear_logit, like_logit]
    if exFM_logit is not None:
        logits.append(exFM_logit)
    like_logit = tf.keras.layers.add(logits)

    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output_like)
    return model
def DSSM(user_feature_columns, item_feature_columns, user_dnn_hidden_units=(64, 32),
         item_dnn_hidden_units=(64, 32), dnn_activation='tanh', dnn_use_bn=False,
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024,
         metric='cos'):
    """Instantiates the Deep Structured Semantic Model architecture.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param user_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of user tower
    :param item_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of item tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param metric: str, ``"cos"`` for cosine or ``"ip"`` for inner product
    :return: A Keras model instance; the tower inputs and outputs are attached to it as
        ``user_input``, ``item_input``, ``user_embedding`` and ``item_embedding``.
    """
    # Embedding tables are created once over all columns and passed to both towers.
    all_columns = user_feature_columns + item_feature_columns
    embedding_matrix_dict = create_embedding_matrix(
        all_columns, l2_reg_embedding, init_std, seed, seq_mask_zero=True)

    # User tower input.
    user_features = build_input_features(user_feature_columns)
    user_inputs_list = list(user_features.values())
    user_sparse, user_dense = input_from_feature_columns(
        user_features, user_feature_columns, l2_reg_embedding, init_std, seed,
        embedding_matrix_dict=embedding_matrix_dict)
    user_dnn_input = combined_dnn_input(user_sparse, user_dense)

    # Item tower input.
    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())
    item_sparse, item_dense = input_from_feature_columns(
        item_features, item_feature_columns, l2_reg_embedding, init_std, seed,
        embedding_matrix_dict=embedding_matrix_dict)
    item_dnn_input = combined_dnn_input(item_sparse, item_dense)

    # Positional arguments common to both tower DNNs; only the hidden units differ.
    shared_dnn_args = (dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)
    user_dnn_out = DNN(user_dnn_hidden_units, *shared_dnn_args)(user_dnn_input)
    item_dnn_out = DNN(item_dnn_hidden_units, *shared_dnn_args)(item_dnn_input)

    score = Similarity(type=metric)([user_dnn_out, item_dnn_out])
    output = PredictionLayer("binary", False)(score)

    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    # Expose the tower inputs and outputs on the model object.
    for attr_name, attr_value in (("user_input", user_inputs_list),
                                  ("item_input", item_inputs_list),
                                  ("user_embedding", user_dnn_out),
                                  ("item_embedding", item_dnn_out)):
        setattr(model, attr_name, attr_value)

    return model
def DeepFM(feature_dim_dict, attention_feature_name=None, embedding_size=8, use_fm=True,
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
           l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu',
           dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param attention_feature_name: forwarded to ``preprocess_input_embedding``; a truthy value also
        switches on its ``use_var_attention`` flag
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_fm: bool,use FM part or not
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises NotImplementedError: if ``use_fm`` compares equal to neither ``False`` nor ``True``.
    """
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
        create_linear_weight=True,
        use_var_attention=bool(attention_feature_name),
        attention_feature_name=attention_feature_name)

    linear_logit = get_linear_logit(linear_emb_list, dense_input_dict, l2_reg_linear)

    # All three branches are built unconditionally; the selection below decides which are summed.
    fm_input = concat_fun(deep_emb_list, axis=1)
    deep_input = tf.keras.layers.Flatten()(fm_input)
    fm_out = FM()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(deep_out)

    # Equality (not truthiness) decides the FM switch; anything else is rejected.
    if use_fm == False:
        fm_enabled = False
    elif use_fm == True:
        fm_enabled = True
    else:
        raise NotImplementedError

    # Summation order: linear, then FM, then deep.
    selected_logits = [linear_logit]
    if fm_enabled:
        selected_logits.append(fm_out)
    if len(dnn_hidden_units) > 0:
        selected_logits.append(deep_logit)

    if len(selected_logits) == 1:
        final_logit = linear_logit  # only linear
    else:
        final_logit = tf.keras.layers.add(selected_logits)

    output = PredictionLayer(task)(final_logit)
    return tf.keras.models.Model(inputs=inputs_list, outputs=output)
def create_model(linear_feature_columns, dnn_feature_columns, fm_group=[DEFAULT_GROUP_NAME],
                 dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
                 l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 dnn_use_bn=False, task='binary'):
    """Build a DeepFM-style model (linear + FM + DNN logits) and print its summary.

    The Keras session is cleared first. Reads the module-level ``dense_feature_size``.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list of embedding group names whose embeddings go through FM (only read here)
    :param dnn_hidden_units: layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, passed to ``PredictionLayer``
    :return: A Keras model instance.
    """
    K.clear_session()

    # NOTE(review): the 'slotid_nettype' input and its expanded tensor are created but not wired
    # into the model. They fed a cross-embedding stage and a sequence-embedding/transformer
    # pooling stage that are currently disabled; the DNN input used to be the concatenation of
    # those two outputs with dnn_input.
    inputs_all = [
        get_input_feature_layer(name='slotid_nettype', feature_shape=dense_feature_size),
    ]
    layer_slotid_nettype = K.expand_dims(inputs_all[0], 1)

    # DeepFM part.
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # One FM term per embedding group listed in fm_group.
    fm_terms = []
    for group_name, group_embeddings in group_embedding_dict.items():
        if group_name in fm_group:
            fm_terms.append(FM()(concat_func(group_embeddings, axis=1)))
    fm_logit = add_func(fm_terms)

    # The DNN sees the embeddings of every group, flattened into a single list.
    all_embeddings = [
        embedding
        for group_embeddings in group_embedding_dict.values()
        for embedding in group_embeddings
    ]
    dnn_input = combined_dnn_input(all_embeddings, dense_value_list)

    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                     dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=[output])
    print(model.summary())
    return model
def KDD_DIN(dnn_feature_columns, history_feature_list, dnn_use_bn=False,
            dnn_hidden_units=(200, 80), dnn_activation='relu', att_hidden_size=(80, 40),
            att_activation="dice", att_weight_normalization=False, l2_reg_dnn=0,
            l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list,to indicate sequence sparse field
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    # Split the columns by kind; a falsy column collection yields empty lists.
    if dnn_feature_columns:
        sparse_feature_columns = [fc for fc in dnn_feature_columns if isinstance(fc, SparseFeat)]
        dense_feature_columns = [fc for fc in dnn_feature_columns if isinstance(fc, DenseFeat)]
        varlen_sparse_feature_columns = [
            fc for fc in dnn_feature_columns if isinstance(fc, VarLenSparseFeat)]
    else:
        sparse_feature_columns = []
        dense_feature_columns = []
        varlen_sparse_feature_columns = []

    # Variable-length columns named "hist_<feature>" are the behaviour history used as attention
    # keys; all other variable-length columns are pooled and fed to the DNN.
    history_fc_names = ["hist_" + feat_name for feat_name in history_feature_list]
    history_feature_columns = [
        fc for fc in varlen_sparse_feature_columns if fc.name in history_fc_names]
    sparse_varlen_feature_columns = [
        fc for fc in varlen_sparse_feature_columns if fc.name not in history_fc_names]

    inputs_list = list(features.values())

    embedding_dict = kdd_create_embedding_matrix(
        dnn_feature_columns, l2_reg_embedding, init_std, seed, prefix="")

    query_emb_list = embedding_lookup(
        embedding_dict, features, sparse_feature_columns,
        history_feature_list, history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(
        embedding_dict, features, history_feature_columns,
        history_fc_names, history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(
        embedding_dict, features, sparse_feature_columns,
        mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict, features, sparse_varlen_feature_columns, to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list, mask=True)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list, mask=True)

    # Attention pooling of the history keys, queried by the candidate embeddings.
    attention_pooling = AttentionSequencePoolingLayer(
        att_hidden_size, att_activation,
        weight_normalization=att_weight_normalization, supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    deep_input_emb = Flatten()(Concatenate()([NoMask()(deep_input_emb), hist]))
    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)

    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 dnn_use_bn, seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(hidden)
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=inputs_list, outputs=output)
def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10,
         att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80),
         dnn_activation='sigmoid', l2_reg_dnn=0, l2_reg_embedding=1e-6, task='binary',
         dnn_dropout=0, init_std=0.0001, seed=1024, encoding='bias'):
    """Instantiates the Deep Session Interest Network architecture.

    Prints the session count and the encoding mode when called. Batch normalization in the
    final DNN is always disabled in this variant.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param sess_feature_list: list,to indicate session feature sparse field
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param sess_len_max: positive int, to indicate the max length of each session
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param encoding: str, ``'bias'`` selects bias encoding; any other value makes the
        Transformer use positional encoding instead
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    print('sess_count', sess_max_count, 'encoding', encoding)

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    # One embedding table per sparse feature; session features mask index 0.
    sparse_embedding_dict = {}
    for idx, sparse_feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding_dict[sparse_feat.name] = Embedding(
            sparse_feat.dimension, embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + sparse_feat.name,
            mask_zero=(sparse_feat.name in sess_feature_list))

    query_emb = concat_fun(get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        sess_feature_list, sess_feature_list))

    deep_input_emb = concat_fun(get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list))
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    # Bias encoding and the Transformer's positional encoding are mutually exclusive.
    be_flag = encoding == 'bias'
    tr_input = sess_interest_division(
        sparse_embedding_dict, user_behavior_input_dict, feature_dim_dict['sparse'],
        sess_feature_list, sess_max_count, bias_encoding=be_flag)

    Self_Attention = Transformer(
        att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
        use_positional_encoding=(not be_flag), seed=seed,
        supports_masking=True, blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention)

    # Attention pooling over the session interests, queried by the target item embedding.
    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    # The same pooling applied to the Bi-LSTM outputs over the session interests.
    lstm_outputs = BiLSTM(len(sess_feature_list) * embedding_size,
                          layers=2, res_layers=0, dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer),
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, False, seed)(deep_input_emb)
    logit = Dense(1, use_bias=False, activation=None)(hidden)
    output = PredictionLayer(task)(logit)

    # Model inputs: sparse + dense inputs, then every session's inputs, then the session count.
    sess_input_list = [
        sess_input
        for sess_idx in range(sess_max_count)
        for sess_input in get_inputs_list([user_behavior_input_dict["sess_" + str(sess_idx)]])
    ]
    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    model = Model(inputs=model_input_list, outputs=output)

    return model
def create_model(linear_feature_columns,
                 dnn_feature_columns,
                 fm_group=(DEFAULT_GROUP_NAME,),
                 dnn_hidden_units=(128, 128),
                 l2_reg_linear=0.00001,
                 l2_reg_embedding=0.00001,
                 l2_reg_dnn=0,
                 seed=1024,
                 dnn_dropout=0,
                 dnn_activation='relu',
                 dnn_use_bn=False,
                 task='binary'):
    """Build a model mixing a cross branch, a sequence branch and linear/FM/DNN logits.

    Three feature blocks are concatenated and fed to the DNN:

    * the output of ``cross_net`` over the ``user_1`` / ``item_1`` inputs,
    * max- and average-pooled output of ``trans_net`` over the embedded
      sequence columns in ``arr_name_list``,
    * the combined embedding/dense input built from ``dnn_feature_columns``.

    The final logit is the sum of the linear, FM and DNN logits.

    The function calls ``K.clear_session()`` first and reads several names
    that are not parameters (``dense_feature_size``, ``arr_name_list``,
    ``TRAINABLE_DICT``, ``id_list_dict_emb_all``, ``conv1d_info_dict``); they
    must be defined in the enclosing module.

    :param linear_feature_columns: feature columns used for the linear logit.
    :param dnn_feature_columns: feature columns used for the FM and DNN parts.
    :param fm_group: group names whose embeddings take part in the FM logit. Only membership is tested, so any container works; the default is a tuple so that no mutable default is shared between calls.
    :param dnn_hidden_units: hidden units of the DNN.
    :param l2_reg_linear: L2 strength passed to ``get_linear_logit``.
    :param l2_reg_embedding: L2 strength passed to ``input_from_feature_columns``.
    :param l2_reg_dnn: L2 strength passed to the DNN.
    :param seed: random seed.
    :param dnn_dropout: dropout rate of the DNN.
    :param dnn_activation: activation of the DNN.
    :param dnn_use_bn: whether the DNN uses batch normalization.
    :param task: forwarded to ``PredictionLayer``.
    :return: A Keras model instance (its summary is printed before returning).
    """
    K.clear_session()

    # --- Cross branch --------------------------------------------------------
    # slotid_nettype
    inputs_all = [
        get_input_feature_layer(name='user_1',
                                feature_shape=dense_feature_size),
        get_input_feature_layer(name='item_1',
                                feature_shape=dense_feature_size),
    ]
    # cross_net is fed tensors with an extra axis at position 1, which is
    # squeezed away again afterwards.
    layer_user_1 = K.expand_dims(inputs_all[0], 1)
    layer_item_1 = K.expand_dims(inputs_all[1], 1)
    cross_emb_out1 = cross_net(layer_user_1, layer_item_1)
    cross_emb_out = tf.squeeze(cross_emb_out1, [1])

    # --- Sequence branch -----------------------------------------------------
    seq_inputs_dict = get_seq_input_layers(cols=arr_name_list)
    inputs_all = inputs_all + list(seq_inputs_dict.values())  # list of input layers
    # True wherever the 'task_id' sequence holds 0.
    masks = tf.equal(seq_inputs_dict['task_id'], 0)

    # Ordinary sequences + label sequences.
    layers2concat = []
    for col in arr_name_list:
        print(col, 'get embedding!')
        emb_layer = get_emb_layer(col,
                                  trainable=TRAINABLE_DICT[col],
                                  emb_matrix=id_list_dict_emb_all[col][1])
        x = emb_layer(seq_inputs_dict[col])
        # A value > -1 is used as the filter count of a kernel-size-1 Conv1D
        # applied on top of the embedding.
        if conv1d_info_dict[col] > -1:
            cov_layer = tf.keras.layers.Conv1D(filters=conv1d_info_dict[col],
                                               kernel_size=1,
                                               activation='relu')
            x = cov_layer(x)
        layers2concat.append(x)
    x = keras.layers.concatenate(layers2concat)

    # mix1: transformer block followed by max + average pooling.
    x = trans_net(x, masks, hidden_unit=256)
    max_pool = tf.keras.layers.GlobalMaxPooling1D()
    average_pool = tf.keras.layers.GlobalAveragePooling1D()
    xmaxpool = max_pool(x)
    xmeanpool = average_pool(x)
    trans_output = tf.keras.layers.concatenate([xmaxpool, xmeanpool])

    # mix2: linear + FM + DNN over the feature columns.
    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)
    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    seed=seed,
                                    prefix='linear',
                                    l2_reg=l2_reg_linear)
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features,
        dnn_feature_columns,
        l2_reg_embedding,
        seed,
        support_group=True)
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])
    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())),
        dense_value_list)

    mix = concatenate([cross_emb_out, trans_output, dnn_input], axis=-1)

    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)(mix)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_output)
    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    # NOTE(review): the feature dict is appended as a single nested element,
    # so callers feed it as a dict -- confirm the Keras version in use accepts
    # nested input structures.
    model = Model(inputs=inputs_all + [features], outputs=[output])
    # summary() prints by itself and returns None; wrapping it in print()
    # emitted a stray "None" line.
    model.summary()
    return model
def DIEN(feature_dim_dict,
         seq_feature_list,
         embedding_size=8,
         hist_len_max=16,
         gru_type="GRU",
         use_negsampling=False,
         alpha=1.0,
         use_bn=False,
         dnn_hidden_units=(200, 80),
         dnn_activation='relu',
         att_hidden_units=(64, 16),
         att_activation="dice",
         att_weight_normalization=True,
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         init_std=0.0001,
         seed=1024,
         task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param feature_dim_dict: dict describing the sparse fields (**only sparse features are supported for now**), e.g. {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list of sequence sparse fields (**only sparse features are supported for now**); must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, embedding size of the sparse features.
    :param hist_len_max: positive int, maximum length of the sequence input
    :param gru_type: str, one of GRU, AIGRU, AUGRU, AGRU
    :param use_negsampling: bool, whether to use negative sampling; when True extra ``neg_seq_*`` inputs are added and the auxiliary loss is attached to the model
    :param alpha: float, weight of the auxiliary loss
    :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and units per layer of the DNN
    :param dnn_activation: activation function used in the DNN
    :param att_hidden_units: list of positive integers, the number of layers and units per layer of the attention net
    :param att_activation: activation function used in the attention net
    :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param init_std: float, standard deviation used to initialize the embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    (sparse_input, dense_input, user_behavior_input,
     user_behavior_length) = get_input(feature_dim_dict, seq_feature_list,
                                       hist_len_max)

    sparse_feats = feature_dim_dict["sparse"]

    # One embedding table per sparse feature.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(sparse_feats):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0,
                                                stddev=init_std,
                                                seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name)

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            sparse_feats,
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           sparse_feats,
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 sparse_feats)

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    # Negative behaviour sequences are only wired in when requested.
    neg_concat_behavior = None
    if use_negsampling:
        neg_user_behavior_input = OrderedDict(
            (feat,
             Input(shape=(hist_len_max,),
                   name='neg_seq_' + str(idx) + '-' + feat))
            for idx, feat in enumerate(seq_feature_list))
        neg_uiseq_embed_list = get_embedding_vec_list(
            sparse_embedding_dict,
            neg_user_behavior_input,
            sparse_feats,
            seq_feature_list)
        neg_concat_behavior = concat_fun(neg_uiseq_embed_list)

    hist, aux_loss_1 = interest_evolution(
        keys_emb,
        query_emb,
        user_behavior_length,
        gru_type=gru_type,
        use_neg=use_negsampling,
        neg_concat_behavior=neg_concat_behavior,
        embedding_size=embedding_size,
        att_hidden_size=att_hidden_units,
        att_activation=att_activation,
        att_weight_normalization=att_weight_normalization)

    merged = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(merged)
    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(dnn_out)
    output = PredictionLayer(task)(final_logit)

    # Input order: sparse, dense, behaviour sequence, [negative sequences],
    # behaviour length.
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list += list(neg_user_behavior_input.values())
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)

    # Variables are initialized in the current Keras backend session before
    # the model is returned.
    session = tf.keras.backend.get_session()
    session.run(tf.global_variables_initializer())
    return model
def _build_trans_branch(branch_id,
                        vocab_size,
                        emb,
                        emb_label,
                        max_seq_len=40,
                        model_dim=128,
                        feed_forward_size=2048):
    """Build one frozen-embedding transformer-encoder branch of ``M``.

    :param branch_id: integer used in the layer names (``input_trans_<id>_layer``, ``input_trans_<id>_label_layer``, ``output_trans_<id>_layer``).
    :param vocab_size: number of ids; the id embedding has ``vocab_size + 1`` rows.
    :param emb: initial weights of the (non-trainable) id embedding.
    :param emb_label: initial weights of the (non-trainable) label embedding, which has ``2 + 1`` rows.
    :param max_seq_len: length of both input sequences.
    :param model_dim: embedding / model dimension.
    :param feed_forward_size: inner size of the position-wise feed-forward layer.
    :return: tuple ``(id_input, label_input, branch_output)`` where ``branch_output`` is the 5-way softmax of the branch.
    """
    id_input = Input(shape=(max_seq_len, ),
                     name='input_trans_%d_layer' % branch_id)
    label_input = Input(shape=(max_seq_len, ),
                        name='input_trans_%d_label_layer' % branch_id)

    x = Embedding(input_dim=vocab_size + 1,
                  output_dim=model_dim,
                  weights=[emb],
                  trainable=False,
                  input_length=max_seq_len,
                  mask_zero=True)(id_input)
    x_label = Embedding(input_dim=2 + 1,
                        output_dim=model_dim,
                        weights=[emb_label],
                        trainable=False,
                        input_length=max_seq_len,
                        mask_zero=True)(label_input)

    # Id embedding + position encoding + label embedding.
    encodings = PositionEncoding(model_dim)(x)
    encodings = Add()([x, encodings])
    encodings = Add()([x_label, encodings])

    # True wherever the id sequence holds 0.
    masks = tf.equal(id_input, 0)

    # presumably (batch, max_seq_len, model_dim) -- TODO confirm against
    # MultiHeadAttention
    attention_out = MultiHeadAttention(4, 32)(
        [encodings, encodings, encodings, masks])
    # Add & Norm
    attention_out = attention_out + encodings
    attention_out = LayerNormalization()(attention_out)

    # Feed-Forward
    ff = PositionWiseFeedForward(model_dim, feed_forward_size)
    ff_out = ff(attention_out)
    # Add & Norm
    ff_out = ff_out + attention_out
    encodings = LayerNormalization()(ff_out)

    encodings = GlobalMaxPooling1D()(encodings)
    encodings = Dropout(0.2)(encodings)
    branch_output = Dense(5,
                          activation='softmax',
                          name='output_trans_%d_layer' % branch_id)(encodings)
    return id_input, label_input, branch_output


def M(emb1,
      emb1_label,
      emb2,
      emb2_label,
      emb3,
      emb3_label,
      emb4,
      emb4_label,
      emb5,
      emb5_label,
      linear_feature_columns,
      dnn_feature_columns,
      fm_group=(DEFAULT_GROUP_NAME,),
      dnn_hidden_units=(128, 128),
      l2_reg_linear=0.00001,
      l2_reg_embedding=0.00001,
      l2_reg_dnn=0,
      seed=1024,
      dnn_dropout=0,
      dnn_activation='relu',
      dnn_use_bn=False,
      task='binary'):
    """Build and compile a model mixing five transformer branches with linear/FM/DNN logits.

    Five structurally identical branches (see ``_build_trans_branch``) each
    turn an id sequence plus a label sequence of length 40 into a 5-way
    softmax. The five softmax outputs are concatenated with the combined
    DNN input built from ``dnn_feature_columns`` and fed to the DNN. The final
    logit is the sum of the linear, FM and DNN logits.

    :param emb1: initial weights of the frozen id embedding of branch 1 (5307 + 1 rows, 128 columns expected by the layer definition).
    :param emb1_label: initial weights of the frozen label embedding of branch 1 (2 + 1 rows).
    :param emb2: as ``emb1`` for branch 2 (101 + 1 rows).
    :param emb2_label: as ``emb1_label`` for branch 2.
    :param emb3: as ``emb1`` for branch 3 (8 + 1 rows).
    :param emb3_label: as ``emb1_label`` for branch 3.
    :param emb4: as ``emb1`` for branch 4 (38 + 1 rows).
    :param emb4_label: as ``emb1_label`` for branch 4.
    :param emb5: as ``emb1`` for branch 5 (4317 + 1 rows).
    :param emb5_label: as ``emb1_label`` for branch 5.
    :param linear_feature_columns: feature columns used for the linear logit.
    :param dnn_feature_columns: feature columns used for the FM and DNN parts.
    :param fm_group: group names whose embeddings take part in the FM logit. Only membership is tested; the default is a tuple so that no mutable default is shared between calls.
    :param dnn_hidden_units: hidden units of the DNN.
    :param l2_reg_linear: L2 strength passed to ``get_linear_logit``.
    :param l2_reg_embedding: L2 strength passed to ``input_from_feature_columns``.
    :param l2_reg_dnn: L2 strength passed to the DNN.
    :param seed: random seed.
    :param dnn_dropout: dropout rate of the DNN.
    :param dnn_activation: activation of the DNN.
    :param dnn_use_bn: whether the DNN uses batch normalization.
    :param task: forwarded to ``PredictionLayer``.
    :return: A compiled Keras model. Its inputs are the ten branch inputs (id, label per branch, in branch order) followed by the feature dict.
    """
    # (vocabulary size, id-embedding weights, label-embedding weights) for
    # branches 1..5.
    branch_specs = (
        (5307, emb1, emb1_label),
        (101, emb2, emb2_label),
        (8, emb3, emb3_label),
        (38, emb4, emb4_label),
        (4317, emb5, emb5_label),
    )

    trans_inputs = []
    trans_outputs = []
    for branch_id, (vocab_size, emb, emb_label) in enumerate(branch_specs,
                                                             start=1):
        id_input, label_input, branch_output = _build_trans_branch(
            branch_id, vocab_size, emb, emb_label)
        trans_inputs.extend([id_input, label_input])
        trans_outputs.append(branch_output)

    # Pairwise chaining (rather than one 5-way concatenate) keeps the layer
    # graph identical to the original hand-unrolled version.
    trans_output = trans_outputs[0]
    for branch_output in trans_outputs[1:]:
        trans_output = concatenate([trans_output, branch_output], axis=-1)

    # mix2: linear + FM + DNN over the feature columns.
    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)
    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    seed=seed,
                                    prefix='linear',
                                    l2_reg=l2_reg_linear)
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features,
        dnn_feature_columns,
        l2_reg_embedding,
        seed,
        support_group=True)
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])
    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())),
        dense_value_list)

    mix = concatenate([trans_output, dnn_input], axis=-1)

    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)(mix)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_output)
    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    # NOTE(review): the feature dict is appended as a single nested element --
    # confirm the Keras version in use accepts nested input structures.
    model = Model(inputs=trans_inputs + [features], outputs=[output])
    # NOTE(review): the loss is keyed on the output layer name
    # 'prediction_layer' and is always binary cross-entropy, whatever `task`
    # is -- confirm this is intended.
    model.compile(optimizer=optimizers.Adam(2.5e-4),
                  loss={'prediction_layer': losses.binary_crossentropy},
                  metrics=['AUC'])
    return model
def DIN(feature_dim_dict,
        seq_feature_list,
        embedding_size=8,
        hist_len_max=16,
        dnn_use_bn=False,
        dnn_hidden_units=(200, 80),
        dnn_activation='relu',
        att_hidden_size=(80, 40),
        att_activation="dice",
        att_weight_normalization=False,
        l2_reg_dnn=0,
        l2_reg_embedding=1e-6,
        dnn_dropout=0,
        init_std=0.0001,
        seed=1024,
        task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict describing the sparse fields (**only sparse features are supported for now**), e.g. {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list of sequence sparse fields (**only sparse features are supported for now**); must be a subset of ``feature_dim_dict["sparse"]``. Embeddings of these features are created with ``mask_zero=True``.
    :param embedding_size: positive integer, embedding size of the sparse features.
    :param hist_len_max: positive int, maximum length of the sequence input
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and units per layer of the deep net
    :param dnn_activation: activation function used in the deep net
    :param att_hidden_size: list of positive integers, the number of layers and units per layer of the attention net
    :param att_activation: activation function used in the attention net
    :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param init_std: float, standard deviation used to initialize the embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    sparse_feats = feature_dim_dict["sparse"]

    # One embedding table per sparse feature; zero is treated as padding only
    # for the sequence features.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(sparse_feats):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0,
                                                stddev=init_std,
                                                seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in seq_feature_list))

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input, sparse_feats,
                                            seq_feature_list,
                                            seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input, sparse_feats,
                                           seq_feature_list,
                                           seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict,
        sparse_input,
        sparse_feats,
        mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    # Attention-pool the behaviour sequence against the query embedding.
    attention_pooling = AttentionSequencePoolingLayer(
        att_hidden_size,
        att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    merge = Concatenate()
    unmasked_emb = NoMask()(deep_input_emb)
    deep_input_emb = Flatten()(merge([unmasked_emb, hist]))

    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(dnn_out)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    return Model(inputs=model_input_list, outputs=output)
def YoutubeDNN(
    user_feature_columns,
    item_feature_columns,
    num_sampled=5,
    user_dnn_hidden_units=(64, 16),
    dnn_activation='relu',
    dnn_use_bn=False,
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
):
    """Instantiates the YoutubeDNN Model architecture.

    Besides the usual Keras model, the returned object carries four extra
    attributes: ``user_input``, ``user_embedding``, ``item_input`` and
    ``item_embedding``.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model; must contain exactly one feature.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param user_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of user tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :raises ValueError: if ``item_feature_columns`` does not contain exactly one feature.
    :return: A Keras model instance.
    """
    # Exactly one item feature is required. The previous "> 1" check let an
    # empty list through and failed later with an unrelated IndexError.
    if len(item_feature_columns) != 1:
        raise ValueError(
            "Now YoutubeNN only support 1 item feature like item_id")
    item_feature_name = item_feature_columns[0].name

    # User and item features share one set of embedding matrices.
    embedding_matrix_dict = create_embedding_matrix(user_feature_columns +
                                                    item_feature_columns,
                                                    l2_reg_embedding,
                                                    init_std,
                                                    seed,
                                                    prefix="")

    user_features = build_input_features(user_feature_columns)
    user_inputs_list = list(user_features.values())
    user_sparse_embedding_list, user_dense_value_list = input_from_feature_columns(
        user_features,
        user_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        embedding_matrix_dict=embedding_matrix_dict)
    user_dnn_input = combined_dnn_input(user_sparse_embedding_list,
                                        user_dense_value_list)

    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())

    user_dnn_out = DNN(
        user_dnn_hidden_units,
        dnn_activation,
        l2_reg_dnn,
        dnn_dropout,
        dnn_use_bn,
        seed,
    )(user_dnn_input)

    item_embedding = embedding_matrix_dict[item_feature_name]
    item_input = item_features[item_feature_name]

    output = SampledSoftmaxLayer(item_embedding, num_sampled=num_sampled)(
        inputs=(user_dnn_out, item_input))
    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    # Expose the tensors needed to extract user / item embeddings later.
    setattr(model, "user_input", user_inputs_list)
    setattr(model, "user_embedding", user_dnn_out)
    setattr(model, "item_input", item_inputs_list)
    setattr(model, "item_embedding",
            get_item_embedding(item_embedding, item_input))

    return model
def _model_fn(features, labels, mode, config):
    """Estimator ``model_fn``: build a DIN-style logit and hand it to ``deepctr_model_fn``.

    The function reads many names it does not define (``dnn_feature_columns``,
    ``history_feature_list``, ``l2_reg_embedding``, ``seed``,
    ``att_hidden_size``, ``att_activation``, ``att_weight_normalization``,
    ``dnn_hidden_units``, ``dnn_activation``, ``l2_reg_dnn``, ``dnn_dropout``,
    ``dnn_use_bn``, ``task``, ``linear_optimizer``, ``dnn_optimizer``,
    ``training_chief_hooks``); they must come from the enclosing scope.

    :param features: feature mapping passed to the embedding / dense lookups.
    :param labels: labels forwarded to ``deepctr_model_fn``.
    :param mode: estimator mode, forwarded to ``deepctr_model_fn``.
    :param config: not used in the body.
    :return: whatever ``deepctr_model_fn`` returns.
    """
    train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

    # NOTE(review): the source formatting does not show where this scope
    # ends; the final return is assumed to sit outside it -- confirm.
    with variable_scope(DNN_SCOPE_NAME):
        sparse_feature_columns = []
        dense_feature_columns = []
        varlen_sparse_feature_columns = []

        # Translate each estimator feature column into the matching
        # SparseFeat / VarLenSparseFeat / DenseFeat description.
        for feat in dnn_feature_columns:
            feat_name = list(feat.parse_example_spec.keys())[0]
            if feat_name in ('hist_price_id', 'hist_des_id'):
                base_feat = SparseFeat(feat_name,
                                       vocabulary_size=100,
                                       embedding_dim=32,
                                       use_hash=False)
                varlen_sparse_feature_columns.append(
                    VarLenSparseFeat(base_feat, maxlen=3))
                continue
            if is_embedding(feat):
                sparse_feature_columns.append(
                    SparseFeat(feat_name,
                               vocabulary_size=feat[0]._num_buckets + 1,
                               embedding_dim=feat.dimension))
                continue
            dense_feature_columns.append(DenseFeat(feat_name))

        # Split the variable-length columns into history columns
        # ("hist_<name>") and the remaining ones.
        history_feature_columns = []
        sparse_varlen_feature_columns = []
        history_fc_names = ["hist_" + name for name in history_feature_list]
        for column in varlen_sparse_feature_columns:
            if column.name in history_fc_names:
                bucket = history_feature_columns
            else:
                bucket = sparse_varlen_feature_columns
            bucket.append(column)

        my_feature_columns = (sparse_feature_columns + dense_feature_columns +
                              varlen_sparse_feature_columns)
        embedding_dict = create_embedding_matrix(my_feature_columns,
                                                 l2_reg_embedding,
                                                 seed,
                                                 prefix="")

        query_emb_list = embedding_lookup(embedding_dict,
                                          features,
                                          sparse_feature_columns,
                                          history_feature_list,
                                          history_feature_list,
                                          to_list=True)
        print('query_emb_list', query_emb_list)
        print('embedding_dict', embedding_dict)
        print('haha')
        print('history_feature_columns', history_feature_columns)
        print('haha')

        keys_emb_list = embedding_lookup(embedding_dict,
                                         features,
                                         history_feature_columns,
                                         history_fc_names,
                                         history_fc_names,
                                         to_list=True)
        print('keys_emb_list', keys_emb_list)

        dnn_input_emb_list = embedding_lookup(
            embedding_dict,
            features,
            sparse_feature_columns,
            mask_feat_list=history_feature_list,
            to_list=True)
        print('dnn_input_emb_list', dnn_input_emb_list)

        dense_value_list = get_dense_input(features, dense_feature_columns)

        # Pooled embeddings of the non-history variable-length columns join
        # the DNN input.
        sequence_embed_dict = varlen_embedding_lookup(
            embedding_dict, features, sparse_varlen_feature_columns)
        sequence_embed_list = get_varlen_pooling_list(
            sequence_embed_dict,
            features,
            sparse_varlen_feature_columns,
            to_list=True)
        dnn_input_emb_list += sequence_embed_list

        keys_emb = concat_func(keys_emb_list, mask=True)
        deep_input_emb = concat_func(dnn_input_emb_list)
        query_emb = concat_func(query_emb_list, mask=True)

        attention_pooling = AttentionSequencePoolingLayer(
            att_hidden_size,
            att_activation,
            weight_normalization=att_weight_normalization,
            supports_masking=True)
        hist = attention_pooling([query_emb, keys_emb])

        merge = tf.keras.layers.Concatenate()
        unmasked_emb = NoMask()(deep_input_emb)
        deep_input_emb = merge([unmasked_emb, hist])
        deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)

        dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
        dnn_out = DNN(dnn_hidden_units,
                      dnn_activation,
                      l2_reg_dnn,
                      dnn_dropout,
                      dnn_use_bn,
                      seed=seed)(dnn_input)
        logit_layer = tf.keras.layers.Dense(
            1,
            use_bias=False,
            kernel_initializer=tf.keras.initializers.glorot_normal(seed))
        final_logit = logit_layer(dnn_out)

    return deepctr_model_fn(features,
                            mode,
                            final_logit,
                            labels,
                            task,
                            linear_optimizer,
                            dnn_optimizer,
                            training_chief_hooks=training_chief_hooks)
def CapsuleNet(feature_dim_dict,
               seq_feature_list,
               embedding_size=8,
               hist_len_max=50,
               use_bn=False,
               dnn_hidden_units=(200, 80),
               dnn_activation='sigmoid',
               num_capsule=8,
               dim_capsule=2,
               routing_iterations=3,
               att_hidden_size=(64, 16),
               att_activation="dice",
               att_weight_normalization=True,
               att_embedding_size=1,
               att_head_num=8,
               l2_reg_dnn=0,
               l2_reg_embedding=1e-6,
               dnn_dropout=0,
               init_std=0.0001,
               alpha=1e-6,
               seed=1024,
               task='binary'):
    """Build a Keras model that summarises the behaviour sequence with a capsule layer.

    Attention scores between the query and the behaviour sequence are
    computed first; the sequence then goes through a ``Transformer`` layer and
    a ``Capsule`` layer that receives those scores. The capsule output is
    concatenated with the sparse embeddings (and dense inputs, if any) and
    fed to the DNN. ``alpha * get_disp_loss(capsules)`` is added to the model
    as an extra loss.

    :param feature_dim_dict: dict with a ``"sparse"`` entry whose items expose ``.name`` and ``.dimension``; checked by ``check_feature_config_dict``.
    :param seq_feature_list: names of the sequence features; their embeddings are created with ``mask_zero=True``.
    :param embedding_size: output dimension of every sparse embedding.
    :param hist_len_max: forwarded to ``get_input`` (presumably the max sequence length -- confirm there).
    :param use_bn: whether the DNN uses batch normalization.
    :param dnn_hidden_units: hidden units of the DNN.
    :param dnn_activation: activation of the DNN.
    :param num_capsule: number of capsules.
    :param dim_capsule: dimension of each capsule.
    :param routing_iterations: passed to ``Capsule`` as ``routings``.
    :param att_hidden_size: hidden units of the attention scoring layer.
    :param att_activation: activation of the attention scoring layer.
    :param att_weight_normalization: whether the attention scores are normalized.
    :param att_embedding_size: first argument of the ``Transformer`` layer.
    :param att_head_num: second argument of the ``Transformer`` layer.
    :param l2_reg_dnn: L2 strength passed to the DNN.
    :param l2_reg_embedding: L2 strength applied to the embedding tables.
    :param dnn_dropout: dropout rate of the DNN.
    :param init_std: stddev of the ``RandomNormal`` embedding initializer.
    :param alpha: weight of the extra loss returned by ``get_disp_loss``.
    :param seed: random seed.
    :param task: forwarded to ``PredictionLayer``.
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    (sparse_input, dense_input, user_behavior_input,
     user_behavior_length) = get_input(feature_dim_dict, seq_feature_list,
                                       hist_len_max)

    sparse_feats = feature_dim_dict["sparse"]

    # One embedding table per sparse feature; zero is treated as padding only
    # for the sequence features.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(sparse_feats):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0,
                                                stddev=init_std,
                                                seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in seq_feature_list))

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            sparse_feats,
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           sparse_feats,
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 sparse_feats)

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)

    # Attention scores (not the pooled output) between query and keys.
    score_layer = AttentionSequencePoolingLayer(
        att_hidden_units=att_hidden_size,
        att_activation=att_activation,
        weight_normalization=att_weight_normalization,
        return_score=True)
    scores = score_layer([query_emb, keys_emb, user_behavior_length])

    # Self-attention over the behaviour sequence.
    self_attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=True,
                                 use_positional_encoding=True,
                                 seed=seed,
                                 supports_masking=False,
                                 blinding=True)
    keys_emb = self_attention(
        [keys_emb, keys_emb, user_behavior_length, user_behavior_length])

    capsule_layer = Capsule(num_capsule=num_capsule,
                            dim_capsule=dim_capsule,
                            routings=routing_iterations,
                            share_weights=True,
                            supports_masking=True)
    hist_cap = capsule_layer(keys_emb, scores=scores)
    disp_loss = get_disp_loss(hist_cap)

    # Collapse all capsules into a single step of num_capsule * dim_capsule.
    reshape = Reshape([1, num_capsule * dim_capsule])
    hist_cap = reshape(NoMask()(hist_cap))

    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Concatenate()([deep_input_emb, hist_cap])
    flatten = tf.keras.layers.Flatten()
    deep_input_emb = flatten(NoMask()(deep_input_emb))

    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(dnn_out)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    model.add_loss(alpha * disp_loss)

    # Variables are initialized in the current Keras backend session before
    # the model is returned.
    session = tf.keras.backend.get_session()
    session.run(tf.global_variables_initializer())
    return model
def MIND(user_feature_columns,
         item_feature_columns,
         num_sampled=5,
         k_max=2,
         p=1.0,
         dynamic_k=False,
         user_dnn_hidden_units=(64, 32),
         dnn_activation='relu',
         dnn_use_bn=False,
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         init_std=0.0001,
         seed=1024):
    """Instantiates the MIND Model architecture.

    The user features must include an input named ``hist_len`` and the
    behaviour history is read from the variable-length column named
    ``"hist_" + <item feature name>``. Besides the usual Keras model, the
    returned object carries four extra attributes: ``user_input``,
    ``user_embedding``, ``item_input`` and ``item_embedding``.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model; must contain exactly one feature.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param k_max: int, the max size of user interest embedding
    :param p: float,the parameter for adjusting the attention distribution in LabelAwareAttention.
    :param dynamic_k: bool, whether or not use dynamic interest number; when True ``hist_len`` is passed to LabelAwareAttention as a third input.
    :param user_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of user tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :raises ValueError: if ``item_feature_columns`` does not contain exactly one feature.
    :return: A Keras model instance.
    """
    # Exactly one item feature is required. The previous "> 1" check let an
    # empty list through and failed later with an unrelated IndexError.
    if len(item_feature_columns) != 1:
        raise ValueError("Now MIND only support 1 item feature like item_id")
    item_feature_column = item_feature_columns[0]
    item_feature_name = item_feature_column.name
    history_feature_list = [item_feature_name]

    features = build_input_features(user_feature_columns)

    # Split the user columns by kind.
    user_columns = user_feature_columns or []
    sparse_feature_columns = [
        fc for fc in user_columns if isinstance(fc, SparseFeat)
    ]
    dense_feature_columns = [
        fc for fc in user_columns if isinstance(fc, DenseFeat)
    ]
    varlen_sparse_feature_columns = [
        fc for fc in user_columns if isinstance(fc, VarLenSparseFeat)
    ]

    # Variable-length columns named "hist_<item>" are the behaviour history;
    # the rest are pooled into the "other features" of the user.
    history_fc_names = ["hist_" + name for name in history_feature_list]
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    for fc in varlen_sparse_feature_columns:
        if fc.name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    # User and item features share one set of embedding matrices.
    embedding_dict = create_embedding_matrix(user_feature_columns +
                                             item_feature_columns,
                                             l2_reg_embedding,
                                             init_std,
                                             seed,
                                             prefix="")

    item_features = build_input_features(item_feature_columns)

    query_emb_list = embedding_lookup(embedding_dict,
                                      item_features,
                                      item_feature_columns,
                                      history_feature_list,
                                      history_feature_list,
                                      to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict,
                                     features,
                                     history_feature_columns,
                                     history_fc_names,
                                     history_fc_names,
                                     to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)
    dnn_input_emb_list += sequence_embed_list

    history_emb = PoolingLayer()(NoMask()(keys_emb_list))
    target_emb = PoolingLayer()(NoMask()(query_emb_list))

    # Static shapes are read through the TF1 `Dimension.value` API.
    target_emb_size = target_emb.get_shape()[-1].value
    max_len = history_emb.get_shape()[1].value
    hist_len = features['hist_len']

    high_capsule = CapsuleLayer(input_units=target_emb_size,
                                out_units=target_emb_size,
                                max_len=max_len,
                                k_max=k_max)((history_emb, hist_len))

    # Other user features, if any, are tiled via tile_user_otherfeat
    # (which receives k_max) and attached to the capsules.
    if len(dnn_input_emb_list) > 0 or len(dense_value_list) > 0:
        user_other_feature = combined_dnn_input(dnn_input_emb_list,
                                                dense_value_list)
        other_feature_tile = tf.keras.layers.Lambda(
            tile_user_otherfeat,
            arguments={'k_max': k_max})(user_other_feature)
        user_deep_input = Concatenate()(
            [NoMask()(other_feature_tile), high_capsule])
    else:
        user_deep_input = high_capsule

    user_embeddings = DNN(user_dnn_hidden_units,
                          dnn_activation,
                          l2_reg_dnn,
                          dnn_dropout,
                          dnn_use_bn,
                          seed,
                          name="user_embedding")(user_deep_input)

    item_inputs_list = list(item_features.values())
    item_embedding = embedding_dict[item_feature_name]
    item_input = item_features[item_feature_name]

    # With dynamic_k the attention layer additionally receives hist_len.
    attention_inputs = (user_embeddings, target_emb)
    if dynamic_k:
        attention_inputs = (user_embeddings, target_emb, hist_len)
    user_embedding_final = LabelAwareAttention(
        k_max=k_max,
        pow_p=p,
    )(attention_inputs)

    output = SampledSoftmaxLayer(item_embedding, num_sampled=num_sampled)(
        inputs=(user_embedding_final, item_input))
    model = Model(inputs=inputs_list + item_inputs_list, outputs=output)

    # Expose the tensors needed to extract user / item embeddings later.
    setattr(model, "user_input", inputs_list)
    setattr(model, "user_embedding", user_embeddings)
    setattr(model, "item_input", item_inputs_list)
    setattr(model, "item_embedding",
            get_item_embedding(item_embedding, item_input))

    return model
def MIND(dnn_feature_columns, history_feature_list, target_song_size, k_max=2,
         dnn_use_bn=False, user_hidden_unit=64, dnn_activation='relu',
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001,
         seed=1024):
    """Build a multi-interest recall model (capsule-routed user interests + sampled softmax).

    The feature columns must define an input named ``'hist_len'`` (read as the
    per-user history length) and a sparse feature named ``'item'`` (used as the
    target item); both names are hard-coded in the body. Variable-length columns
    named ``"hist_" + <name>`` for each ``<name>`` in ``history_feature_list``
    are treated as behaviour history; all other variable-length columns are
    pooled and used as ordinary user features.

    NOTE: ``DenseFeat`` columns get no special handling here; only sparse and
    variable-length sparse embeddings are fed into the user tower.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list,to indicate sequence sparse field
    :param target_song_size: int, the total size of the recall songs
    :param k_max: int, the max size of user interest embedding
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param user_hidden_unit: int. user dnn hidden layer size
    :param dnn_activation: Activation function to use in deep net
    :param l2_reg_dnn: L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance whose inputs are all built feature inputs
        and whose output is the ``SampledSoftmaxLayer`` result.
    """
    features = build_input_features(dnn_feature_columns)

    # Split the columns by kind; a falsy argument yields empty lists.
    feature_columns = dnn_feature_columns or []
    sparse_feature_columns = [
        fc for fc in feature_columns if isinstance(fc, SparseFeat)]
    varlen_sparse_feature_columns = [
        fc for fc in feature_columns if isinstance(fc, VarLenSparseFeat)]

    # Separate behaviour-history sequences from the other sequence features.
    history_fc_names = ["hist_" + name for name in history_feature_list]
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    for fc in varlen_sparse_feature_columns:
        if fc.name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    hist_len = features['hist_len']
    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(
        dnn_feature_columns, l2_reg_embedding, init_std, seed, prefix="")

    # Embeddings of the behaviour history (input to the capsule layer).
    history_emb_list = embedding_lookup(
        embedding_dict, features, history_feature_columns,
        history_fc_names, history_fc_names, to_list=True)
    history_emb = concat_func(history_emb_list, mask=False)

    # Embedding of the target item; its last dimension fixes the size of the
    # interest capsules and of the final user embedding.
    target_emb_list = embedding_lookup(
        embedding_dict, features, sparse_feature_columns,
        ['item'], history_feature_list, to_list=True)
    target_emb_tmp = concat_func(target_emb_list, mask=False)
    target_emb_size = target_emb_tmp.get_shape()[-1].value
    target_emb = tf.keras.layers.Lambda(
        shape_target,
        arguments={'target_emb_size': target_emb_size})(target_emb_tmp)

    # Remaining user-side features: sparse embeddings (history features masked
    # out) plus pooled non-history sequence embeddings.
    dnn_input_emb_list = embedding_lookup(
        embedding_dict, features, sparse_feature_columns,
        mask_feat_list=history_feature_list, to_list=True)
    sequence_embed_dict = varlen_embedding_lookup(
        embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict, features, sparse_varlen_feature_columns,
        to_list=True)
    dnn_input_emb_list += sequence_embed_list

    deep_input_emb = concat_func(dnn_input_emb_list)
    user_other_feature = Flatten()(deep_input_emb)

    # Route the history into k_max interest capsules.
    max_len = history_emb.get_shape()[1].value
    high_capsule = CapsuleLayer(
        input_units=target_emb_size, out_units=target_emb_size,
        max_len=max_len, k_max=k_max)((history_emb, hist_len))

    # Tile the other user features k_max times and attach them to the capsules.
    other_feature_tile = tf.keras.layers.Lambda(
        tile_user_otherfeat, arguments={'k_max': k_max})(user_other_feature)
    user_deep_input = Concatenate()(
        [NoMask()(other_feature_tile), high_capsule])

    user_embeddings = DNN(
        (user_hidden_unit, target_emb_size), dnn_activation, l2_reg_dnn,
        dnn_dropout, dnn_use_bn, seed, name="user_embedding")(user_deep_input)

    # Per-user number of active interests:
    # clamp(log2(1 + hist_len), 1, k_max), cast to int64.
    # Presumably shaped [B, 1] like hist_len -- TODO confirm.
    log2_hist_len = tf.log1p(tf.cast(hist_len, dtype="float32")) / tf.log(2.)
    k_user = tf.cast(
        tf.maximum(
            1.,
            tf.minimum(tf.cast(k_max, dtype="float32"), log2_hist_len)),
        dtype="int64")

    # Attend over the interests with the target embedding, using only the
    # first k_user of them.
    user_embedding_final = DotProductAttentionLayer(
        shape=[target_emb_size, target_emb_size])(
        (user_embeddings, target_emb), seq_length=k_user, max_len=k_max)

    output = SampledSoftmaxLayer(
        target_song_size=target_song_size,
        target_emb_size=target_emb_size)(
        inputs=(user_embedding_final, features['item']))

    model = Model(inputs=inputs_list, outputs=output)
    return model
def DeepAutoInt(
        linear_feature_columns,
        dnn_feature_columns,
        att_layer_num=3,
        att_embedding_size=8,
        att_head_num=2,
        att_res=True,
        dnn_hidden_units=(256, 256),
        dnn_activation='relu',
        l2_reg_linear=1e-5,
        l2_reg_embedding=1e-5,
        l2_reg_dnn=0,
        dnn_use_bn=False,
        dnn_dropout=0,
        seed=1024,
        fm_group=(DEFAULT_GROUP_NAME,),
        task='binary',
):
    """Instantiates an AutoInt network whose logit is summed with linear and FM logits.

    The final logit is ``linear_logit + fm_logit + Dense(1)(stack)``, where
    ``stack`` is the interacting-layer output, the DNN output, or their
    concatenation, depending on which of the two parts is enabled.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_layer_num: int.The InteractingLayer number to be used.
    :param att_embedding_size: int.The embedding size in multi-head self-attention network.
    :param att_head_num: int.The head number in multi-head self-attention network.
    :param att_res: bool.Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer ,to use as random seed.
    :param fm_group: list or tuple of embedding group names; only groups named here contribute an FM term.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if both ``dnn_hidden_units`` is empty and ``att_layer_num`` is not positive.
    """
    use_dnn = len(dnn_hidden_units) > 0
    use_att = att_layer_num > 0
    if not use_dnn and not use_att:
        raise ValueError("Either hidden_layer or att_layer_num must > 0")

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(
        features, linear_feature_columns, seed=seed, prefix='linear',
        l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed,
        support_group=True)
    sparse_embedding_list = list(
        chain.from_iterable(group_embedding_dict.values()))

    # One FM term per selected embedding group, summed together.
    fm_logit = add_func([
        FM()(concat_func(group_embeddings, axis=1))
        for group_name, group_embeddings in group_embedding_dict.items()
        if group_name in fm_group
    ])

    # Built unconditionally (as before): with att_layer_num == 0 the loop is a
    # no-op and att_output is simply left unused by the deep-only path.
    att_input = concat_func(sparse_embedding_list, axis=1)
    for _ in range(att_layer_num):
        att_input = InteractingLayer(
            att_embedding_size, att_head_num, att_res)(att_input)
    att_output = tf.keras.layers.Flatten()(att_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Pick what feeds the output projection. The up-front check guarantees at
    # least one of use_dnn / use_att holds.
    if use_dnn:
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        if use_att:  # Deep & Interacting Layer
            stack_out = tf.keras.layers.Concatenate()([att_output, deep_out])
        else:  # Only Deep
            stack_out = deep_out
    else:  # Only Interacting Layer
        stack_out = att_output

    final_logit = tf.keras.layers.Dense(
        1, use_bias=False,
        kernel_initializer=tf.keras.initializers.glorot_normal(seed))(
        stack_out)

    final_logit = add_func([linear_logit, fm_logit, final_logit])
    output = PredictionLayer(task)(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model