Exemplo n.º 1
0
def plot_nfm():
    """Build a small NFM model from the Criteo sample data and render its
    architecture diagram to ./imgs/NFM.png.
    """
    # Load data; keep only a few columns so the rendered graph stays readable.
    data, dense_features, sparse_features = read_criteo_data()
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:2]

    # Group features into the linear part and the DNN part (chosen per use
    # case) and tag each group with DenseFeat / SparseFeat markers.
    # Fixed: the comprehensions used `for i, feat in enumerate(...)` with an
    # unused index — iterate the feature names directly.
    linear_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ] + [DenseFeat(feat, 1) for feat in dense_features]

    dnn_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ] + [DenseFeat(feat, 1) for feat in dense_features]

    # Build the NFM model and draw its graph.
    history = NFM(linear_feature_columns, dnn_feature_columns)
    keras.utils.plot_model(history, to_file="./imgs/NFM.png", show_shapes=True)
Exemplo n.º 2
0
def plot_dien():
    """Build a DIEN model (with a negative-sample history sequence) from the
    movie sample data and render its architecture diagram to ./imgs/DIEN.png.
    """
    # Load the raw sample file (tab-separated, no header row).
    samples_data = pd.read_csv("data/movie_sample.txt", sep="\t", header=None)
    samples_data.columns = [
        "user_id", "gender", "age", "hist_movie_id", "hist_len", "movie_id",
        "movie_type_id", "label"
    ]
    # NOTE: the original also built X/y frames here, but this plotting helper
    # never used them, so the dead locals were removed.

    # Feature wrapping: one SparseFeat per categorical column
    # (vocabulary = max observed id + 1) plus a dense history-length feature.
    feature_columns = [
        SparseFeat('user_id',
                   max(samples_data["user_id"]) + 1,
                   embedding_dim=8),
        SparseFeat('gender', max(samples_data["gender"]) + 1, embedding_dim=8),
        SparseFeat('age', max(samples_data["age"]) + 1, embedding_dim=8),
        SparseFeat('movie_id',
                   max(samples_data["movie_id"]) + 1,
                   embedding_dim=8),
        SparseFeat('movie_type_id',
                   max(samples_data["movie_type_id"]) + 1,
                   embedding_dim=8),
        DenseFeat('hist_len', 1)
    ]

    # Positive and negative watch-history sequences, padded to length 50.
    feature_columns += [
        VarLenSparseFeat('hist_movie_id',
                         vocabulary_size=max(samples_data["movie_id"]) + 1,
                         embedding_dim=8,
                         maxlen=50)
    ]
    feature_columns += [
        VarLenSparseFeat('neg_hist_movie_id',
                         vocabulary_size=max(samples_data["movie_id"]) + 1,
                         embedding_dim=8,
                         maxlen=50)
    ]

    # Base behaviour feature (the item being scored).
    behavior_feature_list = ['movie_id']
    # Behaviour sequence feature.
    behavior_seq_feature_list = ['hist_movie_id']
    # Negative-sample sequence feature.
    neg_seq_feature_list = ['neg_hist_movie_id']

    # Build the DIEN model (the original comment incorrectly said "DIN").
    history = DIEN(feature_columns,
                   behavior_feature_list,
                   behavior_seq_feature_list,
                   neg_seq_feature_list,
                   use_neg_sample=True)

    keras.utils.plot_model(history,
                           to_file="./imgs/DIEN.png",
                           show_shapes=True)
Exemplo n.º 3
0
def plot_pnn():
    """Build a PNN model from a subset of the Criteo sample data and render
    its architecture diagram to ./imgs/PNN.png.
    """
    # Load the data and keep only the first few columns of each kind so the
    # rendered graph stays compact.
    data, dense_features, sparse_features = read_criteo_data()
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:3]

    # Tag categorical columns with SparseFeat (vocabulary from distinct
    # values) and numeric columns with DenseFeat, then hand both groups to
    # the DNN part of the model.
    sparse_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ]
    dense_columns = [DenseFeat(feat, 1) for feat in dense_features]
    dnn_feature_columns = sparse_columns + dense_columns

    # Build the PNN model and draw its graph.
    history = PNN(dnn_feature_columns)
    keras.utils.plot_model(history, to_file="./imgs/PNN.png", show_shapes=True)
Exemplo n.º 4
0
def plot_din():
    """Build a DIN model from the movie sample data and render its
    architecture diagram to ./imgs/DIN.png.
    """
    # Load the raw sample file (tab-separated, no header row).
    samples_data = pd.read_csv("./data/movie_sample.txt",
                               sep="\t",
                               header=None)
    samples_data.columns = [
        "user_id", "gender", "age", "hist_movie_id", "hist_len", "movie_id",
        "movie_type_id", "label"
    ]

    # One SparseFeat per categorical column, vocabulary = max id + 1,
    # followed by a dense feature for the history length.
    categorical_cols = ["user_id", "gender", "age", "movie_id",
                        "movie_type_id"]
    feature_columns = [
        SparseFeat(col, max(samples_data[col]) + 1, embedding_dim=8)
        for col in categorical_cols
    ]
    feature_columns.append(DenseFeat('hist_len', 1))

    # Variable-length watch-history sequence, padded to length 50.
    feature_columns.append(
        VarLenSparseFeat('hist_movie_id',
                         vocabulary_size=max(samples_data["movie_id"]) + 1,
                         embedding_dim=8,
                         maxlen=50))

    # Base behaviour feature and its corresponding sequence feature.
    behavior_feature_list = ['movie_id']
    behavior_seq_feature_list = ['hist_movie_id']

    history = DIN(feature_columns, behavior_feature_list,
                  behavior_seq_feature_list)
    keras.utils.plot_model(history, to_file="./imgs/DIN.png", show_shapes=True)
Exemplo n.º 5
0
    # Split columns into dense ('I*') and sparse ('C*') feature names.
    columns = data.columns.values
    dense_features = [feat for feat in columns if 'I' in feat]
    sparse_features = [feat for feat in columns if 'C' in feat]

    # Basic preprocessing of the raw data.
    train_data = data_process(data, dense_features, sparse_features)
    train_data['label'] = data['label']

    # Group features into a linear part and a DNN part (chosen per use case)
    # and tag each group with DenseFeat / SparseFeat markers.
    linear_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for i, feat in enumerate(sparse_features)  # NOTE(review): index i is unused
    ] + [DenseFeat(
        feat,
        1,
    ) for feat in dense_features]

    dnn_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for i, feat in enumerate(sparse_features)  # NOTE(review): index i is unused
    ] + [DenseFeat(
        feat,
        1,
    ) for feat in dense_features]

    # Build the xDeepFM model.
    model = xDeepFM(linear_feature_columns, dnn_feature_columns)
    model.summary()
    model.compile(
        optimizer="adam",
Exemplo n.º 6
0
            "age": np.array(X["age"]), \
            "hist_movie_id": np.array([[int(i) for i in l.split(',')] for l in X["hist_movie_id"]]), \
            "neg_hist_movie_id": np.array([[int(i) for i in l.split(',')] for l in X["neg_hist_movie_id"]]), \
            "hist_len": np.array(X["hist_len"]), \
            "movie_id": np.array(X["movie_id"]), \
            "movie_type_id": np.array(X["movie_type_id"])}

    y_train = np.array(y)

    """特征封装"""
    # Feature wrapping: one SparseFeat per categorical column
    # (vocabulary = max observed id + 1) plus a dense history-length feature.
    feature_columns = [SparseFeat('user_id', max(samples_data["user_id"])+1, embedding_dim=8), 
                        SparseFeat('gender', max(samples_data["gender"])+1, embedding_dim=8), 
                        SparseFeat('age', max(samples_data["age"])+1, embedding_dim=8), 
                        SparseFeat('movie_id', max(samples_data["movie_id"])+1, embedding_dim=8),
                        SparseFeat('movie_type_id', max(samples_data["movie_type_id"])+1, embedding_dim=8),
                        DenseFeat('hist_len', 1)]

    # Positive and negative watch-history sequences, padded to length 50.
    feature_columns += [VarLenSparseFeat('hist_movie_id', vocabulary_size=max(samples_data["movie_id"])+1, embedding_dim=8, maxlen=50)]
    feature_columns += [VarLenSparseFeat('neg_hist_movie_id', vocabulary_size=max(samples_data["movie_id"])+1, embedding_dim=8, maxlen=50)]

    # Behaviour feature list (the base features).
    behavior_feature_list = ['movie_id']
    # Behaviour sequence features.
    behavior_seq_feature_list = ['hist_movie_id']
    # Negative-sample sequence features.
    neg_seq_feature_list = ['neg_hist_movie_id']

    """构建DIN模型"""
    # NOTE(review): the string above says "DIN" but this actually builds a
    # DIEN model with negative sampling enabled.
    history = DIEN(feature_columns, behavior_feature_list, behavior_seq_feature_list, neg_seq_feature_list, use_neg_sample=True)
    
    history.compile('adam', 'binary_crossentropy')
Exemplo n.º 7
0
if __name__ == "__main__":
    # Load the Criteo sample data.
    data = pd.read_csv('./data/criteo_sample.txt')

    # Split columns into dense ('I*') and sparse ('C*') feature names.
    columns = data.columns.values
    dense_features = [feat for feat in columns if 'I' in feat]
    sparse_features = [feat for feat in columns if 'C' in feat]

    # Basic preprocessing of the raw data.
    train_data = data_process(data, dense_features, sparse_features)
    train_data['label'] = data['label']

    # Group features into a linear part and a DNN part (chosen per use case)
    # and tag each group with DenseFeat / SparseFeat markers.
    # Fixed: the comprehensions used `for i, feat in enumerate(...)` with an
    # unused index — iterate the feature names directly.
    linear_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ] + [DenseFeat(feat, 1) for feat in dense_features]

    dnn_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ] + [DenseFeat(feat, 1) for feat in dense_features]

    # Build and compile the DCN model.
    history = DCN(linear_feature_columns, dnn_feature_columns)
    history.summary()
    history.compile(optimizer="adam",
                    loss="binary_crossentropy",
                    metrics=["binary_crossentropy",
                             tf.keras.metrics.AUC(name='auc')])

    # Feed the inputs as a dict keyed by feature name.
    train_model_input = {name: data[name]
                         for name in dense_features + sparse_features}
Exemplo n.º 8
0
    # Load the Criteo sample data.
    data = pd.read_csv('./data/criteo_sample.txt')
    # Split columns into dense ('I*') and sparse ('C*') feature names.
    columns = data.columns.values
    dense_features = [feat for feat in columns if "I" in feat]
    sparse_features = [feat for feat in columns if "C" in feat]

    # Basic preprocessing of the raw data.
    train_data = data_process(data, dense_features, sparse_features)
    train_data['label'] = data['label']

    # Group features into a linear part and a DNN part (chosen per use case)
    # and tag each group with DenseFeat / SparseFeat markers.
    linear_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ] + [DenseFeat(feat, 1) for feat in dense_features]

    # Features for the deep part.
    # Fixed two bugs: vocabulary_size must be the number of distinct values
    # (data[feat].nunique(), not the raw column), and the comprehension must
    # iterate the feature names directly — `for feat in enumerate(...)` bound
    # feat to an (index, name) tuple.
    dnn_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ] + [DenseFeat(feat, 1) for feat in dense_features]

    # Build the NFM model.
    history = NFM(linear_feature_columns, dnn_feature_columns)
    history.summary()
    # Fixed metric-name typo: "bianry_crossentropy" -> "binary_crossentropy".
    history.compile(optimizer="adam",
                    loss="binary_crossentropy",
                    metrics=["binary_crossentropy",
                             tf.keras.metrics.AUC(name='auc')])

    # Feed the inputs as a dict keyed by feature name.
    train_model_input = {name: data[name]
                         for name in dense_features + sparse_features}

    # Train the model.