Exemplo n.º 1
0
    lr.fit(Xi_train, Xv_train, y_train, Xi_valid, Xv_valid, y_valid)
elif algo == 'fm':
    # FM branch: collect the constructor keyword arguments in one dict,
    # build the model from it, then train.
    fm_params = {
        # Both sizes come from variables defined outside this excerpt.
        "feature_size": feature_size,
        "field_size": field_size,
        "embedding_size": 15,
        "epoch": 20,
        "batch_size": 1024,
        "learning_rate": 0.001,
        "optimizer_type": "adam",
        # NOTE(review): presumably separate L2 penalties for the linear
        # weights (w) and the factor vectors (v) -- confirm against FM.
        "l2_w_reg": 0.01,
        "l2_v_reg": 0.01,
        "verbose": True
    }
    fm = FM(**fm_params)
    # fit receives the training (index, value, label) arrays followed by the
    # matching validation arrays.
    fm.fit(Xi_train, Xv_train, y_train, Xi_valid, Xv_valid, y_valid)
elif algo == 'deepfm':
    # DeepFM branch: collect the constructor keyword arguments in one dict
    # and build the model from it.
    deepfm_params = {
        # Both sizes come from variables defined outside this excerpt.
        "feature_size": feature_size,
        "field_size": field_size,
        "embedding_size": 15,
        "deep_layers": [256, 128, 64],
        "epoch": 20,
        "batch_size": 1024,
        "learning_rate": 0.001,
        "optimizer_type": "adam",
        "l2_reg": 0.01,
        # NOTE(review): four rates for three deep_layers -- presumably one
        # extra rate is applied before the first layer; confirm against
        # DeepFM.
        "dropout_deep": [0.5, 0.5, 0.5, 0.5],
        "verbose": True
    }
    deepfm = DeepFM(**deepfm_params)
Exemplo n.º 2
0
                                                    test_size=0.2,
                                                    random_state=42)
# Reshape both label vectors into two-dimensional, single-column arrays.
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

# Model configurations tried previously (uncomment one to switch):
#   model = LR(features_sizes, loss_type='rmse')  # , hash_size=r)
#   model = FM(features_sizes, k=24)
#   model = MLP(features_sizes, deep_layers=(12, 12), k=24)  # best (12,12) k=24
#   model = FM(features_sizes, k=24, FM_ignore_interaction=[(0, 2), (0, 3), (0, 4)])
#   model = DeepFM(features_sizes, deep_layers=(12, 12), k=24)
#   model = NFM(features_sizes, k=24)
#
# Active configuration: FM with k=24 and the pairs (0, 1) .. (0, 4) passed as
# FM_ignore_interaction (presumably field 0 is excluded from pairwise
# interactions with fields 1-4 -- confirm against FM).
model = FM(
    features_sizes,
    k=24,
    FM_ignore_interaction=[(0, other) for other in range(1, 5)],
)
print(model)

# Train on the training split; the held-out split is handed to fit as well.
# The value returned by fit is kept as best_score.
best_score = model.fit(
    X_train, X_test, y_train, y_test,
    lr=0.0005, N_EPOCH=50, batch_size=5000, early_stopping_rounds=5,
)  # 0.0005->0.001(1e-3 bs=1000)
'''
ls=[]
Rounds=1
for _ in range(Rounds):
        ls.append(best_score)
print(model)
print(" Protocol Test Result : \n%.4f %.4f %s" % (pd.Series(ls).mean(),pd.Series(ls).min(),str([round(i,4) for i in ls])))
'''
import pandas as pd
import numpy as np
from models import LR,FM,MLP,WideAndDeep,DeepFM


if __name__ == '__main__':
    # Benchmark script: trains an FM model ten times on the MovieLens-100k
    # ua.base / ua.test split and prints the mean and minimum of the scores
    # returned by fit, followed by the full list of scores.

    # Seeds NumPy's global RNG only.
    # NOTE(review): whether this also makes model initialisation
    # reproducible depends on the `models` package -- confirm.
    np.random.seed(2019)
    data_dir = "../data/movie_lens_100k/"
    columns = ['user_id', 'movie_id', 'ratings', 'time']
    train = pd.read_csv(data_dir + 'ua.base', sep='\t', names=columns)
    test = pd.read_csv(data_dir + 'ua.test', sep='\t', names=columns)
    data = pd.concat([train, test], axis=0)
    y_train = train['ratings'].values.reshape(-1, 1)  # single column
    y_test = test['ratings'].values.reshape(-1, 1)

    features = ['user_id', 'movie_id']
    # Number of distinct ids per feature, counted over train and test
    # together.
    # NOTE(review): combined with the `- 1` shift below this assumes the ids
    # are contiguous starting at 1 -- confirm for other datasets.
    features_sizes = [data[f].nunique() for f in features]
    # NOTE(review): the label says "DFM" although the loop trains FM.
    print("DFM")
    ls = []
    for _ in range(10):
        model = FM(features_sizes)
        #model = LR(features_sizes)
        #model=DeepFM(features_sizes,deep_layers=(10,10),k=10)
        # The `- 1` is needed because ids must start at 0, whereas the data
        # files number them from 1. The shifted frames are deliberately
        # rebuilt on every run so each fit call receives fresh objects.
        best_score = model.fit(train[features] - 1, test[features] - 1,
                               y_train, y_test,
                               lr=0.0005, N_EPOCH=150, batch_size=500,
                               early_stopping_rounds=30)
        ls.append(best_score)
    scores = pd.Series(ls)
    print(scores.mean(), scores.min())
    print(str(ls))
Exemplo n.º 4
0
y_test=y_test.values.reshape((-1,1))
'''

#<Model>
# Exactly one of the configurations below is active; the others are kept
# commented out for quick switching.
#model=LR(features_sizes,loss_type='binary',metric_type='auc')
model=FM(features_sizes,k=8,loss_type='binary',metric_type='auc')
#model=FM(features_sizes,k=8,loss_type='binary',metric_type='auc',FM_ignore_interaction=[(0,2),(0,3),(0,4)]) #FMDE
#model=MLP(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(32,8))
#model=NFM(features_sizes,k=8,loss_type='binary',metric_type='auc')
#model=WideAndDeep(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(8,8))
#model=DeepFM(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(8,8))
#model=AFM(features_sizes,k=8,loss_type='binary',metric_type='auc',attention_FM=8)
#model=DeepAFM(features_sizes,k=8,loss_type='binary',metric_type='auc',attention_FM=8,deep_layers=(8,8))
print(model)
# [BUG fix] Older versions of fit had to be given copies of the data.
# Patched on 06/27: fit now copies internally so the caller's data is not
# affected.
best_score = model.fit(X_train[train_features], X_test[train_features], y_train, y_test,
                       lr=0.0005, N_EPOCH=50, batch_size=4096,
                       early_stopping_rounds=5)  # 0.0005->0.001(1e-3 bs=1000)
# Predict on the same column subset (and column order) that fit was given
# for validation, matching the test-set prediction further down.
y_pred = model.predict(X_test[train_features])
y_pred = 1. / (1. + np.exp(-1. * y_pred))  # sigmoid transform
from sklearn.metrics import roc_auc_score,log_loss
print("ROC-AUC score on valid set: %.4f" %roc_auc_score(y_test,y_pred))
#print(log_loss(y_test,y_pred))

# Assemble the three test partitions into one frame. The explicit copy means
# the in-place re-encoding below cannot touch the source frames.
test_data_ = pd.concat([test_data_new_msno, test_data_new_song, test_data_old], axis=0).copy()
# encs[i] is the encoder paired with train_features[i].
# NOTE(review): presumably these are the encoders fitted on the training
# data -- confirm where encs is built.
for i, c in enumerate(train_features):
    enc = encs[i]
    test_data_[c] = enc.transform(test_data_[c])
y_pred_test = model.predict(test_data_[train_features])
y_pred_test = 1. / (1. + np.exp(-1. * y_pred_test))  # sigmoid transform
print("ROC-AUC score on test set: %.4f" %roc_auc_score(test_data_['target'],y_pred_test))