lr.fit(Xi_train, Xv_train, y_train, Xi_valid, Xv_valid, y_valid) elif algo == 'fm': fm_params = { "feature_size": feature_size, "field_size": field_size, "embedding_size": 15, "epoch": 20, "batch_size": 1024, "learning_rate": 0.001, "optimizer_type": "adam", "l2_w_reg": 0.01, "l2_v_reg": 0.01, "verbose": True } fm = FM(**fm_params) fm.fit(Xi_train, Xv_train, y_train, Xi_valid, Xv_valid, y_valid) elif algo == 'deepfm': deepfm_params = { "feature_size": feature_size, "field_size": field_size, "embedding_size": 15, "deep_layers": [256, 128, 64], "epoch": 20, "batch_size": 1024, "learning_rate": 0.001, "optimizer_type": "adam", "l2_reg": 0.01, "dropout_deep": [0.5, 0.5, 0.5, 0.5], "verbose": True } deepfm = DeepFM(**deepfm_params)
test_size=0.2, random_state=42) y_train = y_train.values.reshape((-1, 1)) y_test = y_test.values.reshape((-1, 1)) #model=LR(features_sizes,loss_type='rmse')#,hash_size=r) #model=FM(features_sizes,k=24) #model=MLP(features_sizes,deep_layers=(12,12),k=24) best(12,12) k=24 #model=FM(features_sizes,k=24,FM_ignore_interaction=[(0,2),(0,3),(0,4)]) model = FM(features_sizes, k=24, FM_ignore_interaction=[(0, 1), (0, 2), (0, 3), (0, 4)]) #model=DeepFM(features_sizes,deep_layers=(12,12),k=24) #model = NFM(features_sizes, k=24) print(model) best_score = model.fit(X_train, X_test, y_train, y_test, lr=0.0005, N_EPOCH=50, batch_size=5000, early_stopping_rounds=5) #0.0005->0.001(1e-3 bs=1000) ''' ls=[] Rounds=1 for _ in range(Rounds): ls.append(best_score) print(model) print(" Protocol Test Result : \n%.4f %.4f %s" % (pd.Series(ls).mean(),pd.Series(ls).min(),str([round(i,4) for i in ls]))) '''
# Repeated FM training runs on the MovieLens-100k "ua" split.
#
# Loads ua.base / ua.test, fits a fresh FM model ten times on the
# user_id / movie_id columns, then prints the mean and minimum of the
# values returned by fit, followed by the raw list of those values.
import numpy as np
import pandas as pd

from models import LR, FM, MLP, WideAndDeep, DeepFM

if __name__ == '__main__':
    np.random.seed(2019)

    data_dir = "../data/movie_lens_100k/"
    # Both files are tab-separated and share the same four columns.
    columns = ['user_id', 'movie_id', 'ratings', 'time']
    train = pd.read_csv(data_dir + 'ua.base', sep='\t', names=columns)
    test = pd.read_csv(data_dir + 'ua.test', sep='\t', names=columns)
    data = pd.concat([train, test], axis=0)

    # Targets reshaped to a single column.
    y_train = train['ratings'].values.reshape(-1, 1)
    y_test = test['ratings'].values.reshape(-1, 1)

    features = ['user_id', 'movie_id']
    # One size per feature: the number of distinct ids over train + test.
    features_sizes = [data[f].nunique() for f in features]

    print("DFM")
    ls = []
    for _ in range(10):
        model = FM(features_sizes)
        # Alternative models:
        # model = LR(features_sizes)
        # model = DeepFM(features_sizes, deep_layers=(10, 10), k=10)

        # Subtract 1 because ids must start at 0, while the data starts at 1.
        # The shifted frames are rebuilt on every round so each fit call gets
        # its own objects. NOTE(review): confirm fit does not mutate its
        # inputs before hoisting these expressions out of the loop.
        best_score = model.fit(
            train[features] - 1,
            test[features] - 1,
            y_train,
            y_test,
            lr=0.0005,
            N_EPOCH=150,
            batch_size=500,
            early_stopping_rounds=30,
        )
        ls.append(best_score)

    # Summarise the collected scores: mean and minimum, then the raw values.
    scores = pd.Series(ls)
    print(scores.mean(), scores.min())
    print(str(ls))
y_test=y_test.values.reshape((-1,1)) ''' #<Model> #model=LR(features_sizes,loss_type='binary',metric_type='auc') model=FM(features_sizes,k=8,loss_type='binary',metric_type='auc') #model=FM(features_sizes,k=8,loss_type='binary',metric_type='auc',FM_ignore_interaction=[(0,2),(0,3),(0,4)]) #FMDE #model=MLP(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(32,8)) #model=NFM(features_sizes,k=8,loss_type='binary',metric_type='auc') #model=WideAndDeep(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(8,8)) #model=DeepFM(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(8,8)) #model=AFM(features_sizes,k=8,loss_type='binary',metric_type='auc',attention_FM=8) #model=DeepAFM(features_sizes,k=8,loss_type='binary',metric_type='auc',attention_FM=8,deep_layers=(8,8)) print(model) #[BUG fix] 老版本一定要传入拷贝..wtf~! 06/27修补BUG 内部copy防止影响数据 best_score = model.fit(X_train[train_features], X_test[train_features], y_train, y_test, lr=0.0005, N_EPOCH=50, batch_size=4096,early_stopping_rounds=5)#0.0005->0.001(1e-3 bs=1000) y_pred=model.predict(X_test) y_pred=1./(1.+np.exp(-1.*y_pred))#sigmoid transform from sklearn.metrics import roc_auc_score,log_loss print("ROC-AUC score on valid set: %.4f" %roc_auc_score(y_test,y_pred)) #print(log_loss(y_test,y_pred)) test_data_=pd.concat([test_data_new_msno,test_data_new_song,test_data_old],axis=0).copy() for i,c in enumerate(train_features): enc = encs[i] test_data_[c] = enc.transform(test_data_[c]) y_pred_test=model.predict(test_data_[train_features]) y_pred_test=1./(1.+np.exp(-1.*y_pred_test))#sigmoid transform print("ROC-AUC score on test set: %.4f" %roc_auc_score(test_data_['target'],y_pred_test))