def xdeepfm_model(linear_feature_columns, dnn_feature_columns, train_model_input, train, test_model_input, test):
    """Train an xDeepFM regressor and evaluate it on the held-out test set.

    Hyper-parameters are pulled from ``config.xdeepfm_att``; the trained model
    is persisted to ``saved_xdeepfm.h5``. ``target`` is a module-level name
    defined elsewhere in this file (the label column(s)).

    Returns:
        pd.DataFrame: a single-row frame with columns
        ['model', 'RMSE', 'MAE', 'MSE', 'AUC', 'score'] ('score' is left
        unset; its formula is kept below, commented out, as in the original).
    """
    cols = ['model', 'RMSE', 'MAE', 'MSE', 'AUC', 'score']
    df_result = pd.DataFrame(columns=cols, index=range(1))
    model = xDeepFM(linear_feature_columns, dnn_feature_columns,
                    dnn_hidden_units=config.xdeepfm_att["dnn_hidden_units"],
                    init_std=config.xdeepfm_att["init_std"],
                    cin_layer_size=config.xdeepfm_att["cin_layer_size"],
                    cin_split_half=config.xdeepfm_att["cin_split_half"],
                    cin_activation=config.xdeepfm_att["cin_activation"],
                    l2_reg_cin=config.xdeepfm_att["l2_reg_cin"],
                    seed=config.xdeepfm_att["seed"],
                    dnn_dropout=config.xdeepfm_att["dnn_dropout"],
                    dnn_activation=config.xdeepfm_att["dnn_activation"],
                    task=config.xdeepfm_att["task"],
                    dnn_use_bn=config.xdeepfm_att["dnn_use_bn"])
    model.compile("adam", "mse", metrics=['mse'])
    # The History object returned by fit() was never used; don't keep it.
    model.fit(train_model_input, train[target].values, batch_size=256,
              epochs=config.model_epoch['epoch'], verbose=2,
              validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=256)
    save_model(model, 'saved_xdeepfm.h5')  # save_model
    y_true = test[target].values
    # Compute each sklearn metric once and reuse it below.
    mse = mean_squared_error(y_true, pred_ans)
    auc = roc_auc_score(y_true, pred_ans)
    # BUG FIX: the original used chained assignment (df_result.loc[0].model = x),
    # which writes to a temporary Series and may silently leave df_result
    # unchanged. Use single-step .loc[row, col] indexing instead.
    df_result.loc[0, 'model'] = "XDeepFM"
    df_result.loc[0, 'RMSE'] = np.round(math.sqrt(mse), 3)
    df_result.loc[0, 'MAE'] = np.round(mean_absolute_error(y_true, pred_ans), 3)
    df_result.loc[0, 'MSE'] = np.round(mse, 3)
    df_result.loc[0, 'AUC'] = np.round(auc, 3)
    # df_result.loc[0, 'score'] = (1/df_result.iloc[0]['RMSE'])*(1/df_result.iloc[0]['MAE'])*(2*df_result.iloc[0]['AUC'])
    return df_result
def test_xDeepFM(hidden_size, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim):
    """Build a small random dataset and smoke-test xDeepFM (legacy dict API).

    Sparse fields get random vocabulary sizes in [1, 10); dense fields are
    recorded by name only. The assembled model is handed to check_model.
    """
    model_name = "xDeepFM"
    sample_size = 64
    # Sparse fields first (consumes np.random in the same order as before),
    # then the dense field names.
    feature_dim_dict = {
        "sparse": {
            "sparse" + '_' + str(idx): np.random.randint(1, 10)
            for idx in range(sparse_feature_num)
        },
        'dense': ["dense" + '_' + str(idx) for idx in range(dense_feature_dim)],
    }
    sparse_cols = [
        np.random.randint(0, vocab, sample_size)
        for vocab in feature_dim_dict['sparse'].values()
    ]
    dense_cols = [np.random.random(sample_size) for _ in feature_dim_dict['dense']]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_cols + dense_cols
    model = xDeepFM(
        feature_dim_dict,
        hidden_size=hidden_size,
        cin_layer_size=cin_layer_size,
        cin_split_half=cin_split_half,
        cin_activation=cin_activation,
        keep_prob=0.5,
    )
    check_model(model, model_name, x, y)
def test_xDeepFM(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim):
    """Smoke-test xDeepFM with generated data and the given CIN/DNN settings.

    BUG FIX: the original passed ``sparse_feature_num`` twice to
    get_test_data, so the ``dense_feature_dim`` parameter was silently
    ignored; pass it through as the dense-feature count.
    """
    model_name = "xDeepFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_dim_dict = get_test_data(
        sample_size, sparse_feature_num, dense_feature_dim)
    model = xDeepFM(feature_dim_dict,
                    dnn_hidden_units=dnn_hidden_units,
                    cin_layer_size=cin_layer_size,
                    cin_split_half=cin_split_half,
                    cin_activation=cin_activation,
                    dnn_dropout=0.5,
                    )
    check_model(model, model_name, x, y)
def test_xDeepFM(hidden_size, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim):
    """Smoke-test xDeepFM (legacy ``hidden_size``/``keep_prob`` API).

    BUG FIX: the original passed ``sparse_feature_num`` twice to
    get_test_data, so the ``dense_feature_dim`` parameter was silently
    ignored; pass it through as the dense-feature count.
    """
    model_name = "xDeepFM"
    sample_size = 64
    x, y, feature_dim_dict = get_test_data(
        sample_size, sparse_feature_num, dense_feature_dim)
    model = xDeepFM(feature_dim_dict,
                    hidden_size=hidden_size,
                    cin_layer_size=cin_layer_size,
                    cin_split_half=cin_split_half,
                    cin_activation=cin_activation,
                    keep_prob=0.5,
                    )
    check_model(model, model_name, x, y)
def run_xdeepfm_model():
    """Train a binary-classification xDeepFM and report test metrics.

    Data loading/preprocessing is delegated to read_data_as_model()
    (defined in this project's utils).

    Returns:
        tuple: (test predictions, test labels, rounded test AUC, 'xdeepfm').
    """
    (train, test, train_model_input, test_model_input,
     dnn_feature_columns, linear_feature_columns,
     feature_names, target) = read_data_as_model()
    model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )
    model.fit(train_model_input, train[target].values,
              batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    pred_ans = model.predict(test_model_input, batch_size=256)
    y_true = test[target].values
    # Compute the AUC once; the original evaluated roc_auc_score twice
    # (once for the print, once for the return value).
    auc = round(roc_auc_score(y_true, pred_ans), 4)
    print("test LogLoss", round(log_loss(y_true, pred_ans), 4))
    print("test AUC", auc)
    return pred_ans, y_true, auc, 'xdeepfm'
def test_xDeepFM_invalid(hidden_size, cin_layer_size):
    """xDeepFM must raise ValueError for an invalid hidden/CIN configuration."""
    vocab_sizes = dict(sparse_1=2, sparse_2=5, sparse_3=10)
    feature_dim_dict = {
        'sparse': vocab_sizes,
        'dense': ['dense_%d' % n for n in (1, 2, 3)],
    }
    with pytest.raises(ValueError):
        _ = xDeepFM(
            feature_dim_dict,
            hidden_size=hidden_size,
            cin_layer_size=cin_layer_size,
        )
# 2.count #unique features for each sparse field,and record dense feature field name sparse_feature_list = [SingleFeat(feat, data[feat].nunique()) for feat in sparse_features] dense_feature_list = [SingleFeat(feat, 0) for feat in dense_features] # 3.generate input data for model train = data.iloc[:train_size] test = data.iloc[train_size:] train_model_input = [train[feat.name].values for feat in sparse_feature_list] + \ [train[feat.name].values for feat in dense_feature_list] test_model_input = [test[feat.name].values for feat in sparse_feature_list] + \ [test[feat.name].values for feat in dense_feature_list] # 4.Define Model,train,predict and evaluate model = xDeepFM({"sparse": sparse_feature_list, "dense": dense_feature_list}, final_activation='sigmoid') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) # 4096 # 2**19 history = model.fit(train_model_input, train[target].values, batch_size=1024, epochs=5, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=2**10) #print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) #print("test AUC", round(roc_auc_score(train[target].values, pred_ans), 4)) submission = pd.read_csv('data/raw/sample_submission.csv') submission['HasDetections'] = pred_ans #print(submission['HasDetections'].head()) submission.to_csv('nffm_submission.csv', index=False)
# Select and build the requested CTR model; `model_type`, `emb_dim`,
# `linear_feature_columns` and `dnn_feature_columns` come from earlier in
# the file (outside this chunk). Each branch constructs a binary-task model
# with architecture-specific layer sizes.
if model_type == "DeepFM":
    model = DeepFM(
        linear_feature_columns,
        dnn_feature_columns,
        task="binary",
        embedding_size=emb_dim,
        use_fm=True,  # keep the FM interaction component enabled
        dnn_hidden_units=[400, 400, 400],
    )
if model_type == "xDeepFM":
    model = xDeepFM(
        linear_feature_columns,
        dnn_feature_columns,
        task="binary",
        embedding_size=emb_dim,
        dnn_hidden_units=[400, 400],
        cin_layer_size=[200, 200, 200],  # three CIN layers of width 200
    )
if model_type == "WDL":
    model = WDL(
        linear_feature_columns,
        dnn_feature_columns,
        task="binary",
        embedding_size=emb_dim,
        dnn_hidden_units=[1024, 512, 256],
    )
if model_type == "DCN":
    # NOTE(review): the DCN(...) call continues beyond this chunk.
    model = DCN(
def test_xDeepFM_invalid(hidden_size, cin_layer_size):
    """Invalid hidden/CIN settings must make the xDeepFM constructor raise."""
    sparse_part = {}
    for idx, dim in enumerate((2, 5, 10), start=1):
        sparse_part['sparse_' + str(idx)] = dim
    dense_part = ['dense_' + str(idx) for idx in (1, 2, 3)]
    feature_dim_dict = {'sparse': sparse_part, 'dense': dense_part}
    with pytest.raises(ValueError):
        _ = xDeepFM(feature_dim_dict,
                    hidden_size=hidden_size,
                    cin_layer_size=cin_layer_size,)
from deepctr.models import xDeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
from sklearn.metrics import log_loss, roc_auc_score
from utils import *

if __name__ == "__main__":
    # Load preprocessed data and feature-column definitions from the
    # project's shared helper.
    (train, test, train_model_input, test_model_input,
     dnn_feature_columns, linear_feature_columns,
     feature_names, target) = read_data_as_model()

    # Binary-classification xDeepFM trained with log loss.
    model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )
    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2,
                        validation_split=0.2, )

    # Evaluate on the held-out test split.
    pred_ans = model.predict(test_model_input, batch_size=256)
    y_true = test[target].values
    print("test LogLoss", round(log_loss(y_true, pred_ans), 4))
    print("test AUC", round(roc_auc_score(y_true, pred_ans), 4))