示例#1
0
def xdeepfm_model(linear_feature_columns,dnn_feature_columns,train_model_input,train,test_model_input,test):
    """Train an xDeepFM model, save it, and return its test-set metrics.

    Hyper-parameters are read from ``config.xdeepfm_att`` and the epoch count
    from ``config.model_epoch['epoch']``. The label column name ``target`` is
    not a parameter; it must be defined in the scope surrounding this
    function.

    Args:
        linear_feature_columns: First positional argument given to ``xDeepFM``.
        dnn_feature_columns: Second positional argument given to ``xDeepFM``.
        train_model_input: Input passed to ``model.fit``.
        train: Object indexed as ``train[target].values`` to get the training
            labels (presumably a pandas DataFrame -- confirm with the caller).
        test_model_input: Input passed to ``model.predict``.
        test: Object indexed as ``test[target].values`` to get the test labels.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``model``, ``RMSE``,
        ``MAE``, ``MSE``, ``AUC`` and ``score``. Metrics are rounded to three
        decimals; ``score`` is never filled in and keeps its initial empty
        value.

    Side effects:
        Calls ``save_model(model, 'saved_xdeepfm.h5')``.
    """
    cols = ['model', 'RMSE', 'MAE', 'MSE', 'AUC', 'score']
    df_result = pd.DataFrame(columns=cols, index=range(1))

    params = config.xdeepfm_att
    model = xDeepFM(
        linear_feature_columns,
        dnn_feature_columns,
        dnn_hidden_units=params["dnn_hidden_units"],
        init_std=params["init_std"],
        cin_layer_size=params["cin_layer_size"],
        cin_split_half=params["cin_split_half"],
        cin_activation=params["cin_activation"],
        l2_reg_cin=params["l2_reg_cin"],
        seed=params["seed"],
        dnn_dropout=params["dnn_dropout"],
        dnn_activation=params["dnn_activation"],
        task=params["task"],
        dnn_use_bn=params["dnn_use_bn"],
    )

    model.compile("adam", "mse", metrics=['mse'])

    model.fit(train_model_input, train[target].values,
              batch_size=256, epochs=config.model_epoch['epoch'],
              verbose=2, validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    save_model(model, 'saved_xdeepfm.h5')

    y_true = test[target].values
    auc = roc_auc_score(y_true, pred_ans)
    # Computed once and reused for both the MSE and RMSE cells.
    mse = mean_squared_error(y_true, pred_ans)

    # Write through a single .loc[row, column] call. The previous
    # ``df_result.loc[0].X = value`` form assigned into a temporary row Series,
    # which is not guaranteed to propagate back to the DataFrame.
    df_result.loc[0, 'model'] = "XDeepFM"
    df_result.loc[0, 'RMSE'] = np.round(math.sqrt(mse), 3)
    df_result.loc[0, 'MAE'] = np.round(mean_absolute_error(y_true, pred_ans), 3)
    df_result.loc[0, 'MSE'] = np.round(mse, 3)
    df_result.loc[0, 'AUC'] = np.round(auc, 3)
    # 'score' is intentionally left unset; no formula is applied here.
    return df_result
示例#2
0
def test_xDeepFM(hidden_size, cin_layer_size, cin_split_half, cin_activation,
                 sparse_feature_num, dense_feature_dim):
    """Run xDeepFM through ``check_model`` on a small random dataset.

    ``sparse_feature_num`` sparse fields (each with a random size drawn from
    ``np.random.randint(1, 10)``) and ``dense_feature_dim`` dense fields are
    generated for 64 samples, together with random 0/1 labels.
    """
    n_samples = 64

    # Field descriptions: sparse name -> size, plus the list of dense names.
    sparse_sizes = {}
    for idx in range(sparse_feature_num):
        sparse_sizes['sparse_' + str(idx)] = np.random.randint(1, 10)
    dense_names = ['dense_' + str(idx) for idx in range(dense_feature_dim)]
    feature_dim_dict = {"sparse": sparse_sizes, 'dense': dense_names}

    # Inputs are ordered sparse first, then dense; labels are drawn last so the
    # sequence of random draws is sparse sizes -> sparse ids -> dense -> labels.
    features = [
        np.random.randint(0, size, n_samples) for size in sparse_sizes.values()
    ]
    features.extend(np.random.random(n_samples) for _ in dense_names)
    labels = np.random.randint(0, 2, n_samples)

    model = xDeepFM(feature_dim_dict, hidden_size=hidden_size,
                    cin_layer_size=cin_layer_size,
                    cin_split_half=cin_split_half,
                    cin_activation=cin_activation, keep_prob=0.5)
    check_model(model, "xDeepFM", features, labels)
示例#3
0
def test_xDeepFM(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim):
    """Build xDeepFM on generated test data and run it through ``check_model``.

    The data comes from ``get_test_data`` with ``SAMPLE_SIZE`` samples; the
    model is built with ``dnn_dropout=0.5`` and the given DNN/CIN settings.
    """
    model_name = "xDeepFM"

    # The third argument used to be ``sparse_feature_num`` a second time, which
    # left ``dense_feature_dim`` unused.
    # NOTE(review): assumes get_test_data's third positional parameter is the
    # number of dense features -- confirm against the helper's signature.
    x, y, feature_dim_dict = get_test_data(
        SAMPLE_SIZE, sparse_feature_num, dense_feature_dim)

    model = xDeepFM(
        feature_dim_dict,
        dnn_hidden_units=dnn_hidden_units,
        cin_layer_size=cin_layer_size,
        cin_split_half=cin_split_half,
        cin_activation=cin_activation,
        dnn_dropout=0.5,
    )
    check_model(model, model_name, x, y)
示例#4
0
def test_xDeepFM(hidden_size, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim):
    """Build xDeepFM on generated test data and run it through ``check_model``.

    The data comes from ``get_test_data`` with 64 samples; the model is built
    with ``keep_prob=0.5`` and the given hidden/CIN settings.
    """
    model_name = "xDeepFM"
    sample_size = 64

    # The third argument used to be ``sparse_feature_num`` a second time, which
    # left ``dense_feature_dim`` unused.
    # NOTE(review): assumes get_test_data's third positional parameter is the
    # number of dense features -- confirm against the helper's signature.
    x, y, feature_dim_dict = get_test_data(
        sample_size, sparse_feature_num, dense_feature_dim)

    model = xDeepFM(
        feature_dim_dict,
        hidden_size=hidden_size,
        cin_layer_size=cin_layer_size,
        cin_split_half=cin_split_half,
        cin_activation=cin_activation,
        keep_prob=0.5,
    )
    check_model(model, model_name, x, y)
示例#5
0
def run_xdeepfm_model():
    """Train a binary xDeepFM model and evaluate it on the test split.

    All data and feature columns come from ``read_data_as_model()``. The model
    is trained for 10 epochs with batch size 256 and a 0.2 validation split,
    then the test log-loss and AUC (rounded to 4 decimals) are printed.

    Returns:
        A 4-tuple ``(pred_ans, y_true, auc, 'xdeepfm')`` where ``pred_ans`` is
        the output of ``model.predict`` on the test input, ``y_true`` is
        ``test[target].values`` and ``auc`` is the rounded test AUC.
    """
    (train, test, train_model_input, test_model_input,
     dnn_feature_columns, linear_feature_columns,
     feature_names, target) = read_data_as_model()

    model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'])

    model.fit(train_model_input, train[target].values,
              batch_size=256, epochs=10, verbose=2, validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=256)

    y_true = test[target].values
    print("test LogLoss", round(log_loss(y_true, pred_ans), 4))
    # Compute the AUC once and reuse it for both the report and the return
    # value instead of scoring the test set twice.
    auc = round(roc_auc_score(y_true, pred_ans), 4)
    print("test AUC", auc)
    return pred_ans, y_true, auc, 'xdeepfm'
示例#6
0
def test_xDeepFM_invalid(hidden_size, cin_layer_size):
    """Expect ``xDeepFM`` to raise ``ValueError`` for the given settings."""
    sparse_sizes = {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10}
    dense_names = ['dense_1', 'dense_2', 'dense_3']
    feature_dim_dict = {'sparse': sparse_sizes, 'dense': dense_names}

    # Construction itself is what must fail; the model object is not needed.
    with pytest.raises(ValueError):
        xDeepFM(feature_dim_dict, hidden_size=hidden_size,
                cin_layer_size=cin_layer_size)
示例#7
0
# Step 2: describe each feature. Sparse fields are registered with their
# number of distinct values in `data`; dense fields pass 0 as the second
# SingleFeat argument.
sparse_feature_list = [
    SingleFeat(feat, data[feat].nunique()) for feat in sparse_features
]
dense_feature_list = [SingleFeat(feat, 0) for feat in dense_features]

# Step 3: split the rows at `train_size` and build the model inputs as one
# array per feature, sparse features first and dense features after them.
train, test = data.iloc[:train_size], data.iloc[train_size:]

all_features = sparse_feature_list + dense_feature_list
train_model_input = [train[feat.name].values for feat in all_features]
test_model_input = [test[feat.name].values for feat in all_features]

# Step 4: define the model, train it, and predict on the test rows.
model = xDeepFM(
    {"sparse": sparse_feature_list, "dense": dense_feature_list},
    final_activation='sigmoid',
)
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])

history = model.fit(
    train_model_input,
    train[target].values,
    batch_size=1024,
    epochs=5,
    verbose=2,
    validation_split=0.2,
)
pred_ans = model.predict(test_model_input, batch_size=1024)

# No metrics are computed here; the predictions go straight into the sample
# submission's 'HasDetections' column and are written out as a new CSV.
submission = pd.read_csv('data/raw/sample_submission.csv')
submission['HasDetections'] = pred_ans
submission.to_csv('nffm_submission.csv', index=False)
示例#8
0
    if model_type == "DeepFM":
        model = DeepFM(
            linear_feature_columns,
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            use_fm=True,
            dnn_hidden_units=[400, 400, 400],
        )

    if model_type == "xDeepFM":
        model = xDeepFM(
            linear_feature_columns,
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[400, 400],
            cin_layer_size=[200, 200, 200],
        )

    if model_type == "WDL":
        model = WDL(
            linear_feature_columns,
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[1024, 512, 256],
        )

    if model_type == "DCN":
        model = DCN(
示例#9
0
def test_xDeepFM_invalid(hidden_size, cin_layer_size):
    """Check that building ``xDeepFM`` with these settings raises ``ValueError``."""
    feature_dim_dict = {
        'sparse': {
            'sparse_1': 2,
            'sparse_2': 5,
            'sparse_3': 10,
        },
        'dense': ['dense_1', 'dense_2', 'dense_3'],
    }
    model_kwargs = {
        'hidden_size': hidden_size,
        'cin_layer_size': cin_layer_size,
    }
    # Only the constructor call is under test, so its result is discarded.
    with pytest.raises(ValueError):
        xDeepFM(feature_dim_dict, **model_kwargs)
示例#10
0
from deepctr.models import xDeepFM
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
from sklearn.metrics import log_loss, roc_auc_score
from utils import *

if __name__ == "__main__":
    # Load the prepared splits, model inputs, feature columns and label name.
    (train, test, train_model_input, test_model_input,
     dnn_feature_columns, linear_feature_columns,
     feature_names, target) = read_data_as_model()

    # Binary-classification xDeepFM trained with Adam on binary cross-entropy.
    model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'])

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    )

    # Score the test split and report both metrics rounded to 4 decimals.
    pred_ans = model.predict(test_model_input, batch_size=256)
    y_true = test[target].values
    print("test LogLoss", round(log_loss(y_true, pred_ans), 4))
    print("test AUC", round(roc_auc_score(y_true, pred_ans), 4))