def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Build a PNN on randomly generated features and hand it to check_model.

    ``sparse_feature_num`` sparse features (each with a random dimension
    drawn by ``np.random.randint(1, 10)``) and the same number of dense
    features are generated, with 64 samples per feature and binary labels.

    Args:
        use_inner: forwarded to ``PNN`` as ``use_inner``.
        use_outter: forwarded to ``PNN`` as ``use_outter``.
        sparse_feature_num: number of sparse features; also used as the
            number of dense features.
    """
    n_samples = 64

    # Sparse dimensions are drawn first, one per feature, in index order.
    sparse_dims = {
        'sparse_' + str(idx): np.random.randint(1, 10)
        for idx in range(sparse_feature_num)
    }
    dense_names = ['dense_' + str(idx) for idx in range(sparse_feature_num)]
    feature_dim_dict = {"sparse": sparse_dims, 'dense': dense_names}

    # One integer column per sparse feature, bounded by that feature's dim.
    sparse_input = [
        np.random.randint(0, dim, n_samples) for dim in sparse_dims.values()
    ]
    # One float column per dense feature.
    dense_input = [np.random.random(n_samples) for _ in dense_names]
    y = np.random.randint(0, 2, n_samples)
    x = sparse_input + dense_input

    model = PNN(
        feature_dim_dict,
        embedding_size=8,
        hidden_size=[32, 32],
        keep_prob=0.5,
        use_inner=use_inner,
        use_outter=use_outter,
    )
    check_model(model, "PNN", x, y)
def test_OPNN():
    """Train, save and reload an outer-product PNN on small random data.

    Steps, each followed by a progress message on stdout:
      1. fit for one epoch with a 50% validation split,
      2. save the weights to ``OPNN_weights.h5`` and load them back,
      3. save the whole model to ``OPNN.h5`` and load it back with
         ``custom_objects``.

    The ``.h5`` files are written to the current working directory and are
    not removed afterwards.
    """
    name = "OPNN"
    sample_size = 64
    feature_dim_dict = {
        'sparse': {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10},
        'dense': ['dense_1', 'dense_2', 'dense_3'],
    }

    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    # The loop variable is deliberately `_`, not `name`: on Python 2 a list
    # comprehension variable leaks into the enclosing scope and would replace
    # the model name used for the messages and file names below.
    dense_input = [
        np.random.random(sample_size) for _ in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = PNN(
        feature_dim_dict,
        embedding_size=8,
        hidden_size=[32, 32],
        use_inner=False,
        use_outter=True,
        keep_prob=0.5,
    )
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(name + " test train valid pass!")

    weights_path = name + '_weights.h5'
    model.save_weights(weights_path)
    model.load_weights(weights_path)
    print(name + " test save load weight pass!")

    model_path = name + '.h5'
    save_model(model, model_path)
    model = load_model(model_path, custom_objects)
    print(name + " test save load model pass!")

    print(name + " test pass!")
def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Build a PNN from ``get_test_data`` output and hand it to check_model.

    Args:
        use_inner: forwarded to ``PNN`` as ``use_inner``.
        use_outter: forwarded to ``PNN`` as ``use_outter``.
        sparse_feature_num: passed to ``get_test_data`` as both its second
            and third argument (presumably the sparse and dense feature
            counts -- confirm against ``get_test_data``).
    """
    x, y, feature_dim_dict = get_test_data(
        SAMPLE_SIZE, sparse_feature_num, sparse_feature_num)

    pnn_options = {
        'embedding_size': 8,
        'hidden_size': [32, 32],
        'keep_prob': 0.5,
        'use_inner': use_inner,
        'use_outter': use_outter,
    }
    model = PNN(feature_dim_dict, **pnn_options)
    check_model(model, "PNN", x, y)
def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Build a small PNN from generated feature columns and check it.

    Args:
        use_inner: forwarded to ``PNN`` as ``use_inner``.
        use_outter: forwarded to ``PNN`` as ``use_outter``.
        sparse_feature_num: passed to ``get_test_data`` as both its second
            and third argument (presumably the sparse and dense feature
            counts -- confirm against ``get_test_data``).
    """
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE, sparse_feature_num, sparse_feature_num)

    pnn_options = {
        'embedding_size': 4,
        'dnn_hidden_units': [4, 4],
        'dnn_dropout': 0.5,
        'use_inner': use_inner,
        'use_outter': use_outter,
    }
    model = PNN(feature_columns, **pnn_options)
    check_model(model, "PNN", x, y)
def _fit_and_score_pnn(dnn_feature_columns, train_model_input, train_y,
                       test_model_input, true_y, results, **pnn_kwargs):
    """Train one PNN variant and append its test metrics to ``results``.

    The model is the inner-product configuration (``use_inner=True``,
    ``use_outter=False``) for a binary task; ``pnn_kwargs`` carries the single
    hyper-parameter being swept. ``results`` is mutated in place: one value is
    appended to each of its ``"auc"``, ``"logloss"`` and ``"rmse"`` lists.
    """
    # NOTE: the outer-product (OPNN) variant of this experiment used
    # use_inner=False, use_outter=True here.
    model = PNN(dnn_feature_columns, use_inner=True, use_outter=False,
                task='binary', **pnn_kwargs)
    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=['binary_crossentropy'],
    )
    model.fit(
        train_model_input,
        train_y,
        batch_size=256,
        epochs=10,
        verbose=0,
        validation_split=TEST_PROPORTION,
    )
    pred_y = model.predict(test_model_input, batch_size=256)

    results["auc"].append(compute_auc(true_y, pred_y))
    results["logloss"].append(compute_log_loss(true_y, pred_y))
    results["rmse"].append(compute_rmse(true_y, pred_y))


def test_PNN_avazu(data, train, test):
    """Sweep PNN hyper-parameters on the avazu dataset and optionally plot.

    Three independent sweeps are run, each training a fresh model per value:
    activation functions (``dnn_activation_list``), dropout rates
    (``dnn_dropout_list``) and hidden-unit layouts
    (``dnn_hidden_units_list``). AUC, log loss and RMSE on ``test`` are
    collected per value; when the module-level ``PLOT`` flag is truthy the
    three result sets are passed to ``create_plots``.

    Args:
        data: full frame; only used to size each sparse feature's vocabulary
            via ``data[feat].nunique()``.
        train: training frame. Column 0 is the target, columns 1..22 are the
            sparse features.
        test: test frame, indexed by the same feature and target column names.
    """
    print("\nTesting PNN on avazu dataset...\n")

    results_activation_function = {"auc": [], "logloss": [], "rmse": []}
    results_dropout = {"auc": [], "logloss": [], "rmse": []}
    results_number_of_neurons = {"auc": [], "logloss": [], "rmse": []}

    features_labels = train.columns
    sparse_features_labels = features_labels[1:23]
    target_label = features_labels[0]

    dnn_feature_columns = [
        SparseFeat(
            feat,
            vocabulary_size=data[feat].nunique(),
            embedding_dim=4,
        ) for feat in sparse_features_labels
    ]
    feature_names = get_feature_names(dnn_feature_columns)

    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
    train_y = train[target_label].values
    true_y = test[target_label].values

    # (banner, PNN keyword being swept, candidate values, result sink)
    sweeps = (
        ("\t\t-- ACTIVATION FUNCTIONS --\t\t", "dnn_activation",
         dnn_activation_list, results_activation_function),
        ("\t\t-- DROPOUT RATES --\t\t", "dnn_dropout",
         dnn_dropout_list, results_dropout),
        ("\t\t-- HIDDEN UNITS --\t\t", "dnn_hidden_units",
         dnn_hidden_units_list, results_number_of_neurons),
    )
    for banner, keyword, candidates, results in sweeps:
        print(banner)
        for value in candidates:
            print("\nTesting {value}...".format(value=value))
            _fit_and_score_pnn(
                dnn_feature_columns, train_model_input, train_y,
                test_model_input, true_y, results, **{keyword: value})

    if PLOT:
        # NOTE: the OPNN variant of this experiment labelled these plots
        # "OPNN" instead of "PNN".
        create_plots("PNN", "avazu", results_activation_function,
                     "Activation Function", "activation_func",
                     dnn_activation_list)
        create_plots("PNN", "avazu", results_dropout, "Dropout Rate",
                     "dropout", dnn_dropout_list)
        create_plots("PNN", "avazu", results_number_of_neurons,
                     "Number of Neurons per layer", "nr_neurons",
                     dnn_hidden_units_list)
VarLenSparseFeat(SparseFeat('neg_hist_category', train['category'].nunique() + 1, embedding_dim = int(sys.argv[5]), embedding_name='category'), maxlen=max_len, length_name='seq_length') ] behavior_feature_list = ['itemId', 'category'] if sys.argv[1] == 'DeepFM_UDG': model = DeepFM_UDG(linear_feature_columns, dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5])) elif sys.argv[1] == 'DeepFM': model = DeepFM(linear_feature_columns, dnn_feature_columns, [], (200, 80)) elif sys.argv[1] == 'PNN_UDG': model = PNN_UDG(dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5])) elif sys.argv[1] == 'PNN': model = PNN(dnn_feature_columns, untrainable_features_columns, (200, 80)) elif sys.argv[1] == 'WDL': model = WDL(linear_feature_columns, dnn_feature_columns, [], (200, 80)) elif sys.argv[1] == 'WDL_UDG': model = WDL_UDG(linear_feature_columns, dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5])) elif sys.argv[1] == 'DIEN': model = DIEN(fixlen_feature_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, gru_type="AUGRU", use_negsampling=True) elif sys.argv[1] == 'DIEN_UDG': model = DIEN_UDG(fixlen_feature_columns, untrainable_features_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, gru_type="AUGRU", use_negsampling=True, uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5])) elif sys.argv[1] == 'DIN': model = DIN(fixlen_feature_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0) elif sys.argv[1] == 'DIN_UDG': model = DIN_UDG(fixlen_feature_columns, untrainable_features_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5])) if sys.argv[4] == 'focal':
# The linear part reuses the fixed-length feature columns.
linear_feature_columns = fixlen_feature_columns
all_feature_columns = linear_feature_columns + dnn_feature_columns
feature_names = get_feature_names(all_feature_columns)

# 3. Generate input data for the model
# (alternative input kept from the original: train_test_split(data_ohe, test_size=0.2))
print("Spltting dataset into train and test sets...\n")
train, test = train_test_split(data, test_size=0.2)

# The model takes a mapping from feature name to that feature's column.
train_model_input = {col: train[col] for col in feature_names}
test_model_input = {col: test[col] for col in feature_names}

# 4. Define the model, then train, predict and evaluate
print("Defining PNN model...\n")
model = PNN(dnn_feature_columns, task='binary')

print("Compiling PNN model...\n")
model.compile(
    "adam",
    "binary_crossentropy",
    metrics=['binary_crossentropy'],
)

print("Training the model...\n")
fit_options = dict(batch_size=256, epochs=10, verbose=1, validation_split=0.2)
model.fit(train_model_input, train[target].values, **fit_options)

print("\nTesting the model...\n")
pred = model.predict(test_model_input, batch_size=256)
test_labels = test[target].values
print("test LogLoss", round(log_loss(test_labels, pred), 4))
print("test AUC", round(roc_auc_score(test_labels, pred), 4))

# Report wall-clock time since start_time (set earlier in the script).
elapsed = datetime.now() - start_time
print("\nProgram ended in {time}".format(time=elapsed))