def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Build a random sparse/dense dataset and run DCN through ``check_model``.

    ``sparse_feature_num`` is used as the count for both the sparse and the
    dense features. Sparse features are stored as ``name -> dimension`` and
    dense features as a list of names.
    """
    model_name = "DCN"
    sample_size = 64

    # Each sparse feature gets a dimension drawn with np.random.randint(1, 10).
    sparse_dims = {}
    for idx in range(sparse_feature_num):
        sparse_dims["sparse_" + str(idx)] = np.random.randint(1, 10)
    dense_names = ["dense_" + str(idx) for idx in range(sparse_feature_num)]
    feature_dim_dict = {"sparse": sparse_dims, "dense": dense_names}

    # Inputs are ordered sparse columns first, then dense columns; the label
    # vector is drawn last.
    x = [np.random.randint(0, dim, sample_size) for dim in sparse_dims.values()]
    x += [np.random.random(sample_size) for _ in dense_names]
    y = np.random.randint(0, 2, sample_size)

    model = DCN(
        feature_dim_dict,
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )
    check_model(model, model_name, x, y)
def test_DCN_2():
    """Run DCN with an empty first feature-column list through ``check_model``.

    Test data is generated with 3 sparse and 2 dense features; only the second
    positional argument of ``DCN`` receives the generated feature columns.
    """
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=3,
        dense_feature_num=2,
    )
    model = DCN(
        [],
        feature_columns,
        cross_num=1,
        dnn_hidden_units=(8,),
        dnn_dropout=0.5,
    )
    check_model(model, "DCN", x, y)
def test_DCN(cross_num, hidden_size, sparse_feature_num, cross_parameterization):
    """Build DCN for the given cross settings and run it through ``check_model``.

    ``sparse_feature_num`` is used as the count for both sparse and dense
    features, and the same feature columns are passed as both of the model's
    positional arguments.
    """
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    dcn_kwargs = dict(
        cross_num=cross_num,
        cross_parameterization=cross_parameterization,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    model = DCN(feature_columns, feature_columns, **dcn_kwargs)
    check_model(model, "DCN", x, y)
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Build DCN from generated feature columns and run ``check_model`` on it.

    ``sparse_feature_num`` is passed to ``get_test_data`` twice, so the sparse
    and dense feature counts are the same.
    """
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE, sparse_feature_num, sparse_feature_num
    )
    model = DCN(
        feature_columns,
        embedding_size=embedding_size,
        cross_num=cross_num,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    check_model(model, "DCN", x, y)
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Build DCN from a generated feature dict and run ``check_model`` on it.

    Uses 64 samples; ``sparse_feature_num`` is passed to ``get_test_data``
    twice, so the sparse and dense feature counts are the same.
    """
    x, y, feature_dim_dict = get_test_data(
        64, sparse_feature_num, sparse_feature_num
    )
    dcn_kwargs = {
        "embedding_size": embedding_size,
        "cross_num": cross_num,
        "hidden_size": hidden_size,
        "keep_prob": 0.5,
    }
    model = DCN(feature_dim_dict, **dcn_kwargs)
    check_model(model, "DCN", x, y)
def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()):
    """Check that constructing DCN with the default arguments raises ValueError.

    The defaults request zero cross layers and an empty ``hidden_size``.
    """
    sparse_dims = {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10}
    dense_names = ['dense_1', 'dense_2', 'dense_3']
    feature_dim_dict = {'sparse': sparse_dims, 'dense': dense_names}

    # Only construction is exercised; the model object itself is not needed.
    with pytest.raises(ValueError):
        DCN(
            feature_dim_dict,
            embedding_size=embedding_size,
            cross_num=cross_num,
            hidden_size=hidden_size,
            keep_prob=0.5,
        )
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Train DCN on random data, then round-trip its weights and the full model.

    ``sparse_feature_num`` is used as the count for both the sparse and the
    dense features. Progress messages are printed after each stage, and the
    weight/model files are written to the current working directory.
    """
    model_name = "DCN"
    sample_size = 64
    weights_file = model_name + '_weights.h5'
    model_file = model_name + '.h5'

    # Each sparse feature gets a dimension drawn with np.random.randint(1, 10).
    sparse_dims = {}
    for idx in range(sparse_feature_num):
        sparse_dims["sparse_" + str(idx)] = np.random.randint(1, 10)
    dense_names = ["dense_" + str(idx) for idx in range(sparse_feature_num)]
    feature_dim_dict = {"sparse": sparse_dims, "dense": dense_names}

    # Inputs are ordered sparse columns first, then dense columns; the label
    # vector is drawn last.
    x = [np.random.randint(0, dim, sample_size) for dim in sparse_dims.values()]
    x += [np.random.random(sample_size) for _ in dense_names]
    y = np.random.randint(0, 2, sample_size)

    model = DCN(
        feature_dim_dict,
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )

    # Stage 1: one training epoch with half of the data held out.
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    # Stage 2: save and reload the weights only.
    model.save_weights(weights_file)
    model.load_weights(weights_file)
    print(model_name + " test save load weight pass!")

    # Stage 3: save and reload the whole model.
    save_model(model, model_file)
    model = load_model(model_file, custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
def test_DCN(embedding_size, cross_num, hidden_size):
    """Train DCN on random data, then round-trip its weights and the full model.

    A fixed feature dictionary with three sparse and three dense features is
    used. Progress messages are printed after each stage, and the weight/model
    files are written to the current working directory.

    Args:
        embedding_size: forwarded to ``DCN`` as ``embedding_size``.
        cross_num: forwarded to ``DCN`` as ``cross_num``.
        hidden_size: forwarded to ``DCN`` as ``hidden_size``.
    """
    model_name = "DCN"
    sample_size = 64
    feature_dim_dict = {
        'sparse': {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10},
        'dense': ['dense_1', 'dense_2', 'dense_3'],
    }

    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    # The loop variable is deliberately a throwaway: reusing the model-name
    # variable here would overwrite it on Python 2, where list-comprehension
    # variables leak into the enclosing scope, and corrupt every file name
    # and message below.
    dense_input = [
        np.random.random(sample_size) for _ in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = DCN(
        feature_dim_dict,
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )

    # One training epoch with half of the data held out for validation.
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    # Save and reload the weights only.
    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name + " test save load weight pass!")

    # Save and reload the whole model.
    save_model(model, model_name + '.h5')
    model = load_model(model_name + '.h5', custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()):
    """Check that constructing DCN with the default arguments raises ValueError.

    The defaults request zero cross layers and an empty ``hidden_size``.

    Args:
        embedding_size: forwarded to ``DCN`` as ``embedding_size``.
        cross_num: forwarded to ``DCN`` as ``cross_num``.
        hidden_size: forwarded to ``DCN`` as ``hidden_size``.
    """
    feature_dim_dict = {
        'sparse': [
            SingleFeat('sparse_1', 2),
            SingleFeat('sparse_2', 5),
            SingleFeat('sparse_3', 10),
        ],
        # Dense feature names must be distinct; three identically named
        # features would not describe the intended three-feature fixture and
        # could let the expected error be raised for an unrelated reason.
        'dense': [
            SingleFeat('dense_1', 1),
            SingleFeat('dense_2', 1),
            SingleFeat('dense_3', 1),
        ],
    }

    # Only construction is exercised; the model object itself is not needed.
    with pytest.raises(ValueError):
        _ = DCN(
            feature_dim_dict,
            embedding_size=embedding_size,
            cross_num=cross_num,
            hidden_size=hidden_size,
            keep_prob=0.5,
        )
lbe = LabelEncoder() data[feature] = lbe.fit_transform(data[feature]) # 计算每个特征中的 不同特征值的个数 fixlen_feature_columns = [SparseFeat(feature, data[feature].nunique()) for feature in sparse_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) print(fixlen_feature_columns) print(feature_names) # 将数据集切分成训练集和测试集 train, test = train_test_split(data, test_size=0.2) train_model_input = {name:train[name].values for name in feature_names} test_model_input = {name:test[name].values for name in feature_names} # 使用DCN进行训练 #model = DCN(linear_feature_columns, dnn_feature_columns, task='regression') model = DCN(linear_feature_columns, dnn_feature_columns, task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=1, verbose=True, validation_split=0.2, ) # 使用DCN进行预测 pred_ans = model.predict(test_model_input, batch_size=256) # 输出RMSE或MSE mse = round(mean_squared_error(test[target].values, pred_ans), 4) rmse = mse ** 0.5 print("test RMSE", rmse) # 输出LogLoss from sklearn.metrics import log_loss score = log_loss(test[target].values, pred_ans) print("LogLoss", score)
) if model_type == "WDL": model = WDL( linear_feature_columns, dnn_feature_columns, task="binary", embedding_size=emb_dim, dnn_hidden_units=[1024, 512, 256], ) if model_type == "DCN": model = DCN( dnn_feature_columns, task="binary", embedding_size=emb_dim, dnn_hidden_units=[1024, 1024], cross_num=6, ) if opt == "adagrad": optimizer = Adagrad elif opt == "adam": optimizer = Adam else: raise ValueError("Invalid optimizer") model.compile(optimizer(learning_rate), "binary_crossentropy", metrics=["binary_crossentropy"])