Пример #1
0
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Run DCN on randomly generated sparse and dense inputs via ``check_model``.

    ``sparse_feature_num`` is used as the count for both the sparse and the
    dense features. The remaining arguments are forwarded unchanged to the
    ``DCN`` constructor (presumably supplied by a pytest parametrization that
    is not visible here).
    """
    model_name = "DCN"
    sample_size = 64
    feature_count = sparse_feature_num

    # Every sparse feature gets a random vocabulary size drawn from [1, 10).
    sparse_dims = {
        'sparse_' + str(idx): np.random.randint(1, 10)
        for idx in range(feature_count)
    }
    dense_names = ['dense_' + str(idx) for idx in range(feature_count)]
    feature_dim_dict = {"sparse": sparse_dims, 'dense': dense_names}

    # Inputs are drawn in the order: sparse columns, dense columns, labels.
    x = [
        np.random.randint(0, vocab_size, sample_size)
        for vocab_size in sparse_dims.values()
    ]
    x += [np.random.random(sample_size) for _ in dense_names]
    y = np.random.randint(0, 2, sample_size)

    dcn_options = dict(
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )
    check_model(DCN(feature_dim_dict, **dcn_options), model_name, x, y)
Пример #2
0
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Train DCN on random data, then round-trip its weights and the model.

    ``sparse_feature_num`` is used as the count for both the sparse and the
    dense features. The remaining arguments are forwarded unchanged to the
    ``DCN`` constructor.

    The weight and model files are written into a temporary directory that is
    removed afterwards, so the test leaves no ``.h5`` artifacts in the working
    directory and cannot collide with other tests that use the same file names.
    """
    # Imported locally so this block does not depend on the file's import list.
    import os
    import shutil
    import tempfile

    model_name = "DCN"

    sample_size = 64
    feature_dim_dict = {"sparse": {}, 'dense': []}
    for name, num in zip(["sparse", "dense"],
                         [sparse_feature_num, sparse_feature_num]):
        if name == "sparse":
            # Each sparse feature gets a random vocabulary size in [1, 10).
            for i in range(num):
                feature_dim_dict[name][name + '_' +
                                       str(i)] = np.random.randint(1, 10)
        else:
            for i in range(num):
                feature_dim_dict[name].append(name + '_' + str(i))
    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    dense_input = [
        np.random.random(sample_size) for _ in feature_dim_dict['dense']
    ]

    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = DCN(
        feature_dim_dict,
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )
    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)

    print(model_name + " test train valid pass!")

    work_dir = tempfile.mkdtemp()
    try:
        weights_path = os.path.join(work_dir, model_name + '_weights.h5')
        model.save_weights(weights_path)
        model.load_weights(weights_path)
        print(model_name + " test save load weight pass!")

        model_path = os.path.join(work_dir, model_name + '.h5')
        save_model(model, model_path)
        model = load_model(model_path, custom_objects)
        print(model_name + " test save load model pass!")
    finally:
        # Best-effort cleanup: a failed removal must not mask the test result.
        shutil.rmtree(work_dir, ignore_errors=True)

    print(model_name + " test pass!")
Пример #3
0
def test_DCN_2():
    """Run DCN with an empty first feature list through ``check_model``.

    The first positional argument to ``DCN`` is an empty list (presumably the
    linear feature columns -- confirm against the DCN signature); the generated
    feature columns are passed as the second argument.
    """
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE, sparse_feature_num=3, dense_feature_num=2)

    dcn_options = dict(cross_num=1, dnn_hidden_units=(8,), dnn_dropout=0.5)
    check_model(DCN([], feature_columns, **dcn_options), "DCN", x, y)
Пример #4
0
def test_DCN(cross_num, hidden_size, sparse_feature_num, cross_parameterization):
    """Run DCN through ``check_model`` for one combination of settings.

    ``sparse_feature_num`` is used as the count for both the sparse and the
    dense test features, and the same generated feature columns are passed as
    both positional arguments of ``DCN``.
    """
    feature_count = sparse_feature_num
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=feature_count,
        dense_feature_num=feature_count,
    )

    dcn_options = dict(
        cross_num=cross_num,
        cross_parameterization=cross_parameterization,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    model = DCN(feature_columns, feature_columns, **dcn_options)
    check_model(model, "DCN", x, y)
Пример #5
0
def test_DCN(embedding_size, cross_num, hidden_size):
    """Train DCN on a fixed feature layout, then round-trip weights and model.

    Three sparse features (vocabulary sizes 2, 5 and 10) and three dense
    features are filled with random data. The arguments are forwarded
    unchanged to the ``DCN`` constructor.

    The weight and model files are written into a temporary directory that is
    removed afterwards, so the test leaves no ``.h5`` artifacts in the working
    directory.
    """
    # Imported locally so this block does not depend on the file's import list.
    import os
    import shutil
    import tempfile

    name = "DCN"

    sample_size = 64
    feature_dim_dict = {
        'sparse': {
            'sparse_1': 2,
            'sparse_2': 5,
            'sparse_3': 10
        },
        'dense': ['dense_1', 'dense_2', 'dense_3']
    }
    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    # The loop variable is deliberately not called ``name``: on Python 2 a
    # list-comprehension variable leaks into the enclosing scope and would
    # overwrite the model name used for messages and file names below.
    dense_input = [
        np.random.random(sample_size) for _ in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = DCN(
        feature_dim_dict,
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )
    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(name + " test train valid pass!")

    work_dir = tempfile.mkdtemp()
    try:
        weights_path = os.path.join(work_dir, name + '_weights.h5')
        model.save_weights(weights_path)
        model.load_weights(weights_path)
        print(name + " test save load weight pass!")

        model_path = os.path.join(work_dir, name + '.h5')
        save_model(model, model_path)
        model = load_model(model_path, custom_objects)
        print(name + " test save load model pass!")
    finally:
        # Best-effort cleanup: a failed removal must not mask the test result.
        shutil.rmtree(work_dir, ignore_errors=True)

    print(name + " test pass!")
Пример #6
0
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Run DCN through ``check_model`` for one combination of settings.

    ``sparse_feature_num`` is passed twice to ``get_test_data`` (second and
    third positional arguments); ``hidden_size`` is forwarded to ``DCN`` as
    ``dnn_hidden_units``.
    """
    feature_count = sparse_feature_num
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE, feature_count, feature_count)

    dcn_options = dict(
        embedding_size=embedding_size,
        cross_num=cross_num,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    check_model(DCN(feature_columns, **dcn_options), "DCN", x, y)
Пример #7
0
def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num):
    """Run DCN through ``check_model`` on data from ``get_test_data``.

    ``sparse_feature_num`` is passed twice to ``get_test_data`` (second and
    third positional arguments); the other arguments are forwarded unchanged
    to the ``DCN`` constructor.
    """
    feature_count = sparse_feature_num
    x, y, feature_dim_dict = get_test_data(64, feature_count, feature_count)

    dcn_options = dict(
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )
    check_model(DCN(feature_dim_dict, **dcn_options), "DCN", x, y)
Пример #8
0
def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()):
    """Expect ``ValueError`` when DCN is built with the default arguments.

    The defaults request zero cross layers (``cross_num=0``) and an empty
    ``hidden_size`` tuple.
    """
    sparse_dims = {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10}
    dense_names = ['dense_1', 'dense_2', 'dense_3']
    feature_dim_dict = {'sparse': sparse_dims, 'dense': dense_names}

    dcn_options = dict(
        embedding_size=embedding_size,
        cross_num=cross_num,
        hidden_size=hidden_size,
        keep_prob=0.5,
    )
    with pytest.raises(ValueError):
        DCN(feature_dim_dict, **dcn_options)
Пример #9
0
def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()):
    """Expect ``ValueError`` when DCN is built with the default arguments.

    The defaults request zero cross layers (``cross_num=0``) and an empty
    ``hidden_size`` tuple; constructing the model is expected to raise
    ``ValueError`` (presumably because neither part of the network has any
    layers -- confirm against the DCN implementation).
    """
    feature_dim_dict = {
        'sparse': [
            SingleFeat('sparse_1', 2),
            SingleFeat('sparse_2', 5),
            SingleFeat('sparse_3', 10)
        ],
        # Dense feature names are kept distinct so that the feature config is
        # well formed and cannot itself be the cause of the expected error.
        'dense': [
            SingleFeat('dense_1', 1),
            SingleFeat('dense_2', 1),
            SingleFeat('dense_3', 1)
        ]
    }
    with pytest.raises(ValueError):
        _ = DCN(
            feature_dim_dict,
            embedding_size=embedding_size,
            cross_num=cross_num,
            hidden_size=hidden_size,
            keep_prob=0.5,
        )
Пример #10
0
    lbe = LabelEncoder()
    data[feature] = lbe.fit_transform(data[feature])
# Count the number of distinct values of each sparse feature; that count is
# passed to SparseFeat as its second argument.
fixlen_feature_columns = [SparseFeat(feature, data[feature].nunique()) for feature in sparse_features]
# The same column list is used for both the linear and the DNN inputs.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
print(fixlen_feature_columns)
print(feature_names)

# Split the dataset into a training set and a test set (20% held out for test).
train, test = train_test_split(data, test_size=0.2)
# Model inputs are dicts mapping each feature name to that column's values.
train_model_input = {name:train[name].values for name in feature_names}
test_model_input = {name:test[name].values for name in feature_names}

# Train with DCN.
#model = DCN(linear_feature_columns, dnn_feature_columns, task='regression')
model = DCN(linear_feature_columns, dnn_feature_columns, task='binary')
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=1, verbose=True, validation_split=0.2, )
# Predict with DCN on the held-out test set.
pred_ans = model.predict(test_model_input, batch_size=256)
# Output RMSE or MSE.
# NOTE(review): the MSE is rounded to 4 decimals before the square root is
# taken, so the printed RMSE is derived from the rounded value.
mse = round(mean_squared_error(test[target].values, pred_ans), 4)
rmse = mse ** 0.5
print("test RMSE", rmse)

# Output LogLoss.
# NOTE(review): this import sits mid-script rather than at the top of the file.
from sklearn.metrics import log_loss
score = log_loss(test[target].values, pred_ans)
print("LogLoss", score)
Пример #11
0
        )

    if model_type == "WDL":
        model = WDL(
            linear_feature_columns,
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[1024, 512, 256],
        )

    if model_type == "DCN":
        model = DCN(
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[1024, 1024],
            cross_num=6,
        )

    if opt == "adagrad":
        optimizer = Adagrad
    elif opt == "adam":
        optimizer = Adam
    else:
        raise ValueError("Invalid optimizer")

    model.compile(optimizer(learning_rate),
                  "binary_crossentropy",
                  metrics=["binary_crossentropy"])