def test_make_user_item_regression():
    """Check that noisy labels are harder to predict than noise-free ones.

    Two data sets are drawn from ``make_user_item_regression``, one with
    ``label_stdev=0`` and one with ``label_stdev=2``.  Each is split with the
    same ``test_size``/``random_state`` and fitted with a fresh rank-2 MCMC
    ``FMRegression``.  The test passes when the held-out MSE on the noisy
    data is strictly larger than on the noise-free data.
    """
    from fastFM.mcmc import FMRegression
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    def heldout_mse(label_stdev):
        # Generate, split, fit and score one data set.  Keeping the test
        # labels local guarantees that each prediction is scored against
        # the labels of its own split (the earlier version overwrote
        # ``y_test`` and scored the noise-free model on the noisy labels).
        X, y, _ = make_user_item_regression(label_stdev=label_stdev)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)
        fm = FMRegression(rank=2)
        y_pred = fm.fit_predict(sp.csc_matrix(X_train), y_train,
                                sp.csc_matrix(X_test))
        return mean_squared_error(y_pred, y_test)

    mse_clean = heldout_mse(label_stdev=0)
    # generate data with noisy labels
    mse_noisy = heldout_mse(label_stdev=2)

    assert mse_noisy > mse_clean
示例#2
0
def test_find_init_stdev():
    """Exercise ``mcmc.find_init_stdev`` with and without validation data.

    The search is run three times on the same MCMC regressor: over a small
    grid using the training data only, over the single value ``5.``, and
    over the small grid again with the held-out split passed in as well.
    The asserts require that the grid winner is smaller than ``5.``, that
    both grid searches select the same value, and that the MSE reported
    with validation data is larger than the one reported without it.
    """
    from sklearn.model_selection import train_test_split

    grid = (0.2, 0.5, 1.0)

    features, targets, _ = make_user_item_regression(label_stdev=.5)
    parts = train_test_split(features, targets,
                             test_size=0.33, random_state=44)
    train_X = sp.csc_matrix(parts[0])
    test_X = sp.csc_matrix(parts[1])
    train_y, test_y = parts[2], parts[3]

    model = mcmc.FMRegression(n_iter=10, rank=5)

    # Search on the training data only.
    chosen, chosen_mse = mcmc.find_init_stdev(
        model, train_X, train_y, stdev_range=list(grid))
    # A one-element range leaves the search no choice but 5.
    chosen_bad, _ = mcmc.find_init_stdev(
        model, train_X, train_y, stdev_range=[5.])
    print('--' * 30)
    # Same grid, this time with the held-out split supplied.
    chosen_vali, vali_mse = mcmc.find_init_stdev(
        model, train_X, train_y, test_X, test_y, stdev_range=list(grid))

    assert chosen < chosen_bad
    assert chosen_vali == chosen
    assert vali_mse > chosen_mse
示例#3
0
def test_als_warm_start():
    """Check that an ALS warm start reproduces an uninterrupted fit.

    A model fitted with ``n_iter=10`` is compared with a model fitted with
    ``n_iter=5`` and then continued via ``fit(..., n_more_iter=5)``.  The
    test requires the two held-out mean squared errors to be exactly equal.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    # Reference: ten iterations in a single call.
    fm = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    error_10_iter = mean_squared_error(fm.predict(X_test), y_test)

    # Same configuration stopped after five iterations ...
    fm = als.FMRegression(n_iter=5, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    print(fm.iter_count)
    error_5_iter = mean_squared_error(fm.predict(X_test), y_test)

    # ... and continued for five more.  X_train is already a csc_matrix,
    # so it is passed as is instead of being converted a second time.
    fm.fit(X_train, y_train, n_more_iter=5)
    print(fm.iter_count)
    error_5_iter_plus_5 = mean_squared_error(fm.predict(X_test), y_test)

    print(error_5_iter, error_5_iter_plus_5, error_10_iter)

    assert error_10_iter == error_5_iter_plus_5
示例#4
0
def test_als_warm_start():
    """Check that an ALS warm start reproduces an uninterrupted fit.

    A model fitted with ``n_iter=10`` is compared with a model fitted with
    ``n_iter=5`` and then continued via ``fit(..., n_more_iter=5)``.  The
    test requires the two held-out mean squared errors to be exactly equal.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    # Reference: ten iterations in a single call.
    fm = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    error_10_iter = mean_squared_error(fm.predict(X_test), y_test)

    # Same configuration stopped after five iterations ...
    fm = als.FMRegression(n_iter=5, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    # print() calls replace the Python 2 print statements, which are a
    # SyntaxError on Python 3.
    print(fm.iter_count)
    error_5_iter = mean_squared_error(fm.predict(X_test), y_test)

    # ... and continued for five more.  X_train is already a csc_matrix,
    # so it is passed as is instead of being converted a second time.
    fm.fit(X_train, y_train, n_more_iter=5)
    print(fm.iter_count)
    error_5_iter_plus_5 = mean_squared_error(fm.predict(X_test), y_test)

    print(error_5_iter, error_5_iter_plus_5, error_10_iter)

    assert error_10_iter == error_5_iter_plus_5
示例#5
0
def test_make_user_item_regression():
    """Check that noisy labels are harder to predict than noise-free ones.

    Two data sets are drawn from ``make_user_item_regression``, one with
    ``label_stdev=0`` and one with ``label_stdev=2``.  Each is split with the
    same ``test_size``/``random_state`` and fitted with a fresh rank-2 MCMC
    ``FMRegression``.  The test passes when the held-out MSE on the noisy
    data is strictly larger than on the noise-free data.
    """
    from fastFM.mcmc import FMRegression
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    def heldout_mse(label_stdev):
        # Generate, split, fit and score one data set.  Keeping the test
        # labels local guarantees that each prediction is scored against
        # the labels of its own split (the earlier version overwrote
        # ``y_test`` and scored the noise-free model on the noisy labels).
        X, y, _ = make_user_item_regression(label_stdev=label_stdev)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)
        fm = FMRegression(rank=2)
        y_pred = fm.fit_predict(sp.csc_matrix(X_train), y_train,
                                sp.csc_matrix(X_test))
        return mean_squared_error(y_pred, y_test)

    mse_clean = heldout_mse(label_stdev=0)
    # generate data with noisy labels
    mse_noisy = heldout_mse(label_stdev=2)

    assert mse_noisy > mse_clean
示例#6
0
def test_warm_start_path():
    """Compare an incremental ALS fit with fresh fits of the same length.

    One model is created with ``n_iter=0`` and then advanced one iteration
    at a time through ``fit(..., n_more_iter=1)``; after every step the
    train and test RMSE are recorded.  For each iteration count from 1 to
    ``n_iter - 1`` a new model with the same seed is then fitted from
    scratch.  Both RMSE paths must agree under ``assert_almost_equal``.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=.4)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)
    n_iter = 10

    rank = 4
    seed = 333
    step_size = 1
    l2_reg_w = 0
    l2_reg_V = 0

    fm = als.FMRegression(n_iter=0,
                          l2_reg_w=l2_reg_w,
                          l2_reg_V=l2_reg_V,
                          rank=rank,
                          random_state=seed)
    # initialize coefs
    fm.fit(X_train, y_train)

    # Warm-start path: keep extending the same model by ``step_size``.
    rmse_train = []
    rmse_test = []
    for _ in range(1, n_iter):
        fm.fit(X_train, y_train, n_more_iter=step_size)
        rmse_train.append(
            np.sqrt(mean_squared_error(fm.predict(X_train), y_train)))
        rmse_test.append(
            np.sqrt(mean_squared_error(fm.predict(X_test), y_test)))

    print('------- restart ----------')
    # Restart path: a new model for every iteration count.
    values = np.arange(1, n_iter)
    rmse_test_re = []
    rmse_train_re = []
    for i in values:
        fm = als.FMRegression(n_iter=i,
                              l2_reg_w=l2_reg_w,
                              l2_reg_V=l2_reg_V,
                              rank=rank,
                              random_state=seed)
        fm.fit(X_train, y_train)
        rmse_test_re.append(
            np.sqrt(mean_squared_error(fm.predict(X_test), y_test)))
        rmse_train_re.append(
            np.sqrt(mean_squared_error(fm.predict(X_train), y_train)))

    assert_almost_equal(rmse_train, rmse_train_re)
    assert_almost_equal(rmse_test, rmse_test_re)
示例#7
0
文件: test_als.py 项目: ibayer/fastFM
def test_warm_start_path():
    """Compare an incremental ALS fit with fresh fits of the same length.

    One model is created with ``n_iter=0`` and then advanced one iteration
    at a time through ``fit(..., n_more_iter=1)``; after every step the
    train and test RMSE are recorded.  For each iteration count from 1 to 9
    a new model with the same seed is then fitted from scratch.  Both RMSE
    paths must agree under ``assert_almost_equal``.
    """
    from sklearn.model_selection import train_test_split

    total_iter = 10

    def build(iterations):
        # Every model shares rank, seed and (zero) regularisation.
        return als.FMRegression(n_iter=iterations, l2_reg_w=0,
                                l2_reg_V=0, rank=4, random_state=333)

    def rmse(model, features, targets):
        return np.sqrt(mean_squared_error(model.predict(features), targets))

    data, labels, _ = make_user_item_regression(label_stdev=.4)
    parts = train_test_split(data, labels, test_size=0.33, random_state=42)
    train_X = sp.csc_matrix(parts[0])
    test_X = sp.csc_matrix(parts[1])
    train_y, test_y = parts[2], parts[3]

    # initialize coefs
    incremental = build(0)
    incremental.fit(train_X, train_y)

    # Warm-start path: one more iteration per step on the same model.
    warm_train, warm_test = [], []
    for _ in range(total_iter - 1):
        incremental.fit(train_X, train_y, n_more_iter=1)
        warm_train.append(rmse(incremental, train_X, train_y))
        warm_test.append(rmse(incremental, test_X, test_y))

    print('------- restart ----------')

    # Restart path: a new model for every iteration count.
    cold_test, cold_train = [], []
    for count in np.arange(1, total_iter):
        fresh = build(count)
        fresh.fit(train_X, train_y)
        cold_test.append(rmse(fresh, test_X, test_y))
        cold_train.append(rmse(fresh, train_X, train_y))

    assert_almost_equal(warm_train, cold_train)
    assert_almost_equal(warm_test, cold_test)
示例#8
0
def test_find_init_stdev():
    """Exercise ``mcmc.find_init_stdev`` with and without validation data.

    The search is run three times on the same MCMC regressor: over a small
    grid using the training data only, over the single value ``5.``, and
    over the small grid again with the held-out split passed in as well.
    The asserts require that the grid winner is smaller than ``5.``, that
    both grid searches select the same value, and that the MSE reported
    with validation data is larger than the one reported without it.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=.5)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=44)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    fm = mcmc.FMRegression(n_iter=10, rank=5)
    # Search on the training data only.
    best_init_stdev, mse = mcmc.find_init_stdev(
        fm, X_train, y_train, stdev_range=[0.2, 0.5, 1.0])
    # A one-element range leaves the search no choice but 5.
    best_init_stdev_bad, _ = mcmc.find_init_stdev(
        fm, X_train, y_train, stdev_range=[5.])
    print('--' * 30)
    # Same grid, this time with the held-out split supplied.
    best_init_stdev_vali, mse_vali = mcmc.find_init_stdev(
        fm, X_train, y_train, X_test, y_test, stdev_range=[0.2, 0.5, 1.0])
    assert best_init_stdev < best_init_stdev_bad
    assert best_init_stdev_vali == best_init_stdev
    assert mse_vali > mse
示例#9
0
def test_mcmc_warm_start():
    """Check that an MCMC warm start roughly matches an uninterrupted run.

    A sampler run with ``n_iter=100`` is compared with one run with
    ``n_iter=50`` and then continued through
    ``fit_predict(..., n_more_iter=50)``.  The two held-out mean squared
    errors must agree to two decimals.
    """
    from sklearn.model_selection import train_test_split

    data, labels, _ = make_user_item_regression(label_stdev=0)
    parts = train_test_split(data, labels, test_size=0.33, random_state=44)
    train_X = sp.csc_matrix(parts[0])
    test_X = sp.csc_matrix(parts[1])
    train_y, test_y = parts[2], parts[3]

    # Reference: all 100 iterations in a single call.
    sampler = mcmc.FMRegression(n_iter=100, rank=2)
    mse_full = mean_squared_error(
        sampler.fit_predict(train_X, train_y, test_X), test_y)

    # First half of the run ...
    sampler = mcmc.FMRegression(n_iter=50, rank=2)
    mse_half = mean_squared_error(
        sampler.fit_predict(train_X, train_y, test_X), test_y)

    # ... continued with another 50 iterations on the same model.
    mse_resumed = mean_squared_error(
        sampler.fit_predict(train_X, train_y, test_X, n_more_iter=50),
        test_y)

    print(mse_half, mse_resumed, mse_full)
    print(sampler.hyper_param_)
    assert_almost_equal(mse_full, mse_resumed, decimal=2)
示例#10
0
def test_mcmc_warm_start():
    """Check that an MCMC warm start roughly matches an uninterrupted run.

    A sampler run with ``n_iter=100`` is compared with one run with
    ``n_iter=50`` and then continued through
    ``fit_predict(..., n_more_iter=50)``.  The two held-out mean squared
    errors must agree to two decimals.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, _ = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=44)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    # Local names state the real iteration counts (100 / 50 / 50 + 50).
    fm = mcmc.FMRegression(n_iter=100, rank=2)
    y_pred = fm.fit_predict(X_train, y_train, X_test)
    error_100_iter = mean_squared_error(y_pred, y_test)

    fm = mcmc.FMRegression(n_iter=50, rank=2)
    y_pred = fm.fit_predict(X_train, y_train, X_test)
    error_50_iter = mean_squared_error(y_pred, y_test)

    # Continue the 50-iteration model for another 50 iterations.
    y_pred = fm.fit_predict(X_train, y_train, X_test, n_more_iter=50)
    error_50_iter_plus_50 = mean_squared_error(y_pred, y_test)
    print(error_50_iter, error_50_iter_plus_50, error_100_iter)
    print(fm.hyper_param_)
    assert_almost_equal(error_100_iter, error_50_iter_plus_50, decimal=2)
示例#11
0
# Example script: fit an ALS regression FM and an SGD classification FM on a
# small synthetic user/item data set and print their test-set scores.
import numpy as np
from fastFM import als, sgd
from fastFM.datasets import make_user_item_regression
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# This sets up a small test dataset.
X, y, _ = make_user_item_regression(label_stdev=.4)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Regression with the ALS solver.
fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=2,
                      l2_reg_w=0.1, l2_reg_V=0.5)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)

print('mse:', mean_squared_error(y_test, y_pred))

# Convert dataset to binary classification task: targets below the mean
# become -1, all others +1.
y_labels = np.ones_like(y)
y_labels[y < np.mean(y)] = -1
X_train, X_test, y_train, y_test = train_test_split(X, y_labels)

# Classification with the SGD solver.
fm = sgd.FMClassification(n_iter=1000, init_stdev=0.1, l2_reg_w=0,
                          l2_reg_V=0, rank=2, step_size=0.1)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)

# NOTE(review): y_pred_proba and roc_auc_score are not used in the visible
# part of this script; presumably an AUC print follows -- confirm.
y_pred_proba = fm.predict_proba(X_test)

print('acc:', accuracy_score(y_test, y_pred))