def test_make_user_item_regression():
    """Check that a larger ``label_stdev`` yields a harder regression task.

    An MCMC ``FMRegression`` (rank 2) is fitted once on data generated with
    ``label_stdev=0`` and once on data generated with ``label_stdev=2``.
    The test asserts that the test-set mean squared error of the noisy run
    exceeds that of the noise-free run.
    """
    from fastFM.mcmc import FMRegression
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    # noise-free data
    X, y, coef = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    fm = FMRegression(rank=2)
    y_pred = fm.fit_predict(sp.csc_matrix(X_train), y_train,
                            sp.csc_matrix(X_test))
    # Score the noise-free fit now, while y_test still holds the labels it
    # was generated with; y_test is rebound to the noisy labels below.
    mse_clean = mean_squared_error(y_pred, y_test)

    # generate data with noisy labels
    X, y, coef = make_user_item_regression(label_stdev=2)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    fm = FMRegression(rank=2)
    y_pred_noise = fm.fit_predict(sp.csc_matrix(X_train), y_train,
                                  sp.csc_matrix(X_test))
    mse_noise = mean_squared_error(y_pred_noise, y_test)

    assert mse_noise > mse_clean
def test_find_init_stdev():
    """Exercise ``mcmc.find_init_stdev`` with and without a validation set.

    Three searches are run on the same MCMC ``FMRegression`` (10 iterations,
    rank 5): over ``[0.2, 0.5, 1.0]``, over the single value ``[5.]``, and
    over ``[0.2, 0.5, 1.0]`` again with ``X_test``/``y_test`` supplied.
    The test asserts that the first result is smaller than the second, that
    supplying the test split selects the same value, and that the error
    reported with the test split is larger than the one reported without.
    """
    from sklearn.model_selection import train_test_split

    candidate_stdevs = (0.2, 0.5, 1.0)

    features, targets, _ = make_user_item_regression(label_stdev=.5)
    parts = train_test_split(features, targets,
                             test_size=0.33, random_state=44)
    train_X = sp.csc_matrix(parts[0])
    test_X = sp.csc_matrix(parts[1])
    train_y, test_y = parts[2], parts[3]

    model = mcmc.FMRegression(n_iter=10, rank=5)

    # search using the training data only
    chosen, chosen_mse = mcmc.find_init_stdev(
        model, train_X, train_y, stdev_range=list(candidate_stdevs))
    # a search whose only candidate is 5.0
    chosen_bad, _ = mcmc.find_init_stdev(
        model, train_X, train_y, stdev_range=[5.])
    print('--' * 30)
    # same candidates, this time with the held-out split passed in
    chosen_vali, vali_mse = mcmc.find_init_stdev(
        model, train_X, train_y, test_X, test_y,
        stdev_range=list(candidate_stdevs))

    assert chosen < chosen_bad
    assert chosen_vali == chosen
    assert vali_mse > chosen_mse
def test_als_warm_start():
    """Check that ALS warm start (5 + 5 iterations) matches a 10-iteration fit.

    One ``als.FMRegression`` is fitted with ``n_iter=10``. A second one is
    fitted with ``n_iter=5`` and then continued with ``n_more_iter=5``.
    The test asserts that both end up with the same test-set mean squared
    error.
    """
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, coef = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    # reference: 10 iterations in one go
    fm = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    y_pred = fm.predict(X_test)
    error_10_iter = mean_squared_error(y_pred, y_test)

    # 5 iterations ...
    fm = als.FMRegression(n_iter=5, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    print(fm.iter_count)
    y_pred = fm.predict(X_test)
    error_5_iter = mean_squared_error(y_pred, y_test)

    # ... continued for 5 more (X_train is already a CSC matrix)
    fm.fit(X_train, y_train, n_more_iter=5)
    print(fm.iter_count)
    y_pred = fm.predict(X_test)
    error_5_iter_plus_5 = mean_squared_error(y_pred, y_test)

    print(error_5_iter, error_5_iter_plus_5, error_10_iter)
    # NOTE(review): exact float equality -- presumably the warm-started run
    # is expected to reproduce the 10-iteration run bit for bit; confirm.
    assert error_10_iter == error_5_iter_plus_5
def test_als_warm_start():
    """Check that ALS warm start (5 + 5 iterations) matches a 10-iteration fit.

    One ``als.FMRegression`` is fitted with ``n_iter=10``. A second one is
    fitted with ``n_iter=5`` and then continued with ``n_more_iter=5``.
    The test asserts that both end up with the same test-set mean squared
    error.
    """
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, coef = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    # reference: 10 iterations in one go
    fm = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    y_pred = fm.predict(X_test)
    error_10_iter = mean_squared_error(y_pred, y_test)

    # 5 iterations ...
    fm = als.FMRegression(n_iter=5, l2_reg_w=0, l2_reg_V=0, rank=2)
    fm.fit(X_train, y_train)
    # print() call form: the Python 2 print statement is a syntax error
    # on Python 3.
    print(fm.iter_count)
    y_pred = fm.predict(X_test)
    error_5_iter = mean_squared_error(y_pred, y_test)

    # ... continued for 5 more (X_train is already a CSC matrix)
    fm.fit(X_train, y_train, n_more_iter=5)
    print(fm.iter_count)
    y_pred = fm.predict(X_test)
    error_5_iter_plus_5 = mean_squared_error(y_pred, y_test)

    print(error_5_iter, error_5_iter_plus_5, error_10_iter)
    # NOTE(review): exact float equality -- presumably the warm-started run
    # is expected to reproduce the 10-iteration run bit for bit; confirm.
    assert error_10_iter == error_5_iter_plus_5
def test_make_user_item_regression():
    """Check that a larger ``label_stdev`` yields a harder regression task.

    An MCMC ``FMRegression`` (rank 2) is fitted once on data generated with
    ``label_stdev=0`` and once on data generated with ``label_stdev=2``.
    The test asserts that the test-set mean squared error of the noisy run
    exceeds that of the noise-free run.
    """
    from fastFM.mcmc import FMRegression
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    # noise-free data
    X, y, coef = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    fm = FMRegression(rank=2)
    y_pred = fm.fit_predict(sp.csc_matrix(X_train), y_train,
                            sp.csc_matrix(X_test))
    # Score the noise-free fit now, while y_test still holds the labels it
    # was generated with; y_test is rebound to the noisy labels below.
    mse_clean = mean_squared_error(y_pred, y_test)

    # generate data with noisy labels
    X, y, coef = make_user_item_regression(label_stdev=2)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    fm = FMRegression(rank=2)
    y_pred_noise = fm.fit_predict(sp.csc_matrix(X_train), y_train,
                                  sp.csc_matrix(X_test))
    mse_noise = mean_squared_error(y_pred_noise, y_test)

    assert mse_noise > mse_clean
def test_warm_start_path():
    """Compare an ALS warm-start path with fits restarted from scratch.

    A single ``als.FMRegression`` is initialised with ``n_iter=0`` and then
    advanced one iteration at a time via ``n_more_iter``, recording train
    and test RMSE after every step. The same RMSE values are then computed
    from fresh models fitted with ``n_iter = 1 .. n_iter - 1`` and the same
    ``random_state``. Both paths must agree (``assert_almost_equal``).
    """
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, coef = make_user_item_regression(label_stdev=.4)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    n_iter = 10
    rank = 4
    seed = 333
    step_size = 1
    l2_reg_w = 0
    l2_reg_V = 0

    fm = als.FMRegression(n_iter=0, l2_reg_w=l2_reg_w, l2_reg_V=l2_reg_V,
                          rank=rank, random_state=seed)
    # initialize coefs
    fm.fit(X_train, y_train)

    rmse_train = []
    rmse_test = []
    # the loop index is not needed; only the number of steps matters
    for _ in range(1, n_iter):
        fm.fit(X_train, y_train, n_more_iter=step_size)
        rmse_train.append(
            np.sqrt(mean_squared_error(fm.predict(X_train), y_train)))
        rmse_test.append(
            np.sqrt(mean_squared_error(fm.predict(X_test), y_test)))

    print('------- restart ----------')
    values = np.arange(1, n_iter)
    rmse_test_re = []
    rmse_train_re = []
    for i in values:
        # fresh model trained for i iterations from the same seed
        fm = als.FMRegression(n_iter=i, l2_reg_w=l2_reg_w,
                              l2_reg_V=l2_reg_V, rank=rank,
                              random_state=seed)
        fm.fit(X_train, y_train)
        rmse_test_re.append(
            np.sqrt(mean_squared_error(fm.predict(X_test), y_test)))
        rmse_train_re.append(
            np.sqrt(mean_squared_error(fm.predict(X_train), y_train)))

    assert_almost_equal(rmse_train, rmse_train_re)
    assert_almost_equal(rmse_test, rmse_test_re)
def test_warm_start_path():
    """Warm-started ALS must follow the same RMSE path as restarted fits.

    First a model created with ``n_iter=0`` is stepped forward one
    iteration per ``fit(..., n_more_iter=1)`` call, logging train/test RMSE
    after each step. Then, for every iteration count in
    ``np.arange(1, n_iter)``, a new model with the same ``random_state`` is
    fitted from scratch and scored the same way. The two train paths and
    the two test paths are compared with ``assert_almost_equal``.
    """
    from sklearn.model_selection import train_test_split

    def _rmse(model, design, target):
        # root of the mean squared error of the model's predictions
        return np.sqrt(mean_squared_error(model.predict(design), target))

    X, y, coef = make_user_item_regression(label_stdev=.4)
    pieces = train_test_split(X, y, test_size=0.33, random_state=42)
    X_train = sp.csc_matrix(pieces[0])
    X_test = sp.csc_matrix(pieces[1])
    y_train, y_test = pieces[2], pieces[3]

    n_iter, rank, seed, step_size = 10, 4, 333, 1
    shared_kwargs = dict(l2_reg_w=0, l2_reg_V=0, rank=rank,
                         random_state=seed)

    # initalize coefs
    warm = als.FMRegression(n_iter=0, **shared_kwargs)
    warm.fit(X_train, y_train)

    rmse_train, rmse_test = [], []
    for _ in range(1, n_iter):
        warm.fit(X_train, y_train, n_more_iter=step_size)
        rmse_train.append(_rmse(warm, X_train, y_train))
        rmse_test.append(_rmse(warm, X_test, y_test))

    print('------- restart ----------')
    rmse_test_re, rmse_train_re = [], []
    for total_iter in np.arange(1, n_iter):
        fresh = als.FMRegression(n_iter=total_iter, **shared_kwargs)
        fresh.fit(X_train, y_train)
        rmse_test_re.append(_rmse(fresh, X_test, y_test))
        rmse_train_re.append(_rmse(fresh, X_train, y_train))

    assert_almost_equal(rmse_train, rmse_train_re)
    assert_almost_equal(rmse_test, rmse_test_re)
def test_find_init_stdev():
    """Exercise ``mcmc.find_init_stdev`` with and without a validation set.

    Three searches are run on the same MCMC ``FMRegression`` (10 iterations,
    rank 5): over ``[0.2, 0.5, 1.0]``, over the single value ``[5.]``, and
    over ``[0.2, 0.5, 1.0]`` again with ``X_test``/``y_test`` supplied.
    The test asserts that the first result is smaller than the second, that
    supplying the test split selects the same value, and that the error
    reported with the test split is larger than the one reported without.
    """
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, coef = make_user_item_regression(label_stdev=.5)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=44)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    fm = mcmc.FMRegression(n_iter=10, rank=5)

    # search using the training data only
    best_init_stdev, mse = mcmc.find_init_stdev(
        fm, X_train, y_train, stdev_range=[0.2, 0.5, 1.0])
    # a search whose only candidate is 5.0
    best_init_stdev_bad, _ = mcmc.find_init_stdev(
        fm, X_train, y_train, stdev_range=[5.])
    print('--' * 30)
    # same candidates, this time with the held-out split passed in
    best_init_stdev_vali, mse_vali = mcmc.find_init_stdev(
        fm, X_train, y_train, X_test, y_test,
        stdev_range=[0.2, 0.5, 1.0])

    assert best_init_stdev < best_init_stdev_bad
    assert best_init_stdev_vali == best_init_stdev
    assert mse_vali > mse
def test_mcmc_warm_start():
    """MCMC warm start (50 + 50 iterations) must match a 100-iteration run.

    A reference ``mcmc.FMRegression`` is run with ``n_iter=100``. A second
    model is run with ``n_iter=50`` and continued through
    ``fit_predict(..., n_more_iter=50)``. The test-set mean squared errors
    of the reference and the continued run must agree to two decimals.
    """
    from sklearn.model_selection import train_test_split

    features, targets, _ = make_user_item_regression(label_stdev=0)
    pieces = train_test_split(features, targets,
                              test_size=0.33, random_state=44)
    train_X = sp.csc_matrix(pieces[0])
    test_X = sp.csc_matrix(pieces[1])
    train_y, test_y = pieces[2], pieces[3]

    # single run of 100 iterations
    reference = mcmc.FMRegression(n_iter=100, rank=2)
    mse_full = mean_squared_error(
        reference.fit_predict(train_X, train_y, test_X), test_y)

    # 50 iterations, then 50 more on the same model
    resumed = mcmc.FMRegression(n_iter=50, rank=2)
    mse_half = mean_squared_error(
        resumed.fit_predict(train_X, train_y, test_X), test_y)
    mse_resumed = mean_squared_error(
        resumed.fit_predict(train_X, train_y, test_X, n_more_iter=50),
        test_y)

    print(mse_half, mse_resumed, mse_full)
    print(resumed.hyper_param_)
    assert_almost_equal(mse_full, mse_resumed, decimal=2)
def test_mcmc_warm_start():
    """MCMC warm start (50 + 50 iterations) must match a 100-iteration run.

    A reference ``mcmc.FMRegression`` is run with ``n_iter=100``. A second
    model is run with ``n_iter=50`` and continued through
    ``fit_predict(..., n_more_iter=50)``. The test-set mean squared errors
    of the reference and the continued run must agree to two decimals.
    """
    # sklearn.cross_validation no longer exists in scikit-learn;
    # train_test_split lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X, y, coef = make_user_item_regression(label_stdev=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=44)
    X_train = sp.csc_matrix(X_train)
    X_test = sp.csc_matrix(X_test)

    # reference: 100 iterations in one go
    fm = mcmc.FMRegression(n_iter=100, rank=2)
    y_pred = fm.fit_predict(X_train, y_train, X_test)
    error_100_iter = mean_squared_error(y_pred, y_test)

    # 50 iterations ...
    fm = mcmc.FMRegression(n_iter=50, rank=2)
    y_pred = fm.fit_predict(X_train, y_train, X_test)
    error_50_iter = mean_squared_error(y_pred, y_test)

    # ... continued for 50 more on the same model
    y_pred = fm.fit_predict(X_train, y_train, X_test, n_more_iter=50)
    error_50_iter_plus_50 = mean_squared_error(y_pred, y_test)

    print(error_50_iter, error_50_iter_plus_50, error_100_iter)
    print(fm.hyper_param_)
    assert_almost_equal(error_100_iter, error_50_iter_plus_50, decimal=2)
# Example script: fit an ALS regression model and an SGD classifier on a
# small synthetic user/item dataset and print their scores.
import numpy as np
from fastFM import als
from fastFM import sgd
from fastFM.datasets import make_user_item_regression
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score
# sklearn.cross_validation no longer exists in scikit-learn;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# This sets up a small test dataset.
X, y, _ = make_user_item_regression(label_stdev=.4)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Regression with the ALS solver.
fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=2,
                      l2_reg_w=0.1, l2_reg_V=0.5)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)
print('mse:', mean_squared_error(y_test, y_pred))

# Convert dataset to binary classification task:
# targets below the mean become -1, all others stay +1.
y_labels = np.ones_like(y)
y_labels[y < np.mean(y)] = -1
X_train, X_test, y_train, y_test = train_test_split(X, y_labels)

# Classification with the SGD solver.
fm = sgd.FMClassification(n_iter=1000, init_stdev=0.1, l2_reg_w=0,
                          l2_reg_V=0, rank=2, step_size=0.1)
fm.fit(X_train, y_train)
y_pred = fm.predict(X_test)
# Not used in the visible part of the script; kept in case later code
# (e.g. an AUC report) relies on it.
y_pred_proba = fm.predict_proba(X_test)
print('acc:', accuracy_score(y_test, y_pred))