def test_fsvd_batch_accuracy():
    """Cross-validated prediction accuracy of FunkSVD with a bias fallback.

    Runs 5-fold user partitioning (20% sample per user) over ML-100K and
    checks overall MAE and mean per-user RMSE against reference values.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    svd_algo = svd.FunkSVD(25, 125, damping=10)
    algo = basic.Fallback(svd_algo, bias.Bias(damping=10))

    # renamed from `eval`, which shadowed the builtin of the same name
    def eval_fold(train, test):
        # fit on the training partition, predict the held-out ratings
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval_fold(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.92, abs=0.05)
def test_fallback_predict():
    """Fallback uses the memorized rating when present, the bias model otherwise."""
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.fit(lktu.ml_pandas.renamed.ratings)
    assert len(algo.algorithms) == 2

    bias = algo.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def bias_score(user, item):
        # expected bias-model prediction; item may be None for an unknown item
        score = bias.mean_ + bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item: memorized value wins
    preds = algo.predict_for_user(10, [1])
    assert preds.loc[1] == 4.0

    # second user + first item: falls back to the bias model
    preds = algo.predict_for_user(15, [1])
    assert preds.loc[1] == approx(bias_score(15, 1))

    # second item + user item
    preds = algo.predict_for_user(12, [2])
    assert preds.loc[2] == approx(bias_score(12, 2))

    # blended: one memorized, one bias-predicted
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_score(10, 5))

    # blended with an unknown item id
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_score(10, 5))
    assert preds.loc[-23081] == approx(bias_score(10, None))
def test_ii_batch_accuracy():
    """Cross-validated accuracy of item-item kNN with a bias fallback.

    5-fold user partitioning over ML-100K; batch prediction runs with 4
    worker jobs. Checks overall MAE and mean per-user RMSE.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    ii_algo = knn.ItemItem(30)
    algo = basic.Fallback(ii_algo, basic.Bias())

    # renamed from `eval`, which shadowed the builtin of the same name
    def eval_fold(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test, n_jobs=4)

    preds = pd.concat((eval_fold(train, test)
                       for (train, test)
                       in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.70, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.90, abs=0.05)
def test_tf_bmf_batch_accuracy(tf_session):
    """Cross-validated accuracy of the TensorFlow BiasedMF model.

    Uses a fixed RNG spec (42) for reproducibility, a bias fallback for
    unpredictable pairs, and checks MAE / mean per-user RMSE over 5 folds.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    algo = lktf.BiasedMF(25, damping=10, batch_size=1024, epochs=20, rng_spec=42)
    algo = basic.Fallback(algo, bias.Bias(damping=10))

    # renamed from `eval`, which shadowed the builtin of the same name
    def eval_fold(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval_fold(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.83, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(1.03, abs=0.05)
def test_fallback_train_one():
    """Fitting a single-component Fallback trains that lone component."""
    algo = basic.Fallback(basic.Bias())
    algo.fit(lktu.ml_test.ratings)

    assert len(algo.algorithms) == 1
    trained = algo.algorithms[0]
    assert isinstance(trained, basic.Bias)
    # the trained bias model should carry the global mean rating
    expected_mean = lktu.ml_test.ratings.rating.mean()
    assert trained.mean_ == approx(expected_mean)
def _create_recommender_algorithm_with_fallback(algo, aggregation):
    """Wrap *algo* with a damped-bias fallback suited to *aggregation*.

    Individual (no-aggregation) recommendation uses the plain Fallback;
    any group-aggregation mode uses GroupPredictorFallback instead.
    """
    fallback = basic.Bias(damping=DAMPING_FACTOR)
    if aggregation == Aggregation.NONE:
        return basic.Fallback(algo, fallback)
    return GroupPredictorFallback(algo, fallback)
def test_fallback_save_load(tmp_path):
    """Round-trip a fitted Fallback through save/load and re-check predictions."""
    tmp_path = lktu.norm_path(tmp_path)
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_pandas.renamed.ratings)
    fn = tmp_path / 'fallback'
    original.save(fn)

    # load into a freshly-constructed, untrained instance
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.load(fn)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def expected(user, item):
        # bias-model prediction; either id may be None (meaning unknown)
        value = bias.mean_
        if user is not None:
            value += bias.user_offsets_.loc[user]
        if item is not None:
            value += bias.item_offsets_.loc[item]
        return value

    # memorized rating wins for a known (user, item) pair
    preds = algo.predict_for_user(10, [1])
    assert preds.loc[1] == 4.0

    # pairs outside the memorized data fall back to the bias model
    preds = algo.predict_for_user(15, [1])
    assert preds.loc[1] == approx(expected(15, 1))

    preds = algo.predict_for_user(12, [2])
    assert preds.loc[2] == approx(expected(12, 2))

    # blended: one memorized item plus one bias-predicted item
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))

    # blended with an unknown item id (only global + user components)
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))
    assert preds.loc[-23081] == approx(expected(10, None))
def test_fallback_clone():
    """Cloning a fitted Fallback yields fresh components of the same classes."""
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    clone = lku.clone(algo)
    assert clone is not algo
    for a1, a2 in zip(algo.algorithms, clone.algorithms):
        # distinct objects, but exactly the same class — identity (`is`)
        # is the idiomatic comparison for type objects, not `==`
        assert a1 is not a2
        assert type(a2) is type(a1)
def test_fallback_list():
    """A Fallback built from a list exposes its components via get_params."""
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    params = algo.get_params()
    # the only parameter is the component list, in construction order
    assert list(params.keys()) == ['algorithms']
    components = params['algorithms']
    assert len(components) == 2
    assert isinstance(components[0], basic.Memorized)
    assert isinstance(components[1], basic.Bias)
def test_fallback_save_load(tmp_path):
    """Persist a fitted Fallback to disk; the reloaded copy predicts identically."""
    # NOTE(review): this name duplicates another test_fallback_save_load in this
    # module — if both live in one file, only the last definition is collected.
    tmp_path = lktu.norm_path(tmp_path)
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_pandas.renamed.ratings)
    model_file = tmp_path / 'fallback'
    original.save(model_file)

    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.load(model_file)

    bias = algo.algorithms[1]
    gmean = bias.mean_
    uoff = bias.user_offsets_
    ioff = bias.item_offsets_
    assert gmean == approx(lktu.ml_pandas.ratings.rating.mean())

    # memorized value for a known pair
    preds = algo.predict_for_user(10, [1])
    assert preds.loc[1] == 4.0

    # bias fallback for a pair the memorized model lacks
    preds = algo.predict_for_user(15, [1])
    assert preds.loc[1] == approx(gmean + uoff.loc[15] + ioff.loc[1])

    preds = algo.predict_for_user(12, [2])
    assert preds.loc[2] == approx(gmean + uoff.loc[12] + ioff.loc[2])

    # one memorized item plus one bias-predicted item
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(gmean + uoff.loc[10] + ioff.loc[5])

    # unknown item id gets only the global mean + user offset
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(gmean + uoff.loc[10] + ioff.loc[5])
    assert preds.loc[-23081] == approx(gmean + uoff.loc[10])
def test_fallback_train_one_pred_impossible():
    """A lone Memorized fallback predicts NaN for pairs it never saw."""
    algo = basic.Fallback(basic.Memorized(simple_df))
    algo.fit(lktu.ml_test.ratings)

    # both items memorized for user 10
    preds = algo.predict_for_user(10, [1, 2])
    assert set(preds.index) == {1, 2}
    assert all(preds == pd.Series({1: 4.0, 2: 5.0}))

    # item 3 is unseen, so with no further fallback it comes back as NaN
    preds = algo.predict_for_user(12, [1, 3])
    assert set(preds.index) == {1, 3}
    assert preds.loc[1] == 3.0
    assert np.isnan(preds.loc[3])
def test_fallback_save_load(tmp_path):
    """Pickle round-trip of a fitted Fallback preserves its predictions."""
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_test.ratings)

    fn = tmp_path / 'fb.mod'
    with fn.open('wb') as f:
        pickle.dump(original, f)
    with fn.open('rb') as f:
        algo = pickle.load(f)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def bias_pred(user, item):
        # expected bias prediction; None means the id is unknown
        total = bias.mean_
        if user is not None:
            total += bias.user_offsets_.loc[user]
        if item is not None:
            total += bias.item_offsets_.loc[item]
        return total

    # memorized rating for the first (user, item) pair
    preds = algo.predict_for_user(10, [1])
    assert preds.loc[1] == 4.0

    # pairs outside the memorized data use the bias model
    preds = algo.predict_for_user(15, [1])
    assert preds.loc[1] == approx(bias_pred(15, 1))

    preds = algo.predict_for_user(12, [2])
    assert preds.loc[2] == approx(bias_pred(12, 2))

    # blend of memorized and bias predictions
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_pred(10, 5))

    # unknown item id: global mean + user offset only
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_pred(10, 5))
    assert preds.loc[-23081] == approx(bias_pred(10, None))
def test_uu_batch_accuracy():
    """Cross-validated accuracy of user-user kNN with a bias fallback on ML-100K."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings
    algo = basic.Fallback(knn.UserUser(30), basic.Bias())

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    fold_preds = [__batch_eval((algo, train, test)) for (train, test) in folds]
    preds = pd.concat(fold_preds)

    assert pm.mae(preds.prediction, preds.rating) == approx(0.71, abs=0.028)
    per_user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert per_user_rmse.mean() == approx(0.91, abs=0.055)
def test_fallback_predict():
    """Fallback prefers memorized ratings and defers to the bias model otherwise."""
    # NOTE(review): this name duplicates another test_fallback_predict in this
    # module — if both live in one file, only the last definition is collected.
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    bias = algo.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    gmean = bias.mean_
    uoff = bias.user_offsets_
    ioff = bias.item_offsets_

    # first user + item: memorized value
    preds = algo.predict_for_user(10, [1])
    assert preds.loc[1] == 4.0

    # second user + first item: bias fallback
    preds = algo.predict_for_user(15, [1])
    assert preds.loc[1] == approx(gmean + uoff.loc[15] + ioff.loc[1])

    # second item + user item
    preds = algo.predict_for_user(12, [2])
    assert preds.loc[2] == approx(gmean + uoff.loc[12] + ioff.loc[2])

    # blended: memorized + bias
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(gmean + uoff.loc[10] + ioff.loc[5])

    # blended with an unknown item (no item offset available)
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(gmean + uoff.loc[10] + ioff.loc[5])
    assert preds.loc[-23081] == approx(gmean + uoff.loc[10])
def test_als_batch_accuracy():
    """Cross-validated accuracy of ALS BiasedMF with a damped-bias fallback.

    5-fold user partitioning over ML-100K; checks overall MAE and mean
    per-user RMSE against reference values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.load_ratings()

    svd_algo = als.BiasedMF(25, iterations=20, damping=5)
    algo = basic.Fallback(svd_algo, basic.Bias(damping=5))

    # renamed from `eval`, which shadowed the builtin of the same name
    def eval_fold(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(prediction=algo.predict(test))

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval_fold(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.73, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.05)
def test_fallback_string():
    """The string form of a Fallback should mention its class name."""
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    rendered = str(algo)
    assert 'Fallback' in rendered