Пример #1
0
def test_fsvd_batch_accuracy():
    """Batch-accuracy check for FunkSVD (with bias fallback) on ML-100K.

    Trains on 5 user-partitioned folds (20% sample held out per user)
    and asserts the overall MAE and the mean per-user RMSE fall in
    historically-observed ranges.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    svd_algo = svd.FunkSVD(25, 125, damping=10)
    # Fall back to a damped bias model for pairs FunkSVD cannot score.
    algo = basic.Fallback(svd_algo, bias.Bias(damping=10))

    # Named `_eval` (not `eval`) so the builtin is not shadowed.
    def _eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(_eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.92, abs=0.05)
Пример #2
0
def test_fallback_predict():
    """Fallback serves memorized values first, then bias-model estimates."""
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.fit(lktu.ml_pandas.renamed.ratings)
    assert len(algo.algorithms) == 2

    bias = algo.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def bias_score(user, item):
        # expected bias prediction for a known user/item pair
        return bias.mean_ + bias.user_offsets_.loc[user] + bias.item_offsets_.loc[item]

    # first user + item: the memorized rating wins
    assert algo.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item: falls through to the bias model
    assert algo.predict_for_user(15, [1]).loc[1] == approx(bias_score(15, 1))

    # second item + user item
    assert algo.predict_for_user(12, [2]).loc[2] == approx(bias_score(12, 2))

    # blended: one memorized item, one bias item
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_score(10, 5))

    # blended with an unknown item
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_score(10, 5))
    # unknown item: only the global mean and user offset apply
    assert preds.loc[-23081] == approx(bias.mean_ + bias.user_offsets_.loc[10])
Пример #3
0
def test_ii_batch_accuracy():
    """Batch-accuracy check for item-item kNN (with bias fallback) on ML-100K.

    Evaluates 5 user-partitioned folds and asserts MAE and mean
    per-user RMSE fall in the expected ranges.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    ii_algo = knn.ItemItem(30)
    # Fall back to a bias model for pairs item-item cannot score.
    algo = basic.Fallback(ii_algo, basic.Bias())

    # Named `_eval` (not `eval`) so the builtin is not shadowed.
    def _eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test, n_jobs=4)

    preds = pd.concat((_eval(train, test)
                       for (train, test)
                       in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.70, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.90, abs=0.05)
Пример #4
0
def test_tf_bmf_batch_accuracy(tf_session):
    """Batch-accuracy check for the TensorFlow BiasedMF on ML-100K.

    Trains with a fixed RNG seed for reproducibility and asserts MAE
    and mean per-user RMSE over 5 user-partitioned folds.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    algo = lktf.BiasedMF(25,
                         damping=10,
                         batch_size=1024,
                         epochs=20,
                         rng_spec=42)
    # Fall back to a damped bias model for pairs the MF cannot score.
    algo = basic.Fallback(algo, bias.Bias(damping=10))

    # Named `_eval` (not `eval`) so the builtin is not shadowed.
    def _eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(_eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.83, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(1.03, abs=0.05)
Пример #5
0
def test_fallback_train_one():
    """Fitting a single-algorithm Fallback trains that one algorithm."""
    algo = basic.Fallback(basic.Bias())
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 1
    trained = algo.algorithms[0]
    assert isinstance(trained, basic.Bias)
    # the inner Bias picked up the global mean from the training data
    assert trained.mean_ == approx(lktu.ml_test.ratings.rating.mean())
 def _create_recommender_algorithm_with_fallback(algo, aggregation):
     """Wrap *algo* with a damped-bias fallback predictor.

     Uses GroupPredictorFallback when group aggregation is requested,
     otherwise a plain basic.Fallback.
     """
     base = basic.Bias(damping=DAMPING_FACTOR)
     if aggregation == Aggregation.NONE:
         return basic.Fallback(algo, base)
     return GroupPredictorFallback(algo, base)
Пример #7
0
def test_fallback_save_load(tmp_path):
    """A Fallback round-tripped through save()/load() predicts identically."""
    tmp_path = lktu.norm_path(tmp_path)

    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_pandas.renamed.ratings)

    fn = tmp_path / 'fallback'
    original.save(fn)

    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.load(fn)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def expected(user, item):
        # bias-model score; user/item may be None when unknown
        score = bias.mean_
        if user is not None:
            score += bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item: memorized rating wins
    assert algo.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item: bias fallback
    assert algo.predict_for_user(15, [1]).loc[1] == approx(expected(15, 1))

    # second item + user item
    assert algo.predict_for_user(12, [2]).loc[2] == approx(expected(12, 2))

    # blended
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))

    # blended with an unknown item
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))
    assert preds.loc[-23081] == approx(expected(10, None))
Пример #8
0
def test_fallback_clone():
    """Cloning a Fallback deep-copies each constituent algorithm."""
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    dup = lku.clone(algo)
    assert dup is not algo
    for orig_inner, dup_inner in zip(algo.algorithms, dup.algorithms):
        # fresh objects of the same concrete class
        assert dup_inner is not orig_inner
        assert type(dup_inner) == type(orig_inner)
Пример #9
0
def test_fallback_list():
    """Fallback accepts a list and exposes it through get_params()."""
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    params = algo.get_params()
    assert list(params.keys()) == ['algorithms']
    inner = params['algorithms']
    assert len(inner) == 2
    assert isinstance(inner[0], basic.Memorized)
    assert isinstance(inner[1], basic.Bias)
Пример #10
0
def test_fallback_save_load(tmp_path):
    """A Fallback round-tripped through save()/load() predicts identically."""
    tmp_path = lktu.norm_path(tmp_path)

    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_pandas.renamed.ratings)

    fn = tmp_path / 'fallback'
    original.save(fn)

    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.load(fn)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def bias_score(user, item):
        # expected bias prediction for a known user/item pair
        return bias.mean_ + bias.user_offsets_.loc[user] + bias.item_offsets_.loc[item]

    # first user + item: memorized rating wins
    assert algo.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item: bias fallback
    assert algo.predict_for_user(15, [1]).loc[1] == approx(bias_score(15, 1))

    # second item + user item
    assert algo.predict_for_user(12, [2]).loc[2] == approx(bias_score(12, 2))

    # blended
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_score(10, 5))

    # blended with an unknown item
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(bias_score(10, 5))
    # unknown item: only the global mean and user offset apply
    assert preds.loc[-23081] == approx(bias.mean_ + bias.user_offsets_.loc[10])
Пример #11
0
def test_fallback_train_one_pred_impossible():
    """With only a Memorized algorithm, unmemorized pairs come back NaN."""
    algo = basic.Fallback(basic.Memorized(simple_df))
    algo.fit(lktu.ml_test.ratings)

    preds = algo.predict_for_user(10, [1, 2])
    assert set(preds.index) == {1, 2}
    assert all(preds == pd.Series({1: 4.0, 2: 5.0}))

    preds = algo.predict_for_user(12, [1, 3])
    assert set(preds.index) == {1, 3}
    assert preds.loc[1] == 3.0
    # item 3 was never memorized and there is no fallback, so it is NaN
    assert np.isnan(preds.loc[3])
Пример #12
0
def test_fallback_save_load(tmp_path):
    """A Fallback round-tripped through pickle predicts identically."""
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_test.ratings)

    fn = tmp_path / 'fb.mod'

    with fn.open('wb') as f:
        pickle.dump(original, f)

    with fn.open('rb') as f:
        algo = pickle.load(f)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def expected(user, item):
        # bias-model score; user/item may be None when unknown
        score = bias.mean_
        if user is not None:
            score += bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item: memorized rating wins
    assert algo.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item: bias fallback
    assert algo.predict_for_user(15, [1]).loc[1] == approx(expected(15, 1))

    # second item + user item
    assert algo.predict_for_user(12, [2]).loc[2] == approx(expected(12, 2))

    # blended
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))

    # blended with an unknown item
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))
    assert preds.loc[-23081] == approx(expected(10, None))
Пример #13
0
def test_uu_batch_accuracy():
    """Batch-accuracy check for user-user kNN (with bias fallback) on ML-100K."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    uu_algo = knn.UserUser(30)
    algo = basic.Fallback(uu_algo, basic.Bias())

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(__batch_eval((algo, train, test))
                      for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.71, abs=0.028)

    per_user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert per_user_rmse.mean() == approx(0.91, abs=0.055)
Пример #14
0
def test_fallback_predict():
    """Fallback serves memorized values first, then bias-model estimates."""
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    bias = algo.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def expected(user, item):
        # bias-model score; user/item may be None when unknown
        score = bias.mean_
        if user is not None:
            score += bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item: memorized rating wins
    assert algo.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item: bias fallback
    assert algo.predict_for_user(15, [1]).loc[1] == approx(expected(15, 1))

    # second item + user item
    assert algo.predict_for_user(12, [2]).loc[2] == approx(expected(12, 2))

    # blended
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))

    # blended with an unknown item
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(expected(10, 5))
    assert preds.loc[-23081] == approx(expected(10, None))
Пример #15
0
def test_als_batch_accuracy():
    """Batch-accuracy check for ALS BiasedMF (with bias fallback) on ML-100K.

    Evaluates 5 user-partitioned folds and asserts MAE and mean
    per-user RMSE fall in the expected ranges.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.load_ratings()

    svd_algo = als.BiasedMF(25, iterations=20, damping=5)
    # Fall back to a damped bias model for pairs ALS cannot score.
    algo = basic.Fallback(svd_algo, basic.Bias(damping=5))

    # Named `_eval` (not `eval`) so the builtin is not shadowed.
    def _eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(prediction=algo.predict(test))

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(_eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.73, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.05)
Пример #16
0
def test_fallback_string():
    """The string form of a Fallback mentions its class name."""
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    rendered = str(algo)
    assert 'Fallback' in rendered