示例#1
0
def test_als_batch_accuracy():
    """Compare LU- and CD-trained ``als.BiasedMF`` on the ``lktu.ml100k`` ratings.

    Both models are fit on every fold produced by
    ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))``.  The pooled MAE
    and the mean of the per-user RMSE are then checked, for each solver,
    against fixed reference values.
    """
    # NOTE: `basic` is imported here but not referenced by the code below.
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu')
    cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd')

    def score_fold(train, test):
        """Fit both solvers on *train* and attach their predictions to *test*."""
        _log.info('training LU')
        lu_algo.fit(train)
        _log.info('training CD')
        cd_algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        lu_scores = lu_algo.predict(test)
        cd_scores = cd_algo.predict(test)
        return test.assign(lu_pred=lu_scores, cd_pred=cd_scores)

    fold_frames = [
        score_fold(train, test)
        for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    ]
    preds = pd.concat(fold_frames)

    # Log how far apart the two solvers' predictions are, largest gaps first.
    solver_gap = preds.lu_pred - preds.cd_pred
    preds['abs_diff'] = np.abs(solver_gap)
    _log.info('predictions:\n%s', preds.sort_values('abs_diff', ascending=False))
    _log.info('diff summary:\n%s', preds.abs_diff.describe())

    solver_columns = ('lu_pred', 'cd_pred')

    # Pooled MAE: LU first, then CD.
    for column in solver_columns:
        pooled_mae = pm.mae(preds[column], preds.rating)
        assert pooled_mae == approx(0.73, abs=0.025)

    # Mean per-user RMSE: LU first, then CD.
    for column in solver_columns:
        user_rmse = preds.groupby('user').apply(
            lambda df, column=column: pm.rmse(df[column], df.rating))
        assert user_rmse.mean() == approx(0.91, abs=0.05)
示例#2
0
def test_mae_series_two():
    """``pm.mae`` on two-element pandas Series returns a float of the expected size."""
    perfect = pm.mae(pd.Series([1, 2]), pd.Series([1, 2]))
    assert isinstance(perfect, float)
    assert perfect == approx(0)

    # (predictions, truth, expected MAE)
    cases = [
        ([1, 1], [2, 2], 1),
        ([1, 3], [3, 1], 2),
    ]
    for predicted, truth, expected in cases:
        result = pm.mae(pd.Series(predicted), pd.Series(truth))
        assert result == approx(expected)
示例#3
0
def test_mae_array_two():
    """``pm.mae`` on two-element NumPy arrays returns a float of the expected size."""
    perfect = pm.mae(np.array([1, 2]), np.array([1, 2]))
    assert isinstance(perfect, float)
    assert perfect == approx(0)

    # (predictions, truth, expected MAE)
    cases = [
        ([1, 1], [2, 2], 1),
        ([1, 3], [3, 1], 2),
    ]
    for predicted, truth, expected in cases:
        result = pm.mae(np.array(predicted), np.array(truth))
        assert result == approx(expected)
示例#4
0
def test_mae_two():
    """``pm.mae`` on two-element plain lists returns a float of the expected size."""
    perfect = pm.mae([1, 2], [1, 2])
    assert isinstance(perfect, float)
    assert perfect == approx(0)

    # (predictions, truth, expected MAE)
    cases = [
        ([1, 1], [2, 2], 1),
        ([1, 3], [3, 1], 2),
        ([1, 3], [3, 2], 1.5),
    ]
    for predicted, truth, expected in cases:
        result = pm.mae(predicted, truth)
        assert result == approx(expected)
示例#5
0
def test_fsvd_batch_accuracy():
    """Check ``svd.FunkSVD`` (wrapped in a ``Fallback`` to ``Bias``) on ``lktu.ml100k``.

    The wrapped algorithm is fit and batch-predicted on every fold from
    ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))``; the pooled MAE
    and the mean per-user RMSE must match fixed reference values.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    funk = svd.FunkSVD(25, 125, damping=10)
    algo = basic.Fallback(funk, bias.Bias(damping=10))

    def run_fold(train, test):
        """Fit on *train* and return batch predictions for *test*."""
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    fold_frames = [
        run_fold(train, test)
        for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    ]
    preds = pd.concat(fold_frames)

    pooled_mae = pm.mae(preds.prediction, preds.rating)
    assert pooled_mae == approx(0.74, abs=0.025)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    per_user = preds.groupby('user').apply(rmse_of)
    assert per_user.mean() == approx(0.92, abs=0.05)
示例#6
0
def test_ii_batch_accuracy():
    """Check ``knn.ItemItem(30)`` (wrapped in a ``Fallback`` to ``Bias``) on ``lktu.ml100k``.

    Each fold from ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))`` is
    fit and then batch-predicted with ``n_jobs=4``; the pooled MAE and the
    mean per-user RMSE must match fixed reference values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    item_knn = knn.ItemItem(30)
    algo = basic.Fallback(item_knn, basic.Bias())

    def run_fold(train, test):
        """Fit on *train* and return batch predictions for *test*."""
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test, n_jobs=4)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(run_fold(train, test) for train, test in folds)

    pooled_mae = pm.mae(preds.prediction, preds.rating)
    assert pooled_mae == approx(0.70, abs=0.025)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    per_user = preds.groupby('user').apply(rmse_of)
    assert per_user.mean() == approx(0.90, abs=0.05)
示例#7
0
def test_tf_bmf_batch_accuracy(tf_session):
    """Check ``lktf.BiasedMF`` (wrapped in a ``Fallback`` to ``Bias``) on ``lktu.ml100k``.

    The ``tf_session`` argument is requested but not used directly in the
    body.  Each fold from ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))``
    is fit and batch-predicted; the pooled MAE and the mean per-user RMSE
    must match fixed reference values.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    tf_mf = lktf.BiasedMF(
        25,
        damping=10,
        batch_size=1024,
        epochs=20,
        rng_spec=42,
    )
    algo = basic.Fallback(tf_mf, bias.Bias(damping=10))

    def run_fold(train, test):
        """Fit on *train* and return batch predictions for *test*."""
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    fold_frames = [
        run_fold(train, test)
        for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    ]
    preds = pd.concat(fold_frames)

    pooled_mae = pm.mae(preds.prediction, preds.rating)
    assert pooled_mae == approx(0.83, abs=0.025)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    per_user = preds.groupby('user').apply(rmse_of)
    assert per_user.mean() == approx(1.03, abs=0.05)
示例#8
0
def test_tf_isvd(ml20m):
    """Check ``lenskit_tf.IntegratedBiasMF(20)`` on folds sampled from ``ml20m``.

    Folds come from ``xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2))``;
    the pooled MAE and the mean per-user RMSE must match fixed reference
    values.
    """
    algo = lenskit_tf.IntegratedBiasMF(20)

    def run_fold(train, test):
        """Fit on *train* and return batch predictions for *test*."""
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    fold_frames = [
        run_fold(train, test)
        for train, test in xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2))
    ]
    preds = pd.concat(fold_frames)

    pooled_mae = pm.mae(preds.prediction, preds.rating)
    assert pooled_mae == approx(0.60, abs=0.025)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    per_user = preds.groupby('user').apply(rmse_of)
    assert per_user.mean() == approx(0.92, abs=0.05)
示例#9
0
def test_global_metric():
    """``pm.global_metric`` must equal the metric applied to the whole prediction frame.

    With no ``metric`` argument the result is compared to ``pm.rmse``; with
    ``metric=pm.mae`` it is compared to ``pm.mae``.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    # Only the first sampled split is needed.
    splits = xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(splits)

    bias_model = Bias()
    bias_model.fit(train)
    preds = batch.predict(bias_model, test)

    default_result = pm.global_metric(preds)
    expected_rmse = pm.rmse(preds.prediction, preds.rating)
    assert default_result == expected_rmse

    mae_result = pm.global_metric(preds, metric=pm.mae)
    expected_mae = pm.mae(preds.prediction, preds.rating)
    assert mae_result == expected_mae
示例#10
0
def test_uu_batch_accuracy():
    """Check ``knn.UserUser(30)`` (wrapped in a ``Fallback`` to ``Bias``) on ``lktu.ml100k``.

    Each fold from ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))`` is
    handed to the module-level ``__batch_eval`` helper as an
    ``(algo, train, test)`` tuple; the pooled MAE and the mean per-user RMSE
    of the concatenated results must match fixed reference values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    user_knn = knn.UserUser(30)
    algo = basic.Fallback(user_knn, basic.Bias())

    fold_frames = [
        __batch_eval((algo, train, test))
        for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    ]
    preds = pd.concat(fold_frames)

    pooled_mae = pm.mae(preds.prediction, preds.rating)
    assert pooled_mae == approx(0.71, abs=0.028)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    per_user = preds.groupby('user').apply(rmse_of)
    assert per_user.mean() == approx(0.91, abs=0.055)
示例#11
0
def test_user_metric():
    """``pm.user_metric`` must equal the mean of the metric computed per user.

    With no ``metric`` argument the result is compared to the mean per-user
    ``pm.rmse``; with ``metric=pm.mae`` it is compared to the mean per-user
    ``pm.mae``.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    # Only the first sampled split is needed.
    splits = xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(splits)

    bias_model = Bias()
    bias_model.fit(train)
    preds = batch.predict(bias_model, test)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    def mae_of(group):
        return pm.mae(group.prediction, group.rating)

    default_result = pm.user_metric(preds)
    per_user_rmse = preds.groupby('user').apply(rmse_of)
    assert default_result == approx(per_user_rmse.mean())

    mae_result = pm.user_metric(preds, metric=pm.mae)
    per_user_mae = preds.groupby('user').apply(mae_of)
    assert mae_result == approx(per_user_mae.mean())
示例#12
0
def test_als_batch_accuracy():
    """Check ``als.BiasedMF`` (wrapped in a ``Fallback`` to ``Bias``) on ``lktu.ml100k``.

    Ratings come from ``lktu.ml100k.load_ratings()``.  The wrapped algorithm
    is fit on each fold from
    ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))`` and its
    ``predict`` output is attached to the test frame; the pooled MAE and the
    mean per-user RMSE must match fixed reference values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.load_ratings()

    mf_model = als.BiasedMF(25, iterations=20, damping=5)
    algo = basic.Fallback(mf_model, basic.Bias(damping=5))

    def run_fold(train, test):
        """Fit on *train* and return *test* with a ``prediction`` column added."""
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        scores = algo.predict(test)
        return test.assign(prediction=scores)

    fold_frames = [
        run_fold(train, test)
        for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    ]
    preds = pd.concat(fold_frames)

    pooled_mae = pm.mae(preds.prediction, preds.rating)
    assert pooled_mae == approx(0.73, abs=0.025)

    def rmse_of(group):
        return pm.rmse(group.prediction, group.rating)

    per_user = preds.groupby('user').apply(rmse_of)
    assert per_user.mean() == approx(0.91, abs=0.05)