Exemplo n.º 1
0
def get_topn_algo_class(algo):
    """Look up a top-N recommender by name.

    Args:
        algo(str): one of 'popular', 'bias', 'itemitem', 'useruser',
            'biasedmf', 'implicitmf', 'funksvd', 'bpr'.

    Returns:
        An unfitted algorithm instance, or None for an unknown name
        (matching the original elif chain's implicit fall-through).
    """
    # Factories are lazy (lambdas) so only the requested algorithm is
    # ever constructed; the removed commented-out 'topn' branch
    # duplicated 'bias' and was dead code.
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: basic.TopN(
            iknn.ItemItem(center=False, aggregate='sum')),
        'useruser': lambda: basic.TopN(
            uknn.UserUser(center=False, aggregate='sum')),
        'biasedmf': lambda: basic.TopN(als.BiasedMF(50, iterations=10)),
        'implicitmf': lambda: basic.TopN(als.ImplicitMF(20, iterations=10)),
        'funksvd': lambda: basic.TopN(svd.FunkSVD(20, iterations=20)),
        'bpr': lambda: basic.TopN(BPR(25)),
    }
    make = factories.get(algo)
    return make() if make is not None else None
Exemplo n.º 2
0
def test_uu_batch_accuracy():
    """Check user-user kNN prediction accuracy on ML-100K via cross-validation."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    all_ratings = lktu.ml100k.load_ratings()

    # user-user kNN with a bias fallback for pairs it cannot score
    predictor = basic.Fallback(knn.UserUser(30), basic.Bias())

    # 5-fold user partition, 20% of each test user's ratings held out
    parts = xf.partition_users(all_ratings, 5, xf.SampleFrac(0.2))
    frames = []
    for train, test in parts:
        frames.append(__batch_eval((predictor, train, test)))
    predictions = pd.concat(frames)

    assert pm.mae(predictions.prediction, predictions.rating) == \
        approx(0.71, abs=0.028)

    per_user = predictions.groupby('user').apply(
        lambda frame: pm.rmse(frame.prediction, frame.rating))
    assert per_user.mean() == approx(0.91, abs=0.055)
Exemplo n.º 3
0
def test_fallback_save_load(tmp_path):
    """Round-trip a fitted Fallback model through binpickle and re-check predictions."""
    model = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    model.fit(lktu.ml_test.ratings)

    path = tmp_path / 'fb.mod'
    binpickle.dump(model, path)
    restored = binpickle.load(path)

    bias = restored.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def expected(user, item):
        # global mean plus whichever offsets apply (None -> skip that term)
        score = bias.mean_
        if user is not None:
            score += bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item
    assert restored.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item
    assert restored.predict_for_user(15, [1]).loc[1] == approx(expected(15, 1))
    # second item + user item
    assert restored.predict_for_user(12, [2]).loc[2] == approx(expected(12, 2))

    # blended: one memorized value, one bias value
    blended = restored.predict_for_user(10, [1, 5])
    assert blended.loc[1] == 4.0
    assert blended.loc[5] == approx(expected(10, 5))

    # blended with an unknown item id
    mixed = restored.predict_for_user(10, [5, 1, -23081])
    assert len(mixed) == 3
    assert mixed.loc[1] == 4.0
    assert mixed.loc[5] == approx(expected(10, 5))
    assert mixed.loc[-23081] == approx(expected(10, None))
Exemplo n.º 4
0
def test_topn_big():
    """Spot-check TopN(Bias) recommendations for 100 random users."""
    frame = lktu.ml_test.ratings
    all_users = frame.user.unique()
    all_items = frame.item.unique()
    seen = frame.set_index('user').item

    algo = basic.TopN(basic.Bias())
    fitted = algo.fit(frame)
    assert fitted is algo  # fit returns the same object

    for user in np.random.choice(all_users, 100, False):
        recs = algo.recommend(user, 100)
        assert len(recs) == 100
        rated = seen.loc[user]
        # previously-rated items must never be recommended
        assert all(~recs['item'].isin(rated))
        # recommendations must be exactly the top-scored unrated items
        candidates = np.setdiff1d(all_items, rated)
        scores = algo.predictor.predict_for_user(user, candidates)
        assert scores.nlargest(100).values == approx(recs.score.values)
Exemplo n.º 5
0
def test_alogrithms():
    """Evaluate a suite of algorithms on MovieLens and plot NDCG means."""
    # NOTE(review): function name has a typo ('alogrithms'); kept so any
    # external selection by name (-k filters etc.) still matches.
    dataset = MovieLens('ml-latest-small')
    ratings = dataset.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = dict(
        Bias=basic.Bias(damping=5),
        Popular=basic.Popular(),
        ItemItem=item_knn.ItemItem(20),
        UserUser=user_knn.UserUser(20),
        BiasedMF=als.BiasedMF(50),
        ImplicitMF=als.ImplicitMF(50),
        FunkSVD=funksvd.FunkSVD(50),
    )

    recs, held_out = eval_algos(ratings, algorithms)
    means = eval_ndcg(recs, held_out)
    print('NDCG means:')
    print(means)
    plot_comparison(means)
Exemplo n.º 6
0
def test_bias_train_ml_ratings():
    """Fit Bias on renamed ML ratings and verify means, offsets, predictions."""
    algo = bl.Bias()
    ml = ml_pandas.ratings.rename(columns={
        'userId': 'user',
        'movieId': 'item'
    })
    algo.fit(ml)

    assert algo.mean_ == approx(ml.rating.mean())

    # learned item offsets + global mean must equal per-item rating means
    item_means = ml.groupby('item').rating.mean()
    learned = algo.item_offsets_ + algo.mean_
    learned, aligned_means = learned.align(item_means)
    assert learned.values == approx(aligned_means.values)

    # user 2's mean residual after subtracting item means
    u2_ratings = ml.set_index('user').loc[2].set_index('item').rating
    residual = (u2_ratings - item_means[u2_ratings.index]).mean()

    preds = algo.predict_for_user(2, [10, 11, -1])
    assert len(preds) == 3
    assert preds.iloc[0] == approx(item_means.loc[10] + residual)
    assert preds.iloc[1] == approx(item_means.loc[11] + residual)
    # unknown item (-1): prediction is global mean plus the user residual
    assert preds.iloc[2] == approx(ml.rating.mean() + residual)
Exemplo n.º 7
0
def test_fallback_predict():
    """Fallback prefers Memorized values and falls back to Bias predictions."""
    fb = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    fb.fit(lktu.ml_test.ratings)
    assert len(fb.algorithms) == 2

    bias = fb.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def expected(user, item):
        # global mean plus whichever offsets apply (None -> skip that term)
        score = bias.mean_
        if user is not None:
            score += bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item
    assert fb.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item
    assert fb.predict_for_user(15, [1]).loc[1] == approx(expected(15, 1))
    # second item + user item
    assert fb.predict_for_user(12, [2]).loc[2] == approx(expected(12, 2))

    # blended: one memorized value, one bias value
    blended = fb.predict_for_user(10, [1, 5])
    assert blended.loc[1] == 4.0
    assert blended.loc[5] == approx(expected(10, 5))

    # blended with an unknown item id
    mixed = fb.predict_for_user(10, [5, 1, -23081])
    assert len(mixed) == 3
    assert mixed.loc[1] == 4.0
    assert mixed.loc[5] == approx(expected(10, 5))
    assert mixed.loc[-23081] == approx(expected(10, None))
Exemplo n.º 8
0
def get_topn_algo_class(algo):
    """Map an algorithm name to an unfitted top-N recommender instance."""
    # Lazy factories: only the selected algorithm is ever constructed.
    builders = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: basic.TopN(
            iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum')),
        'useruser': lambda: basic.TopN(
            uknn.UserUser(nnbrs=5, center=False, aggregate='sum')),
        'biasedmf': lambda: basic.TopN(als.BiasedMF(50, iterations=10)),
        'implicitmf': lambda: basic.TopN(als.ImplicitMF(20, iterations=10)),
        'funksvd': lambda: basic.TopN(svd.FunkSVD(20, iterations=20)),
        'bpr': lambda: basic.TopN(BPR(25)),
        'tf_bpr': lambda: basic.TopN(
            lktf.BPR(20, batch_size=1024, epochs=5, neg_count=2,
                     rng_spec=42)),
    }
    builder = builders.get(algo)
    if builder is None:
        return None  # unknown name, same as the original fall-through
    return builder()
Exemplo n.º 9
0
def all_movie_recommends(ratings, optionList):
    """Run cross-validated batch evaluation for the selected algorithms.

    Args:
        ratings(pandas.DataFrame): must contain 'user', 'item', 'rating'
            columns; any extra columns are dropped.
        optionList(iterable of int): codes 1-6 selecting algorithms;
            unknown codes are silently ignored (as in the original).

    Returns:
        tuple: (all_recs, test_data) -- the concatenated per-algorithm
        evaluation frames and the concatenated held-out test ratings.
    """
    # Option code -> (label, model).  Models are built once and the same
    # instance is refit across folds, matching the original behavior.
    models = {
        1: ('BasicBias', basic.Bias()),
        2: ('ItemItem', iknn.ItemItem(20)),
        3: ('UserUser', uknn.UserUser(20)),
        4: ('ALS-Biased', als.BiasedMF(50)),
        5: ('ALS-Implicit', als.ImplicitMF(50)),
        6: ('FunkSVD', funksvd.FunkSVD(50)),
    }

    all_recs = []
    test_data = []
    folds = xf.partition_users(ratings[['user', 'item', 'rating']],
                               5, xf.SampleFrac(0.2))
    for train, test in folds:
        test_data.append(test)
        for option in optionList:
            if option in models:
                name, model = models[option]
                all_recs.append(batch_eval(name, model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    return all_recs, test_data
Exemplo n.º 10
0
def test_als_batch_accuracy():
    """Check BiasedMF prediction accuracy on ML-100K via cross-validation."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.load_ratings()

    # ALS with a damped bias fallback for unscorable pairs
    predictor = basic.Fallback(als.BiasedMF(25, iterations=20, damping=5),
                               basic.Bias(damping=5))

    def run_fold(train, test):
        _log.info('running training')
        predictor.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(prediction=predictor.predict(test))

    parts = xf.partition_users(data, 5, xf.SampleFrac(0.2))
    preds = pd.concat(run_fold(train, test) for (train, test) in parts)

    assert pm.mae(preds.prediction, preds.rating) == approx(0.73, abs=0.025)

    per_user = preds.groupby('user').apply(
        lambda frame: pm.rmse(frame.prediction, frame.rating))
    assert per_user.mean() == approx(0.91, abs=0.05)
Exemplo n.º 11
0
def test_bias_batch_recommend():
    """Batch-recommend with a damped Bias model and sanity-check mean DCG > 0."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch, topn
    import lenskit.metrics.topn as lm

    if not os.path.exists('ml-100k/u.data'):
        # pytest.skip() raises internally; the original's
        # 'raise pytest.skip()' was redundant and gave no reason.
        pytest.skip('ml-100k data not available')

    ratings = pd.read_csv('ml-100k/u.data',
                          sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    algo = basic.Bias(damping=5)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        cand_fun = topn.UnratedCandidates(train)
        recs = batch.recommend(algo, test.user.unique(), 100, cand_fun)
        # combine with test ratings for relevance data
        res = pd.merge(recs, test, how='left', on=('user', 'item'))
        # items absent from the test set count as irrelevant (rating 0)
        res.loc[res.rating.isna(), 'rating'] = 0
        return res

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    recs = pd.concat(eval(train, test) for (train, test) in folds)

    _log.info('analyzing recommendations')
    dcg = recs.groupby('user').rating.apply(lm.dcg)
    _log.info('DCG for %d users is %f (max=%f)', len(dcg), dcg.mean(),
              dcg.max())
    assert dcg.mean() > 0
Exemplo n.º 12
0
def test_fsvd_batch_accuracy():
    """Check FunkSVD prediction accuracy on ML-100K via cross-validation."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.ratings

    # FunkSVD with a damped bias fallback for unscorable pairs
    predictor = basic.Fallback(svd.FunkSVD(25, 125, damping=10),
                               basic.Bias(damping=10))

    def run_fold(train, test):
        _log.info('running training')
        predictor.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(predictor, test)

    parts = xf.partition_users(data, 5, xf.SampleFrac(0.2))
    preds = pd.concat(run_fold(train, test) for (train, test) in parts)

    assert pm.mae(preds.prediction, preds.rating) == approx(0.74, abs=0.025)

    per_user = preds.groupby('user').apply(
        lambda frame: pm.rmse(frame.prediction, frame.rating))
    assert per_user.mean() == approx(0.92, abs=0.05)
Exemplo n.º 13
0
def test_bias_save():
    """Pickling a fitted Bias model preserves the learned parameters."""
    original = bl.Bias(damping=5)
    original.fit(simple_df)
    assert original.mean_ == approx(3.5)

    _log.info('saving baseline model')
    blob = pickle.dumps(original)
    _log.info('serialized to %d bytes', len(blob))

    restored = pickle.loads(blob)

    assert restored.mean_ == original.mean_

    # item offsets: index, membership, and values all survive
    assert restored.item_offsets_ is not None
    assert restored.item_offsets_.index.name == 'item'
    assert set(restored.item_offsets_.index) == {1, 2, 3}
    assert restored.item_offsets_.loc[1:3].values == approx(
        np.array([0, 0.25, -0.25]))

    # user offsets likewise
    assert restored.user_offsets_ is not None
    assert restored.user_offsets_.index.name == 'user'
    assert set(restored.user_offsets_.index) == {10, 12, 13}
    assert restored.user_offsets_.loc[[10, 12, 13]].values == approx(
        np.array([0.25, -0.08333, -0.20833]), abs=1.0e-4)
Exemplo n.º 14
0
def test_bias_global_only():
    """With user and item terms disabled, Bias learns only the global mean."""
    model = bl.Bias(users=False, items=False)
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)
    assert model.item_offsets_ is None
    assert model.user_offsets_ is None
Exemplo n.º 15
0
def test_fallback_train_one():
    """A single-component Fallback fits that one component."""
    fb = basic.Fallback(basic.Bias())
    fb.fit(lktu.ml_pandas.renamed.ratings)
    assert len(fb.algorithms) == 1
    trained = fb.algorithms[0]
    assert isinstance(trained, basic.Bias)
    assert trained.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())
Exemplo n.º 16
0
"""
Basic algorithm definitions as starting points.
"""

from lenskit.algorithms import item_knn, user_knn, als, funksvd
from lenskit.algorithms import basic

Bias = basic.Bias(damping=5)
Pop = basic.Popular()
II = item_knn.ItemItem(20, save_nbrs=2500)
UU = user_knn.UserUser(30)
ALS = als.BiasedMF(50)
IALS = als.ImplicitMF(50)
MFSGD = funksvd.FunkSVD(50)
Exemplo n.º 17
0
from sklearn.metrics import roc_auc_score
import numpy as np

import pandas as pd

if __name__ == '__main__':

    # Load pre-split beer-review data; the jester dataset is an
    # alternative left commented out.
    train = pd.read_csv('data/beer/train.csv')
    validation = pd.read_csv('data/beer/validation.csv')
    test = pd.read_csv('data/beer/test.csv')
    # train = pd.read_csv('data/jester/clean/train.csv')
    # validation = pd.read_csv('data/jester/clean/validation.csv')
    # test = pd.read_csv('data/jester/clean/test.csv')

    # NOTE(review): 'basic' is not imported in this chunk -- presumably
    # 'from lenskit.algorithms import basic' exists elsewhere; verify.
    algo = basic.Bias()
    # algo = als.BiasedMF()

    # Keep only the id/rating columns and rename them to the
    # conventional user/item/rating schema.
    train = train[['reviewer_id', 'beer_beerid', 'review_overall']]
    validation = validation[['reviewer_id', 'beer_beerid', 'review_overall']]
    test = test[['reviewer_id', 'beer_beerid', 'review_overall']]
    train = train.rename(columns={
        'review_overall': 'rating',
        'reviewer_id': 'user',
        'beer_beerid': 'item'
    })
    validation = validation.rename(columns={
        'review_overall': 'rating',
        'reviewer_id': 'user',
        'beer_beerid': 'item'
    })
Exemplo n.º 18
0
# NOTE(review): 'train' is used below but only 'test' is loaded here --
# train is presumably read earlier in the file; verify before running.
test = pd.read_csv("/project/naray190/ml-20m/truncated_user_ratings.csv")
train = train[['userId', 'movieId', 'rating']]
test = test[['userId', 'movieId', 'rating']]
# Rename to the conventional user/item/rating column schema.
train.columns = ['user', 'item', 'rating']
test.columns = ['user', 'item', 'rating']
# Biased ALS matrix factorization over a sweep of feature counts
# (all with 50 iterations and regularization 0.1).
algo_30als = als.BiasedMF(features=30, iterations=50, reg=0.1)
algo_40als = als.BiasedMF(features=40, iterations=50, reg=0.1)
algo_20als = als.BiasedMF(features=20, iterations=50, reg=0.1)
algo_25als = als.BiasedMF(features=25, iterations=50, reg=0.1)
algo_15als = als.BiasedMF(features=15, iterations=50, reg=0.1)
algo_50als = als.BiasedMF(features=50, iterations=50, reg=0.1)
algo_60als = als.BiasedMF(features=60, iterations=50, reg=0.1)
algo_10als = als.BiasedMF(features=10, iterations=50, reg=0.1)
algo_70als = als.BiasedMF(features=70, iterations=50, reg=0.1)
algo_80als = als.BiasedMF(features=80, iterations=50, reg=0.1)
# Non-factorization baselines for comparison.
algo_base = basic.Bias()
algo_ii = item_knn.ItemItem(nnbrs=20)

def eval(algo, train, test):
    """Fit a clone of ``algo`` on ``train`` and return its RMSE on ``test``.

    Args:
        algo: an unfitted LensKit algorithm (configuration object).
        train(pandas.DataFrame): training ratings ('user', 'item', 'rating').
        test(pandas.DataFrame): test ratings with a 'rating' column.

    Returns:
        float: RMSE of the predictions against ``test['rating']``.

    NOTE: this shadows the builtin ``eval``; name kept for caller
    compatibility.
    """
    # Fit and predict with the *clone*: the original cloned the algorithm
    # but then fitted and predicted with the shared instance, so repeated
    # evaluations mutated the same object and the clone was wasted.
    fittable = util.clone(algo)
    fittable.fit(train)
    preds = fittable.predict(test)

    rmse = predict.rmse(preds, test['rating'])
    return rmse


# Accumulator frame for per-algorithm, per-dataset RMSE results.
rmse_scores = pd.DataFrame(columns=['Algorithm', 'Dataset', 'RMSE'])
count = 0  # next row index to fill in rmse_scores
Exemplo n.º 19
0
def test_fallback_string():
    """The string form of a Fallback mentions the class name."""
    fb = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    assert 'Fallback' in str(fb)