示例#1
0
def test_uu_train():
    """Fitting UserUser returns the algorithm itself and stores a model
    whose user means and normalized ratings are consistent with the data."""
    algo = knn.UserUser(30)
    ret = algo.fit(ml_ratings)
    assert ret is algo

    # it should have computed correct means
    umeans = ml_ratings.groupby('user').rating.mean()
    mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean')
    umeans, mlmeans = umeans.align(mlmeans)
    assert mlmeans.values == approx(umeans.values)

    # we should be able to reconstruct rating values
    uir = ml_ratings.set_index(['user', 'item']).rating
    # transpose_matrix_ rows map to items and columns to users (see the
    # index lookups below); its values are mean-centered ratings, so adding
    # the user mean back should recover the original rating.
    r_items = algo.transpose_matrix_.rowinds()
    ui_rbdf = pd.DataFrame({
        'user':
        algo.user_index_[algo.transpose_matrix_.colinds],
        'item':
        algo.item_index_[r_items],
        'nrating':
        algo.transpose_matrix_.values
    }).set_index(['user', 'item'])
    ui_rbdf = ui_rbdf.join(mlmeans)
    ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean']
    ui_rbdf['orig_rating'] = uir
    assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values)
示例#2
0
def test_uu_known_preds():
    """Compare batch predictions against a file of known-good values."""
    from lenskit import batch

    algo = knn.UserUser(30, min_sim=1.0e-6)
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)

    # the expected predictions live next to this test file
    dir = Path(__file__).parent
    pred_file = dir / 'user-user-preds.csv'
    _log.info('reading known predictions from %s', pred_file)
    known_preds = pd.read_csv(str(pred_file))
    pairs = known_preds.loc[:, ['user', 'item']]
    _log.info('generating %d known predictions', len(pairs))

    preds = batch.predict(algo, pairs)
    merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds)
    assert len(merged) == len(preds)
    merged['error'] = merged.expected - merged.prediction
    # every pair that has a known prediction must receive one; on failure,
    # log the missing rows before re-raising so the report is actionable
    try:
        assert not any(merged.prediction.isna() & merged.expected.notna())
    except AssertionError as e:
        bad = merged[merged.prediction.isna() & merged.expected.notna()]
        _log.error('%d missing predictions:\n%s', len(bad), bad)
        raise e

    # predictions must match the stored values to within 0.01
    err = merged.error
    err = err[err.notna()]
    try:
        assert all(err.abs() < 0.01)
    except AssertionError as e:
        bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)]
        _log.error('%d erroneous predictions:\n%s', len(bad), bad)
        raise e
示例#3
0
def test_uu_predict_unknown_empty():
    """Predicting for an unknown user yields all-NaN scores."""
    algo = knn.UserUser(30, min_nbrs=2)
    algo.fit(ml_ratings)

    items = [1016, 2091]
    scores = algo.predict_for_user(-28018, items)
    assert len(scores) == 2
    assert scores.isna().all()
示例#4
0
def test_uu_implicit_batch_accuracy():
    """Cross-validated nDCG for implicit-feedback user-user clears a floor."""
    from lenskit import batch, topn
    import lenskit.crossfold as xf

    ratings = lktu.ml100k.ratings

    # implicit-style configuration: no mean-centering, sum aggregation
    algo = knn.UserUser(30, center=False, aggregate='sum')

    folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    all_test = pd.concat(f.test for f in folds)

    rec_lists = []
    for train, test in folds:
        _log.info('running training')
        rec_algo = Recommender.adapt(algo)
        # train on user/item pairs only (ratings dropped for implicit mode)
        rec_algo.fit(train.loc[:, ['user', 'item']])
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(rec_algo, test.user.unique(), 100, n_jobs=2)
        rec_lists.append(recs)
    recs = pd.concat(rec_lists)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, all_test)
    user_dcg = results.ndcg

    dcg = user_dcg.mean()
    assert dcg >= 0.03
示例#5
0
def user_movie_recommend(ratings, optionList, userId):
    """Produce per-user recommendations for each selected algorithm option.

    Args:
        ratings: the ratings data frame to evaluate against.
        optionList: iterable of option codes (1-6) selecting algorithms.
        userId: the user to generate recommendations for.

    Returns:
        A single data frame concatenating the results of ``user_eval``
        for every selected option.
    """
    # option code -> (label, model factory); factories keep model
    # construction lazy, exactly like the original if-chain
    factories = {
        1: lambda: ('BasicBias', basic.Bias()),
        2: lambda: ('ItemItem', iknn.ItemItem(20)),
        3: lambda: ('UserUser', uknn.UserUser(20)),
        4: lambda: ('ALS-Biased', als.BiasedMF(50)),
        5: lambda: ('ALS-Implicit', als.ImplicitMF(50)),
        6: lambda: ('FunkSVD', funksvd.FunkSVD(50)),
    }

    all_recs = []
    for option in optionList:
        make = factories.get(option)
        if make is None:
            continue
        label, model = make()
        all_recs.append(user_eval(label, model, ratings, userId))

    return pd.concat(all_recs, ignore_index=True)
示例#6
0
def test_alogrithms():
    """Evaluate several algorithms on ML-1M and plot their mean nDCG.

    NOTE(review): the function name has a typo ("alogrithms"); it is kept
    unchanged to preserve the public test identifier.
    """
    # data = MovieLens('ml-latest-small')
    data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())
    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50)
    ]
    # 5-fold user partition, holding out 20% of each user's ratings
    pairs = list(
        partition_users(ratings[['user', 'item', 'rating']], 5,
                        SampleFrac(0.2)))
    eval_algorithms(dataset=pairs, algorithms=algorithms)
    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((p.test for p in pairs), ignore_index=True)
    ndcg_means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
示例#7
0
def test_uu_predict_one():
    """A single known user/item pair yields the expected prediction."""
    algo = knn.UserUser(30)
    algo.fit(ml_ratings)

    target_item = 1016
    result = algo.predict_for_user(4, [target_item])
    assert len(result) == 1
    assert result.index == [target_item]
    assert result.values == approx([3.62221550680778])
示例#8
0
def test_uu_predict_too_few_blended():
    """With min_nbrs=2, an item lacking neighbors is NaN while others score."""
    algo = knn.UserUser(30, min_nbrs=2)
    algo.fit(ml_ratings)

    scores = algo.predict_for_user(4, [1016, 2091])
    assert len(scores) == 2
    assert np.isnan(scores.loc[2091])
    assert scores.loc[1016] == approx(3.62221550680778)
示例#9
0
def test_uu_predict_too_few():
    """An item with too few neighbors (min_nbrs=2) gets a NaN score."""
    model = knn.UserUser(30, min_nbrs=2)
    model.fit(ml_ratings)

    scores = model.predict_for_user(4, [2091])
    assert len(scores) == 1
    assert scores.index == [2091]
    assert scores.isna().all()
 def userKNN(self, nnbrs, aggregate, center, min_nbrs=3):
     """Fit and evaluate a user-based KNN recommender.

     Args:
         nnbrs: maximum number of neighbors to consider.
         aggregate: aggregation method passed to ``UserUser``.
         center: whether to mean-center ratings.
         min_nbrs: minimum neighbors required to score an item.

     Returns:
         The result of ``self.eval`` for the fitted algorithm.
     """
     algoname = "userKNN"
     user_user = user_knn.UserUser(nnbrs=nnbrs,
                                   min_nbrs=min_nbrs,
                                   aggregate=aggregate,
                                   center=center)
     # renamed from ``eval`` to avoid shadowing the builtin
     result = self.eval(algoname, user_user)
     print("UserKNN was fitted.")
     return result
示例#11
0
def test_uu_train_adapt():
    "Test training an adapted user-user (#129)."
    from lenskit.algorithms import Recommender

    base = knn.UserUser(30)
    rec = Recommender.adapt(base)
    fitted = rec.fit(ml_ratings)
    # fit() must return the adapter itself, wrapping a UserUser predictor
    assert fitted is rec
    assert isinstance(rec.predictor, knn.UserUser)
示例#12
0
def test_uu_predict_live_ratings():
    """Predictions can use ratings supplied at query time for an unseen user."""
    algo = knn.UserUser(30, min_nbrs=2)
    train = ml_ratings[ml_ratings.user != 4]
    algo.fit(train)

    # user 4's history, replayed as live ratings under a fresh user id
    live = ml_ratings[ml_ratings.user == 4].set_index('item').rating

    scores = algo.predict_for_user(20381, [1016, 2091], live)
    assert len(scores) == 2
    assert np.isnan(scores.loc[2091])
    assert scores.loc[1016] == approx(3.62221550680778)
示例#13
0
def test_uu_implicit():
    "Train and use user-user on an implicit data set."
    algo = knn.UserUser(20, center=False, aggregate='sum')
    implicit = ml_ratings.loc[:, ['user', 'item']]

    algo.fit(implicit)
    # no ratings -> no centering -> no stored user means
    assert algo.user_means_ is None

    # every user row should be unit-normalized
    csr = matrix.csr_to_scipy(algo.rating_matrix_)
    row_norms = sps.linalg.norm(csr, 2, 1)
    assert row_norms == approx(1.0)

    scores = algo.predict_for_user(50, [1, 2, 42])
    assert all(scores[scores.notna()] > 0)
示例#14
0
def test_uu_implicit():
    "Train and use user-user on an implicit data set."
    algo = knn.UserUser(20, feedback='implicit')
    implicit = ml_ratings.loc[:, ['user', 'item']]

    algo.fit(implicit)
    # implicit feedback disables centering, so no user means are stored
    assert algo.user_means_ is None

    # each user row of the rating matrix should be unit-normalized
    csr = algo.rating_matrix_.to_scipy()
    row_norms = spla.norm(csr, 2, 1)
    assert row_norms == approx(1.0)

    scores = algo.predict_for_user(50, [1, 2, 42])
    assert all(scores[scores.notna()] > 0)
示例#15
0
def test_uu_save_load(tmp_path):
    """Saving and reloading a trained model preserves means and ratings."""
    tmp_path = lktu.norm_path(tmp_path)

    orig = knn.UserUser(30)
    _log.info('training model')
    orig.fit(ml_ratings)

    fn = tmp_path / 'uu.model'
    _log.info('saving to %s', fn)
    orig.save(fn)

    _log.info('reloading model')
    algo = knn.UserUser(30)
    algo.load(fn)
    _log.info('checking model')

    # it should have computed correct means
    umeans = ml_ratings.groupby('user').rating.mean()
    mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean')
    umeans, mlmeans = umeans.align(mlmeans)
    assert mlmeans.values == approx(umeans.values)

    # we should be able to reconstruct rating values
    uir = ml_ratings.set_index(['user', 'item']).rating
    # transpose matrix rows map to items and columns to users; its values
    # are mean-centered ratings, so adding back the user mean should
    # recover the original rating.
    r_items = matrix.csr_rowinds(algo.transpose_matrix_)
    ui_rbdf = pd.DataFrame({
        'user':
        algo.user_index_[algo.transpose_matrix_.colinds],
        'item':
        algo.item_index_[r_items],
        'nrating':
        algo.transpose_matrix_.values
    }).set_index(['user', 'item'])
    ui_rbdf = ui_rbdf.join(mlmeans)
    ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean']
    ui_rbdf['orig_rating'] = uir
    assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values)
示例#16
0
def test_uu_save_load_implicit(tmp_path):
    "Save and load user-user on an implicit data set."
    tmp_path = lktu.norm_path(tmp_path)
    orig = knn.UserUser(20, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]

    orig.fit(data)
    orig.save(tmp_path / 'uu.mod')

    # reload into a fresh instance with the same configuration
    algo = knn.UserUser(20, center=False, aggregate='sum')
    algo.load(tmp_path / 'uu.mod')
    # implicit data: no centering, so no user means are stored
    assert algo.user_means_ is None
    assert all(algo.user_index_ == orig.user_index_)
    assert all(algo.item_index_ == orig.item_index_)

    # the full CSR structure and values of the rating matrix must survive
    assert all(algo.rating_matrix_.rowptrs == orig.rating_matrix_.rowptrs)
    assert all(algo.rating_matrix_.colinds == orig.rating_matrix_.colinds)
    assert all(algo.rating_matrix_.values == orig.rating_matrix_.values)

    assert all(
        algo.transpose_matrix_.rowptrs == orig.transpose_matrix_.rowptrs)
    assert all(
        algo.transpose_matrix_.colinds == orig.transpose_matrix_.colinds)
    # the transpose matrix keeps structure only; its values are dropped
    assert algo.transpose_matrix_.values is None
示例#17
0
 def get_algo_class(self, algo):
     """Instantiate the algorithm named by ``algo``; None if unrecognized."""
     # name -> factory; keeps construction lazy like the original if-chain
     factories = {
         'popular': lambda: basic.Popular(),
         'bias': lambda: basic.Bias(users=False),
         'topn': lambda: basic.TopN(basic.Bias()),
         'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
         'useruser': lambda: uknn.UserUser(nnbrs=5),
         'biasedmf': lambda: als.BiasedMF(50, iterations=10),
         'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
         'funksvd': lambda: svd.FunkSVD(20, iterations=20),
     }
     maker = factories.get(algo)
     if maker is not None:
         return maker()
示例#18
0
def get_topn_algo_class(algo):
    """Map an algorithm name to a top-N recommender instance; None if unknown."""
    # name -> factory; construction stays lazy, matching the if-chain form
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: basic.TopN(
            iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum')),
        'useruser': lambda: basic.TopN(
            uknn.UserUser(nnbrs=5, center=False, aggregate='sum')),
        'biasedmf': lambda: basic.TopN(als.BiasedMF(50, iterations=10)),
        'implicitmf': lambda: basic.TopN(als.ImplicitMF(20, iterations=10)),
        'funksvd': lambda: basic.TopN(svd.FunkSVD(20, iterations=20)),
        'bpr': lambda: basic.TopN(BPR(25)),
    }
    maker = factories.get(algo)
    if maker is not None:
        return maker()
示例#19
0
def test_uu_batch_accuracy():
    """Cross-validated MAE/RMSE of user-user (with bias fallback) on ML-100K."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    uu_algo = knn.UserUser(30)
    # fall back to a bias model when user-user cannot score an item
    algo = basic.Fallback(uu_algo, basic.Bias())

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = [__batch_eval((algo, train, test)) for (train, test) in folds]
    preds = pd.concat(preds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.71, abs=0.028)

    # per-user RMSE, averaged over users
    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.055)
示例#20
0
def test_uu_save_load(tmp_path):
    """Pickling and reloading a trained model preserves its state and
    lets it make predictions."""
    orig = knn.UserUser(30)
    _log.info('training model')
    orig.fit(ml_ratings)

    fn = tmp_path / 'uu.model'
    _log.info('saving to %s', fn)
    with fn.open('wb') as f:
        pickle.dump(orig, f)

    _log.info('reloading model')
    with fn.open('rb') as f:
        algo = pickle.load(f)

    _log.info('checking model')

    # it should have computed correct means
    umeans = ml_ratings.groupby('user').rating.mean()
    mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean')
    umeans, mlmeans = umeans.align(mlmeans)
    assert mlmeans.values == approx(umeans.values)

    # we should be able to reconstruct rating values
    uir = ml_ratings.set_index(['user', 'item']).rating
    # transpose matrix rows map to items and columns to users; its values
    # are mean-centered ratings, so adding the user mean back should
    # recover the original rating.
    r_items = algo.transpose_matrix_.rowinds()
    ui_rbdf = pd.DataFrame({
        'user':
        algo.user_index_[algo.transpose_matrix_.colinds],
        'item':
        algo.item_index_[r_items],
        'nrating':
        algo.transpose_matrix_.values
    }).set_index(['user', 'item'])
    ui_rbdf = ui_rbdf.join(mlmeans)
    ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean']
    ui_rbdf['orig_rating'] = uir
    assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values)

    # running the predictor should work
    preds = algo.predict_for_user(4, [1016])
    assert len(preds) == 1
    assert preds.index == [1016]
    assert preds.values == approx([3.62221550680778])
示例#21
0
def test_alogrithms():
    """Evaluate a dict of algorithms on ml-latest-small and plot mean nDCG.

    NOTE(review): the function name has a typo ("alogrithms"); it is kept
    unchanged to preserve the public test identifier.
    """
    data = MovieLens('ml-latest-small')
    #data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())
    # label -> algorithm instance, evaluated together below
    algorithms = {
        'Bias': basic.Bias(damping=5),
        'Popular': basic.Popular(),
        'ItemItem': item_knn.ItemItem(20),
        'UserUser': user_knn.UserUser(20),
        'BiasedMF': als.BiasedMF(50),
        'ImplicitMF': als.ImplicitMF(50),
        'FunkSVD': funksvd.FunkSVD(50)
    }
    all_recs, test_data = eval_algos(ratings, algorithms)
    ndcg_means = eval_ndcg(all_recs, test_data)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
示例#22
0
def test_uu_save_load_implicit(tmp_path):
    "Save and load user-user on an implicit data set."
    # round-trip the trained model through pickle and verify its state
    orig = knn.UserUser(20, feedback='implicit')
    data = ml_ratings.loc[:, ['user', 'item']]

    orig.fit(data)
    ser = pickle.dumps(orig)

    algo = pickle.loads(ser)

    # implicit feedback disables centering, so no user means are stored
    assert algo.user_means_ is None
    assert all(algo.user_index_ == orig.user_index_)
    assert all(algo.item_index_ == orig.item_index_)

    # the full CSR structure and values of the rating matrix must survive
    assert all(algo.rating_matrix_.rowptrs == orig.rating_matrix_.rowptrs)
    assert all(algo.rating_matrix_.colinds == orig.rating_matrix_.colinds)
    assert all(algo.rating_matrix_.values == orig.rating_matrix_.values)

    assert all(algo.transpose_matrix_.rowptrs == orig.transpose_matrix_.rowptrs)
    assert all(algo.transpose_matrix_.colinds == orig.transpose_matrix_.colinds)
    # the transpose matrix keeps structure only; its values are dropped
    assert algo.transpose_matrix_.values is None
    def run(self, strategy_context: RecommenderAlgorithmStrategyContext
            ) -> np.ndarray:
        """Train a user-user KNN recommender on one split and return recs.

        Args:
            strategy_context: supplies the data-set source and the number
                of recommendations to produce per user.

        Returns:
            A 2-D array with one row per test user, each holding that
            user's recommended item ids.
        """
        data_set_source = strategy_context.data_set_source
        data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
            data_set_source)
        data_set: DataFrame = data_frame_reader.parse(
            DataFrameReaderStrategyContext(data_set_source))

        # single user-partition split (20% of each user's rows held out);
        # only the first (and only) fold is used
        partition = list(
            partition_users(data=data_set,
                            partitions=1,
                            method=crossfold.SampleFrac(0.2)))[0]
        test, train = partition.test, partition.train
        number_of_recommendations = strategy_context.number_of_recommendations
        algorithm = Recommender.adapt(
            user_knn.UserUser(number_of_recommendations))
        trained_algorithm = algorithm.fit(train)
        recommendations = lenskit.batch.recommend(trained_algorithm,
                                                  test['user'].unique(),
                                                  number_of_recommendations)
        # flatten the per-user item lists into a (users, n_recs) matrix
        return recommendations.groupby('user')['item'].apply(
            lambda x: x).to_numpy().reshape((-1, number_of_recommendations))
示例#24
0
def all_movie_recommends(ratings, optionList):
    """Run cross-validated batch evaluation for each selected algorithm.

    Args:
        ratings: frame with 'user', 'item', 'rating' columns.
        optionList: iterable of option codes (1-6) selecting algorithms.

    Returns:
        (all_recs, test_data): concatenated recommendations across every
        fold/algorithm combination, and the concatenated held-out test data.
    """
    all_recs = []
    test_data = []

    #Declare algorithm models
    basic_bias_model = basic.Bias()
    knn_model = iknn.ItemItem(20)
    knn_u_model = uknn.UserUser(20)
    als_b_model = als.BiasedMF(50)
    als_i_model = als.ImplicitMF(50)
    funk_model = funksvd.FunkSVD(50)

    # 5-fold user partition, 20% of each user's ratings held out per fold
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          5, xf.SampleFrac(0.2)):
        test_data.append(test)

        for option in optionList:
            if option == 1:
                all_recs.append(
                    batch_eval('BasicBias', basic_bias_model, train, test))
            if option == 2:
                all_recs.append(batch_eval('ItemItem', knn_model, train, test))
            if option == 3:
                all_recs.append(
                    batch_eval('UserUser', knn_u_model, train, test))
            if option == 4:
                all_recs.append(
                    batch_eval('ALS-Biased', als_b_model, train, test))
            if option == 5:
                all_recs.append(
                    batch_eval('ALS-Implicit', als_i_model, train, test))
            if option == 6:
                all_recs.append(batch_eval('FunkSVD', funk_model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    return all_recs, test_data
示例#25
0
def test_uu_implicit_batch_accuracy():
    """Implicit user-user should reach a minimum mean DCG across CV folds."""
    from lenskit import batch, topn
    import lenskit.crossfold as xf
    import lenskit.metrics.topn as lm

    ratings = lktu.ml100k.load_ratings()

    # implicit-style configuration: no centering, sum aggregation
    algo = knn.UserUser(30, center=False, aggregate='sum')

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    rec_lists = []
    for train, test in folds:
        _log.info('running training')
        # train on user/item pairs only (ratings dropped for implicit mode)
        algo.fit(train.loc[:, ['user', 'item']])
        # candidates are items the user has not rated in the training fold
        cands = topn.UnratedCandidates(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100, cands, test)
        rec_lists.append(recs)
    recs = pd.concat(rec_lists)

    user_dcg = recs.groupby('user').rating.apply(lm.dcg)
    dcg = user_dcg.mean()
    assert dcg >= 0.1
示例#26
0
def get_algo_class(algo):
    """Instantiate the algorithm named by ``algo``; None if unrecognized."""
    # name -> factory; keeps construction lazy like the original if-chain
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.Bias(users=False),
        'topn': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
        'useruser': lambda: uknn.UserUser(nnbrs=5),
        'biasedmf': lambda: als.BiasedMF(50, iterations=10),
        'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
        'funksvd': lambda: svd.FunkSVD(20, iterations=20),
        'tf_bpr': lambda: lktf.BPR(20,
                                   batch_size=1024,
                                   epochs=5,
                                   neg_count=2,
                                   rng_spec=42),
    }
    maker = factories.get(algo)
    if maker is not None:
        return maker()
示例#27
0
def test_uu_imp_clone():
    """Cloning an implicit-feedback UserUser preserves params and state."""
    original = knn.UserUser(30, feedback='implicit')
    copy = clone(original)

    assert copy.get_params() == original.get_params()
    assert copy.__dict__ == original.__dict__
示例#28
0
def test_uu_imp_config():
    """feedback='implicit' selects sum aggregation, no centering, no ratings."""
    algo = knn.UserUser(30, feedback='implicit')

    assert algo.nnbrs == 30
    assert algo.aggregate == 'sum'
    assert not algo.center
    assert not algo.use_ratings
示例#29
0
def test_uu_exp_config():
    """feedback='explicit' selects weighted-average, centering, and ratings."""
    algo = knn.UserUser(30, feedback='explicit')

    assert algo.nnbrs == 30
    assert algo.aggregate == 'weighted-average'
    assert algo.center
    assert algo.use_ratings
示例#30
0
def test_uu_dft_config():
    """The default configuration matches explicit-feedback behavior."""
    algo = knn.UserUser(30)

    assert algo.nnbrs == 30
    assert algo.aggregate == 'weighted-average'
    assert algo.center
    assert algo.use_ratings