Example #1
def test_ii_impl_match(ml20m):
    sps = knn.ItemItem(20, min_sim=1.0e-6)
    sps._use_mkl = False
    timer = Stopwatch()
    _log.info('training SciPy %s on ml data', sps)
    sps.fit(ml20m)
    _log.info('trained SciPy in %s', timer)

    mkl = knn.ItemItem(20, min_sim=1.0e-6)
    timer = Stopwatch()
    _log.info('training MKL %s on ml data', mkl)
    mkl.fit(ml20m)
    _log.info('trained MKL in %s', timer)

    assert mkl.sim_matrix_.nnz == sps.sim_matrix_.nnz
    assert mkl.sim_matrix_.nrows == sps.sim_matrix_.nrows
    assert mkl.sim_matrix_.ncols == sps.sim_matrix_.ncols

    assert all(mkl.sim_matrix_.rowptrs == sps.sim_matrix_.rowptrs)
    for i in range(mkl.sim_matrix_.nrows):
        sp, ep = mkl.sim_matrix_.row_extent(i)
        assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0)
        assert all(np.diff(sps.sim_matrix_.values[sp:ep]) <= 0)
        assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(
            sps.sim_matrix_.colinds[sp:ep])
        assert all(
            np.abs(mkl.sim_matrix_.values[sp:ep] -
                   sps.sim_matrix_.values[sp:ep]) < 1.0e-3)
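Outside the test harness, the same `ItemItem` class is used by fitting on a ratings frame and scoring items per user. A minimal sketch (the file name and the user/item ids here are assumptions, not part of the test above):

import pandas as pd
from lenskit.algorithms import item_knn as knn

# assumed input: a DataFrame with 'user', 'item', 'rating' columns
ratings = pd.read_csv('ratings.csv')

algo = knn.ItemItem(20, min_sim=1.0e-6)
algo.fit(ratings)

# score three candidate items for one user; returns a pandas Series
# indexed by item, with NaN for items the model cannot score
preds = algo.predict_for_user(42, [1, 2, 3])
print(preds)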
Example #2
def test_ii_save_load(tmp_path):
    "Save and load a model"
    tmp_path = lktu.norm_path(tmp_path)
    original = knn.ItemItem(30, save_nbrs=500)
    _log.info('building model')
    original.fit(lktu.ml_sample())

    fn = tmp_path / 'ii.mod'
    _log.info('saving model to %s', fn)
    original.save(fn)
    _log.info('reloading model')

    algo = knn.ItemItem(30)
    algo.load(fn)
    _log.info('checking model')

    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert all(algo.item_counts_ == original.item_counts_)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert algo.sim_matrix_.nnz == original.sim_matrix_.nnz
    assert all(algo.sim_matrix_.rowptrs == original.sim_matrix_.rowptrs)
    assert algo.sim_matrix_.values == approx(original.sim_matrix_.values)

    r_mat = algo.sim_matrix_
    o_mat = original.sim_matrix_
    assert all(r_mat.rowptrs == o_mat.rowptrs)

    for i in range(len(algo.item_index_)):
        sp = r_mat.rowptrs[i]
        ep = r_mat.rowptrs[i + 1]

        # everything is in decreasing order
        assert all(np.diff(r_mat.values[sp:ep]) <= 0)
        assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep])

    means = ml_ratings.groupby('item').rating.mean()
    assert means[algo.item_index_].values == approx(original.item_means_)

    matrix = lm.csr_to_scipy(algo.sim_matrix_)

    items = pd.Series(algo.item_index_)
    items = items[algo.item_counts_ > 0]
    for i in items.sample(50):
        ipos = algo.item_index_.get_loc(i)
        _log.debug('checking item %d at position %d', i, ipos)

        row = matrix.getrow(ipos)

        # it should be sorted!
        # check by diffing the row values; they should be non-increasing
        # (up to a small floating-point tolerance)
        assert all(np.diff(row.data) < 1.0e-6)
Example #3
def test_ii_no_ratings():
    a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    a1.fit(ml_ratings.loc[:, ['user', 'item']])

    algo = knn.ItemItem(20, save_nbrs=100, feedback='implicit')

    algo.fit(ml_ratings)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)
    p2 = algo.predict_for_user(50, [1, 2, 42])
    preds, p2 = preds.align(p2)
    assert preds.values == approx(p2.values, nan_ok=True)
Example #4
def test_ii_known_preds():
    from lenskit import batch

    algo = knn.ItemItem(20, min_sim=1.0e-6)
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)
    assert algo.center
    assert algo.item_means_ is not None
    _log.info('model means: %s', algo.item_means_)

    dir = Path(__file__).parent
    pred_file = dir / 'item-item-preds.csv'
    _log.info('reading known predictions from %s', pred_file)
    known_preds = pd.read_csv(str(pred_file))
    pairs = known_preds.loc[:, ['user', 'item']]

    preds = batch.predict(algo, pairs)
    merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds)
    assert len(merged) == len(preds)
    merged['error'] = merged.expected - merged.prediction
    try:
        assert not any(merged.prediction.isna() & merged.expected.notna())
    except AssertionError as e:
        bad = merged[merged.prediction.isna() & merged.expected.notna()]
        _log.error('erroneously missing or present predictions:\n%s', bad)
        raise e

    err = merged.error
    err = err[err.notna()]
    try:
        assert all(err.abs() < 0.03)  # FIXME this threshold is too high
    except AssertionError as e:
        bad = merged[merged.error.notna() & (merged.error.abs() >= 0.03)]
        _log.error('erroneous predictions:\n%s', bad)
        raise e
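The `item-item-preds.csv` fixture this test reads is not shown on this page. A plausible regeneration script, consistent with the columns the test expects (`user`, `item`, `prediction`); the data source and sampling step are illustrative assumptions:

import pandas as pd
from lenskit import batch
from lenskit.algorithms import item_knn as knn

ratings = pd.read_csv('ratings.csv')  # assumed training data

algo = knn.ItemItem(20, min_sim=1.0e-6)
algo.fit(ratings)

# sample some user/item pairs and record the model's predictions
pairs = ratings[['user', 'item']].sample(1000, random_state=42)
preds = batch.predict(algo, pairs)
preds.to_csv('item-item-preds.csv', index=False)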
Example #5
def test_ii_batch_accuracy():
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    ii_algo = knn.ItemItem(30)
    algo = basic.Fallback(ii_algo, bias.Bias())

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test, n_jobs=4)

    preds = pd.concat(
        (eval(train, test)
         for (train,
              test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.70, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.90, abs=0.05)
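`basic.Fallback` is what keeps prediction coverage complete here: pairs the k-NN model cannot score fall through to the bias model. The same composition in isolation (a sketch; `train` is an assumed user/item/rating frame):

from lenskit.algorithms import basic, bias
from lenskit.algorithms import item_knn as knn

# item-item CF scores what it can; the bias model covers the rest
algo = basic.Fallback(knn.ItemItem(30), bias.Bias())
algo.fit(train)
preds = algo.predict_for_user(10, [1, 2, 3])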
Example #6
def user_movie_recommend(ratings, optionList, userId):
    all_recs = []

    for option in optionList:
        if option == 1:
            basic_bias_model = basic.Bias()
            all_recs.append(
                user_eval('BasicBias', basic_bias_model, ratings, userId))
        if option == 2:
            knn_model = iknn.ItemItem(20)
            all_recs.append(user_eval('ItemItem', knn_model, ratings, userId))
        if option == 3:
            knn_u_model = uknn.UserUser(20)
            all_recs.append(user_eval('UserUser', knn_u_model, ratings,
                                      userId))
        if option == 4:
            als_b_model = als.BiasedMF(50)
            all_recs.append(
                user_eval('ALS-Biased', als_b_model, ratings, userId))
        if option == 5:
            als_i_model = als.ImplicitMF(50)
            all_recs.append(
                user_eval('ALS-Implicit', als_i_model, ratings, userId))
        if option == 6:
            funk_model = funksvd.FunkSVD(50)
            all_recs.append(user_eval('FunkSVD', funk_model, ratings, userId))

    all_recs = pd.concat(all_recs, ignore_index=True)

    return all_recs
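`user_eval` is called but not defined in this example. A hypothetical implementation matching its call signature (name, algorithm, ratings, user id) and the pattern of the `eval` helpers elsewhere on this page:

from lenskit import batch, util
from lenskit.algorithms import Recommender

def user_eval(aname, algo, ratings, userId, n=10):
    # hypothetical helper: clone the algorithm so callers can reuse
    # the passed-in instance, fit on the full ratings frame, and
    # recommend for the single requested user
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(ratings)
    recs = batch.recommend(fittable, [userId], n)
    recs['Algorithm'] = aname  # tag for later analysis
    return recs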
Example #7
def test_ii_train():
    algo = knn.ItemItem(30, save_nbrs=500)
    algo.fit(simple_ratings)

    assert isinstance(algo.item_index_, pd.Index)
    assert isinstance(algo.item_means_, np.ndarray)
    assert isinstance(algo.item_counts_, np.ndarray)
    matrix = algo.sim_matrix_.to_scipy()

    # 6 is a neighbor of 7
    six, seven = algo.item_index_.get_indexer([6, 7])
    _log.info('six: %d', six)
    _log.info('seven: %d', seven)
    _log.info('matrix: %s', algo.sim_matrix_)
    assert matrix[six, seven] > 0
    # and has the correct score
    six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating
    six_v = six_v - six_v.mean()
    seven_v = simple_ratings[simple_ratings.item == 7].set_index('user').rating
    seven_v = seven_v - seven_v.mean()
    denom = la.norm(six_v.values) * la.norm(seven_v.values)
    six_v, seven_v = six_v.align(seven_v, join='inner')
    num = six_v.dot(seven_v)
    assert matrix[six, seven] == approx(num / denom, 0.01)

    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)
Example #8
def test_algorithms():
    # data = MovieLens('ml-latest-small')
    data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())
    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50)
    ]
    pairs = list(
        partition_users(ratings[['user', 'item', 'rating']], 5,
                        SampleFrac(0.2)))
    eval_algorithms(dataset=pairs, algorithms=algorithms)
    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((p.test for p in pairs), ignore_index=True)
    ndcg_means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
Example #9
def _create_non_social_recommender_algorithm(algo_name, aggregation):
    if algo_name == 'ii':
        algo = knn.ItemItem(NEIGHBORS)
    elif algo_name == 'als':
        algo = als.BiasedMF(NUM_FEATURES)
    else:
        # guard against `algo` being undefined for unknown names
        raise ValueError('unknown algorithm: ' + algo_name)
    return SocialRecommenderAlgorithmFactory._create_recommender_algorithm_with_fallback(
        algo, aggregation)
Example #10
def _train_ii():
    algo = knn.ItemItem(20, min_sim=1.0e-6)
    timer = Stopwatch()
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)
    _log.info('trained in %s', timer)
    shr = persist(algo)
    return shr.transfer()
Example #11
def eval(train, test):
    _log.info('running training')
    algo = knn.ItemItem(30)
    algo = Recommender.adapt(algo)
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus)
    return recs
Example #12
def _build_predict(ratings, fold):
    algo = Fallback(knn.ItemItem(20), Bias(5))
    train = ratings[ratings['partition'] != fold]
    algo.fit(train)

    test = ratings[ratings['partition'] == fold]
    preds = batch.predict(algo, test, n_jobs=1)
    return preds
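`_build_predict` assumes the ratings frame already carries a `partition` column. One way such a column could be assigned beforehand (a sketch using `lenskit.crossfold`; the fold count and row-based splitting are assumptions):

import lenskit.crossfold as xf

# label each row with the fold in which it appears as test data;
# relies on the ratings frame having a unique index
ratings['partition'] = -1
for fold, (train, test) in enumerate(xf.partition_rows(ratings, 5)):
    ratings.loc[test.index, 'partition'] = fold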
Example #13
def itemKNN(self, nnbrs, aggregate, center, min_nbrs=3):
    algoname = "itemKNN"
    item_item = item_knn.ItemItem(nnbrs=nnbrs,
                                  min_nbrs=min_nbrs,
                                  aggregate=aggregate,
                                  center=center)
    eval = self.eval(algoname, item_item)
    print("ItemKNN was fitted.")
    return eval
Example #14
class LegMedLensKit():
    def loadData():
        ratings = pd.read_csv('/Users/josse/Desktop/ratings.dat',
                              sep='::',
                              engine='python',  # multi-char sep needs the python engine
                              names=['user', 'item', 'rating', 'timestamp'])
        print(ratings.head())
        return (ratings)

    #print ("test")
    ratings = loadData()
    data_matrix = np.array(
        ratings.pivot(index='item', columns='user', values='rating'))
    print(data_matrix)
    data_matrix_rev = np.nan_to_num(data_matrix)
    print(data_matrix_rev)

    algo_ii = knn.ItemItem(20)
    algo_als = als.BiasedMF(50)

    def eval(aname, algo, train, test):
        print("test")
        fittable = util.clone(algo)
        fittable = Recommender.adapt(fittable)
        fittable.fit(train)
        users = test.user.unique()
        # now we run the recommender
        recs = batch.recommend(fittable, users, 100)
        # add the algorithm name for analyzability
        recs['Algorithm'] = aname
        print("recs")
        print(recs.head())
        return recs

    all_recs = []
    test_data = []

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          1, xf.SampleFrac(0.2)):
        test_data.append(test)
        #print(test.head(10))
        all_recs.append(eval('ItemItem', algo_ii, train, test))
        all_recs.append(eval('ALS', algo_als, train, test))

    print("test2")
    print(all_recs.head())
    all_recs = pd.concat(all_recs, ignore_index=True)
    print(all_recs.head())
    test_data = pd.concat(test_data, ignore_index=True)
    #print(test_data.head)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    results.head()

    results.groupby('Algorithm').ndcg.mean()
    results.groupby('Algorithm').ndcg.mean().plot.bar()
Example #15
def test_ii_simple_predict():
    algo = knn.ItemItem(30, save_nbrs=500)
    algo.fit(simple_ratings)

    res = algo.predict_for_user(3, [6])
    assert res is not None
    assert len(res) == 1
    assert 6 in res.index
    assert not np.isnan(res.loc[6])
Example #16
def test_ii_simple_implicit_predict():
    algo = knn.ItemItem(30, center=False, aggregate='sum')
    algo.fit(simple_ratings.loc[:, ['user', 'item']])

    res = algo.predict_for_user(3, [6])
    assert res is not None
    assert len(res) == 1
    assert 6 in res.index
    assert not np.isnan(res.loc[6])
    assert res.loc[6] > 0
Example #17
def test_ii_train_ml100k(tmp_path):
    "Test an unbounded model on ML-100K"
    tmp_path = lktu.norm_path(tmp_path)

    ratings = lktu.ml100k.load_ratings()
    algo = knn.ItemItem(30)
    _log.info('training model')
    algo.fit(ratings)

    _log.info('testing model')

    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)

    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz

    means = ratings.groupby('item').rating.mean()
    assert means[algo.item_index_].values == approx(algo.item_means_)

    # save
    fn = tmp_path / 'ii.mod'
    _log.info('saving model to %s', fn)
    algo.save(fn)
    _log.info('reloading model')
    restored = knn.ItemItem(30)
    restored.load(fn)
    assert all(restored.sim_matrix_.values > 0)

    r_mat = restored.sim_matrix_
    o_mat = algo.sim_matrix_

    assert all(r_mat.rowptrs == o_mat.rowptrs)

    for i in range(len(restored.item_index_)):
        sp = r_mat.rowptrs[i]
        ep = r_mat.rowptrs[i + 1]

        # everything is in decreasing order
        assert all(np.diff(r_mat.values[sp:ep]) <= 0)
        assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep])
Example #18
def test_ii_implicit_save_load(tmp_path, ml_subset):
    "Save and load a model"
    original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate='sum')
    _log.info('building model')
    original.fit(ml_subset.loc[:, ['user', 'item']])

    fn = tmp_path / 'ii.mod'
    _log.info('saving model to %s', fn)
    with fn.open('wb') as modf:
        pickle.dump(original, modf)

    _log.info('reloading model')
    with fn.open('rb') as modf:
        algo = pickle.load(modf)

    _log.info('checking model')
    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert all(algo.item_counts_ == original.item_counts_)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert algo.sim_matrix_.nnz == original.sim_matrix_.nnz
    assert all(algo.sim_matrix_.rowptrs == original.sim_matrix_.rowptrs)
    assert algo.sim_matrix_.values == approx(original.sim_matrix_.values)
    assert algo.rating_matrix_.values is None

    r_mat = algo.sim_matrix_
    o_mat = original.sim_matrix_
    assert all(r_mat.rowptrs == o_mat.rowptrs)

    for i in range(len(algo.item_index_)):
        sp = r_mat.rowptrs[i]
        ep = r_mat.rowptrs[i + 1]

        # everything is in decreasing order
        assert all(np.diff(r_mat.values[sp:ep]) <= 0)
        assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep])

    assert algo.item_means_ is None

    matrix = algo.sim_matrix_.to_scipy()

    items = pd.Series(algo.item_index_)
    items = items[algo.item_counts_ > 0]
    for i in items.sample(50):
        ipos = algo.item_index_.get_loc(i)
        _log.debug('checking item %d at position %d', i, ipos)

        row = matrix.getrow(ipos)

        # it should be sorted!
        # check by diffing the row values; they should be non-increasing
        # (up to a small floating-point tolerance)
        assert all(np.diff(row.data) < 1.0e-6)
Example #19
def test_ii_impl_match():
    sps = knn.ItemItem(20, min_sim=1.0e-6)
    sps._use_mkl = False
    _log.info('training SciPy %s on ml data', sps)
    sps.fit(lktu.ml_test.ratings)

    mkl = knn.ItemItem(20, min_sim=1.0e-6)
    _log.info('training MKL %s on ml data', mkl)
    mkl.fit(lktu.ml_test.ratings)

    assert mkl.sim_matrix_.nnz == sps.sim_matrix_.nnz
    assert mkl.sim_matrix_.nrows == sps.sim_matrix_.nrows
    assert mkl.sim_matrix_.ncols == sps.sim_matrix_.ncols

    assert all(mkl.sim_matrix_.rowptrs == sps.sim_matrix_.rowptrs)
    for i in range(mkl.sim_matrix_.nrows):
        sp, ep = mkl.sim_matrix_.row_extent(i)
        assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0)
        assert all(np.diff(sps.sim_matrix_.values[sp:ep]) <= 0)
        assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(sps.sim_matrix_.colinds[sp:ep])
Example #20
def test_ii_old_implicit():
    algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]

    algo.fit(data)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)
Example #21
def test_ii_warn_duplicates():
    extra = pd.DataFrame.from_records([(3, 7, 4.5)],
                                      columns=['user', 'item', 'rating'])
    ratings = pd.concat([simple_ratings, extra])
    algo = knn.ItemItem(5)
    algo.fit(ratings)

    try:
        with pytest.warns(DataWarning):
            algo.predict_for_user(3, [6])
    except AssertionError:
        pass  # this is fine
Example #22
def test_ii_train_big_unbounded():
    "Simple tests for unbounded models"
    algo = knn.ItemItem(30)
    algo.fit(ml_ratings)

    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz

    means = ml_ratings.groupby('item').rating.mean()
    assert means[algo.item_index_].values == approx(algo.item_means_)
Example #23
def get_algo_class(self, algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.Bias(users=False)
    elif algo == 'topn':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return iknn.ItemItem(nnbrs=-1)
    elif algo == 'useruser':
        return uknn.UserUser(nnbrs=5)
    elif algo == 'biasedmf':
        return als.BiasedMF(50, iterations=10)
    elif algo == 'implicitmf':
        return als.ImplicitMF(20, iterations=10)
    elif algo == 'funksvd':
        return svd.FunkSVD(20, iterations=20)
Example #24
def get_topn_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return basic.TopN(iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum'))
    elif algo == 'useruser':
        return basic.TopN(uknn.UserUser(nnbrs=5, center=False, aggregate='sum'))
    elif algo == 'biasedmf':
        return basic.TopN(als.BiasedMF(50, iterations=10))
    elif algo == 'implicitmf':
        return basic.TopN(als.ImplicitMF(20, iterations=10))
    elif algo == 'funksvd':
        return basic.TopN(svd.FunkSVD(20, iterations=20))
    elif algo == 'bpr':
        return basic.TopN(BPR(25))
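Since every branch of the factory above already returns a top-N wrapper, usage is direct. A sketch (the algorithm key, user id, and `ratings` frame are assumptions):

# fit one of the factory's top-N models and recommend;
# `ratings` is an assumed user/item/rating DataFrame
algo = get_topn_algo_class('itemitem')
algo.fit(ratings)
recs = algo.recommend(10, n=20)  # top-20 items for user 10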
Example #25
def test_ii_implicit_fast_ident():
    algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]

    algo.fit(data)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)
    assert np.isnan(preds.iloc[2])

    algo.min_sim = -1  # force it to take the slow path for all predictions
    p2 = algo.predict_for_user(50, [1, 2, 42])
    assert preds.values[:2] == approx(p2.values[:2])
    assert np.isnan(p2.iloc[2])
Example #26
def test_algorithms():
    data = MovieLens('ml-latest-small')
    #data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())
    algorithms = {
        'Bias': basic.Bias(damping=5),
        'Popular': basic.Popular(),
        'ItemItem': item_knn.ItemItem(20),
        'UserUser': user_knn.UserUser(20),
        'BiasedMF': als.BiasedMF(50),
        'ImplicitMF': als.ImplicitMF(50),
        'FunkSVD': funksvd.FunkSVD(50)
    }
    all_recs, test_data = eval_algos(ratings, algorithms)
    ndcg_means = eval_ndcg(all_recs, test_data)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
Example #27
def test_ii_batch_recommend(ncpus):
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    if not os.path.exists('ml-100k/u.data'):
        pytest.skip('ml-100k data not available')

    ratings = pd.read_csv('ml-100k/u.data',
                          sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    algo = knn.ItemItem(30)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        cand_fun = topn.UnratedCandidates(train)
        recs = batch.recommend(algo,
                               test.user.unique(),
                               100,
                               cand_fun,
                               n_jobs=ncpus)
        return recs

    test_frames = []
    recs = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        test_frames.append(test)
        recs.append(eval(train, test))

    test = pd.concat(test_frames)
    recs = pd.concat(recs)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %f', len(dcg), dcg.mean())
    assert dcg.mean() > 0.03
Example #28
def run(self, strategy_context: RecommenderAlgorithmStrategyContext
        ) -> np.ndarray:
    data_set_source = strategy_context.data_set_source
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        data_set_source)
    data_set: DataFrame = data_frame_reader.parse(
        DataFrameReaderStrategyContext(data_set_source))

    partition = list(
        partition_users(data=data_set,
                        partitions=1,
                        method=crossfold.SampleFrac(0.2)))[0]
    test, train = partition.test, partition.train
    number_of_recommendations = strategy_context.number_of_recommendations
    algorithm = Recommender.adapt(
        item_knn.ItemItem(number_of_recommendations))
    trained_algorithm = algorithm.fit(train)
    recommendations = lenskit.batch.recommend(trained_algorithm,
                                              test['user'].unique(),
                                              number_of_recommendations)
    return recommendations.groupby('user')['item'].apply(
        lambda x: x).to_numpy().reshape((-1, number_of_recommendations))
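The closing `groupby('user')['item'].apply(lambda x: x)` is effectively an identity. A simpler equivalent for the reshape step (still assuming every user receives exactly `number_of_recommendations` items, which `batch.recommend` does not guarantee):

# stable sort keeps each user's recommendations in rank order,
# matching the grouped output above
items = recommendations.sort_values('user', kind='stable')['item'].to_numpy()
return items.reshape((-1, number_of_recommendations))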
Example #29
def test_ii_batch_recommend(ncpus):
    import lenskit.crossfold as xf
    from lenskit import batch, topn
    import lenskit.metrics.topn as lm

    if not os.path.exists('ml-100k/u.data'):
        pytest.skip('ml-100k data not available')

    ratings = pd.read_csv('ml-100k/u.data',
                          sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    algo = knn.ItemItem(30)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        cand_fun = topn.UnratedCandidates(train)
        recs = batch.recommend(algo,
                               test.user.unique(),
                               100,
                               cand_fun,
                               nprocs=ncpus)
        # combine with test ratings for relevance data
        res = pd.merge(recs, test, how='left', on=('user', 'item'))
        # fill in missing 0s
        res.loc[res.rating.isna(), 'rating'] = 0
        return res

    recs = pd.concat(
        (eval(train, test)
         for (train,
              test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))

    _log.info('analyzing recommendations')
    dcg = recs.groupby('user').rating.apply(lm.dcg)
    _log.info('DCG for %d users is %f', len(dcg), dcg.mean())
    assert dcg.mean() > 0
Example #30
def all_movie_recommends(ratings, optionList):
    all_recs = []
    test_data = []

    #Declare algorithm models
    basic_bias_model = basic.Bias()
    knn_model = iknn.ItemItem(20)
    knn_u_model = uknn.UserUser(20)
    als_b_model = als.BiasedMF(50)
    als_i_model = als.ImplicitMF(50)
    funk_model = funksvd.FunkSVD(50)

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          5, xf.SampleFrac(0.2)):
        test_data.append(test)

        for option in optionList:
            if option == 1:
                all_recs.append(
                    batch_eval('BasicBias', basic_bias_model, train, test))
            if option == 2:
                all_recs.append(batch_eval('ItemItem', knn_model, train, test))
            if option == 3:
                all_recs.append(
                    batch_eval('UserUser', knn_u_model, train, test))
            if option == 4:
                all_recs.append(
                    batch_eval('ALS-Biased', als_b_model, train, test))
            if option == 5:
                all_recs.append(
                    batch_eval('ALS-Implicit', als_i_model, train, test))
            if option == 6:
                all_recs.append(batch_eval('FunkSVD', funk_model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    return all_recs, test_data
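`batch_eval` is not defined in this example either; a hypothetical implementation consistent with its call signature (name, algorithm, train, test), mirroring the `eval` helper in Example #14:

from lenskit import batch, util
from lenskit.algorithms import Recommender

def batch_eval(aname, algo, train, test, n=100):
    # hypothetical helper: clone so the shared model instances
    # declared above are not mutated across folds
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    users = test.user.unique()
    recs = batch.recommend(fittable, users, n)
    recs['Algorithm'] = aname  # tag for per-algorithm analysis
    return recs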