Example #1
def test_tf_bpr_general(tmp_path, tf_session):
    "Training, saving, loading, and using a BPR model."
    fn = tmp_path / 'bias.bpk'
    ratings = lktu.ml_test.ratings

    original = lktf.BPR(20,
                        batch_size=1024,
                        epochs=20,
                        neg_count=2,
                        rng_spec=42)
    original.fit(ratings)
    ue = original.model.get_layer('user-embed')
    assert ue.get_weights()[0].shape == (ratings.user.nunique(), 20)
    ie = original.model.get_layer('item-embed')
    assert ie.get_weights()[0].shape == (ratings.item.nunique(), 20)

    binpickle.dump(original, fn)

    _log.info('serialized to %d bytes', fn.stat().st_size)
    algo = binpickle.load(fn)

    # does scoring work?
    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())

    # can we include a nonexistent item?
    preds = algo.predict_for_user(100, [5, 10, 230413804])
    assert len(preds) == 3
    assert all(preds.loc[[230413804]].isna())
    assert preds.isna().sum() == 1
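
A short follow-on sketch (not part of the original test; the candidate-set construction is an assumption) showing how the reloaded scorer can rank unseen items: score a candidate set with predict_for_user and keep the largest predictions.

# Hypothetical continuation: rank unrated items for user 100 with the loaded model.
seen = ratings.loc[ratings['user'] == 100, 'item'].unique()
candidates = np.setdiff1d(ratings['item'].unique(), seen)
scores = algo.predict_for_user(100, candidates)
top10 = scores.nlargest(10)  # highest-scored items first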
Example #2
def test_tf_bpr_batch_accuracy(tf_session):
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    ratings = lktu.ml100k.ratings

    algo = lktf.BPR(20, batch_size=1024, epochs=20, rng_spec=42)
    algo = Recommender.adapt(algo)

    all_recs = []
    all_test = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, np.unique(test.user), 50)
        all_recs.append(recs)
        all_test.append(test)

    _log.info('analyzing results')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    rla.add_metric(topn.recip_rank)
    scores = rla.compute(pd.concat(all_recs, ignore_index=True),
                         pd.concat(all_test, ignore_index=True),
                         include_missing=True)
    scores.fillna(0, inplace=True)
    _log.info('MRR: %f', scores['recip_rank'].mean())
    _log.info('nDCG: %f', scores['ndcg'].mean())
    assert scores['ndcg'].mean() > 0.1
Example #3
def get_topn_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return basic.TopN(
            iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum'))
    elif algo == 'useruser':
        return basic.TopN(uknn.UserUser(nnbrs=5, center=False,
                                        aggregate='sum'))
    elif algo == 'biasedmf':
        return basic.TopN(als.BiasedMF(50, iterations=10))
    elif algo == 'implicitmf':
        return basic.TopN(als.ImplicitMF(20, iterations=10))
    elif algo == 'funksvd':
        return basic.TopN(svd.FunkSVD(20, iterations=20))
    elif algo == 'bpr':
        return basic.TopN(BPR(25))
    elif algo == 'tf_bpr':
        return basic.TopN(
            lktf.BPR(20, batch_size=1024, epochs=5, neg_count=2, rng_spec=42))
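
A minimal usage sketch for the factory above (the ratings frame and user id are assumptions, not part of the source): every branch returns a recommender, so the result can be fit and queried directly.

# Hypothetical usage of get_topn_algo_class: build, train, and query a recommender.
algo = get_topn_algo_class('itemitem')
algo.fit(ratings)                # ratings: DataFrame with user/item/rating columns
recs = algo.recommend(42, n=10)  # top-10 item ids with scores for user 42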
Example #4
def get_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.Bias(users=False)
    elif algo == 'topn':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return iknn.ItemItem(nnbrs=-1)
    elif algo == 'useruser':
        return uknn.UserUser(nnbrs=5)
    elif algo == 'biasedmf':
        return als.BiasedMF(50, iterations=10)
    elif algo == 'implicitmf':
        return als.ImplicitMF(20, iterations=10)
    elif algo == 'funksvd':
        return svd.FunkSVD(20, iterations=20)
    elif algo == 'tf_bpr':
        return lktf.BPR(20,
                        batch_size=1024,
                        epochs=5,
                        neg_count=2,
                        rng_spec=42)
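
Unlike the previous factory, most branches here return bare predictors rather than recommenders, so a usage sketch (names assumed) calls predict_for_user instead of recommend.

# Hypothetical usage of get_algo_class: train a rating predictor and score a few items.
algo = get_algo_class('biasedmf')
algo.fit(ratings)                             # ratings: user/item/rating DataFrame
preds = algo.predict_for_user(42, [1, 2, 3])  # pandas Series of predicted scores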
movie_titles["vector"] = np.nan
movie_titles["vector"] = movie_titles["vector"].astype('object')
for index, item in enumerate(movie_titles["title"]):
    vector = np.mean(np.array([new_model[x] for x in item]), axis=0)
    vector = np.concatenate([vector, one_hot[index]])
    movie_titles.at[index, 'vector'] = np.ndarray.tolist(vector)

movie_titles["title"] = movie_titles["title"].apply(lambda x: " ".join(x))
movie_titles = pd.merge(movie_titles, output_df, on="title")
movie_titles = movie_titles.drop(columns=["genres"])
movie_titles.rename(columns={"vector": "item", "userId": "user"}, inplace=True)
movie_titles["item"] = movie_titles["item"].apply(lambda x: str(x))
print(movie_titles.head())

# Here `tf` is lenskit.algorithms.tf; fit() trains the BPR model and returns it.
net = tf.BPR(features=30, epochs=80)
net = net.fit(movie_titles.reset_index(drop=True))

un_user = output_df.userId.unique()

result = pd.Series()
full_result = pd.DataFrame()

i = 0
printProgressBar(0,
                 len(un_user),
                 prefix='Progress:',
                 suffix='Complete',
                 length=50)
for user in un_user:
    result = pd.DataFrame(
Example #6
"""
This module defines the algorithms, and their default configurations, that
we are going to use.
"""

from lenskit.algorithms import item_knn, user_knn, als, tf
from lenskit.algorithms import basic

Bias = basic.Bias(damping=5)
Pop = basic.Popular()
II = item_knn.ItemItem(20, save_nbrs=2500)
UU = user_knn.UserUser(30)
ALS = als.BiasedMF(50)
IALS = als.ImplicitMF(50)
BPR = tf.BPR(50)
TFMF = tf.IntegratedBiasMF(50)
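
A possible way to consume this module (not shown in the source; clone, Recommender.adapt, and the ratings frame are assumptions about intended use): copy one of the configured instances before fitting so the shared defaults stay untouched between experiments.

# Hypothetical usage of the configured algorithms above.
from lenskit.algorithms import Recommender
from lenskit.util import clone

algo = Recommender.adapt(clone(II))  # fresh ItemItem with this module's defaults
algo.fit(ratings)                    # ratings: user/item/rating DataFrame (assumed)
recs = algo.recommend(1, n=20)       # top-20 recommendations for user 1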