def test_tf_bpr_general(tmp_path, tf_session):
    "Training, saving, loading, and using a BPR model."
    fn = tmp_path / 'bias.bpk'
    ratings = lktu.ml_test.ratings

    original = lktf.BPR(20, batch_size=1024, epochs=20, neg_count=2, rng_spec=42)
    original.fit(ratings)

    ue = original.model.get_layer('user-embed')
    assert ue.get_weights()[0].shape == (ratings.user.nunique(), 20)
    ie = original.model.get_layer('item-embed')
    assert ie.get_weights()[0].shape == (ratings.item.nunique(), 20)

    binpickle.dump(original, fn)
    _log.info('serialized to %d bytes', fn.stat().st_size)
    algo = binpickle.load(fn)

    # does scoring work?
    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())

    # can we include a nonexistent item?
    preds = algo.predict_for_user(100, [5, 10, 230413804])
    assert len(preds) == 3
    assert all(preds.loc[[230413804]].isna())
    assert preds.isna().sum() == 1
def test_tf_bpr_batch_accuracy(tf_session):
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    ratings = lktu.ml100k.ratings

    algo = lktf.BPR(20, batch_size=1024, epochs=20, rng_spec=42)
    algo = Recommender.adapt(algo)

    all_recs = []
    all_test = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, np.unique(test.user), 50)
        all_recs.append(recs)
        all_test.append(test)

    _log.info('analyzing results')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    rla.add_metric(topn.recip_rank)
    scores = rla.compute(pd.concat(all_recs, ignore_index=True),
                         pd.concat(all_test, ignore_index=True),
                         include_missing=True)
    scores.fillna(0, inplace=True)
    _log.info('MRR: %f', scores['recip_rank'].mean())
    _log.info('nDCG: %f', scores['ndcg'].mean())
    assert scores['ndcg'].mean() > 0.1
def get_topn_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return basic.TopN(iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum'))
    elif algo == 'useruser':
        return basic.TopN(uknn.UserUser(nnbrs=5, center=False, aggregate='sum'))
    elif algo == 'biasedmf':
        return basic.TopN(als.BiasedMF(50, iterations=10))
    elif algo == 'implicitmf':
        return basic.TopN(als.ImplicitMF(20, iterations=10))
    elif algo == 'funksvd':
        return basic.TopN(svd.FunkSVD(20, iterations=20))
    elif algo == 'bpr':
        return basic.TopN(BPR(25))
    elif algo == 'tf_bpr':
        return basic.TopN(lktf.BPR(20, batch_size=1024, epochs=5, neg_count=2,
                                   rng_spec=42))
def get_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.Bias(users=False)
    elif algo == 'topn':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return iknn.ItemItem(nnbrs=-1)
    elif algo == 'useruser':
        return uknn.UserUser(nnbrs=5)
    elif algo == 'biasedmf':
        return als.BiasedMF(50, iterations=10)
    elif algo == 'implicitmf':
        return als.ImplicitMF(20, iterations=10)
    elif algo == 'funksvd':
        return svd.FunkSVD(20, iterations=20)
    elif algo == 'tf_bpr':
        return lktf.BPR(20, batch_size=1024, epochs=5, neg_count=2, rng_spec=42)
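# The two factories above are normally driven by code that looks an algorithm up
# by name, fits it, and produces recommendations.  The sketch below illustrates
# that pattern; the CSV path, column mapping, and the recommend_with() helper are
# assumptions for illustration, not part of the original code.
import pandas as pd
from lenskit.algorithms import Recommender
from lenskit import batch


def recommend_with(algo_name, ratings, n=10):
    "Fit the named top-N algorithm and recommend n items for every user."
    algo = get_topn_algo_class(algo_name)
    algo = Recommender.adapt(algo)   # no-op for TopN, but safe for plain predictors
    algo.fit(ratings)
    return batch.recommend(algo, ratings['user'].unique(), n)


ratings = pd.read_csv('ml-latest-small/ratings.csv')   # assumed data location
ratings = ratings.rename(columns={'userId': 'user', 'movieId': 'item'})
recs = recommend_with('itemitem', ratings)
print(recs.head())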
movie_titles["vector"] = np.nan movie_titles["vector"] = movie_titles["vector"].astype('object') for index, item in enumerate(movie_titles["title"]): vector = np.mean(np.array([new_model[x] for x in item]), axis=0) vector = np.concatenate([vector, one_hot[index]]) movie_titles.at[index, 'vector'] = np.ndarray.tolist(vector) movie_titles["title"] = movie_titles["title"].apply(lambda x: " ".join(x)) movie_titles = pd.merge(movie_titles, output_df, on="title") movie_titles = movie_titles.drop(columns=["genres"]) movie_titles.rename(columns={"vector": "item", "userId": "user"}, inplace=True) movie_titles["item"] = movie_titles["item"].apply(lambda x: str(x)) print(movie_titles.head()) net = tf.BPR(features=30, epochs=80) net = net.fit(movie_titles.reset_index(drop=True)) un_user = output_df.userId.unique() result = pd.Series() full_result = pd.DataFrame() i = 0 printProgressBar(0, len(un_user), prefix='Progress:', suffix='Complete', length=50) for user in un_user: result = pd.DataFrame(
""" This module defines the algorithms, and their default configurations, that we are going to use. """ from lenskit.algorithms import item_knn, user_knn, als, tf from lenskit.algorithms import basic Bias = basic.Bias(damping=5) Pop = basic.Popular() II = item_knn.ItemItem(20, save_nbrs=2500) UU = user_knn.UserUser(30) ALS = als.BiasedMF(50) IALS = als.ImplicitMF(50) BPR = tf.BPR(50) TFMF = tf.IntegratedBiasMF(50)