def test_ii_impl_match(ml20m):
    sps = knn.ItemItem(20, min_sim=1.0e-6)
    sps._use_mkl = False
    timer = Stopwatch()
    _log.info('training SciPy %s on ml data', sps)
    sps.fit(ml20m)
    _log.info('trained SciPy in %s', timer)

    mkl = knn.ItemItem(20, min_sim=1.0e-6)
    timer = Stopwatch()
    _log.info('training MKL %s on ml data', mkl)
    mkl.fit(ml20m)
    _log.info('trained MKL in %s', timer)

    assert mkl.sim_matrix_.nnz == sps.sim_matrix_.nnz
    assert mkl.sim_matrix_.nrows == sps.sim_matrix_.nrows
    assert mkl.sim_matrix_.ncols == sps.sim_matrix_.ncols

    assert all(mkl.sim_matrix_.rowptrs == sps.sim_matrix_.rowptrs)
    for i in range(mkl.sim_matrix_.nrows):
        sp, ep = mkl.sim_matrix_.row_extent(i)
        assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0)
        assert all(np.diff(sps.sim_matrix_.values[sp:ep]) <= 0)
        assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(sps.sim_matrix_.colinds[sp:ep])
        assert all(np.abs(mkl.sim_matrix_.values[sp:ep] - sps.sim_matrix_.values[sp:ep]) < 1.0e-3)

def test_ii_save_load(tmp_path):
    "Save and load a model"
    tmp_path = lktu.norm_path(tmp_path)
    original = knn.ItemItem(30, save_nbrs=500)
    _log.info('building model')
    original.fit(lktu.ml_sample())

    fn = tmp_path / 'ii.mod'
    _log.info('saving model to %s', fn)
    original.save(fn)

    _log.info('reloading model')
    algo = knn.ItemItem(30)
    algo.load(fn)

    _log.info('checking model')
    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert all(algo.item_counts_ == original.item_counts_)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert algo.sim_matrix_.nnz == original.sim_matrix_.nnz
    assert all(algo.sim_matrix_.rowptrs == original.sim_matrix_.rowptrs)
    assert algo.sim_matrix_.values == approx(original.sim_matrix_.values)

    r_mat = algo.sim_matrix_
    o_mat = original.sim_matrix_
    assert all(r_mat.rowptrs == o_mat.rowptrs)

    for i in range(len(algo.item_index_)):
        sp = r_mat.rowptrs[i]
        ep = r_mat.rowptrs[i + 1]

        # everything is in decreasing order
        assert all(np.diff(r_mat.values[sp:ep]) <= 0)
        assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep])

    means = ml_ratings.groupby('item').rating.mean()
    assert means[algo.item_index_].values == approx(original.item_means_)

    matrix = lm.csr_to_scipy(algo.sim_matrix_)

    items = pd.Series(algo.item_index_)
    items = items[algo.item_counts_ > 0]
    for i in items.sample(50):
        ipos = algo.item_index_.get_loc(i)
        _log.debug('checking item %d at position %d', i, ipos)
        row = matrix.getrow(ipos)

        # it should be sorted!
        # check this by diffing the row values, and make sure they're negative
        assert all(np.diff(row.data) < 1.0e-6)

def test_ii_no_ratings():
    a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    a1.fit(ml_ratings.loc[:, ['user', 'item']])

    algo = knn.ItemItem(20, save_nbrs=100, feedback='implicit')
    algo.fit(ml_ratings)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)

    p2 = algo.predict_for_user(50, [1, 2, 42])
    preds, p2 = preds.align(p2)
    assert preds.values == approx(p2.values, nan_ok=True)

def test_ii_known_preds():
    from lenskit import batch

    algo = knn.ItemItem(20, min_sim=1.0e-6)
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)
    assert algo.center
    assert algo.item_means_ is not None
    _log.info('model means: %s', algo.item_means_)

    dir = Path(__file__).parent
    pred_file = dir / 'item-item-preds.csv'
    _log.info('reading known predictions from %s', pred_file)
    known_preds = pd.read_csv(str(pred_file))
    pairs = known_preds.loc[:, ['user', 'item']]

    preds = batch.predict(algo, pairs)
    merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds)
    assert len(merged) == len(preds)
    merged['error'] = merged.expected - merged.prediction

    try:
        assert not any(merged.prediction.isna() & merged.expected.notna())
    except AssertionError as e:
        bad = merged[merged.prediction.isna() & merged.expected.notna()]
        _log.error('erroneously missing or present predictions:\n%s', bad)
        raise e

    err = merged.error
    err = err[err.notna()]
    try:
        assert all(err.abs() < 0.03)  # FIXME this threshold is too high
    except AssertionError as e:
        bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)]
        _log.error('erroneous predictions:\n%s', bad)
        raise e

def test_ii_batch_accuracy():
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    ii_algo = knn.ItemItem(30)
    algo = basic.Fallback(ii_algo, bias.Bias())

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test, n_jobs=4)

    preds = pd.concat(
        (eval(train, test)
         for (train, test)
         in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.70, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.90, abs=0.05)

def user_movie_recommend(ratings, optionList, userId):
    all_recs = []

    for option in optionList:
        if option == 1:
            basic_bias_model = basic.Bias()
            all_recs.append(user_eval('BasicBias', basic_bias_model, ratings, userId))
        if option == 2:
            knn_model = iknn.ItemItem(20)
            all_recs.append(user_eval('ItemItem', knn_model, ratings, userId))
        if option == 3:
            knn_u_model = uknn.UserUser(20)
            all_recs.append(user_eval('UserUser', knn_u_model, ratings, userId))
        if option == 4:
            als_b_model = als.BiasedMF(50)
            all_recs.append(user_eval('ALS-Biased', als_b_model, ratings, userId))
        if option == 5:
            als_i_model = als.ImplicitMF(50)
            all_recs.append(user_eval('ALS-Implicit', als_i_model, ratings, userId))
        if option == 6:
            funk_model = funksvd.FunkSVD(50)
            all_recs.append(user_eval('FunkSVD', funk_model, ratings, userId))

    all_recs = pd.concat(all_recs, ignore_index=True)
    return all_recs

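The function above relies on a user_eval helper that is not defined in this snippet. A minimal sketch of what it might look like, assuming it trains the given algorithm on the full ratings frame and returns labeled top-N recommendations for the one requested user; the helper name, the n parameter, and the 'Algorithm' column are assumptions modeled on the eval helpers elsewhere in this collection:

from lenskit import util
from lenskit.algorithms import Recommender

def user_eval(aname, algo, ratings, userId, n=10):
    # clone the algorithm so repeated calls do not share fitted state
    fittable = util.clone(algo)
    # adapt it so rating predictors can also produce top-N lists
    fittable = Recommender.adapt(fittable)
    fittable.fit(ratings)
    # recommend n items for the requested user
    recs = fittable.recommend(userId, n)
    # tag rows with the algorithm name so concatenated results stay analyzable
    recs['Algorithm'] = aname
    return recs
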
def test_ii_train():
    algo = knn.ItemItem(30, save_nbrs=500)
    algo.fit(simple_ratings)

    assert isinstance(algo.item_index_, pd.Index)
    assert isinstance(algo.item_means_, np.ndarray)
    assert isinstance(algo.item_counts_, np.ndarray)
    matrix = algo.sim_matrix_.to_scipy()

    # 6 is a neighbor of 7
    six, seven = algo.item_index_.get_indexer([6, 7])
    _log.info('six: %d', six)
    _log.info('seven: %d', seven)
    _log.info('matrix: %s', algo.sim_matrix_)
    assert matrix[six, seven] > 0

    # and has the correct score
    six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating
    six_v = six_v - six_v.mean()
    seven_v = simple_ratings[simple_ratings.item == 7].set_index('user').rating
    seven_v = seven_v - seven_v.mean()
    denom = la.norm(six_v.values) * la.norm(seven_v.values)
    six_v, seven_v = six_v.align(seven_v, join='inner')
    num = six_v.dot(seven_v)
    assert matrix[six, seven] == approx(num / denom, 0.01)

    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

def test_algorithms():
    # data = MovieLens('ml-latest-small')
    data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50)
    ]

    pairs = list(partition_users(ratings[['user', 'item', 'rating']], 5, SampleFrac(0.2)))
    eval_algorithms(dataset=pairs, algorithms=algorithms)
    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((p.test for p in pairs), ignore_index=True)
    ndcg_means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(ndcg_means)

    plot_comparison(ndcg_means)

def _create_non_social_recommender_algorithm(algo_name, aggregation):
    if algo_name == 'ii':
        algo = knn.ItemItem(NEIGHBORS)
    elif algo_name == 'als':
        algo = als.BiasedMF(NUM_FEATURES)
    else:
        # fail fast instead of falling through with an unbound algo
        raise ValueError('unknown algorithm: ' + algo_name)
    return SocialRecommenderAlgorithmFactory._create_recommender_algorithm_with_fallback(
        algo, aggregation)

def _train_ii():
    algo = knn.ItemItem(20, min_sim=1.0e-6)
    timer = Stopwatch()
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)
    _log.info('trained in %s', timer)

    shr = persist(algo)
    return shr.transfer()

def eval(train, test):
    _log.info('running training')
    algo = knn.ItemItem(30)
    algo = Recommender.adapt(algo)
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus)
    return recs

def _build_predict(ratings, fold):
    algo = Fallback(knn.ItemItem(20), Bias(5))
    train = ratings[ratings['partition'] != fold]
    algo.fit(train)

    test = ratings[ratings['partition'] == fold]
    preds = batch.predict(algo, test, n_jobs=1)
    return preds

def itemKNN(self, nnbrs, aggregate, center, min_nbrs=3):
    algoname = "itemKNN"
    item_item = item_knn.ItemItem(nnbrs=nnbrs, min_nbrs=min_nbrs,
                                  aggregate=aggregate, center=center)
    eval = self.eval(algoname, item_item)
    print("ItemKNN was fitted.")
    return eval

class LegMedLensKit():
    def loadData():
        ratings = pd.read_csv('/Users/josse/Desktop/ratings.dat', sep='::',
                              names=['user', 'item', 'rating', 'timestamp'])
        print(ratings.head())
        return ratings

    #print ("test")
    ratings = loadData()
    data_matrix = np.array(ratings.pivot(index='item', columns='user', values='rating'))
    print(data_matrix)
    data_matrix_rev = np.nan_to_num(data_matrix)
    print(data_matrix_rev)

    algo_ii = knn.ItemItem(20)
    algo_als = als.BiasedMF(50)

    def eval(aname, algo, train, test):
        print("test")
        fittable = util.clone(algo)
        fittable = Recommender.adapt(fittable)
        fittable.fit(train)
        users = test.user.unique()
        # now we run the recommender
        recs = batch.recommend(fittable, users, 100)
        # add the algorithm name for analyzability
        recs['Algorithm'] = aname
        print("recs")
        print(recs.head())
        return recs

    all_recs = []
    test_data = []
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 1, xf.SampleFrac(0.2)):
        test_data.append(test)
        #print(test.head(10))
        all_recs.append(eval('ItemItem', algo_ii, train, test))
        all_recs.append(eval('ALS', algo_als, train, test))

    print("test2")
    all_recs = pd.concat(all_recs, ignore_index=True)
    print(all_recs.head())
    test_data = pd.concat(test_data, ignore_index=True)
    #print(test_data.head)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    results.head()

    results.groupby('Algorithm').ndcg.mean()
    results.groupby('Algorithm').ndcg.mean().plot.bar()

def test_ii_simple_predict():
    algo = knn.ItemItem(30, save_nbrs=500)
    algo.fit(simple_ratings)

    res = algo.predict_for_user(3, [6])
    assert res is not None
    assert len(res) == 1
    assert 6 in res.index
    assert not np.isnan(res.loc[6])

def test_ii_simple_implicit_predict():
    algo = knn.ItemItem(30, center=False, aggregate='sum')
    algo.fit(simple_ratings.loc[:, ['user', 'item']])

    res = algo.predict_for_user(3, [6])
    assert res is not None
    assert len(res) == 1
    assert 6 in res.index
    assert not np.isnan(res.loc[6])
    assert res.loc[6] > 0

def test_ii_train_ml100k(tmp_path):
    "Test an unbounded model on ML-100K"
    tmp_path = lktu.norm_path(tmp_path)
    ratings = lktu.ml100k.load_ratings()
    algo = knn.ItemItem(30)
    _log.info('training model')
    algo.fit(ratings)

    _log.info('testing model')
    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz

    means = ratings.groupby('item').rating.mean()
    assert means[algo.item_index_].values == approx(algo.item_means_)

    # save
    fn = tmp_path / 'ii.mod'
    _log.info('saving model to %s', fn)
    algo.save(fn)

    _log.info('reloading model')
    restored = knn.ItemItem(30)
    restored.load(fn)
    assert all(restored.sim_matrix_.values > 0)

    r_mat = restored.sim_matrix_
    o_mat = algo.sim_matrix_
    assert all(r_mat.rowptrs == o_mat.rowptrs)

    for i in range(len(restored.item_index_)):
        sp = r_mat.rowptrs[i]
        ep = r_mat.rowptrs[i + 1]

        # everything is in decreasing order
        assert all(np.diff(r_mat.values[sp:ep]) <= 0)
        assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep])

def test_ii_implicit_save_load(tmp_path, ml_subset):
    "Save and load a model"
    original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate='sum')
    _log.info('building model')
    original.fit(ml_subset.loc[:, ['user', 'item']])

    fn = tmp_path / 'ii.mod'
    _log.info('saving model to %s', fn)
    with fn.open('wb') as modf:
        pickle.dump(original, modf)

    _log.info('reloading model')
    with fn.open('rb') as modf:
        algo = pickle.load(modf)

    _log.info('checking model')
    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert all(algo.item_counts_ == original.item_counts_)
    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert algo.sim_matrix_.nnz == original.sim_matrix_.nnz
    assert all(algo.sim_matrix_.rowptrs == original.sim_matrix_.rowptrs)
    assert algo.sim_matrix_.values == approx(original.sim_matrix_.values)
    assert algo.rating_matrix_.values is None

    r_mat = algo.sim_matrix_
    o_mat = original.sim_matrix_
    assert all(r_mat.rowptrs == o_mat.rowptrs)

    for i in range(len(algo.item_index_)):
        sp = r_mat.rowptrs[i]
        ep = r_mat.rowptrs[i + 1]

        # everything is in decreasing order
        assert all(np.diff(r_mat.values[sp:ep]) <= 0)
        assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep])

    assert algo.item_means_ is None

    matrix = algo.sim_matrix_.to_scipy()

    items = pd.Series(algo.item_index_)
    items = items[algo.item_counts_ > 0]
    for i in items.sample(50):
        ipos = algo.item_index_.get_loc(i)
        _log.debug('checking item %d at position %d', i, ipos)
        row = matrix.getrow(ipos)

        # it should be sorted!
        # check this by diffing the row values, and make sure they're negative
        assert all(np.diff(row.data) < 1.0e-6)

def test_ii_impl_match():
    sps = knn.ItemItem(20, min_sim=1.0e-6)
    sps._use_mkl = False
    _log.info('training SciPy %s on ml data', sps)
    sps.fit(lktu.ml_test.ratings)

    mkl = knn.ItemItem(20, min_sim=1.0e-6)
    _log.info('training MKL %s on ml data', mkl)
    mkl.fit(lktu.ml_test.ratings)

    assert mkl.sim_matrix_.nnz == sps.sim_matrix_.nnz
    assert mkl.sim_matrix_.nrows == sps.sim_matrix_.nrows
    assert mkl.sim_matrix_.ncols == sps.sim_matrix_.ncols

    assert all(mkl.sim_matrix_.rowptrs == sps.sim_matrix_.rowptrs)
    for i in range(mkl.sim_matrix_.nrows):
        sp, ep = mkl.sim_matrix_.row_extent(i)
        assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0)
        assert all(np.diff(sps.sim_matrix_.values[sp:ep]) <= 0)
        assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(sps.sim_matrix_.colinds[sp:ep])

def test_ii_old_implicit():
    algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]
    algo.fit(data)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)

def test_ii_warn_duplicates():
    extra = pd.DataFrame.from_records([(3, 7, 4.5)], columns=['user', 'item', 'rating'])
    ratings = pd.concat([simple_ratings, extra])
    algo = knn.ItemItem(5)
    algo.fit(ratings)

    try:
        with pytest.warns(DataWarning):
            algo.predict_for_user(3, [6])
    except AssertionError:
        pass  # this is fine

def test_ii_train_big_unbounded():
    "Simple tests for unbounded models"
    algo = knn.ItemItem(30)
    algo.fit(ml_ratings)

    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values)))
    assert all(algo.sim_matrix_.values > 0)
    # a little tolerance
    assert all(algo.sim_matrix_.values < 1 + 1.0e-6)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz

    means = ml_ratings.groupby('item').rating.mean()
    assert means[algo.item_index_].values == approx(algo.item_means_)

def get_algo_class(self, algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.Bias(users=False)
    elif algo == 'topn':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return iknn.ItemItem(nnbrs=-1)
    elif algo == 'useruser':
        return uknn.UserUser(nnbrs=5)
    elif algo == 'biasedmf':
        return als.BiasedMF(50, iterations=10)
    elif algo == 'implicitmf':
        return als.ImplicitMF(20, iterations=10)
    elif algo == 'funksvd':
        return svd.FunkSVD(20, iterations=20)

def get_topn_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return basic.TopN(iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum'))
    elif algo == 'useruser':
        return basic.TopN(uknn.UserUser(nnbrs=5, center=False, aggregate='sum'))
    elif algo == 'biasedmf':
        return basic.TopN(als.BiasedMF(50, iterations=10))
    elif algo == 'implicitmf':
        return basic.TopN(als.ImplicitMF(20, iterations=10))
    elif algo == 'funksvd':
        return basic.TopN(svd.FunkSVD(20, iterations=20))
    elif algo == 'bpr':
        return basic.TopN(BPR(25))

def test_ii_implicit_fast_ident():
    algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]
    algo.fit(data)

    assert algo.item_counts_.sum() == algo.sim_matrix_.nnz
    assert all(algo.sim_matrix_.values > 0)
    assert all(algo.item_counts_ <= 100)

    preds = algo.predict_for_user(50, [1, 2, 42])
    assert all(preds[preds.notna()] > 0)
    assert np.isnan(preds.iloc[2])

    algo.min_sim = -1  # force it to take the slow path for all predictions
    p2 = algo.predict_for_user(50, [1, 2, 42])
    assert preds.values[:2] == approx(p2.values[:2])
    assert np.isnan(p2.iloc[2])

def test_algorithms():
    data = MovieLens('ml-latest-small')
    # data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = {
        'Bias': basic.Bias(damping=5),
        'Popular': basic.Popular(),
        'ItemItem': item_knn.ItemItem(20),
        'UserUser': user_knn.UserUser(20),
        'BiasedMF': als.BiasedMF(50),
        'ImplicitMF': als.ImplicitMF(50),
        'FunkSVD': funksvd.FunkSVD(50)
    }

    all_recs, test_data = eval_algos(ratings, algorithms)
    ndcg_means = eval_ndcg(all_recs, test_data)
    print('NDCG means:')
    print(ndcg_means)

    plot_comparison(ndcg_means)

def test_ii_batch_recommend(ncpus):
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    if not os.path.exists('ml-100k/u.data'):
        raise pytest.skip()

    ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    algo = knn.ItemItem(30)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        cand_fun = topn.UnratedCandidates(train)
        recs = batch.recommend(algo, test.user.unique(), 100, cand_fun, n_jobs=ncpus)
        return recs

    test_frames = []
    recs = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        test_frames.append(test)
        recs.append(eval(train, test))

    test = pd.concat(test_frames)
    recs = pd.concat(recs)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %f', len(dcg), dcg.mean())
    assert dcg.mean() > 0.03

def run(self, strategy_context: RecommenderAlgorithmStrategyContext) -> np.ndarray:
    data_set_source = strategy_context.data_set_source
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(data_set_source)
    data_set: DataFrame = data_frame_reader.parse(DataFrameReaderStrategyContext(data_set_source))

    partition = list(partition_users(data=data_set, partitions=1,
                                     method=crossfold.SampleFrac(0.2)))[0]
    test, train = partition.test, partition.train
    number_of_recommendations = strategy_context.number_of_recommendations
    algorithm = Recommender.adapt(item_knn.ItemItem(number_of_recommendations))
    trained_algorithm = algorithm.fit(train)

    recommendations = lenskit.batch.recommend(trained_algorithm, test['user'].unique(),
                                              number_of_recommendations)
    return recommendations.groupby('user')['item'].apply(lambda x: x).to_numpy().reshape(
        (-1, number_of_recommendations))

def test_ii_batch_recommend(ncpus):
    import lenskit.crossfold as xf
    from lenskit import batch, topn
    import lenskit.metrics.topn as lm

    if not os.path.exists('ml-100k/u.data'):
        raise pytest.skip()

    ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    algo = knn.ItemItem(30)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        cand_fun = topn.UnratedCandidates(train)
        recs = batch.recommend(algo, test.user.unique(), 100, cand_fun, nprocs=ncpus)
        # combine with test ratings for relevance data
        res = pd.merge(recs, test, how='left', on=('user', 'item'))
        # fill in missing 0s
        res.loc[res.rating.isna(), 'rating'] = 0
        return res

    recs = pd.concat(
        (eval(train, test)
         for (train, test)
         in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))

    _log.info('analyzing recommendations')
    dcg = recs.groupby('user').rating.apply(lm.dcg)
    _log.info('DCG for %d users is %f', len(dcg), dcg.mean())
    assert dcg.mean() > 0

def all_movie_recommends(ratings, optionList):
    all_recs = []
    test_data = []

    # Declare algorithm models
    basic_bias_model = basic.Bias()
    knn_model = iknn.ItemItem(20)
    knn_u_model = uknn.UserUser(20)
    als_b_model = als.BiasedMF(50)
    als_i_model = als.ImplicitMF(50)
    funk_model = funksvd.FunkSVD(50)

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test)
        for option in optionList:
            if option == 1:
                all_recs.append(batch_eval('BasicBias', basic_bias_model, train, test))
            if option == 2:
                all_recs.append(batch_eval('ItemItem', knn_model, train, test))
            if option == 3:
                all_recs.append(batch_eval('UserUser', knn_u_model, train, test))
            if option == 4:
                all_recs.append(batch_eval('ALS-Biased', als_b_model, train, test))
            if option == 5:
                all_recs.append(batch_eval('ALS-Implicit', als_i_model, train, test))
            if option == 6:
                all_recs.append(batch_eval('FunkSVD', funk_model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    return all_recs, test_data

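The loop above calls a batch_eval helper that is not shown here. A minimal sketch of such a helper, modeled on the eval function in the LegMedLensKit example earlier in this collection; the helper name and the list length of 100 are assumptions:

from lenskit import batch, util
from lenskit.algorithms import Recommender

def batch_eval(aname, algo, train, test, n=100):
    # clone so the shared model objects are not mutated across folds
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    users = test.user.unique()
    # produce a top-n list for every test user
    recs = batch.recommend(fittable, users, n)
    # label the rows so results from different algorithms can be compared
    recs['Algorithm'] = aname
    return recs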