def test_fill_users():
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)

    algo = UserUser(20, min_nbrs=10)
    algo = Recommender.adapt(algo)

    splits = xf.sample_users(ml_test.ratings, 1, 50, xf.SampleN(5))
    train, test = next(splits)
    algo.fit(train)

    rec_users = test['user'].sample(50).unique()
    recs = batch.recommend(algo, rec_users, 25)

    scores = rla.compute(recs, test, include_missing=True)
    assert len(scores) == test['user'].nunique()
    assert scores['recall'].notna().sum() == len(rec_users)
    assert all(scores['ntruth'] == 5)

    mscores = rla.compute(recs, test)
    assert len(mscores) < len(scores)

    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
    recall, mrecall = recall.align(mscores['recall'])
    assert all(recall == mrecall)
def test_adv_fill_users():
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)

    a_uu = UserUser(30, min_nbrs=10)
    a_uu = Recommender.adapt(a_uu)
    a_ii = ItemItem(20, min_nbrs=4)
    a_ii = Recommender.adapt(a_ii)

    splits = xf.sample_users(ml_test.ratings, 2, 50, xf.SampleN(5))
    all_recs = {}
    all_test = {}
    for i, (train, test) in enumerate(splits):
        a_uu.fit(train)
        rec_users = test['user'].sample(50).unique()
        all_recs[(i + 1, 'UU')] = batch.recommend(a_uu, rec_users, 25)

        a_ii.fit(train)
        rec_users = test['user'].sample(50).unique()
        all_recs[(i + 1, 'II')] = batch.recommend(a_ii, rec_users, 25)

        all_test[i + 1] = test

    recs = pd.concat(all_recs, names=['part', 'algo'])
    recs.reset_index(['part', 'algo'], inplace=True)
    recs.reset_index(drop=True, inplace=True)

    test = pd.concat(all_test, names=['part'])
    test.reset_index(['part'], inplace=True)
    test.reset_index(drop=True, inplace=True)

    scores = rla.compute(recs, test, include_missing=True)
    inames = scores.index.names
    scores.sort_index(inplace=True)
    assert len(scores) == 50 * 4
    assert all(scores['ntruth'] == 5)
    assert scores['recall'].isna().sum() > 0
    _log.info('scores:\n%s', scores)

    ucounts = scores.reset_index().groupby('algo')['user'].agg(['count', 'nunique'])
    assert all(ucounts['count'] == 100)
    assert all(ucounts['nunique'] == 100)

    mscores = rla.compute(recs, test)
    mscores = mscores.reset_index().set_index(inames)
    mscores.sort_index(inplace=True)
    assert len(mscores) < len(scores)
    _log.info('mscores:\n%s', mscores)

    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
    recall, mrecall = recall.align(mscores['recall'])
    assert all(recall == mrecall)
def __init__(self, nnbrs, min_nbrs=1, min_sim=0, center=True, aggregate='weighted-average'):
    algo = UserUser(nnbrs, min_nbrs, min_sim, center, aggregate)
    fallback = Bias()
    Fallback.__init__(self, [algo, fallback])
def __init__(self, mysql):
    self.mysql = mysql
    self.connection = mysql.get_connection()

    self.movies = self.read_table(
        """select * from movielenstable
           WHERE title IS NOT NULL AND genres IS NOT NULL;"""
    )
    self.movies.columns = ['item', 'title', 'genres']

    self.ratings = self.read_table(
        """select * from lensratings WHERE rating IS NOT NULL;"""
    )
    self.ratings.columns = ['user', 'item', 'rating']

    self.user_user = UserUser(15, min_nbrs=3)
    self.algorithm = Recommender.adapt(self.user_user)
    self.algorithm.fit(self.ratings)
def predictRatingForUnseenMovies(self, userMovieRatings, predictConfigDict):
    numOfRecom, maxNumOfNeigh, minNumOfNeigh = \
        self._validatePredictConfig(predictConfigDict)
    if None in (numOfRecom, maxNumOfNeigh, minNumOfNeigh):
        return False, None

    userUser = UserUser(maxNumOfNeigh, min_nbrs=minNumOfNeigh)
    algo = Recommender.adapt(userUser)
    algo.fit(self.movieDataset.ratings)
    userRecom = algo.recommend(self.NON_EXISTING_USER, numOfRecom,
                               ratings=pd.Series(userMovieRatings))
    return True, userRecom
def default(implicit):
    if implicit:
        return UserUser(30, aggregate='sum', center=False)
    else:
        return UserUser(30)
def instantiate(opts, implicit):
    nnbrs, smin = opts
    if implicit:
        return UserUser(nnbrs, min_sim=smin, aggregate='sum', center=False)
    else:
        return UserUser(nnbrs, min_sim=smin)
We're guiding how the algorithm decides which group of users to treat as a neighborhood by setting a minimum and maximum neighborhood size. These parameters change the results of the algorithm. Really small neighborhoods represent groups of people who aren't very similar to many others, so keeping the neighborhood size small will surface more unconventional recommendations. Increasing the minimum neighborhood size will probably give more conventionally popular recommendations. Right now, we set the minimum to 3 and the maximum to 15, so the algorithm won't make a prediction unless it can find at least 3 similar users, and it will use the 15 closest users (at most) to make rating predictions.

**Step 4.1**
"""

from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser

num_recs = 10  # <---- This is the number of recommendations to generate. You can change this if you want to see more recommendations.

user_user = UserUser(15, min_nbrs=3)  # These two numbers set the minimum (3) and maximum (15) number of neighbors to consider. They are considered "reasonable defaults," but you can experiment with others too.
algo = Recommender.adapt(user_user)
algo.fit(data.ratings)

print("Set up a User-User algorithm!")

"""Now that the algorithm has been fit to the ratings data, we can give it our personal ratings to get the top 10 recommended movies for USER1 and USER2. For each of us, the User-User algorithm will find a neighborhood of users similar to us based on their movie ratings. It will look at movies that these similar users have rated but that we haven't seen yet, and based on their ratings it will predict how we might rate each movie if we watched it. Finally, it will sort these predictions in descending order and print them as our "top 10."

**Step 4.2**
"""

jabril_recs = algo.recommend(-1, num_recs, ratings=pd.Series(jabril_rating_dict))  # Here, -1 tells it that this isn't an existing user in the set and that we're supplying new ratings, while num_recs (10) is how many recommendations it should generate.

joined_data = jabril_recs.join(data.movies['genres'], on='item')
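To make the shape of this step concrete, here is a minimal sketch of the same call with a made-up rating dictionary standing in for `jabril_rating_dict` (which is defined earlier in the notebook); the item ids and star values below are illustrative only, and the sketch reuses the `algo`, `num_recs`, `data`, and `pd` objects set up above.

# Minimal sketch with assumed data: keys are MovieLens item ids, values are
# 1-5 star ratings. Swap in your own ratings dictionary here.
example_rating_dict = {1: 4.5, 50: 5.0, 296: 4.0, 318: 5.0, 356: 3.5}

# Recommend for a "new" user (-1) based on those ratings, then attach genres.
example_recs = algo.recommend(-1, num_recs, ratings=pd.Series(example_rating_dict))
example_joined = example_recs.join(data.movies['genres'], on='item')
print(example_joined)  # item, score, and genres for each recommended movie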
def generate_model(data, min_neighbours, max_neighbours):
    user_user = UserUser(max_neighbours, min_nbrs=min_neighbours)
    algo = Recommender.adapt(user_user)
    algo.fit(data.ratings)
    return algo
from lenskit.algorithms.user_knn import UserUser

from recsys.cf.usercf import UserCF
from recsys.utils.data import load_movielen_data
from recsys.utils.debug import Timer, LogUtil

LogUtil.configLog()
ratings, users, movies = load_movielen_data()

model0 = UserCF(min_threshold=0.1, min_nn=5, max_nn=20)
model0.fit(ratings)

model = UserUser(nnbrs=20, min_nbrs=5, min_sim=0.1, center=False)
model.fit(ratings)

user = 1
movies = list(movies.item.astype(int))
movies = [1]  # restrict the timing run to a single item; drop this line to score the full item list

clock = Timer()
for _ in range(5):
    df = model.predict_for_user(user, movies)
    print(clock.restart())

print("=" * 60)
for _ in range(5):
    df0 = model0.predict_for_user(user, movies)
    print(clock.restart())

print(df.describe())
print(df0.describe())