def hybrid_rec(user_ratings, user, icm_m, sim_skr=20, w_cbf=0.82, w_cf=0.18):
    """Return the five best hybrid (content-based + collaborative) items.

    Two independent score tables are built and then merged with a weighted
    sum:

    * CBF: for every item of ``icm_m`` the user has not rated, the sum of
      ``rating * item_sim(...)`` over the items the user has rated.
    * CF: for every other user in the module-level ``urm`` whose
      ``adj_cosine_sim`` with ``user`` is positive, the sum of
      ``other's rating * similarity`` over the items ``user`` has not seen
      (missing from ``urm[user]`` or stored there with rating 0).

    Items found in the first ``index_pop[user_min_pop[user]]`` entries of
    the module-level ``sort_popularity`` are then dropped from the merged
    table.

    Parameters
    ----------
    user_ratings: ratings of the target user, indexed by item id
    user: the user to build recommendations for
    icm_m: the item content matrix (iterated over its item ids)
    sim_skr: shrink term forwarded to ``item_sim`` for the CBF part
    w_cbf: weight applied to the CBF totals
    w_cf: weight applied to the CF totals

    Returns
    -------
    A string holding exactly five item ids, each one preceded by a single
    space (e.g. ``" 12 7 33173 33475 1076"``). When fewer than five items
    are ranked, the result is padded with hard-coded fallback items; a
    fallback item that is already part of the ranking is skipped so the
    same item is never recommended twice.
    """
    # Hard-coded fallback ids used for padding (the original code paired
    # each of them with a 5.0 score that was never read).
    # NOTE(review): presumably globally top-rated items -- confirm.
    fallback_items = [33173, 33475, 1076, 35300, 15743]

    # --- content-based part: {item: sum(rating * similarity)} ---
    totals_cbf = {}
    for other_movie in icm_m:
        # only items the user has not rated yet are candidates
        if other_movie in user_ratings:
            continue
        # ``movie`` is always a rated item here, hence never equal to
        # ``other_movie``; no explicit inequality test is needed.
        for movie in user_ratings:
            similarity = item_sim(icm_m, movie, other_movie, skr=sim_skr)
            if similarity != 0:
                totals_cbf[other_movie] = (
                    totals_cbf.get(other_movie, 0.0)
                    + user_ratings[movie] * similarity
                )

    # --- collaborative part: {item: sum(rating * similarity)} ---
    totals_cf = {}
    for other in urm:
        # don't compare the user to himself
        if other == user:
            continue
        similarity_urm = adj_cosine_sim(urm, user, other, 6)
        # ignore similarities of zero or lower
        if similarity_urm <= 0:
            continue
        for item in urm[other]:
            # only score items the user hasn't seen yet
            if item not in urm[user] or urm[user][item] == 0:
                totals_cf[item] = (
                    totals_cf.get(item, 0)
                    + urm[other][item] * similarity_urm
                )

    # --- merge the two tables, weighting each contribution ---
    rankings = {}
    for movie, total in totals_cbf.items():
        rankings[movie] = total * w_cbf
    for movie, total in totals_cf.items():
        if movie in rankings:
            rankings[movie] += total * w_cf
        else:
            rankings[movie] = total * w_cf

    # --- drop the items that are too popular ---
    # NOTE(review): presumably sort_popularity is ordered from the most
    # popular item down and the cut-off is the position of the least popular
    # item rated by the user -- confirm against where these are built.
    # (An earlier, disabled variant used a fixed 500-item cut-off when
    # user_min_pop[user] was below 10.)
    for i in range(0, index_pop[user_min_pop[user]]):
        rankings.pop(sort_popularity[i][0], None)

    # Because of the shrink term the scores are not rating predictions; they
    # are only used to order the items. The sort is stable, so ties keep the
    # insertion order of ``rankings``.
    best = sorted(rankings.items(), key=lambda entry: -entry[1])[0:5]
    top_items = [item for item, _ in best]

    # Fewer than five candidates: pad with the fallback items, skipping the
    # ones that are already recommended.
    if len(top_items) < 5:
        already_chosen = set(top_items)
        top_items.extend(
            item for item in fallback_items if item not in already_chosen
        )
        top_items = top_items[0:5]

    return "".join(" " + str(item) for item in top_items)
def cbf_recommendations(user_ratings, user, icm_m, sim_skr=20, shrink=10):
    """Return the five best content-based (CBF) recommendations.

    * WARNING:
        # This function is very resource and time consuming when it is
          called in large batches (e.g. in a loop over all the users).

    For every item of ``icm_m`` the user has not rated, the function sums
    ``rating * item_sim(...)`` and ``item_sim(...)`` over the items the
    user has rated. Items found among the first 1000 entries of the
    module-level ``sort_popularity`` are discarded, and the remaining ones
    are scored as ``total / (similarity_sum + shrink)``.

    * NOTE:
        # no optimization here, it is a plain and raw computation
        # NOTE(review): in the visible part of this file a later function
          with the same name is defined; if both live in the same module
          that later one replaces this one at import time -- confirm.

    * TODO:
        # support for cosine
        # some optimizations are needed

    Parameters
    ----------
    user_ratings: ratings of the user, indexed by rated item id
    user: the target user (not used by this implementation)
    icm_m: item content matrix (iterated over its item ids)
    sim_skr: shrink term forwarded to ``item_sim``
    shrink: shrink term added to the similarity sum when scoring

    Returns
    -------
    A pre-formatted string holding exactly five item ids, each one preceded
    by a single space. When fewer than five items are scored, the result is
    padded with hard-coded fallback items; a fallback item that is already
    part of the ranking is skipped so no item appears twice.
    """
    # Hard-coded fallback ids used for padding (the original code paired
    # each of them with a 5.0 score that was never read).
    # NOTE(review): presumably globally top-rated items -- confirm.
    fallback_items = [33173, 33475, 1076, 35300, 15743]
    # how many leading entries of sort_popularity count as "too popular"
    popular_cutoff = 1000

    totals = {}    # {item: sum(rating * similarity)}
    sim_sums = {}  # {item: sum(similarity)}

    for other_movie in icm_m:
        # only items the user has not rated yet are candidates
        if other_movie in user_ratings:
            continue
        # ``movie`` is always a rated item here, hence never equal to
        # ``other_movie``; no explicit inequality test is needed.
        for movie in user_ratings:
            similarity = item_sim(icm_m, movie, other_movie, skr=sim_skr)
            if similarity != 0:
                totals[other_movie] = (
                    totals.get(other_movie, 0.0)
                    + user_ratings[movie] * similarity
                )
                sim_sums[other_movie] = (
                    sim_sums.get(other_movie, 0.0) + similarity
                )

    # Remove the items that are too popular. Slicing (instead of indexing
    # positions 0..999) keeps this safe when sort_popularity holds fewer
    # than 1000 entries.
    # NOTE(review): assumes sort_popularity is a sliceable sequence whose
    # entries carry the item id at position 0 -- confirm.
    for entry in sort_popularity[:popular_cutoff]:
        totals.pop(entry[0], None)

    # Because of the shrink term the scores are not rating predictions; they
    # are only used to order the items (stable sort: ties keep dict order).
    scored = [
        (total / (sim_sums[item] + shrink), item)
        for item, total in totals.items()
    ]
    scored.sort(key=lambda pair: -pair[0])
    top_items = [item for _, item in scored[0:5]]

    # Fewer than five candidates: pad with the fallback items, skipping the
    # ones that are already recommended.
    if len(top_items) < 5:
        already_chosen = set(top_items)
        top_items.extend(
            item for item in fallback_items if item not in already_chosen
        )
        top_items = top_items[0:5]

    return "".join(" " + str(item) for item in top_items)
def cbf_recommendations(user_ratings, user, icm_m, knn, urm, sim_skr=20,
                        shrink=10, w_cbf=0.9, w_knn=0.1):
    """Return the five best items mixing CBF scores with user-kNN scores.

    * WARNING:
        # This function is very resource and time consuming when it is
          called in large batches (e.g. in a loop over all the users).

    Two score tables are built and merged with a weighted sum:

    * CBF: for every item of ``icm_m`` the user has not rated, the sum of
      ``rating * item_sim(...)`` over the items the user has rated.
    * kNN: when ``user`` is a key of ``knn``, for every neighbour listed in
      ``knn[user]`` and every item of ``urm[neighbour]`` that is missing
      from ``urm[user]``, the sum of ``neighbour's rating * knn weight``.

    The merged scores are not normalised.

    * NOTE:
        # no optimization here, it is a plain and raw computation

    * TODO:
        # support for cosine
        # some optimizations are needed

    Parameters
    ----------
    user_ratings: ratings of the user, indexed by rated item id
    user: the target user
    icm_m: item content matrix (iterated over its item ids)
    knn: ``{user: {neighbour: weight}}``; users without an entry get no
        kNN contribution
    urm: ``{user: {item: rating}}``; must hold ``user`` and each neighbour
        whenever the kNN part runs
    sim_skr: shrink term forwarded to ``item_sim``
    shrink: kept for interface compatibility (not used by this
        implementation)
    w_cbf: weight applied to the CBF totals
    w_knn: weight applied to the kNN totals

    Returns
    -------
    A pre-formatted string holding exactly five item ids, each one preceded
    by a single space. When fewer than five items are ranked, the result is
    padded with hard-coded fallback items; a fallback item that is already
    part of the ranking is skipped so no item appears twice.
    """
    # Hard-coded fallback ids used for padding (the original code paired
    # each of them with a 5.0 score that was never read).
    # NOTE(review): presumably globally top-rated items -- confirm.
    fallback_items = [33173, 33475, 1076, 35300, 15743]

    # --- CBF part: {item: sum(rating * similarity)} ---
    totals_cbf = {}
    for other_movie in icm_m:
        # only items the user has not rated yet are candidates
        if other_movie in user_ratings:
            continue
        # ``movie`` is always a rated item here, hence never equal to
        # ``other_movie``; no explicit inequality test is needed.
        for movie in user_ratings:
            similarity = item_sim(icm_m, movie, other_movie, skr=sim_skr)
            if similarity != 0:
                totals_cbf[other_movie] = (
                    totals_cbf.get(other_movie, 0.0)
                    + user_ratings[movie] * similarity
                )

    # --- kNN-on-users part: {item: sum(neighbour rating * weight)} ---
    totals_knn = {}
    if user in knn:
        for other_user, weight in knn[user].items():
            for movie, rating in urm[other_user].items():
                if movie not in urm[user]:
                    totals_knn[movie] = (
                        totals_knn.get(movie, 0.0) + rating * weight
                    )

    # --- merge the two tables, weighting each contribution ---
    rankings = {}
    for movie, total in totals_cbf.items():
        rankings[movie] = total * w_cbf
    for movie, total in totals_knn.items():
        if movie in rankings:
            rankings[movie] += total * w_knn
        else:
            rankings[movie] = total * w_knn

    # The scores are only used to order the items, they are not rating
    # predictions. The sort is stable: ties keep the order of ``rankings``.
    best = sorted(rankings.items(), key=lambda entry: -entry[1])[0:5]
    top_items = [item for item, _ in best]

    # Fewer than five candidates: pad with the fallback items, skipping the
    # ones that are already recommended.
    if len(top_items) < 5:
        already_chosen = set(top_items)
        top_items.extend(
            item for item in fallback_items if item not in already_chosen
        )
        top_items = top_items[0:5]

    return "".join(" " + str(item) for item in top_items)