from collections import defaultdict
from typing import List, Optional

import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix

# Project-specific helpers and models (get_algo_backup, get_dataset_backup,
# get_pos_of_best_works_for_user_via_algo, fit_algo, current_user_ratings,
# rating_values, Rating, Work, User, Chrono, CHRONO_ENABLED, NB_RECO) are
# imported elsewhere in the module.


def build_profile_compare_function(algo_name: Optional[str],
                                   ratings: List[Rating], user: User):
    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike', 'wontsee']

    # By default, sort by rating then name
    def default_compare_function(item):
        return ordering.index(item.choice), item.work.title.lower()

    if algo_name is not None:
        try:
            work_ids = [rating.work_id for rating in ratings]
            algo = get_algo_backup(algo_name)
            dataset = get_dataset_backup(algo_name)
            best_pos = get_pos_of_best_works_for_user_via_algo(
                algo, dataset, user.id, work_ids)
            # Rank rated works as predicted by the algorithm; works it did
            # not rank go last.
            ranking = defaultdict(lambda: len(ratings))
            for rank, pos in enumerate(best_pos):
                ranking[ratings[pos].id] = rank

            def special_compare_function(item):
                return ordering.index(item.choice), ranking[item.id]

            return special_compare_function
        except Exception:
            # Two possible reasons: no backup, or user not in backup
            pass

    return default_compare_function
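# A hedged usage sketch (the call site is not part of this excerpt): the
# returned callable is meant as a sort key over the profile's ratings, e.g.
#
#     compare_fn = build_profile_compare_function(algo_name, ratings, request.user)
#     sorted_ratings = sorted(ratings, key=compare_fn)
#
# With a backup available, works are grouped by choice and then ordered by
# the algorithm's predicted rank; otherwise they fall back to title order.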
def load_from_algo(self, algo_name):
    # Method of a helper class in the same module (the class definition is
    # not part of this excerpt).
    algo = get_algo_backup(algo_name)
    dataset = get_dataset_backup(algo_name)
    # Keep only the works that the backed-up dataset knows about
    available_work_ids = list(
        set(self.work_ids) & set(dataset.encode_work.keys()))
    self.work_ids = np.array(available_work_ids)
    # One latent vector per remaining work
    self.vectors = algo.VT.T[dataset.encode_works(available_work_ids)]
    self.preprocess()
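# Illustrative sketch of the indexing pattern in load_from_algo, with made-up
# stand-ins: VT plays the role of algo.VT (one latent column per work) and
# encode_work the role of dataset.encode_work (work id -> index).
def _demo_latent_vector_lookup():
    VT = np.arange(12, dtype=float).reshape(3, 4)   # 3 latent dims, 4 works
    encode_work = {101: 0, 205: 1, 333: 2, 404: 3}  # work_id -> index

    requested = [205, 999, 101]                     # 999 is unknown to the backup
    available = [wid for wid in requested if wid in encode_work]
    vectors = VT.T[[encode_work[wid] for wid in available]]
    assert vectors.shape == (2, 3)                  # one latent vector per known work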
def get_reco_algo(request, algo_name='knn', category='all'):
    chrono = Chrono(is_enabled=CHRONO_ENABLED)

    already_rated_works = list(current_user_ratings(request))

    if request.user.is_anonymous:
        assert request.user.id is None
        # We only support KNN for anonymous users, since the offline models
        # did not learn anything about them.
        # FIXME: We should also force KNN for new users for which we have no
        # offline trained model available.
        algo_name = 'knn'

    chrono.save('get rated works')

    if algo_name == 'knn':
        try:
            algo = get_algo_backup(algo_name)
            dataset = get_dataset_backup(algo_name)
        except FileNotFoundError:
            triplets = list(
                Rating.objects.values_list('user_id', 'work_id', 'choice'))
            chrono.save('get all %d interesting ratings' % len(triplets))
            dataset, algo = fit_algo(algo_name, triplets)

        framed_rated_works = pd.DataFrame(
            list(current_user_ratings(request).items()),
            columns=['work_id', 'choice'])
        framed_rated_works['work_id'] = dataset.encode_works(
            framed_rated_works['work_id'])
        # Map each choice (favorite, like, dislike, ...) to its numeric value
        framed_rated_works['rating'] = framed_rated_works['choice'].map(
            rating_values)

        # Build a 1 x nb_works sparse row holding the current user's ratings
        nb_rated_works = len(framed_rated_works['work_id'])
        ratings_from_user = coo_matrix(
            (framed_rated_works['rating'],
             ([0] * nb_rated_works, framed_rated_works['work_id'])),
            shape=(1, algo.nb_works))
        ratings_from_user = ratings_from_user.tocsr()

        # Expand knn.M with the current user's ratings (vstack is too slow)
        algo.M.data = np.hstack((algo.M.data, ratings_from_user.data))
        algo.M.indices = np.hstack((algo.M.indices, ratings_from_user.indices))
        algo.M.indptr = np.hstack(
            (algo.M.indptr, (ratings_from_user.indptr + algo.M.nnz)[1:]))
        algo.M._shape = (algo.M.shape[0] + ratings_from_user.shape[0],
                         ratings_from_user.shape[1])

        chrono.save('loading knn and expanding with current user ratings')
    else:  # SVD or ALS, etc.
        try:
            algo = get_algo_backup(algo_name)
            dataset = get_dataset_backup(algo_name)
        except FileNotFoundError:
            # Every rating is useful
            triplets = list(
                Rating.objects.values_list('user_id', 'work_id', 'choice'))
            chrono.save('get all %d interesting ratings' % len(triplets))
            dataset, algo = fit_algo(algo_name, triplets)
            chrono.save('fit %s' % algo.get_shortname())

    if category != 'all':
        category_filter = set(
            Work.objects.filter(category__slug=category).values_list(
                'id', flat=True))
    else:
        category_filter = dataset.interesting_works

    filtered_works = list((dataset.interesting_works & category_filter)
                          - set(already_rated_works))
    chrono.save('remove already rated')

    pos_of_best = get_pos_of_best_works_for_user_via_algo(
        algo, dataset, request.user.id, filtered_works, limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]
    chrono.save('compute every prediction')

    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was created
    ranked_work_ids = [
        work_id for work_id in best_work_ids if work_id in works
    ]
    chrono.save('get bulk')
    return {'work_ids': ranked_work_ids, 'works': works}
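# A self-contained sketch (separate from the view above) of why the manual CSR
# expansion is equivalent to vstack: appending one row only requires extending
# the data/indices buffers and shifting the new indptr entries by the old nnz.
def _demo_csr_row_append():
    from scipy.sparse import csr_matrix, vstack

    M = csr_matrix(np.array([[0, 1, 0],
                             [2, 0, 3]]))
    new_row = csr_matrix(np.array([[0, 0, 4]]))
    expected = vstack([M, new_row]).tocsr()  # reference result, copies M

    # In-place expansion, mirroring the knn branch above. M.nnz still reads
    # the old value here because indptr has not been modified yet.
    M.data = np.hstack((M.data, new_row.data))
    M.indices = np.hstack((M.indices, new_row.indices))
    M.indptr = np.hstack((M.indptr, (new_row.indptr + M.nnz)[1:]))
    M._shape = (M.shape[0] + new_row.shape[0], new_row.shape[1])

    assert np.array_equal(M.toarray(), expected.toarray())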
def user_exists_in_backup(user, algo_name):
    try:
        dataset = get_dataset_backup(algo_name)
        return user.id in dataset.encode_user
    except FileNotFoundError:
        return False
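# Hedged usage sketch (hypothetical call site, not in this excerpt): check the
# backup before taking the personalized path, e.g.
#
#     algo_name = 'svd' if user_exists_in_backup(request.user, 'svd') else None
#     compare_fn = build_profile_compare_function(algo_name, ratings, request.user)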