def rank(self, user_ratings: pd.DataFrame, items_directory: str, recs_number: int = None,
         filter_list: List[str] = None) -> pd.DataFrame:
    """
    Produce a ranking of candidate items for one user, scored by the classifier.

    The candidates are the items named in filter_list when it is given (these may
    include items the user has already rated); otherwise they are the items
    returned by get_unrated_items for this user. Candidates that load as None are
    skipped. If recs_number is None the whole ranking is returned.

    Args:
        user_ratings (pd.DataFrame): DataFrame containing ratings of a single user
        items_directory (str): path of the directory where the items are stored
        recs_number (int): how many top rows to keep in the ranking, if None every
            scored item is kept
        filter_list (list): ids of the only items to rank, if None the unrated
            items are ranked

    Returns:
        pd.DataFrame: frame with columns 'to_id' (item id) and 'score' (predicted
            score), sorted in descending order by the 'score' column
    """
    # Choose the candidate pool
    candidates = (get_unrated_items(items_directory, user_ratings)
                  if filter_list is None
                  else get_chosen_items(items_directory, filter_list))

    # One pass over the candidates: id first, then features, skipping missing items
    extracted = [(candidate.content_id, self.extract_features_item(candidate))
                 for candidate in candidates
                 if candidate is not None]
    candidate_ids = [content_id for content_id, _ in extracted]
    candidate_features = [features for _, features in extracted]

    recsys_logger.info("Calculating rank")
    probabilities = []
    if candidate_ids:
        # Fuse the input if there are dicts, multiple representation, etc.
        fused = self.fuse_representations(candidate_features, self.__embedding_combiner)
        probabilities = self.__classifier.predict_proba(fused)

    # Pair each id with its probability row; the score kept is the second entry of
    # that row (presumably the positive class - confirm against the classifier)
    paired = list(zip(candidate_ids, probabilities))
    frame = pd.DataFrame(
        {
            'to_id': [item_id for item_id, _ in paired],
            'score': [row[1] for _, row in paired],
        },
        columns=['to_id', 'score'],
    )

    # Best scores first, then truncate to the requested length
    ordered = frame.sort_values(by=['score'], ascending=False)
    return ordered[:recs_number]
def predict(self, user_ratings: pd.DataFrame, items_directory: str,
            filter_list: List[str] = None) -> pd.DataFrame:
    """
    Predict a score for each candidate item of one user using the regressor.

    The candidates are the items named in filter_list when it is given (these may
    include items the user has already rated); otherwise they are the items
    returned by get_unrated_items for this user. Candidates that load as None are
    skipped.

    Args:
        user_ratings (pd.DataFrame): DataFrame containing ratings of a single user
        items_directory (str): path of the directory where the items are stored
        filter_list (list): ids of the only items to predict, if None the unrated
            items are predicted

    Returns:
        pd.DataFrame: frame with columns 'to_id' (item id) and 'score' (predicted
            score), in the order the items were loaded
    """
    # Choose the candidate pool
    candidates = (get_unrated_items(items_directory, user_ratings)
                  if filter_list is None
                  else get_chosen_items(items_directory, filter_list))

    # One pass over the candidates: id first, then features, skipping missing items
    extracted = [(candidate.content_id, self.extract_features_item(candidate))
                 for candidate in candidates
                 if candidate is not None]
    candidate_ids = [content_id for content_id, _ in extracted]
    candidate_features = [features for _, features in extracted]

    recsys_logger.info("Calculating score predictions")
    predicted_scores = []
    if candidate_ids:
        # Fuse the input if there are dicts, multiple representation, etc.
        fused = self.fuse_representations(candidate_features, self.__embedding_combiner)
        predicted_scores = self.__regressor.predict(fused)

    # Start from an empty two-column frame and fill the columns one at a time
    predictions = pd.DataFrame(columns=["to_id", "score"])
    predictions["to_id"] = candidate_ids
    predictions["score"] = predicted_scores
    return predictions
def rank(self, user_ratings: pd.DataFrame, items_directory: str, recs_number: int = None,
         filter_list: List[str] = None) -> pd.DataFrame:
    """
    Produce a ranking of candidate items for one user by similarity to the centroid.

    The candidates are the items named in filter_list when it is given (these may
    include items the user has already rated); otherwise they are the items
    returned by get_unrated_items for this user. Candidates that load as None are
    skipped. If recs_number is None the whole ranking is returned.

    Args:
        user_ratings (pd.DataFrame): DataFrame containing ratings of a single user
        items_directory (str): path of the directory where the items are stored
        recs_number (int): how many top rows to keep in the ranking, if None every
            scored item is kept
        filter_list (list): ids of the only items to rank, if None the unrated
            items are ranked

    Returns:
        pd.DataFrame: frame with columns 'to_id' (item id) and 'score' (similarity
            with the centroid), sorted in descending order by the 'score' column
    """
    # Choose the candidate pool
    candidates = (get_unrated_items(items_directory, user_ratings)
                  if filter_list is None
                  else get_chosen_items(items_directory, filter_list))

    # One pass over the candidates: id first, then features, skipping missing items
    extracted = [(candidate.content_id, self.extract_features_item(candidate))
                 for candidate in candidates
                 if candidate is not None]
    candidate_ids = [content_id for content_id, _ in extracted]
    candidate_features = [features for _, features in extracted]

    recsys_logger.info("Calculating rank")
    centroid_scores = []
    if candidate_ids:
        # The score of each item is its similarity with the centroid vector
        fused = self.fuse_representations(candidate_features, self.__embedding_combiner)
        for item_vector in fused:
            centroid_scores.append(self.__similarity.perform(self.__centroid, item_vector))

    # Build the score frame
    frame = pd.DataFrame({'to_id': candidate_ids, 'score': centroid_scores},
                         columns=['to_id', 'score'])

    # Most similar first, then truncate to the requested length
    ordered = frame.sort_values(by=['score'], ascending=False)
    return ordered[:recs_number]