Exemplo n.º 1
0
    def eval_fit_recsys(cls, recsys: RecSys, split_list: List[Split],
                        test_items_list: List[pd.DataFrame]):
        """
        Method which is usually called by the 'PredictionCalculator' module that generates recommendation lists. For
        every user, items that will be ranked are specified by the 'test_items_list' parameter.

        Rankings generated will be stored into a class attribute (rank_truth_list), which is a list that contains
        Split objects: every object has two DataFrames, the first one has recommendation lists for every user,
        the second one has the 'ground truth' for every user.

        If the class attribute is non-empty, then the 'AlreadyFittedRecSys' exception is raised, so remember to
        clean the class attribute by calling the private method '_clean_pred_truth_list(...)' upon every new evaluation

        Args:
            recsys (RecSys): Recommender system which will generate predictions that will later be evaluated
            split_list (List[Split]): List of Split objects where every Split contains two DataFrames, the first has
                the 'train set' for every user, the second has the 'test set' for every user
            test_items_list (List[pd.DataFrame]): List of DataFrames, one for every Split object inside the split_list
                parameter, where every DataFrame contains for every user the list of items that must be ranked

        Raises:
            AlreadyFittedRecSys exception when the class attribute 'rank_truth_list' is non-empty, meaning that
            recommendation lists are already been calculated
        """

        if len(cls.rank_truth_list) != 0:
            raise AlreadyFittedRecSys

        for counter, (split, test_items_frame) in enumerate(zip(
                split_list, test_items_list),
                                                            start=1):

            train = split.train
            test = split.test

            rank_truth = Split()
            rank_truth.truth = test

            frame_to_concat = []
            user_list_to_fit = set(train.from_id)

            for user in progbar(
                    user_list_to_fit,
                    prefix='Calculating rank for user {} - split {}'.format(
                        '{}', counter),
                    substitute_with_current=True):

                user_ratings_train = train.loc[train['from_id'] == user]

                test_items = list(
                    test_items_frame.query('from_id == @user').to_id)

                result = recsys._eval_fit_rank(user_ratings_train, test_items)

                frame_to_concat.append(result)

            rank_truth.pred = pd.concat(frame_to_concat)

            cls.rank_truth_list.append(rank_truth)
Exemplo n.º 2
0
    def eval_fit_recsys(cls, recsys: RecSys, split_list: List[Split],
                        test_items_list: List[pd.DataFrame]):
        if len(cls.rank_truth_list) != 0:
            raise AlreadyFittedRecSys

        for counter, (split, test_items_frame) in enumerate(zip(
                split_list, test_items_list),
                                                            start=1):

            train = split.train
            test = split.test

            rank_truth = Split()
            rank_truth.truth = test

            frame_to_concat = []
            user_list_to_fit = set(train.from_id)

            for user in progbar(
                    user_list_to_fit,
                    prefix='Calculating rank for user {} - split {}'.format(
                        '{}', counter),
                    substitute_with_current=True):

                user_ratings_train = train.loc[train['from_id'] == user]

                test_items = list(
                    test_items_frame.query('from_id == @user').to_id)

                result = recsys._eval_fit_rank(user_ratings_train, test_items)

                frame_to_concat.append(result)

            rank_truth.pred = pd.concat(frame_to_concat)

            cls.rank_truth_list.append(rank_truth)