def test_perform_0_gap(self):
        # DeltaGap with 2 equals frame should return 0 for every group
        split = Split(user_truth, user_truth)

        metric = DeltaGap(user_groups={'a': 0.5, 'b': 0.5})

        result = metric.perform(split)

        for col in result.columns:
            self.assertTrue(v == 0 for v in result[col])
    def test_perform_2_users_2_groups(self):
        """DeltaGap with two users split 50/50 into groups 'a' and 'b'."""
        metric = DeltaGap(user_groups={'a': 0.5, 'b': 0.5})
        result = metric.perform(self.split)

        pop_by_item_truth = Counter(list(user_truth['to_id']))

        # group_a = { u2 } (higher popular ratio, so it lands in the first group)
        # group_b = { u1 }

        # Average popularity of the recommendations of every user in each group
        # (sum_pop_item_rated / n_item_rated per user). pop_by_item_pred is the
        # basis here: because of the methodology, recommended items may differ
        # from the items in the truth.
        recs_avg_pop = {
            'u2': 6 / 4,  # group a
            'u1': 8 / 8,  # group b
        }

        # Average popularity of the profile of every user in each group
        # (sum_pop_item_rated / n_item_rated per user). pop_by_item_truth is the
        # basis here: because of the methodology, truth items may differ from
        # the items in the recommendation lists.
        profile_avg_pop = {
            'u2': 5 / 3,  # group a
            'u1': 7 / 5,  # group b
        }

        # GAP of a group = sum of its users' avg popularity / n users in group
        # (each group contains a single user here).
        recs_gap = {'a': recs_avg_pop['u2'] / 1, 'b': recs_avg_pop['u1'] / 1}
        profile_gap = {'a': profile_avg_pop['u2'] / 1,
                       'b': profile_avg_pop['u1'] / 1}

        expected_delta_gap = {
            group: (recs_gap[group] - profile_gap[group]) / profile_gap[group]
            for group in ('a', 'b')
        }

        computed_group_a = float(result["{} | a".format(str(metric))])
        computed_group_b = float(result["{} | b".format(str(metric))])

        self.assertAlmostEqual(expected_delta_gap['a'], computed_group_a)
        self.assertAlmostEqual(expected_delta_gap['b'], computed_group_b)
    def test_perform_increased_pop_percentage(self):
        """A larger pop_percentage regroups users, so results must change."""
        truth_frame = pd.DataFrame({
            'from_id': [
                'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2',
                'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u4', 'u4', 'u4', 'u5',
                'u5', 'u5'
            ],
            'to_id': [
                'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6',
                'i1', 'i8', 'i2', 'i4', 'i3', 'i20', 'i3', 'i1', 'i21', 'i3',
                'i5', 'i1'
            ],
            'score': [
                650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50, 500,
                400, 300, 200, 150, 100, 50, 800, 600, 500
            ]
        })

        recs_frame = pd.DataFrame({
            'from_id': [
                'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2',
                'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u4', 'u4', 'u4',
                'u5', 'u5', 'u5', 'u5', 'u5'
            ],
            'to_id': [
                'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6',
                'i1', 'i5', 'i35', 'i2', 'i4', 'i3', 'i20', 'i3', 'i1', 'i3',
                'i5', 'i1', 'i9', 'i36', 'i6'
            ],
            'score': [
                650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50, 25,
                500, 400, 300, 200, 350, 100, 50, 800, 600, 500, 400, 300
            ]
        })

        split = Split(recs_frame, truth_frame)

        # Same three groups; only the second run widens the "popular" slice.
        result_pop_normal = np.array(
            DeltaGap(user_groups={'a': 0.3, 'b': 0.3, 'c': 0.4}).perform(split))
        result_pop_increased = np.array(
            DeltaGap(user_groups={'a': 0.3, 'b': 0.3, 'c': 0.4},
                     pop_percentage=0.6).perform(split))

        # Sort columns so the comparison ignores row ordering.
        result_pop_normal.sort(axis=0)
        result_pop_increased.sort(axis=0)

        # The increased pop_percentage puts users into groups differently, so
        # the two result matrices must not be identical.
        self.assertFalse(
            np.array_equal(result_pop_normal, result_pop_increased))
    def test_calculate_gap(self):
        """calculate_gap averages the precomputed per-user popularities."""
        # Inner fraction of the GAP numerator, precomputed for every user.
        # Set manually for the sake of the test; normally produced by the
        # get_avg_pop_by_users() method of the GroupFairnessMetric class.
        avg_pop_by_users = {'u1': 2, 'u2': 1.78, 'u3': 3.5, 'u4': 1.1}

        for first, second in (('u1', 'u3'), ('u2', 'u4')):
            expected = (avg_pop_by_users[first] + avg_pop_by_users[second]) / 2
            computed = DeltaGap.calculate_gap({first, second}, avg_pop_by_users)
            self.assertAlmostEqual(expected, computed)
    def test_calculate_delta_gap(self):
        """calculate_delta_gap returns (GAP_recs - GAP_profile) / GAP_profile."""
        gap_recs, gap_profile = 3, 2.32

        computed = DeltaGap.calculate_delta_gap(gap_recs, gap_profile)

        self.assertAlmostEqual((gap_recs - gap_profile) / gap_profile, computed)
# Exemplo n.º 6  (scraper/pagination artifact from the code-sharing site — not code)
# 0
    def test_all(self):
        """Smoke-test: fit an EvalModel carrying every supported metric."""
        ratings_filename = os.path.join(contents_path, '..', 'datasets',
                                        'examples', 'new_ratings.csv')

        ratings_frame = RatingsImporter(
            CSVFile(ratings_filename)).import_ratings()

        recsys = ContentBasedRS(
            LinearPredictor({"Plot": ['tfidf', 'embedding']},
                            SkLinearRegression()),
            ratings_frame,
            items_dir)

        # Catalog = id of every serialized '.xz' content inside items_dir.
        catalog = {
            os.path.splitext(file_name)[0]
            for file_name in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, file_name))
            and file_name.endswith('xz')
        }

        metrics = [
            Precision(sys_average='micro'),
            PrecisionAtK(1, sys_average='micro'),
            RPrecision(),
            Recall(),
            RecallAtK(3),
            FMeasure(1, sys_average='macro'),
            FMeasureAtK(2, beta=1, sys_average='micro'),
            NDCG(),
            NDCGAtK(3),
            MRR(),
            MRRAtK(5),
            Correlation('pearson', top_n=5),
            Correlation('kendall', top_n=3),
            Correlation('spearman', top_n=4),
            MAE(),
            MSE(),
            RMSE(),
            CatalogCoverage(catalog),
            CatalogCoverage(catalog, k=2),
            CatalogCoverage(catalog, top_n=3),
            GiniIndex(),
            GiniIndex(top_n=3),
            DeltaGap({'primo': 0.5, 'secondo': 0.5}),
        ]

        em = EvalModel(recsys,
                       KFoldPartitioning(),
                       metric_list=metrics,
                       methodology=TestItemsMethodology())

        result = em.fit()
    def test_perform_top_3(self):
        """DeltaGap computed only on the top-3 cut of the recommendations."""
        metric = DeltaGap(user_groups={'a': 1}, top_n=3)
        result = metric.perform(self.split)

        pop_by_item_truth = Counter(list(user_truth['to_id']))

        # group_a = { u2, u1 }

        # Average popularity of the top-3 recommendations of every user
        # (sum_pop_item_rated / n_item_rated per user). pop_by_item_pred is
        # the basis: recommended items may differ from the truth items.
        recs_avg_pop_a = {'u2': 5 / 3, 'u1': 5 / 3}

        # Average popularity of the profile of every user
        # (sum_pop_item_rated / n_item_rated per user). pop_by_item_truth is
        # the basis: truth items may differ from the recommended ones.
        profile_avg_pop_a = {'u2': 5 / 3, 'u1': 7 / 5}

        # GAP of the group = sum of its users' avg popularity / n users.
        recs_gap_a = sum(recs_avg_pop_a.values()) / len(recs_avg_pop_a)
        profile_gap_a = sum(profile_avg_pop_a.values()) / len(profile_avg_pop_a)

        expected_delta_gap_a = (recs_gap_a - profile_gap_a) / profile_gap_a

        computed_delta_gap_a = float(result["{} | a".format(str(metric))])

        self.assertAlmostEqual(expected_delta_gap_a, computed_delta_gap_a)
# Exemplo n.º 8  (scraper/pagination artifact from the code-sharing site — not code)
# 0
    def test_graph(self):
        """Smoke-test: evaluate a graph-based recommender with mixed metrics."""
        catalog = set(ratings.to_id)

        users_dir = os.path.join(dir_test_files, 'complex_contents',
                                 'users_codified/')

        # user_exo_properties=['1']: column '1' of the users .DAT file is the
        # one that identifies the gender.
        full_graph = NXFullGraph(ratings,
                                 user_contents_dir=users_dir,
                                 item_contents_dir=items_dir,
                                 item_exo_representation="dbpedia",
                                 user_exo_representation='local',
                                 item_exo_properties=['starring'],
                                 user_exo_properties=['1'])

        graph_rs = GraphBasedRS(NXPageRank(), full_graph)

        metrics = [
            Precision(relevant_threshold=3),
            Recall(),
            FMeasure(beta=1),
            FMeasure(beta=2, sys_average='micro'),
            MRR(),
            Correlation('pearson'),
            GiniIndex(),
            DeltaGap({'popular': 0.5, 'niche': 0.5}),
            PredictionCoverage(catalog),
            PopProfileVsRecs(user_groups={'popular': 0.5, 'niche': 0.5},
                             out_dir='plots/'),
            LongTailDistr('plots/', format='svg'),
            PopRecsCorrelation('plots/'),
        ]

        em = EvalModel(graph_rs,
                       KFoldPartitioning(),
                       metric_list=metrics,
                       verbose_predictions=True,
                       methodology=TestItemsMethodology())

        em.fit()
    def test_eval_ranking_needed_metrics_implicit_split(self):
        """eval_metrics works when the split list is injected implicitly.

        The split list is assigned straight onto the RankingNeededMetric
        class attribute instead of being passed to the calculator.
        """
        calculator = MetricCalculator()
        RankingNeededMetric.rank_truth_list = self.rank_split_list

        ranking_metrics = [
            Precision(),
            PrecisionAtK(2),
            RPrecision(),
            Recall(),
            RecallAtK(2),
            FMeasure(),
            FMeasureAtK(2),
            NDCG(),
            NDCGAtK(2),
            MRR(),
            MRRAtK(2),
            Correlation('pearson'),
            Correlation('kendall'),
            Correlation('spearman'),
            PredictionCoverage(self.catalog),
            CatalogCoverage(self.catalog, top_n=2),
            GiniIndex(),
            DeltaGap(user_groups={'a': 0.5, 'b': 0.5}),
            LongTailDistr(out_dir='test_plot'),
            PopProfileVsRecs(user_groups={'a': 0.5, 'b': 0.5},
                             out_dir='test_plot'),
            PopRecsCorrelation(out_dir='test_plot'),
        ]

        system_res, each_user_res = calculator.eval_metrics(ranking_metrics)

        self.assertIsInstance(system_res, pd.DataFrame)
        self.assertIsInstance(each_user_res, pd.DataFrame)
 def test_invalid_percentage(self):
     with self.assertRaises(PercentageError):
         DeltaGap(user_groups={'a': 0.5}, pop_percentage=-0.5)
         DeltaGap(user_groups={'a': 0.5}, pop_percentage=0)
         DeltaGap(user_groups={'a': 0.5}, pop_percentage=1.5)