Exemplo n.º 1
0
    def test_compute_optimal_ranking_set(self):
        """EILD oracle keeps items 2 and 3 when cutting user 1's ranking to two.

        The oracle's ``distances_by_fold`` is overwritten with a hand-written
        matrix so the expected output depends only on values in this test.
        """
        fold = 'u1'
        source_ranking = dataset_io.RankingSet(
            id=dataset_io.RankingSetId(fold, 'Alg'),
            matrix=np.array([[1, 2, 3]]),
            user_ids=[1])

        # The fold carries no base ratings at all.
        ratings = dataset_io.RatingSet(fold=fold)
        ratings.base = pd.DataFrame.from_records(
            data=[], columns=['user_id', 'item_id', 'rating'])

        oracle = eild_oracle.EILDOracle({source_ranking.id: source_ranking},
                                        {fold: ratings})

        # Presumably indexed by the accompanying item id list [1, 2, 3];
        # under that reading the largest entry (0.9) is between items 2 and 3.
        pairwise_distances = np.array([
            [0, 0.5, 0.2],
            [0.5, 0, 0.9],
            [0.2, 0.9, 0],
        ])
        oracle.distances_by_fold = {fold: (pairwise_distances, [1, 2, 3])}

        result = oracle.compute_optimal_ranking_set(
            fold, input_cutoff=3, output_cutoff=2)

        self.assertEqual([1], result.user_ids)
        self.assertEqual([2, 3], list(result.matrix[0, :]))
Exemplo n.º 2
0
  def test_compute_optimal_ranking_set(self):
    """EPC oracle output for two users with hand-set item popularities.

    ``popularity_by_fold`` is overwritten on the oracle, so the expected rows
    depend only on the popularity values written in this test.
    """
    fold = 'u1'
    source_ranking = dataset_io.RankingSet(
        id=dataset_io.RankingSetId(fold, 'Alg'),
        matrix=np.array([[1, 2, 3, 4], [5, 6, 7, 8]]),
        user_ids=[1, 2])

    # The fold carries no base ratings at all.
    ratings = dataset_io.RatingSet(fold=fold)
    ratings.base = pd.DataFrame.from_records(
        data=[], columns=['user_id', 'item_id', 'rating'])

    oracle = epc_oracle.EPCOracle({source_ranking.id: source_ranking},
                                  {fold: ratings})

    # Item id -> popularity; items 1-4 are ranked for user 1, 5-8 for user 2.
    popularity = {
        1: 1.,
        2: 0.25,
        3: 0.5,
        4: 0.75,
        5: 0.8,
        6: 0.3,
        7: 0.6,
        8: 0.01,
    }
    oracle.popularity_by_fold = {fold: popularity}

    result = oracle.compute_optimal_ranking_set(
        fold, input_cutoff=4, output_cutoff=3)

    self.assertEqual([1, 2], result.user_ids)

    # Each expected row lists that user's three lowest-popularity items in
    # ascending popularity order.
    ranked = result.matrix
    self.assertEqual([2, 3, 4], list(ranked[0, :]))
    self.assertEqual([8, 6, 7], list(ranked[1, :]))
Exemplo n.º 3
0
    def setUp(self):
        """Create ``self.eild`` over one fold with empty base and test ratings."""
        super().setUp()

        rating_columns = ['user_id', 'item_id', 'rating']

        empty_fold = dataset_io.RatingSet(fold='u1')
        empty_fold.base = pd.DataFrame.from_records(
            data=[], columns=rating_columns)
        empty_fold.test = pd.DataFrame.from_records(
            data=[], columns=rating_columns)

        # No ranking sets are registered; only the rating fold is supplied.
        self.eild = eild.EILD(ranking_set_by_id={},
                              rating_set_by_fold={'u1': empty_fold})
Exemplo n.º 4
0
  def test_computes_likers(self):
    """Base ratings are grouped into a set of user ids per item id."""
    # Rows are (user_id, item_id, rating).
    base_rows = [(1, 2, 5), (2, 2, 5), (2, 3, 5), (3, 3, 5)]

    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=base_rows, columns=['user_id', 'item_id', 'rating'])

    likers_by_fold = rating_utils._compute_likers_by_fold({'u1': ratings})

    self.assertIn('u1', likers_by_fold)
    self.assertDictEqual({2: {1, 2}, 3: {2, 3}}, likers_by_fold['u1'])
Exemplo n.º 5
0
  def test_ignores_test_and_validation(self):
    """Ratings in the test and validation splits yield no likers."""
    rating_columns = ['user_id', 'item_id', 'rating']

    ratings = dataset_io.RatingSet(fold='u1')
    # Only the non-base splits contain ratings.
    ratings.base = pd.DataFrame.from_records(
        data=[], columns=rating_columns)
    ratings.test = pd.DataFrame.from_records(
        data=[(2, 3, 5)], columns=rating_columns)
    ratings.validation = pd.DataFrame.from_records(
        data=[(1, 3, 5)], columns=rating_columns)

    likers_by_fold = rating_utils._compute_likers_by_fold({'u1': ratings})

    self.assertIn('u1', likers_by_fold)
    self.assertDictEqual({}, likers_by_fold['u1'])
Exemplo n.º 6
0
  def test_compute_popularity(self):
    """Popularity of each item computed from a single rating set's base split.

    With two users in the base ratings, item 1 (rated by both) is expected to
    have popularity 1.0, and items 2 and 3 (rated by one user each) 0.5.
    """
    rating_set = dataset_io.RatingSet(fold='u1')
    # Rows are (user_id, item_id, rating).
    rating_set.base = pd.DataFrame.from_records(
        columns=['user_id', 'item_id', 'rating'],
        data=[
            (1, 1, 5),
            (1, 2, 5),
            (2, 1, 5),
            (2, 3, 5),
        ])

    # _compute_popularity takes the rating set itself, not a fold mapping,
    # so no rating_set_by_fold dict is needed here.
    expected_popularity = {(1, 1.), (2, 0.5), (3, 0.5)}
    popularity = set(rating_utils._compute_popularity(rating_set).items())

    self.assertEqual(expected_popularity, popularity)
Exemplo n.º 7
0
  def test_computes_distances(self):
    """Two items sharing two of their three raters are at distance 1/3."""
    # Rows are (user_id, item_id, rating): item 2 is rated by users 1, 2, 4
    # and item 3 by users 2, 3, 4.
    base_rows = [
        (1, 2, 5), (2, 2, 5), (4, 2, 5),
        (2, 3, 5), (3, 3, 5), (4, 3, 5),
    ]
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=base_rows, columns=['user_id', 'item_id', 'rating'])

    distance_by_fold = rating_utils.compute_distances_by_fold({'u1': ratings})
    self.assertIn('u1', distance_by_fold)

    distance_matrix, item_ids = distance_by_fold['u1']

    # Element-wise comparison with an absolute tolerance of 1e-8.
    expected_matrix = np.array([[0., 1/3], [1/3, 0.]])
    self.assertTrue(
        (np.abs(distance_matrix - expected_matrix) < 1e-8).all())

    self.assertSequenceEqual([2, 3], item_ids)
Exemplo n.º 8
0
  def test_ignores_test_and_validation(self):
    """Ratings in the test and validation splits produce no distances.

    The base split is empty, so the result for the fold is expected to have
    an empty item id list and a distance matrix compatible with an empty array.
    """
    rating_set = dataset_io.RatingSet(fold='u1')
    rating_set.base = pd.DataFrame.from_records(
        columns=['user_id', 'item_id', 'rating'], data=[])
    rating_set.test = pd.DataFrame.from_records(
        columns=['user_id', 'item_id', 'rating'], data=[(2, 3, 5)])
    rating_set.validation = pd.DataFrame.from_records(
        columns=['user_id', 'item_id', 'rating'], data=[(1, 3, 5)])
    rating_set_by_fold = {'u1': rating_set}

    distance_by_fold = rating_utils.compute_distances_by_fold(rating_set_by_fold)
    self.assertIn('u1', distance_by_fold)

    distance_matrix, item_ids = distance_by_fold['u1']
    expected_matrix = np.array([])

    # Compare the absolute error: a signed difference would let any negative
    # deviation pass the tolerance check. Against an empty expected array the
    # comparison is trivially true, so the item_ids assertion below is what
    # actually pins the "nothing computed" outcome.
    error_matrix = np.abs(distance_matrix - expected_matrix)
    are_almost_equal = (error_matrix < 1e-8).all()
    self.assertTrue(are_almost_equal)

    self.assertSequenceEqual([], item_ids)
Exemplo n.º 9
0
  def test_compute_rated_by_fold(self):
    """The 'base' split is grouped into a set of item ids per user id."""
    # Rows are (user_id, item_id, rating).
    base_rows = [
        (1, 1, 5),
        (1, 2, 5),
        (2, 1, 5),
        (2, 3, 5),
    ]
    ratings = dataset_io.RatingSet(fold='u1')
    ratings.base = pd.DataFrame.from_records(
        data=base_rows, columns=['user_id', 'item_id', 'rating'])

    rated_by_fold = rating_utils.compute_rated_by_fold(
        {'u1': ratings}, split_name='base')

    self.assertIn('u1', rated_by_fold)
    self.assertDictEqual({1: {1, 2}, 2: {1, 3}}, rated_by_fold['u1'])
Exemplo n.º 10
0
    def test_compute_optimal_ranking_set(self):
        """MAP oracle places each user's test-rated items ahead of the rest.

        Only the set of items in the leading positions is checked; the order
        within the leading and trailing groups is left unconstrained.
        """
        fold = 'u1'
        rating_columns = ['user_id', 'item_id', 'rating']

        source_ranking = dataset_io.RankingSet(
            id=dataset_io.RankingSetId(fold, 'Alg'),
            matrix=np.array([[1, 2, 3, 4], [5, 6, 7, 8]]),
            user_ids=[1, 2])

        ratings = dataset_io.RatingSet(fold=fold)
        ratings.base = pd.DataFrame.from_records(
            data=[(4, 1, 5)], columns=rating_columns)
        # Rows are (user_id, item_id, rating). Item 1 is rated by user 2 but
        # is not part of user 2's input ranking.
        ratings.test = pd.DataFrame.from_records(
            data=[
                (1, 3, 5),
                (1, 2, 5),
                (2, 1, 5),
                (2, 8, 5),
            ],
            columns=rating_columns)

        oracle = map_oracle.MAPOracle({source_ranking.id: source_ranking},
                                      {fold: ratings})

        result = oracle.compute_optimal_ranking_set(
            fold, input_cutoff=4, output_cutoff=3)

        self.assertEqual([1, 2], result.user_ids)

        # User 1: items 2 and 3 lead, followed by one of the unrated items.
        first_row = result.matrix[0, :]
        self.assertEqual(3, len(first_row))
        self.assertSetEqual({2, 3}, set(first_row[0:2]))
        self.assertTrue(set(first_row[2:]) <= {1, 4})

        # User 2: item 8 leads, followed by two of the unrated items.
        second_row = result.matrix[1, :]
        self.assertEqual(3, len(second_row))
        self.assertSetEqual({8}, set(second_row[0:1]))
        self.assertTrue(set(second_row[1:]) <= {5, 6, 7})
Exemplo n.º 11
0
  def test_compute_hits_by_fold(self):
    """Hits map each user id to the item ids that user has in the test split."""
    rating_columns = ['user_id', 'item_id', 'rating']

    ratings = dataset_io.RatingSet(fold='u1')
    # The base rating by user 4 does not appear in the expected hits.
    ratings.base = pd.DataFrame.from_records(
        data=[(4, 1, 5)], columns=rating_columns)
    # Rows are (user_id, item_id, rating).
    ratings.test = pd.DataFrame.from_records(
        data=[
            (1, 1, 5),
            (1, 2, 5),
            (2, 1, 5),
            (2, 3, 5),
        ],
        columns=rating_columns)

    hits_by_fold = rating_utils.compute_hits_by_fold({'u1': ratings})

    self.assertIn('u1', hits_by_fold)
    self.assertDictEqual({1: {1, 2}, 2: {1, 3}}, hits_by_fold['u1'])