示例#1
0
    def test_init(self):
        """Check the state of a train set built from the fixture triplets.

        Covers matrix shape, rating bounds, global mean, item popularity
        ranking, user/item counts, unknown-id detection, raw-id lookups and
        the index / raw id lists. A second train set is then built with the
        pair ('76', '93') already present in ``global_ui_set``; it is expected
        to contain 9 users and 9 items.
        """
        raw_user_ids = [
            '76', '768', '642', '930', '329', '633', '716', '871', '543', '754'
        ]
        raw_item_ids = [
            '93', '257', '795', '709', '705', '226', '478', '195', '737', '282'
        ]
        popularity_order = np.asarray([7, 9, 6, 5, 3, 2, 1, 0, 8, 4])
        # Rating sum written out as 3*2 + 4*7 + 5 over 10 entries; both sides
        # of the comparison are truncated to int.
        truncated_mean = int((3 * 2 + 4 * 7 + 5) / 10)

        ts = MatrixTrainSet.from_uir(
            self.triplet_data,
            global_uid_map=OrderedDict(),
            global_iid_map=OrderedDict(),
            global_ui_set=set(),
            verbose=True,
        )

        # Shape and rating statistics.
        self.assertSequenceEqual(ts.matrix.shape, (10, 10))
        self.assertEqual(ts.min_rating, 3)
        self.assertEqual(ts.max_rating, 5)
        self.assertEqual(int(ts.global_mean), truncated_mean)

        # Only the first element returned by item_ppl_rank() is checked.
        npt.assert_array_equal(ts.item_ppl_rank()[0], popularity_order)

        self.assertEqual(ts.num_users, 10)
        self.assertEqual(ts.num_items, 10)

        # Indices inside the known range are not "unknown"; 13 and 16 are.
        self.assertFalse(ts.is_unk_user(7))
        self.assertTrue(ts.is_unk_user(13))
        self.assertFalse(ts.is_unk_item(3))
        self.assertTrue(ts.is_unk_item(16))

        # Raw id -> internal index lookups.
        self.assertEqual(ts.get_uid('768'), 1)
        self.assertEqual(ts.get_iid('195'), 7)

        self.assertSequenceEqual(ts.uid_list, range(10))
        self.assertListEqual(ts.raw_uid_list, raw_user_ids)

        self.assertSequenceEqual(ts.iid_list, range(10))
        self.assertListEqual(ts.raw_iid_list, raw_item_ids)

        # Rebuild with one (user, item) pair pre-registered globally.
        ts = MatrixTrainSet.from_uir(
            self.triplet_data,
            global_uid_map=OrderedDict(),
            global_iid_map=OrderedDict(),
            global_ui_set={('76', '93')},
            verbose=True,
        )

        self.assertEqual(ts.num_users, 9)
        self.assertEqual(ts.num_items, 9)
示例#2
0
    def test_item_iter(self):
        """Check item_iter() batches with and without shuffling.

        Unshuffled, the collected batches must equal a (10, 1) column of
        0..9. Shuffled, the same comparison is expected to fail.
        """
        ts = MatrixTrainSet.from_uir(
            self.triplet_data,
            global_uid_map={},
            global_iid_map={},
            global_ui_set=set(),
            verbose=True,
        )

        in_order = list(ts.item_iter())
        npt.assert_array_equal(np.arange(10).reshape(10, 1), in_order)

        # Expected array and shuffled batches are built before entering the
        # assertRaises context, so only the comparison itself may raise
        # inside it.
        # NOTE(review): a shuffle could in principle reproduce the original
        # order, which would make this check fail — confirm whether the
        # iterator is seeded.
        expected = np.arange(10).reshape(10, 1)
        shuffled = list(ts.item_iter(shuffle=True))
        with self.assertRaises(AssertionError):
            npt.assert_array_equal(expected, shuffled)
示例#3
0
def test_matrix_trainset_uir_iter():
    """Check the (user, item, rating) batches yielded by uir_iter().

    The train set is built from './tests/data.txt'. With default iterator
    arguments the test expects ten batches whose users and items compare
    equal to 0..9 and whose ratings match the listed values.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplet_data, global_uid_map={}, global_iid_map={},
                                        global_ui_set=set(), verbose=True)

    expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]

    # zip() stops at the shorter input, so each length is pinned first;
    # otherwise an empty or truncated iterator would pass the element-wise
    # comparison vacuously.
    users = [batch_users for batch_users, _, _ in train_set.uir_iter()]
    assert len(users) == 10
    assert all([a == b for a, b in zip(users, range(10))])

    items = [batch_items for _, batch_items, _ in train_set.uir_iter()]
    assert len(items) == 10
    assert all([a == b for a, b in zip(items, range(10))])

    ratings = [batch_ratings for _, _, batch_ratings in train_set.uir_iter()]
    assert len(ratings) == len(expected_ratings)
    assert all([a == b for a, b in zip(ratings, expected_ratings)])
示例#4
0
def test_matrix_trainset():
    """Test MatrixTrainSet built from './tests/data.txt'.

    Checks matrix shape, rating bounds, global mean, item popularity ranking,
    user/item counts, unknown-id detection, raw-id lookups and the index /
    raw id lists. A second train set is then built with ('76', '93') already
    present in ``global_ui_set`` and is expected to hold 9 users and 9 items.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplet_data, global_uid_map={}, global_iid_map={}, global_ui_set=set(),
                                        verbose=True)

    expected_ppl_rank = [7, 9, 6, 5, 3, 2, 1, 0, 8, 4]
    expected_raw_uids = ['76', '768', '642', '930', '329', '633', '716', '871', '543', '754']
    expected_raw_iids = ['93', '257', '795', '709', '705', '226', '478', '195', '737', '282']

    assert train_set.matrix.shape == (10, 10)
    assert train_set.min_rating == 3
    assert train_set.max_rating == 5

    assert int(train_set.global_mean) == int((3 * 2 + 4 * 7 + 5) / 10)

    # zip() stops at the shorter input, so every sequence comparison below is
    # preceded by a length check; without it a short or empty sequence would
    # pass vacuously.
    assert len(train_set.item_ppl_rank) == len(expected_ppl_rank)
    assert all([a == b for a, b in zip(train_set.item_ppl_rank, expected_ppl_rank)])

    assert train_set.num_users == 10
    assert train_set.num_items == 10

    assert not train_set.is_unk_user(7)
    assert train_set.is_unk_user(13)

    assert not train_set.is_unk_item(3)
    assert train_set.is_unk_item(16)

    assert train_set.get_uid('768') == 1
    assert train_set.get_iid('195') == 7

    assert len(train_set.uid_list) == 10
    assert all([a == b for a, b in zip(train_set.uid_list, range(10))])
    assert len(train_set.raw_uid_list) == len(expected_raw_uids)
    assert all([a == b for a, b in zip(train_set.raw_uid_list, expected_raw_uids)])

    assert len(train_set.iid_list) == 10
    assert all([a == b for a, b in zip(train_set.iid_list, range(10))])
    assert len(train_set.raw_iid_list) == len(expected_raw_iids)
    assert all([a == b for a, b in zip(train_set.raw_iid_list, expected_raw_iids)])

    # Rebuild with one (user, item) pair pre-registered globally.
    train_set = MatrixTrainSet.from_uir(triplet_data, global_uid_map={}, global_iid_map={},
                                        global_ui_set={('76', '93')}, verbose=True)
    assert train_set.num_users == 9
    assert train_set.num_items == 9
示例#5
0
def test_matrix_trainset_uij_iter():
    """Check the (user, positive item, negative item) batches from uij_iter().

    The train set is built from './tests/data.txt'. With default iterator
    arguments the test expects ten batches: users and positive items compare
    equal to 0..9, while each negative item differs from the index at its
    position.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplet_data, global_uid_map={}, global_iid_map={},
                                        global_ui_set=set(), verbose=True)

    # zip() stops at the shorter input, so each length is pinned first;
    # otherwise an empty or truncated iterator would pass the element-wise
    # comparison vacuously.
    users = [batch_users for batch_users, _, _ in train_set.uij_iter()]
    assert len(users) == 10
    assert all([a == b for a, b in zip(users, range(10))])

    pos_items = [batch_pos_items for _, batch_pos_items, _ in train_set.uij_iter()]
    assert len(pos_items) == 10
    assert all([a == b for a, b in zip(pos_items, range(10))])

    neg_items = [batch_neg_items for _, _, batch_neg_items in train_set.uij_iter()]
    assert len(neg_items) == 10
    assert all([a != b for a, b in zip(neg_items, range(10))])
示例#6
0
def test_uir_tuple():
    """Check that uir_tuple rejects a 2-element tuple and that batching still works.

    Assigning ``([], [])`` to ``uir_tuple`` must raise ValueError. Afterwards
    the train set is expected to report 2 batches for ``batch_size=5``.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplet_data,
                                        global_uid_map=None,
                                        global_iid_map=None,
                                        global_ui_set=None,
                                        verbose=True)

    try:
        train_set.uir_tuple = ([], [])
    except ValueError:
        pass
    else:
        # Without this branch the test would pass even if the setter silently
        # accepted the malformed tuple.
        raise AssertionError('assigning a 2-element uir_tuple did not raise ValueError')

    assert 2 == train_set.num_batches(batch_size=5)
示例#7
0
    def test_idx_iter(self):
        """Check idx_iter() batches over a range of 10 with batch_size=1.

        Unshuffled, the collected batches must equal a (10, 1) column of
        0..9. Shuffled, the same comparison is expected to fail.
        """
        ts = MatrixTrainSet.from_uir(self.triplet_data)

        ordered_batches = list(
            ts.idx_iter(idx_range=10, batch_size=1, shuffle=False))
        npt.assert_array_equal(ordered_batches, np.arange(10).reshape(10, 1))

        # NOTE(review): a shuffle could in principle reproduce the original
        # order, which would make this check fail — confirm whether the
        # iterator is seeded.
        shuffled_batches = list(
            ts.idx_iter(idx_range=10, batch_size=1, shuffle=True))
        reference = np.arange(10).reshape(10, 1)
        npt.assert_raises(AssertionError, npt.assert_array_equal,
                          shuffled_batches, reference)
示例#8
0
    def test_uir_tuple(self):
        """Check the uir_tuple property, its validation, and num_batches().

        The tuple must have 3 components, the first holding 10 entries.
        Assigning a 2-element tuple must raise ValueError. Afterwards the
        train set is expected to report 2 batches for ``batch_size=5``.
        """
        train_set = MatrixTrainSet.from_uir(self.triplet_data,
                                            global_uid_map=None,
                                            global_iid_map=None,
                                            global_ui_set=None,
                                            verbose=True)

        self.assertEqual(len(train_set.uir_tuple), 3)
        self.assertEqual(len(train_set.uir_tuple[0]), 10)

        # assertRaises fails the test when no exception is raised; the former
        # try/except form passed silently in that case.
        with self.assertRaises(ValueError):
            train_set.uir_tuple = ([], [])

        self.assertEqual(train_set.num_batches(batch_size=5), 2)
示例#9
0
    def test_uir_iter(self):
        """Check the (user, item, rating) batches yielded by uir_iter().

        With default iterator arguments, users and items are compared against
        range(10) and ratings against the listed values.
        """
        ts = MatrixTrainSet.from_uir(
            self.triplet_data,
            global_uid_map={},
            global_iid_map={},
            global_ui_set=set(),
            verbose=True,
        )
        expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]

        # The iterator is consumed once per column, taking one field of each
        # yielded triple per pass.
        user_batches = [u for u, _, _ in ts.uir_iter()]
        self.assertSequenceEqual(user_batches, range(10))

        item_batches = [i for _, i, _ in ts.uir_iter()]
        self.assertSequenceEqual(item_batches, range(10))

        rating_batches = [r for _, _, r in ts.uir_iter()]
        self.assertListEqual(rating_batches, expected_ratings)
示例#10
0
    def test_matrix(self):
        """Check the sparse-matrix views exposed by the train set.

        ``matrix`` must be a CSR matrix; ``csr_matrix``, ``csc_matrix`` and
        ``dok_matrix`` are each probed at one diagonal entry, and the CSR
        view must report sorted indices.
        """
        from scipy.sparse import csc_matrix, csr_matrix, dok_matrix

        ts = MatrixTrainSet.from_uir(
            self.triplet_data,
            global_uid_map=None,
            global_iid_map=None,
            global_ui_set=None,
            verbose=True,
        )

        # CSR view.
        self.assertIsInstance(ts.matrix, csr_matrix)
        csr_view = ts.csr_matrix
        self.assertEqual(csr_view[0, 0], 4)
        self.assertTrue(ts.csr_matrix.has_sorted_indices)

        # CSC view.
        self.assertIsInstance(ts.csc_matrix, csc_matrix)
        self.assertEqual(ts.csc_matrix[4, 4], 3)

        # DOK view.
        self.assertIsInstance(ts.dok_matrix, dok_matrix)
        self.assertEqual(ts.dok_matrix[7, 7], 5)
示例#11
0
    def test_uij_iter(self):
        """Check the (user, positive item, negative item) batches from uij_iter().

        With default iterator arguments, users and positive items are
        compared against range(10); the negative items are expected NOT to
        equal range(10).
        """
        ts = MatrixTrainSet.from_uir(
            self.triplet_data,
            global_uid_map={},
            global_iid_map={},
            global_ui_set=set(),
            verbose=True,
        )

        # The iterator is consumed once per column, taking one field of each
        # yielded triple per pass.
        user_batches = [u for u, _, _ in ts.uij_iter()]
        self.assertSequenceEqual(user_batches, range(10))

        positive_batches = [p for _, p, _ in ts.uij_iter()]
        self.assertSequenceEqual(positive_batches, range(10))

        negative_batches = [n for _, _, n in ts.uij_iter()]
        # The sequence comparison itself must fail for the negatives.
        with self.assertRaises(AssertionError):
            self.assertSequenceEqual(negative_batches, range(10))