def test_init(self):
    """Check the basic properties of a train set built with ``from_uir``.

    A ``MatrixTrainSet`` is built from ``self.triplet_data`` with empty
    global maps, then the test verifies the expected matrix shape, rating
    statistics, item popularity ranking, unknown user/item checks and the
    raw-id <-> index mappings.  A second train set is built with the pair
    ``('76', '93')`` already present in ``global_ui_set``; it is expected
    to report 9 users and 9 items.
    """
    expected_raw_uids = [
        '76', '768', '642', '930', '329', '633', '716', '871', '543', '754'
    ]
    expected_raw_iids = [
        '93', '257', '795', '709', '705', '226', '478', '195', '737', '282'
    ]

    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map=OrderedDict(),
        global_iid_map=OrderedDict(),
        global_ui_set=set(),
        verbose=True,
    )

    # Shape and rating statistics.
    self.assertSequenceEqual(train_set.matrix.shape, (10, 10))
    self.assertEqual(train_set.min_rating, 3)
    self.assertEqual(train_set.max_rating, 5)
    # Only the integer part of the mean is compared.
    expected_mean = (3 * 2 + 4 * 7 + 5) / 10
    self.assertEqual(int(train_set.global_mean), int(expected_mean))

    # First element returned by item_ppl_rank() is checked against the
    # expected item ordering.
    npt.assert_array_equal(
        train_set.item_ppl_rank()[0],
        np.asarray([7, 9, 6, 5, 3, 2, 1, 0, 8, 4]),
    )

    # Sizes and unknown-id checks.
    self.assertEqual(train_set.num_users, 10)
    self.assertEqual(train_set.num_items, 10)
    self.assertFalse(train_set.is_unk_user(7))
    self.assertTrue(train_set.is_unk_user(13))
    self.assertFalse(train_set.is_unk_item(3))
    self.assertTrue(train_set.is_unk_item(16))

    # Raw id -> index lookups.
    self.assertEqual(train_set.get_uid('768'), 1)
    self.assertEqual(train_set.get_iid('195'), 7)

    # Index lists and raw id lists.
    self.assertSequenceEqual(train_set.uid_list, range(10))
    self.assertListEqual(train_set.raw_uid_list, expected_raw_uids)
    self.assertSequenceEqual(train_set.iid_list, range(10))
    self.assertListEqual(train_set.raw_iid_list, expected_raw_iids)

    # Rebuild with one (user, item) pair already in the global set.
    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map=OrderedDict(),
        global_iid_map=OrderedDict(),
        global_ui_set={('76', '93')},
        verbose=True,
    )
    self.assertEqual(train_set.num_users, 9)
    self.assertEqual(train_set.num_items, 9)
def test_item_iter(self):
    """Check ``item_iter`` with and without shuffling.

    Without arguments the collected batches must equal
    ``np.arange(10).reshape(10, 1)``; with ``shuffle=True`` the same
    comparison is expected to fail.
    """
    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set=set(),
        verbose=True,
    )

    in_order = list(train_set.item_iter())
    npt.assert_array_equal(np.arange(10).reshape(10, 1), in_order)

    # Materialise the shuffled batches before entering the context manager
    # so that only the array comparison itself is allowed to raise.
    shuffled = list(train_set.item_iter(shuffle=True))
    with self.assertRaises(AssertionError):
        npt.assert_array_equal(np.arange(10).reshape(10, 1), shuffled)
def test_matrix_trainset_uir_iter():
    """Check the users, items and ratings yielded by ``uir_iter``.

    The train set is built from ``./tests/data.txt``.  Each of the three
    components is collected in its own pass over ``uir_iter()`` and compared
    pairwise with the expected values.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(
        triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set=set(),
        verbose=True,
    )

    users = [u for u, _, _ in train_set.uir_iter()]
    for got, want in zip(users, range(10)):
        assert got == want

    items = [i for _, i, _ in train_set.uir_iter()]
    for got, want in zip(items, range(10)):
        assert got == want

    expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]
    ratings = [r for _, _, r in train_set.uir_iter()]
    for got, want in zip(ratings, expected_ratings):
        assert got == want
def test_matrix_trainset():
    """Test MatrixTrainSet.

    Builds a train set from ``./tests/data.txt`` and checks its shape,
    rating statistics, item popularity ranking, unknown user/item checks and
    id mappings.  A second train set is built with ``('76', '93')`` already
    in ``global_ui_set`` and is expected to report 9 users and 9 items.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(
        triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set=set(),
        verbose=True,
    )

    # Shape and rating statistics.
    assert train_set.matrix.shape == (10, 10)
    assert train_set.min_rating == 3
    assert train_set.max_rating == 5
    # Only the integer part of the mean is compared.
    assert int(train_set.global_mean) == int((3 * 2 + 4 * 7 + 5) / 10)

    # Pairwise comparison; zip stops at the shorter of the two sequences.
    for got, want in zip(train_set.item_ppl_rank,
                         [7, 9, 6, 5, 3, 2, 1, 0, 8, 4]):
        assert got == want

    assert train_set.num_users == 10
    assert train_set.num_items == 10

    # Unknown-id checks: (index, expected result).
    for user_idx, expected in ((7, False), (13, True)):
        assert train_set.is_unk_user(user_idx) == expected
    for item_idx, expected in ((3, False), (16, True)):
        assert train_set.is_unk_item(item_idx) == expected

    # Raw id -> index lookups.
    assert train_set.get_uid('768') == 1
    assert train_set.get_iid('195') == 7

    # Index lists and raw id lists, compared pairwise.
    for got, want in zip(train_set.uid_list, range(10)):
        assert got == want
    expected_raw_uids = ['76', '768', '642', '930', '329',
                         '633', '716', '871', '543', '754']
    for got, want in zip(train_set.raw_uid_list, expected_raw_uids):
        assert got == want

    for got, want in zip(train_set.iid_list, range(10)):
        assert got == want
    expected_raw_iids = ['93', '257', '795', '709', '705',
                         '226', '478', '195', '737', '282']
    for got, want in zip(train_set.raw_iid_list, expected_raw_iids):
        assert got == want

    # Rebuild with one (user, item) pair already in the global set.
    train_set = MatrixTrainSet.from_uir(
        triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set={('76', '93')},
        verbose=True,
    )
    assert train_set.num_users == 9
    assert train_set.num_items == 9
def test_matrix_trainset_uij_iter():
    """Check the users, positive items and negative items from ``uij_iter``.

    Users and positive items are compared pairwise with ``range(10)``; each
    negative item is expected to differ from the value at the same position
    in ``range(10)``.  Every component is collected in its own pass over
    ``uij_iter()``.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(
        triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set=set(),
        verbose=True,
    )

    users = [u for u, _, _ in train_set.uij_iter()]
    for got, want in zip(users, range(10)):
        assert got == want

    pos_items = [pos for _, pos, _ in train_set.uij_iter()]
    for got, want in zip(pos_items, range(10)):
        assert got == want

    neg_items = [neg for _, _, neg in train_set.uij_iter()]
    for got, positive in zip(neg_items, range(10)):
        assert got != positive
def test_uir_tuple():
    """Check ``uir_tuple`` assignment validation and ``num_batches``.

    Builds a train set from ``./tests/data.txt`` with the global maps left
    as ``None``.  Assigning a 2-element tuple to ``uir_tuple`` must raise
    ``ValueError``, and ``num_batches(batch_size=5)`` must return 2.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(
        triplet_data,
        global_uid_map=None,
        global_iid_map=None,
        global_ui_set=None,
        verbose=True,
    )

    # The `else` branch makes the check meaningful: without it the test
    # would also pass when the assignment raises nothing at all.
    try:
        train_set.uir_tuple = ([], [])
    except ValueError:
        pass
    else:
        raise AssertionError(
            'assigning a 2-element uir_tuple should raise ValueError')

    assert train_set.num_batches(batch_size=5) == 2
def test_idx_iter(self):
    """Check ``idx_iter`` with and without shuffling.

    With ``idx_range=10`` and ``batch_size=1`` the unshuffled batches must
    equal ``np.arange(10).reshape(10, 1)``; with ``shuffle=True`` the same
    comparison is expected to fail.
    """
    train_set = MatrixTrainSet.from_uir(self.triplet_data)

    ordered_ids = list(
        train_set.idx_iter(idx_range=10, batch_size=1, shuffle=False))
    npt.assert_array_equal(ordered_ids, np.arange(10).reshape(10, 1))

    # Collect the shuffled batches first so that only the array comparison
    # runs inside the assert_raises context.
    shuffled_ids = list(
        train_set.idx_iter(idx_range=10, batch_size=1, shuffle=True))
    with npt.assert_raises(AssertionError):
        npt.assert_array_equal(shuffled_ids, np.arange(10).reshape(10, 1))
def test_uir_tuple(self):
    """Check the ``uir_tuple`` property, its validation and ``num_batches``.

    The train set is built from ``self.triplet_data`` with the global maps
    left as ``None``.  ``uir_tuple`` must have three components, the first
    of length 10.  Assigning a 2-element tuple must raise ``ValueError``,
    and ``num_batches(batch_size=5)`` must return 2.
    """
    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map=None,
        global_iid_map=None,
        global_ui_set=None,
        verbose=True,
    )

    self.assertEqual(len(train_set.uir_tuple), 3)
    self.assertEqual(len(train_set.uir_tuple[0]), 10)

    # assertRaises fails the test when no ValueError is raised; a bare
    # try/except with `assert True` would pass in that case too.
    with self.assertRaises(ValueError):
        train_set.uir_tuple = ([], [])

    self.assertEqual(train_set.num_batches(batch_size=5), 2)
def test_uir_iter(self):
    """Check the users, items and ratings yielded by ``uir_iter``.

    Each component is collected in its own pass over ``uir_iter()``.  Users
    and items are compared with ``range(10)``; ratings are compared with the
    expected rating list.
    """
    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set=set(),
        verbose=True,
    )

    users = [u for u, _, _ in train_set.uir_iter()]
    self.assertSequenceEqual(users, range(10))

    items = [i for _, i, _ in train_set.uir_iter()]
    self.assertSequenceEqual(items, range(10))

    expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]
    ratings = [r for _, _, r in train_set.uir_iter()]
    self.assertListEqual(ratings, expected_ratings)
def test_matrix(self):
    """Check the sparse-matrix views exposed by the train set.

    ``matrix`` must be a SciPy ``csr_matrix``; ``csc_matrix`` and
    ``dok_matrix`` must be instances of the corresponding SciPy classes.
    One entry of each view is compared with its expected rating, and the
    CSR view must report sorted indices.
    """
    import scipy.sparse as sparse

    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map=None,
        global_iid_map=None,
        global_ui_set=None,
        verbose=True,
    )

    # CSR view.
    self.assertIsInstance(train_set.matrix, sparse.csr_matrix)
    self.assertEqual(train_set.csr_matrix[0, 0], 4)
    self.assertTrue(train_set.csr_matrix.has_sorted_indices)

    # CSC view.
    self.assertIsInstance(train_set.csc_matrix, sparse.csc_matrix)
    self.assertEqual(train_set.csc_matrix[4, 4], 3)

    # DOK view.
    self.assertIsInstance(train_set.dok_matrix, sparse.dok_matrix)
    self.assertEqual(train_set.dok_matrix[7, 7], 5)
def test_uij_iter(self):
    """Check the users, positive items and negative items from ``uij_iter``.

    Users and positive items must equal ``range(10)`` as sequences.  The
    negative items are expected NOT to equal ``range(10)``, so the sequence
    comparison on them must fail.  Every component is collected in its own
    pass over ``uij_iter()``.
    """
    train_set = MatrixTrainSet.from_uir(
        self.triplet_data,
        global_uid_map={},
        global_iid_map={},
        global_ui_set=set(),
        verbose=True,
    )

    users = [u for u, _, _ in train_set.uij_iter()]
    self.assertSequenceEqual(users, range(10))

    pos_items = [pos for _, pos, _ in train_set.uij_iter()]
    self.assertSequenceEqual(pos_items, range(10))

    neg_items = [neg for _, _, neg in train_set.uij_iter()]
    with self.assertRaises(AssertionError):
        self.assertSequenceEqual(neg_items, range(10))