Пример #1
0
def test_read_uir():
    """Test read_uir on the sample data file.

    Checks the number of triplets read, spot-checks user/item/rating
    fields, then verifies that an out-of-range column index raises
    IndexError.
    """
    data_file = './tests/data.txt'
    triplet_data = reader.read_uir(data_file)

    assert len(triplet_data) == 10
    assert triplet_data[4][2] == 3
    assert triplet_data[6][1] == '478'
    assert triplet_data[8][0] == '543'

    # The original `except IndexError: assert True` passed even when no
    # exception was raised; fail explicitly if read_uir does not raise.
    try:
        reader.read_uir(data_file, 10)
    except IndexError:
        pass
    else:
        raise AssertionError('read_uir with column index 10 should raise IndexError')
Пример #2
0
def test_with_ratio_split():
    """Run an Experiment with RatioSplit and check the result layout.

    One model (PMF) times four metrics gives a (1, 4) average-result
    table and one per-user entry per metric; also checks that invalid
    Experiment arguments raise ValueError.
    """
    data_file = './tests/data.txt'
    data = reader.read_uir(data_file)
    exp = Experiment(eval_method=RatioSplit(data, verbose=True),
                     models=[PMF(1, 0)],
                     metrics=[MAE(), RMSE(),
                              Recall(1), FMeasure(1)],
                     verbose=True)
    exp.run()

    assert (1, 4) == exp.results.avg.shape

    assert 1 == len(exp.results.per_user)
    assert 4 == len(exp.results.per_user['PMF'])
    assert 2 == len(exp.results.per_user['PMF']['MAE'])
    assert 2 == len(exp.results.per_user['PMF']['RMSE'])
    assert 2 == len(exp.results.per_user['PMF']['Recall@1'])
    assert 2 == len(exp.results.per_user['PMF']['F1@1'])

    # The original `except ValueError: assert True` passed even when no
    # exception was raised; fail explicitly when Experiment does not raise.
    try:
        Experiment(None, None, None)
    except ValueError:
        pass
    else:
        raise AssertionError('Experiment(None, None, None) should raise ValueError')

    try:
        Experiment(None, [PMF(1, 0)], None)
    except ValueError:
        pass
    else:
        raise AssertionError('Experiment with eval_method=None should raise ValueError')
Пример #3
0
def test_with_cross_validation():
    """Smoke test: an Experiment with CrossValidation runs to completion."""
    uir_data = reader.read_uir('./tests/data.txt')
    experiment = Experiment(eval_method=CrossValidation(uir_data),
                            models=[PMF(1, 0)],
                            metrics=[MAE(), RMSE(),
                                     Recall(1), FMeasure(1)],
                            verbose=True)
    experiment.run()
Пример #4
0
def test_get_train_test_sets_next_fold():
    """Walk through every fold, checking the fold counter and the train
    matrix shape after each _get_train_test/_next_fold cycle."""
    uir_data = reader.read_uir('./tests/data.txt')
    cv = CrossValidation(data=uir_data, n_folds=5)

    for fold in range(cv.n_folds):
        cv._get_train_test()
        assert cv.current_fold == fold
        assert cv.train_set.matrix.shape == (8, 8)
        cv._next_fold()
        
Пример #5
0
def test_splits():
    """RatioSplit.split marks the split as done; a repeated call is
    exercised to ensure it is handled without error."""
    uir_data = reader.read_uir('./tests/data.txt')

    ratio_split = RatioSplit(uir_data,
                             test_size=0.1,
                             val_size=0.1,
                             seed=123,
                             verbose=True)
    ratio_split.split()
    assert ratio_split._split_ran

    # Second call on an already-split object.
    ratio_split.split()
Пример #6
0
def test_matrix_trainset_uir_iter():
    """uir_iter should yield users and items in index order and ratings
    matching the sample file."""
    triplets = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplets, global_uid_map={}, global_iid_map={},
                                        global_ui_set=set(), verbose=True)

    for expected_uid, (batch_u, _, _) in zip(range(10), train_set.uir_iter()):
        assert batch_u == expected_uid

    for expected_iid, (_, batch_i, _) in zip(range(10), train_set.uir_iter()):
        assert batch_i == expected_iid

    expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]
    for expected_r, (_, _, batch_r) in zip(expected_ratings, train_set.uir_iter()):
        assert batch_r == expected_r
Пример #7
0
def test_matrix_trainset_uij_iter():
    """uij_iter should yield users and positive items in index order,
    and negative items that differ from the corresponding index."""
    triplets = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplets, global_uid_map={}, global_iid_map={},
                                        global_ui_set=set(), verbose=True)

    for expected_uid, (batch_u, _, _) in zip(range(10), train_set.uij_iter()):
        assert batch_u == expected_uid

    for expected_pos, (_, batch_pos, _) in zip(range(10), train_set.uij_iter()):
        assert batch_pos == expected_pos

    # Sampled negatives must differ from the positive item at each index.
    for pos_idx, (_, _, batch_neg) in zip(range(10), train_set.uij_iter()):
        assert batch_neg != pos_idx
Пример #8
0
def test_partition_data():
    """With 10 points and 5 folds, every fold should get exactly 2 points
    and every fold label should appear in the partition."""
    uir_data = reader.read_uir('./tests/data.txt')

    n_folds = 5
    cv = CrossValidation(data=uir_data, n_folds=n_folds)

    _, fold_sizes = np.unique(cv.partition, return_counts=True)

    assert len(cv.partition) == len(uir_data)
    assert set(cv.partition) == set(range(n_folds))
    assert np.all(fold_sizes == 2)
Пример #9
0
def test_testset():
    """TestSet construction, id lookups, and (user, item) exclusion."""
    triplets = reader.read_uir('./tests/data.txt')
    test_set = TestSet.from_uir(triplets, global_uid_map={}, global_iid_map={}, global_ui_set=set())

    assert test_set.get_uid('768') == 1
    assert test_set.get_iid('195') == 7

    for mapped_uid, expected in zip(test_set.users, range(10)):
        assert mapped_uid == expected
    for rating_entry, expected in zip(test_set.get_ratings(2), [(2, 4)]):
        assert rating_entry == expected

    # Excluding one known (user, item) pair leaves 9 users.
    test_set = TestSet.from_uir(triplets, global_uid_map={}, global_iid_map={},
                                global_ui_set={('76', '93')}, verbose=True)
    assert len(test_set.users) == 9
Пример #10
0
def test_uir_tuple():
    """Assigning a malformed uir_tuple should raise ValueError; batch
    counting should follow from the data size."""
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplet_data,
                                        global_uid_map=None,
                                        global_iid_map=None,
                                        global_ui_set=None,
                                        verbose=True)

    # The original `except ValueError: assert True` passed even when no
    # exception was raised; fail explicitly if the assignment does not raise.
    try:
        train_set.uir_tuple = ([], [])
    except ValueError:
        pass
    else:
        raise AssertionError('assigning a 2-tuple to uir_tuple should raise ValueError')

    # 10 ratings with batch_size=5 -> 2 batches.
    assert 2 == train_set.num_batches(batch_size=5)
Пример #11
0
def test_validate_partition():
    """_validate_partition should reject malformed partitions."""
    data = reader.read_uir('./tests/data.txt')

    nfolds = 5
    cv = CrossValidation(data=data, n_folds=nfolds)

    # The original used a bare `except: assert True`, which swallowed every
    # exception and passed even when nothing was raised. Catch narrowly and
    # fail explicitly when no exception occurs.
    # Partition shorter than the data (4 labels for 10 points).
    try:
        cv._validate_partition([0, 0, 1, 1])
    except Exception:
        pass
    else:
        raise AssertionError('partition of wrong length should be rejected')

    # Partition missing a fold (fold 4 absent among 5 folds).
    try:
        cv._validate_partition([0, 0, 1, 1, 2, 2, 2, 2, 3, 3])
    except Exception:
        pass
    else:
        raise AssertionError('partition missing a fold should be rejected')
Пример #12
0
def test_matrix_trainset():
    """MatrixTrainSet construction, rating stats, id mappings, and
    (user, item)-pair exclusion."""
    triplets = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplets, global_uid_map={}, global_iid_map={}, global_ui_set=set(),
                                        verbose=True)

    assert train_set.matrix.shape == (10, 10)
    assert train_set.min_rating == 3
    assert train_set.max_rating == 5

    # Sample file has 2 ratings of 3, 7 of 4, and 1 of 5.
    assert int(train_set.global_mean) == int((3 * 2 + 4 * 7 + 5) / 10)

    expected_ppl_rank = [7, 9, 6, 5, 3, 2, 1, 0, 8, 4]
    for actual, expected in zip(train_set.item_ppl_rank, expected_ppl_rank):
        assert actual == expected

    assert train_set.num_users == 10
    assert train_set.num_items == 10

    assert train_set.is_unk_user(7) == False
    assert train_set.is_unk_user(13) == True

    assert train_set.is_unk_item(3) == False
    assert train_set.is_unk_item(16) == True

    assert train_set.get_uid('768') == 1
    assert train_set.get_iid('195') == 7

    expected_raw_uids = ['76', '768', '642', '930', '329', '633', '716', '871', '543', '754']
    for actual, expected in zip(train_set.uid_list, range(10)):
        assert actual == expected
    for actual, expected in zip(train_set.raw_uid_list, expected_raw_uids):
        assert actual == expected

    expected_raw_iids = ['93', '257', '795', '709', '705', '226', '478', '195', '737', '282']
    for actual, expected in zip(train_set.iid_list, range(10)):
        assert actual == expected
    for actual, expected in zip(train_set.raw_iid_list, expected_raw_iids):
        assert actual == expected

    # Excluding one known (user, item) pair shrinks both dimensions by one.
    train_set = MatrixTrainSet.from_uir(triplets, global_uid_map={}, global_iid_map={},
                                        global_ui_set={('76', '93')}, verbose=True)
    assert train_set.num_users == 9
    assert train_set.num_items == 9
Пример #13
0
def test_from_splits():
    """BaseMethod.from_splits should validate inputs and report totals."""
    data = reader.read_uir('./tests/data.txt')

    # The original `except ValueError: assert True` passed even when no
    # exception was raised; fail explicitly when from_splits does not raise.
    try:
        BaseMethod.from_splits(train_data=None, test_data=None)
    except ValueError:
        pass
    else:
        raise AssertionError('from_splits should raise ValueError when train_data is None')

    try:
        BaseMethod.from_splits(train_data=data, test_data=None)
    except ValueError:
        pass
    else:
        raise AssertionError('from_splits should raise ValueError when test_data is None')

    bm = BaseMethod.from_splits(train_data=data, test_data=data)
    assert bm.total_users == 10
    assert bm.total_items == 10

    # Also valid with a validation split and verbose output.
    bm = BaseMethod.from_splits(train_data=data,
                                test_data=data,
                                val_data=data,
                                verbose=True)
    assert bm.total_users == 10
    assert bm.total_items == 10
Пример #14
0
def test_evaluate():
    """Evaluate MF under RatioSplit, rating- and ranking-based, with and
    without excluding unknown users/items."""
    uir_data = reader.read_uir('./tests/data.txt')

    ratio_split = RatioSplit(uir_data, exclude_unknowns=True, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=False)

    ratio_split = RatioSplit(uir_data, exclude_unknowns=False, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=False)

    # Densify the data: add a rating of 5 for every (user, item) pair.
    users = [u for u, _, _ in uir_data]
    items = [i for _, i, _ in uir_data]
    uir_data.extend((u, i, 5) for u in users for i in items)

    ratio_split = RatioSplit(uir_data, exclude_unknowns=True, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=True)

    ratio_split = RatioSplit(uir_data, exclude_unknowns=False, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=True)
Пример #15
0
@author: Quoc-Tuan Truong <*****@*****.**>
"""

from cornac.data import reader
from cornac.eval_methods import BaseMethod
from cornac.models import MF
from cornac.metrics import MAE, RMSE
from cornac.utils import cache

# Download MovieLens 100K provided training and test splits
# (fetched over HTTP and cached locally by `cache`).
train_path = cache(
    url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.base')
test_path = cache(
    url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.test')
# Read each split as (user, item, rating) triplets.
train_data = reader.read_uir(train_path)
test_data = reader.read_uir(test_path)

# Build the evaluation method from the predefined splits;
# exclude_unknowns=False presumably keeps unknown users/items in the
# test set — confirm against BaseMethod docs.
eval_method = BaseMethod.from_splits(train_data=train_data,
                                     test_data=test_data,
                                     exclude_unknowns=False,
                                     verbose=True)

# Matrix Factorization model; k=10 latent factors, trained for up to
# 25 iterations (early_stop=True may end training sooner).
mf = MF(k=10,
        max_iter=25,
        learning_rate=0.01,
        lambda_reg=0.02,
        use_bias=True,
        early_stop=True,
        verbose=True)