Пример #1
0
def data():
    """Build seeded random two-view data plus a CTClassifier for the tests.

    Presumably registered as a pytest fixture (the decorator is not visible
    in this snippet); the tests index the returned dict by the keys below.

    Returns:
        dict with keys:
            'N_test'        -- number of test samples (5)
            'clf_test'      -- CTClassifier wrapping two GaussianNB estimators
            'random_data'   -- list of two arrays, shapes (100, 10) and (100, 6)
            'random_labels' -- length-100 array; all but the last 10 entries
                               are NaN, the rest are 2.0 or 3.0
            'random_test'   -- list of two arrays, shapes (5, 10) and (5, 6)
            'random_seed'   -- the seed used (10)
    """
    seed = 10
    n_samples, n_test = 100, 5
    view_dims = (10, 6)

    np.random.seed(seed)

    # The draw order (train views, labels, test views) must stay fixed so the
    # seeded random stream yields the same arrays every time.
    train_views = [np.random.rand(n_samples, dim) for dim in view_dims]

    # floor(2 * U[0, 1) + 2) yields labels in {2.0, 3.0}.
    labels = np.floor(2 * np.random.rand(n_samples, ) + 2)
    # Only the last 10 samples keep a label; NaN presumably marks
    # "unlabeled" for CTClassifier -- confirm against its documentation.
    labels[:-10] = np.nan

    test_views = [np.random.rand(n_test, dim) for dim in view_dims]

    classifier = CTClassifier(GaussianNB(), GaussianNB(), random_state=seed)

    return dict(
        N_test=n_test,
        clf_test=classifier,
        random_data=train_views,
        random_labels=labels,
        random_test=test_views,
        random_seed=seed,
    )
Пример #2
0
def test_fit_defaultclassifier(data):
    """Fit a CTClassifier built without explicit estimators and check output.

    The classifier is constructed with only ``random_state=10``, fitted on
    ``data['random_data']`` / ``data['random_labels']``, and its ``predict``
    and ``predict_proba`` results on ``data['random_test']`` are compared
    against hard-coded reference values.
    """
    model = CTClassifier(random_state=10)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2., 3., 2., 2., 2.]
    expected_proba = [
        [0.88555037, 0.11444963],
        [0.05650123, 0.94349877],
        [0.50057741, 0.49942259],
        [0.89236186, 0.10763814],
        [0.95357416, 0.04642584],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Predicted labels must match the reference exactly.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Each of the two probability columns must agree to within the tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #3
0
def test_fit_defaultclassifier(data):
    """Fit a CTClassifier built without explicit estimators and check output.

    The classifier is constructed with only ``random_state=10``, fitted on
    ``data['random_data']`` / ``data['random_labels']``, and its ``predict``
    and ``predict_proba`` results on ``data['random_test']`` are compared
    against hard-coded reference values.
    """
    model = CTClassifier(random_state=10)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2., 3., 2., 2., 2.]
    expected_proba = [
        [0.90643161, 0.09356839],
        [0.35361377, 0.64638623],
        [0.50968072, 0.49031928],
        [0.58446856, 0.41553144],
        [0.87548943, 0.12451057],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Predicted labels must match the reference exactly.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Each of the two probability columns must agree to within the tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #4
0
def test_predict_num_iter(data):
    """Check predictions of a CTClassifier constructed with ``num_iter=9``.

    Two GaussianNB estimators are passed in explicitly. After fitting on the
    data from ``data``, the labels and class probabilities predicted for
    ``data['random_test']`` are compared against hard-coded reference values.
    """
    seed = 10
    model = CTClassifier(GaussianNB(), GaussianNB(), num_iter=9,
                         random_state=seed)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2., 3., 2., 2., 2.]
    expected_proba = [
        [0.88555037, 0.11444963],
        [0.05650123, 0.94349877],
        [0.50057741, 0.49942259],
        [0.89236186, 0.10763814],
        [0.95357416, 0.04642584],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Labels are compared for exact equality.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Probabilities are compared entry by entry with an absolute tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #5
0
def test_predict_set_n(data):
    """Check predictions of a CTClassifier constructed with ``n=9``.

    Two GaussianNB estimators are passed in explicitly. After fitting on the
    data from ``data``, the labels and class probabilities predicted for
    ``data['random_test']`` are compared against hard-coded reference values.
    """
    seed = 10
    model = CTClassifier(GaussianNB(), GaussianNB(), n=9, random_state=seed)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [3., 3., 2., 3., 3.]
    expected_proba = [
        [0.29020704, 0.70979296],
        [0.44024614, 0.55975386],
        [0.5710383, 0.4289617],
        [0.37366059, 0.62633941],
        [0.22157484, 0.77842516],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Labels are compared for exact equality.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Probabilities are compared entry by entry with an absolute tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #6
0
def test_predict_set_p(data):
    """Check predictions of a CTClassifier constructed with ``p=12``.

    Two GaussianNB estimators are passed in explicitly. After fitting on the
    data from ``data``, the labels and class probabilities predicted for
    ``data['random_test']`` are compared against hard-coded reference values.
    """
    seed = 10
    model = CTClassifier(GaussianNB(), GaussianNB(), p=12, random_state=seed)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [3., 3., 3., 3., 3.]
    expected_proba = [
        [0.31422418, 0.68577582],
        [0.40938282, 0.59061718],
        [0.48448605, 0.51551395],
        [0.38853225, 0.61146775],
        [0.22972488, 0.77027512],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Labels are compared for exact equality.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Probabilities are compared entry by entry with an absolute tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #7
0
def test_predict_num_iter(data):
    """Check predictions of a CTClassifier constructed with ``num_iter=9``.

    Two GaussianNB estimators are passed in explicitly. After fitting on the
    data from ``data``, the labels and class probabilities predicted for
    ``data['random_test']`` are compared against hard-coded reference values.
    """
    seed = 10
    model = CTClassifier(GaussianNB(), GaussianNB(), num_iter=9,
                         random_state=seed)
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2., 3., 2., 2., 2.]
    expected_proba = [
        [0.90643161, 0.09356839],
        [0.35361377, 0.64638623],
        [0.50968072, 0.49031928],
        [0.58446856, 0.41553144],
        [0.87548943, 0.12451057],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Labels are compared for exact equality.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Probabilities are compared entry by entry with an absolute tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #8
0
def test_predict_unlabeled_pool_size(data):
    """Check predictions of a CTClassifier with ``unlabeled_pool_size=20``.

    Two GaussianNB estimators are passed in explicitly. After fitting on the
    data from ``data``, the labels and class probabilities predicted for
    ``data['random_test']`` are compared against hard-coded reference values.
    """
    seed = 10
    model = CTClassifier(
        GaussianNB(),
        GaussianNB(),
        unlabeled_pool_size=20,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2., 3., 2., 2., 2.]
    expected_proba = [
        [0.55708013, 0.44291987],
        [0.29591617, 0.70408383],
        [0.50441055, 0.49558945],
        [0.99276393, 0.00723607],
        [0.95221514, 0.04778486],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Labels are compared for exact equality.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Probabilities are compared entry by entry with an absolute tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #9
0
def test_predict_unlabeled_pool_size(data):
    """Check predictions of a CTClassifier with ``unlabeled_pool_size=20``.

    Two GaussianNB estimators are passed in explicitly. After fitting on the
    data from ``data``, the labels and class probabilities predicted for
    ``data['random_test']`` are compared against hard-coded reference values.
    """
    seed = 10
    model = CTClassifier(
        GaussianNB(),
        GaussianNB(),
        unlabeled_pool_size=20,
        random_state=seed,
    )
    model.fit(data['random_data'], data['random_labels'])
    predicted_labels = model.predict(data['random_test'])
    predicted_proba = model.predict_proba(data['random_test'])

    expected_labels = [2., 3., 3., 2., 2.]
    expected_proba = [
        [0.75269031, 0.24730969],
        [0.37316567, 0.62683433],
        [0.45283102, 0.54716898],
        [0.59666021, 0.40333979],
        [0.81196019, 0.18803981],
    ]
    n_test = data['N_test']
    tolerance = 1e-8

    # Labels are compared for exact equality.
    for row in range(n_test):
        assert predicted_labels[row] == expected_labels[row]

    # Probabilities are compared entry by entry with an absolute tolerance.
    for row in range(n_test):
        for col in range(2):
            deviation = abs(predicted_proba[row, col] - expected_proba[row][col])
            assert deviation < tolerance
Пример #10
0
def test_no_predict_proba_attribute():
    """Constructing CTClassifier from two RidgeClassifier instances must raise.

    The test expects an AttributeError from the constructor call itself.
    """
    with pytest.raises(AttributeError):
        # Only the constructor call matters; its result is never used, so it
        # is not bound to a name.
        CTClassifier(RidgeClassifier(), RidgeClassifier())
Пример #11
0
def test_predict_check_p_n(data):
    """Check the ``p_`` and ``n_`` attributes CTClassifier exposes after fit.

    Each scenario labels the first 15 of 100 samples with the values 4
    ("negative") and 6 ("positive") and leaves the rest as NaN. The first two
    scenarios leave ``p`` and ``n`` unset, and the expected ``p_`` / ``n_``
    mirror the 10:5 class imbalance. The third passes ``p=4, n=3`` explicitly
    and expects those values back unchanged.
    """
    # (label of samples 0-4, label of samples 5-14, extra constructor
    #  arguments, expected p_, expected n_)
    scenarios = [
        (4, 6, {}, 2, 1),                  # 5 "negative", 10 "positive"
        (6, 4, {}, 1, 2),                  # 5 "positive", 10 "negative"
        (4, 6, {'p': 4, 'n': 3}, 4, 3),    # explicit p / n are kept as given
    ]

    for first_value, second_value, overrides, expected_p, expected_n in scenarios:
        labels = np.zeros(100, )
        labels[:5] = first_value
        labels[5:15] = second_value
        labels[15:] = np.nan

        model = CTClassifier(**overrides, random_state=0)
        model.fit(data['random_data'], labels)
        assert model.p_ == expected_p
        assert model.n_ == expected_n
Пример #12
0
def test_negative_num_iter():
    """Constructing CTClassifier with ``num_iter=-1`` must raise ValueError."""
    with pytest.raises(ValueError):
        # Only the constructor call matters; its result is never used, so it
        # is not bound to a name.
        CTClassifier(GaussianNB(), GaussianNB(), num_iter=-1)
Пример #13
0
def test_zero_num_iter():
    """Constructing CTClassifier with ``num_iter=0`` must raise ValueError."""
    with pytest.raises(ValueError):
        # Only the constructor call matters; its result is never used, so it
        # is not bound to a name.
        CTClassifier(GaussianNB(), GaussianNB(), num_iter=0)
Пример #14
0
def test_negative_pool_size():
    """Constructing CTClassifier with ``unlabeled_pool_size=-1`` must raise ValueError."""
    with pytest.raises(ValueError):
        # Only the constructor call matters; its result is never used, so it
        # is not bound to a name.
        CTClassifier(GaussianNB(), GaussianNB(), unlabeled_pool_size=-1)
Пример #15
0
def test_zero_p():
    """Constructing CTClassifier with ``p=0`` must raise ValueError."""
    with pytest.raises(ValueError):
        # Only the constructor call matters; its result is never used, so it
        # is not bound to a name.
        CTClassifier(GaussianNB(), GaussianNB(), p=0)