def test_fit_with_inm(
    prune_count_method='inverse_nm_dot_s',
    seed=0,
    used_by_another_test=False,
):
    """Compare fitting with a supplied inverse noise matrix to fitting without.

    Two ``LearningWithNoisyLabels`` models are built with the same ``seed``
    and ``prune_count_method``. The first is fit with the inverse noise
    matrix computed from the module-level ``data``; the second is fit on the
    same inputs with no matrix passed in.

    Returns ``(score, score_inm)`` when ``used_by_another_test`` is true.
    Otherwise nothing is returned and the test asserts that the score
    obtained without the matrix is below the score obtained with it, plus a
    1e-4 tolerance.
    """
    model_kwargs = {'seed': seed, 'prune_count_method': prune_count_method}

    # First model: the inverse noise matrix is handed to fit() directly.
    model_given = LearningWithNoisyLabels(**model_kwargs)
    inverse_nm = compute_inv_noise_matrix(
        data["py"], data["noise_matrix"], data["ps"])
    model_given.fit(
        data['X_train'], data['s'], inverse_noise_matrix=inverse_nm)
    score_inm = model_given.score(data['X_test'], data['y_test'])

    # Second model: no matrix is supplied, so fit() has to work it out.
    model_estimated = LearningWithNoisyLabels(**model_kwargs)
    model_estimated.fit(data['X_train'], data['s'])
    score = model_estimated.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_inm
    assert score < score_inm + 1e-4
示例#2
0
def test_clf_fit_nm_inm(sparse):
    """Fit with both noise matrix and inverse noise matrix, then compare.

    ``sparse`` selects ``SPARSE_DATA`` over ``DATA`` as the fixture. One
    model is fit with the fixture's noise matrix and the inverse computed
    from it; a second model is fit with neither. The test asserts that the
    second score is below the first plus a 1e-4 tolerance.
    """
    data = SPARSE_DATA if sparse else DATA

    # Model that receives both matrices explicitly.
    model_given = LearningWithNoisyLabels(seed=SEED)
    noise_matrix = data['noise_matrix']
    inverse_nm = compute_inv_noise_matrix(
        data["py"], noise_matrix, data["ps"])
    model_given.fit(
        X=data['X_train'],
        s=data['s'],
        noise_matrix=noise_matrix,
        inverse_noise_matrix=inverse_nm,
    )
    score_nm_inm = model_given.score(data['X_test'], data['y_test'])

    # Model that is given no matrices at all.
    model_estimated = LearningWithNoisyLabels(seed=SEED)
    model_estimated.fit(data['X_train'], data['s'])
    score = model_estimated.score(data['X_test'], data['y_test'])

    assert score < score_nm_inm + 1e-4
示例#3
0
def test_fit_with_inm(
    sparse,
    seed=SEED,
    used_by_another_test=False,
):
    """Compare fitting with a supplied inverse noise matrix to fitting without.

    ``sparse`` selects ``SPARSE_DATA`` over ``DATA`` as the fixture. The
    first model is fit with the inverse noise matrix computed from the
    fixture; the second is fit on the same inputs with no matrix passed in.

    Returns ``(score, score_inm)`` when ``used_by_another_test`` is true.
    Otherwise nothing is returned and the test asserts that the score
    obtained without the matrix is below the score obtained with it, plus a
    1e-4 tolerance.
    """
    data = SPARSE_DATA if sparse else DATA

    # First model: the inverse noise matrix is handed to fit() directly.
    model_given = LearningWithNoisyLabels(seed=seed)
    inverse_nm = compute_inv_noise_matrix(
        data["py"], data["noise_matrix"], data["ps"])
    model_given.fit(
        data['X_train'], data['s'], inverse_noise_matrix=inverse_nm)
    score_inm = model_given.score(data['X_test'], data['y_test'])

    # Second model: no matrix is supplied, so fit() has to work it out.
    model_estimated = LearningWithNoisyLabels(seed=seed)
    model_estimated.fit(data['X_train'], data['s'])
    score = model_estimated.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_inm
    assert score < score_inm + 1e-4
def test_fit_psx():
    """Check that passing precomputed ``psx`` to fit() leaves the score unchanged.

    ``psx`` is produced by ``estimate_cv_predicted_probabilities`` on the
    training features and ``y_train`` labels from the module-level ``data``.
    One model is fit with that ``psx`` and another without it; the two test
    scores must agree to within 1e-6.
    """
    from cleanlab.latent_estimation import estimate_cv_predicted_probabilities

    # Fit with psx computed up front.
    model_with_psx = LearningWithNoisyLabels()
    precomputed_psx = estimate_cv_predicted_probabilities(
        X=data['X_train'],
        labels=data['y_train'],
    )
    model_with_psx.fit(
        X=data['X_train'], s=data['y_train'], psx=precomputed_psx)
    score_with_psx = model_with_psx.score(data['X_test'], data['y_test'])

    # Fit again on the same inputs, this time without psx.
    model_without_psx = LearningWithNoisyLabels()
    model_without_psx.fit(X=data['X_train'], s=data['y_train'])
    score_no_psx = model_without_psx.score(data['X_test'], data['y_test'])

    difference = abs(score_with_psx - score_no_psx)
    assert difference < 1e-6
def test_rp():
    """Smoke test: wrap a LogisticRegression, then fit and score it.

    The score is only printed; the test passes as long as nothing raises.
    """
    base_clf = LogisticRegression(
        multi_class='auto',
        solver='lbfgs',
        random_state=seed,
    )
    rp = LearningWithNoisyLabels(clf=base_clf)
    rp.fit(data["X_train"], data["s"])
    score = rp.score(data["X_test"], data["y_test"])
    print(score)
    # Reaching this line without an exception is the whole check.
    assert True
示例#6
0
def test_fit_with_nm(
    seed=0,
    used_by_another_test=False,
):
    """Compare fitting with a supplied noise matrix to fitting without one.

    Both models share ``seed``. The first is fit with ``data['noise_matrix']``
    passed as ``noise_matrix``; the second is fit on the same inputs with no
    matrix passed in.

    Returns ``(score, score_nm)`` when ``used_by_another_test`` is true.
    Otherwise nothing is returned and the test asserts that the score
    obtained without the matrix is below the score obtained with it, plus a
    1e-4 tolerance.
    """
    # First model: the noise matrix is handed to fit() directly.
    model_given = LearningWithNoisyLabels(seed=seed)
    noise_matrix = data['noise_matrix']
    model_given.fit(data['X_train'], data['s'], noise_matrix=noise_matrix)
    score_nm = model_given.score(data['X_test'], data['y_test'])

    # Second model: no matrix is supplied, so fit() has to work it out.
    model_estimated = LearningWithNoisyLabels(seed=seed)
    model_estimated.fit(data['X_train'], data['s'])
    score = model_estimated.score(data['X_test'], data['y_test'])

    if used_by_another_test:
        return score, score_nm
    assert score < score_nm + 1e-4
def test_no_score():
    """Score a wrapped classifier that defines no ``score`` method.

    The stub's ``predict`` returns the true test labels, and the test
    expects the wrapper's score to be 1 (within 1e-6).
    NOTE(review): presumably the wrapper falls back to accuracy when the
    classifier has no ``score`` -- confirm against LearningWithNoisyLabels.
    """
    class Struct:
        """Stub classifier exposing fit/predict_proba/predict only."""

        def fit(self):
            return None

        def predict_proba(self):
            return None

        def predict(self, X):
            # Always answer with the ground-truth test labels.
            return data['y_test']

    wrapper = LearningWithNoisyLabels(clf=Struct())
    score = wrapper.score(data['X_test'], data['y_test'])
    deviation = abs(score - 1)
    assert deviation < 1e-6
def test_score():
    """Check that the wrapper returns what the wrapped classifier's ``score`` returns.

    The stub's ``score`` returns a sentinel string; the test asserts that
    ``LearningWithNoisyLabels.score`` hands back that same value.
    """
    phrase = 'cleanlab is dope'

    class Struct:
        """Stub classifier whose score() returns the sentinel phrase."""

        def fit(self):
            return None

        def predict_proba(self):
            return None

        def predict(self):
            return None

        def score(self, X, y):
            return phrase

    wrapper = LearningWithNoisyLabels(clf=Struct())
    result = wrapper.score(data['X_test'], data['y_test'])
    assert result == phrase
示例#9
0
def train_without_noisy_labels(X_train, y_train, X_test, y_test, clf=None):
    """Return the test-set score of ``clf`` on ``(X_test, y_test)``.

    When ``clf`` is supplied it is scored as-is: it is not fit here, and
    ``X_train``/``y_train`` go unused. When ``clf`` is ``None``, a
    ``LearningWithNoisyLabels`` wrapper around ``baseclf(**params)`` is
    created, fit on ``(X_train, y_train)``, and then scored.
    """
    if clf is not None:
        # Caller-provided classifier: score only, no fitting.
        return clf.score(X_test, y_test)

    base_model = baseclf(**params)
    wrapper = LearningWithNoisyLabels(
        clf=base_model,
        seed=seed,
        n_jobs=cpu_count(),
    )
    wrapper.fit(X_train, y_train)
    return wrapper.score(X_test, y_test)