Example #1
def test_log_loss_random(n_samples, dtype):

    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.rand(n_samples, 10))

    assert_almost_equal(log_loss(y_true, y_pred),
                        sklearn_log_loss(y_true, y_pred))
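All of the snippets on this page call the generate_random_labels helper from cuML's test utilities. Judging only from how it is used here (it receives a lambda that is handed a NumPy RandomState, it returns four values, and it accepts an optional as_cupy flag), a minimal sketch of such a helper could look like the following; the default seed and the device-array conversion are assumptions rather than the exact cuML implementation:

import numpy as np
import cupy as cp
from numba import cuda


def generate_random_labels(random_generation_lambda, seed=1234, as_cupy=False):
    # Seeded generator so every run produces the same "random" labels.
    rng = np.random.RandomState(seed)
    a = random_generation_lambda(rng)  # first label array (e.g. y_true)
    b = random_generation_lambda(rng)  # second label array (e.g. y_pred)
    if as_cupy:
        # Device copies as CuPy arrays, plus the host NumPy originals.
        return cp.asarray(a), cp.asarray(b), a, b
    # Default: Numba device arrays, plus the host NumPy originals.
    return cuda.to_device(a), cuda.to_device(b), a, b

With that shape in mind, both unpacking patterns used below (y_true, _, _, _ and y_true, y_pred, np_y_true, np_y_pred) read naturally.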
Example #2
def test_roc_auc_score_random(n_samples, dtype):

    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    auc = roc_auc_score(y_true, y_pred)
    skl_auc = sklearn_roc_auc_score(y_true, y_pred)
    assert_almost_equal(auc, skl_auc)
Example #3
def test_average_precision_score_random(n_samples, dtype):

    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    ap = average_precision_score(y_true, y_pred)
    skl_ap = sklearn_average_precision_score(y_true, y_pred)
    assert_almost_equal(ap, skl_ap)
Example #4
def test_completeness_score_big_array(use_handle, input_range):
    a, b, _, _ = generate_random_labels(lambda rd: rd.randint(*input_range,
                                                              int(10e4),
                                                              dtype=np.int32))
    score = score_completeness(a, b, use_handle)
    ref = sk_completeness_score(a, b)
    np.testing.assert_almost_equal(score, ref, decimal=4)
Example #5
def test_mutual_info_score_many_blocks(use_handle, input_range, n_samples):
    a, b, _, _ = generate_random_labels(lambda rd: rd.randint(*input_range,
                                                              n_samples,
                                                              dtype=np.int32))
    score = score_mutual_info(a, b, use_handle)
    ref = sk_mutual_info_score(a, b)
    np.testing.assert_almost_equal(score, ref, decimal=4)
Example #6
def test_homogeneity_completeness_symmetry(use_handle, input_range):
    a, b, _, _ = generate_random_labels(lambda rd: rd.randint(*input_range,
                                                              int(10e3),
                                                              dtype=np.int32))
    hom = score_homogeneity(a, b, use_handle)
    com = score_completeness(b, a, use_handle)
    np.testing.assert_almost_equal(hom, com, decimal=4)
Example #7
def test_confusion_matrix_multiclass_subset_labels(labels):
    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 3, 10).astype(np.int32))

    ref = sk_confusion_matrix(y_true, y_pred, labels=labels)
    labels = cp.array(labels, dtype=np.int32)
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
Example #8
def test_confusion_matrix_random(n_samples, dtype, problem_type):
    upper_range = 2 if problem_type == 'binary' else 1000

    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype))
    cm = confusion_matrix(y_true, y_pred)
    ref = sk_confusion_matrix(y_true, y_pred)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
Example #9
def test_precision_recall_curve_random(n_samples, dtype):

    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_score, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    precision_using_sk, recall_using_sk, thresholds_using_sk = \
        sklearn_precision_recall_curve(y_true, y_score)

    precision, recall, thresholds = precision_recall_curve(y_true, y_score)

    assert array_equal(precision, precision_using_sk)
    assert array_equal(recall, recall_using_sk)
    assert array_equal(thresholds, thresholds_using_sk)
Example #10
def test_confusion_matrix_multiclass_subset_labels(labels, client):
    y_true, y_pred, np_y_true, np_y_pred = generate_random_labels(
        lambda rng: rng.randint(0, 3, 10).astype(np.int32), as_cupy=True)
    y_true, y_pred = da.from_array(y_true), da.from_array(y_pred)

    ref = sk_confusion_matrix(np_y_true, np_y_pred, labels=labels)
    labels = cp.array(labels, dtype=np.int32)
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
Example #11
def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype):
    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype))

    if weights_dtype == 'int':
        sample_weight = np.random.RandomState(0).randint(0, 10, n_samples)
    else:
        sample_weight = np.random.RandomState(0).rand(n_samples)

    cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    ref = sk_confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
Example #12
def test_confusion_matrix_random(n_samples, dtype, problem_type, cluster):
    client = Client(cluster)
    upper_range = 2 if problem_type == 'binary' else 1000

    y_true, y_pred, np_y_true, np_y_pred = generate_random_labels(
        lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype),
        as_cupy=True)
    y_true, y_pred = da.from_array(y_true), da.from_array(y_pred)

    cm = confusion_matrix(y_true, y_pred)
    ref = sk_confusion_matrix(np_y_true, np_y_pred)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
    client.close()
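Examples #10, #12 and #16 exercise the Dask variant of confusion_matrix and rely on client and cluster pytest fixtures that the snippets do not show. A minimal sketch of such fixtures, assuming dask_cuda's LocalCUDACluster (the fixture scopes are an assumption):

import pytest
from dask.distributed import Client
from dask_cuda import LocalCUDACluster


@pytest.fixture(scope='module')
def cluster():
    # One single-node GPU cluster shared by the tests in this module.
    c = LocalCUDACluster()
    yield c
    c.close()


@pytest.fixture
def client(cluster):
    # A fresh client per test, attached to the shared cluster.
    c = Client(cluster)
    yield c
    c.close()

Example #12 follows the other common pattern: it takes only the cluster fixture and opens and closes its own Client inside the test.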
Example #13
def test_entropy_random(n_samples, base, use_handle):
    handle, stream = get_handle(use_handle)

    clustering, _, _, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # generate unnormalized probabilities from the clustering
    pk = np.bincount(clustering)

    # scipy's entropy expects (unnormalized) probabilities
    sp_S = sp_entropy(pk, base=base)
    # cuML's entropy works directly on the cluster labels
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)

    assert_almost_equal(S, sp_S, decimal=2)
Example #14
def test_regression_metrics_random(n_samples, dtype, function):
    if dtype == np.float32 and n_samples == 500000:
        # stress test for float32 fails because of floating point precision
        pytest.xfail()

    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    cuml_reg, sklearn_reg = {
        'mse': (mean_squared_error, sklearn_mse),
        'mae': (mean_absolute_error, sklearn_mae),
        'msle': (mean_squared_log_error, sklearn_msle)
    }[function]

    res = cuml_reg(y_true, y_pred, multioutput='raw_values')
    ref = sklearn_reg(y_true, y_pred, multioutput='raw_values')
    cp.testing.assert_array_almost_equal(res, ref, decimal=2)
Example #15
def test_entropy_random(n_samples, base, use_handle):
    if has_scipy():
        from scipy.stats import entropy as sp_entropy
    else:
        pytest.skip('Skipping test_entropy_random because Scipy is missing')

    handle, stream = get_handle(use_handle)

    clustering, _, _, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # generate unnormalized probabilities from the clustering
    pk = np.bincount(clustering)

    # scipy's entropy expects (unnormalized) probabilities
    sp_S = sp_entropy(pk, base=base)
    # cuML's entropy works directly on the cluster labels
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)

    assert_almost_equal(S, sp_S, decimal=2)
Example #16
def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype,
                                         client):
    y_true, y_pred, np_y_true, np_y_pred = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype), as_cupy=True)
    y_true, y_pred = da.from_array(y_true), da.from_array(y_pred)

    if weights_dtype == 'int':
        sample_weight = np.random.RandomState(0).randint(0, 10, n_samples)
    else:
        sample_weight = np.random.RandomState(0).rand(n_samples)

    ref = sk_confusion_matrix(np_y_true,
                              np_y_pred,
                              sample_weight=sample_weight)

    sample_weight = cp.array(sample_weight)
    sample_weight = da.from_array(sample_weight)

    cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
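Finally, all of the snippets omit their imports and pytest.mark.parametrize decorators. As an illustration only, Example #1 could be made self-contained roughly as follows; the parametrize values are arbitrary, and the helper's module path (cuml.testing.utils in recent releases, cuml.test.utils in older ones) is an assumption that may need adjusting:

import numpy as np
import pytest
from numpy.testing import assert_almost_equal
from sklearn.metrics import log_loss as sklearn_log_loss

from cuml.metrics import log_loss
from cuml.testing.utils import generate_random_labels


@pytest.mark.parametrize('n_samples', [100, 10000])
@pytest.mark.parametrize('dtype', [np.int32, np.int64])
def test_log_loss_random(n_samples, dtype):
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.rand(n_samples, 10))

    assert_almost_equal(log_loss(y_true, y_pred),
                        sklearn_log_loss(y_true, y_pred))

The other examples follow the same pattern, swapping in the corresponding cuML metric and its scikit-learn (or SciPy) reference implementation.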