示例#1
0
    def assert_model(pickled_model, X_train):
        """Compare a restored model against the values stored in ``result``.

        The restored ``embedding_`` must equal ``result["umap_embedding"]``,
        and the trustworthiness of ``pickled_model.transform(X_train)`` may
        be at most 0.2 below ``result["umap"]``.
        """
        restored_embedding = pickled_model.embedding_
        k = pickled_model.n_neighbors

        # The embedding carried by the restored model must match the saved one.
        assert array_equal(result["umap_embedding"], restored_embedding)

        re_embedded = pickled_model.transform(X_train)
        trust_after = trustworthiness(X_train, re_embedded, n_neighbors=k)

        # Allow a drop of up to 0.2 relative to the recorded score.
        lowest_accepted = result["umap"] - 0.2
        assert trust_after >= lowest_accepted
示例#2
0
def test_initialized_umap_trustworthiness_on_iris(iris):
    """Check trustworthiness of a UMAP embedding with an explicit init.

    The embedding of ``iris.data`` is initialised from its columns 2 onward
    and must reach a trustworthiness of at least 0.97 with 10 neighbours.
    """
    data = iris.data
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        init=data[:, 2:],
        n_epochs=200,
        random_state=42,
    ).fit_transform(data)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.97
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)
示例#3
0
def test_semisupervised_umap_trustworthiness_on_iris():
    """Fit cuUMAP on iris with part of the labels replaced by -1.

    The resulting embedding must have a trustworthiness of at least 0.97
    with 10 neighbours.
    """
    iris = datasets.load_iris()

    # Overwrite the labels of samples 25..74 with -1 on a copy, leaving
    # the loaded dataset untouched.
    partial_target = iris.target.copy()
    partial_target[25:75] = -1

    model = cuUMAP(n_neighbors=10, random_state=0, min_dist=0.01)
    embedding = model.fit_transform(
        iris.data, partial_target, convert_dtype=True)

    score = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert score >= 0.97
def test_umap_transform_on_iris(iris, iris_subset_model, iris_selection):
    """Check trustworthiness of transforming held-out iris rows.

    The model supplied by ``iris_subset_model`` transforms the rows of
    ``iris.data`` that ``iris_selection`` does not select; the result must
    reach a trustworthiness of at least 0.85 with 10 neighbours.
    """
    fitter = iris_subset_model

    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert (
        trust >= 0.85
    ), "Insufficiently trustworthy transform for iris dataset: {}".format(
        trust)
def test_supervised_umap_trustworthiness():
    """Check trustworthiness of a supervised UMAP embedding of blobs.

    UMAP is fitted on 50 ``make_blobs`` samples together with their labels;
    the embedding must reach a trustworthiness of at least 0.95 with
    10 neighbours.
    """
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(n_neighbors=10,
                     min_dist=0.01,
                     random_state=42,
                     n_epochs=100).fit_transform(data, labels)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.95
    ), "Insufficiently trustworthy embedding for blobs dataset: {}".format(
        trust)
示例#6
0
def test_densmap_trustworthiness_random_init(nn_data):  # pragma: no cover
    """Check trustworthiness of a densMAP embedding with random init.

    The first 50 rows of ``nn_data`` are embedded with ``densmap=True`` and
    ``init="random"``; the result must reach a trustworthiness of at least
    0.75 with 10 neighbours.
    """
    data = nn_data[:50]
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        init="random",
        densmap=True,
    ).fit_transform(data)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.75
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)
def test_umap_trustworthiness_random_init(nn_data):
    """Check trustworthiness of a UMAP embedding with random init.

    The first 50 rows of ``nn_data`` are embedded with ``init="random"``;
    the result must reach a trustworthiness of at least 0.75 with
    10 neighbours.
    """
    data = nn_data[:50]
    embedding = UMAP(n_neighbors=10,
                     min_dist=0.01,
                     random_state=42,
                     n_epochs=100,
                     init="random").fit_transform(data)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.75
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(
        trust)
def test_semisupervised_umap_trustworthiness():
    """Check trustworthiness of UMAP on blobs with partly masked labels.

    Labels 10..29 of 50 ``make_blobs`` samples are replaced by -1 before
    fitting; the embedding must reach a trustworthiness of at least 0.97
    with 10 neighbours.
    """
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    labels[10:30] = -1
    embedding = UMAP(n_neighbors=10, min_dist=0.01,
                     random_state=42).fit_transform(data, labels)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.97
    ), "Insufficiently trustworthy embedding for blobs dataset: {}".format(
        trust)
示例#9
0
def test_composite_trustworthiness_on_iris(iris):
    """Check trustworthiness of composite UMAP models built on iris.

    One model is fitted on the first two columns of ``iris.data`` and one
    on the remaining columns. The models are combined with ``+`` and with
    ``*``; each combined embedding must reach a trustworthiness of at
    least 0.82 against the full data with 10 neighbours.
    """
    iris_model1 = UMAP(
        n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=100,
    ).fit(iris.data[:, :2])
    iris_model2 = UMAP(
        n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=100,
    ).fit(iris.data[:, 2:])

    # n_neighbors is passed by keyword below: it is keyword-only in
    # current scikit-learn releases.
    embedding = (iris_model1 + iris_model2).embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)

    embedding = (iris_model1 * iris_model2).embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)
def test_umap_trustworthiness_fast_approx(nn_data):
    """Check trustworthiness with ``force_approximation_algorithm=True``.

    The first 50 rows of ``nn_data`` are embedded; the result must reach a
    trustworthiness of at least 0.8 with 10 neighbours.
    """
    data = nn_data[:50]
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
        force_approximation_algorithm=True,
    ).fit_transform(data)
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.8
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)
示例#11
0
def test_umap_transform_trustworthiness_with_consistency_enabled():
    """Fit cuUMAP on a random half of iris and transform the other half.

    Rows are split by a seeded boolean mask (True/False with equal
    probability). The transformed held-out rows must reach a
    trustworthiness of at least 0.92 with 10 neighbours.
    """
    features = datasets.load_iris().data

    rng = np.random.RandomState(42)
    fit_mask = rng.choice(
        [True, False], features.shape[0], replace=True, p=[0.5, 0.5])
    fit_rows = features[fit_mask]
    held_out = features[~fit_mask]

    umap = cuUMAP(n_neighbors=10, min_dist=0.01, init="random",
                  random_state=42)
    umap.fit(fit_rows, convert_dtype=True)
    projected = umap.transform(held_out, convert_dtype=True)

    score = trustworthiness(held_out, projected, n_neighbors=10)
    assert score >= 0.92
示例#12
0
    def create_mod():
        """Build and fit the model under test, recording reference values.

        Stores the fitted ``embedding_`` in ``result["umap_embedding"]`` and
        the trustworthiness of the ``fit_transform`` output in
        ``result["umap"]``.

        Returns the fitted model and the iris feature matrix it was fitted on.
        """
        iris_features = load_iris().data

        # Look up the estimator class selected by the enclosing scope's `keys`.
        estimator_cls = umap_model[keys]
        estimator = estimator_cls(output_type="numpy")
        embedding_before_pickle = estimator.fit_transform(iris_features)

        result["umap_embedding"] = estimator.embedding_
        k = estimator.n_neighbors

        result["umap"] = trustworthiness(
            iris_features, embedding_before_pickle, n_neighbors=k)
        return estimator, iris_features
示例#13
0
def test_composite_trustworthiness_random_init(nn_data):  # pragma: no cover
    """Check trustworthiness of composite models with random init.

    Two models (``n_neighbors`` 10 and 30, both ``init="random"``) are
    fitted on the first 50 rows of ``nn_data`` and combined with ``*`` and
    with ``+``; each combined embedding must reach a trustworthiness of at
    least 0.82 with 10 neighbours.
    """
    data = nn_data[:50]
    model1 = UMAP(
        n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=50, init="random",
    ).fit(data)
    model2 = UMAP(
        n_neighbors=30, min_dist=0.01, random_state=42, n_epochs=50, init="random",
    ).fit(data)

    # n_neighbors is passed by keyword below: it is keyword-only in
    # current scikit-learn releases.
    model3 = model1 * model2
    trust = trustworthiness(data, model3.embedding_, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)

    model4 = model1 + model2
    trust = trustworthiness(data, model4.embedding_, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)
def test_umap_transform_on_iris_modified_dtype(iris, iris_subset_model,
                                               iris_selection):
    """Check ``transform`` after the stored embedding is cast to float64.

    The ``embedding_`` of the model supplied by ``iris_subset_model`` is
    replaced by a float64 copy before the rows not selected by
    ``iris_selection`` are transformed; the result must reach a
    trustworthiness of at least 0.8 with 10 neighbours.
    """
    fitter = iris_subset_model
    fitter.embedding_ = fitter.embedding_.astype(np.float64)

    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert (
        trust >= 0.8
    ), "Insufficiently trustworthy transform for iris dataset: {}".format(
        trust)
示例#15
0
def test_densmap_trustworthiness_on_iris_supervised(iris):
    """Check trustworthiness of a supervised densMAP embedding of iris.

    UMAP is fitted with ``densmap=True`` on ``iris.data`` with
    ``iris.target`` as ``y``; the embedding must reach a trustworthiness
    of at least 0.97 with 10 neighbours.
    """
    densmap_iris_model = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        densmap=True,
        verbose=True,
    ).fit(iris.data, y=iris.target)
    embedding = densmap_iris_model.embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert (
        trust >= 0.97
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(
        trust)
示例#16
0
def test_densmap_trustworthiness(nn_data):
    """Check trustworthiness of a densMAP embedding with ``output_dens``.

    With ``output_dens=True`` the ``fit_transform`` result is unpacked into
    three values; only the first (the embedding) is checked here and must
    reach a trustworthiness of at least 0.75 with 10 neighbours.
    """
    data = nn_data[:50]
    # The two extra outputs are not used by this test.
    embedding, _rad_h, _rad_l = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
        densmap=True,
        output_dens=True,
    ).fit_transform(data)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.75
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)
def test_sparse_precomputed_metric_umap_trustworthiness():
    """Check UMAP on a sparse precomputed distance matrix of blobs.

    The pairwise distances of 50 ``make_blobs`` samples are wrapped in a
    CSR matrix and passed with ``metric="precomputed"``; the embedding must
    reach a trustworthiness of at least 0.75 against the original samples
    with 10 neighbours.
    """
    # The blob labels are not needed for this unsupervised fit.
    data, _ = make_blobs(50, cluster_std=0.5, random_state=42)
    dmat = scipy.sparse.csr_matrix(pairwise_distances(data))
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
        metric="precomputed",
    ).fit_transform(dmat)
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.75
    ), "Insufficiently trustworthy embedding for blobs dataset: {}".format(
        trust)
示例#18
0
def test_umap_transform_on_iris(iris, iris_selection):
    """Check trustworthiness of transforming held-out iris rows.

    UMAP is fitted on the rows of ``iris.data`` selected by
    ``iris_selection`` and then transforms the remaining rows; the result
    must reach a trustworthiness of at least 0.85 with 10 neighbours.
    """
    data = iris.data[iris_selection]
    fitter = UMAP(n_neighbors=10, min_dist=0.01, n_epochs=200,
                  random_state=42).fit(data)

    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert (
        trust >= 0.85
    ), "Insufficiently trustworthy transform for iris dataset: {}".format(
        trust)
def test_string_metric_supervised_umap_trustworthiness():
    """Check supervised UMAP with string labels and ``target_metric="string"``.

    The integer blob labels are mapped to the strings "this", "that" and
    "other" before fitting; the embedding must reach a trustworthiness of
    at least 0.95 with 10 neighbours.
    """
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    # Index a string array with the integer labels to get string targets.
    labels = np.array(["this", "that", "other"])[labels]
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="string",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.95
    ), "Insufficiently trustworthy embedding for blobs dataset: {}".format(trust)
def test_count_metric_supervised_umap_trustworthiness():
    """Check supervised UMAP with ``target_metric="count"``.

    The blob labels are transformed to ``labels**2 + 2*labels`` before
    fitting; the embedding must reach a trustworthiness of at least 0.95
    with 10 neighbours.
    """
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    labels = (labels ** 2) + 2 * labels
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="count",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.95
    ), "Insufficiently trustworthy embedding for blobs dataset: {}".format(trust)
示例#21
0
def test_umap_transform_on_iris_modified_dtype(iris, iris_selection):
    """Check ``transform`` after the stored embedding is cast to float64.

    UMAP is fitted on the rows selected by ``iris_selection``, its
    ``embedding_`` is replaced by a float64 copy, and the remaining rows
    are transformed; the result must reach a trustworthiness of at least
    0.8 with 10 neighbours.
    """
    data = iris.data[iris_selection]
    fitter = UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(data)
    fitter.embedding_ = fitter.embedding_.astype(np.float64)

    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert (
        trust >= 0.8
    ), "Insufficiently trustworthy transform for iris dataset: {}".format(
        trust)
示例#22
0
def test_densmap_trustworthiness_on_iris(iris):
    """Check a densMAP embedding of iris and its unsupported operations.

    The embedding must reach a trustworthiness of at least 0.97 with
    10 neighbours. The test also expects ``transform`` on the fitted model
    to raise ``NotImplementedError`` and ``inverse_transform`` to raise
    ``ValueError``.
    """
    densmap_iris_model = UMAP(
        n_neighbors=10, min_dist=0.01, random_state=42, densmap=True, verbose=True,
    ).fit(iris.data)
    embedding = densmap_iris_model.embedding_
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert (
        trust >= 0.97
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)

    with pytest.raises(NotImplementedError):
        densmap_iris_model.transform(iris.data[:10])

    with pytest.raises(ValueError):
        densmap_iris_model.inverse_transform(embedding[:10])
示例#23
0
def test_composite_trustworthiness(nn_data, iris_model):
    """Check composite UMAP models and rejection of mismatched models.

    Two models are fitted on the first 50 rows of ``nn_data`` (the second
    initialised from the first model's embedding) and combined with ``*``
    and with ``+``; each combined embedding must reach a trustworthiness of
    at least 0.82 with 10 neighbours. Combining ``model1`` with
    ``iris_model`` via ``+``, ``*`` or ``-`` is expected to raise
    ``ValueError``.
    """
    data = nn_data[:50]
    model1 = UMAP(n_neighbors=10, min_dist=0.01, random_state=42,
                  n_epochs=50).fit(data)
    model2 = UMAP(
        n_neighbors=30,
        min_dist=0.01,
        random_state=42,
        n_epochs=50,
        init=model1.embedding_,
    ).fit(data)

    # n_neighbors is passed by keyword below: it is keyword-only in
    # current scikit-learn releases.
    model3 = model1 * model2
    trust = trustworthiness(data, model3.embedding_, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)

    model4 = model1 + model2
    trust = trustworthiness(data, model4.embedding_, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)

    # Only the raised exception matters; the results are discarded.
    with pytest.raises(ValueError):
        _ = model1 + iris_model

    with pytest.raises(ValueError):
        _ = model1 * iris_model

    with pytest.raises(ValueError):
        _ = model1 - iris_model
def test_discrete_metric_supervised_umap_trustworthiness():
    """Check supervised UMAP with ``target_metric="ordinal"``.

    UMAP is fitted on 50 ``make_blobs`` samples with their labels; the
    embedding must reach a trustworthiness of at least 0.95 with
    10 neighbours.
    """
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="ordinal",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(data, embedding, n_neighbors=10)
    assert (
        trust >= 0.95
    ), "Insufficiently trustworthy embedding for blobs dataset: {}".format(
        trust)
示例#25
0
    def assert_model(pickled_model, X):
        """Check the restored model's attributes, then fit it and record results.

        After the attribute check, the model is fitted on ``X`` and the
        fitted model, the data and the trustworthiness of its ``embedding_``
        (10 neighbours) are stored in ``result``.
        """
        model_params = pickled_model.__dict__

        # Walk every attribute and tick it off once it has been compared.
        # NOTE(review): each value is compared with the same dict it was
        # read from, so this looks like a self-comparison - confirm what
        # the reference parameters were meant to be.
        unchecked = set(model_params)
        for name, stored in model_params.items():
            assert model_params[name] == stored
            unchecked.discard(name)

        # Every attribute must have been visited.
        assert not unchecked

        # Fit the restored model and record what later checks need.
        result["fit_model"] = pickled_model.fit(X)
        result["data"] = X
        result["trust"] = trustworthiness(
            X, pickled_model.embedding_, n_neighbors=10)
示例#26
0
def test_umap_transform_on_iris(target_metric):
    """Fit cuUMAP on a random subset of iris and transform the remainder.

    Rows are split by a seeded boolean mask of length 150 (True with
    probability 0.75). The transformed rows must contain no NaN and reach
    a trustworthiness of at least 0.85 with 10 neighbours.
    """
    iris = datasets.load_iris()

    rng = np.random.RandomState(42)
    fit_mask = rng.choice([True, False], 150, replace=True, p=[0.75, 0.25])
    fit_rows = iris.data[fit_mask]

    umap = cuUMAP(n_neighbors=10, init="random", n_epochs=800, min_dist=0.01,
                  random_state=42, target_metric=target_metric)
    umap.fit(fit_rows, convert_dtype=True)

    held_out = iris.data[~fit_mask]
    projected = umap.transform(held_out, convert_dtype=True)

    has_nan = np.isnan(projected).any()
    assert not has_nan

    score = trustworthiness(held_out, projected, n_neighbors=10)
    assert score >= 0.85
def test_umap_transform_on_iris_w_pynndescent(iris, iris_selection):
    """Check ``transform`` with ``force_approximation_algorithm=True``.

    UMAP is fitted on the rows of ``iris.data`` selected by
    ``iris_selection`` and transforms the remaining rows; the result must
    reach a trustworthiness of at least 0.85 with 10 neighbours.
    """
    data = iris.data[iris_selection]
    fitter = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        n_epochs=100,
        random_state=42,
        force_approximation_algorithm=True,
    ).fit(data)

    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert (
        trust >= 0.85
    ), "Insufficiently trustworthy transform for iris dataset: {}".format(
        trust)
示例#28
0
def test_contrastive_trustworthiness_on_iris(iris):
    """Check trustworthiness of two iris models combined with ``-``.

    One model is fitted on the first two columns of ``iris.data`` and one
    on the remaining columns; the embedding of their difference must reach
    a trustworthiness of at least 0.75 against the full data with
    10 neighbours.
    """
    iris_model1 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
    ).fit(iris.data[:, :2])
    iris_model2 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
    ).fit(iris.data[:, 2:])
    embedding = (iris_model1 - iris_model2).embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert (
        trust >= 0.75
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(
        trust)
def test_precomputed_sparse_transform_on_iris(iris, iris_selection):
    """Check ``transform`` with sparse precomputed distance matrices.

    UMAP is fitted with ``metric='precomputed'`` on the CSR-wrapped pairwise
    distances of the rows selected by ``iris_selection``. The remaining
    rows are transformed from their CSR-wrapped distances to the fitted
    rows; the result must reach a trustworthiness of at least 0.85 with
    10 neighbours.
    """
    data = iris.data[iris_selection]
    distance_matrix = sparse.csr_matrix(squareform(pdist(data)))

    fitter = UMAP(n_neighbors=10,
                  min_dist=0.01,
                  random_state=42,
                  n_epochs=100,
                  metric='precomputed').fit(distance_matrix)

    new_data = iris.data[~iris_selection]
    # Distances from each new row to every row used for fitting.
    new_distance_matrix = sparse.csr_matrix(cdist(new_data, data))
    embedding = fitter.transform(new_distance_matrix)

    # n_neighbors is passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    trust = trustworthiness(new_data, embedding, n_neighbors=10)
    assert (
        trust >= 0.85
    ), "Insufficiently trustworthy transform for iris dataset: {}".format(
        trust)
示例#30
0
def test_umap_transform_on_digits_sparse(target_metric, input_type,
                                         xform_method):
    """Run cuUMAP on sparse digits data and check trustworthiness.

    The digits rows are split by a seeded boolean mask of length 1797
    (True with probability 0.75) and converted to CSR matrices from either
    ``cupyx.scipy.sparse`` or ``scipy.sparse`` depending on ``input_type``.
    With ``xform_method == 'fit'`` the model is fitted on the selected rows
    and transforms the rest; otherwise ``fit_transform`` runs on the
    unselected rows. The embedding of the unselected rows must reach a
    trustworthiness of at least 0.96 with 15 neighbours.
    """
    digits = datasets.load_digits()

    rng = np.random.RandomState(42)
    fit_mask = rng.choice([True, False], 1797, replace=True,
                          p=[0.75, 0.25])

    use_cupy = input_type == 'cupy'
    sp_prefix = cupyx.scipy.sparse if use_cupy else scipy.sparse

    def as_sparse(dense):
        # Go through a SciPy CSR matrix first, then into the chosen backend.
        return sp_prefix.csr_matrix(scipy.sparse.csr_matrix(dense))

    held_out_dense = digits.data[~fit_mask]
    fit_rows = as_sparse(digits.data[fit_mask])

    umap = cuUMAP(n_neighbors=15,
                  verbose=logger.level_info,
                  init="random",
                  n_epochs=0,
                  min_dist=0.01,
                  random_state=42,
                  target_metric=target_metric)

    held_out = as_sparse(held_out_dense)

    if xform_method != 'fit':
        projected = umap.fit_transform(held_out, convert_dtype=True)
    else:
        umap.fit(fit_rows, convert_dtype=True)
        projected = umap.transform(held_out, convert_dtype=True)

    if use_cupy:
        # Bring the result back to host memory via `.get()`.
        projected = projected.get()

    score = trustworthiness(held_out_dense, projected, n_neighbors=15)
    assert score >= 0.96