Пример #1
0
def test_internals_api(n_components):
    """Exercise the UMAP ``callback`` hook with a valid and an invalid callback.

    First, a default ``CustomCallback`` is attached to a UMAP model that is
    fitted on ``data``; the callback's own ``check()`` is then run.

    Second, a callback built with ``skip_init=True`` is attached to another
    model, and ``fit_transform`` is expected to raise ``ValueError``.
    """
    good_callback = CustomCallback()
    UMAP(n_components=n_components, callback=good_callback).fit(data)
    good_callback.check()

    # Make sure super().__init__ is called
    broken_callback = CustomCallback(skip_init=True)
    broken_model = UMAP(n_epochs=10, callback=broken_callback)
    with pytest.raises(ValueError):
        broken_model.fit_transform(data)
Пример #2
0
def _build_mnmg_umap(m, data, args, tmpdir):
    """Fit a local UMAP model and hand it to the wrapper ``m``.

    Parameters
    ----------
    m : callable
        Invoked as ``m(client=client, model=local_model, **umap_kwargs)``;
        its return value is returned unchanged.
    data : tuple, list, or a single object with ``compute()``
        Dataset(s) to fit on. ``None`` entries are skipped and every
        remaining entry is materialised with ``.compute()``. Two entries are
        treated as ``(X, y)``; a single entry is treated as ``X``.
    args : dict
        Keyword arguments for ``UMAP`` plus a mandatory ``'client'`` entry.
        The dict is not modified.
    tmpdir
        Unused; kept so the signature stays compatible with callers.

    Raises
    ------
    KeyError
        If ``args`` has no ``'client'`` entry.
    """
    # Work on a copy: deleting 'client' from the caller's dict made a second
    # call with the same dict fail with KeyError.
    umap_kwargs = dict(args)
    client = umap_kwargs.pop('client')
    local_model = UMAP(**umap_kwargs)

    # Accept a bare dataset too. Previously ``local_data`` was never bound in
    # that case and the next line raised UnboundLocalError.
    if not isinstance(data, (tuple, list)):
        data = (data,)
    local_data = [x.compute() for x in data if x is not None]

    if len(local_data) == 2:
        X, y = local_data
        local_model.fit(X, y)
    else:
        # Unwrap a lone dataset so fit() receives the array itself rather
        # than a one-element list wrapping it.
        X = local_data[0] if len(local_data) == 1 else local_data
        local_model.fit(X)

    return m(client=client, model=local_model, **umap_kwargs)
Пример #3
0
def _local_umap_trustworthiness(local_X, local_y, n_neighbors, supervised):
    """
    Fit one UMAP model on the complete dataset and score its embedding.

    ``local_y`` is forwarded to ``fit`` only when ``supervised`` is truthy;
    otherwise the model is fitted without labels. The same ``local_X`` is
    then transformed and the result of ``trustworthiness`` is returned.
    """
    from cuml.manifold import UMAP

    umap = UMAP(n_neighbors=n_neighbors, random_state=42)

    labels = local_y if supervised else None
    umap.fit(local_X, y=labels)

    projected = umap.transform(local_X)
    score = trustworthiness(
        local_X, projected, n_neighbors=n_neighbors, batch_size=5000
    )
    return score
Пример #4
0
def _umap_mnmg_trustworthiness(local_X, local_y, n_neighbors, supervised,
                               n_parts, sampling_ratio):
    """
    Fit UMAP on a random subset of rows, embed every row through the
    distributed wrapper, and return the trustworthiness of that embedding.

    Each row is independently marked as a training row with probability
    ``sampling_ratio`` (fixed seed 42). All rows of ``local_X`` are then
    split into dask chunks of ``ceil(n_rows / n_parts)`` rows and
    transformed by the distributed model built from the fitted local one.
    Labels are used for fitting only when ``supervised`` is truthy.
    """
    import dask.array as da
    from cuml.dask.manifold import UMAP as MNMG_UMAP

    from cuml.manifold import UMAP

    single_model = UMAP(n_neighbors=n_neighbors, random_state=42)

    n_rows = local_X.shape[0]
    rows_per_chunk = math.ceil(n_rows / n_parts)

    # Boolean mask over the rows: True marks a row used for training.
    rng = np.random.RandomState(42)
    train_mask = rng.choice([True, False],
                            n_rows,
                            replace=True,
                            p=[sampling_ratio, 1.0 - sampling_ratio])
    train_rows = local_X[train_mask]

    # The full dataset (not just the held-out part) goes through transform.
    all_rows_d = da.from_array(local_X, chunks=(rows_per_chunk, -1))

    train_labels = local_y[train_mask] if supervised else None
    single_model.fit(train_rows, y=train_labels)

    lazy_embedding = MNMG_UMAP(single_model).transform(all_rows_d)
    computed_embedding = lazy_embedding.compute()

    return trustworthiness(local_X,
                           computed_embedding,
                           n_neighbors=n_neighbors,
                           batch_size=5000)
Пример #5
0
        def umap_mnmg_trustworthiness():
            """Fit UMAP on sampled rows and score the embedding of the rest.

            Reads ``local_X``, ``local_y``, ``n_neighbors``, ``supervised``,
            ``n_parts`` and ``sampling_ratio`` from the enclosing scope.
            ``int(n_samples * sampling_ratio)`` row indices are drawn (with
            replacement, the ``np.random.choice`` default) for training; the
            rows that were never drawn are transformed through the
            distributed model and scored with ``trustworthiness``.
            """
            n_samples = local_X.shape[0]
            n_sampling = int(n_samples * sampling_ratio)
            # Never ask dask for zero-row chunks when n_parts > n_samples.
            n_samples_per_part = max(1, int(n_samples / n_parts))

            local_model = UMAP(n_neighbors=n_neighbors)

            selection = np.random.choice(n_samples, n_sampling)
            X_train = local_X[selection]

            # ``selection`` holds integer indices, so ``~selection`` is a
            # bitwise NOT that produces negative indices, not the complement.
            # Build an explicit boolean mask of the rows that were not drawn.
            held_out = np.ones(n_samples, dtype=bool)
            held_out[selection] = False
            X_transform = local_X[held_out]
            X_transform_d = da.from_array(X_transform,
                                          chunks=(n_samples_per_part, -1))

            y_train = None
            if supervised:
                y_train = local_y[selection]

            local_model.fit(X_train, y=y_train)

            distributed_model = MNMG_UMAP(local_model)
            embedding = distributed_model.transform(X_transform_d)

            embedding = cp.asnumpy(embedding.compute())
            # Pass n_neighbors by keyword: it is keyword-only in current
            # scikit-learn and must not land in another positional slot.
            return trustworthiness(X_transform, embedding,
                                   n_neighbors=n_neighbors)
Пример #6
0
 def local_umap_trustworthiness():
     """Fit UMAP on the full labelled dataset and score its embedding.

     Reads ``local_X``, ``local_y`` and ``n_neighbors`` from the enclosing
     scope, fits on ``(local_X, local_y)``, transforms ``local_X`` and
     returns the result of ``trustworthiness``.
     """
     local_model = UMAP(n_neighbors=n_neighbors)
     local_model.fit(local_X, local_y)
     embedding = local_model.transform(local_X)
     # Pass n_neighbors by keyword: it is keyword-only in current
     # scikit-learn and must not land in another positional slot.
     return trustworthiness(local_X, embedding, n_neighbors=n_neighbors)
Пример #7
0
def test_internals_api(n_components):
    """Fit UMAP with a ``CustomCallback`` attached and run the callback's check.

    The model is built with the requested ``n_components`` and fitted on
    ``data``; afterwards ``check()`` is called on the callback.
    """
    hook = CustomCallback()
    UMAP(n_components=n_components, callback=hook).fit(data)
    hook.check()
Пример #8
0
from cuml.manifold import UMAP
from sklearn.datasets import load_digits
from numba import cuda
from pyrr import Matrix44
import numpy as np
import cudatashader as ds
from cudatashader import transfer_functions as tf
from cudatashader.colors import Hot
from IPython.core.display import display, HTML, clear_output

# Load the scikit-learn digits dataset and split it into the feature matrix
# and the per-sample class labels.
digits = load_digits()
data, target_classes = digits.data, digits.target
n_samples = target_classes.shape[0]

# Fit a 3-component UMAP model on the features, then embed the same samples.
reducer = UMAP(n_components=3)
reducer.fit(data)
embedding = reducer.transform(data)

# Thread-per-block limit reported by the current CUDA device.
# NOTE(review): presumably used to size kernel launches later in the file.
maxThreadsPerBlock = cuda.get_current_device().MAX_THREADS_PER_BLOCK


@cuda.jit('void(int64[:], float64[:,:])')
def fill_agg_value(target_classes, result):
    """CUDA kernel: write each class label into column 2 of ``result``.

    Each thread handles the single index given by ``cuda.grid(1)`` and
    stores ``target_classes[i]`` in ``result[i, 2]``.
    """
    i = cuda.grid(1)
    # The launch grid is usually rounded up to a whole number of blocks, so
    # surplus threads must not touch memory past the end of either array.
    if i < target_classes.shape[0] and i < result.shape[0]:
        result[i, 2] = target_classes[i]


@cuda.jit('void(float64[:,:], float64[:,:], float64[:,:])')
def apply_projection(MVP, embedding, result):
    i = cuda.grid(1)
    x, y, z = embedding[i, 0], embedding[i, 1], embedding[i, 2]