예제 #1
0
    def _init_neighbors(self, adata):
        """Cache representation, distance helpers and search graph from ``adata``.

        Everything is read from ``adata.uns['neighbors']``:

        * ``params`` selects the representation stored in ``self._rep``:
          ``use_rep`` is honoured first, then ``n_pcs``, then the first
          ``N_PCS`` columns of ``adata.obsm['X_pca']`` when
          ``adata.n_vars > N_PCS`` and ``X_pca`` is available.
        * ``params['metric']`` and the optional ``params['metric_kwds']``
          choose the umap distance function and its positional arguments.
        * ``distances`` is converted into a symmetric 0/1 graph and kept in
          ``self._search_graph``.
        * ``rp_forest``, when present, is rebuilt with ``_rp_forest_generate``;
          otherwise ``self._rp_forest`` is ``None``.

        NOTE(review): when none of the representation branches applies,
        ``self._use_rep`` / ``self._rep`` are not assigned here. Presumably the
        class sets defaults elsewhere -- confirm.
        """
        from umap.distances import named_distances
        from umap.nndescent import (
            make_initialisations,
            make_initialized_nnd_search,
        )

        neighbors = adata.uns['neighbors']
        params = neighbors['params']

        # Pick the data representation the neighbor graph was computed on.
        if 'use_rep' in params:
            rep_key = params['use_rep']
            self._use_rep = rep_key
            self._rep = adata.X if rep_key == 'X' else adata.obsm[rep_key]
        elif 'n_pcs' in params:
            n_pcs = params['n_pcs']
            self._use_rep = 'X_pca'
            self._n_pcs = n_pcs
            self._rep = adata.obsm['X_pca'][:, :n_pcs]
        elif adata.n_vars > N_PCS and 'X_pca' in adata.obsm.keys():
            pca_rep = adata.obsm['X_pca'][:, :N_PCS]
            self._use_rep = 'X_pca'
            self._rep = pca_rep
            self._n_pcs = pca_rep.shape[1]

        # Metric keyword values are handed to umap positionally.
        metric_args = (
            tuple(params['metric_kwds'].values()) if 'metric_kwds' in params else ()
        )
        metric_fn = named_distances[params['metric']]
        self._random_init, self._tree_init = make_initialisations(metric_fn, metric_args)
        self._search = make_initialized_nnd_search(metric_fn, metric_args)

        # Binarise the stored distances (positive entry -> 1) and symmetrise.
        connectivity = neighbors['distances'].copy()
        connectivity.data = (connectivity.data > 0).astype(np.int8)
        self._search_graph = connectivity.maximum(connectivity.transpose())

        self._rp_forest = (
            _rp_forest_generate(neighbors['rp_forest'])
            if 'rp_forest' in neighbors
            else None
        )
예제 #2
0
def test_nn_search():
    """Check that the initialised NN-descent search recovers true neighbors.

    The first 100 rows of ``nn_data`` are used as queries and the remaining
    rows as the indexed set. The 10 neighbors returned by the search for each
    query are compared against a ``KDTree`` lookup; at least 99% of the true
    neighbors must be found.
    """
    n_neighbors = 10
    train = nn_data[100:]
    test = nn_data[:100]

    knn_indices, knn_dists, rp_forest = nearest_neighbors(
        train, n_neighbors, "euclidean", {}, False, np.random
    )

    # The result is not used below; the call is kept so the test performs
    # exactly the same work as before (it is handed the shared np.random).
    graph = fuzzy_simplicial_set(
        nn_data,
        n_neighbors,
        np.random,
        "euclidean",
        {},
        knn_indices,
        knn_dists,
        False,
        1.0,
        1.0,
        False,
    )

    # Build a symmetric 0/1 adjacency structure from the kNN result.
    n_train = train.shape[0]
    search_graph = sparse.lil_matrix((n_train, n_train), dtype=np.int8)
    search_graph.rows = knn_indices
    search_graph.data = (knn_dists != 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose()).tocsr()

    random_init, tree_init = make_initialisations(dist.euclidean, ())
    search = make_initialized_nnd_search(dist.euclidean, ())

    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    init = initialise_search(
        rp_forest,
        train,
        test,
        int(n_neighbors * 3),
        random_init,
        tree_init,
        rng_state,
    )
    result = search(
        train, search_graph.indptr, search_graph.indices, init, test
    )

    indices, dists = deheap_sort(result)
    indices = indices[:, :n_neighbors]

    true_indices = KDTree(train).query(test, n_neighbors, return_distance=False)

    # Count, per query row, how many true neighbors the search returned.
    num_correct = sum(
        (
            np.sum(np.in1d(true_indices[row], indices[row]))
            for row in range(test.shape[0])
        ),
        0.0,
    )

    percent_correct = num_correct / (test.shape[0] * n_neighbors)
    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse NN-descent did not get "
        "99% accuracy on nearest "
        "neighbors",
    )
예제 #3
0
    def _init_dist_search(self, dist_args):
        """Prepare the search-initialisation and search callables for ``self._metric``.

        The distance function is looked up in umap's ``named_distances``.
        Depending on the installed ``umap-learn`` version, one of two setups
        is used:

        * older than 0.4.0: ``make_initialisations`` and
          ``make_initialized_nnd_search`` are called with the distance
          function and ``dist_args``; ``self._random_init`` and
          ``self._tree_init`` are populated.
        * 0.4.0 or newer: the distance function is wrapped in a numba-jitted
          two-argument function that appends ``dist_args``; this wrapper is
          stored in ``self._dist_func`` and bound as ``dist=`` to
          ``initialise_search`` and ``initialized_nnd_search``.

        The resulting callables are stored in ``self._initialise_search`` and
        ``self._search``.

        Parameters
        ----------
        dist_args
            Extra positional arguments passed to the distance function.
        """
        from functools import partial
        from umap.nndescent import initialise_search
        from umap.distances import named_distances

        # Start from a clean slate so only the attributes relevant to the
        # active branch end up populated.
        self._random_init = self._tree_init = None
        self._initialise_search = self._search = None
        self._dist_func = None

        metric_fn = named_distances[self._metric]
        legacy_umap = pkg_version('umap-learn') < version.parse("0.4.0")

        if not legacy_umap:
            from numba import njit
            from umap.nndescent import initialized_nnd_search

            @njit
            def partial_dist_func(x, y):
                return metric_fn(x, y, *dist_args)

            init_fn = partial(initialise_search, dist=partial_dist_func)
            search_fn = partial(initialized_nnd_search, dist=partial_dist_func)

            self._dist_func = partial_dist_func
        else:
            from umap.nndescent import (
                make_initialisations,
                make_initialized_nnd_search,
            )

            random_init, tree_init = make_initialisations(metric_fn, dist_args)
            self._random_init, self._tree_init = random_init, tree_init
            init_fn = partial(
                initialise_search,
                init_from_random=self._random_init,
                init_from_tree=self._tree_init,
            )
            search_fn = make_initialized_nnd_search(metric_fn, dist_args)

        self._initialise_search = init_fn
        self._search = search_fn
예제 #4
0
def get_umap():
    """Serve a UMAP embedding for the parameters in the request's JSON body.

    Reads ``latent_dim``, ``n_neighbors`` and ``min_dist`` from
    ``request.json``. If a pickled model for that combination exists on disk
    it is loaded, its search helpers are rebuilt, it is stored in ``umap_fit``
    under ``'<n_neighbors>-<min_dist>'`` and its ``embedding_`` is returned.
    Otherwise ``_fit_umap`` is called and the ``embedding_`` of its result is
    returned.

    Returns:
        A ``(response, 200)`` tuple whose JSON body is
        ``{'data': <embedding as nested lists>}``.
    """
    payload = request.json
    latent_dim = payload['latent_dim']
    nn = payload['n_neighbors']
    dist = payload['min_dist']

    path_template = abs_path('./data/{}/umap/umap{}-nn{}-dist{}.pkl')
    pkl_path = path_template.format(dset, latent_dim, nn, dist)

    if not os.path.exists(pkl_path):
        embedding = _fit_umap(latent_dim, nn, dist).embedding_
    else:
        # NOTE(review): the path is assembled from request values and the file
        # is unpickled -- make sure those values cannot point outside the
        # cache directory, since pickle.load executes arbitrary code.
        with open(pkl_path, 'rb') as pkl_file:
            model = pickle.load(pkl_file)

        # Recreate the search helpers on the loaded model (presumably they do
        # not survive pickling -- confirm).
        from umap.nndescent import make_initialisations, make_initialized_nnd_search
        model._random_init, model._tree_init = make_initialisations(
            model._distance_func, model._dist_args)
        model._search = make_initialized_nnd_search(
            model._distance_func, model._dist_args)

        umap_fit['{}-{}'.format(nn, dist)] = model
        embedding = model.embedding_

    return jsonify({'data': embedding.tolist()}), 200
예제 #5
0
def neighbors_update(adata, adata_new, k=10, queue_size=5, random_state=0):
    """Find the ``k`` nearest rows of ``adata.X`` for every row of ``adata_new.X``.

    The metric, its optional keyword values, the stored distance graph and
    the optional ``rp_forest`` are all taken from ``adata.uns['neighbors']``.
    Only the ``use_rep='X'`` case is handled for now: both the reference and
    the query data are read from ``.X``.

    Parameters
    ----------
    adata
        Reference object providing ``.X`` and ``.uns['neighbors']``.
    adata_new
        Object whose ``.X`` rows are the query points.
    k
        Number of neighbors returned per query row.
    queue_size
        Multiplier for the search queue; ``int(k * queue_size)`` candidates
        are requested from ``initialise_search``.
    random_state
        Passed through ``check_random_state`` and used to draw the three
        integers forming the search RNG state.

    Returns
    -------
    tuple
        ``(indices[:, :k], dists[:, :k])`` as produced by ``deheap_sort``.
    """
    from umap.nndescent import make_initialisations, make_initialized_nnd_search, initialise_search
    from umap.umap_ import INT32_MAX, INT32_MIN
    from umap.utils import deheap_sort
    from umap.distances import named_distances

    neighbors = adata.uns['neighbors']
    params = neighbors['params']

    # Metric keyword values are handed to umap positionally.
    metric_args = (
        tuple(params['metric_kwds'].values()) if 'metric_kwds' in params else ()
    )
    metric_fn = named_distances[params['metric']]

    random_init, tree_init = make_initialisations(metric_fn, metric_args)
    search = make_initialized_nnd_search(metric_fn, metric_args)

    # Binarise the stored distances (positive entry -> 1) and symmetrise.
    connectivity = neighbors['distances'].copy()
    connectivity.data = (connectivity.data > 0).astype(np.int8)
    connectivity = connectivity.maximum(connectivity.transpose())
    # prune it?

    rng = check_random_state(random_state)
    rng_state = rng.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)

    rp_forest = (
        _rp_forest_generate(neighbors['rp_forest'])
        if 'rp_forest' in neighbors
        else None
    )

    reference = adata.X
    queries = adata_new.X

    init = initialise_search(
        rp_forest,
        reference,
        queries,
        int(k * queue_size),
        random_init,
        tree_init,
        rng_state,
    )
    heap = search(
        reference, connectivity.indptr, connectivity.indices, init, queries
    )

    indices, dists = deheap_sort(heap)
    return indices[:, :k], dists[:, :k]