def smoothen_dists(store, z_idx, z_dist, lc: float, bw: float, chunk_size: int = 100000):
    """
    Smoothens KNN distances.

    Walks over the KNN arrays in blocks of rows, turns the distances of each
    block into edge weights with UMAP's `smooth_knn_dist` and
    `compute_membership_strengths`, and writes the result into two datasets,
    'edges' (row, col pairs) and 'weights', created through
    `create_zarr_dataset`. Once all blocks are written, every weight that is
    exactly 0 is replaced by the smallest non-zero weight.

    Args:
        store (): Passed as-is to `create_zarr_dataset`; presumably the zarr
                  group that receives the 'edges' and 'weights' datasets.
        z_idx (): 2D array of neighbour indices; its shape is read as
                  (n_cells, n_neighbors).
        z_dist (): Array of neighbour distances, sliced with the same row
                   ranges as `z_idx`.
        lc (): Forwarded to `smooth_knn_dist` as `local_connectivity`.
        bw (): Forwarded to `smooth_knn_dist` as `bandwidth`.
        chunk_size (): Chunk length of the created datasets. It also bounds
                       the number of edges handled per iteration: each
                       iteration processes `chunk_size / n_neighbors` rows
                       (at least one).

    Returns:
        None

    """
    from umap.umap_ import smooth_knn_dist, compute_membership_strengths

    umap_is_latest = _is_umap_version_new()

    n_cells, n_neighbors = z_idx.shape
    n_edges = n_cells * n_neighbors
    zge = create_zarr_dataset(store, 'edges', (chunk_size,), ('u8', 'u8'), (n_edges, 2))
    zgw = create_zarr_dataset(store, 'weights', (chunk_size,), 'f8', (n_edges,))

    # Never let the step drop to 0 (n_neighbors > chunk_size): range() would
    # raise ValueError on a zero step.
    step = max(1, int(chunk_size / n_neighbors))
    val_counts = 0
    for i in tqdm(range(0, n_cells, step), desc='Smoothening KNN distances'):
        stop = min(i + step, n_cells)
        ki, kv = z_idx[i:stop, :], z_dist[i:stop, :]
        kv = kv.astype(np.float32, order='C')
        sigmas, rhos = smooth_knn_dist(kv, k=n_neighbors, local_connectivity=lc, bandwidth=bw)
        if umap_is_latest:
            rows, cols, vals, _ = compute_membership_strengths(ki, kv, sigmas, rhos)
        else:
            rows, cols, vals = compute_membership_strengths(ki, kv, sigmas, rhos)
        start, end = val_counts, val_counts + len(rows)
        # Shift the returned row ids by the first row of this block so they
        # refer to positions in the full array. Using the block start keeps
        # the offset independent of the value of the last returned row id.
        zge[start:end, 0] = rows + i
        zge[start:end, 1] = cols
        zgw[start:end] = vals
        val_counts = end

    # Fixing edges with 0 weights
    w = zgw[:]
    zero_mask = w == 0
    # Nothing to do without zeros; and without any non-zero weight there is
    # no minimum to substitute, so the weights are left untouched in that case.
    if zero_mask.any() and not zero_mask.all():
        w[zero_mask] = w[~zero_mask].min()
        zgw[:] = w
    return None
def smooth_knn(nn_data, local_connectivity=1.0):
    """
    Return, for every point of `nn_data`, the sum of its smoothed KNN weights.

    A 10-nearest-neighbour search (euclidean) is run on `nn_data`, the
    distances are calibrated with `smooth_knn_dist`, and each distance is
    converted to exp(-max(d - rho, 0) / sigma). The per-row sums of those
    values are returned.

    Args:
        nn_data: Data handed to `nearest_neighbors`.
        local_connectivity: Forwarded to `smooth_knn_dist`.

    Returns:
        1D array with one summed weight per row of the KNN distance matrix.
    """
    n_neighbors = 10
    neighbor_result = nearest_neighbors(
        nn_data, n_neighbors, "euclidean", {}, False, np.random
    )
    knn_dists = neighbor_result[1]
    sigmas, rhos = smooth_knn_dist(
        knn_dists, float(n_neighbors), local_connectivity=local_connectivity
    )

    # Distance beyond each point's rho; anything closer than rho counts as 0.
    excess = knn_dists - rhos[:, np.newaxis]
    below_rho = excess < 0.0
    excess[below_rho] = 0.0

    sigma_column = sigmas[:, np.newaxis]
    weights = np.exp(-(excess / sigma_column))
    return weights.sum(axis=1)
def _fuzzy_simplicial_set(
    X,
    n_neighbors,
    random_state,
    metric,
    metric_kwds=None,
    knn_indices=None,
    knn_dists=None,
    angular=False,
    set_op_mix_ratio=1.0,
    local_connectivity=1.0,
    apply_set_operations=True,
    verbose=False,
    return_dists=None,
):
    """
    Overwrite the UMAP `fuzzy_simplicial_set` function to allow computation
    with float64.

    Args:
        X: Input data; only `X.shape[0]` is used here (as the size of the
            square output matrices) besides being passed to
            `nearest_neighbors`.
        n_neighbors: Number of neighbours, passed to `nearest_neighbors` and,
            as a float, to `smooth_knn_dist`.
        random_state: Passed to `nearest_neighbors`.
        metric: Passed to `nearest_neighbors`.
        metric_kwds: Keyword arguments for the metric, passed to
            `nearest_neighbors`. `None` (the default) is treated as an empty
            dict.
        knn_indices: Precomputed neighbour indices. The neighbour search is
            run only when this or `knn_dists` is None.
        knn_dists: Precomputed neighbour distances (see `knn_indices`).
        angular: Passed to `nearest_neighbors`.
        set_op_mix_ratio: Weight of the union-like term
            (A + A^T - A * A^T) against the intersection-like term (A * A^T)
            when combining the matrix with its transpose.
        local_connectivity: Passed, as a float, to `smooth_knn_dist`.
        apply_set_operations: When true, the matrix is combined with its
            transpose as described for `set_op_mix_ratio`.
        verbose: Passed to `nearest_neighbors`.
        return_dists: Passed to `_compute_membership_strengths`; also selects
            the shape of the return value (see Returns).

    Returns:
        `(result, sigmas, rhos)` when `return_dists` is None. Otherwise
        `(result, sigmas, rhos, dists)`, where `dists` is the element-wise
        maximum of the distance matrix and its transpose in DOK format when
        `return_dists` is truthy, and None when it is falsy.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if metric_kwds is None:
        metric_kwds = {}

    if knn_indices is None or knn_dists is None:
        knn_indices, knn_dists, _ = nearest_neighbors(
            X,
            n_neighbors,
            metric,
            metric_kwds,
            angular,
            random_state,
            verbose=verbose,
        )

    sigmas, rhos = smooth_knn_dist(
        knn_dists,
        float(n_neighbors),
        local_connectivity=float(local_connectivity),
    )

    rows, cols, vals, dists = _compute_membership_strengths(
        knn_indices, knn_dists, sigmas, rhos, return_dists
    )

    n_samples = X.shape[0]
    result = scipy.sparse.coo_matrix(
        (vals, (rows, cols)), shape=(n_samples, n_samples)
    )
    result.eliminate_zeros()

    if apply_set_operations:
        transpose = result.transpose()
        prod_matrix = result.multiply(transpose)
        result = (
            set_op_mix_ratio * (result + transpose - prod_matrix)
            + (1.0 - set_op_mix_ratio) * prod_matrix
        )

    result.eliminate_zeros()

    if return_dists is None:
        return result, sigmas, rhos

    if return_dists:
        dmat = scipy.sparse.coo_matrix(
            (dists, (rows, cols)), shape=(n_samples, n_samples)
        )
        # Symmetrise by keeping the larger of d(i, j) and d(j, i).
        dists = dmat.maximum(dmat.transpose()).todok()
    else:
        dists = None
    return result, sigmas, rhos, dists
def test_smooth_knn_dist_l1norms():
    """
    Check that smoothed KNN weights of every point sum to 1 + log2(10).

    Runs a 10-nearest-neighbour search on the module-level `nn_data`,
    calibrates the distances with `smooth_knn_dist` (default local
    connectivity) and compares the per-row sums of
    exp(-max(d - rho, 0) / sigma) with the expected value to 3 decimals.
    """
    knn_indices, knn_dists, _ = nearest_neighbors(
        nn_data, 10, "euclidean", {}, False, np.random
    )
    sigmas, rhos = smooth_knn_dist(knn_dists, 10.0)
    shifted_dists = knn_dists - rhos[:, np.newaxis]
    # Neighbours closer than rho contribute a weight of exactly 1.
    shifted_dists[shifted_dists < 0.0] = 0.0
    vals = np.exp(-(shifted_dists / sigmas[:, np.newaxis]))
    norms = np.sum(vals, axis=1)

    assert_array_almost_equal(
        norms,
        1.0 + np.log2(10) * np.ones(norms.shape[0]),
        decimal=3,
        # The two adjacent literals used to join into "expectednorms".
        err_msg="Smooth knn-dists does not give expected norms",
    )
def smoothen_dists(store, z_idx, z_dist, lc: float, bw: float, chunk_size: int):
    """
    Smoothens KNN distances.

    Walks over the KNN arrays `chunk_size` rows at a time, turns the distances
    of each block into edge weights with UMAP's `smooth_knn_dist` and
    `compute_membership_strengths`, and writes the result into two datasets,
    "edges" (row, col pairs) and "weights", created through
    `create_zarr_dataset`. Weights that are exactly 0 are finally replaced by
    a small positive fill value (see the comments in the body).

    Args:
        store (): Passed as-is to `create_zarr_dataset`; presumably the zarr
                  group that receives the "edges" and "weights" datasets.
        z_idx (): 2D array of neighbour indices; its shape is read as
                  (n_cells, n_neighbors).
        z_dist (): Array of neighbour distances, sliced with the same row
                   ranges as `z_idx`.
        lc (): Forwarded to `smooth_knn_dist` as `local_connectivity`.
        bw (): Forwarded to `smooth_knn_dist` as `bandwidth`.
        chunk_size (): Number of rows processed per iteration and chunk
                       length of the created datasets.

    Returns:
        None

    """
    from umap.umap_ import smooth_knn_dist, compute_membership_strengths

    umap_is_latest = _is_umap_version_new()

    n_cells, n_neighbors = z_idx.shape
    n_edges = n_cells * n_neighbors
    zge = create_zarr_dataset(store, "edges", (chunk_size,), ("u8", "u8"), (n_edges, 2))
    zgw = create_zarr_dataset(store, "weights", (chunk_size,), "f8", (n_edges,))

    val_counts = 0
    # Positions (in the flat weights array) of zero weights, one integer
    # array per block that has any. Storing positions instead of a
    # per-element boolean list keeps memory proportional to the number of
    # zeros and cannot get out of step with the weights array.
    zero_positions = []
    global_min = 1
    found_nonzero = False
    for i in tqdmbar(range(0, n_cells, chunk_size), desc="Smoothening KNN distances"):
        stop = min(i + chunk_size, n_cells)
        ki, kv = z_idx[i:stop, :], z_dist[i:stop, :]
        kv = kv.astype(np.float32, order="C")
        sigmas, rhos = smooth_knn_dist(kv, k=n_neighbors, local_connectivity=lc, bandwidth=bw)
        if umap_is_latest:
            rows, cols, vals, _ = compute_membership_strengths(ki, kv, sigmas, rhos)
        else:
            rows, cols, vals = compute_membership_strengths(ki, kv, sigmas, rhos)
        start, end = val_counts, val_counts + len(rows)
        # Shift the returned row ids by the first row of this block so they
        # refer to positions in the full array. Using the block start keeps
        # the offset independent of the value of the last returned row id.
        zge[start:end, 0] = rows + i
        zge[start:end, 1] = cols
        zgw[start:end] = vals
        val_counts = end

        # Fixing edges with 0 weights
        # We are doing these steps here to have minimum operations outside
        # the scope of a progress bar
        nidx = vals == 0
        if nidx.any():
            zero_positions.append(np.flatnonzero(nidx) + start)
            # A block made only of zeros has no minimum to offer.
            if not nidx.all():
                found_nonzero = True
                # As before, only blocks that contain zeros contribute to the
                # fill value, which never exceeds the initial value of 1.
                min_val = vals[~nidx].min()
                if min_val < global_min:
                    global_min = min_val

    # The whole weights array is copied, modified and written back, but only
    # when there is something to fix and a non-zero weight to fix it with.
    if zero_positions and found_nonzero:
        w = zgw[:]
        w[np.concatenate(zero_positions)] = global_min
        zgw[:] = w
    return None