def calc_fitsne(X, n_jobs, n_components, perplexity, early_exaggeration, learning_rate, random_state):
    # FItSNE will change X content
    return FItSNE(
        X.copy(),
        nthreads=n_jobs,
        no_dims=n_components,
        perplexity=perplexity,
        early_exag_coeff=early_exaggeration,
        learning_rate=learning_rate,
        rand_seed=(random_state if random_state is not None else -1),
    )
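# A minimal usage sketch for the wrapper above, assuming `from fitsne import FItSNE`
# is available at module level. The parameter values are illustrative only, not
# defaults taken from any particular pipeline.
import numpy as np
from fitsne import FItSNE

X = np.random.rand(1000, 50)          # toy input; FIt-SNE expects a dense 2-D array
Y = calc_fitsne(
    X,
    n_jobs=4,
    n_components=2,
    perplexity=30,
    early_exaggeration=12,
    learning_rate=200,
    random_state=0,                   # None would map to rand_seed=-1 (random seed)
)
# X itself is left untouched because the wrapper passes X.copy() to FItSNE.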
def tsne(X, method='umap'):
    """
    Parameters:
    ---------------
    X: numpy.array()
        log2(TPM + 1)
    method: str()
        'sklearn', 'FIt-SNE', 'TSNEApprox', or 'umap' (default)
    Returns:
    ---------------
    tsne_1: list()
    tsne_2: list()
    """
    if method == 'sklearn':
        X_embedded = TSNE(n_components=2, metric='cosine', init='pca').fit_transform(X)
    # fit-sne seems to have the problem 'np.array() is not C - contiguous'
    elif method == 'FIt-SNE':
        # init
        pca = PCA(n_components=50)
        X_pca = pca.fit_transform(X)
        init = X_pca[:, :2]
        # make array C - contiguous
        X_pca = np.ascontiguousarray(X_pca)
        init = np.ascontiguousarray(init)
        # embedding
        X_embedded = FItSNE(X_pca, initialization=init)
    elif method == 'TSNEApprox':
        # from scanorama, the result seems to be very weird, do not suggest using.
        from scanorama.t_sne_approx import TSNEApprox
        # init
        pca = PCA(n_components=50)
        X_pca = pca.fit_transform(X)
        init = X_pca[:, :2]
        # embedding
        X_embedded = TSNEApprox(init='pca', metric='cosine').fit_transform(X_pca)
    elif method == 'umap':
        import umap
        reducer = umap.UMAP()
        X_embedded = reducer.fit_transform(X)
    tsne_1 = X_embedded[:, 0].tolist()
    tsne_2 = X_embedded[:, 1].tolist()
    return tsne_1, tsne_2
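# A minimal usage sketch for tsne() above, assuming the module-level imports it
# relies on (numpy as np, sklearn's TSNE and PCA, and fitsne's FItSNE) are present.
# The random matrix below stands in for a log2(TPM + 1) expression matrix.
import numpy as np

expr = np.random.rand(500, 2000)
tsne_1, tsne_2 = tsne(expr, method='umap')        # default UMAP embedding
# tsne_1, tsne_2 = tsne(expr, method='FIt-SNE')   # requires the fitsne package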
def calc_tsne(
    X,
    nthreads,
    no_dims,
    perplexity,
    early_exag_coeff,
    learning_rate,
    rand_seed,
    initialization=None,
    max_iter=750,
    stop_early_exag_iter=250,
    mom_switch_iter=250,
):
    """
    TODO: Calculate t-SNE embeddings using the FIt-SNE package
    """
    # FItSNE will change X content

    # Check if fftw3 is installed.
    import ctypes.util

    fftw3_loc = ctypes.util.find_library("fftw3")
    if fftw3_loc is None:
        raise Exception(
            "Please install 'fftw3' first to use the FIt-SNE feature!")

    try:
        from fitsne import FItSNE
    except ModuleNotFoundError:
        import sys

        logger.error(
            "Need FItSNE! Try 'pip install fitsne' or 'conda install -c conda-forge pyfit-sne'."
        )
        sys.exit(-1)

    return FItSNE(
        X,
        nthreads=nthreads,
        no_dims=no_dims,
        perplexity=perplexity,
        early_exag_coeff=early_exag_coeff,
        learning_rate=learning_rate,
        rand_seed=rand_seed,
        initialization=initialization,
        max_iter=max_iter,
        stop_early_exag_iter=stop_early_exag_iter,
        mom_switch_iter=mom_switch_iter,
    )
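# A hedged sketch of calling calc_tsne() above; it assumes libfftw3 is on the
# library search path and that a module-level `logger` is configured elsewhere.
# Parameter values are illustrative.
import numpy as np

X_pca = np.ascontiguousarray(np.random.rand(1000, 50))
embedding = calc_tsne(
    X_pca,
    nthreads=4,
    no_dims=2,
    perplexity=30,
    early_exag_coeff=12,
    learning_rate=200,
    rand_seed=0,
)
# Note: FItSNE may modify X_pca in place, so pass a copy if the PCA matrix is reused.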
def calc_fitsne(
    X,
    nthreads,
    no_dims,
    perplexity,
    early_exag_coeff,
    learning_rate,
    rand_seed,
    initialization=None,
    max_iter=1000,
    stop_early_exag_iter=250,
    mom_switch_iter=250,
):
    """
    TODO: Typing
    """
    # FItSNE will change X content

    # Check if fftw3 is installed.
    import ctypes.util

    fftw3_loc = ctypes.util.find_library("fftw3")
    if fftw3_loc is None:
        raise Exception(
            "Please install 'fftw3' first to use the FIt-SNE feature!")

    from fitsne import FItSNE

    return FItSNE(
        X.astype("float64"),
        nthreads=nthreads,
        no_dims=no_dims,
        perplexity=perplexity,
        early_exag_coeff=early_exag_coeff,
        learning_rate=learning_rate,
        rand_seed=rand_seed,
        initialization=initialization,
        max_iter=max_iter,
        stop_early_exag_iter=stop_early_exag_iter,
        mom_switch_iter=mom_switch_iter,
    )
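# Design note, as a hedged sketch: the snippets in this collection repeatedly cast
# to float64 and call np.ascontiguousarray before FItSNE (see the comment in tsne()
# about "np.array() is not C - contiguous"), which suggests the binding expects a
# C-contiguous double-precision array. One call covers both requirements:
import numpy as np

X = np.random.rand(100, 50).astype(np.float32)     # e.g. a float32, possibly sliced array
X64 = np.ascontiguousarray(X, dtype=np.float64)    # C-contiguous float64 copy for FItSNE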
def fit_transform(self, X):
    if self.variant == "bhtsne":
        return bhtsne.tsne(X, perplexity=self.perplexity)
    if self.variant == "multicore":
        return MulticoreTSNE(n_jobs=4, perplexity=self.perplexity).fit_transform(X)
    if self.variant == "sklearn":
        return skTSNE(perplexity=self.perplexity).fit_transform(X)
    if self.variant == "optsne":
        return OptSNE(perplexity=self.perplexity).fit_transform(X)
    if self.variant == "fitsne":
        return FItSNE(X, perplexity=self.perplexity)
    if self.variant == "cuda":
        return CudaTSNE(perplexity=self.perplexity).fit_transform(X)
    return None
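# The method above clearly belongs to a small wrapper class that is not shown here,
# and it assumes the backend imports (bhtsne, MulticoreTSNE, skTSNE, OptSNE, FItSNE,
# CudaTSNE) exist at module level. A minimal, hypothetical sketch of such a class
# (only `variant` and `perplexity` are taken from the method; the class name and
# constructor are assumptions):
class TSNEBenchmarkWrapper:
    def __init__(self, variant="sklearn", perplexity=30):
        self.variant = variant        # which t-SNE backend to dispatch to
        self.perplexity = perplexity  # forwarded to every backend


TSNEBenchmarkWrapper.fit_transform = fit_transform  # reuse the dispatcher defined above
# embedding = TSNEBenchmarkWrapper(variant="fitsne").fit_transform(X)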
def run_reduce_dim(
    adata,
    X_data,
    n_components,
    n_pca_components,
    reduction_method,
    embedding_key,
    n_neighbors,
    neighbor_key,
    cores,
    kwargs,
):
    if reduction_method == "trimap":
        import trimap

        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance="euclidean",  # cosine
            weight_adj=1000.0,
            apply_pca=False,
        )
        X_dim = triplemap.fit_transform(X_data)

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {
            "params": {"n_neighbors": n_neighbors, "method": reduction_method},
            # "connectivities": "connectivities",
            # "distances": "distances",
            # "indices": "indices",
        }
    elif reduction_method == "diffusion_map":
        pass
    elif reduction_method.lower() == "tsne":
        try:
            from fitsne import FItSNE
        except ImportError:
            print(
                "Please first install fitsne to perform accelerated tSNE method. Install instruction is "
                "provided here: https://pypi.org/project/fitsne/")

        X_dim = FItSNE(X_data, nthreads=cores)  # use FitSNE
        # bh_tsne = TSNE(n_components = n_components)
        # X_dim = bh_tsne.fit_transform(X)

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {
            "params": {"n_neighbors": n_neighbors, "method": reduction_method},
            # "connectivities": "connectivities",
            # "distances": "distances",
            # "indices": "indices",
        }
    elif reduction_method == "umap":
        _umap_kwargs = {
            "n_components": n_components,
            "metric": "euclidean",
            "min_dist": 0.5,
            "spread": 1.0,
            "n_epochs": 0,
            "alpha": 1.0,
            "gamma": 1.0,
            "negative_sample_rate": 5,
            "init_pos": "spectral",
            "random_state": 0,
            "densmap": False,
            "dens_lambda": 2.0,
            "dens_frac": 0.3,
            "dens_var_shift": 0.1,
            "output_dens": False,
            "verbose": False,
        }
        umap_kwargs = update_dict(_umap_kwargs, kwargs)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            (
                mapper,
                graph,
                knn_indices,
                knn_dists,
                X_dim,
            ) = umap_conn_indices_dist_embedding(X_data, n_neighbors, **umap_kwargs)  # X

        adata.obsm[embedding_key] = X_dim
        knn_dists = knn_to_adj(knn_indices, knn_dists)
        adata.uns[neighbor_key] = {
            "params": {"n_neighbors": n_neighbors, "method": reduction_method},
            # "connectivities": "connectivities",
            # "distances": "distances",
            "indices": knn_indices,
        }

        layer = neighbor_key.split("_")[0] if "_" in neighbor_key else None
        conn_key = "connectivities" if layer is None else layer + "_connectivities"
        dist_key = "distances" if layer is None else layer + "_distances"
        adata.obsp[conn_key], adata.obsp[dist_key] = graph, knn_dists

        adata.uns["umap_fit"] = {"fit": mapper, "n_pca_components": n_pca_components}
    elif reduction_method == "psl":
        adj_mat, X_dim = psl(X_data, d=n_components, K=n_neighbors)  # this needs to be updated
        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = adj_mat
    else:
        raise Exception(
            "reduction_method {} is not supported.".format(reduction_method))

    return adata
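# A hedged usage sketch for run_reduce_dim() above. It assumes an AnnData object
# with a precomputed PCA basis in adata.obsm["X_pca"] and that the dynamo-style
# helpers referenced inside (update_dict, umap_conn_indices_dist_embedding,
# knn_to_adj, psl) are importable in the same module.
import anndata
import numpy as np

adata = anndata.AnnData(np.random.rand(300, 100))
adata.obsm["X_pca"] = np.random.rand(300, 30)      # stand-in for a real PCA basis

adata = run_reduce_dim(
    adata,
    X_data=adata.obsm["X_pca"],
    n_components=2,
    n_pca_components=30,
    reduction_method="umap",
    embedding_key="X_umap",
    n_neighbors=30,
    neighbor_key="neighbors",
    cores=1,
    kwargs={},
)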
def run(perplexity=30, learning_rate=100, n_jobs=4):
    x, y = get_mouse_60k()
    # x, y = get_fashion_mnist()

    angle = 0.5
    ee = 12
    metric = 'euclidean'
    print(x.shape)

    start = time.time()
    tsne = TSNE(
        perplexity=perplexity,
        learning_rate=learning_rate,
        early_exaggeration=ee,
        n_jobs=n_jobs,
        theta=angle,
        initialization='random',
        metric=metric,
        n_components=2,
        n_iter=750,
        early_exaggeration_iter=250,
        neighbors='approx',
        negative_gradient_method='fft',
        min_num_intervals=10,
        ints_in_interval=1,
        late_exaggeration_iter=0,
        late_exaggeration=2.,
        callbacks=ErrorLogger(),
    )
    # x = PCA(n_components=50).fit_transform(x)
    embedding = tsne.fit(x)
    print('-' * 80)
    print('tsne', time.time() - start)
    plt.title('tsne')
    plot(embedding, y)
    return

    # The early return above gates the remaining benchmarks; remove it to run them.
    x = np.ascontiguousarray(x.astype(np.float64))

    from fitsne import FItSNE
    start = time.time()
    embedding = FItSNE(
        x,
        2,
        perplexity=perplexity,
        stop_lying_iter=250,
        ann_not_vptree=True,
        early_exag_coeff=ee,
        nthreads=n_jobs,
        theta=angle,
    )
    print('-' * 80)
    print('fft interp %.4f' % (time.time() - start))
    plt.title('fft interp')
    plot(embedding, y)
    plt.show()
    return

    init = PCA(n_components=2).fit_transform(x)

    start = time.time()
    embedding = MulticoreTSNE(
        early_exaggeration=ee,
        learning_rate=learning_rate,
        perplexity=perplexity,
        n_jobs=n_jobs,
        cheat_metric=False,
        angle=angle,
        init=init,
        metric=metric,
        verbose=True,
    ).fit_transform(x)
    print('-' * 80)
    print('mctsne', time.time() - start)
    plt.title('mctsne')
    plot(embedding, y)
    plt.show()

    start = time.time()
    embedding = SKLTSNE(
        early_exaggeration=ee,
        learning_rate=learning_rate,
        angle=angle,
        perplexity=perplexity,
        init='pca',
        metric=metric,
    ).fit_transform(x)
    print('-' * 80)
    print('sklearn', time.time() - start)
    plt.title('sklearn')
    plot(embedding, y)
    plt.show()
if os.path.isfile(FILENAME_TSNE_RESULT):
    print('File exists already, skipping this step..')
    exit()

# _____ Load data ______________________________________________________________ Load data

# Load the data
sample_tsne_no_noise = np.load(
    dirs.data + 'sample_to_tsne_range_{}.npy'.format(SELECTED_RANGES))

# Noise is then added at each iteration
sample_tsne = sample_tsne_no_noise + np.random.normal(
    0, 1 / float(SNR), sample_tsne_no_noise.shape)
print("Added noise to the t-SNE sample with an SNR of {}".format(int(SNR)))
del sample_tsne_no_noise

# _____ t-SNE __________________________________________________________________ t-SNE

print('=' * 40)
print('--> Starting t-SNE analysis for perplexity {}'.format(PERPLEXITY))

# Run t-SNE for each one of the above perplexities
tSNEd_fluxes = FItSNE(X=sample_tsne, perplexity=PERPLEXITY)

# Save the data
np.save(dirs.data + 'tSNE_results_range_{}_perplexity_{}_SNRof{}_fftw'.format(
    SELECTED_RANGES, PERPLEXITY, int(SNR)), tSNEd_fluxes)
print('=' * 40)
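# The script above relies on module-level configuration that is not shown here.
# A hypothetical header illustrating what it assumes (the names match the script,
# but the concrete values and the `dirs` helper are placeholders, not from the source):
import os
import numpy as np
from fitsne import FItSNE

SELECTED_RANGES = '0-4000'                     # placeholder range label
PERPLEXITY = 30                                # placeholder perplexity
SNR = 50                                       # placeholder signal-to-noise ratio
FILENAME_TSNE_RESULT = 'tSNE_results_range_{}_perplexity_{}_SNRof{}_fftw.npy'.format(
    SELECTED_RANGES, PERPLEXITY, int(SNR))


class _Dirs:                                   # stand-in for the project's `dirs` module
    data = './data/'


dirs = _Dirs()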
def reduceDimension(adata, n_pca_components=25, n_components=2, n_neighbors=10,
                    reduction_method='trimap', velocity_key='velocity_S', cores=1):
    """Compute a low-dimensional projection of an AnnData object, first with PCA,
    followed by a non-linear dimension reduction method.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        an AnnData object
    n_pca_components: 'int' (optional, default 25)
        Number of PCA components.
    n_components: 'int' (optional, default 2)
        The dimension of the space to embed into.
    n_neighbors: 'int' (optional, default 10)
        Number of nearest neighbors when constructing the adjacency matrix.
    reduction_method: 'str' (optional, default trimap)
        Non-linear dimension reduction method to further reduce dimension based on the top
        n_pca_components PCA components. Currently, PSL (probabilistic structure learning,
        a new dimension reduction by us), tSNE (fitsne is used instead of traditional tSNE)
        or umap are supported.
    velocity_key: 'str' (optional, default velocity_S)
        The dictionary key that corresponds to the estimated velocity values.
    cores: `int` (optional, default `1`)
        Number of cores. Used only when the tSNE reduction_method is used.

    Returns
    -------
    Returns an updated `adata` with reduced dimension data for spliced counts, projected
    future transcript counts 'Y_dim' and adjacency matrix when possible.
    """

    n_obs = adata.shape[0]

    if 'use_for_dynamo' in adata.var.keys():
        X = adata.X[:, adata.var.use_for_dynamo.values]
        if velocity_key is not None:
            X_t = adata.X[:, adata.var.use_for_dynamo.values] + \
                  adata.layers[velocity_key][:, adata.var.use_for_dynamo.values]
    else:
        X = adata.X
        if velocity_key is not None:
            X_t = adata.X + adata.layers[velocity_key]

    if (('X_pca' not in adata.obsm.keys()) or 'pca_fit' not in adata.uns.keys()) \
            or reduction_method == "pca":
        transformer = TruncatedSVD(n_components=n_pca_components + 1, random_state=0)
        X_fit = transformer.fit(X)
        X_pca = X_fit.transform(X)[:, 1:]
        adata.obsm['X_pca'] = X_pca
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys():
            X_t_pca = X_fit.transform(X_t)[:, 1:]
            adata.obsm['_velocity_pca'] = X_t_pca - X_pca
    else:
        X_pca = adata.obsm['X_pca'][:, :n_pca_components]
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys():
            X_t_pca = adata.uns['pca_fit'].fit_transform(X_t)
            adata.obsm['_velocity_pca'] = X_t_pca[:, 1:(n_pca_components + 1)] - X_pca
        adata.obsm['X_pca'] = X_pca

    if reduction_method == "trimap":
        import trimap

        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance='angular',  # cosine
            weight_adj=1000.0,
            apply_pca=False)
        X_dim = triplemap.fit_transform(X_pca)

        adata.obsm['X_trimap'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
                                  'connectivities': None, 'distances': None, 'indices': None}
    elif reduction_method == 'tSNE':
        try:
            from fitsne import FItSNE
        except ImportError:
            print('Please first install fitsne to perform accelerated tSNE method. '
                  'Install instruction is provided here: https://pypi.org/project/fitsne/')

        X_dim = FItSNE(X_pca, nthreads=cores)  # use FitSNE
        # bh_tsne = TSNE(n_components = n_components)
        # X_dim = bh_tsne.fit_transform(X_pca)

        adata.obsm['X_tSNE'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
                                  'connectivities': None, 'distances': None, 'indices': None}
    elif reduction_method == 'umap':
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            graph, knn_indices, knn_dists, X_dim = umap_conn_indices_dist_embedding(X_pca)  # X_pca

        adata.obsm['X_umap'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
                                  'connectivities': graph, 'distances': knn_dists, 'indices': knn_indices}
    elif reduction_method == 'psl':
        adj_mat, X_dim = psl_py(X_pca, d=n_components, K=n_neighbors)  # this needs to be updated
        adata.obsm['X_psl'] = X_dim
        adata.uns['PSL_adj_mat'] = adj_mat
    else:
        raise Exception(
            'reduction_method {} is not supported.'.format(reduction_method))

    return adata
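# A hedged usage sketch for reduceDimension() above, assuming a dynamo-style AnnData
# object whose layers include the spliced velocity estimates referenced by
# velocity_key, and that the module-level helpers it needs (TruncatedSVD from sklearn,
# umap_conn_indices_dist_embedding, psl_py, warnings) are importable.
import anndata
import numpy as np

n_cells, n_genes = 500, 200
adata = anndata.AnnData(np.random.rand(n_cells, n_genes))
adata.layers['velocity_S'] = np.random.rand(n_cells, n_genes)  # toy velocity layer

adata = reduceDimension(adata, n_pca_components=25, reduction_method='umap')
# adata.obsm['X_umap'] now holds the 2-D embedding;
# use reduction_method='tSNE' to dispatch to FItSNE instead (requires fitsne).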