def load_samples(sample_ids):
    """
    Load Velocyto output loom files and Seurat data.

    Parameters
    ----------
    sample_ids : list
        List of sample IDs.

    Returns
    -------
    samples : dict
        Dictionary of dictionaries, {s: dict} for s in sample_ids.
        Each subdictionary contains:
            AnnData object (key: 'main') loaded from the loom file
            AnnData object (key: from filename) for CSV files with Seurat data
    """
    samples = {}
    for s in sample_ids:
        samples[s] = {}
        filename = "./inputs/" + s + ".loom"
        comment = "\nLoading sample " + s + " from " + filename
        print(comment)
        samples[s]['main'] = scv.read(filename, cache=False)
        csv_files = filter(lambda x: x.startswith(s) and x.endswith(".csv"),
                           os.listdir("./outputs/seurat_dat/"))
        for csvf in csv_files:
            varname = os.path.splitext(csvf)[0].casefold()[len(s) + 1:]
            samples[s][varname] = scv.read("./outputs/seurat_dat/" + csvf, cache=False)
    return samples
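# A minimal usage sketch for load_samples; the sample IDs below are hypothetical
# placeholders, and it assumes the ./inputs and ./outputs/seurat_dat layout
# described in the docstring, with scv and os imported as in the surrounding code.
example_ids = ["sampleA", "sampleB"]          # hypothetical sample IDs
samples = load_samples(example_ids)
# each entry holds the loom-derived AnnData plus any per-sample Seurat CSVs
print(samples["sampleA"]["main"])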
def read_adata(train_paths, train_datasets, test_path, test_dataset):
    """
    Read AnnData files of training and testing datasets.

    This function reads a list of training datasets (at least one) and one
    testing dataset from .h5ad files, and returns a list of training AnnData
    objects and a testing AnnData object.

    Parameters
    ----------
    train_paths : list
        List of paths where training datasets are located.
    train_datasets : list
        List of names of training datasets (same order as paths).
    test_path : str
        Path of the test dataset.
    test_dataset : str
        Name of the test dataset.

    Returns
    -------
    list
        A list of training dataset AnnData objects.
    AnnData
        An AnnData object containing the testing dataset.
    """
    adata_trains = []
    for i in range(len(train_datasets)):
        adata_trains.append(
            scv.read(os.path.join(train_paths[i], train_datasets[i])))
    adata_pred = scv.read(os.path.join(test_path, test_dataset))
    return adata_trains, adata_pred
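# A minimal usage sketch for read_adata; the paths and dataset file names below
# are hypothetical placeholders, not files shipped with this repository.
train_paths = ["./data/train1", "./data/train2"]
train_datasets = ["pancreas_train1.h5ad", "pancreas_train2.h5ad"]
adata_trains, adata_pred = read_adata(train_paths, train_datasets,
                                      "./data/test", "pancreas_test.h5ad")
print(len(adata_trains), adata_pred.shape)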
def scv_open(file_name):
    """
    Open a file for scVelo RNA velocity analysis.

    See https://scvelo.readthedocs.io/VelocityBasics.html for the file format
    and requirements.

    :param file_name: path to the input file
    :return: AnnData object with unique variable names
    """
    adata = scv.read(file_name, cache=True)
    adata.var_names_make_unique()
    return adata
def subset_anndata(loom, csvfile):
    adata = scv.read(loom)
    adata.var_names_make_unique()
    # keep only the barcode portion of each cell name
    adata.obs.index = [
        re.search("[ACTG]{6,}", x).group(0) for x in adata.obs.index
    ]
    # subset to the cells present in the matching Seurat CSV
    my_pd = pd.read_csv(csvfile + ".csv", index_col=0, encoding="utf8")
    intersected = adata.obs.index.intersection(my_pd.index)
    adata = adata[intersected, :]
    return adata
def read_raw(train_paths, train_datasets, test_path, test_dataset):
    """
    Read from adata.raw and revert log1p normalization.

    This function reads a list of training datasets (at least one) and one
    testing dataset, and reverses the log1p normalization on the raw set,
    which includes all genes and has not been regressed out.

    Parameters
    ----------
    train_paths : list
        List of paths where training datasets are located.
    train_datasets : list
        List of names of training datasets (same order as paths).
    test_path : str
        Path of the test dataset.
    test_dataset : str
        Name of the test dataset.

    Returns
    -------
    list
        A list of training dataset AnnData objects.
    AnnData
        An AnnData object containing the testing dataset.
    """
    adata_trains = []
    for i in range(len(train_datasets)):
        adata_trains.append(
            scv.read(os.path.join(train_paths[i], train_datasets[i])))
        adata_trains[i] = sc.AnnData(X=np.expm1(adata_trains[i].raw.X),
                                     obs=adata_trains[i].obs,
                                     var=adata_trains[i].raw.var)
    adata_pred = scv.read(os.path.join(test_path, test_dataset))
    adata_pred = sc.AnnData(X=np.expm1(adata_pred.raw.X),
                            obs=adata_pred.obs,
                            var=adata_pred.raw.var)
    return adata_trains, adata_pred
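# Sketch of the normalization reversal used above: sc.pp.log1p stores log(1 + x),
# so np.expm1 recovers the normalized counts. Purely illustrative values.
x = np.array([0.0, 1.0, 5.0])
assert np.allclose(np.expm1(np.log1p(x)), x)  # expm1 is the exact inverse of log1p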
def load_pca_data(dictionary):
    """
    Load PCA loadings and variance data output by extractSeurat.R.

    Parameters
    ----------
    dictionary : dict
        Dictionary of sample data.

    Returns
    -------
    dictionary : dict
        The same dictionary, with keys added for PCA loadings and variance.
    """
    try:
        comment = "\nImporting PCA loadings and variance data"
        print(comment)
        dictionary['pca_data'] = {}
        dictionary['pca_data']['loadings'] = scv.read("./outputs/seurat_dat/seur_pca_loadings.csv")
        dictionary['pca_data']['variance'] = scv.read("./outputs/seurat_dat/seur_pca_var.csv")
    except OSError as err:
        print("ERROR: " + str(err))
        print("Proceeding without PCA variance or loadings")
    return dictionary
def plot_velocity(script_pkl, loom_path, components='1,2'):
    data = script_pkl["expression_table"]
    pca = script_pkl["PC_expression"]
    meta_data = script_pkl["annotation"]
    data.index = rename_shl(data.index)
    pca.index = rename_shl(pca.index)
    meta_data.index = rename_shl(meta_data.index)
    adata = anndata.AnnData(data, meta_data)

    # Create an analysis object from the loom file
    adata_loom = scv.read("../20170407-SHL-FACS-Hs_proj.loom", cache=True)
    retained_cells = list(
        set(adata_loom.obs.index).intersection(set(adata.obs.index)))
    retained_cells.sort()
    adata_loom = adata_loom[retained_cells, :]
    adata_loom.var_names_make_unique()

    # plot proportions spliced/unspliced
    # scv.pl.proportions(adata_loom)

    # preprocess
    scv.pp.filter_and_normalize(adata_loom, min_shared_counts=20, n_top_genes=2000)
    scv.pp.moments(adata_loom, n_pcs=30, n_neighbors=30)
    adata_loom.obsm['X_pca'][:, 0:20] = pca
    scv.tl.velocity(adata_loom)
    scv.tl.velocity_graph(adata_loom)
    scv.pl.velocity_embedding(adata_loom, basis='pca', components=components)
import scvelo as scv
import pandas as pd
import os
import numpy as np

os.chdir(
    r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/Magic_based"
)
seur_loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/R_references/Seurat_integration/"
namez = pd.read_csv(seur_loc + 'spliced_seur.csv', index_col=0).index
namez = namez.to_list()
s = scv.read(seur_loc + 'spliced_seur.csv', cache=True,
             first_column_names=True, index_col=0)
s = s.transpose()
u = scv.read(seur_loc + 'unspliced_seur.csv', cache=True,
             first_column_names=True)
u = u.transpose()
adata = s
adata.layers['spliced'] = s.X
adata.layers['unspliced'] = u.X
adata.var_names = namez
scv.pp.moments(adata, n_neighbors=15)
s = scv.read(seur_loc + 'spliced_magic.csv', cache=True,
import numpy as np
import pandas as pd
import scvelo as scv

scv.settings.set_figure_params('scvelo')

working_dir = "/home/kwells4/mTEC_dev/mtec_snakemake/"
seurat_cells = working_dir + "not_yet_included/seurat_info_new.csv"
output = working_dir + "/figure_output/figure_2d_new2.pdf"
input_loom = working_dir + "not_yet_included/wt_velocyto.loom"
seurat_df = pd.read_csv(seurat_cells, index_col=0)
supplement_output = working_dir + "/figure_output/supplemental_scvelo_oct_2019.pdf"
seurat_cell_list = list(seurat_df.index)

adata = scv.read(input_loom, sparse=True, cache=True)
adata.var_names_make_unique()
adata = adata[adata.obs.index.isin(seurat_cell_list)]
scv.utils.show_proportions(adata)
scv.utils.cleanup(adata, clean='all')
scv.pp.filter_and_normalize(adata, min_counts=20, min_counts_u=10, n_top_genes=3000)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
scv.tl.velocity(adata)
scv.tl.velocity_graph(adata)
adata.obs["batch_group"] = metadata["batch_group"].values
adata.obs["cluster"] = umap_cord.iloc[:, 2].values

# Add features (VAR)
feat = pd.read_csv("../data/data_RNAvelocity/processed_counts.txt", sep=" ")
feat = feat.index
feat = pd.DataFrame(feat)
feat = pd.DataFrame({"feature": feat.iloc[:, 0]})
adata.var["features"] = feat.values

# %%
# Read LOOM file
ldata = scv.read("../data/data_RNAvelocity/s_un_am_allgenes.loom", cache=True)

# Merge adata with loom file
adata = scv.utils.merge(adata, ldata)

# %%
# Compute the first- and second-order moments (basically means and variances) for velocity estimation
scv.pp.moments(adata)

# %%
# Estimates of velocity
signatures = dict([(ct, tab.gene.values[tab.cell_type_epi_custom == ct])
                   for ct in pd.unique(tab.cell_type_epi_custom)])
letters = ['C', 'E', 'W']
donors = ['NCO', 'p009ot', 'p013ot']
scv.settings.figdir = '/fast/work/users/peidlis_c/sodar_patient_organoid_data/figures'

# Note: Never analyse NCO and patient tumors together.
panel_genes = ['FABP1', 'PHGR1', 'TFF3', 'MKI67', 'CD44']

# prep and save data
recalc = True
if recalc:
    # Split up conditions:
    for letter in letters:
        cdata = scv.read(data_path + 'NB_AS_' + letter + '/demuxed/NB_AS_' + letter + '_demuxed.h5')
        for donor in donors:
            print(letter, donor)
            adata = cdata[cdata.obs['SNPdemux'] == donor].copy()
            adata.var_names_make_unique()
            adata = pp(adata, min_counts=5000, max_perc_mito=.25)
            # ct_annotate(adata, signatures, rescore=True)
            adata.write(data_path + 'NB_AS_' + letter + '/processed/NB_AS_' + letter + '_' + donor + '.h5')
    # Aggregate conditions:
    for donor in donors:
        adata = None
        for letter in letters:
            dat = scv.read(data_path + 'NB_AS_' + letter + '/demuxed/NB_AS_' + letter + '_demuxed.h5')
                    type=str,
                    help="reduced dimension 1")
parser.add_argument("--rdim2",
                    default="UMAP2",
                    type=str,
                    help="reduced dimension 2")
args = parser.parse_args()

# ########################################################################### #
# ######################## Initialise AnnData ############################### #
# ########################################################################### #

if not args.loom == "none":
    adata = scv.read(args.loom)
    # get directory with metadata + barcodes
    metadata_dir = args.rdims.split("/")[0]
elif not args.dropest_dir == "none":
    exon_matrix = os.path.join(args.dropest_dir, "exons.mtx.gz")
    intron_matrix = os.path.join(args.dropest_dir, "introns.mtx.gz")
    spanning_matrix = os.path.join(args.dropest_dir, "spanning.mtx.gz")
    exons = io.mmread(exon_matrix).transpose().tocsr()
    introns = io.mmread(intron_matrix).transpose().tocsr()
    spanning = io.mmread(spanning_matrix).transpose().tocsr()
    adata = ad.AnnData(X=exons)
    adata.layers["spliced"] = adata.X
# Part 2 of calculating velocity
# Import loom files created by merging pulps 2, 3, 13 and 14 in Seurat
# Import UMAP embeddings from Seurat to visualize velocity in the same UMAP coordinates
import scvelo as scv
import os.path

# -----------------------------------------------------------------------------------
# Open loom files from merged Pulp 2, 3, 13, 14
# I ran python locally, so make sure to check for correct paths
pulp_sdata = scv.read('/Users/delaura/Documents/Tooth/AllPulp/merged/sf.loom', cache=True)
pulp_ndata = scv.read('/Users/delaura/Documents/Tooth/AllPulp/merged/uf.loom', cache=True)
pulp_sdata.layers['spliced'] = pulp_sdata.X
pulp_sdata.layers['unspliced'] = pulp_ndata.X
pulp_sdata
scv.utils.show_proportions(pulp_sdata)

# import UMAP embeddings from R (Seurat) and add to the AnnData object
data_folder = "/Users/delaura/Documents/Tooth/AllPulp/merged"
# import harmony UMAP embedding saved from the Seurat object
obsm = scv.read_csv(os.path.join(data_folder, "embednonames.csv"))
pulp_sdata.obsm["X_umap"] = obsm.values

scv.pp.filter_and_normalize(
    pulp_sdata, min_shared_counts=30, n_top_genes=2000)  # don't run if using Seurat pre-filtered data
scv.pp.moments(pulp_sdata, n_pcs=30, n_neighbors=30)
# scv.tl.umap(pulp_sdata)  # if re-doing UMAP within scVelo
def RunSCVELO(adata=None, h5ad=None, group_by=None, liner_reduction=None,
              nonliner_reduction=None, dirpath="./", fileprefix="", dpi=300,
              min_shared_counts=30, n_pcs=30, n_neighbors=30, approx=True,
              stream_smooth=0.3, stream_density=1.2, arrow_density=0.05,
              arrow_length=15, arrow_size=15, paga_threshold=0.15,
              calculate_velocity_genes=False, velocity_genes_min_corr=0.3,
              velocity_ngenes=100, s_genes=None, g2m_genes=None,
              recover_dynamics=False, n_jobs=12, velocity_with_noise=False,
              calculate_dynamical_genes=False, dynamical_ngenes=100,
              diff_kinetics=False):
    import matplotlib.pyplot as plt
    import random
    random.seed(11)
    import scvelo as scv
    import pandas as pd
    import os

    prevdir = os.getcwd()
    os.chdir(os.path.expanduser(dirpath))

    try:
        if adata is None and h5ad is None:
            print("adata or h5ad must be provided.")
            exit()
        if group_by is None or liner_reduction is None or nonliner_reduction is None:
            print("group_by, liner_reduction and nonliner_reduction must be all provided.")
            exit()

        if adata is None:
            adata = scv.read(h5ad)
        del adata.uns

        liner_reduction = "X_" + liner_reduction
        nonliner_reduction = "X_" + nonliner_reduction
        adata.obs[group_by] = adata.obs[group_by].astype(dtype="category")

        scv.pp.filter_and_normalize(adata, min_shared_counts=min_shared_counts)
        scv.pp.moments(adata, n_pcs=n_pcs, use_rep=liner_reduction, n_neighbors=n_neighbors)

        scv.tl.velocity(adata, vkey="stochastic")
        scv.tl.velocity_graph(adata, vkey="stochastic", n_neighbors=n_neighbors, approx=approx)

        scv.pl.velocity_embedding_stream(adata, title="stochastic", basis=nonliner_reduction,
                                         vkey=["stochastic"], color=group_by,
                                         smooth=stream_smooth, density=stream_density,
                                         save=False, show=True)
        plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_stream.png"])), dpi=dpi)

        scv.pl.velocity_embedding(adata, title="stochastic", basis=nonliner_reduction,
                                  vkey=["stochastic"], color=group_by, size=20,
                                  arrow_length=arrow_length, arrow_size=arrow_size,
                                  density=arrow_density, save=False, show=True)
        plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_arrow.png"])), dpi=dpi)

        scv.tl.velocity_confidence(adata, vkey="stochastic")
        scv.tl.velocity_pseudotime(adata, vkey="stochastic")
        scv.pl.scatter(adata, basis=nonliner_reduction,
                       color=('stochastic_length', 'stochastic_confidence'),
                       cmap='coolwarm', perc=[5, 95], save=False, show=True)
        plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_length_confidence.png"])), dpi=dpi)

        scv.pl.scatter(adata, basis=nonliner_reduction, color='stochastic_pseudotime',
                       cmap='gnuplot', save=False, show=True)
        plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_pseudotime.png"])), dpi=dpi)

        adata.uns['neighbors']['distances'] = adata.obsp['distances']
        adata.uns['neighbors']['connectivities'] = adata.obsp['connectivities']
        scv.tl.paga(adata, groups=group_by, vkey="stochastic")
        scv.pl.paga(adata, basis=nonliner_reduction[2:], threshold=paga_threshold, size=50,
                    alpha=0.02, min_edge_width=2, node_size_scale=1.5, save=False, show=True)
        plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_paga.png"])), dpi=dpi)

        if calculate_velocity_genes is True:
            scv.tl.rank_velocity_genes(adata, vkey="stochastic", groupby=group_by,
                                       min_corr=velocity_genes_min_corr, n_genes=velocity_ngenes)
            df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])
            for cluster in df.columns:
                # df[0:1].values.ravel()[:12]  ### by row
                scv.pl.scatter(adata, color=group_by, basis=df[cluster].values[:6], size=20,
                               linewidth=2, alpha=1,
                               ylabel="cluster: " + cluster + "\nunspliced",
                               add_linfit=True, add_rug=True, add_outline=True,
                               ncols=3, frameon=True, save=False, show=False)
                plt.savefig('.'.join(filter(None, [fileprefix, cluster, "stochastic_genes1.png"])), dpi=dpi)
                scv.pl.velocity(adata, color=group_by, var_names=df[cluster].values[:6], size=10,
                                linewidth=2, alpha=1,
                                ylabel="cluster: " + cluster + "\nunspliced",
                                add_outline=True, basis=nonliner_reduction,
                                color_map=["Spectral", "YlOrRd"], ncols=2, save=False, show=False)
                plt.savefig('.'.join(filter(None, [fileprefix, cluster, "stochastic_genes2.png"])), dpi=dpi)

        if s_genes is not None and g2m_genes is not None:
            scv.tl.score_genes_cell_cycle(adata, s_genes=s_genes, g2m_genes=g2m_genes)
            scv.pl.scatter(adata, basis=nonliner_reduction, color=('S_score', 'G2M_score'),
                           smooth=True, perc=[5, 95], save=False, show=True)
            plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_cellcycle.png"])), dpi=dpi)

        if recover_dynamics is True or diff_kinetics is True or velocity_with_noise is True:
            adata2 = adata[:, adata.var['stochastic_genes']].copy()
            Ms = adata2.layers["Ms"]
            Mu = adata2.layers["Mu"]
            spliced = adata2.layers["spliced"]
            unspliced = adata2.layers["unspliced"]
            stochastic = adata2.layers["stochastic"]
            variance_stochastic = adata2.layers["variance_stochastic"]
            adata2.layers.clear()
            adata2.layers["Ms"] = Ms
            adata2.layers["Mu"] = Mu
            connectivities = adata2.obsp["connectivities"]
            distances = adata2.obsp["distances"]
            adata2.obsp.clear()
            adata2.obsp["connectivities"] = connectivities

            scv.tl.recover_dynamics(adata2, var_names='stochastic_genes', use_raw=False, n_jobs=n_jobs)

            adata2.obsp["distances"] = distances
            adata2.layers["spliced"] = spliced
            adata2.layers["unspliced"] = unspliced
            adata2.layers["stochastic"] = stochastic
            adata2.layers["variance_stochastic"] = variance_stochastic

            scv.tl.velocity(adata2, mode="dynamical", vkey="dynamical")
            scv.tl.velocity_graph(adata2, vkey="dynamical", n_neighbors=n_neighbors, approx=approx)
            scv.pl.velocity_embedding_stream(adata2, title="dynamical", basis=nonliner_reduction,
                                             vkey=["dynamical"], color=group_by,
                                             smooth=stream_smooth, density=stream_density,
                                             save=False, show=True)
            plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_stream.png"])), dpi=dpi)
            scv.pl.velocity_embedding(adata2, title="dynamical", basis=nonliner_reduction,
                                      vkey=["dynamical"], color=group_by, size=20,
                                      arrow_length=arrow_length, arrow_size=arrow_size,
                                      density=arrow_density, save=False, show=True)
            plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_arrow.png"])), dpi=dpi)

            scv.tl.velocity_confidence(adata2, vkey="dynamical")
            scv.tl.velocity_pseudotime(adata2, vkey="dynamical")
            scv.pl.scatter(adata2, basis=nonliner_reduction,
                           color=('dynamical_length', 'dynamical_confidence'),
                           cmap='coolwarm', perc=[5, 95], save=False, show=True)
            plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_length_confidence.png"])), dpi=dpi)
            scv.pl.scatter(adata2, basis=nonliner_reduction, color='dynamical_pseudotime',
                           cmap='gnuplot', save=False, show=True)
            plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_pseudotime.png"])), dpi=dpi)

            scv.tl.latent_time(adata2, vkey="dynamical")
            scv.pl.scatter(adata2, basis=nonliner_reduction, color='latent_time',
                           color_map='gnuplot', save=False, show=True)
            plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_latent_time.png"])), dpi=dpi)

            if calculate_dynamical_genes is True:
                scv.tl.rank_dynamical_genes(adata2, groupby=group_by, n_genes=dynamical_ngenes)
                df = scv.DataFrame(adata2.uns['rank_dynamical_genes']['names'])
                for cluster in df.columns:
                    # df[0:1].values.ravel()[:12]  ### by row
                    scv.pl.scatter(adata, color=group_by, basis=df[cluster].values[:6], size=20,
                                   linewidth=2, alpha=1,
                                   ylabel="cluster: " + cluster + "\nunspliced",
                                   add_linfit=True, add_rug=True, add_outline=True,
                                   ncols=3, frameon=True, save=False, show=False)
                    plt.savefig('.'.join(filter(None, [fileprefix, cluster, "dynamical_genes1.png"])), dpi=dpi)
                    scv.pl.velocity(adata, color=group_by, var_names=df[cluster].values[:6], size=10,
                                    linewidth=2, alpha=1,
                                    ylabel="cluster: " + cluster + "\nunspliced",
                                    add_outline=True, basis=nonliner_reduction,
                                    color_map=["Spectral", "YlOrRd"], ncols=2, save=False, show=False)
                    plt.savefig('.'.join(filter(None, [fileprefix, cluster, "dynamical_genes2.png"])), dpi=dpi)

            if diff_kinetics is True:
                top_genes = adata2.var['fit_likelihood'].sort_values(ascending=False).index[:100]
                scv.tl.differential_kinetic_test(adata2, var_names=top_genes, groupby=group_by)

                scv.tl.velocity(adata2, mode="dynamical", vkey="dynamical_kinetics", diff_kinetics=True)
                scv.tl.velocity_graph(adata2, vkey="dynamical_kinetics", n_neighbors=n_neighbors, approx=approx)
                scv.pl.velocity_embedding_stream(adata2, title="dynamical_kinetics",
                                                 basis=nonliner_reduction, vkey=["dynamical_kinetics"],
                                                 color=group_by, smooth=stream_smooth,
                                                 density=stream_density, save=False, show=True)
                plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_kinetics_stream.png"])), dpi=dpi)
                scv.pl.velocity_embedding(adata2, title="dynamical_kinetics", basis=nonliner_reduction,
                                          vkey=["dynamical_kinetics"], color=group_by, size=20,
                                          arrow_length=arrow_length, arrow_size=arrow_size,
                                          density=arrow_density, save=False, show=True)
                plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_kinetics_arrow.png"])), dpi=dpi)

                scv.tl.velocity(adata2, mode="stochastic", vkey="stochastic_kinetics", diff_kinetics=True)
                scv.tl.velocity_graph(adata2, vkey="stochastic_kinetics", n_neighbors=n_neighbors, approx=approx)
                scv.pl.velocity_embedding_stream(adata2, title="stochastic_kinetics",
                                                 basis=nonliner_reduction, vkey=["stochastic_kinetics"],
                                                 color=group_by, smooth=stream_smooth,
                                                 density=stream_density, save=False, show=True)
                plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_kinetics_stream.png"])), dpi=dpi)
                scv.pl.velocity_embedding(adata2, title="stochastic_kinetics", basis=nonliner_reduction,
                                          vkey=["stochastic_kinetics"], color=group_by, size=20,
                                          arrow_length=arrow_length, arrow_size=arrow_size,
                                          density=arrow_density, save=False, show=True)
                plt.savefig('.'.join(filter(None, [fileprefix, "stochastic_kinetics_arrow.png"])), dpi=dpi)

            if velocity_with_noise is True:
                import numpy as np
                top_genes = adata2.var['fit_likelihood'].sort_values(ascending=False).index[:3]
                adata2.layers['dynamical_with_noise'] = adata2.layers['dynamical'] + \
                    np.random.normal(adata2.layers['dynamical'], scale=adata2.layers['Ms'].std(0))
                scv.tl.velocity_graph(adata2, gene_subset=top_genes, vkey='dynamical_with_noise')
                scv.tl.velocity_embedding(adata2, basis=nonliner_reduction[2:],
                                          vkey='dynamical_with_noise', autoscale=False)
                scv.pl.velocity_embedding_stream(adata2, title="dynamical_with_noise",
                                                 basis=nonliner_reduction, vkey=["dynamical_with_noise"],
                                                 color=group_by, smooth=stream_smooth,
                                                 density=stream_density, save=False, show=True)
                plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_with_noise_stream.png"])), dpi=dpi)
                scv.pl.velocity_embedding(adata2, title="dynamical_with_noise", basis=nonliner_reduction,
                                          vkey=["dynamical_with_noise"], color=group_by, size=20,
                                          arrow_length=arrow_length, arrow_size=arrow_size,
                                          density=arrow_density, save=False, show=True)
                plt.savefig('.'.join(filter(None, [fileprefix, "dynamical_with_noise_arrow.png"])), dpi=dpi)

            import os
            adata2.write('.'.join(filter(None, [fileprefix, "dynamical.h5ad"])), compression='gzip')
    finally:
        os.chdir(prevdir)

    try:
        adata.__dict__['_raw'].__dict__['_var'] = adata.__dict__['_raw'].__dict__['_var'].rename(
            columns={'_index': 'features'})
    except:
        pass

    return adata
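# A hedged usage sketch for RunSCVELO; the h5ad path, group key, and reduction names
# below are hypothetical and depend on how the upstream Seurat/Scanpy object was exported.
if __name__ == "__main__":
    adata = RunSCVELO(h5ad="converted_from_seurat.h5ad",   # hypothetical file
                      group_by="seurat_clusters",
                      liner_reduction="pca",               # becomes obsm key "X_pca"
                      nonliner_reduction="umap",           # becomes obsm key "X_umap"
                      dirpath="./scvelo_out",
                      fileprefix="sample1")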
n1 = ns.n_atlas
n = ds.n_samples
knn = {'row': [], 'col': [], 'val': []}
for e in ns.graph.es:
    v1, v2 = e.source - n1, e.target - n1
    if (v1 > 0) and (v2 > 0):
        knn['row'].append(v1)
        knn['col'].append(v2)
        knn['val'].append(1)
knn = sp.sparse.coo_matrix((knn['val'], (knn['row'], knn['col'])),
                           shape=(n, n), dtype=int)

import scvelo as scv
fn_velocity = '../../data/sequencing/me1/velocity_me1.loom'
adata = scv.read(fn_velocity, cache=True)
adata.obs.index = adata.obs.index.str.slice(4, -1) + '-1'
adata.var_names_make_unique()
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
scv.pp.moments(adata, n_pcs=25, n_neighbors=10)
scv.tl.velocity(adata)
scv.tl.velocity_graph(adata)

ds.query_samples_by_name(adata.obs_names, inplace=True)
ds.samplesheet['northstar_assignment'] = dsme.samplesheet.loc[
    ds.samplenames, 'northstar_assignment']
adata.obsm['X_umap'] = ds.samplesheet.loc[adata.obs_names,
import pandas as pd
import scanpy as sc
import scvelo as scv

sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
scv.logging.print_version()
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization

results_file = 'results_HL.h5ad'
adata_r = sc.read_10x_mtx('/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/filtered_feature_bc_matrix/',
                          var_names='gene_symbols', cache=False)
adata_r.var_names_make_unique()
adata_r
velocity = scv.read("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/Parent_NGSC3_DI_HodgkinsLymphoma_possorted_genome_bam_JLA4X.loom")
adata = scv.utils.merge(adata_r, velocity)
adata_r = scv.utils.merge(adata_r, velocity)
sc.pp.filter_cells(adata_r, min_genes=200)
sc.pp.filter_genes(adata_r, min_cells=3)
sc.pp.filter_cells(adata_r, max_counts=39766)
sc.pp.filter_cells(adata_r, max_genes=5942)
adata_r.var['mt'] = adata_r.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_r, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
adata = adata_r[adata_r.obs.pct_counts_mt < 10, :]
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
def main(argv):
    print("SEUROCITY v1.0.0, (c) 2020 Richard A. Guyer, MD, PhD\n")

    # default for rscript_dir to pass to run_rscript function
    rscript_dir = None
    input_file = "input.rds"

    # handle arguments to adjust default settings
    try:
        opts, args = getopt.getopt(argv, "hlr:w:i:")
    except getopt.GetoptError:
        arg_error()
        sys.exit(1)
    for opt, arg in opts:
        if opt == '-h':
            display_help()
            sys.exit(0)
        elif opt in ("-l"):
            display_license()
            sys.exit(0)
        elif opt in ("-r"):
            rscript_dir = arg
        elif opt in ("-w"):
            os.chdir(arg)
        elif opt in ("-i"):
            input_file = arg

    working_dir = os.getcwd() + "/"
    input_dir = working_dir + "inputs/"
    output_dir = working_dir + "outputs/"

    # check for files required by extractSeurat.R, run if all are present
    required_files_for_R = [input_dir + input_file,
                            input_dir + "idents.txt",
                            input_dir + "reductions.txt",
                            input_dir + "append.txt",
                            working_dir + "extractSeurat.R"]
    if not files_exist(required_files_for_R):
        print("ERROR: Critical files not found in expected locations")
        print("Please ensure proper input file structure")
        print("For help: python Seurocity.py -h")
        print("")
        sys.exit(1)
    else:
        run_rscript(working_dir + "extractSeurat.R",
                    [input_file, "idents.txt", "reductions.txt", "append.txt"],
                    rscript_path=rscript_dir)

    # get sample IDs and reductions output by Rscript
    sample_ids = get_ids()
    reductions = get_reductions()

    # check whether expected loom files exist
    expected_looms = [input_dir + ident + ".loom" for ident in sample_ids]
    if not files_exist(expected_looms):
        print("ERROR: Expected loom files not found in ./inputs")
        print("Please ensure proper input file structure")
        print("For help: python Seurocity.py -h")
        print("")
        sys.exit(1)
    else:
        print("\nLoading files and processing AnnData objects, this may take a few minutes")
        samples = load_samples(sample_ids)
        samples = load_pca_data(samples)
        samples = import_seur_data(samples, sample_ids, reductions)

    # ensure every sample has the same list of genes
    if len(sample_ids) > 1:
        samples = same_genes(samples, sample_ids)

    # save main AnnData object for each sample as a loom file
    comment = "\nSaving main AnnData for each sample as loom files"
    print(comment)
    if os.path.exists(output_dir + "proc_loom"):
        comment = "- WARNING: ./outputs/proc_loom/ exists, files may be overwritten"
        print(comment)
    else:
        os.mkdir(output_dir + "proc_loom")
    for s in sample_ids:
        savename = output_dir + "proc_loom/" + s + "_proc.loom"
        comment = "- Saving sample " + s + " to: " + savename
        print(comment)
        # currently an ArrayView, needs to be converted into a numpy array
        samples[s]['main'].varm['PCs'] = np.asarray(samples[s]['main'].varm['PCs'])
        samples[s]['main'].write_loom(savename, write_obsm_varm=True)

    # remove samples to clean up memory
    del samples

    # generate combined loom file and import pca data
    # generate combined file
    comment = "\nGenerating combined loom file with PCA data loaded"
    if os.path.exists(output_dir + "comb_loom"):
        comment = "- WARNING: ./outputs/comb_loom/ exists, combined.loom will be overwritten if it already exists"
        print(comment)
    else:
        os.mkdir(output_dir + "comb_loom")
    processed_files = os.listdir(output_dir + "proc_loom/")
    processed_files = [output_dir + "proc_loom/" + p for p in processed_files]
    lp.combine(processed_files, output_dir + "comb_loom/combined.loom")

    # load combined loom and pca data files
    combined = scv.read(output_dir + "comb_loom/combined.loom", cache=False)
    pca_var = scv.read(output_dir + "seurat_dat/seur_pca_var.csv")
    pca_load = scv.read(output_dir + "seurat_dat/seur_pca_loadings.csv")

    # variance data
    combined.uns['pca'] = {}
    combined.uns['pca'][pca_var.obs.index[0]] = pca_var.X[0]
    combined.uns['pca'][pca_var.obs.index[1]] = pca_var.X[1]

    # pca loadings
    genes = combined.var.index.tolist()
    combined.varm['PCs'] = np.asarray(pca_load[genes, :].X)

    # save combined, now containing pca loadings and variance data
    combined.write_loom(output_dir + "comb_loom/combined.loom", write_obsm_varm=True)
import os
import re
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import scipy
import scvelo as scv

os.chdir(
    r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/Seurat_based"
)
seur_loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/R_references/Seurat_integration/"
namez = pd.read_csv(seur_loc + 'spliced_seur.csv', index_col=0).index
namez = namez.to_list()
s = scv.read(seur_loc + 'spliced_seur.csv', cache=True,
             first_column_names=True, index_col=0)
s = s.transpose()
u = scv.read(seur_loc + 'unspliced_seur.csv', cache=True,
             first_column_names=True)
u = u.transpose()
adata = s
adata.layers['spliced'] = s.X
adata.layers['unspliced'] = u.X
n = pd.read_csv(seur_loc + 'neighbors_seur.csv', index_col=0)
n = n - 1
adata.uns['neighbors'] = n
adata.var_names = namez
def read_counts_and_phases(count_or_rpkm, use_spike_ins, biotype_to_use, u_plates,
                           use_isoforms=False, load_velocities=False):
    '''
    Read data into scanpy; read phases and FACS intensities.
    - count_or_rpkm: must be "Counts" or "Tpms"
    '''
    read_file = f"input/RNAData/{count_or_rpkm}{'_Isoforms' if use_isoforms else ''}.csv" + (".ercc.csv" if use_spike_ins else "")
    if biotype_to_use != None and len(biotype_to_use) > 0:
        print(f"filtering for biotype: {biotype_to_use}")
        biotype_file = f"{read_file}.{biotype_to_use}.csv"
        if not os.path.exists(biotype_file):
            gene_info = pd.read_csv(f"input/RNAData/IdsToNames{'_Isoforms' if use_isoforms else ''}.csv.gz",
                                    index_col=False, header=None,
                                    names=["gene_id", "name", "biotype", "description"])
            biotyped = gene_info[gene_info["biotype"] == biotype_to_use]["gene_id"]
            pd.read_csv(read_file)[biotyped].to_csv(biotype_file, index=False)
        read_file = biotype_file

    adata = sc.read_csv(read_file)
    print(f"data shape: {adata.X.shape}")
    if load_velocities:
        adata.obs_names = pd.read_csv("input/RNAData/Tpms.obs_names.csv")["well_plate"]

    intensities, phases = [], []
    for plate in u_plates:
        file = f"input/RNAData/180911_Fucci_single cell seq_ss2-18-{plate}_index sort export.csv"
        plateIntensities = pd.read_csv(file, skiprows=2)
        newColumns = list(plateIntensities.columns)
        newColumns[5] = "MeanGreen530"
        newColumns[6] = "MeanRed585"
        plateIntensities.columns = newColumns
        plateIntensities["Plate"] = [plate] * len(plateIntensities)
        plateIntensities["Well_Plate"] = [f"{w}_{plate}" for w in plateIntensities["Well"]]

        intensitiesSubFrame = plateIntensities[plateIntensities["Population"] == "All Events"]
        if len(intensities) == 0:
            intensities = intensitiesSubFrame
        else:
            intensities = intensities.append(intensitiesSubFrame, ignore_index=True)

        isPhaseRow = ~plateIntensities["Population"].isin(["All Events", "Cells", "Singlets"])
        phasesSubFrame = plateIntensities[isPhaseRow & (plateIntensities["% Total"] == "100.00%")]
        if len(phases) == 0:
            phases = phasesSubFrame
        else:
            phases = phases.append(phasesSubFrame, ignore_index=True)

    wp_idx = list(phases.columns).index("Well_Plate")
    pop_idx = list(phases.columns).index("Population")
    phases_lookup = dict([(row[1][wp_idx], row[1][pop_idx]) for row in phases.iterrows()])

    # Assign phases and log intensities; require log intensity
    intensities = intensities.sort_values(by="Well_Plate")
    adata.obs["Well_Plate"] = np.array(intensities["Well_Plate"])
    adata.obs["plate"] = np.array(intensities["Plate"])
    adata.obs["phase"] = np.array([phases_lookup[wp] if wp in phases_lookup else "N/A"
                                   for wp in intensities["Well_Plate"]])
    adata.obs["MeanGreen530"] = np.array(intensities["MeanGreen530"])
    adata.obs["MeanRed585"] = np.array(intensities["MeanRed585"])
    # removes 6 dark, likely mitotic cells
    adata = adata[pd.notnull(adata.obs["MeanGreen530"]) & pd.notnull(adata.obs["MeanRed585"])]

    # Read in fucci pseudotime from previous analysis
    if os.path.isfile("output/fucci_time.csv"):
        adata.obs["fucci_time"] = np.array(pd.read_csv("output/fucci_time.csv")["fucci_time"])

    # Get info about the genes
    gene_info = pd.read_csv(f"input/RNAData/IdsToNames{'_Isoforms' if use_isoforms else ''}.csv.gz",
                            header=None, names=["name", "biotype", "description"], index_col=0)
    adata.var["name"] = gene_info["name"]
    adata.var["biotype"] = gene_info["biotype"]
    adata.var["description"] = gene_info["description"]

    if load_velocities:
        ldata = scv.read("input/RNAData/a.loom", cache=True)
        ldata.obs_names = pd.read_csv("input/RNAData/a.obs_names.csv")["well_plate"]
        ldata.var["GeneName"] = ldata.var_names
        ldata.var_names = ldata.var["Accession"]
        adata = scv.utils.merge(adata, ldata, copy=True)

    return adata, phases
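# A minimal usage sketch for read_counts_and_phases; the plate identifiers are
# hypothetical placeholders, and the input/RNAData layout must match the paths
# hard-coded in the function above.
u_plates = ["355", "356"]  # hypothetical plate identifiers
adata, phases = read_counts_and_phases("Tpms", use_spike_ins=False,
                                       biotype_to_use="protein_coding",
                                       u_plates=u_plates, load_velocities=True)
print(adata)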
# %%
import scvelo as scv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# %%
sample_one = scv.read("../test/G328E2L2_scRNAseq_G328E2L3_CITEseq.loom", cache=False)
# sample_one = sample_one.var_names_make_unique
# ....
# sample_n = anndata.read_loom("sample_n.loom")

# %%
sample_obs = pd.read_csv("../test/cellID_obs.csv")
cell_clusters = pd.read_csv("../test/cell_clusters.csv")

# %%
sample_one.obs = sample_one.obs.rename(
    index=lambda x: x.split(":")[-1].replace("x", "-1"))
sample_one.obs.head()

# %%
sample_one = sample_one[np.isin(sample_one.obs.index, sample_obs["x"])]
sample_one.obs.head()

# %%
# Now that we have our velocity file filtered based upon our Seurat object,
# we can go ahead and add UMAP coordinates. We'll first upload them:
umap = pd.read_csv("../test/cell_embeddings.csv")

# %%
# With the coordinates, we will need to make sure we add them so they match the order
# of the cell IDs in our AnnData object. Our cell IDs are rownames in the observation
# layer of our object, so we can view them by using the following:
sample_one.obs.index
# Let's cast our index as a data frame and change the column name
sys.stderr.write("beginning scvelo!")

velocity_loom = snakemake.input.velocity_loom
seurat_loom = snakemake.input.seurat_loom
sample_batch = snakemake.params.seurat_sample
out_object = snakemake.output.out_object
out_dir = os.path.dirname(out_object)

# walkthrough
# https://colab.research.google.com/github/theislab/scvelo_notebooks/blob/master/VelocityBasics.ipynb#scrollTo=iHl8jdCUd1j8
# scvelo documentation
# https://readthedocs.org/projects/scvelo/downloads/pdf/latest/

# ds = loompy.connect(seurat_loom, mode="r")
# seurat object to loom ... in R: 'as.loom(seuratObj, filename = "seuratObj.loom")'

adata = scv.read(velocity_loom)

# remove cell id duplicates
adata.obs_names_make_unique("-")
non_duplicates = [x for x in adata.obs_names if "-" not in x]
adata = adata[adata.obs_names.isin(non_duplicates)]

# make gene names unique
adata.var_names_make_unique("-")

os.chdir(out_dir)

# matplotlib settings for 'upgraded' images
scv.set_figure_params('scvelo')
scv.logging.print_version()
sc.pl.umap(a, color=['tp'], save='_' + r + '_tp')
sc.pl.umap(a, color=['ActualRegion'], save='_' + r + '_ActualRegion')
a.obs = adata.obs.loc[a.obs.index, :]
sc.pl.umap(a, color=['subclassname'], save='_' + r + '_Subclass')

### Now the velocity, using the already projected data
velodirs = [
    "E65-2019A_AND_E65-2019B_MULTI-SEQ_1_Out_velocyto",
    "E65-2019A_AND_E65-2019B_MULTI-SEQ_2_Out_velocyto",
    "E65-2019A_AND_E65-2019B_MULTI-SEQ_3_Out_velocyto",
    "E80-2019_MULTI-SEQ_Out_velocyto",
    "E90-2019_MULTI-SEQ_Out_velocyto"
]
velofiles = [os.listdir(os.path.join(headpath, x))[0] for x in velodirs]
velopaths = [os.path.join(headpath, x, y) for x, y in zip(velodirs, velofiles)]
velolooms = [scv.read(x, cache=True) for x in velopaths]
for i in range(len(velolooms)):
    velolooms[i].obs.index = [
        re.sub("-1", "", x) for x in velolooms[i].obs.index
    ]
    velolooms[i].obs.insert(0, 'batch', velodirs[i])
vdata = sc.AnnData.concatenate(*velolooms)
print(vdata.obs)
vdata.var_names_make_unique()
avdata = scv.utils.merge(adata, vdata)
print(avdata.obs)
print(adata.obs)
avdata.var_names_make_unique()
print('norm')
scv.pp.filter_genes(avdata)
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib import pyplot as plt
from matplotlib import rcParams

##### Setup scVelo
sc.settings.verbosity = 3
sc.set_figure_params(dpi=80, color_map='viridis')
scv.settings.set_figure_params('scvelo')

##### Load Data
LoomFile = sys.argv[1]
OUTFILE_PREFIX = sys.argv[2]
Experiment = OUTFILE_PREFIX
adata = scv.read(LoomFile, cache=True)
adata.var_names_make_unique()
cellnames = adata.obs_names
df = pd.DataFrame(adata.obs_names)
df.to_csv('Samples.txt', index=False)
anno = pd.read_csv('Annotations.txt')  # Example of Annotations.txt is available on GitHub
adata.obs = anno

#### Basic Filtering
sc.pp.filter_cells(adata, min_genes=0)
sc.pp.filter_genes(adata, min_cells=0)

##### Normalization
# import library
import scvelo as scv

# set parameters
scv.set_figure_params()
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization

# load data
adata_merged = scv.read("/rsrch3/scratch/sarc_med_onco-rsch/dtruong4/LPS_scRNA/LPS_data_cell_velocyto.h5ad")

print('Running recover_dynamics')
scv.tl.recover_dynamics(adata_merged, n_jobs=20)

print('Running velocity')
scv.tl.velocity(adata_merged, mode='dynamical')

print('Running velocity_graph')
scv.tl.velocity_graph(adata_merged, n_jobs=20)

print('Writing data')
adata_merged.write("/rsrch3/scratch/sarc_med_onco-rsch/dtruong4/LPS_scRNA/LPS_data_cell_velocyto_calc_dynamic.h5ad")
print('Job done')
import scvelo as scv
scv.settings.set_figure_params('scvelo')
import scanpy.api as sc
sc.settings.autoshow = False
sc.settings.autosave = True
sc.settings.figdir = '/scrapp2/mtschmitz/data/Exonic/fig'

adata = sc.read_10x_mtx('/scrapp2/mtschmitz/data/Exonic/E40_motor_Out/outs/filtered_gene_bc_matrices/refdata-celranger-mmul8-toplevel/', cache=True)
ldata = scv.read('/scrapp2/mtschmitz/data/Exonic/E40_motor_Out_velocyto/possorted_genome_bam_RWRQ2.loom', cache=True)
adata.var_names_make_unique()
ldata.var_names_make_unique()
adata = scv.utils.merge(adata, ldata)
adata.var_names_make_unique()

print('norm')
scv.pp.filter_genes(adata)
scv.pp.normalize_per_cell(adata)
scv.pp.filter_genes_dispersion(adata)
scv.pp.log1p(adata)
print(adata)
print('moment')
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
print('velo')
scv.tl.umap(adata)
scv.tl.velocity(adata)
print('graph')
scv.tl.velocity_graph(adata)
scv.tl.velocity_embedding(adata, basis='umap')
scv.pl.velocity_embedding(adata, basis='umap', save='Embed')
scv.pl.velocity_embedding_grid(adata, basis='umap', save='Grid')
scv.pl.velocity_embedding_stream(adata, basis='umap', save='stream')
sc.tl.leiden(adata)
def main():
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f", "--filename", dest="filename", default=None, required=True,
                        help="Analysis result file name", metavar="FILE")
    parser.add_argument("-t", "--toolname", dest="toolname", default=None, required=True,
                        type=str.lower,
                        choices=['scanpy', 'paga', 'seurat', 'stream', 'velocity'],
                        help="Tool used to generate the analysis result.")
    parser.add_argument("-a", "--annotations", dest="annotations", default=None, required=True,
                        help="Annotation file name. It contains the cell annotation key(s) to visualize in one column.")
    parser.add_argument("-g", "--genes", dest="genes", default=None,
                        help="Gene list file name. It contains the genes to visualize in one column.")
    parser.add_argument("-o", "--output", dest="output", default='vr_report',
                        help="Output folder name")
    parser.add_argument("--layer", dest="layer", default='norm_data',
                        help="The name of layer in Anndata object for gene expression")

    args = parser.parse_args()
    filename = args.filename
    toolname = args.toolname
    genes = args.genes
    output = args.output  # work directory
    annotations = args.annotations
    layer = args.layer

    if annotations is None:
        raise Exception("Annotation file must be specified when %s is chosen." % (toolname))

    if toolname != 'velocity':
        try:
            ann_list = pd.read_csv(annotations, sep='\t', header=None,
                                   index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except:
            print('Failed to load in annotation file.')
            raise
        else:
            ann_list = list(set(ann_list))

    if genes is not None:
        try:
            gene_list = pd.read_csv(genes, sep='\t', header=None,
                                    index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except:
            print('Failed to load in gene list.')
            raise
        else:
            gene_list = list(set(gene_list))
    else:
        gene_list = None

    print("Converting '%s' analysis result ..." % toolname)

    if toolname in ['scanpy', 'paga', 'seurat']:
        if toolname == 'scanpy':
            assert filename.lower().endswith('.h5ad'), "For Scanpy only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_scanpy_cells(adata, ann_list, gene_list=gene_list, reportdir=output)
        if toolname == 'paga':
            assert filename.lower().endswith('.h5ad'), "For PAGA only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_paga_graph(adata, reportdir=output)
            scvr.output_paga_cells(adata, ann_list, gene_list=gene_list, reportdir=output)
        if toolname == 'seurat':
            assert filename.lower().endswith('.loom') or filename.lower().endswith('.h5ad'), \
                "For Seurat only .loom or .h5ad file is supported."
            print('reading in loom file ...')
            if filename.lower().endswith('.loom'):
                adata = ad.read_loom(filename)
            else:
                adata = ad.read(filename)
            scvr.output_seurat_cells(adata, ann_list, gene_list=gene_list, reportdir=output)
        with open(os.path.join(output, 'index.json'), 'w') as f:
            json.dump({"tool": toolname}, f)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)

    if toolname == 'velocity':
        assert filename.lower().endswith('.h5ad') or filename.lower().endswith('.loom'), \
            'Velocity supports .h5ad or .loom.'
        adata = scv.read(filename)
        scvr.output_velocity_cells(adata, ann_field=annotations, gene_list=gene_list, reportdir=output)

    if toolname == 'stream':
        try:
            import stream as st
        except ImportError:
            raise ImportError('Please install STREAM >=0.5: `conda install -c bioconda stream`.')
        assert filename.lower().endswith('.pkl'), "For STREAM only .pkl file is supported."
        print('reading in pkl file ...')
        adata = st.read(filename, file_format='pkl', workdir='./')
        st.save_vr_report(adata, ann_list=ann_list, gene_list=gene_list, file_name=output)
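# A hedged example of invoking this converter for an RNA velocity result from the
# command line; the script name and file names are placeholders and assume the
# argparse options defined above (-f, -t, -a, -o).
#
#   python convert_to_vr.py -f velocity_result.h5ad -t velocity \
#       -a annotations.txt -o vr_report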
for file in file_list:
    name = re.sub("_.+", "", file)
    con_dir[name] = subset_anndata(file, csv_loc + name)

concat = anndata.concat(con_dir, axis=0, label="dataset")
path = Path(
    r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/" +
    r"Concat_raw.h5ad")
concat.write_h5ad(filename=path)

del concat
del con_dir
del file_list

loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/Raw_based/"
os.chdir(loc)
adata = scv.read(path)
adata.obs.dataset = [x for x in adata.obs.dataset]

new_index = []
for ob in range(len(adata.obs.index)):
    cell = adata.obs.index[ob]
    dataset = adata.obs["dataset"][ob]
    n_ind = cell + "_" + dataset
    new_index.append(n_ind)
adata.obs.index = new_index

# adding seurat data
csv_loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/R_references/"
seurat_meta = pd.read_csv(csv_loc + "seurat_meta.csv", index_col=0)
seurat_meta[["origin", "cells", "clusters"]] = seurat_meta[["origin", "cells",
import scvelo as scv
import pandas as pd
import numpy as np
from scipy import sparse
import os

os.chdir(r"C:\Users\USER\Documents\R\RNAseq\scientificProject\data\combined")
scv.settings.set_figure_params('scvelo')

s = scv.read('sub_spliced.csv', cache=True, first_column_names=True)
s = s.transpose()
u = scv.read('sub_unspliced.csv', cache=True)
u = u.transpose()

adata = s
adata.layers['spliced'] = s.X
adata.layers['unspliced'] = u.X

m = pd.read_csv("combined_meta.csv")
adata.obs['cell_cluster'] = list(m['seurat_clusters'])
adata.obs['cell_cluster'] = adata.obs['cell_cluster'].astype('category')
UMAP_D = m[['UMAP_1', 'UMAP_2']]
adata.obsm['X_umap_ori'] = np.asanyarray(UMAP_D)

del u
del s
del m

scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
if False:
    print('Analyze RNA velocity')
    csts = [
        'Early Car4- capillaries',
        'Late Car4- capillaries',
    ]
    dsi = ds.query_samples_by_metadata(
        'cellSubtype in @csts', local_dict=locals(), inplace=False)

    print('Load combined velocity file')
    fn_combined = '../../data/sequencing/datasets/all_{:}/velocity_endo.loom'.format(version)
    import scvelo as scv
    adata_endo = scv.read(fn_combined, cache=True)
    adata_endo.var_names_make_unique()

    print('Restrict to subtypes')
    adata = adata_endo[dsi.samplenames]

    print('Follow tutorial')
    # show proportions of spliced/unspliced abundances
    # scv.utils.show_proportions(adata)
    scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
    scv.pp.moments(adata, n_pcs=25, n_neighbors=10)
    scv.tl.velocity(adata)
    scv.tl.velocity_graph(adata)
get_ipython().run_cell_magic(
    'R', '',
    '# Load all the R libraries we will be using in the notebook\nlibrary(scran)\nlibrary(RColorBrewer)\nlibrary(slingshot)\nlibrary(monocle)\nlibrary(gam)\nlibrary(clusterExperiment)\nlibrary(ggplot2)\nlibrary(plyr)\nlibrary(MAST)'
)

scv.settings.set_figure_params('scvelo')

# split into 3 separate objects
TLS = adata[adata.obs['donor'].isin(['TLS'])]
Gastruloid = adata[adata.obs['donor'].isin(['Gastruloid'])]
TLSCL = adata[adata.obs['donor'].isin(['TLSCL'])]

# Read and merge velocity
TLS_loom_120 = scv.read("TLS_120h/velocyto/TLS_120h.loom", sparse=True, cache=True)
TLS_loom_120.var_names_make_unique()
Gastruloid_loom_120 = scv.read("Gastruloid/velocyto/Gastruloid.loom", sparse=True, cache=True)
Gastruloid_loom_120.var_names_make_unique()
TLSCL_loom_120 = scv.read("TLSCL/velocyto/TLSCL.loom", sparse=True, cache=True)
TLSCL_loom_120.var_names_make_unique()

## merge loom file into an already existing object
TLS = scv.utils.merge(TLS, TLS_loom_120)
TLS.var_names_make_unique()
TLS.obs_names_make_unique()
Gastruloid = scv.utils.merge(Gastruloid, Gastruloid_loom_120)