def load_epi(average='geometric'):
    """Load the developmental and TMS epithelial datasets as one dataset.

    Both source datasets are loaded un-averaged and concatenated with
    ``missing='pad'``.

    Parameters:
        average: If falsy, the concatenated per-sample dataset is returned
            unchanged. If equal to ``'geometric'``, the counts are
            log-transformed in place before averaging. Any other truthy
            value averages the counts as they are.

    Returns:
        The concatenated dataset, or its average over samples grouped by
        ``cellSubtype`` and ``Timepoint``.

    NOTE(review): in the ``'geometric'`` case no inverse transform is applied
    in this function, so the averaged counts stay on the log scale - confirm
    that callers expect this.
    """
    import singlet

    # Development data first, then TMS: the order is kept for concatenation.
    parts = [
        load_epi_development(average=False),
        load_epi_TMS(average=False),
    ]
    merged = singlet.concatenate(parts, missing='pad')

    if average:
        if average == 'geometric':
            # Averaging logged counts is what makes the mean geometric.
            merged.counts.log(inplace=True)
        return merged.average('samples', by=['cellSubtype', 'Timepoint'])

    return merged
}, ) ds_tm.samplesheet['Tissue'] = ds_tm.samplesheet['tissue'] ds_tm.samplesheet['Mousename'] = 'TM' + ds_tm.samplesheet['mouse.id'] ds_tm.samplesheet['Dataset'] = 'TabulaMuris' ds_tm.samplesheet['Timepoint'] = '3MO' # They are all endos, without further classification (ouch!) ds_tm.samplesheet['Cell Type'] = ds_tm.samplesheet['cell_ontology_class'] ds_tm.samplesheet['cellSubtype'] = 'unknown (TM)' print('Restrict TM to lung') ds_tm0 = ds_tm ds_tm = ds_tm0.query_samples_by_metadata('Tissue == "Lung"') print('Concatenate datasets') dsme = concatenate([ds, ds_tm], missing='pad') print('Normalize') dsme.counts.normalize('counts_per_million', inplace=True) if False: print('Feature selection') features = dsme.feature_selection.overdispersed_within_groups( 'Mousename', inplace=False) dsf = dsme.query_features_by_name(features) print('PCA') dsc = dsf.dimensionality.pca(n_dims=50, robust=False, return_dataset='samples')
dataset={ 'path': fns_loom['schyns'], 'index_samples': 'CellID', 'index_features': 'Gene', 'bit_precision': 32, }, ) ds_sc.samplesheet['cellSubtype'] = ds_sc.samplesheet['ClusterName'] print('Merge the data by hand') genes = np.intersect1d(ds.featurenames, ds_sc.featurenames) ds.query_features_by_name(genes, inplace=True) ds.samplesheet['Data source'] = 'Domingo-Gonzales\net al. (this paper)' ds_sc.query_features_by_name(genes, inplace=True) ds_sc.samplesheet['Data source'] = 'Schyns et al.' dsme = concatenate([ds, ds_sc]) dsme.counts.normalize('counts_per_million', inplace=True) print('Feature selection') features = dsme.feature_selection.overdispersed_within_groups( 'Data source', inplace=False) dsf = dsme.query_features_by_name(features) print('PCA') dsc = dsf.dimensionality.pca(n_dims=30, robust=False, return_dataset='samples') print('Embedding') vs = dsc.dimensionality.umap() dsme.samplesheet['umap1'] = vs.iloc[:, 0] dsme.samplesheet['umap2'] = vs.iloc[:, 1]
else: cache = pd.read_csv( '../../data/sequencing/me1/northstar_with_Palantir.tsv', sep='\t', index_col=0) vs = cache.iloc[:, :2] northstar_assignment = cache['northstar_assignment'] print('Make dataset with merged') genes = np.intersect1d(ds.featurenames, dsP.featurenames) ds.query_features_by_name(genes, inplace=True) ds.samplesheet['Data source'] = 'new_data' ds.samplesheet['Cell Subtype'] = 'ME1' dsPsub.query_features_by_name(genes, inplace=True) dsPsub.samplesheet['Data source'] = 'Palantir' dsme = singlet.concatenate([dsPsub, ds]) dsme.samplesheet['northstar_assignment'] = northstar_assignment new_clusters = [x for x in np.unique(ns.membership) if x.isdigit()] print('Plot embedding') genes = ['Data source', 'Cell Subtype', 'northstar_assignment'] cmaps = { #'clusters': an.uns['cluster_colors'], 'Cell Subtype': { 'HSC': 'deeppink', 'Ery-precursor': 'lawngreen', 'Mono': 'red', 'Mono-precursor': 'purple', 'CLP': 'tan', 'pDC': 'lightseagreen', 'Ery': 'forestgreen',
ns.compute_pca() ns.compute_similarity_graph() print('Cluster graph') ns.cluster_graph() print('Compute embedding') vs = ns.embed('umap') print('Make dataset with merged') genes = np.intersect1d(ds.featurenames, ds_tm.featurenames) ds.query_features_by_name(genes, inplace=True) ds.samplesheet['Data source'] = 'new_data' ds_tm.query_features_by_name(genes, inplace=True) ds_tm.samplesheet['Data source'] = 'Tabula Muris' dsme = concatenate([ds_tm, ds]) dsme.samplesheet['northstar assignment'] = np.concatenate( [ds_tm.samplesheet['Cell Subtype'].values, ns.membership], ) new_clusters = [x for x in np.unique(ns.membership) if x.isdigit()] print('Plot embedding') genes = ['Data source', 'Cell Subtype', 'northstar assignment', 'Tissue'] cmaps = { 'Data source': { 'Tabula Muris': 'darkred', 'new_data': 'steelblue' }, 'Cell Subtype': { 'Mac IV': 'darkolivegreen', 'Mac II': 'lime', 'Mac I': 'seagreen',
if True: print('Cxcl12/Cxcr4/Cxcr7') import singlet dsa = [] for g1, csts1 in ct_groups.items(): if g1 == 'Epi': continue dsia = ds0.query_samples_by_metadata('cellSubtype in @csts1', local_dict=locals()).average( 'samples', by='TimepointHO') dsia.obs['new_name'] = [(g1, tp) for tp in dsia.samplenames] dsia.rename('samples', 'new_name', inplace=True) dsa.append(dsia) dsa = singlet.concatenate(dsa) dsepia = dsepi.average('samples', by=['cellSubtype', 'Timepoint']) genes = ['Cxcl12', 'Cxcr4', 'Ackr3'] df = dsa.counts.loc[genes] df.columns = pd.MultiIndex.from_arrays( [[x[0] for x in df.columns], [x[1] for x in df.columns]], ) for (cst, tp) in dsepia.samplenames: if tp not in tps_epi: continue tmp = pd.Series(np.zeros(len(genes)), index=genes) for gene in genes: if gene in dsepia.featurenames: tmp[gene] = dsepia.counts.loc[gene, (cst, tp)]