def test_conditions(all_names, conditions, atlas_sub, cancer_data, repeats=1):
    """Benchmark ``northstar.Subsample`` over a list of parameter combinations.

    Parameters:
        all_names: parameter names, zipped positionally with each entry of
            ``conditions`` to build the keyword arguments.
        conditions: sized sequence of value tuples, one per combination.
        atlas_sub: atlas passed to ``northstar.Subsample`` as ``atlas``.
        cancer_data: mapping with a ``'counts'`` entry (fed to
            ``fit_transform``) and a ``'meta'`` entry that has a
            ``'Cell_type'`` column and a ``shape``.
        repeats: number of runs per combination.

    Returns:
        A ``pandas.DataFrame`` with one row per (combination, repeat). Each
        row holds the parameters handed to northstar plus ``accuracy``,
        ``runtime`` (seconds, constructor + ``fit_transform``), ``repeat``
        (1-based) and ``ncells``.
    """
    import time

    records = []
    n_conditions = len(conditions)
    for position, values in enumerate(conditions, start=1):
        print('{:} / {:}: {:}'.format(position, n_conditions, values))
        for repeat in range(1, repeats + 1):
            params = dict(zip(all_names, values))

            # 'n' is consumed here: it is removed from the keyword arguments
            # before they reach Subsample and is used to subsample the
            # dataset instead.
            if 'n' not in params:
                dataset = cancer_data
            else:
                dataset = subsample_dataset(cancer_data, params.pop('n'))

            started = time.time()
            model = northstar.Subsample(atlas=atlas_sub, **params)
            predicted = model.fit_transform(dataset['counts'])
            elapsed = time.time() - started

            # The parameter dict doubles as the result row.
            metadata = dataset['meta']
            params['accuracy'] = define_accuracy(
                metadata['Cell_type'].values,
                predicted,
            )
            params['runtime'] = elapsed
            params['repeat'] = repeat
            params['ncells'] = metadata.shape[0]
            records.append(params)

    return pd.DataFrame(records)
def run_northstar(
        adata,
        asub,
        nct,
        rep,
        ):
    """Time one northstar run and score it against the original labels.

    Parameters:
        adata: dataset handed to ``northstar.subsample_atlas``; it needs a
            ``'cell_ontology_class'`` column for the subsampling.
        asub: atlas passed to ``northstar.Subsample``.
        nct: number of cells per type requested from the subsampling.
        rep: repeat index, copied verbatim into the result.

    Returns:
        dict with keys ``'time'``, ``'gof'``, ``'ntot'``, ``'rep'`` and
        ``'tissue'``.

    Notes:
        ``time`` and ``tissue`` are not defined in this function; both are
        looked up in the enclosing (module) scope.
        NOTE(review): the freshly subsampled atlas is only used to count
        cells (``ntot``); the model itself is built from ``asub``. Only the
        constructor is timed and no fit call is visible here -- confirm this
        is intended.
    """
    print('Subsample with {:} cells per type'.format(nct))
    resampled = northstar.subsample_atlas(
        adata,
        cell_type_column='cell_ontology_class',
        n_cells=nct,
    )
    n_total = resampled.X.shape[0]

    print('Run northstar')
    started = time.time()
    ns = northstar.Subsample(asub)
    elapsed = time.time() - started

    # Goodness of fit: fraction of cells whose assigned membership equals
    # the original annotation.
    true_types = ns.new_data.obs['cell_ontology_class'].astype(str)
    matches = true_types == ns.membership
    return {
        'time': elapsed,
        'gof': matches.mean(),
        'ntot': n_total,
        'rep': rep,
        'tissue': tissue,
    }
def test_conditions(all_names, conditions, atlas_sub, gbm, repeats=1):
    """Run ``northstar.Subsample`` on ``gbm`` for every parameter combination.

    Parameters:
        all_names: parameter names, zipped positionally with each entry of
            ``conditions`` to form the keyword arguments.
        conditions: sized sequence of value tuples, one per combination.
        atlas_sub: atlas passed to ``northstar.Subsample`` as ``atlas``.
        gbm: mapping with a ``'counts'`` entry (fed to ``fit_transform``)
            and a ``'meta'`` entry that has a ``'Cell_type'`` column.
        repeats: number of runs per combination.

    Returns:
        A ``pandas.DataFrame`` with one row per (combination, repeat): the
        parameters used plus ``accuracy``, ``runtime`` (seconds, constructor
        + ``fit_transform``) and ``repeat`` (1-based).
    """
    import time

    rows = []
    total = len(conditions)
    for number, values in enumerate(conditions, start=1):
        print('{:} / {:}: {:}'.format(number, total, values))
        for repeat in range(1, repeats + 1):
            params = dict(zip(all_names, values))

            started = time.time()
            model = northstar.Subsample(atlas=atlas_sub, **params)
            predicted = model.fit_transform(gbm['counts'])
            elapsed = time.time() - started

            # The parameter dict is reused as the result row.
            params['accuracy'] = define_accuracy(
                gbm['meta']['Cell_type'].values,
                predicted,
            )
            params['runtime'] = elapsed
            params['repeat'] = repeat
            rows.append(params)

    return pd.DataFrame(rows)
atlases = { 'Velmeshev': adata, 'Darmanis_2015_nofetal': adata_dmnf, } print('Run northstar (subsample)') ress = [] for aname, atlas in atlases.items(): if aname.endswith('avg'): continue print('Atlas: {:}'.format(aname)) ns = northstar.Subsample( atlas, #n_features_per_cell_type=70, #n_features_overdispersed=500, #n_pcs=25, #resolution_parameter=0.003, #n_neighbors=30, #n_neighbors_external=0, #external_neighbors_mutual=True, ) ct_orig = ns.new_data.obs['Cell_type'].astype(str) gof = (ct_orig == ns.membership).mean() identity = ct_orig.to_frame() identity['northstar_assignment'] = ns.membership vs = ns.embed('umap') resd = { 'atlas': aname,
# Script section: classify the GBM dataset against the Darmanis 2015
# (no fetal cells) atlas using northstar's default parameters.
# NOTE(review): indentation was reconstructed from whitespace-collapsed
# source; the extent of the `if False:` body below is assumed -- confirm.
atlas_sub = af.fetch_atlas('Darmanis_2015_nofetal', kind='subsample')

print('Rename a few cell types in the atlas')
# Harmonise atlas labels with the labels used in the GBM metadata.
# Presumably `atlas_sub['cell_types']` is a pandas Series; if so, any label
# missing from this dict becomes NaN after `.map` -- verify.
atlas_sub['cell_types'] = atlas_sub['cell_types'].map(
    {
        'Oligodendrocyte': 'Oligodendrocyte',
        'Vascular': 'Endothelial',
        'Astrocyte': 'Astrocyte',
        'Neuron': 'Neuron',
        'OPC': 'OPC',
        'microglia': 'Immune cell'
    },
)

gbm = ingest_gbm_data()

print('Classify and cluster cells with default parameters')
no = northstar.Subsample(atlas=atlas_sub, )
cell_types = no.fit_transform(gbm['counts'])
# Compare the predicted cell types with the annotated ones.
acc = define_accuracy(gbm['meta']['Cell_type'].values, cell_types)
print(acc)

# Disabled: parameter grid for a broad scan. Flip the condition to enable.
if False:
    print('Scan broad parameter space')
    params_dict = dict(
        n_features_per_cell_type=[10, 30, 50],
        n_features_overdispersed=[50, 300, 1000],
        n_pcs=[10, 20, 40],
        n_neighbors=[10, 20, 50],
        distance_metric=['correlation'],
        threshold_neighborhood=[0.8],
        resolution_parameter=[0.0001, 0.001, 0.01],
    )
def _rename_class(label, cell_type_names, c_fixed):
    """Translate one raw membership label into its display name.

    Labels found in ``cell_type_names`` are looked up there. Purely numeric
    labels are turned into ``'NewClass_XX'``, numbered relative to the
    ``c_fixed`` atlas types. Anything else is returned unchanged.
    """
    if label in cell_type_names:
        return cell_type_names[label]
    if label.isdigit():
        return 'NewClass_' + "{0:0=2d}".format(int(label) - c_fixed + 1)
    return label


def atlas_subsamples_to_tsnedf(new_metadata, new_counttable, **kwargs):
    """Annotate new cells with northstar and embed atlas + new cells by tSNE.

    Parameters:
        new_metadata: DataFrame indexed by cell name. It is modified in
            place: columns ``'new_class'`` and ``'new_class_renamed'`` are
            added.
        new_counttable: count table whose columns are the new cell names.
        **kwargs: must contain ``savedir``, ``timestamp``, ``n_pcs``,
            ``atlas`` (a string: it is also concatenated into the output
            file name), ``CT_lut`` (dict mapping raw labels to display
            names), ``n_features_per_cell_type``,
            ``n_features_overdispersed``, ``n_neighbors``,
            ``threshold_neighborhood`` and ``resolution_parameter``.
            The whole dict is dumped as JSON, so every value must be JSON
            serialisable.

    Returns:
        ``(tsnedf, celltypes)``: the tSNE DataFrame with columns ``'Dim1'``,
        ``'Dim2'`` and ``'new_membership'``, and a list of cell type names
        (sorted atlas types first, then sorted new types).

    Side effects:
        Writes the parameters to
        ``<savedir><timestamp>/annotation_parameters_<atlas>_CellAtlasSubsampling_<timestamp>.json``
        and creates the directory if needed.
    """
    savedir = kwargs['savedir']
    date = kwargs['timestamp']
    n_pcs = kwargs['n_pcs']
    atlas = kwargs['atlas']
    cell_type_names = kwargs['CT_lut']

    # Instantiate the northstar model.
    # NOTE(review): no fit/run call is visible between construction and the
    # reads of `no.membership`, `no.cell_types`, `no.matrix` etc. below;
    # confirm these attributes are populated for the northstar version used.
    no = northstar.Subsample(
        atlas=atlas,
        features=None,
        n_features_per_cell_type=kwargs['n_features_per_cell_type'],
        n_features_overdispersed=kwargs['n_features_overdispersed'],
        n_pcs=n_pcs,
        n_neighbors=kwargs['n_neighbors'],
        distance_metric='correlation',
        threshold_neighborhood=kwargs['threshold_neighborhood'],
        clustering_metric='cpm',
        resolution_parameter=kwargs['resolution_parameter'],
        normalize_counts=True,
    )

    # Add the new membership to the metadata.
    idx = new_counttable.columns
    n_fixed = len(no.cell_types)             # number of atlas cells
    c_fixed = len(np.unique(no.cell_types))  # number of distinct atlas types
    new_metadata.loc[idx, 'new_class'] = no.membership
    new_metadata['new_class_renamed'] = [
        _rename_class(label, cell_type_names, c_fixed)
        for label in new_metadata['new_class']
    ]

    # Unweighted PCA on the feature-selected matrix (atlas cells first, then
    # new cells). The second return value is not needed here.
    cols = list(no.cell_names) + list(new_counttable.columns)
    feature_selected_matrix = pd.DataFrame(
        index=no.features_selected,
        columns=cols,
        data=no.matrix,
    )
    normal_PCA, _ = unweighted_PCA(feature_selected_matrix, n_pcs)

    # Perform tSNE (20 is forwarded to perform_tSNE; presumably the
    # perplexity -- confirm against that helper).
    tsnedf = perform_tSNE(normal_PCA, 20)
    tsnedf.rename(index=str, columns={0: 'Dim1', 1: 'Dim2'}, inplace=True)
    tsnedf.loc[idx, 'new_membership'] = new_metadata.loc[idx, 'new_class_renamed']
    # The first n_fixed rows are the atlas cells: label them with the
    # display name of their atlas type.
    tsnedf.loc[tsnedf[:n_fixed].index, 'new_membership'] = list(
        map(cell_type_names.get, no.cell_types))

    # Write the parameters to JSON in a folder named after the timestamp.
    output_file = (
        savedir + date + '/annotation_parameters_' + atlas
        + '_CellAtlasSubsampling_' + date + '.json'
    )
    # exist_ok avoids the check-then-create race without errno handling.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as handle:
        handle.write(json.dumps(kwargs))

    atlastypes = list(
        np.sort(tsnedf.loc[tsnedf[:n_fixed].index, 'new_membership'].unique()))
    newtypes = list(
        set(new_metadata['new_class_renamed']).difference(atlastypes))
    celltypes = atlastypes + list(np.sort(newtypes))

    return tsnedf, celltypes
for res_par in res_pars: for rep in range(reps): print('Subsample with {:} cells per type'.format(nct)) asub2 = northstar.subsample_atlas( adata, cell_type_column='cell_ontology_class', n_cells=nct, ) ntot = asub2.X.shape[0] print('Run northstar') import time t0 = time.time() ns = northstar.Subsample( asubr, # NOTE: seems like this has to go down with more cell types resolution_parameter=0.005, ) t1 = time.time() t = t1 - t0 ct_orig = ns.new_data.obs['cell_ontology_class'].astype(str) identity = ct_orig.to_frame() identity['northstar'] = ns.membership identity['correct'] = (ct_orig == ns.membership) atlas_cts = asubr.obs['CellType'].unique() new_cats = list(set(ns.membership) - set(csti)) cats_map = {} for nc in new_cats:
# Benchmark loop: for each requested number of cells per type, repeat the
# northstar run `nreps` times and collect timing and goodness of fit in `res`.
nreps = 5
res = []
for nct in ncts:
    for rep in range(nreps):
        print('Subsample with {:} cells per type'.format(nct))
        asub2 = northstar.subsample_atlas(
            adata,
            cell_type_column='cell_ontology_class',
            n_cells=nct,
        )
        # Total number of cells in the subsample.
        ntot = asub2.X.shape[0]

        print('Run northstar')
        # NOTE(review): the model is built from `asub`, not the freshly
        # subsampled `asub2`, and only the constructor is timed (no fit call
        # is visible here) -- confirm this is intended.
        t0 = time.time()
        ns = northstar.Subsample(
            asub,
        )
        t1 = time.time()
        t = t1 - t0

        # Goodness of fit: fraction of cells whose northstar membership
        # equals the original annotation.
        ct_orig = ns.new_data.obs['cell_ontology_class'].astype(str)
        gof = (ct_orig == ns.membership).mean()

        res.append({
            'time': t,
            'gof': gof,
            'ntot': ntot,
            'rep': rep,
        })
) sys.exit() print('Subsample their data') dsPsub = dsP.subsample(40, within_metadata='clusters') if False: print('Merge etc based on northstar') ns = northstar.Subsample( atlas={ 'cell_types': dsPsub.samplesheet['Cell Subtype'], 'counts': dsPsub.counts, }, join='intersection', n_pcs=35, resolution_parameter=0.001, n_features_per_cell_type=80, n_features_overdispersed=0, n_neighbors=20, n_neighbors_external=10, external_neighbors_mutual=False, ) ns.new_data = ds.counts ns._check_init_arguments() ns.fetch_atlas_if_needed() ns.compute_feature_intersection() ns._check_feature_intersection() ns.prepare_feature_selection() ns.select_features() ns._check_feature_selection()
# Cell ontology classes excluded from the atlas before merging.
skip = [
    'professional antigen presenting cell',
    'lymphocyte',
    'leukocyte',
    'myeloid cell',
]
# `@skip` in the query string is resolved through `local_dict`, so the list
# above must be a local name at this point.
ds_tm.query_samples_by_metadata('cell_ontology_class not in @skip', local_dict=locals(), inplace=True)

print('Merge etc based on northstar')
# The atlas is passed as a dict with the cell type annotation and the counts.
ns = northstar.Subsample(
    atlas={
        'cell_types': ds_tm.samplesheet['Cell Subtype'],
        'counts': ds_tm.counts,
    },
    join='intersection',
    n_pcs=35,
    resolution_parameter=0.001,
)

# Attach the new data by hand and drive the individual pipeline stages one
# by one, including private `_check_*` methods, rather than a single
# high-level call. The call order below matters.
# NOTE(review): this relies on northstar internals -- verify against the
# installed northstar version.
ns.new_data = ds.counts
ns._check_init_arguments()
ns.fetch_atlas_if_needed()
ns.compute_feature_intersection()
ns._check_feature_intersection()
ns.prepare_feature_selection()
ns.select_features()
ns._check_feature_selection()
ns.merge_atlas_newdata()
print('Assign subtypes based on Palantir + Northstar')
# Cache file: the block below only runs when it does not exist yet.
# NOTE(review): indentation was reconstructed from whitespace-collapsed
# source; the extent of this `if` body is assumed -- confirm.
fn_cache = '../../data/sequencing/me1/northstar_with_Palantir.pkl'
if not os.path.isfile(fn_cache):
    print('Subsample palantir data')
    dsPsub = dsP.subsample(40, within_metadata='clusters')

    print('Merge etc based on northstar')
    # Build the atlas as AnnData and copy the subtype annotation into the
    # 'CellType' column (presumably the column northstar reads -- confirm).
    atlas = dsPsub.to_AnnData()
    atlas.obs['CellType'] = atlas.obs['Cell Subtype']
    ns = northstar.Subsample(
        atlas=atlas,
        join='intersection',
        n_pcs=35,
        resolution_parameter=0.001,
        n_features_per_cell_type=80,
        n_features_overdispersed=0,
        n_neighbors=20,
        n_neighbors_external=10,
        external_neighbors_mutual=False,
    )

    # Attach the new data by hand and drive the individual pipeline stages
    # one by one, including private `_check_*` methods, rather than a single
    # high-level call. The call order below matters.
    # NOTE(review): this relies on northstar internals -- verify against the
    # installed northstar version.
    ns.new_data = ds.to_AnnData()
    ns._check_init_arguments()
    ns.fetch_atlas_if_needed()
    ns.compute_feature_intersection()
    ns._check_feature_intersection()
    ns.prepare_feature_selection()
    ns.select_features()
    ns._check_feature_selection()
    ns.merge_atlas_newdata()