def atlas_averages_annotationOnly(new_metadata, new_counttable, **kwargs):
    """Annotate new cells with ``northstar.Averages`` and tabulate the result.

    Fits ``northstar.Averages`` on ``new_counttable``, writes the resulting
    labels into ``new_metadata`` and builds a table with one row per atlas
    cell type followed by one row per column of ``new_counttable``.

    Label renaming rule (applied to every row of ``new_metadata``):
      * a label that is a key of ``CT_lut`` is replaced by its LUT value;
      * an all-digit string label becomes ``NewClass_NN`` with
        ``NN = int(label) - n_fixed + 1`` (presumably northstar's numeric
        ids for clusters that are not in the atlas -- confirm);
      * anything else (including missing values on rows that are not in
        ``new_counttable``) is kept unchanged.

    Args:
        new_metadata: DataFrame that is indexed with
            ``new_counttable.columns``. Modified in place: the columns
            ``new_class`` and ``new_class_renamed`` are written.
        new_counttable: table whose columns are the new cells; passed
            unchanged to ``northstar.Averages.fit``.
        **kwargs: required keys are ``n_pcs``, ``atlas``, ``CT_lut``,
            ``weights_atlas_cells``, ``n_features_per_cell_type``,
            ``n_features_overdispersed``, ``n_neighbors``,
            ``n_neighbors_out_of_atlas``, ``threshold_neighborhood`` and
            ``resolution_parameter``. ``CT_lut`` maps raw cell type names to
            display names.

    Returns:
        DataFrame indexed by the atlas cell types followed by the new cells,
        with columns ``new_membership`` (display name) and ``class``
        (1-based integer code: sorted atlas names first, then the sorted
        names that only occur among the renamed labels).

    Raises:
        KeyError: if one of the required keys is missing from ``kwargs``.
    """
    n_pcs = kwargs['n_pcs']
    atlas = kwargs['atlas']
    cell_type_names = kwargs['CT_lut']

    # instantiate class
    sa = northstar.Averages(
        atlas=atlas,
        n_cells_per_type=kwargs['weights_atlas_cells'],
        n_features_per_cell_type=kwargs['n_features_per_cell_type'],
        n_features_overdispersed=kwargs['n_features_overdispersed'],
        n_pcs=n_pcs,
        n_neighbors=kwargs['n_neighbors'],
        n_neighbors_out_of_atlas=kwargs['n_neighbors_out_of_atlas'],
        distance_metric='correlation',
        threshold_neighborhood=kwargs['threshold_neighborhood'],
        clustering_metric='cpm',
        resolution_parameter=kwargs['resolution_parameter'],
        normalize_counts=True,
    )
    sa.fit(new_counttable)

    n_fixed = len(sa.cell_types)
    new_cells = new_counttable.columns

    def _rename(label):
        # Translate one raw label into its display name (see docstring).
        if label in cell_type_names:
            return cell_type_names[label]
        # Guard on str: rows without a label hold NaN, which has no
        # .isdigit() and used to abort the whole annotation.
        if isinstance(label, str) and label.isdigit():
            return 'NewClass_' + "{0:0=2d}".format(int(label) - n_fixed + 1)
        return label

    # add new membership to metadata
    new_metadata.loc[new_cells, 'new_class'] = sa.membership
    new_metadata['new_class_renamed'] = [
        _rename(label) for label in new_metadata['new_class']
    ]

    annotdf = pd.DataFrame(
        index=list(sa.cell_types) + list(new_cells),
        columns=['new_membership', 'class'],
    )
    annotdf.loc[new_cells, 'new_membership'] = new_metadata.loc[
        new_cells, 'new_class_renamed']

    # Atlas rows: fall back to the raw name when the LUT has no entry, the
    # same way new-cell labels do. Mapping straight through the LUT would
    # yield NaN here and make the sort below fail on mixed str/float values.
    atlas_rows = annotdf.index[:n_fixed]
    annotdf.loc[atlas_rows, 'new_membership'] = [
        cell_type_names.get(ct, ct) for ct in atlas_rows
    ]

    atlastypes = list(np.sort(annotdf.loc[atlas_rows, 'new_membership']))
    renamed = new_metadata['new_class_renamed']
    # Unlabelled rows (missing values) do not define a cell type.
    newtypes = list(set(renamed[renamed.notna()]).difference(atlastypes))
    celltypes = atlastypes + list(np.sort(newtypes))

    # 1-based class codes in the order established above.
    class_lut = {name: code for code, name in enumerate(celltypes, start=1)}
    annotdf['class'] = annotdf['new_membership'].map(class_lut)
    return annotdf
'class': 'subsample', 'n_atlas': ns.n_atlas, } ress.append(resd) print('Run northstar (averages)') for aname, atlas in atlases.items(): if not aname.endswith('avg'): continue print('Atlas: {:}'.format(aname)) t0 = time.time() ns = northstar.Averages( atlas, n_features_per_cell_type=50, n_features_overdispersed=500, resolution_parameter=0.005, n_neighbors=30, #n_neighbors_external=5, #external_neighbors_mutual=True, ) ns.fit(adata_tgt) t1 = time.time() t = t1 - t0 ct_orig = ns.new_data.obs['CellType'].astype(str) gof = (ct_orig == ns.membership).mean() identity = ct_orig.to_frame() identity['northstar_assignment'] = ns.membership vs = ns.embed('umap')
def atlas_averages_to_tsnedf(new_metadata, new_counttable, **kwargs):
    """Annotate new cells with ``northstar.Averages`` and embed them with tSNE.

    Fits ``northstar.Averages`` on ``new_counttable``, writes the resulting
    labels into ``new_metadata``, runs ``unweighted_PCA`` and
    ``perform_tSNE`` on the selected-feature matrix, and saves the run
    parameters (JSON) and that matrix (CSV) under ``savedir + timestamp``.

    Label renaming rule (applied to every row of ``new_metadata``):
      * a label that is a key of ``CT_lut`` is replaced by its LUT value;
      * an all-digit string label becomes ``NewClass_NN`` with
        ``NN = int(label) - n_fixed + 1`` (presumably northstar's numeric
        ids for clusters that are not in the atlas -- confirm);
      * anything else (including missing values on rows that are not in
        ``new_counttable``) is kept unchanged.

    Args:
        new_metadata: DataFrame that is indexed with
            ``new_counttable.columns``. Modified in place: the columns
            ``new_class`` and ``new_class_renamed`` are written.
        new_counttable: table whose columns are the new cells; passed
            unchanged to ``northstar.Averages.fit``.
        **kwargs: required keys are ``savedir``, ``timestamp``, ``n_pcs``,
            ``atlas``, ``CT_lut``, ``weights_atlas_cells``,
            ``n_features_per_cell_type``, ``n_features_overdispersed``,
            ``n_neighbors``, ``n_neighbors_out_of_atlas``,
            ``threshold_neighborhood`` and ``resolution_parameter``.
            ``atlas`` must be a string because it becomes part of the output
            file name, and the whole mapping must be JSON-serialisable
            because it is dumped as the parameter record. ``savedir`` is
            concatenated directly with ``timestamp``, so it has to carry its
            own trailing separator if one is wanted.

    Returns:
        Tuple ``(tsnedf, celltypes, distance_matrix)``:
        ``tsnedf`` is the frame returned by ``perform_tSNE`` with columns
        ``0``/``1`` renamed to ``Dim1``/``Dim2`` plus ``new_membership`` and
        a 1-based integer ``class`` column; ``celltypes`` is the ordered
        list of names behind the class codes (sorted atlas names, then the
        sorted remaining names); ``distance_matrix`` is the second value
        returned by ``unweighted_PCA``.

    Raises:
        KeyError: if one of the required keys is missing from ``kwargs``.
        TypeError: if ``kwargs`` cannot be serialised to JSON.
        OSError: if the output directory or files cannot be written.
    """
    savedir = kwargs['savedir']
    date = kwargs['timestamp']
    n_pcs = kwargs['n_pcs']
    atlas = kwargs['atlas']
    cell_type_names = kwargs['CT_lut']

    # instantiate class
    sa = northstar.Averages(
        atlas=atlas,
        n_cells_per_type=kwargs['weights_atlas_cells'],
        n_features_per_cell_type=kwargs['n_features_per_cell_type'],
        n_features_overdispersed=kwargs['n_features_overdispersed'],
        n_pcs=n_pcs,
        n_neighbors=kwargs['n_neighbors'],
        n_neighbors_out_of_atlas=kwargs['n_neighbors_out_of_atlas'],
        distance_metric='correlation',
        threshold_neighborhood=kwargs['threshold_neighborhood'],
        clustering_metric='cpm',
        resolution_parameter=kwargs['resolution_parameter'],
        normalize_counts=True,
    )
    sa.fit(new_counttable)

    new_cells = new_counttable.columns
    n_fixed = len(sa.cell_types)

    def _rename(label):
        # Translate one raw label into its display name (see docstring).
        if label in cell_type_names:
            return cell_type_names[label]
        # Guard on str: rows without a label hold NaN, which has no
        # .isdigit() and used to abort the whole annotation.
        if isinstance(label, str) and label.isdigit():
            return 'NewClass_' + "{0:0=2d}".format(int(label) - n_fixed + 1)
        return label

    # add new membership to metadata
    new_metadata.loc[new_cells, 'new_class'] = sa.membership
    new_metadata['new_class_renamed'] = [
        _rename(label) for label in new_metadata['new_class']
    ]

    # unweighted PCA
    cols = list(sa.cell_types) + list(new_cells)
    feature_selected_matrix = pd.DataFrame(
        index=sa.features_selected, columns=cols, data=sa.matrix)
    normal_PCA, distance_matrix = unweighted_PCA(
        feature_selected_matrix, n_pcs)

    # perform tSNE
    # NOTE(review): 20 is presumably the perplexity -- confirm in perform_tSNE.
    tsnedf = perform_tSNE(normal_PCA, 20)
    tsnedf.rename(index=str, columns={0: 'Dim1', 1: 'Dim2'}, inplace=True)
    tsnedf.loc[new_cells, 'new_membership'] = new_metadata.loc[
        new_cells, 'new_class_renamed']

    # Atlas rows (assumed to be the first n_fixed rows of the embedding):
    # fall back to the raw name when the LUT has no entry, the same way
    # new-cell labels do. Mapping straight through the LUT would yield NaN
    # here and make the sort below fail on mixed str/float values.
    atlas_rows = tsnedf.index[:n_fixed]
    tsnedf.loc[atlas_rows, 'new_membership'] = [
        cell_type_names.get(ct, ct) for ct in atlas_rows
    ]

    # write params to json in new folder with date timestamp
    output_file = (savedir + date + '/annotation_parameters_' + atlas
                   + '_CellAtlasAverages_' + date + '.json')
    # Serialise before touching the disk so that a non-serialisable kwarg
    # does not leave an empty parameter file behind.
    params_json = json.dumps(kwargs)
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as handle:
        handle.write(params_json)

    # save feature matrix for later reference, e.g. making dotplots
    feature_selected_matrix.to_csv(
        savedir + date + '/feature_selected_matrix_' + date + '.csv')

    atlastypes = list(np.sort(tsnedf.loc[atlas_rows, 'new_membership']))
    renamed = new_metadata['new_class_renamed']
    # Unlabelled rows (missing values) do not define a cell type.
    newtypes = list(set(renamed[renamed.notna()]).difference(atlastypes))
    celltypes = atlastypes + list(np.sort(newtypes))

    # 1-based class codes in the order established above.
    class_lut = {name: code for code, name in enumerate(celltypes, start=1)}
    tsnedf['class'] = tsnedf['new_membership'].map(class_lut)
    return tsnedf, celltypes, distance_matrix