np.random.shuffle(other_batches) df_EC.columns = pd.MultiIndex.from_arrays( [EC_batches, df_EC.columns.get_level_values('cell')], names=['batch', 'cell']) df_other.columns = pd.MultiIndex.from_arrays( [other_batches, df_other.columns.get_level_values('cell')], names=['batch', 'cell']) print(df_EC) print(df_other) return df_EC, df_other if __name__ == '__main__': wdir = '/mnt/research/piermarolab/Sergii/results/ESC/' process(*prepareInputData_human_McCracken(), *prepareInputData_mouse_Han(), wdir + 'McCracken_hESC_vs_day8/', wdir + 'Han_mESC_vs_mesenchyme/', nCPUs=8 if platform.system() == "Windows" else 10, parallelBootstrap=True, genesOfInterest=TF, knownRegulators=TFmarkers, exprCutoff1=0.05, exprCutoff2=0.005, perEachOtherCase=True)
exprCutoff1=0.05, exprCutoff2=0.05, genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, perEachOtherCase=True, part1=False, part2=False, part3=False, panels=['fraction', 'binomial', 'top50', 'combo3avgs'], majorMetric='correlation', # (1) correlation (2) spearman dendrogramMetric='euclidean', # (1) euclidean (2) correlation dendrogramLinkageMethod='ward') # (1) ward (2) complete (3) average anHuman, anMouse = process( *(None, None), *(None, None), wdir + 'PanglaoDB_byDCS_human_%s/' % parameters['majorMetric'], wdir + 'PanglaoDB_byDCS_mouse_%s/' % parameters['majorMetric'], **parameters) anHuman.compareTwoCases( wdir + 'PanglaoDB_byDCS_human_correlation/bootstrap/All/', wdir + 'PanglaoDB_byDCS_mouse_correlation/bootstrap/All/', name1='name1', name2='name2', saveName=wdir + 'PanglaoDB_byDCS_human_correlation/bootstrap/All/comparison') anMouse.compareTwoCases( wdir + 'PanglaoDB_byDCS_mouse_correlation/bootstrap/All/', wdir + 'PanglaoDB_byDCS_human_correlation/bootstrap/All/', name1='name1', name2='name2',
dfsh.to_hdf(wdir + 'secondValidationEC.h5', key='df_human', **phdf) if True: dfsmNon = pd.concat(dfsmNon, axis=1, sort=False).fillna(0.) print(dfsmNon, flush=True) dfsmNon.to_hdf(wdir + 'secondValidationNonEC.h5', key='df_mouse', **phdf) dfshNon = pd.concat(dfshNon, axis=1, sort=False).fillna(0.) print(dfshNon, flush=True) dfshNon.to_hdf(wdir + 'secondValidationNonEC.h5', key='df_human', **phdf) # DECNEO bootstrap and analysis if True: anMouse = process(*(None, None), *(None, None), wdir + 'DECNEO analysis/', '/mnt/research/piermarolab/Sergii/results/PanglaoDB_byDCS_human_correlation/', nCPUs=4 if platform.system()=="Windows" else 20, parallelBootstrap=True, genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, perEachOtherCase=True, panels = ['fraction', 'binomial', 'top50', 'combo3avgs'], nBootstrap=100, part1=False, part2=False, part3=False)[0] if False: dfa = pd.read_hdf(wdir + 'secondValidationEC.h5', key='df_mouse') print(dfa, flush=True) dfb = pd.read_hdf(wdir + 'secondValidationNonEC.h5', key='df_mouse').reindex(dfa.index).fillna(0.) print(dfb, flush=True) anMouse.prepareDEG(dfa, dfb) anMouse.preparePerBatchCase(exprCutoff=0.05) if False: anMouse.prepareBootstrapExperiments(parallel=True)
#prepareInputData_human_Choroid_remapped() #process(*prepareInputData_human_Choroid_remapped(), *(None, None), # wdir + 'choroid Voigt remapped test 10/', wdir + 'PanglaoDB_byDCS_mouse/bootstrap/All/', # nCPUs=4 if platform.system()=="Windows" else 8, parallelBootstrap=True, # genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, exprCutoff1=0.01, perEachOtherCase=False, # nBootstrap=10, dendrogramMetric = 'euclidean', dendrogramLinkageMethod = 'average') if False: process(*(None, None), *(None, None), wdir + 'choroid Voigt remapped/', wdir + 'PanglaoDB_byDCS_mouse/bootstrap/All/', nCPUs=4 if platform.system() == "Windows" else 8, parallelBootstrap=True, genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, exprCutoff1=0.01, perEachOtherCase=False, nBootstrap=10, majorMetric='spearman', dendrogramMetric='euclidean', dendrogramLinkageMethod='average') if True: Analysis(workingDir=wdir + 'choroid Voigt remapped/', otherCaseDir=wdir + 'PanglaoDB_byDCS_mouse/bootstrap/All/', genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, panels=['fraction', 'binomial', 'top50', 'combo3avgs', 'max'], externalPanelsData=externalPanelsData).reanalyzeMain(
# Demo driver: loads two expression tables from one HDF5 file and hands them
# to the DECNEO analysis pipeline together with a comparison-case directory.
import pandas as pd

from decneo.analysisPipeline import process

# HDF5 file that stores the two demo tables under the keys 'dfa' and 'dfb'.
demoData = '/mnt/home/domansk6/Projects/Endothelial/scripts/demo/VoightChoroid4567RemappedData.h5'

if __name__ == '__main__':

    wdir = '/mnt/scratch/domansk6/DECNEOdemo/'

    # Read both tables first so the pipeline call below stays a short argument list.
    endothelial = pd.read_hdf(demoData, key='dfa')        # Endothelial cells
    non_endothelial = pd.read_hdf(demoData, key='dfb')    # Non-endothelial cells

    # Keyword settings forwarded unchanged to process().
    settings = dict(
        parallelBootstrap=True,     # Set False if RAM is limited
        exprCutoff1=0.01,           # Gene expression cutoff
        perEachOtherCase=False,     # Comparison mode setting
    )

    process(
        endothelial,
        non_endothelial,
        None, None,                             # Comparison dataset is provided
        wdir,                                   # Working directory
        wdir + 'fromPanglaoDBmouseAllbyDCS/',   # Comparison dataset
        **settings)
df_ranks = df_ranks.loc[~(df_ranks == -1).all(axis=1)] print(df_ranks, flush=True) print('Recording selected batches DE ranks data', flush=True) df_ranks.to_hdf(an.dataSaveName, key='df_ranks', mode='a', complevel=4, complib='zlib') return if __name__ == '__main__': prepareDEGforTissues( '/mnt/research/piermarolab/Sergii/results/PanglaoDB_lung_mouse/data.h5', 'Mus musculus', ['Lung', 'Lung mesenchyme', 'Fetal lung', 'Lung endoderm']) process( *prepareInputData_human_Choroid_remapped(), *(None, None), '/mnt/research/piermarolab/Sergii/results/PanglaoDB_lung_mouse/', '/mnt/research/piermarolab/Sergii/PanglaoDB_byAlona/PanglaoDB_byDCS_human/bootstrap/All/', nCPUs=4 if platform.system() == "Windows" else 20, parallelBootstrap=True, genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, exprCutoff1=0.05, perEachOtherCase=False)
# NOTE(review): this section was recovered from text whose line breaks and
# indentation were lost. The enclosing scope is not visible here, and placing
# print(df_ranks) inside the loop is a reconstruction — confirm against the
# original script.

# Compute DE ranks with each method and store every result under its own key.
for method in ('ttest', 'wilcoxon', 'mannwhitneyu'):
    df_ranks = getDEG(*dfs, method=method)
    df_ranks.to_hdf(ranksFile, key=method, **phdf)
    print(df_ranks)

# Testing demo with 3 DEG methods
if False:
    method = 'mannwhitneyu'    # trying each one of {'ttest', 'mannwhitneyu'}

    endothelial = pd.read_hdf(demoData, key='dfa')        # Endothelial cells
    non_endothelial = pd.read_hdf(demoData, key='dfb')    # Non-endothelial cells

    process(
        endothelial,
        non_endothelial,
        None, None,                                 # Comparison dataset is provided
        wdir + 'demo/%s_3/' % method,
        wdir + 'demo/fromPanglaoDBmouseAllbyDCS/',
        nBootstrap=1,
        nCPUs=2,
        methodForDEG=method,
        parallelBootstrap=True,                     # Set False if RAM is limited
        exprCutoff1=0.01,                           # Gene expression cutoff
        perEachOtherCase=False)

# Data normality testing
if False:

    def _normality_pvalue(row):
        # Second element of scipy.stats.normaltest's result (the p-value),
        # computed over the strictly positive entries of one row.
        positive = row.values[row.values > 0]
        return scipy.stats.normaltest(positive)[1]

    for key in ('dfa', 'dfb'):
        table = pd.read_hdf(demoData, key=key)

        # Keep only rows that have at least 20 positive entries.
        enough_positive = (table > 0).sum(axis=1) >= 20
        pvalues = table.loc[enough_positive].apply(_normality_pvalue, axis=1)

        print(pvalues, '\n', pvalues.mean(), '\n')
genesOfInterest = receptorsListHugo_2555 workingDirOther = 'otherCellTypes_rec/{}/Mus musculus/bootstrap/All/' else: raise NotImplementedError an = process( pd.read_hdf(dataPath.format(group='', celltype=celltype), key='df'), pd.read_hdf(dataPath.format(group='non', celltype=celltype), key='df'), *(None, None), workingDir + 'otherCellTypes_Choroid_AMD_and_Normal_LR_5_celltypes/%s/%s/' % (mode, celltype), workingDir + workingDirOther.format(celltype), nCPUs=20, parallelBootstrap=True, genesOfInterest=genesOfInterest, knownRegulators=knownRegulators, exprCutoff1=0.01, perEachOtherCase=False, nBootstrap=100, part1=True, part2=True, part3=True, PCNpath=PCN_path) #an.reanalyzeMain(togglePublicationFigure=False, includeClusterNumber=False, toggleIncludeHeatmap=False, toggleCalculateMeasures=False, toggleExportFigureData=True) if True: PCN_path = '/mnt/ufs18/home-132/paterno1/Ben/' dataPath = processedDataDir + 'Voigt_all/' + 'df_Voigt_Choroid_AMD_and_Normal_expression_by_celltypes_{group}{celltype}.h5'
for celltype in uCelltypes: try: print(celltype) if True: for species in ['H**o sapiens', 'Mus musculus']: an = Analysis(workingDir = workingDir + '%s/%s/' % (celltype, species), otherCaseDir = workingDir + '%s/%s/' % (celltype, 'H**o sapiens' if species=='Mus musculus' else 'Mus musculus')) print('Saving DEG and expression data, %s, %s' % (species, celltype), flush=True) df_expr = pd.read_hdf(processedDataDir + 'PanglaoDB_expresion_per_batch_%s_%s.h5' % (species, celltype), key='df') df_expr.to_hdf(an.dataSaveName, key='df', mode='a', complevel=4, complib='zlib') df_ranks = pd.read_hdf(processedDataDir + 'PanglaoDB_ttest_ranks_per_batch_%s_%s.h5' % (species, celltype), key='df') df_ranks.to_hdf(an.dataSaveName, key='df_ranks', mode='a', complevel=4, complib='zlib') anHuman, anMouse = process(*(None, None), *(None, None), workingDir + '%s/H**o sapiens/' % celltype, workingDir + '%s/Mus musculus/' % celltype, nCPUs=20, parallelBootstrap=False, nBootstrap=100, PCNpath='/mnt/research/piermarolab/Sergii/results/', exprCutoff1=0.05, exprCutoff2=0.05, genesOfInterest=genesOfInterest, knownRegulators=knownRegulators, perEachOtherCase=True, part1=True, part2=True, part3=True) except Exception as exception: print('\nANALYSIS ERROR:', exception, '\n') # Re-plot main lig if True: for workingDir in ['otherCellTypes_rec/', 'otherCellTypes_lig/']: for celltype in ['Pericyte', 'SMC', 'Macrophage', 'Fibroblast', 'Endothelial']: # Original ['Pericyte', 'SMC', 'Macrophage', 'Fibroblast', 'Endothelial']: try: if workingDir=='otherCellTypes_lig/': knownRegulators = ligands_44 genesOfInterest = ligands_1777 elif workingDir=='otherCellTypes_rec/': knownRegulators = gEC22 genesOfInterest = receptorsListHugo_2555