示例#1
0
    np.random.shuffle(other_batches)

    df_EC.columns = pd.MultiIndex.from_arrays(
        [EC_batches, df_EC.columns.get_level_values('cell')],
        names=['batch', 'cell'])
    df_other.columns = pd.MultiIndex.from_arrays(
        [other_batches,
         df_other.columns.get_level_values('cell')],
        names=['batch', 'cell'])

    print(df_EC)
    print(df_other)

    return df_EC, df_other


if __name__ == '__main__':
    # Root folder under which both case directories of this run live.
    wdir = '/mnt/research/piermarolab/Sergii/results/ESC/'

    # Each helper's result is star-unpacked into the leading positional
    # arguments of process(), human inputs first, then mouse inputs.
    humanInputs = prepareInputData_human_McCracken()
    mouseInputs = prepareInputData_mouse_Han()

    # Fewer workers are requested when running on Windows.
    if platform.system() == "Windows":
        workerCount = 8
    else:
        workerCount = 10

    process(
        *humanInputs,
        *mouseInputs,
        wdir + 'McCracken_hESC_vs_day8/',
        wdir + 'Han_mESC_vs_mesenchyme/',
        nCPUs=workerCount,
        parallelBootstrap=True,
        genesOfInterest=TF,
        knownRegulators=TFmarkers,
        exprCutoff1=0.05,
        exprCutoff2=0.005,
        perEachOtherCase=True,
    )
示例#2
0
        exprCutoff1=0.05,
        exprCutoff2=0.05,
        genesOfInterest=receptorsListHugo_2555,
        knownRegulators=gEC22,
        perEachOtherCase=True,
        part1=False,
        part2=False,
        part3=False,
        panels=['fraction', 'binomial', 'top50', 'combo3avgs'],
        majorMetric='correlation',  # (1) correlation    (2) spearman 
        dendrogramMetric='euclidean',  # (1) euclidean    (2) correlation
        dendrogramLinkageMethod='ward')  # (1) ward    (2) complete (3) average

    # Run the pipeline with None in all four leading positional slots; the two
    # case directories are named after the configured 'majorMetric' value.
    # The same settings mapping is forwarded as keyword arguments.
    anHuman, anMouse = process(
        *(None, None), *(None, None),
        wdir + 'PanglaoDB_byDCS_human_%s/' % parameters['majorMetric'],
        wdir + 'PanglaoDB_byDCS_mouse_%s/' % parameters['majorMetric'],
        **parameters)

    # Compare the human bootstrap results against the mouse ones and save the
    # outcome next to the human results.
    # NOTE(review): these paths hard-code the '_correlation' suffix, while the
    # directories passed to process() above are derived from
    # parameters['majorMetric']; they only coincide while that value is
    # 'correlation' -- confirm before switching metrics.
    # NOTE(review): name1/name2 are passed as the literal placeholders
    # 'name1'/'name2' -- verify this is intended.
    anHuman.compareTwoCases(
        wdir + 'PanglaoDB_byDCS_human_correlation/bootstrap/All/',
        wdir + 'PanglaoDB_byDCS_mouse_correlation/bootstrap/All/',
        name1='name1',
        name2='name2',
        saveName=wdir +
        'PanglaoDB_byDCS_human_correlation/bootstrap/All/comparison')
    anMouse.compareTwoCases(
        wdir + 'PanglaoDB_byDCS_mouse_correlation/bootstrap/All/',
        wdir + 'PanglaoDB_byDCS_human_correlation/bootstrap/All/',
        name1='name1',
        name2='name2',
示例#3
0
            dfsh.to_hdf(wdir + 'secondValidationEC.h5', key='df_human', **phdf)

        # Toggle: concatenate the collected non-EC pieces column-wise, replace
        # missing values with 0 and store both results in one HDF5 file under
        # separate keys.
        if True:
            nonECstore = wdir + 'secondValidationNonEC.h5'

            # Mouse table
            dfsmNon = pd.concat(dfsmNon, axis=1, sort=False)
            dfsmNon = dfsmNon.fillna(0.)
            print(dfsmNon, flush=True)
            dfsmNon.to_hdf(nonECstore, key='df_mouse', **phdf)

            # Human table
            dfshNon = pd.concat(dfshNon, axis=1, sort=False)
            dfshNon = dfshNon.fillna(0.)
            print(dfshNon, flush=True)
            dfshNon.to_hdf(nonECstore, key='df_human', **phdf)

    # DECNEO bootstrap and analysis
    if True:
        # All three pipeline parts are switched off and no input data is
        # passed (four None placeholders); only the first element of what
        # process() returns is kept.
        anMouse = process(
            None, None, None, None,
            wdir + 'DECNEO analysis/',
            '/mnt/research/piermarolab/Sergii/results/PanglaoDB_byDCS_human_correlation/',
            nCPUs=4 if platform.system() == "Windows" else 20,
            parallelBootstrap=True,
            genesOfInterest=receptorsListHugo_2555,
            knownRegulators=gEC22,
            perEachOtherCase=True,
            panels=['fraction', 'binomial', 'top50', 'combo3avgs'],
            nBootstrap=100,
            part1=False,
            part2=False,
            part3=False,
        )[0]

        # Disabled step: rebuild DEG and per-batch data from the stored
        # validation tables.
        if False:
            # Table stored under key 'df_mouse' in the EC validation file.
            dfa = pd.read_hdf(wdir + 'secondValidationEC.h5', key='df_mouse')
            print(dfa, flush=True)

            # Matching non-EC table, aligned to the row index of dfa; rows
            # absent from the non-EC file are filled with 0.
            dfb = pd.read_hdf(wdir + 'secondValidationNonEC.h5', key='df_mouse').reindex(dfa.index).fillna(0.)
            print(dfb, flush=True)

            # Hand both tables to the analysis object, then build the
            # per-batch case with an expression cutoff of 0.05.
            anMouse.prepareDEG(dfa, dfb)
            anMouse.preparePerBatchCase(exprCutoff=0.05)
        
        if False:
            anMouse.prepareBootstrapExperiments(parallel=True)
示例#4
0
    #prepareInputData_human_Choroid_remapped()

    #process(*prepareInputData_human_Choroid_remapped(), *(None, None),
    #        wdir + 'choroid Voigt remapped test 10/', wdir + 'PanglaoDB_byDCS_mouse/bootstrap/All/',
    #        nCPUs=4 if platform.system()=="Windows" else 8, parallelBootstrap=True,
    #        genesOfInterest=receptorsListHugo_2555, knownRegulators=gEC22, exprCutoff1=0.01, perEachOtherCase=False,
    #        nBootstrap=10, dendrogramMetric = 'euclidean', dendrogramLinkageMethod = 'average')

    # Disabled run for the 'choroid Voigt remapped/' directory: no input data
    # is passed (four None placeholders).
    if False:
        bootstrapSettings = dict(
            nCPUs=4 if platform.system() == "Windows" else 8,
            parallelBootstrap=True,
            genesOfInterest=receptorsListHugo_2555,
            knownRegulators=gEC22,
            exprCutoff1=0.01,
            perEachOtherCase=False,
            nBootstrap=10,
            majorMetric='spearman',
            dendrogramMetric='euclidean',
            dendrogramLinkageMethod='average',
        )

        process(None, None, None, None,
                wdir + 'choroid Voigt remapped/',
                wdir + 'PanglaoDB_byDCS_mouse/bootstrap/All/',
                **bootstrapSettings)

    if True:

        Analysis(workingDir=wdir + 'choroid Voigt remapped/',
                 otherCaseDir=wdir + 'PanglaoDB_byDCS_mouse/bootstrap/All/',
                 genesOfInterest=receptorsListHugo_2555,
                 knownRegulators=gEC22,
                 panels=['fraction', 'binomial', 'top50', 'combo3avgs', 'max'],
                 externalPanelsData=externalPanelsData).reanalyzeMain(
示例#5
0
import pandas as pd
from decneo.analysisPipeline import process

# HDF5 file with the demo expression tables, stored under keys 'dfa' and 'dfb'.
demoData = '/mnt/home/domansk6/Projects/Endothelial/scripts/demo/VoightChoroid4567RemappedData.h5'

if __name__ == '__main__':
    # Working directory of the demo run.
    wdir = '/mnt/scratch/domansk6/DECNEOdemo/'

    # Load the two expression tables up front.
    endothelialCells = pd.read_hdf(demoData, key='dfa')
    nonEndothelialCells = pd.read_hdf(demoData, key='dfb')

    process(
        endothelialCells,
        nonEndothelialCells,
        # No data for the comparison case: it is supplied as a directory.
        None,
        None,
        wdir,
        wdir + 'fromPanglaoDBmouseAllbyDCS/',  # Comparison dataset
        # Switch parallelBootstrap off when RAM is limited.
        parallelBootstrap=True,
        # Gene expression cutoff.
        exprCutoff1=0.01,
        # Comparison mode setting.
        perEachOtherCase=False,
    )
示例#6
0
    df_ranks = df_ranks.loc[~(df_ranks == -1).all(axis=1)]
    print(df_ranks, flush=True)

    print('Recording selected batches DE ranks data', flush=True)
    df_ranks.to_hdf(an.dataSaveName,
                    key='df_ranks',
                    mode='a',
                    complevel=4,
                    complib='zlib')

    return


if __name__ == '__main__':
    # Directory of the mouse lung case; its data file is filled first and the
    # same directory is then used as the first case directory of process().
    lungDir = '/mnt/research/piermarolab/Sergii/results/PanglaoDB_lung_mouse/'
    humanReferenceDir = '/mnt/research/piermarolab/Sergii/PanglaoDB_byAlona/PanglaoDB_byDCS_human/bootstrap/All/'

    lungTissues = ['Lung', 'Lung mesenchyme', 'Fetal lung', 'Lung endoderm']
    prepareDEGforTissues(lungDir + 'data.h5', 'Mus musculus', lungTissues)

    # NOTE(review): the input data passed below comes from the human choroid
    # helper although the working directory is the mouse lung one prepared
    # above -- confirm this combination is intended.
    choroidInputs = prepareInputData_human_Choroid_remapped()

    if platform.system() == "Windows":
        workerCount = 4
    else:
        workerCount = 20

    process(
        *choroidInputs,
        None,
        None,
        lungDir,
        humanReferenceDir,
        nCPUs=workerCount,
        parallelBootstrap=True,
        genesOfInterest=receptorsListHugo_2555,
        knownRegulators=gEC22,
        exprCutoff1=0.05,
        perEachOtherCase=False,
    )
示例#7
0
        for method in ['ttest', 'wilcoxon', 'mannwhitneyu']:
            df_ranks = getDEG(*dfs, method=method)
            df_ranks.to_hdf(ranksFile, key=method, **phdf)
            print(df_ranks)

    # Testing demo with 3 DEG methods
    # Disabled by default. One method is selected by hand per run; the output
    # directory name embeds the method name followed by '_3'.
    if False:
        method = 'mannwhitneyu'  # trying each one of {'ttest', 'mannwhitneyu'}

        # Minimal run: a single bootstrap experiment on two CPUs.
        process(
            pd.read_hdf(demoData, key='dfa'),  # Endothelial cells
            pd.read_hdf(demoData, key='dfb'),  # Non-endothelial cells
            None,
            None,  # Comparison dataset is provided
            wdir + 'demo/%s_3/' % method,
            wdir + 'demo/fromPanglaoDBmouseAllbyDCS/',
            nBootstrap=1,
            nCPUs=2,
            methodForDEG=method,
            parallelBootstrap=True,  # Set False if RAM is limited
            exprCutoff1=0.01,  # Gene expression cutoff
            perEachOtherCase=False)

    # Data normality testing
    # Disabled by default. Runs once per demo table ('dfa', then 'dfb').
    if False:
        for key in ['dfa', 'dfb']:
            df = pd.read_hdf(demoData, key=key)
            # Keep rows with at least 20 strictly positive entries, then for
            # each remaining row run scipy.stats.normaltest on its positive
            # values only and keep element [1] of the result (the p-value,
            # per the SciPy documentation).
            df = df.loc[(df > 0).sum(axis=1) >= 20].apply(
                lambda s: scipy.stats.normaltest(s.values[s.values > 0])[1],
                axis=1)
            # Print the per-row values followed by their mean.
            print(df, '\n', df.mean(), '\n')
            genesOfInterest = receptorsListHugo_2555
            workingDirOther = 'otherCellTypes_rec/{}/Mus musculus/bootstrap/All/'
        else:
            raise NotImplementedError

        # Full run (all three parts enabled, 100 bootstrap experiments) for
        # the current `mode` / `celltype` combination.
        an = process(
            # First table: dataPath with an empty `group` placeholder.
            pd.read_hdf(dataPath.format(group='', celltype=celltype),
                        key='df'),
            # Second table: same path pattern with group='non'.
            pd.read_hdf(dataPath.format(group='non', celltype=celltype),
                        key='df'),
            # No data is passed in the next two positional slots.
            *(None, None),
            # Output directory, nested by mode and cell type.
            workingDir +
            'otherCellTypes_Choroid_AMD_and_Normal_LR_5_celltypes/%s/%s/' %
            (mode, celltype),
            # Other-case directory: the workingDirOther pattern filled with
            # the cell type.
            workingDir + workingDirOther.format(celltype),
            nCPUs=20,
            parallelBootstrap=True,
            genesOfInterest=genesOfInterest,
            knownRegulators=knownRegulators,
            exprCutoff1=0.01,
            perEachOtherCase=False,
            nBootstrap=100,
            part1=True,
            part2=True,
            part3=True,
            PCNpath=PCN_path)

        #an.reanalyzeMain(togglePublicationFigure=False, includeClusterNumber=False, toggleIncludeHeatmap=False, toggleCalculateMeasures=False, toggleExportFigureData=True)
    if True:
        PCN_path = '/mnt/ufs18/home-132/paterno1/Ben/'
        dataPath = processedDataDir + 'Voigt_all/' + 'df_Voigt_Choroid_AMD_and_Normal_expression_by_celltypes_{group}{celltype}.h5'
示例#9
0
        for celltype in uCelltypes:
            # Each cell type is handled independently: any failure is printed
            # and the loop continues with the next cell type.
            try:
                print(celltype)

                # Toggle: copy the per-batch expression table and the t-test
                # rank table from processedDataDir into each species' analysis
                # data file.
                if True:
                    # The species literal was corrupted to 'H**o sapiens',
                    # which yields non-matching directory and file names;
                    # the proper name 'Homo sapiens' is restored here.
                    for species in ['Homo sapiens', 'Mus musculus']:
                        # The other case is always the opposite species.
                        otherSpecies = 'Homo sapiens' if species == 'Mus musculus' else 'Mus musculus'

                        an = Analysis(
                            workingDir=workingDir + '%s/%s/' % (celltype, species),
                            otherCaseDir=workingDir + '%s/%s/' % (celltype, otherSpecies))

                        print('Saving DEG and expression data, %s, %s' % (species, celltype), flush=True)

                        df_expr = pd.read_hdf(
                            processedDataDir + 'PanglaoDB_expresion_per_batch_%s_%s.h5' % (species, celltype),
                            key='df')
                        df_expr.to_hdf(an.dataSaveName, key='df', mode='a', complevel=4, complib='zlib')

                        df_ranks = pd.read_hdf(
                            processedDataDir + 'PanglaoDB_ttest_ranks_per_batch_%s_%s.h5' % (species, celltype),
                            key='df')
                        df_ranks.to_hdf(an.dataSaveName, key='df_ranks', mode='a', complevel=4, complib='zlib')

                # Run all three parts for the human/mouse pair of this cell
                # type; no input data is passed (four None placeholders).
                anHuman, anMouse = process(
                    None, None, None, None,
                    workingDir + '%s/Homo sapiens/' % celltype,
                    workingDir + '%s/Mus musculus/' % celltype,
                    nCPUs=20,
                    parallelBootstrap=False,
                    nBootstrap=100,
                    PCNpath='/mnt/research/piermarolab/Sergii/results/',
                    exprCutoff1=0.05,
                    exprCutoff2=0.05,
                    genesOfInterest=genesOfInterest,
                    knownRegulators=knownRegulators,
                    perEachOtherCase=True,
                    part1=True,
                    part2=True,
                    part3=True)

            except Exception as exception:
                print('\nANALYSIS ERROR:', exception, '\n')

    # Re-plot main lig
    if True:
        for workingDir in ['otherCellTypes_rec/', 'otherCellTypes_lig/']:
            for celltype in ['Pericyte', 'SMC', 'Macrophage', 'Fibroblast', 'Endothelial']: # Original ['Pericyte', 'SMC', 'Macrophage', 'Fibroblast', 'Endothelial']:
                try:
                    if workingDir=='otherCellTypes_lig/':
                        knownRegulators = ligands_44
                        genesOfInterest = ligands_1777
                    elif workingDir=='otherCellTypes_rec/':
                        knownRegulators = gEC22
                        genesOfInterest = receptorsListHugo_2555