示例#1
0
文件: Plot.py 项目: airanmehr/bio
def plotPowerCLR(recompute=False):
    """Plot the power of the HMM and MarkovChain scores for hard and soft sweeps.

    With ``recompute`` set, the two score tables are read from
    ``ROC/MarkovChain`` and ``ROC/HMM`` under ``utl.outpath``, reduced to a
    power table through ``getPower`` and cached as ``ROC/PowerCLR.df``.
    Otherwise the cached table is loaded and ``pplt`` is reloaded.

    The figure is a 2x3 grid: the top row shows ``nu0 == 0.005`` (labelled
    'Hard'), the bottom row ``nu0 == 0.1`` (labelled 'Soft').  It is saved as
    'powerCLR' and then shown.
    """
    cache_path = utl.outpath + 'ROC/PowerCLR.df'
    if recompute:
        markov_scores = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'MarkovChain'))
        hmm_scores = f(pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')))
        scores = pd.concat([markov_scores, hmm_scores])
        print(scores)
        # Drop the rows whose 'coverage' level is infinite.
        finite_coverage = scores.index.get_level_values('coverage') != np.inf
        scores = scores[finite_coverage]

        def tail_mean(group):
            # Mean of the values at or above the quantile that Qcoverage
            # assigns to the first component of the group key.
            cutoff = group.quantile(Qcoverage[group.name[0]])
            return group[group >= cutoff].mean()

        df = pd.DataFrame(scores.groupby(level=range(6)).apply(tail_mean))[0]
        df = getPower(df, groupbyLevels=range(4))
        df.to_pickle(cache_path)
    else:
        df = pd.read_pickle(cache_path)
        reload(pplt)

    # Per-method line style and colour overrides.
    info = pplt.getNameColorMarker(df)
    method_of_row = info.index.get_level_values('method')
    for method, marker, color in (('HMM', '--o', 'r'),
                                  ('MarkovChain', '--s', 'darkblue')):
        rows = method_of_row == method
        info.loc[rows, 'marker'] = marker
        info.loc[rows, 'color'] = color

    # NOTE(review): ``dpi`` is not defined in this function; it is presumably
    # a module-level setting -- confirm.
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi)
    pplt.setStyle(lw=1)
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard')
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft')

    # Tag the six panels (A)-(F) in row-major order.
    for letter, ax in zip('ABCDEF', axes.reshape(-1)):
        pplt.annotate('({})'.format(letter), ax=ax, fontsize=7)

    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('powerCLR', dpi=dpi)
    plt.show()
示例#2
0
文件: Plot.py 项目: airanmehr/bio
def plotPower(recompute=False):
    """Plot the power of GP, HMM, FIT and CMH and export average-power tables.

    Args:
        recompute: when true, rebuild the power table from the per-method
            pickles under ``utl.outpath + 'ROC/'`` and cache it as
            ``ROC/Power.df``; when false, load the cached table.

    Side effects:
        * saves the 2x3 figure (top row ``nu0 == 0.005`` / 'Hard', bottom row
          ``nu0 == 0.1`` / 'Soft') as 'power';
        * pickles the average power, unstacked by method, to
          ``ROC/avgPower.df``;
        * writes one LaTeX table per sweep type under
          ``utl.paperFiguresPath + '../tables/'``;
        * shows the figure.
    """
    if recompute:
        def causal(x):
            # Keep entries flagged causal, plus every entry whose label is -1.
            # ``== True`` is an element-wise comparison on index values here.
            is_causal = x.index.get_level_values('causal') == True  # noqa: E712
            return x[is_causal | (x.index.get_level_values('label') == -1)]

        FIT = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        # Missing FIT scores are replaced by uniform random draws.
        missing = FIT.isnull()
        FIT[missing] = np.random.rand(missing.sum())
        CMH = causal(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH'].fillna(0))
        GP = causal(pd.read_pickle(utl.outpath + 'ROC/GP').LR)
        HMM = f(loadHMMAllDepths())
        # HMM: mean of the values at or above the quantile that Qcoverage
        # assigns to the first component of the group key.
        HMM = HMM.groupby(level=range(6)).apply(
            lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean())
        GP = GP.groupby(level=range(6)).max()
        FIT = FIT.groupby(level=range(6)).max()  # dont move this line!
        CMH = CMH.groupby(level=range(6)).max()
        df = getPower(pd.concat([GP, HMM, FIT, CMH]), range(4)).sort_index()
        df.to_pickle(utl.outpath + 'ROC/Power.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/Power.df')

    # Rows with infinite coverage are excluded from both figure and tables.
    df = df[df.index.get_level_values('coverage') != np.inf]
    df = fixComaleName(df)
    info = fixColor(pplt.getNameColorMarker(df))

    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=pplt.PLOS.dpi)
    pplt.setStyle(lw=1)
    reload(pplt)
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard',
                      panel=list('ABC'))
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft', panel=list('DEF'))
    # A plain loop: the calls are made only for their side effect.
    for letter, ax in zip('ABCDEF', axes.reshape(-1)):
        pplt.annotate('({})'.format(letter), ax=ax, fontsize=7)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('power', pplt.PLOS.dpi)

    # Average over everything but the first three index levels; computed once
    # and reused for both the pickle and the LaTeX tables.
    avg_power = df.groupby(level=range(3)).mean()
    avg_power.unstack('method').to_pickle(utl.outpath + 'ROC/avgPower.df')

    table = avg_power.reset_index()
    table.replace({np.inf: r'$\infty$'}, inplace=True)
    # Assign the column explicitly: an in-place ``replace`` on an
    # attribute-accessed column is a chained operation that is not guaranteed
    # to write back into the frame.
    table['nu0'] = table['nu0'].replace({0.005: 'Hard', 0.1: 'Soft'})
    table.columns = [r'$\lambda$', 'Sweep', 'Method', 'Avg Power']
    table.sort_values([r'$\lambda$', 'Sweep', 'Avg Power'], ascending=False, inplace=True)
    table['Avg Power'] = table['Avg Power'].round().astype(int)
    table = table.set_index(['Sweep'])
    # Cast the non-string lambda entries to int.
    numeric = table[r'$\lambda$'].apply(lambda x: not isinstance(x, str))
    table.loc[numeric, r'$\lambda$'] = table.loc[numeric, r'$\lambda$'].astype(int)

    soft = table.loc['Soft'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    hard = table.loc['Hard'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    # NOTE(review): the 'Mathods' spelling is kept on purpose -- the file name
    # may be referenced from outside this function; rename both sides together.
    utl.DataframetolaTexTable(hard, fname=utl.paperFiguresPath + '../tables/powerHardMathods.tex')
    utl.DataframetolaTexTable(soft, fname=utl.paperFiguresPath + '../tables/powerSoftMethods.tex')
    plt.show()
示例#3
0
文件: Plot.py 项目: airanmehr/bio
def plotRank():
    """Plot, for every depth, the CDF of the ranks stored for each method.

    The rank tables ``ROC/ranks.{CMH,HMM,GP}.df`` are read from
    ``utl.outpath`` and concatenated.  For each value of the first index
    level (``depth``) a 2x4 grid is drawn, one panel per (nu0, s) pair, with
    one cumulative-distribution curve per method.  A rank-weighted summary of
    each curve (collected in ``AUC``) is printed, and the figure is saved as
    ``rank<depth>``.

    The nested ``computeRanks`` produces the GP and CMH rank tables; its call
    is commented out, so the pickles are expected to exist already.
    """
    def computeRanks():
        """Rank the scores within each group and pickle the causal ranks."""
        print('ranking...')

        def causal_rank(scores):
            # Descending rank within the first five index levels, then keep
            # only the entries flagged causal.
            ranked = scores.groupby(level=range(5)).rank(ascending=False)
            return ranked.xs(True, level='causal')

        def drop_infinite_coverage(scores):
            return scores[scores.index.get_level_values('coverage') != np.inf]

        def positives(scores):
            return scores.xs(1, level='label').fillna(0)

        gp_scores = pd.read_pickle(utl.outpath + 'ROC/GP')
        causal_rank(positives(drop_infinite_coverage(gp_scores)).LR).to_pickle(
            '{}ROC/ranks.GP.df'.format(utl.outpath))

        fit_scores = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        fit_scores[fit_scores.isnull()] = np.random.rand(fit_scores.isnull().sum())
        # The results of the next two expressions are discarded; no FIT rank
        # table is written.
        fit_scores.isnull().sum()
        fit_scores.xs(True, level='causal')

        cmh_scores = pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH']
        causal_rank(positives(drop_infinite_coverage(cmh_scores))).to_pickle(
            '{}ROC/ranks.CMH.df'.format(utl.outpath))

    # computeRanks()
    print('plotting...')
    ranks = pd.concat([pd.read_pickle('{}ROC/ranks.{}.df'.format(utl.outpath, method))
                       for method in ['CMH', 'HMM', 'GP']])
    fontsize = 7
    dpi = 300

    def empirical_cdf(group):
        # Cumulative counts of each rank value, divided by the group's shape
        # tuple exactly as in the original expression.
        return group.value_counts().sort_index().cumsum() / group.shape

    def append_endpoint(group):
        # Add a final point with value 1 at rank 1200 (matching the x-limit
        # set below), then strip the group-key levels.
        group[group.name + (1200,)] = 1
        return group.loc[group.name]

    def fill_forward(column):
        # In-place forward fill; a missing first entry becomes 0.
        if pd.isnull(column.iloc[0]):
            column.iloc[0] = 0
        for pos in range(1, column.size):
            if pd.isnull(column.iloc[pos]):
                column.iloc[pos] = column.iloc[pos - 1]
        return column

    for depth, _ in ranks.groupby(level=0):
        print(depth)
        AUC = []
        dists = ranks.loc[depth].groupby(level=[0, 2, 1]).apply(empirical_cdf)
        dists = dists.groupby(level=range(3)).apply(append_endpoint)

        fig, axes = plt.subplots(2, 4, figsize=(7, 3), dpi=dpi, sharey=True, sharex=True)
        axes = axes.reshape(-1)
        panel = 0
        for nu0, per_nu0 in dists.groupby(level=0):
            for s, cdf in per_nu0.loc[nu0].groupby(level=0):
                cdf = cdf.loc[s]
                cdf = cdf.unstack(level='method')
                cdf = cdf.apply(fill_forward)

                # Rank-weighted average of each method's curve.
                rank_values = cdf.index.values
                weighted = cdf.apply(lambda col: col.dot(rank_values)) / np.sum(rank_values)
                AUC.append(weighted.rename((depth, nu0, s)))

                # Colours are looked up before the HMM column is renamed.
                color = fixColor(pd.DataFrame(None, index=cdf.columns)).loc[
                    cdf.columns.values, 'color'].tolist()
                cdf.columns = [name.replace('HMM', comaleName) for name in cdf.columns]

                if cdf.shape[0] == 2:
                    cdf.index = np.ceil(cdf.index.values)

                ax = axes[panel]
                cdf.plot(color=color, ax=ax, lw=1, legend=False)
                ax.set_ylim([-0.02, 1.02])
                ax.set_title('$s$={}'.format(s), fontsize=fontsize + 1)
                if panel > 3:
                    # Only the bottom row carries an x label.
                    ax.set_xlabel('Rank', fontsize=fontsize)
                sweep = 'Hard' if nu0 == 0.005 else 'Soft'
                ax.set_ylabel(r'CDF ({} Sweep)'.format(sweep), fontsize=fontsize)
                ax.locator_params(axis='x', nbins=5)
                # Style settings are re-applied after every panel, in the
                # same order as before.
                pplt.setStyle(lw=1, fontsize=fontsize, fontscale=0.1)
                mpl.rcParams.update({'font.size': 2})
                mpl.rc('xtick', labelsize=6)
                mpl.rc('ytick', labelsize=6)
                if panel == 7:
                    ax.legend(loc='lower right', fontsize=fontsize)
                panel += 1

        plt.xlim([0, 1200])
        plt.gcf().subplots_adjust(bottom=0.15)
        print(pd.concat(AUC, axis=1).round(2).T.reset_index())
        print(depth)
        pplt.savefig('rank{}'.format(depth), dpi)
示例#4
0
文件: Markov.py 项目: airanmehr/bio
    o.iloc[1] = o.iloc[2]
    # o=(obs[100].value_counts().sort_index()/obs.shape[0])
    o.name = 'Observation';
    o.plot(color='g');
    markov.plot(color='b');
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s={}$, $\nu_0=${}, $\tau$={}'.format(s, nu0, tau), loc=1, fontsize=fontsize)
    plt.xlabel('$s$')
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[2] - 1]), fontsize=fontsize)

if __name__ == '__main__':
    # Disabled calls kept from the original script:
    # createNeutralSimulations()
    # createSelectionSimulations(s=0.01)
    # createSelectionSimulations(s=0.1)
    reload(pplt)

    # Figure-wide settings.
    dpi = 200
    fontsize = 7

    fig = plt.figure(figsize=(6.2, 4), dpi=dpi)
    pplt.setStyle(lw=1)

    # Three groups of panels; the ranges are presumably subplot slots 1-9
    # (TODO confirm against plotNull / plotAlternative).
    plotNull(range(1, 4), fontsize=fontsize)
    plotNull(range(4, 7), 0.1, fontsize=fontsize)
    plotAlternative(range(7, 10), fontsize=fontsize)

    plt.tight_layout()
    pplt.savefig('markovDists', dpi=dpi)
    # NOTE(review): this margin tweak runs after the figure has been saved,
    # so it can only affect the window opened by plt.show().
    plt.gcf().subplots_adjust(bottom=0.1)
    plt.show()
    print('Done')