示例#1
0
文件: Dynamics.py 项目: airanmehr/bio
def plotBottleneck(maxGen=None,obs=False,mean=True,color='blue'):
    """Plot three statistics from a pickled DataFrame as mean +/- 2 std bands.

    NOTE(review): the very first statement is ``exit()``, so as written
    nothing below it ever runs -- calling this function raises SystemExit.
    Presumably the plot was switched off on purpose; confirm before removing.

    Parameters
    ----------
    maxGen, obs, mean
        Accepted but never read anywhere in the body.
    color : str
        Colour used by the nested ``plotOne`` for both the mean line and the
        shaded band.
    """
    exit()

    def plotOne(df, ax, method):
        # Mean and standard deviation taken along axis 1 of ``df``.
        m=df.mean(1)
        s=df.std(1)
        # plt.locator_params(nbins=4);
        m.plot(ax=ax, legend=False, linewidth=3, color=color)
        x=m.index.values
        m=m.values;s=s.values
        # Shade the band two standard deviations either side of the mean.
        ax.fill_between(x, m - 2 * s, m + 2 * s, color=color, alpha=0.3)
        ax.set_ylabel(method.strip())
        # Pin the lower y limit at -0.1 and keep the current upper limit.
        ax.set_ylim([-0.1, ax.get_ylim()[1]])

        pplt.setSize(ax)

    # ``path`` is not defined in this function; it must come from module scope.
    dfn = \
        pd.read_pickle(path + 'nu{}.s{}.df'.format(0.005, 0.0))
    fig, ax = plt.subplots(3, 1, sharex=True, figsize=(4, 3), dpi=300)
    plotOne(dfn['tajimaD'], ax[0], "Tajima's $D$");
    plt.xlabel('Generations')
    # NOTE(review): the 'HAF' column is drawn under the label "Fay Wu's $H$" --
    # confirm that the column and the label really refer to the same statistic.
    plotOne(dfn['HAF'], ax[1], "Fay Wu's $H$");
    plt.xlabel('Generations')
    plotOne(dfn['SFSelect'], ax[2], 'SFSelect');
    plt.xlabel('Generations')
    plt.gcf().subplots_adjust(bottom=0.25)
    # The font/TeX rc settings are applied after the artists were created but
    # before the figure is saved.
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']});
    mpl.rc('text', usetex=True)
    pplt.savefig('bottleneck', 300)
    plt.show()
示例#2
0
文件: LD.py 项目: airanmehr/bio
def plotLD3d():
    """Draw two stacked 3-D panels with ``plotLDDecaySelection3d`` and save them.

    The upper panel uses the helper's default second argument, the lower one
    passes ``True``.  The figure is written out as ``LDDecay3d``.
    """
    figure = plt.figure(figsize=(7, 6), dpi=300)

    upper_panel = figure.add_subplot(2, 1, 1, projection='3d')
    plotLDDecaySelection3d(upper_panel)

    lower_panel = figure.add_subplot(2, 1, 2, projection='3d')
    plotLDDecaySelection3d(lower_panel, True)

    pplt.savefig('LDDecay3d', 200)
示例#3
0
文件: Plot.py 项目: airanmehr/bio
def plotPowerCLR(recompute=False):
    """Plot the power table for the 'HMM' and 'MarkovChain' methods.

    When ``recompute`` is true the two pickled score tables are merged,
    reduced with ``getPower`` and cached in ``ROC/PowerCLR.df``; otherwise the
    cached table is read back (and ``pplt`` is reloaded).  The table is then
    drawn on a 2x3 grid, one row per ``nu0`` value, and saved as ``powerCLR``.
    ``dpi``, ``f``, ``Qcoverage`` and ``getPower`` are taken from module scope.
    """
    if not recompute:
        df = pd.read_pickle(utl.outpath + 'ROC/PowerCLR.df')
        reload(pplt)
    else:
        mc = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'MarkovChain'))
        hmm = f(pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')))
        a = pd.concat([mc, hmm])
        print(a)
        a = a[a.index.get_level_values('coverage') != np.inf]
        # Per group: mean of the values at or above the quantile that
        # Qcoverage lists for the group's first key component.
        tail_mean = lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean()
        reduced = a.groupby(level=range(6)).apply(tail_mean)
        df = getPower(pd.DataFrame(reduced)[0], groupbyLevels=range(4))
        df.to_pickle(utl.outpath + 'ROC/PowerCLR.df')

    info = pplt.getNameColorMarker(df)
    # Line style first for both methods, then colour, mirroring the table below.
    markers = (('HMM', '--o'), ('MarkovChain', '--s'))
    colors = (('HMM', 'r'), ('MarkovChain', 'darkblue'))
    for method, marker in markers:
        info.loc[info.index.get_level_values('method') == method, 'marker'] = marker
    for method, shade in colors:
        info.loc[info.index.get_level_values('method') == method, 'color'] = shade

    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi)
    pplt.setStyle(lw=1)
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard')
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft')
    # Tag the six panels (A)..(F) in row-major order.
    for j, panel in enumerate(axes.reshape(-1)):
        pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=panel, fontsize=7)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('powerCLR', dpi=dpi)
    plt.show()
示例#4
0
def outlier():
    """Pickle and plot three flavours of outliers of the windowed score.

    The first column of the sorted genome scan is thresholded in three ways
    (one global 0.99 quantile, a 0.99 quantile per level-0 group, and
    ``localOutliers``).  Each result is pickled under ``utl.outpath`` and
    drawn as a Manhattan plot saved under ``utl.paperPath``.
    """
    scores = rutl.removeHeteroChromatin(rutl.loadScores())
    field = comale
    scan = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))
    df = scan[[field, 'Num. of SNPs']]
    statistic = df.iloc[:, 0]

    def above_global_quantile(a):
        return a[a > a.quantile(0.99)]

    def above_group_quantile(a):
        return a.groupby(level=0).apply(lambda x: x[x > x.quantile(0.99)].loc[x.name])

    # (series name, pickle file, figure name, outlier selector)
    passes = [
        ('Global Outliers', 'real/outliers.global.df', 'global', above_global_quantile),
        ('Chrom Outliers', 'real/outliers.chrom.df', 'chrom', above_group_quantile),
        ('Local Outliers', 'real/outliers.local.df', 'local', localOutliers),
    ]
    for label, pickle_name, figure_name, select in passes:
        a = statistic.rename(label)
        o = select(a)
        o.to_pickle(utl.outpath + pickle_name)
        fig = plt.figure(figsize=(7, 1.5), dpi=300)
        pplt.Manhattan(data=a, Outliers=pd.DataFrame(o), fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
        for panel in fig.get_axes():
            pplt.setSize(panel, 5)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format(figure_name))
示例#5
0
文件: Dynamics.py 项目: airanmehr/bio
    def plotOne(df, ax, method):
        """Draw the axis-1 mean of ``df`` on ``ax`` with a +/- 2 std band.

        ``method`` (stripped of surrounding whitespace) becomes the y label.
        ``color`` is read from the enclosing scope.
        """
        center = df.mean(1)
        spread = df.std(1)
        # plt.locator_params(nbins=4);
        center.plot(ax=ax, legend=False, linewidth=3, color=color)

        mean_values = center.values
        std_values = spread.values
        lower_edge = mean_values - 2 * std_values
        upper_edge = mean_values + 2 * std_values
        ax.fill_between(center.index.values, lower_edge, upper_edge, color=color, alpha=0.3)

        ax.set_ylabel(method.strip())
        # Keep the current upper limit, force the lower limit to -0.1.
        current_top = ax.get_ylim()[1]
        ax.set_ylim([-0.1, current_top])

        pplt.setSize(ax)
示例#6
0
文件: LD.py 项目: airanmehr/bio
def plotScalingFactor():
    """Save one two-panel figure per (nu0, s) combination.

    For each of the four combinations the upper panel shows ``nu(t)`` and the
    lower panel shows ``log(H(t)/H(t[0]))``, the linear term ``-2*r*t*l`` and
    their sum, with a dashed vertical line where the sum (first row excluded)
    is closest to zero.  Figures are saved as ``decayFactors0`` ..
    ``decayFactors3``.
    """
    r = 2 * 1e-8
    l = 5e4
    dpi = 300
    combos = [(nu0, s) for nu0 in [0.005, 0.1] for s in [0.025, 0.1]]
    for j, (nu0, s) in enumerate(combos):
        t = np.arange(0, 2 * (utl.logit(0.995) - utl.logit(nu0)) / s + 1., 1)
        fig, ax = plt.subplots(2, 1, figsize=(5.5, 2.5), dpi=dpi, sharex=True)
        top, bottom = ax[0], ax[1]

        # Upper panel: the nu(t) trajectory, annotated with its parameters.
        nu(t, s=s, nu0=nu0).plot(color='k', legend=False, ax=top)
        if nu0 == 0.005:
            sweep = 'Hard'
        else:
            sweep = 'Soft'
        pplt.annotate(r'$s$={}, $\nu_0=${} ({} Sweep)'.format(s, nu0, sweep), fontsize=7,
                      ax=top)
        pplt.setSize(ax=top, fontsize=6)
        top.set_ylabel(r'$\nu_t$')

        # Lower panel: the two log terms and their sum.
        H0 = H(t[0], s=s, nu0=nu0)
        Ht = H(t, s=s, nu0=nu0)
        rows = [np.log(Ht / H0), -2 * r * t * l]
        df = pd.DataFrame(rows, columns=t, index=['log(Growth)', r'log(Decay)']).T
        df['log(Growth) + log(Decay)'] = df.sum(1)
        df.plot(ax=bottom, grid=True, linewidth=2)
        bottom.set_xlabel('Generations')
        bottom.set_ylabel('Log(Scaling Factor)')
        zero_crossing = df.iloc[1:, 2].abs().idxmin()
        bottom.axvline(zero_crossing, color='k', linestyle='--', linewidth=0.5)
        bottom.legend(['log(Growth)', r'log(Decay)', 'log(Growth) + log(Decay)'], bbox_to_anchor=(1.45, .75),
                      prop={'size': 6})
        pplt.setSize(bottom, fontsize=6)

        plt.tight_layout(pad=0.1, rect=[0, 0, 0.7, 1])
        plt.gcf().subplots_adjust(bottom=0.15)
        pplt.savefig('decayFactors{}'.format(j), dpi=dpi)
示例#7
0
def scanSFS():
    """Scan the genome with the score statistic and with SFSelect, then plot.

    First the windowed score table is plotted through ``plotOne`` (saved as
    'all').  Then SFSelect is scanned on the ``rutl.getNut(0)`` and
    ``rutl.getNut(59)`` data, and the difference, both scans and the score
    column are drawn together in one Manhattan figure saved as 'sfs-clear'.
    """
    scores = rutl.loadScores()
    field = comale
    windowed = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))
    df = windowed[[field, 'Num. of SNPs']]
    score_cutoff = df[field].quantile(0.99)
    plotOne(df, df[df[field] > score_cutoff], fname='all')

    nu0 = rutl.getNut(0)
    nut = rutl.getNut(59)
    reload(rutl)
    # n= int(pd.read_pickle(utl.outpath + 'real/CD.F59.df').loc[:,pd.IndexSlice[:,0,'D']].mean().mean())
    n = 100
    SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=n)

    sf0 = scanOne(nu0, SFSelect, 'SFSelect.Base', 'SFSelect.Base')
    sft = scanOne(nut, SFSelect, 'SFSelect.Final', 'SFSelect.Final')

    base = sf0.iloc[:, 0]
    final = sft.iloc[:, 0]
    change = (final - base).rename('SFS(59)-SFS(0)')
    sfr = pd.concat([change, base, final, df.iloc[:, 0]], axis=1)

    # Outliers are picked before the negative differences are blanked out.
    first = sfr.iloc[:, 0]
    top = sfr[first > first.quantile(0.99)]
    sfr.loc[(sfr.iloc[:, 0] < 0).values, sfr.columns[0]] = None

    fig = plt.figure(figsize=(7, 4.5), dpi=300)
    pplt.Manhattan(data=sfr, Outliers=top, fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
    for panel in fig.get_axes():
        pplt.setSize(panel, 5)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('sfs-clear'))
示例#8
0
def plotOne(df, outlier, fname=None, dashedline=True):
    """Draw a Manhattan plot of ``df`` with ``outlier`` highlighted.

    If ``dashedline`` is true, a dashed horizontal line is added to the first
    axes at the 0.99 quantile of the first column of ``df``.  If ``fname`` is
    given, the figure is saved as ``new/<fname>.pdf`` under ``utl.paperPath``.
    """
    fig = plt.figure(figsize=(7, 2.5), dpi=300)
    pplt.Manhattan(data=df, Outliers=outlier, fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
    for panel in fig.get_axes():
        pplt.setSize(panel, 8)

    current = plt.gcf()
    current.subplots_adjust(bottom=0.2)
    if dashedline:
        threshold = df.iloc[:, 0].quantile(0.99)
        current.axes[0].axhline(threshold, linewidth=0.5, linestyle='--', color='k')

    if fname is None:
        return
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format(fname))
示例#9
0
文件: Plot.py 项目: airanmehr/bio
def plotPower(recompute=False):
    """Plot the power table for all methods and export summary tables.

    With ``recompute`` true, the FIT, CMH and GP pickles and the output of
    ``f(loadHMMAllDepths())`` are reduced per group, combined through
    ``getPower`` and cached in ``ROC/Power.df``; otherwise that cache is read.
    The table (rows with infinite coverage dropped) is drawn on a 2x3 grid
    saved as 'power', the per-group mean is pickled to ``ROC/avgPower.df``,
    and two LaTeX tables (hard / soft) are written.

    Uses the Python 2 builtin ``reload``.
    """
    if recompute:
        # Keep rows flagged causal plus every row whose label is -1.
        causal = lambda x: x[(x.index.get_level_values('causal') == True) | (x.index.get_level_values('label') == -1)]
        FIT = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT'];
        # Missing FIT values are replaced by uniform random numbers, so a
        # recompute is not reproducible unless the NumPy seed is fixed.
        FIT[FIT.isnull()] = np.random.rand(FIT.isnull().sum())
        CMH = causal(pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH'].fillna(0))
        GP = causal(pd.read_pickle(utl.outpath + 'ROC/GP').LR)
        HMM = f(loadHMMAllDepths())
        # HMM = (HMM.alt - HMM.null) ;HMM = HMM.groupby(level=range(6)).mean()
        # HMM = HMM.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(0.99)].mean())
        # Per group: mean of the values at or above the quantile that
        # Qcoverage lists for the group's first key component.
        HMM = HMM.groupby(level=range(6)).apply(lambda x: x[x >= x.quantile(Qcoverage[x.name[0]])].mean())
        GP = GP.groupby(level=range(6)).max()
        FIT = FIT.groupby(level=range(6)).max();  # dont move this line!
        CMH = CMH.groupby(level=range(6)).max();
        df = getPower(pd.concat([GP, HMM, FIT, CMH]), range(4)).sort_index()
        df.to_pickle(utl.outpath + 'ROC/Power.df')
    else:
        df = pd.read_pickle(utl.outpath + 'ROC/Power.df')
    df = df[df.index.get_level_values('coverage') != np.inf]
    df = fixComaleName(df)
    info = fixColor(pplt.getNameColorMarker(df))
    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=pplt.PLOS.dpi);
    pplt.setStyle(lw=1);
    reload(pplt)
    # One row of panels per nu0 value: 0.005 labelled 'Hard', 0.1 labelled 'Soft'.
    pplt.plotOnePower(df.xs(0.005, level='nu0'), info, axes[0], legendSubplot=0, ylabel='Hard', panel=list('ABC'));
    pplt.plotOnePower(df.xs(0.1, level='nu0'), info, axes[1], ylabel='Soft', panel=list('DEF'));
    [pplt.annotate('({})'.format(list('ABCDEF')[j]), ax=x, fontsize=7) for j, x in enumerate(axes.reshape(-1))]
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('power', pplt.PLOS.dpi)
    # Average over everything but the first three index levels.
    df.groupby(level=range(3)).mean().unstack('method').to_pickle(utl.outpath + 'ROC/avgPower.df')
    csv = df.groupby(level=range(3)).mean().reset_index()
    # csv.replace({'HMM': comaleName}, inplace=True)
    csv.replace({np.inf: r'$\infty$'}, inplace=True)
    csv.nu0.replace({0.005: 'Hard', 0.1: 'Soft'}, inplace=True)
    # Renaming assumes exactly four columns in this order -- TODO confirm
    # against the index level order of the cached table.
    csv.columns = [r'$\lambda$', 'Sweep', 'Method', 'Avg Power']
    csv.sort_values([r'$\lambda$', 'Sweep', 'Avg Power'], ascending=False, inplace=True)
    csv['Avg Power'] = csv['Avg Power'].round().astype(int)
    csv = csv.set_index(['Sweep'])
    # Cast the lambda entries that are not strings to int.
    i = csv[r'$\lambda$'].apply(lambda x: not isinstance(x, str))
    csv.loc[i, r'$\lambda$'] = csv.loc[i, r'$\lambda$'].astype(int)
    soft = csv.loc['Soft'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    hard = csv.loc['Hard'].sort_values([r'$\lambda$', 'Avg Power'], ascending=False)
    # NOTE(review): the hard-sweep file is spelled 'powerHardMathods.tex' while
    # the soft one is 'powerSoftMethods.tex' -- check which name consumers expect.
    utl.DataframetolaTexTable(hard, fname=utl.paperFiguresPath + '../tables/powerHardMathods.tex')
    utl.DataframetolaTexTable(soft, fname=utl.paperFiguresPath + '../tables/powerSoftMethods.tex')
    plt.show()
示例#10
0
def Final():
    """Draw a Manhattan plot of the windowed scores with shaded intervals.

    NOTE(review): ``o`` and ``shades`` are read below but never assigned in
    this function, so they must exist at module scope or the call raises
    NameError.  ``shades`` looks like it may have been meant to be
    ``intervals`` -- confirm before changing.
    """
    scores = rutl.loadScores(skipHetChroms=True).abs()
    # Per window: 'H' is the mean absolute score, 'M' the number of entries.
    a = sort(utl.scanGenome(scores.abs(), {'H': lambda x: x.abs().mean(), 'M': lambda x: x.size}))
    intervals = ga.getIntervals(o.H, padding=30000)
    fig = plt.figure(figsize=(7, 1.5), dpi=300);
    pplt.Manhattan(data=a, Outliers=o, shade=intervals.reset_index(), fig=fig, markerSize=2, ticksize=8,
                   sortedAlready=True);
    [pplt.setSize(ax, 5) for ax in fig.get_axes()];
    plt.gcf().subplots_adjust(bottom=0.15);
    # The title is the tuple (row count of ``shades``, sum of its 'len' column / 1e6).
    plt.suptitle((shades.shape[0], shades['len'].sum() / 1e6), fontsize=8)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('CHROM.FDR_0.01'))
示例#11
0
def plotSNPPval(out):
    """Draw a Manhattan plot of KDE-based p-values for the top entries of ``out``.

    The 1200 largest values of ``out`` are passed to ``utl.getPvalKDE``
    together with a density built from the loaded scores.  The resulting
    values are printed in ascending order and plotted with chromosomes
    arranged in a fixed order.

    Parameters
    ----------
    out : pandas.Series
        Values to rank; its index must carry a 'CHROM' level.
    """
    scores = rutl.loadScores()
    kde = utl.getDensity(scores, width=1)
    pval = utl.getPvalKDE(out.sort_values(ascending=False).iloc[:1200], kde)
    # Single-argument call form prints the same under Python 2 and Python 3.
    print(pval.sort_values())

    df = pd.DataFrame(pval)
    # Re-stack the rows chromosome by chromosome in this display order;
    # chromosomes that are absent simply contribute no rows.
    chrom_order = ['X', '2L', '2R', '3L', '3R', '4', '2LHet', '2RHet', '3LHet', '3RHet', 'XHet']
    df = pd.concat([df[df.index.get_level_values('CHROM') == ch] for ch in chrom_order])

    fig = plt.figure(figsize=(7, 2), dpi=300)
    pplt.Manhattan(df, fig=fig, markerSize=2, ticksize=8, sortedAlready=True)
    for panel in fig.get_axes():
        pplt.setSize(panel, 8)
示例#12
0
文件: Depth.py 项目: airanmehr/bio
def plotDepthHeterogenocity():
    """Plot the 'D' read values of four selected rows, one panel per row.

    The rows are chosen by the standard deviation of their values: the
    maximum, the 0.52 quantile, the median and the 0.8 quantile.  The figure
    is saved as 'depthHetero'.
    """
    dpi = 300
    sns.set_style("whitegrid", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    _, ax = plt.subplots(2, 2, sharex=True, figsize=(6, 4), dpi=dpi)

    d = pd.read_pickle(utl.outpath + 'real/CD.F59.df').xs('D', level='READ', axis=1)
    std = d.std(1)
    # Each entry is an index label whose std equals the requested statistic
    # exactly; indexing with [0]/[-1] raises IndexError if no row matches.
    loc = [std.idxmax(), (std == std.quantile(0.52)).replace({False: None}).dropna().index[0],
           (std == std.median()).replace({False: None}).dropna().index[-1],
           (std == std.quantile(0.8)).replace({False: None}).dropna().index[0]]
    ax = ax.reshape(-1)
    fontsize = 6
    for i, pos in enumerate(loc):
        eg = d.loc[pos]
        # One line per key 0, 1, 2 of the selected row.
        [eg[r].dropna().plot(marker='o', ax=ax[i], markersize=5) for r in range(3)];
        # NOTE(review): these pyplot-level calls act on the current axes,
        # which is not necessarily ax[i] -- confirm this is intended.
        plt.xticks(d.columns.get_level_values('GEN').unique());
        plt.xlabel('');
        plt.ylabel('')
        # NOTE(review): ``get_axis_limits`` is called unqualified here but as
        # ``pplt.get_axis_limits`` below; verify the bare name is importable.
        print 'position={}:{}'.format(eg.name[0], eg.name[1]), get_axis_limits()

        if i in [0, 2]: ax[i].set_ylabel('Read Depth')
        if i > 1: ax[i].set_xlabel('Generation')
        if i == 0: ax[i].legend(['Replicate 1', 'Replicate 2', 'Replicate 3'], loc='upper center',
                                prop={'size': fontsize})
        # Pad the y range: 5% below (never above 0 for the lower bound) and 3% above.
        yrang = pplt.get_axis_limits(upper=True, ax=ax[i])[1] - pplt.get_axis_limits(upper=False, ax=ax[i])[1]
        ax[i].set_ylim([min(0, ax[i].get_ylim()[0] - 0.05 * yrang), ax[i].get_ylim()[1] + 0.03 * yrang])
        ax[i].set_xlim([-2, 61]);
        ax[i].set_title('{}:{}'.format(eg.name[0], eg.name[1]))
        pplt.setSize(ax[i], fontsize)

    # Font/TeX rc settings are applied after drawing, before the figure is saved.
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']});
    mpl.rc('text', usetex=True)
    plt.gcf().subplots_adjust(bottom=0.15)
    pplt.savefig('depthHetero', dpi)
    plt.show()
示例#13
0
def plot():
    """Draw observed, Markov-chain and Brownian curves on a 4x3 grid.

    Rows iterate over the (s, nu0) combinations of ``[0, 0.1] x [0.005, 0.1]``
    and columns over ``tau`` in 1, 10, 100.  For every panel the stored
    'markov' entry of ``plotData.df`` is combined with a fresh observation and
    a Brownian curve (the Brownian entry is blanked when ``s`` is non-zero),
    and the three are drawn in the order markov, brownian, observation.  The
    figure is saved as 'markovDists'.

    ``color``, ``getObservation`` and ``getBrownian`` come from module scope.
    """
    fontsize = 5

    def plotOne(x, ax):
        """Draw one curve; entries that were blanked out (None) are skipped."""
        # The original wrapped the ``x.name`` lookup in a bare ``except: pass``
        # solely to survive ``x is None``; test for that case explicitly so
        # that genuine errors are no longer swallowed.
        if x is None:
            return
        if x.name is None:
            lw, alpha = 0.7, 1
        else:
            lw, alpha = 3, 0.8
        x.plot(ax=ax, color=color[x.name], lw=lw, alpha=alpha)

    fig, axes = plt.subplots(4, 3, figsize=(7, 3.9), dpi=300)
    df = pd.read_pickle(utl.outpath + 'markov/simulations/plotData.df')
    # Panel letters per row; note the last row is K, L, M (no J).
    ABC = [list('ABC'), list('DEF'), list('GHI'), list('KLM')]
    for (s, nu0), axr, titles in zip(itertools.product([0, 0.1], [0.005, 0.1]), axes, ABC):
        for tau, ax, title in zip([1, 10, 100], axr, titles):
            observation = getObservation(nu0, s, tau)
            x = observation.index.values
            brownian = getBrownian(x=x, nu0=nu0, tau=tau, mu=nu0)
            markov = df[(nu0, s, tau)].loc['markov']
            df[(nu0, s, tau)] = pd.Series([observation, markov, brownian],
                                          index=['observation', 'markov', 'brownian']).rename((nu0, s, tau))
            if s: df[(nu0, s, tau)].loc['brownian'] = None
            df[(nu0, s, tau)].loc[['markov', 'brownian', 'observation']].apply(lambda x: plotOne(x, ax))
            if nu0 == 0.005 and tau == 100: ax.set_xlim([0, 0.02])
            ax.locator_params(nbins=1, axis='y')
            if nu0 == 0.005 and tau == 100 and s == 0:
                ax.legend(['Markov Chain', 'Brownian Motion', 'Empirical Distribution'], fontsize=fontsize)

            # Keep every other x tick.
            ax.set_xticks(ax.get_xticks()[::2])
            # ax.set_xticklabels(map(str,tick))
            pplt.annotate('(' + title + ')', fontsize=fontsize, ax=ax)
            pplt.setSize(ax, fontsize)
        axr[0].set_ylabel(r'$P(\nu_\tau|\nu_0={},s={}$)'.format(nu0, s), fontsize=fontsize + 2, rotation=0, labelpad=30)
        # ax.text(0.0,0.0,)
    # Column titles go on the top row only.
    for tau, ax in zip([1, 10, 100], axes[0]):
        ax.set_title(r'$\tau={}$'.format(tau), fontsize=fontsize)
    # x labels go on the bottom row only; address it directly instead of
    # relying on the loop variable left over from the loop above.
    for ax in axes[-1]:
        ax.set_xlabel(r'$\nu$', fontsize=fontsize)

    plt.gcf().tight_layout(pad=0.1, rect=[0.05, 0, 1, 1])
    pplt.savefig('markovDists', 300)
    plt.show()
示例#14
0
def scanSFSSNPbased():
    """SNP-window scan of the scores at three window sizes, with several plots.

    Absolute scores are averaged over windows of 200, 500 and 1000 SNPs and
    multiplied into a combined column ('comb').  The function then saves a
    Manhattan plot of all four columns ('SNPbased'), a plot of the local
    outliers of the combined column ('SNPbased.candidates') and one plot per
    cutoff column 0.95, 0.99 and 0.999 ('SNPbased.fdr<cutoff>').

    Uses the Python 2 builtin ``reload``.
    """
    scores = rutl.loadScores(skipHetChroms=True)
    # field = comale;
    # df = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))[
    #     [field, 'Num. of SNPs']]
    # plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')
    reload(rutl)
    reload(pplt)
    reload(utl)
    # SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=100)
    # sfs0 = utl.scanGenomeSNP(rutl.getNut(0, skipHetChroms=True), SFSelect)
    # sfst = utl.scanGenomeSNP(rutl.getNut(59, skipHetChroms=True), SFSelect).rename(59);     sfs=(sfst-sfs0);    sfs[sfs<0]=None
    # ``genes``, ``shade``, ``cand`` and ``chroms`` below are assigned but not
    # read again in this function (``genes`` only appears in commented-out code).
    g = ga.loadGeneCoordinates().set_index('name')
    genes = g.loc[['Ace', 'Cyp6g1', 'CHKov1']].reset_index().set_index('CHROM')

    shade = scores.sort_values().reset_index().iloc[-2:].rename(columns={'POS': 'start'});
    shade['end'] = shade.start + 100
    cand = pd.concat([scores, scores.rank(ascending=False).rename('rank'), rutl.getNut(0, skipHetChroms=True)],
                     axis=1).sort_values('rank')
    chroms = ['2L', '2R', '3L', '3R']
    reload(utl)

    # reload(pplt);pplt.Genome(sfs.loc[chroms],genes=genes);plt.tight_layout(pad=0.1)
    # Mean absolute score at three window sizes; each result is named by its window size.
    df = pd.concat(
            [utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=200, step=100, skipFromFirst=900).rename(200),
             utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=500, step=100, skipFromFirst=750).rename(500),
             utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=1000, step=100, skipFromFirst=500).rename(
                 1000)], axis=1)
    # Combined statistic: product of the three window means.
    df['comb'] = df[200] * df[500] * df[1000]

    fig = plt.figure(figsize=(7, 4.5), dpi=300);
    pplt.Manhattan(data=sort(df.rename(columns={'comb': '200*500*1000'})), fig=fig, markerSize=2, ticksize=8,
                   sortedAlready=True);
    [pplt.setSize(ax, 5) for ax in fig.get_axes()];
    plt.gcf().subplots_adjust(bottom=0.15);
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('SNPbased'))
    pplt.Genome(df.comb);
    plt.tight_layout(pad=0.1)

    # analyzie()
    # scanSFS()
    # outlier()
    # scanSFSSNPbased()
    # Candidate windows: local outliers of the combined statistic with q=0.9.
    a = df.comb
    o = localOutliers(a, q=0.9);
    fig = plt.figure(figsize=(7, 1.5), dpi=300);
    pplt.Manhattan(data=a, Outliers=pd.DataFrame(o), fig=fig, markerSize=2, ticksize=8, sortedAlready=True);
    [pplt.setSize(ax, 5) for ax in fig.get_axes()];
    plt.gcf().subplots_adjust(bottom=0.15);
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('SNPbased.candidates'))

    # Absolute scores next to a running position counter 'i' that restarts
    # at 0 within every level-0 group.
    Scores = pd.concat([scores.rename('scores').abs(), scores.groupby(level=0).apply(
        lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)).rename('i')], axis=1)
    cutoff = FDR(o, Scores);

    # Presumably ``cutoff`` has columns 0.95, 0.99 and 0.999, since they are
    # looked up by those keys below -- verify against FDR().
    a = pd.concat([df, cutoff[cutoff.sum(1) > 0]], axis=1).dropna();
    for fdr in [0.95, 0.99, 0.999]:
        o = a[a.comb > a[fdr]]
        fig = plt.figure(figsize=(7, 1.5), dpi=300);
        pplt.Manhattan(data=df.comb, Outliers=pd.DataFrame(o), fig=fig, markerSize=2, ticksize=8, sortedAlready=True);
        [pplt.setSize(ax, 5) for ax in fig.get_axes()];
        plt.gcf().subplots_adjust(bottom=0.15);
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format('SNPbased.fdr{}'.format(fdr)))
示例#15
0
文件: runTime.py 项目: airanmehr/bio
        columns=["time"],
    )
    / 10
)
# Label the HMM timings, merge all methods and keep only label + time.
HMM["n"] = r"$H$"
a = pd.concat([cmh, fit, comale, HMM, a])[["n", "time"]]

# Mean time per method, ascending; also exported (rounded) as a LaTeX table.
g = a.groupby("n").mean().time.sort_values()
gg = g.round(3).reset_index()
gg.columns = ["Method", "Avg. Time per Locus"]
utl.DataframetolaTexTable(gg, fname=utl.paperFiguresPath + "../tables/times.tex")

# Tick labels are the method names in order of increasing mean time.
ticks = list(g.index)

dpi = 300
fig = plt.figure(figsize=(4, 1.5), dpi=dpi)
sns.boxplot(x="n", y="time", data=a, linewidth=0.5, whis=100, color="gray")
plt.gca().set_yscale("log")
plt.xticks(plt.xticks()[0], ticks)
plt.gca().set_ylabel("Time (seconds)")
plt.gca().set_xlabel("Method")

pplt.setSize(plt.gca(), 6)
plt.gcf().subplots_adjust(bottom=0.25)
# plt.locator_params(axis='y',nbins=3)
# mpl.rc('ytick', labelsize=6)
# plt.tight_layout(h_pad=-1)
pplt.savefig("runTime", dpi=dpi)
plt.show()
示例#16
0
文件: Depth.py 项目: airanmehr/bio
def plotDepth():
    """Plot depth distributions from ``CD.F59.df`` in four panels.

    (A) counts of every 'D' value, (B) their running total, (C) counts of the
    per-row minimum and (D) its running total.  A dashed vertical line marks
    x=50 in every panel.  The figure is saved as 'depth'.
    """
    sns.set_style("whitegrid", {"grid.color": "1", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    sns.set_context("notebook", font_scale=1.4, rc={"lines.linewidth": 2.5})
    d = pd.read_pickle(utl.outpath + 'real/CD.F59.df').xs('D', level='READ', axis=1)
    # The next two expressions are evaluated and their results discarded.
    (d.min(1) > 50).sum()

    (d > 50).sum().sum()

    # All values of ``d`` flattened into one Series.
    z = pd.Series(np.ndarray.flatten(d.values))
    fontsize = 6
    mpl.rcParams.update({'font.size': fontsize})
    plt.figure(figsize=(6, 4), dpi=300);
    # Panel A: how often each value occurs.
    plt.subplot(2, 2, 1);
    z.value_counts().sort_index().plot()
    plt.xlim([0, 200]);
    plt.xlabel('Depth');
    plt.ylabel('Number of Measurments' + '\n (out of {:.1f}M)'.format(z.shape[0] / 1e6));
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    plt.title('Scaled PDF')
    pplt.annotate('(A)', xpad=0.85, ypad=0.45, fontsize=fontsize)
    plt.axvline(50, linestyle='--', linewidth=1, color='k')
    pplt.setSize(plt.gca(), fontsize)
    # Panel B: cumulative count of the same values.
    plt.subplot(2, 2, 2);

    z.value_counts().sort_index().cumsum().plot()
    plt.xlim([0, 200])
    plt.ylim([-3e5, 2.05 * 1e7])
    plt.xlabel('Depth');
    plt.title('Scaled CDF')
    pplt.annotate('(B)', xpad=0.85, ypad=0.45, fontsize=fontsize)
    plt.axvline(50, linestyle='--', linewidth=1, color='k')
    pplt.setSize(plt.gca(), fontsize)
    # Panel C: counts of the minimum taken along axis 1.
    plt.subplot(2, 2, 3);
    d.min(1).value_counts().sort_index().plot()
    plt.xlim([0, 100]);
    plt.xlabel('Minimum Depth of each Variant');
    plt.ylabel('Number of Variants' + '\n (out of {:.1f}M)'.format(d.shape[0] / 1e6));
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    plt.rc('font', size=fontsize)
    pplt.annotate('(C)', xpad=0.85, ypad=0.45, fontsize=fontsize)
    plt.axvline(50, linestyle='--', linewidth=1, color='k')
    pplt.setSize(plt.gca(), fontsize)
    # Panel D: cumulative count of those minima.
    plt.subplot(2, 2, 4);
    d.min(1).value_counts().sort_index().cumsum().plot()
    plt.xlim([0, 60])
    plt.ylim([0.25 * -1e5, plt.ylim()[1]])
    plt.xlabel('Minimum Depth of each Variant');
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
    pplt.annotate('(D)', xpad=0.85, ypad=0.45, fontsize=fontsize)
    plt.axvline(50, linestyle='--', linewidth=1, color='k')
    pplt.setSize(plt.gca(), fontsize)
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.gcf().tight_layout(h_pad=0.1)
    fontsize = 6
    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': fontsize});
    mpl.rc('text', usetex=True)
    # NOTE(review): the global font size is set to 1 right before saving,
    # overriding the size given two lines above -- confirm this is intended.
    mpl.rcParams.update({'font.size': 1})

    pplt.savefig('depth', 300)
    plt.show()
示例#17
0
文件: PowerROC.py 项目: airanmehr/bio
# Script: draw the ROC data returned by evl.randomROCData() in two side-by-side
# panels and save the figure as 'powerROC'.
# ``pd``, ``mpl`` and ``plt`` are not imported in the lines shown here; they
# must already be in scope.  ``reload`` is the Python 2 builtin.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = False

import popgen.TimeSeries.RNN.Evaluate as evl
import seaborn as sns
import popgen.Plots as pplt

df = evl.randomROCData()
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 6});
mpl.rc('text', usetex=True)

# NOTE(review): ``df`` was produced by ``evl`` before this reload.
reload(evl)
dpi = 300
plt.figure(figsize=(4, 2), dpi=dpi)
# Left panel: plotROC called with FPth=1.
plt.subplot(1, 2, 1)
evl.plotROC(df, FPth=1)
plt.ylabel('True Positive Rate (TPR)')
plt.xlabel('False Positive Rate (FPR)')
pplt.setSize(plt.gca())
# Right panel: plotROC with its default FPth; the legend is added here only.
plt.subplot(1, 2, 2)
evl.plotROC(df)
plt.xlabel('False Positive Rate (FPR)')
pplt.setSize(plt.gca())
plt.legend(['ROC Curve', 'Random Hypothesis', 'FPR Cutoff'], loc='upper left', fontsize=6)
plt.tight_layout(pad=0.1)
pplt.savefig('powerROC', dpi)
plt.show()
示例#18
0
文件: topSNPs.py 项目: airanmehr/bio
# Script: plot the two top-ranked sites side by side and save as 'topSNPs'.
# ``rutl``, ``utl``, ``pd``, ``sns`` and ``mpl`` are not imported in the lines
# shown here; they must already be in scope.  ``reload`` is the Python 2 builtin.
a = rutl.loadAllScores().groupby(level='h', axis=1).apply(rutl.HstatisticAll)
df = pd.read_pickle(utl.outpath + 'real/scores.df')
# Index label of the largest 'lrd' value.
i = df.lrd.sort_values().index[-1]
# Bare expression: the looked-up row is not stored or printed by a script run.
df.loc[i]

cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df')

import popgen.Plots as pplt
import pylab as plt

names = rutl.loadSNPIDs()
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']});
mpl.rc('text', usetex=True)
reload(pplt)
f, ax = plt.subplots(1, 2, sharey=True, dpi=300, figsize=(4, 2))
# Left panel: the site with the largest value in column 0.5 of ``a``.
i = a[0.5].sort_values().index[-1]
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})

pplt.plotSiteReal(cd.loc[i], ax=ax[0], legend=True)
ax[0].set_title('{}:{:.0f} ({})'.format(i[0], i[1], names.loc[i]), fontsize=8)

# Right panel: the site with the largest 'lrdiff' value.
i = df.lrdiff.sort_values().index[-1]
pplt.plotSiteReal(cd.loc[i], ax=ax[1])
# Same context settings as the call above, applied a second time.
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})

ax[1].set_title('{}:{:.0f} ({})'.format(i[0], i[1], names.loc[i]), fontsize=8)
plt.gcf().subplots_adjust(bottom=0.2)
pplt.savefig('topSNPs', 300)
plt.show()
示例#19
0
文件: AFS.py 项目: airanmehr/bio
# Script: bar plot of the counted site-frequency spectrum with a reference
# curve on top, saved as 'sfs'.
sns.set_style("whitegrid", {"grid.color": ".9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
mpl.rc('font', family='serif', serif=['Computer Modern'], size=50)
mpl.rc('text', usetex=True)

# How many columns of H0 have each column sum, with an explicit zero entry
# for a sum of 0.
sfs = Simulation.Simulation.Load().H0.sum().value_counts().sort_index()
sfs.loc[0] = 0
sfs.sort_index(inplace=True)
# f=utl.simoutpath+'TimeSeries/msms/'+'L50K.0000.msms'
ticks = np.array([1, 100, 199])
tickss = ['{}/200'.format(x) for x in ticks]


i = np.arange(1, 201)
plt.figure(figsize=(7, 2), dpi=300)
# Column 0: the curve (reversed i) / i; column 1: the counted spectrum.
curve = pd.Series(i[::-1] / (i * 1.0), index=i)
df = pd.concat([curve, sfs], axis=1)
df[1].plot(kind='bar', alpha=0.6)
df[0].plot(color='red', lw=0.7)
plt.ylim([0, 220])
plt.xlim([0, 200])
plt.xlabel('Frequency')
plt.ylabel('Num. of Variants')
plt.xticks(ticks, tickss)
fontsize = 8
pplt.setSize(plt.gca(), fontsize)
plt.legend(['Empirical SFS', 'Theoretical SFS'], prop={'size': fontsize})
plt.gcf().subplots_adjust(bottom=0.25)
# plt.tight_layout()
plt.grid(False)
pplt.savefig('sfs', 200)
plt.show()
示例#20
0
    for i in range(max_gen):
        p=x[-1];q=1-p
        x+=[(w11*p*p+w01*p*q)/(w11*p*p+2*w01*p*q+w00*q*q)]
    return pd.Series(x)
# Script: one trajectory f(x0, s, h, max_gen) per value of h, drawn together.
# ``f``, ``x0``, ``s`` and ``dpi`` must already be defined.
H = [0, 0.5, 1, 2]
index = ['$h$={}'.format(h) for h in H]
max_gen = 200

# One column per h after the transpose.
df = pd.DataFrame([f(x0, s, h, max_gen) for h in H], index=index).T
# Triples (1+s, 1+h*s, 1) labelled AA / aA / aa, rows reversed after the
# transpose.  Only used by the commented-out third panel below.
dom = pd.DataFrame([(1 + s, 1 + h * s, 1) for h in H], index=index, columns=['AA', 'aA', 'aa']).T.iloc[::-1]

plt.figure(figsize=(4, 2.5), dpi=dpi)
# ax=plt.subplot(1,3,1);
df.plot(ax=plt.gca(), linewidth=1, legend=False, color=pplt.getColorMap(len(H)))
plt.ylim([0, 1.01])
plt.xlabel('Generations')
plt.ylabel('Carrier Frequency')
# pd.Series(utl.sig((np.arange(max_gen)+1)*s/2 + utl.logit(x0))).plot(style='--',linewidth=2,color='k')
plt.grid()

# ax=plt.subplot(1,3,2);
# df=2*df*(1-df)
# df.plot(ax=ax,linewidth=2,legend=False);plt.xlabel('Generations');plt.ylabel('Heterozygosity ($2pq$)')
# plt.grid();plt.ylim([0,0.55])
# ax=plt.subplot(1,3,3);
# dom.plot(ax=ax,grid=True,linewidth=2);plt.xlabel('Genotype');plt.ylabel('Relative Fitness')
plt.legend(loc='best')
plt.gca().locator_params(nbins=3)
plt.gcf().subplots_adjust(bottom=0.2)
示例#21
0
    df = pd.concat([a[col].rank(ascending=False).loc[v.FBID.values] for col in paddings], axis=1)
    pvals = a[paddings].apply(lambda xy: getPvalAUC(xy, v))

    # df = pd.DataFrame(a[col].rank(ascending=False).loc[v.FBID.values])

    dff = pd.DataFrame(
            [pd.Series(df[col].sort_values(ascending=False).values, index=range(1, df.shape[0] + 1), name=col) for col
             in
             df.columns]).T
    dff.columns = map(lambda x: 'AUC={}, {}'.format(1 - np.round(x[3] / Genes.shape[0], 2), pvals[x[0]]),
                      zip(dff.columns, dff.median(), dff.min(), dff.mean()))
    dff = N - dff
    if df.shape[1] == 1:
        dff.plot(ax=plt.gca(), color='r')
    else:
        dff.plot(ax=plt.gca(), colors=pplt.getColorMap(df.shape[1]), rotation=90)
    # plt.title('padding={:.0f}K'.format(padding / 1000))
    plt.plot([1, df.shape[0]], [1, N], ls="--", c=".3")

    xticks = np.unique(np.append((np.round(plt.xticks()[0][:-1])).astype(int), df.shape[0]))
    if v.shape[0] < 10:
        plt.xticks(xticks, v.name, rotation=30)
    else:
        plt.xticks(xticks)
    yticks = plt.yticks()[0]
    # yticks=yticks.max()-yticks
    plt.xlim([1, df.shape[0]])

    # plt.yticks(yticks,N-yticks)
    plt.ylim([0, 16995])
    plt.yticks([0, 2000, 7000, 12000, 17000], [17000, 15000, 10000, 5000, 0])
示例#22
0
文件: Plot.py 项目: airanmehr/bio
def plotPowerCLRQ(recompute=False):
    """Plot bootstrap-averaged power of the HMM score against the quantile cutoff.

    Draws a 2x3 grid of panels (rows: nu0 = 0.005 and 0.1; columns: depth 30,
    100 and 300), one curve per ``ModifiedLR`` value, and saves the figure
    under the name ``CLRQ``.

    Relies on names defined elsewhere in the project: ``f``,
    ``loadHMMAllDepths``, ``getPower``, ``pplt``, ``utl``, ``sns``, ``mpl``.

    Parameters
    ----------
    recompute : bool
        If True, rebuild ``ROC/PowerCLRTable.df`` and
        ``ROC/PowerCLRTableBootstrap.df`` under ``utl.outpath`` before
        plotting. Otherwise only the cached bootstrap table is loaded (the
        full table is not needed for plotting).
    """
    dpi = pplt.PLOS.dpi
    fontsize = 7
    nu0s = [0.005, 0.1]
    depths = [30, 100, 300]
    sns.set_style("whitegrid", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": ".09"})
    if recompute:
        a = f(loadHMMAllDepths())
        a = a[a.index.get_level_values('coverage') != np.inf]
        # Cutoffs 0, 0.1, ..., 1 plus a finer 0.91, ..., 0.99 grid.
        Q = np.sort(np.append(np.arange(0, 1.01, 0.1), 0.9 + np.arange(0, 1, 0.1)[1:] / 10))
        levels = list(range(6))
        newOrder = [0, 6, 7] + list(range(1, 6))

        def tailMean(x, q):
            # Mean of the values at or above the q-th quantile of the group.
            return x[x >= x.quantile(q)].mean()

        absA = a.abs()  # loop-invariant, so computed once rather than per quantile
        df = pd.concat([a.groupby(level=levels).apply(lambda x: tailMean(x, q)) for q in Q], axis=1)
        dfa = pd.concat([absA.groupby(level=levels).apply(lambda x: tailMean(x, q)) for q in Q], axis=1)
        df.columns = pd.MultiIndex.from_product([Q, [False]], names=['Quantile', 'ModifiedLR'])
        df = df.stack(df.columns.names).reorder_levels(newOrder)
        dfa.columns = pd.MultiIndex.from_product([Q, [True]], names=['Quantile', 'ModifiedLR'])
        dfa = dfa.stack(dfa.columns.names).reorder_levels(newOrder)
        df = pd.concat([df, dfa])
        df.to_pickle(utl.outpath + 'ROC/PowerCLRTable.df')
        df = df[df.index.get_level_values("coverage") != np.inf]
        # 100 replicates, each a sorted draw of 250 out of 1000 values without
        # replacement; every column is used to slice the last index level of df.
        boot = pd.DataFrame([np.sort(np.random.choice(1000, 250, replace=False)) for _ in range(100)]).T
        print(boot)
        dfboot = boot.groupby(level=0, axis=1).apply(
            lambda x: getPower(df.loc[pd.IndexSlice[:, :, :, :, :, :, :, x[x.name].values]].sort_index(),
                               groupbyLevels=list(range(6))).xs('HMM', level='method')).groupby(
            level=list(range(4))).mean()
        print(dfboot)
        dfboot.columns.name = 'i'
        dfboot = dfboot.stack('i').reset_index(['i', 'ModifiedLR', 'Quantile'])
        print(dfboot)
        dfboot.to_pickle(utl.outpath + 'ROC/PowerCLRTableBootstrap.df')
    else:
        dfboot = pd.read_pickle(utl.outpath + 'ROC/PowerCLRTableBootstrap.df')

    dfboot.Quantile = (dfboot.Quantile * 100).astype(int)
    sns.set_context(rc={"lines.linewidth": 0.5})
    pistar = {}  # axes -> label giving the quantile with the highest mean power

    fig, axes = plt.subplots(2, 3, sharey=True, sharex=True, figsize=(6, 2.5), dpi=dpi)
    for nu0, axs in zip(nu0s, axes):
        for depth, ax in zip(depths, axs):
            a = dfboot.loc[depth].loc[nu0]
            if nu0 == 0.005:
                ax.set_title(r'$\lambda$={}'.format(str(depth)).replace('inf', r'$\infty$'))
            sns.tsplot(data=a, time='Quantile', unit='i', value=0, condition='ModifiedLR', ci=99.99, legend=False,
                       color=['r', 'darkblue'], ax=ax)
            pistar[ax] = r'($\pi^*=${})'.format(a.groupby('Quantile')[0].mean().idxmax())
            pplt.setSize(ax, fontsize)
            ax.set_xlabel('')
            ax.set_ylabel('')
    axes[0][0].locator_params(nbins=3)
    for ax in axes[1]:
        ax.set_xlabel(r'$\pi$')
    # Label each row with its own nu0. Reusing the loop variable after the
    # plotting loop (as before) labelled both rows with the last nu0.
    for nu0, axs in zip(nu0s, axes):
        axs[0].set_ylabel('Avg. Power\n({} Sweep)'.format(('Soft', 'Hard')[nu0 == 0.005]))
    plt.gcf().subplots_adjust(bottom=0.2)
    for j, x in enumerate(axes.reshape(-1)):
        pplt.annotate('({})'.format('ABCDEF'[j]), ax=x, fontsize=fontsize)
    for ax, label in pistar.items():
        pplt.annotate(label, ax=ax, fontsize=fontsize, xpad=0.6)
    axes[1][-1].legend([r'$\mathcal{H}$', r'$\mathcal{H}^+$'], loc='lower right', prop={'size': fontsize})

    mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': fontsize})
    mpl.rc('text', usetex=True)
    pplt.savefig('CLRQ', dpi)
    plt.show()
示例#23
0
# Interactive exploration snippet (Python 2). It relies on names that are not
# assigned in this fragment: a, L17, dominace, x0, x17, xt, cd, H, C, L, scan,
# hutl, utl, pplt. Bare expressions such as `e` / `d` only display a value in
# an interactive session.
reload(pplt)
# Keep the entries of `a` above 20 and name the resulting series 'score'.
b=a[a>20].rename('score')#.iloc[:150]
# Load the annotation table, keep the listed columns, drop duplicate rows and
# index it by (CHROM, POS).
ann=pd.read_pickle('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Dmelanogaster/Hypoxia/pops/all.ANN.df')["ID  Annotation Annotation_Impact       Gene_Name Gene_ID REF Allele REF_flybaseVCF ALT".split()].reset_index().drop_duplicates().set_index(['CHROM','POS'])
ann.loc[('2L',10558452)]
# Inner-join the scores with the other tables, attach the annotations
# (default join), and sort by column 'L17'.
d=pd.DataFrame(b).join(L17,how='inner').join(dominace,how='inner').join(x0,how='inner').join(x17,how='inner').join(xt,how='inner').join(ann).join(cd,how='inner').sort_values('L17')

# Element-wise exp((H - C) / 10); plot the values above 2 and list them with annotations.
z=(H-C).apply(lambda x: np.exp(x/10))
pplt.GenomeChromosomewise(z[z>2])
e=pd.DataFrame(z).join(ann).sort_values(0)
e
d
d.loc['3R'].loc[5663533]

pplt.Manhattan(L17)
reload(pplt)
# d is sorted ascending by 'L17', so d.index[-1] is the row with the largest 'L17'.
pplt.plotSiteReal(cd.loc[d.index[-1]])
# The ten largest scores, passed as outliers to the plot below.
o=b.sort_values().iloc[-10:]
# Per top-level column group, the ratio C / D at that same site.
hutl.load()['L'][180].groupby(level=0,axis=1).apply(lambda x: x[x.name].C/x[x.name].D).loc[d.index[-1]]

pplt.GenomeChromosomewise(b,outliers=o)
pplt.Manhattan(L)
# Rows 10000-14999 of the chromosome 3R part of scan.L.
a=scan.L.copy(True)
a=pd.DataFrame(a[a.index.get_level_values('CHROM')=='3R']).iloc[10000:15000]

X=a.loc['3R']
o=utl.localOutliers(scan.L);
# NOTE(review): lowercase `x` is not assigned anywhere in this fragment (only `X` is) - confirm which was meant.
pplt.Manhattan(x)
reload(utl)
i=X.idxmax()
pad=10000
X.shape
示例#24
0
文件: Plot.py 项目: airanmehr/bio
def plotRank():
    """Plot the empirical CDF of the causal-site ranks and print AUC values.

    The pickled rank tables of CMH, HMM and GP under ``utl.outpath`` are
    concatenated. For each value of the first index level (printed as
    ``depth``) a 2x4 grid is drawn, one panel per (nu0, s) pair with one curve
    per method, and saved as ``rank<depth>``. ``fixColor``, ``comaleName``,
    ``pplt``, ``utl`` and ``mpl`` come from elsewhere in the project.
    """

    def computeRanks():
        """Rank the scores within each group and pickle the ranks of the causal rows."""
        print('ranking...')

        def causalRanks(scores):
            ranked = scores.groupby(level=range(5)).rank(ascending=False)
            return ranked.xs(True, level='causal')

        def dropInfiniteCoverage(scores):
            return scores[scores.index.get_level_values('coverage') != np.inf]

        def positives(scores):
            return scores.xs(1, level='label').fillna(0)

        # causalRanks(positives(f(pd.read_pickle(utl.outpath + 'ROC/HMM')))).to_pickle('{}ROC/ranks.HMM.df'.format(utl.outpath))
        gp = positives(dropInfiniteCoverage(pd.read_pickle(utl.outpath + 'ROC/GP')))
        causalRanks(gp.LR).to_pickle('{}ROC/ranks.GP.df'.format(utl.outpath))

        a = pd.read_pickle(utl.outpath + 'ROC/FIT')['FIT']
        # Missing FIT scores are replaced by uniform random numbers.
        a[a.isnull()] = np.random.rand(a.isnull().sum())
        # The next two results are discarded (left over from interactive checks).
        a.isnull().sum()

        a.xs(True, level='causal')
        # causalRanks(a).to_pickle('{}ROC/ranks.FIT.df'.format(utl.outpath))
        cmh = pd.read_pickle(utl.outpath + 'ROC/CMH')['CMH']
        causalRanks(positives(dropInfiniteCoverage(cmh))).to_pickle(
            '{}ROC/ranks.CMH.df'.format(utl.outpath))

    # computeRanks()
    print('plotting...')
    a = pd.concat([pd.read_pickle('{}ROC/ranks.{}.df'.format(utl.outpath, method))
                   for method in ['CMH', 'HMM', 'GP']])
    fontsize = 7
    dpi = 300

    def empiricalCDF(ranks):
        # Cumulative share of observations at or below each observed rank.
        return ranks.value_counts().sort_index().cumsum() / ranks.shape

    def addlast(group):
        # Add a final point with value 1 at rank 1200, then drop the group key from the index.
        group[group.name + (1200,)] = 1
        return group.loc[group.name]

    def fil(col):
        # In-place forward fill; a missing first entry becomes 0.
        if pd.isnull(col.iloc[0]):
            col.iloc[0] = 0
        for pos in range(1, col.size):
            if pd.isnull(col.iloc[pos]):
                col.iloc[pos] = col.iloc[pos - 1]
        return col

    for depth, _ in a.groupby(level=0):
        print(depth)
        AUC = []
        dists = a.loc[depth].groupby(level=[0, 2, 1]).apply(empiricalCDF)
        dists = dists.groupby(level=range(3)).apply(addlast)

        fig, axes = plt.subplots(2, 4, figsize=(7, 3), dpi=dpi, sharey=True, sharex=True)
        axes = axes.reshape(-1)
        j = 0  # index of the panel being drawn
        for nu0, dfnu in dists.groupby(level=0):
            sweep = 'Hard' if nu0 == 0.005 else 'Soft'
            for s, grp in dfnu.loc[nu0].groupby(level=0):
                # One column per method, indexed by rank.
                cdf = grp.loc[s].unstack(level='method')  # .rename(columns={'HMM': r'$\mathcal{H}$'})
                cdf = cdf.apply(fil)
                # Rank-weighted average of each method's CDF.
                auc = cdf.apply(lambda col: col.dot(cdf.index.values)) / np.sum(cdf.index.values)
                AUC.append(auc.rename((depth, nu0, s)))
                color = fixColor(pd.DataFrame(None, index=cdf.columns)).loc[cdf.columns.values, 'color'].tolist()
                cdf.columns = [name.replace('HMM', comaleName) for name in cdf.columns]

                if cdf.shape[0] == 2:
                    cdf.index = np.ceil(cdf.index.values)
                panel = axes[j]
                cdf.plot(color=color, ax=panel, lw=1, legend=False)
                panel.set_ylim([-0.02, 1.02])
                # pplt.annotate('$s$={}'.format(s), xpad=0.6, ypad=0.25, fontsize=fontsize + 1, ax=panel)
                panel.set_title('$s$={}'.format(s), fontsize=fontsize + 1)
                if j > 3:
                    panel.set_xlabel('Rank', fontsize=fontsize)
                panel.set_ylabel(r'CDF ({} Sweep)'.format(sweep), fontsize=fontsize)
                panel.locator_params(axis='x', nbins=5)
                # The style/rc calls stay inside the loop, after the panel is drawn, as in the original.
                pplt.setStyle(lw=1, fontsize=fontsize, fontscale=0.1)
                mpl.rcParams.update({'font.size': 2})
                mpl.rc('xtick', labelsize=6)
                mpl.rc('ytick', labelsize=6)
                if j == 7:
                    panel.legend(loc='lower right', fontsize=fontsize)
                j += 1
        # plt.tight_layout(pad=0.1)
        plt.xlim([0, 1200])
        plt.gcf().subplots_adjust(bottom=0.15)
        print(pd.concat(AUC, axis=1).round(2).T.reset_index())
        print(depth)
        pplt.savefig('rank{}'.format(depth), dpi)
示例#25
0
文件: Plot.py 项目: airanmehr/bio
def plotBias():
    """Draw bias violin plots per depth and write the figure and a LaTeX summary table.

    For each depth in 30, 100, 300 a 2x2 figure is produced. The top row shows
    ``ROC/bias.df`` (both methods, split violins) for nu0 = 0.005 and 0.1; the
    bottom row shows ``ROC/bias.h.df`` restricted to ``comaleName``. The
    figure is saved as ``bias.<depth>`` and the mean/std per method and nu0 is
    written to ``tables/bias.<depth>.tex`` under ``utl.paperPath``.

    ``ABCD``, ``comaleName``, ``pplt``, ``utl`` and ``sns`` are expected to be
    defined at module level elsewhere in the project.
    """

    def computeBias():  # s-shat
        """Pickle, for the causal rows, (last element of the group key) minus the estimate."""
        print('computing bias...')
        a = pd.read_pickle('{}ROC/{}'.format(utl.outpath, 'HMM')).s
        gp = pd.read_pickle(utl.outpath + 'ROC/GP.causal').s
        b = pd.concat([a, gp]).sort_index().xs(True, level='causal')

        # x.name is the group key; its last element is presumably the true s - confirm.
        bias = b.groupby(level=range(4)).apply(lambda x: x.name[-1] - x)
        bias.to_pickle('{}ROC/bias.df'.format(utl.outpath))

    def biash():
        """Derive ``bias.h.df`` from the HMM rows of ``bias.df`` (adds random noise, rescales, centres)."""
        a = pd.read_pickle('{}ROC/bias.df'.format(utl.outpath))
        a = a[a.index.get_level_values('method') == 'HMM']
        a = a + np.random.randn(a.size)
        a /= 10
        a[a.index.get_level_values('nu0') == 0.1] /= 2
        # Centre each group on zero. Iterating over the groupby and doing
        # `g -= g.mean()` only modified temporary copies and left `a` unchanged.
        a = a.groupby(level=list(range(4))).transform(lambda g: g - g.mean())
        a.to_pickle('{}ROC/bias.h.df'.format(utl.outpath))

    def loadBias(fname, depth):
        # Read one bias table once, rename the HMM method to comaleName and
        # restore the original index levels.
        raw = pd.read_pickle(utl.outpath + fname).loc[depth]
        tbl = raw.reset_index()
        tbl.method = tbl.method.replace({'HMM': comaleName})
        return tbl.set_index(raw.index.names).sort_index().s

    def violinFrame(series):
        # Long-format frame for seaborn with columns 'bias', 'method' and 's'.
        frame = pd.DataFrame(series)
        frame.columns = ['bias']
        frame['method'] = frame.index.get_level_values('method')
        frame['s'] = frame.index.get_level_values('S')
        return frame

    fontsize = 6
    # computeBias()
    dpi = 300
    nu0s = [0.005, 0.1]
    for depth in [30, 100, 300]:
        reload(pplt)
        plt.figure(figsize=(5, 5), dpi=dpi)
        df = loadBias('ROC/bias.df', depth)
        dfh = loadBias('ROC/bias.h.df', depth)
        # Small uniform jitter in [-0.005, 0.005) on the (nu0=0.1, comaleName) entries.
        df[(0.1, comaleName)] += np.random.rand(df[(0.1, comaleName)].shape[0]) / 100 - 0.005
        # df[(0.005, 'HMM')] += np.random.rand(df[(0.005, 'HMM')].shape[0]) / 100 - 0.005
        ax = []

        # Top row: both methods, split violins.
        for j, nu0 in enumerate(nu0s):
            if j == 0:
                ax.append(plt.subplot(2, 2, j + 1))
            else:
                ax.append(plt.subplot(2, 2, j + 1, sharex=ax[-1], sharey=ax[-1]))
            a = violinFrame(df.loc[nu0])
            sns.violinplot(x="s", y="bias", hue="method", data=a, linewidth=1,
                           palette={comaleName: "r", "GP": "darkblue"}, split=True, ax=ax[j])
            ax[j].set_title(r'$\nu_0=${} ({} Sweep)'.format(nu0, ('Soft', 'Hard')[nu0 == 0.005]),
                            fontsize=fontsize + 2)
            # pplt.annotate(r'$\nu_0=${} ({} Sweep)'.format(nu0, ('Soft', 'Hard')[nu0 == 0.005]),xpad=0.05,ypad=1,fontsize=fontsize)
            plt.locator_params(axis='y', nbins=5)
            pplt.setSize(plt.gca(), fontsize)
            ax[j].set_xlabel('$s$', fontsize=fontsize + 2)
            ax[j].legend(title='', loc='upper right', fontsize=fontsize + 2)
            ax[j].set_ylabel((r'Bias ($s-\hat{s}$)', '')[j % 2], fontsize=fontsize + 2)
            # Only the second panel keeps its legend.
            if j != 1:
                ax[j].legend_.remove()
            pplt.annotate(ABCD[j], fontsize=fontsize + 2, ax=ax[j])

        # Bottom row: comaleName only, from the bias.h table.
        for j, nu0 in enumerate(nu0s, 2):
            if j == 2:
                ax.append(plt.subplot(2, 2, j + 1))
            else:
                ax.append(plt.subplot(2, 2, j + 1, sharex=ax[-1], sharey=ax[-1]))
            a = violinFrame(dfh.loc[nu0])
            sns.violinplot(x="s", y="bias", hue="method", data=a[a['method'] == comaleName], linewidth=1,
                           palette={comaleName: "r", "GP": "g"}, ax=ax[j])
            plt.locator_params(axis='y', nbins=5)
            pplt.setSize(plt.gca(), fontsize)
            ax[j].set_xlabel('$h$', fontsize=fontsize + 2)
            ax[j].set_ylabel((r'Bias ($h-\hat{h}$)', '')[j % 2], fontsize=fontsize + 2)
            pplt.annotate(ABCD[j], fontsize=fontsize + 2, ax=ax[j])
            ax[j].legend_.remove()

        # Mean and standard deviation per (method, nu0) for the LaTeX table.
        df = df.groupby(level=['method', 'nu0']).describe().round(3).unstack(['method', 'nu0']).loc[
            ['mean', 'std']].T.reset_index().sort_values('nu0')
        df.columns = ['Method', r'$\nu_0$', 'Mean', 'STD']
        utl.DataframetolaTexTable(df, fname=utl.paperPath + 'tables/bias.{}.tex'.format(depth))
        pplt.savefig('bias.{}'.format(depth), dpi)
示例#26
0
# Interactive exploration snippet (Python 2). It relies on names that are not
# assigned in this fragment: s, scores, y, cdAll, dta, mkv, S, res, utl, pplt.
pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(s)))

# Put the window scan and the SNP scan side by side as columns 'win' and 'snp'.
scan=pd.concat([utl.scanGenome(utl.zpvalgenome(s)).rename('win'),utl.scanGenomeSNP(utl.zpvalgenome(s)).rename('snp')],1)
pplt.Manhattan(scan)
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome(s))))
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(scores.abs())))
reload(utl)
pplt.GenomeChromosomewise(utl.scanGenomeSNP(utl.zpvalgenome2tail(s)))
scores.sort_values()
# SNP scan that sums, per window, the absolute scores at or above the window median.
pplt.GenomeChromosomewise(utl.scanGenomeSNP(scores.abs(),lambda x: x[x>=x.quantile(0.5)].sum()))
# Values and their ranks under the column keys 'val' and 'rank', sorted by ('val', 's').
df=pd.concat([scores,s],1);df=pd.concat([df,df.rank()],1,keys=['val','rank']).sort_values(('val','s'))
dfy=pd.concat([df,y],1).dropna()
dfy.sort_values(0)

# Last row after the ascending sort, i.e. the largest ('val', 's').
i=df.index[-1];
cdi=cdAll.loc[i];print cdi.unstack('REP');pplt.plotSiteReal(cdi)
cdiun=cdi.unstack('REP')
CD,E=dta.precomputeCDandEmissionsFor(pd.DataFrame(cdi).T)
h=0.5
reload(mkv)

mkv.computeLikelihoodReal((CD, E, 0, 0.5))
# One likelihood per value x in S (third tuple element), collected into a series indexed by x.
likes=pd.concat(map(lambda x:mkv.computeLikelihoodReal((CD, E, x, 0.5)),S),keys=S).reset_index().iloc[:,[0,-1]].set_index('level_0')[0]
likes[0]

reload(pplt)
# Left panel: likelihood curve; right panel: the site's data.
plt.figure(figsize=(6,3),dpi=150);plt.subplot(1,2,1);pd.DataFrame(likes).plot(ax=plt.gca());plt.subplot(1,2,2);pplt.plotSiteReal(cdi,ax=plt.gca());print cdi.unstack('REP')

# Keep columns 0 and 3 of res and turn them into a series indexed by the first.
res=res.reset_index().iloc[:,[0,3]];res=res.set_index(res.columns[0]).iloc[:,0]

NN=np.arange(100,1500,100)
示例#27
0
# Grid of frequencies 0, 0.001, ..., 1, indexed by itself.
nu = pd.Series(np.arange(0, 1.00001, 0.001), index=np.arange(0, 1.00001, 0.001))


def bio(cd):
    """Return the binomial probability of ``c`` successes in ``d`` trials at every grid value of ``nu``.

    Parameters
    ----------
    cd : sequence of two numbers
        ``(c, d)``: the count and the total.

    Returns
    -------
    pandas.Series
        ``comb(d, c) * nu**c * (1 - nu)**(d - c)``, indexed like the
        module-level ``nu`` grid.
    """
    # scipy.misc.comb was removed from SciPy; scipy.special.comb is the
    # supported function and returns the same floating-point value.
    from scipy.special import comb

    c, d = cd
    return comb(d, c) * (nu ** c) * ((1 - nu) ** (d - c))


# Normalised bio() curves over the nu grid for the same ratio c/d = 1/5 at
# three sizes: (1, 5), (10, 50) and (100, 500).
cd = np.array([1, 5])
a, b, c = [bio(cd * k) for k in (1, 10, 100)]
a, b, c = [p / p.sum() for p in (a, b, c)]
cd = cd * 100  # same final value as scaling by 10 twice

dpi = 300
fig = plt.figure(figsize=(4, 2), dpi=dpi)
ax = plt.gca()

# One column per curve.
df = pd.DataFrame([a, b, c]).T
df.plot(ax=ax)
ax.legend([r'Pr($\nu |c=1,d=5$)', r'Pr($\nu |c=10,d=50$)', r'Pr($\nu|c=100,d=500$)'], fontsize=6)
ax.set_xlabel(r'$\nu$')
ax.set_ylabel(r'Pr($\nu|c,d$)')
# Lower the bottom limit slightly; keep the automatic upper limit.
ax.set_ylim([-0.0005, ax.get_ylim()[1]])
pplt.setSize(ax, 6)
fig.subplots_adjust(bottom=0.15)
pplt.savefig('statePosterior', dpi)
plt.show()
示例#28
0
def D(nu, n, W0, Pi0):
    """Return ``-log(1 - nu) * W0 / n - Pi0 * nu**2``.

    ``nu`` may be a scalar or anything NumPy can take the logarithm of
    (array, Series); the result has the same shape.
    """
    log_part = -np.log(1 - nu) * W0 / n
    quadratic_part = Pi0 * nu ** 2
    return log_part - quadratic_part


fontsize = 4

# Reference curve from utl.forward (t=1000, s=0.05, x0=0.05), drawn dashed in
# both panels. `sel` and `neut` must be defined before this snippet; they are
# read with columns 'gen', 'exp', 'nu' and 'D'.
nu = utl.forward(t=1000, s=0.05, x0=0.05)

plt.figure(figsize=(6, 3), dpi=300)

# Panel (A): 'nu' over 'gen' for sel (red) and neut, plus the reference curve.
plt.subplot(3, 1, 1)
sns.tsplot(sel, time='gen', value='nu', unit='exp', color='red', ci=99)
sns.tsplot(neut, time='gen', value='nu', unit='exp', ci=99)
nu.plot(color='k', linewidth=1, linestyle='--')

pplt.setSize(plt.gca(), fontsize)
plt.ylabel(r'$\nu_t$', fontsize=fontsize + 2)
plt.ylim([0, 1.05])
plt.title(r'(A)', fontsize=fontsize + 2)
# Hide the x ticks and tick labels of this panel. Booleans are used because
# current matplotlib no longer converts the strings 'on'/'off'; a non-empty
# string such as 'off' would be truthy and leave the ticks visible.
plt.tick_params(
    axis='x',  # changes apply to the x-axis
    which='both',  # both major and minor ticks are affected
    bottom=False,  # ticks along the bottom edge are off
    top=False,  # ticks along the top edge are off
    labelbottom=False)  # tick labels along the bottom edge are off
plt.xlabel('')

# Second panel: 'D' over 'gen', with D(nu, 200, 1, 1) shifted by 0.675 as the dashed curve.
plt.subplot(3, 1, 2)
sns.tsplot(sel, time='gen', value='D', unit='exp', color='red', ci=99)
sns.tsplot(neut, time='gen', value='D', unit='exp', ci=99)
(D(nu, 200., 1, 1) + 0.675).plot(c='k', linewidth=1, linestyle='--')
示例#29
0
import popgen.Util as utl
import popgen.Estimate as est
import popgen.Plots as pplt

home = os.path.expanduser('~') + '/'

# Count table; the allele frequency is C / D within each group of the first
# two column levels.
cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df').sortlevel()
af = cd.groupby(level=[0, 1], axis=1).apply(lambda grp: grp[grp.name].C / grp[grp.name].D)
# Mean frequency across columns at generations 0 and 59.
f0 = af.xs(0, level='GEN', axis=1).mean(1)
f59 = af.xs(59, level='GEN', axis=1).mean(1)

# Three example sites: the first with low start / high end, the last with
# high start / low end, and one picked from the (f0 > 0.4) & (f59 < 0.6) set.
low_to_high = (f0 < 0.3) & (f59 > 0.7)
high_to_low = (f0 > 0.7) & (f59 < 0.2)
third_mask = (f0 > 0.4) & (f59 < 0.6)
i = [af[low_to_high].index[0], af[high_to_low].index[-1], af[third_mask].index[-299]]
# i [('2L', 2955601), ('3R', 25463358), ('X', 22057437)]
# scores = rutl.loadSNPScores().sort_values('lr', ascending=False)
# scores
reload(pplt)
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
mpl.rc('font', family='serif', serif=['Computer Modern'])
mpl.rc('text', usetex=True)
dpi = 300
_, ax = plt.subplots(1, 3, figsize=(6, 2), dpi=dpi, sharex=True, sharey=True)
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})
# One panel per site, titled 'CHROM:POS'; only the first panel requests a legend.
for panel, site in enumerate(i):
    extra = {'legend': True} if panel == 0 else {}
    pplt.plotSiteReal(cd.loc[site], ax=ax[panel], title='{}:{}'.format(site[0], site[1]), **extra)

ax[0].set_ylabel(r'$\nu_t$')
plt.gcf().subplots_adjust(bottom=0.2)
pplt.savefig('trajectoryReal', dpi)
plt.show()
示例#30
0
文件: Markov.py 项目: airanmehr/bio
def plotNull(subp, nu0=0.005, fontsize=5):
    """Draw three panels comparing a normal curve, the Markov chain and observations at tau = 1, 10, 100.

    Parameters
    ----------
    subp : sequence of three ints
        Positions in a 3x3 subplot grid for the tau = 1, 10 and 100 panels.
    nu0 : float
        Starting frequency; selects the observation file, the row of the
        transition matrix and the per-panel plotting constants.
    fontsize : int
        Font size passed to the annotation, title, legend and sizing calls.

    Notes
    -----
    ``subptitle`` is read but not defined in this function, so it must exist
    at module level. Each ``plt.title`` call runs before the following
    ``plt.subplot`` call and therefore acts on the axes that were current
    beforehand.
    """
    obs = pd.read_pickle(utl.outpath + 'markov/neutral.obs.{}.pkl'.format(nu0))
    # First argument 0 is presumably s = 0 (the panels are annotated with $s=0$) - confirm.
    T = Markov.computeTransition(0, N=1000)

    # Hand-picked plotting constants per (nu0, tau): curve scale factor and axis limits.
    dfplt = pd.concat([pd.Series({'scale': 10, 'xlim': [0.0, 0.01], 'ylim': [0, 1]}, name=(0.005, 1)),
                       pd.Series({'scale': 30, 'xlim': [0.06, 0.14], 'ylim': [0, 0.15]}, name=(0.1, 1)),
                       pd.Series({'scale': 30, 'xlim': [0.0, 0.015], 'ylim': [0, 0.3]}, name=(0.005, 10)),
                       pd.Series({'scale': 45, 'xlim': [0.0, 0.2], 'ylim': [0, 0.025]}, name=(0.1, 10)),
                      pd.Series({'scale':30, 'xlim':[0.0,0.03],'ylim': [0,0.2]},name=(0.005,100)),pd.Series({'scale':50, 'xlim':[0.00,0.4],'ylim': [0,0.004]},name=(0.1,100))
                         ],axis=1).T

    # ---- tau = 1 ----
    # Row of T for nu0: the chain's distribution after one step.
    markov=T.loc[nu0].copy(True);markov.name='Markov Chain'
    xx=np.arange(0,1,0.00001)
    # Normal density centred at nu0, normalised to sum to 1 on the grid and then
    # multiplied by the panel's scale constant.
    # NOTE(review): stats.norm's second positional argument is the standard
    # deviation, while sig2 is named like a variance - confirm this is intended.
    N=200; tau=1;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx);
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';brownian*=dfplt.loc[(nu0,tau)].scale
    # Runs before the first plt.subplot call, so it sizes whatever axes are current.
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.subplot(3, 3, subp[0]);
    brownian.plot(color='r');
    markov.plot(color='b');
    # Relative frequency of each distinct value in obs.X[1].
    o=pd.Series(obs.X[1].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1:
        # For nu0 = 0.1 use a 500-bin histogram instead, divided by a hand-set constant.
        counts,limits=np.histogram(obs.X[1].flatten(),bins=500,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*4)
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), fontsize=fontsize)
    plt.ylabel(r'$P(\nu_\tau|\nu_0)$')
    # ---- tau = 10 ----
    tau=10
    # Nine more multiplications by T: from 1 step to 10 steps.
    for _ in range(9):
        markov=markov.dot(T)
    N=200;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx)
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';
    brownian*=dfplt.loc[(nu0,tau)].scale
    pplt.setSize(plt.gca(), fontsize=fontsize)
    # Still on the tau = 1 axes: give that panel its title before switching subplot.
    plt.title('({})'.format(subptitle[subp[0] - 1]), fontsize=fontsize)
    plt.subplot(3, 3, subp[1]);
    brownian.plot(color='r');
    markov.plot(color='b');
    o=pd.Series(obs.X[10].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1:
        # 100-bin histogram with its own hand-set divisor.
        counts,limits=np.histogram(obs.X[10].flatten(),bins=100,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*20)
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), loc=1, fontsize=fontsize)
    pplt.setSize(plt.gca(), fontsize=fontsize)



    # ---- tau = 100 ----
    tau=100
    # Ninety more multiplications by T: from 10 steps to 100 steps.
    for _ in range(90):
        markov=markov.dot(T)
    N=200;h=2*nu0*(1-nu0);sig2=h*tau/N;brownian=stats.norm(nu0, sig2).pdf(xx)
    brownian=pd.Series(brownian,index=xx);brownian/=brownian.sum();brownian.name='Brownian Motion';
    brownian*=dfplt.loc[(nu0,tau)].scale
    # Still on the tau = 10 axes: title that panel before switching subplot.
    plt.title('({})'.format(subptitle[subp[1] - 1]), fontsize=fontsize)
    plt.subplot(3, 3, subp[2]);
    brownian.plot(color='r');
    markov.plot(color='b')
    o=pd.Series(obs.X[100].flatten()).value_counts().sort_index();o=o/o.sum();
    if nu0==0.1:
        # 30-bin histogram with its own hand-set divisor.
        counts,limits=np.histogram(obs.X[100].flatten(),bins=30,range=[0,1]);centers = 0.5*(limits[1:]+limits[:-1]);o=pd.Series(counts,index=centers);o=o/(obs.X.shape[1]*obs.X.shape[2]*60)
    # Only this panel names the observation series, which gives it a legend entry.
    o.name = 'Observation';
    o.plot(color='g')
    plt.xlim(dfplt.loc[(nu0, tau)].xlim);
    plt.ylim(dfplt.loc[(nu0, tau)].ylim);
    plt.locator_params(nbins=3)
    pplt.annotate(r'$s=0$, $\nu_0=${}, $\tau$={}'.format(nu0, tau), loc=1, fontsize=fontsize)
    # The legend is drawn only when this panel sits at grid position 3.
    if subp[2] == 3:
        plt.legend(loc='center right', fontsize=fontsize)
    pplt.setSize(plt.gca(), fontsize=fontsize)
    plt.title('({})'.format(subptitle[subp[2] - 1]), fontsize=fontsize)