示例#1
0

# computeStatistics()
import popgen.Run.TimeSeries.RealData.Utils as rutl
import popgen.Run.TimeSeries.RealData.GeneAnalysis as ga

# Load the pickled 'pairwise.df' and 'single.df' objects found under `path`
# and pass each one through popgen.Util.getEuChromatin.
# NOTE(review): presumably getEuChromatin restricts the data to euchromatic
# regions - confirm against popgen.Util.
# NOTE(review): pd.read_pickle unpickles arbitrary objects; only point `path`
# at trusted files.
p = popgen.Util.getEuChromatin(pd.read_pickle(path + 'pairwise.df'))
s = popgen.Util.getEuChromatin(pd.read_pickle(path + 'single.df'))


def get(a, b):
    """Return every pairwise column ratio between two data frames.

    For each column ``ca`` of ``a`` and each column ``cb`` of ``b`` the result
    holds ``a[ca] / b[cb]`` in a column named ``'C{ca}H{cb}'``.

    Args:
        a: DataFrame supplying the numerators.
        b: DataFrame supplying the denominators.

    Returns:
        DataFrame with ``a.shape[1] * b.shape[1]`` columns, ordered with the
        columns of ``a`` as the outer loop and the columns of ``b`` as the
        inner loop.
    """
    # The denominator has to follow j (and j has to range over b's columns):
    # the column label already names the (i, j) pair, so dividing by
    # b.iloc[:, i] would repeat the diagonal ratio under off-diagonal labels.
    return pd.concat(
        [(a.iloc[:, i] / b.iloc[:, j]).rename('C{}H{}'.format(a.columns[i], b.columns[j]))
         for i in range(a.shape[1]) for j in range(b.shape[1])],
        axis=1)


# Coordinates of a hand-picked set of genes, looked up by name and then
# re-indexed by chromosome for the chromosome-wise plot below.
g = ga.loadGeneCoordinates().set_index('name')
genes = g.loc[['HDAC4', 'H', 'para', 'hang', 'rut', 'Ulp1']].reset_index().set_index('CHROM')

reload(pplt)
# Column ratios of s.pi.C over s.pi.L, built by get().
ratio = get(s.pi.C, s.pi.L)
pplt.Manhattan(ratio)
# Raw string on purpose: in a plain literal '\t' is a TAB escape, so the
# title would reach matplotlib as '$<TAB>heta$' instead of mathtext theta.
plt.suptitle(r'$\theta$')
plt.savefig(utl.home + 'manhattan.pdf')

# Chromosome-wise view of the C1H1 ratio on the two selected chromosomes.
candchr = ratio.loc[['3R', 'X']]
_, axes = plt.subplots(2, 1, sharey=True, dpi=200, figsize=(10, 6))
pplt.GenomeChromosomewise(candchr['C1H1'], genes=genes, axes=axes)
plt.savefig(utl.home + 'C1H1.manhattan.pdf')

# Keep the pairwise columns whose level-1 label contains both 'L' and 'C'.
# list(...) makes the boolean mask a real list, so the selection also works
# where map() returns an iterator.
cross = p.loc[:, list(map(lambda x: 'L' in x and 'C' in x, p.columns.get_level_values(1)))].pi

fst = 1 - pd.DataFrame(s.pi.L.loc[:, map(lambda x: int(x[-1]), cross.columns)].values / cross.values, index=cross.index,
示例#2
0
# Exploratory session: rank sites by the score series `b`, annotate the top
# hits and intersect them with gene coordinates.  `d`, `b`, `a` and `H` are
# expected to exist already; bare names below are interactive echoes.

# Windowed mean of d (winSize=10000, step=5000); `e` is not used further here.
e = utl.scanGenome(d, lambda x: x.mean(), winSize=10000, step=5000)
# Plot only the values of d above its 0.999 quantile.
pplt.GenomeChromosomewise(d[d > d.quantile(0.999)])
a
# Index label of the second-highest entry of b.
i = b.sort_values(ascending=False).iloc[:20].index[1]
a
plt.figure()
pplt.plotSiteReal(a.loc[i])
plt.show()
import popgen.Run.TimeSeries.RealData.GeneAnalysis as ga
ann = ga.loadANN()
# The 2000 highest-scoring entries of b, as a series named 'score'.
top = b.sort_values(ascending=False).iloc[:2000].rename('score')
# Join the annotation table and keep column 0 plus columns 4..10.
# list(range(...)) keeps the concatenation valid where range() is lazy.
snp = (pd.DataFrame(top).join(ann).sort_values('score', ascending=False)
       .iloc[:, [0] + list(range(4, 11))])
z = snp.iloc[:, :5].drop_duplicates()
z = z[z['Annotation_Impact'] != 'LOW']
top

genes = ga.loadGeneCoordinates()
dfb, dfa = top, genes
genes
# NOTE: this `aa` is overwritten by the BED intersection a few lines below.
aa = top.reset_index('POS')
aa['start'] = aa.POS
aa['end'] = aa.POS
reload(utl)
top.shape
# Intersect the top sites with the gene table and keep score / gene id,
# indexed additionally by POS.
aa = (utl.BED.intersection(top.reset_index(), genes, dfa_interval_name='score',
                           dfb_interval_name='primary_FBid')
      .rename(columns={'start': 'POS', 'primary_FBid': 'ID'})
      .set_index('POS', append=True)[['score', 'ID']])
aa.score = aa.score.astype(float)
aa = aa.sort_values('score', ascending=False)
aa.join(H).sort_values('score')
top
# axis is passed by keyword: pandas.concat no longer accepts it positionally.
pd.concat([top.rename('aaa'), aa], axis=1)
genes = ga.loadGeneData().set_index('CHROM')
for i in b.sort_values(ascending=False).iloc[:20].index:
示例#3
0
def _saveManhattan(data, fname, figsize, **manhattanKwargs):
    """Draw one Manhattan plot on a new figure and save it as a PDF.

    Args:
        data: Values handed to ``pplt.Manhattan`` as ``data``.
        fname: Base name of the output; the figure is written to
            ``utl.paperPath + 'new/<fname>.pdf'``.
        figsize: Figure size passed to ``plt.figure``.
        **manhattanKwargs: Extra keyword arguments forwarded unchanged to
            ``pplt.Manhattan`` (e.g. ``Outliers``).
    """
    fig = plt.figure(figsize=figsize, dpi=300)
    pplt.Manhattan(data=data, fig=fig, markerSize=2, ticksize=8, sortedAlready=True,
                   **manhattanKwargs)
    for ax in fig.get_axes():
        pplt.setSize(ax, 5)
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format(fname))


def scanSFSSNPbased():
    """Scan the absolute scores in SNP windows and save Manhattan plots.

    Steps, all driven by ``rutl.loadScores(skipHetChroms=True)``:

    1. Average ``abs(scores)`` with ``utl.scanGenomeSNP`` for window sizes
       200, 500 and 1000 and combine them as their product (column 'comb').
    2. Save a Manhattan plot of all four columns ('SNPbased.pdf') and draw
       ``pplt.Genome`` of the combined column.
    3. Mark ``localOutliers(comb, q=0.9)`` on a second plot
       ('SNPbased.candidates.pdf').
    4. Pass the outliers and per-site scores to ``FDR`` and, for each of the
       levels 0.95, 0.99 and 0.999, save a plot highlighting windows whose
       combined value exceeds that level's cutoff ('SNPbased.fdr<level>.pdf').

    Returns:
        None. The results are the PDF files written under
        ``utl.paperPath + 'new/'`` and the matplotlib figures left open.
    """
    scores = rutl.loadScores(skipHetChroms=True)
    # Development aid kept from the original: pick up edits to the helper
    # modules without restarting the interpreter.
    reload(rutl)
    reload(pplt)
    reload(utl)

    # (winSize, skipFromFirst) per scan; step is 100 for every window size.
    windows = [(200, 900), (500, 750), (1000, 500)]
    df = pd.concat(
        [utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=winSize, step=100,
                           skipFromFirst=skip).rename(winSize)
         for winSize, skip in windows],
        axis=1)
    df['comb'] = df[200] * df[500] * df[1000]

    _saveManhattan(sort(df.rename(columns={'comb': '200*500*1000'})), 'SNPbased',
                   figsize=(7, 4.5))
    pplt.Genome(df.comb)
    plt.tight_layout(pad=0.1)

    # Candidate windows: local outliers of the combined statistic.
    o = localOutliers(df.comb, q=0.9)
    _saveManhattan(df.comb, 'SNPbased.candidates', figsize=(7, 1.5),
                   Outliers=pd.DataFrame(o))

    # Per-site |score| plus 'i', a 0..n-1 counter restarted within each
    # level-0 group of the score index.
    Scores = pd.concat([scores.rename('scores').abs(), scores.groupby(level=0).apply(
        lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)).rename('i')], axis=1)
    cutoff = FDR(o, Scores)

    # Keep rows with a non-zero cutoff; the columns 0.95 / 0.99 / 0.999 read
    # below come from `cutoff`, since df only has 200, 500, 1000 and 'comb'.
    a = pd.concat([df, cutoff[cutoff.sum(1) > 0]], axis=1).dropna()
    for fdr in [0.95, 0.99, 0.999]:
        o = a[a.comb > a[fdr]]
        _saveManhattan(df.comb, 'SNPbased.fdr{}'.format(fdr), figsize=(7, 1.5),
                       Outliers=pd.DataFrame(o))