def Final():
    """Draw the window-level Manhattan figure and save it as a PDF.

    Loads absolute scores through ``rutl.loadScores(skipHetChroms=True)``,
    summarises them with ``utl.scanGenome`` (statistics keyed ``'H'`` and
    ``'M'``), plots the sorted result with ``pplt.Manhattan`` and writes the
    figure to ``utl.paperPath + 'new/CHROM.FDR_0.01.pdf'``.

    Takes no arguments and returns ``None``.

    NOTE(review): ``o`` and ``shades`` are read but never assigned in this
    function; presumably they are module-level names defined elsewhere --
    confirm before relying on this routine.
    NOTE(review): the original formatting of this function was lost; the
    statement boundaries below are reconstructed from the syntax.
    """
    abs_scores = rutl.loadScores(skipHetChroms=True).abs()

    # 'H' is the mean absolute value per window, 'M' the number of entries.
    window_stats = {'H': lambda x: x.abs().mean(), 'M': lambda x: x.size}
    windows = sort(utl.scanGenome(abs_scores.abs(), window_stats))

    intervals = ga.getIntervals(o.H, padding=30000)

    fig = plt.figure(figsize=(7, 1.5), dpi=300)
    pplt.Manhattan(data=windows, Outliers=o, shade=intervals.reset_index(), fig=fig,
                   markerSize=2, ticksize=8, sortedAlready=True)
    for ax in fig.get_axes():
        pplt.setSize(ax, 5)
    plt.gcf().subplots_adjust(bottom=0.15)

    # The title is the tuple (row count of `shades`, summed 'len' column / 1e6).
    title = (shades.shape[0], shades['len'].sum() / 1e6)
    plt.suptitle(title, fontsize=8)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('CHROM.FDR_0.01'))
# NOTE(review): the statements on the next line appear to be exploratory,
# session-style scratch code whose original line breaks were lost; the code is
# left untouched.
# - `b`, `i`, `d` and `a` are read before any assignment visible here, so they
#   must come from code outside this view.
# - Bare `a`, `top`, `genes` and `top.shape` are expression statements whose
#   values are discarded.
# - `[0]+range(4,11)` needs `range` to return a list, and `reload` is used as
#   a builtin; both hold on Python 2 only.
# - Visible flow: window scans of `b` and `d` via utl.scanGenome, chromosome-wise
#   plots of values above the 0.999 quantile, a join of the top 2000 `b` values
#   (renamed 'score') with ga.loadANN(), and a utl.BED.intersection of those top
#   sites with ga.loadGeneCoordinates().
pplt.GenomeChromosomewise() b=b.loc[(i).replace({False:None}).dropna().index] c=utl.scanGenome(b,lambda x:x.mean(),winSize=10000,step=5000) pplt.GenomeChromosomewise(b[b>b.quantile(0.999)],outliers=b.sort_values().iloc[-20:]) e=utl.scanGenome(d,lambda x:x.mean(),winSize=10000,step=5000) pplt.GenomeChromosomewise(d[d>d.quantile(0.999)]) a i=b.sort_values(ascending=False).iloc[:20].index[1] a plt.figure();pplt.plotSiteReal(a.loc[i]) plt.show() import popgen.Run.TimeSeries.RealData.GeneAnalysis as ga ann=ga.loadANN() top=b.sort_values(ascending=False).iloc[:2000].rename('score') snp=pd.DataFrame(top).join(ann).sort_values('score',ascending=False).iloc[:,[0]+range(4,11)] z=snp.iloc[:,:5].drop_duplicates() z=z[z['Annotation_Impact']!='LOW'] top genes=ga.loadGeneCoordinates() dfb,dfa=top,genes genes aa=top.reset_index('POS') aa['start']=aa.POS aa['end']=aa.POS reload(utl) top.shape aa=utl.BED.intersection(top.reset_index(),genes,dfa_interval_name='score', dfb_interval_name='primary_FBid').rename(columns={'start':'POS','primary_FBid':'ID'}).set_index('POS',append=True)[['score','ID']]
# NOTE(review): exploratory section whose original line breaks were lost. As
# written the next line begins with '#', and its final `fst = ...` expression
# is cut off mid-call, so the code is left untouched.
# Visible steps: read the 'pairwise.df' and 'single.df' pickles through
# popgen.Util.getEuChromatin; define `get(a, b)`, which concatenates column
# ratios named 'C{}H{}'; pick six genes by name from ga.loadGeneCoordinates();
# plot `ratio = get(s.pi.C, s.pi.L)` with pplt.Manhattan and
# pplt.GenomeChromosomewise; then start computing `fst` from `s.pi.L` and the
# cross columns of `p`.
# NOTE(review): `get` divides by b.iloc[:, i] while the label uses b.columns[j],
# and `j` ranges over a.shape[1]; this looks like it was meant to be
# b.iloc[:, j] over b.shape[1] -- confirm with the author.
# NOTE(review): the suptitle literal is not a raw string, so its backslash-t is
# read as a tab character rather than the start of a LaTeX theta -- confirm.
# computeStatistics() import popgen.Run.TimeSeries.RealData.Utils as rutl import popgen.Run.TimeSeries.RealData.GeneAnalysis as ga p = popgen.Util.getEuChromatin(pd.read_pickle(path + 'pairwise.df')) s = popgen.Util.getEuChromatin(pd.read_pickle(path + 'single.df')) def get(a, b): return pd.concat([(a.iloc[:, i] / b.iloc[:, i]).rename('C{}H{}'.format(a.columns[i], b.columns[j])) for i in range(a.shape[1]) for j in range(a.shape[1])], axis=1) g = ga.loadGeneCoordinates().set_index('name') genes = g.loc[['HDAC4', 'H', 'para', 'hang', 'rut', 'Ulp1']].reset_index().set_index('CHROM') reload(pplt) ratio = get(s.pi.C, s.pi.L) pplt.Manhattan(ratio); plt.suptitle('$\theta$'); plt.savefig(utl.home + 'manhattan.pdf') candchr = ratio.loc[['3R', 'X']] _, axes = plt.subplots(2, 1, sharey=True, dpi=200, figsize=(10, 6)); pplt.GenomeChromosomewise(candchr['C1H1'], genes=genes, axes=axes); plt.savefig(utl.home + 'C1H1.manhattan.pdf') cross = p.loc[:, map(lambda x: 'L' in x and 'C' in x, p.columns.get_level_values(1))].pi fst = 1 - pd.DataFrame(s.pi.L.loc[:, map(lambda x: int(x[-1]), cross.columns)].values / cross.values, index=cross.index,
def scanSFSSNPbased():
    """Scan absolute scores with ``utl.scanGenomeSNP`` and save Manhattan plots.

    Steps, in the order they run:

    1. Load scores with ``rutl.loadScores(skipHetChroms=True)`` and reload the
       ``rutl``, ``pplt`` and ``utl`` modules.
    2. Compute windowed means of ``scores.abs()`` for window sizes 200, 500
       and 1000 (step 100) and store their product in column ``comb``.
    3. Save ``SNPbased.pdf`` (all four columns), then ``SNPbased.candidates.pdf``
       with the result of ``localOutliers(comb, q=0.9)`` highlighted.
    4. Build per-level cutoffs with ``FDR`` and save one
       ``SNPbased.fdr<level>.pdf`` for each of 0.95, 0.99 and 0.999.

    All PDFs are written under ``utl.paperPath + 'new/'``.  Takes no arguments
    and returns ``None``.

    NOTE(review): the original formatting of this function was lost.  The
    statements after the commented-out driver calls are assumed to belong to
    this function because they read its locals ``df`` and ``scores`` --
    confirm against the original file.
    NOTE(review): ``genes``, ``shade``, ``cand`` and ``chroms`` are computed
    but only referenced by the disabled code kept in the comments below.
    """

    def _new_figure(height):
        # Every figure in this routine is 7 inches wide at 300 dpi.
        return plt.figure(figsize=(7, height), dpi=300)

    def _finish(fig, stem):
        # Shared post-plot steps: resize each axis, pad the bottom, save.
        for ax in fig.get_axes():
            pplt.setSize(ax, 5)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format(stem))

    scores = rutl.loadScores(skipHetChroms=True)
    # Disabled experiment kept from the original:
    # field = comale;
    # df = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))[
    #     [field, 'Num. of SNPs']]
    # plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')
    reload(rutl)
    reload(pplt)
    reload(utl)
    # Disabled experiment kept from the original:
    # SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=100)
    # sfs0 = utl.scanGenomeSNP(rutl.getNut(0, skipHetChroms=True), SFSelect)
    # sfst = utl.scanGenomeSNP(rutl.getNut(59, skipHetChroms=True), SFSelect).rename(59); sfs=(sfst-sfs0); sfs[sfs<0]=None

    gene_table = ga.loadGeneCoordinates().set_index('name')
    genes = gene_table.loc[['Ace', 'Cyp6g1', 'CHKov1']].reset_index().set_index('CHROM')

    # Last two rows of the sorted scores, with 'POS' renamed to 'start'.
    shade = scores.sort_values().reset_index().iloc[-2:].rename(columns={'POS': 'start'})
    shade['end'] = shade.start + 100

    score_rank = scores.rank(ascending=False).rename('rank')
    cand = pd.concat([scores, score_rank, rutl.getNut(0, skipHetChroms=True)], axis=1).sort_values('rank')
    chroms = ['2L', '2R', '3L', '3R']
    reload(utl)
    # reload(pplt);pplt.Genome(sfs.loc[chroms],genes=genes);plt.tight_layout(pad=0.1)

    # (window size, skipFromFirst) pairs; each resulting column is named after
    # its window size.
    window_plan = ((200, 900), (500, 750), (1000, 500))
    tracks = []
    for width, skip in window_plan:
        track = utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=width, step=100,
                                  skipFromFirst=skip)
        tracks.append(track.rename(width))
    df = pd.concat(tracks, axis=1)
    df['comb'] = df[200] * df[500] * df[1000]

    fig = _new_figure(4.5)
    pplt.Manhattan(data=sort(df.rename(columns={'comb': '200*500*1000'})), fig=fig, markerSize=2,
                   ticksize=8, sortedAlready=True)
    _finish(fig, 'SNPbased')

    pplt.Genome(df.comb)
    plt.tight_layout(pad=0.1)

    # analyzie()
    # scanSFS()
    # outlier()
    # scanSFSSNPbased()
    combined = df.comb
    flagged = localOutliers(combined, q=0.9)
    fig = _new_figure(1.5)
    pplt.Manhattan(data=combined, Outliers=pd.DataFrame(flagged), fig=fig, markerSize=2, ticksize=8,
                   sortedAlready=True)
    _finish(fig, 'SNPbased.candidates')

    # Pair each absolute score with a running 0..n-1 index built per group of
    # the first index level.
    running_index = scores.groupby(level=0).apply(
        lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)).rename('i')
    indexed_scores = pd.concat([scores.rename('scores').abs(), running_index], axis=1)

    thresholds = FDR(flagged, indexed_scores)
    # Keep only rows whose cutoffs sum to a positive value and that have no
    # missing entries after joining with the window scans.
    with_cutoffs = pd.concat([df, thresholds[thresholds.sum(1) > 0]], axis=1).dropna()

    for fdr in (0.95, 0.99, 0.999):
        passing = with_cutoffs[with_cutoffs.comb > with_cutoffs[fdr]]
        fig = _new_figure(1.5)
        pplt.Manhattan(data=df.comb, Outliers=pd.DataFrame(passing), fig=fig, markerSize=2,
                       ticksize=8, sortedAlready=True)
        _finish(fig, 'SNPbased.fdr{}'.format(fdr))