# Load the per-sample read-count table as strings: column 0 holds the sample
# name and columns 1-5 hold the read counts that are converted to floats below.
a = np.loadtxt(filename, dtype=str)

# check if there is only one bam file
# np.loadtxt returns a 1-D array for a single-row table, which would break
# the column slicing below.  Test the array itself rather than shelling out
# to `wc -l`: the line count disagrees with loadtxt when the file has no
# trailing newline or contains blank/comment lines, and building a shell
# command from the filename fails on paths with spaces or metacharacters.
if a.ndim == 1:
    # Pad with an all-zero placeholder row so that `a` is 2-D.
    a = np.vstack((a, np.zeros(len(a))))

sampleNames = a[:, 0]

# Read counts per sample, scaled to millions of reads.
all_reads = a[:, 1].astype(float)/1E6
no_chrM = a[:, 2].astype(float)/1E6
chrM = a[:, 3].astype(float)/1E6
final = a[:, 4].astype(float)/1E6
final_q30 = a[:, 5].astype(float)/1E6

# One label per row of `vecs`, in the same order.
headers = np.array(['final over q30',
                    'final under q30',
                    'mitochondrial',
                    'duplicates',
                    'reads not aligned'])
vecs = np.array([final_q30,
                 final - final_q30,
                 chrM,
                 no_chrM - final,
                 all_reads - chrM - no_chrM])

# Now plot results
plt.figure(figsize=(6.5, 5.5))
plot_barplot(vecs, labels=headers, samples=sampleNames)
plt.subplots_adjust(bottom=0.25, left=0.15, right=0.6)
ax = plt.gca()
ax.set_ylabel('number of reads (million)')
# Leave 10% headroom above the largest per-sample column total.
ax.set_ylim((0, np.max(np.sum(vecs, 0))*1.1))
ax.grid()
ax.set_xticklabels(sampleNames, rotation=90)

# The PDF is written next to the file named by options.a, with its extension
# replaced by '.pdf'.
outFile = os.path.splitext(options.a)[0]+'.pdf'
plt.savefig(outFile)
np.save(outfile+'.npy', signals)

# For signal tracks, make plot: one faint blue line per track in signals[0],
# sampled every `span` positions, with the NaN-ignoring mean drawn in black.
span = 1E4
indx = np.arange(0, options.l+options.r, span, dtype=int)
xvalues = np.arange(-options.l, options.r, span)
fig = plt.figure(figsize=(5, 4))
ax = fig.add_subplot(111)
for signal in signals[0]:
    ax.plot(xvalues, signal[indx], 'b', alpha=0.1)
# Mixing the scalar index 0 with the index array `indx` around a slice puts
# the sampled positions on the first axis of the result, so (assuming
# `signals` is 3-D) the mean over axis 1 runs across tracks.
ax.plot(xvalues, np.nanmean(signals[0, :, indx], 1), 'k')
plt.savefig('%s.%s.pdf'%(outfile, options.interval))

# Now, for conservation, etc., compare the signal at distal sites with the
# signal at promoter sites.
locBed = filefun.loadBedwScores(options.a)

# Distance from each interval to its closest TSS: the last column of
# `bedtools closest -d`.  The command is passed as an argument list instead
# of a shell string, so file names containing spaces or shell metacharacters
# are handled correctly and a bedtools failure raises CalledProcessError
# instead of being hidden behind the exit status of a trailing `awk`.
closest = subprocess.check_output(['bedtools', 'closest', '-d', '-t', 'first',
                                   '-a', str(bedFileName),
                                   '-b', str(tssBedFileName)])
locBed.distanceToTss = np.array([line.split()[-1]
                                 for line in closest.splitlines()
                                 if line.strip()],
                                dtype=int)

# Sites further than 5E3 from the closest TSS count as distal.
locBed.distal = locBed.distanceToTss > 5E3

# The bare `signals[0, ...]` expression statements that used to sit here had
# no effect and were dropped; the same selections are built below.
distal_up = np.all((locBed.distal, locBed.significant_up), axis=0)
distal_no_change = np.all((locBed.distal, locBed.no_change), axis=0)
promoter = np.logical_not(locBed.distal)

histogram.compare([signals[0, distal_up],
                   signals[0, distal_no_change],
                   signals[0, promoter]],
                  labels=['up, distal', 'no change, distal', 'all promoter'])

# NOTE(review): plot_barplot is called here without any data argument --
# confirm against its signature that this is intended.
plotfun.plot_barplot()
np.all((peakBed.distal, peakBed.enriched_region), axis=0),
]
# NOTE(review): the two lines above close a list whose opening lies outside
# this excerpt; the loop below reads it as `indices` -- confirm.

# plot histogram and scatterplot
# Labels used in the output file names and as histogram labels; assumes
# `indices` holds exactly two selections in this order -- TODO confirm.
indxlabels = ["depleted", "enriched"]
for i, indx in enumerate(indices):
    # Restrict the normalized x/y values to the current selection.
    xvalues = xvalues_norm[indx]
    yvalues = yvalues_norm[indx]

    # One hexbin figure per selection, saved under that selection's label.
    fig = plt.figure(figsize=(4, 4))
    plotfun.plot_hexbin(xvalues, yvalues)
    # plt.savefig('allDistalPeaks.%s.hexbinplot.pdf'%indxlabels[i])
    plt.savefig("allDistalPeaks.%s.per_bp.hexbinplot.pdf" % indxlabels[i])

# A single figure with the scores of the first two selections side by side.
fig = plt.figure(figsize=(7, 4))
plotfun.plotHistogram(score[indices[0]], score[indices[1]], labels=indxlabels)
plt.savefig("allDistalPeaks.both.histogram.pdf")

# NOTE(review): the triple-quoted string opened below is not closed within
# this excerpt; it appears to hold disabled bar-plot code and is left verbatim.
""" # plot barplot filename = 'scoring/140815_peaks.coverageCorr.upwNfib.annstats' labels, numpeaks = filefun.loadAnnstatFile(filename) percentage_up = numpeaks/float(np.sum(numpeaks)) filename = 'scoring/140815_peaks.coverageCorr.all.annstats' labels, numpeaks = filefun.loadAnnstatFile(filename) percentage_all = numpeaks/float(np.sum(numpeaks)) indx = np.argsort(np.mean([percentage_up, percentage_all], 0))[::-1][:7] vecs = np.transpose([np.append(percentage_all[indx], 1-np.sum(percentage_all[indx])), np.append(percentage_up[indx], 1-np.sum(percentage_up[indx]))]) reorder = np.append(np.argsort(np.abs(percentage_up-percentage_all)[indx]), -1) fig = plt.figure(figsize=(4,4)) plotfun.plot_barplot(vecs[reorder], labels=np.append(labels[indx], 'other')[reorder], cmap='binary', samples=['all peaks', 'up peaks'])