# NOTE(review): the original line breaks and indentation of this excerpt were
# lost, and it opens inside an `if` branch whose header is not visible, so the
# code below is left exactly as found. Read left to right it:
#   - stores np.sum(xcorr) and np.sum(getTriangle(xcorr)) symmetrically at
#     [i, j] and [j, i] of dotproducts / dotproducts_triangle, then calls
#     plot_vplot on getTriangle(xcorr), xcorr and smoothmat2, titling each
#     "<motifs[i]> vs <motifs[j]>";
#   - in the `else` branch writes NaN into the same four matrix entries;
#   - builds boolean masks of the rows / columns of dotproducts that are
#     entirely finite and draws a heat map of that sub-matrix, with the colour
#     limits taken from nanmin / nanmax of the full matrix;
#   - loops over sigma in [0.5, 0.6, 0.7, 0.8], Gaussian-smoothing the
#     mean-centred mat1; the imshow calls that follow display smoothmat1, mat2,
#     xcorr and the product of the mean-centred mat1 and mat2 (whether they sit
#     inside the sigma loop cannot be told from this excerpt).
# NOTE(review): every imshow call passes origin='left'; matplotlib documents
# only 'upper' and 'lower' for `origin` -- confirm which one was intended.
dotproducts[i, j] = dotproducts[j, i] = np.sum(xcorr) dotproducts_triangle[i, j] = dotproducts_triangle[j, i] = np.sum(getTriangle(xcorr)) plot_vplot(getTriangle(xcorr)) plt.title('%s vs %s'%(motifs[i], motifs[j])) plot_vplot(xcorr) plt.title('%s vs %s'%(motifs[i], motifs[j])) plot_vplot(smoothmat2) plt.title('%s vs %s'%(motifs[i], motifs[j])) else: dotproducts[i, j] = dotproducts[j, i] = np.nan dotproducts_triangle[i, j] = dotproducts_triangle[j, i] = np.nan rowindices = np.all(np.isfinite(dotproducts), axis=1) columnindices = np.all(np.isfinite(dotproducts), axis=0) heatmapfun.plotHeatMap(dotproducts[rowindices][:, columnindices], rowlabels=motifs[:num_to_test][rowindices], columnlabels=motifs[:num_to_test][columnindices], fontSize=11, vmin=np.nanmin(dotproducts), vmax=np.nanmax(dotproducts), cmap='RdGy_r') for sigma in [0.5, 0.6, 0.7, 0.8]: smoothmat1 = scipy.ndimage.gaussian_filter(mat1 - np.mean(mat1), sigma) plt.figure() plt.imshow(smoothmat1, aspect='equal', origin='left') plt.figure() plt.imshow(mat2, aspect='equal', origin='left') plt.figure() plt.imshow(xcorr, aspect='equal', origin='left') plt.figure() plt.imshow((mat1-np.mean(mat1))*(mat2-np.mean(mat2)), aspect='equal', origin='left')
# Save the normalized peak counts. The output prefix is options.o when given,
# otherwise the path in options.c with its extension stripped.
outputfile = options.o if options.o else os.path.splitext(options.c)[0]
np.savetxt(outputfile+'.normalize.peakCount', readCountNorm)

# Also save a table with replicates collapsed, rescaled so that the mean over
# the noExprChange rows is 10 in every column.
# (The '.norepicates.' spelling is kept: it is part of the output file name.)
readCountRed = filefun.reduceByReplicates(readCount, parameters.indices_for_reduce_by_replicates)
readCountNorm = readCountRed/np.mean(readCountRed[noExprChange], 0)*10
np.savetxt(outputfile+'.normalize.norepicates.peakCount', readCountNorm)

# Plot and save correlation heat maps. readCountNorm is rebound to the
# per-sample (non-collapsed) normalization for everything below.
numSamples = readCount.shape[1]
readCountNorm = readCount/np.mean(readCount[noExprChange], 0)*10

# Pairwise Spearman distance over all rows. Computed once and reused for the
# "old samples" heat map further down, which previously rebuilt the identical
# numSamples x numSamples matrix from the same inputs.
# NOTE(review): assumes plotHeatMap does not modify its input in place.
distanceCorrAll = np.array([[getDistanceSpearmanr(readCountNorm[:, i], readCountNorm[:, j])
                             for i in range(numSamples)]
                            for j in range(numSamples)])
plotHeatMap(distanceCorrAll, rowlabels=parameters.headers, columnlabels=parameters.headers,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile+'.correlation_heatmap.all.pdf')

# Same distance, restricted to the noExprChange rows.
distanceCorr = np.array([[getDistanceSpearmanr(readCountNorm[noExprChange, i], readCountNorm[noExprChange, j])
                          for i in range(numSamples)]
                         for j in range(numSamples)])
plotHeatMap(distanceCorr, rowlabels=parameters.headers, columnlabels=parameters.headers,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile+'.correlation_heatmap.no_change.pdf')

# plot only old samples with replicates
# distanceCorr is rebound to the all-rows matrix so that code after this
# section sees the same value it did before.
distanceCorr = distanceCorrAll
plotHeatMap(distanceCorr[parameters.indices_old][:, parameters.indices_old],
            rowlabels=parameters.headers_old, columnlabels=parameters.headers_old,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile+'.correlation_heatmap.tissue.pdf')

# plot scatterplot of replicates
for i, j in parameters.indices_for_reduce_by_replicates:
    xvalues = readCountNorm[:, i]
    yvalues = readCountNorm[:, j]
# NOTE(review): the original line breaks and indentation of this excerpt were
# lost; the layout below is reconstructed from syntax. `threshold`, `outfile`,
# `boundMatrix`, `unboundMatrix`, `footprintMats`, etc. are defined outside it.

# Plot the bound aggregate profiles with samples in the order given by
# bound_clusters['leaves'].
reorder = np.array(bound_clusters['leaves'])
plotfun.plotAggregateSignal(footprintVecs_norm_bound[reorder], labels=labels[reorder])
# The title reports how many columns of boundMatrix are True for every row.
plt.title('threshold %3.2f; %d bound by all'%(threshold, np.sum(np.all(boundMatrix, 0))))
plt.savefig('%s.threshold_%3.2f.normalizedbyinsertions.bound.pdf'%(outfile, threshold))

# plot unbound profiles
# For each sample: the mean profile over that sample's unbound entries,
# rescaled by (mean over all unbound entries) / (that sample's own unbound mean).
footprintVecs_norm_unbound = np.array([np.mean(footprintMats[i, unboundMatrix[i]], 0)*
                                       np.mean(footprintMats[unboundMatrix])/np.mean(footprintMats[i, unboundMatrix[i]])
                                       for i in range(numSamples)])
plotfun.plotAggregateSignal(footprintVecs_norm_unbound, labels=labels)
# The count in the title is derived from boundMatrix (columns False in every row).
plt.title('threshold %3.2f; %d unbound'%(threshold, np.sum(np.all(np.logical_not(boundMatrix), 0))))
plt.savefig('%s.threshold_%3.2f.normalizedbyinsertions.unbound.pdf'%(outfile, threshold))

# do Jaccard distance heat map
# Each entry is 1 - jaccard(...) between two rows of boundMatrix.
distance = np.array([[1-jaccard(boundMatrix[i], boundMatrix[j])
                      for i in range(numSamples)]
                     for j in range(numSamples)])
plotHeatMap(distance, rowlabels=labels, columnlabels=labels, fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig('%s.jaccard.heatmap.threshold_%3.2f.pdf'%(outfile, threshold))

# plot heat maps
for i in range(numSamples):
    fig = plt.figure(figsize=(4, 10))
    # Order this sample's entries by strengthMatrix, largest first.
    sortIndx = np.argsort(strengthMatrix[i])[::-1]
    # The `threshold` keyword receives, for each cutoff in [0, 0.05, 0.1, 0.15, 0.2],
    # the number of entries whose (ideal r - background r) exceeds that cutoff.
    plot_heatmap_bar(footprintMats[i, sortIndx], strengthMatrix[i,sortIndx],
                     threshold=[np.sum(correlationToIdeal[i]['r']-correlationToBackground[i]['r'] > j)
                                for j in [0, 0.05, 0.1, 0.15, 0.2]],
                     label=labels[i])
    plt.savefig('%s.footprint.%s.heatmap.pdf'%(outfile, labels[i]))

    # plot side by side bound and unbound
    # NOTE(review): these statements index with `i`, so they are placed inside
    # the per-sample loop here; the collapsed source does not show whether they
    # originally ran once after the loop instead -- confirm against the
    # original file. boundMatrix is not recomputed after `threshold` is reset.
    threshold = 0.05
    fig = plt.figure(figsize=(4, 10))
    plot_heatmap_bar(footprintMats[i, np.all(boundMatrix,0)], strengthMatrix[i,np.all(boundMatrix,0)])
# Rescale every column so that the mean over the noExprChange rows is 10
# (original note: "such that average count in TSS peaks is 10").
readCountNorm = readCount/np.mean(readCount[noExprChange], 0)*10

# Save the normalized peak counts. The output prefix is options.o when given,
# otherwise the path in options.c with its extension stripped.
outputfile = options.o if options.o else os.path.splitext(options.c)[0]
np.savetxt(outputfile+'.normalize.peakCount', readCountNorm)

# Also save a table with replicates collapsed and rescaled the same way.
# NOTE(review): the replicate pairing is hard-coded as columns (0, 1) and
# (2, 3), and the loop at the bottom hard-codes 4 samples -- confirm this
# matches the input before reusing the script on other data.
# (The '.norepicates.' spelling is kept: it is part of the output file name.)
readCountRed = filefun.reduceByReplicates(readCount, np.array([[0, 1], [2, 3]]))
np.savetxt(outputfile+'.normalize.norepicates.peakCount',
           readCountRed/np.mean(readCountRed[noExprChange], 0)*10)

# plot and save correlation heat map
numSamples = readCount.shape[1]
distanceCorr = np.array([[getDistanceSpearmanr(readCountNorm[:, i], readCountNorm[:, j])
                          for i in range(numSamples)]
                         for j in range(numSamples)])
# NOTE(review): row labels come from parameters.headers but column labels from
# parameters.headers_human -- confirm the two label sets are meant to differ.
plotHeatMap(distanceCorr, rowlabels=parameters.headers, columnlabels=parameters.headers_human,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile+'.correlation_heatmap.all.pdf')

# The bare string below is an inert note left by the author (it is never
# executed). NOTE(review): the os.system example inside it has four %s
# placeholders but only three arguments, so it would fail if pasted as code.
""" Reminder of script is making a lot of plots. Requires calls of significant versus not peaks optional: run python script 'find_significant_peaks.py' os.system('python %s -b %s -p %s --indx %s'%('scoring/140815_peaks.coverageCorr.all.bed', outputfile+'.normalize.replicate_red.peakCount', options.b)) """

# plot histogram and scatterplot
for i in range(4):
    for j in range(i+1, 4):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print('%d\t%d'%(i, j))
plt.savefig('%s.all.normalizedbyinsertions.pdf'%outfile) sort_indx = np.zeros((numSamples, numSites)) correlation_stength = np.zeros((numSamples, numSites)) # find correlations for i in range(numSamples): backgroundSignal = np.mean(backgroundMats[i], 0) sort_indx[i], correlation_stength[i] = main(footprintMats[i], idealSignal = np.mean(np.mean(footprintMats, 1), 0), backgroundSignal = backgroundSignal ) plt.title(labels[i]) plt.tight_layout() threshold = 0 boundMatrix = correlation_stength > threshold distance = np.array([[1-jaccard(boundMatrix[i], boundMatrix[j]) for i in range(numSamples)] for j in range(numSamples)]) heatmapfun.plotHeatMap(distance, rowlabels=labels, columnlabels=labels, fontSize=11, cmap='RdGy_r', vmin=0, vmax=1) plt.savefig('%s.jaccard.heatmap.threshold_%3.2f.pdf' %(outfile, threshold)) threshold = 0.05 boundMatrix = correlation_stength > threshold distance = np.array([[1-jaccard(boundMatrix[i], boundMatrix[j]) for i in range(numSamples)] for j in range(numSamples)]) heatmapfun.plotHeatMap(distance, rowlabels=labels, columnlabels=labels, fontSize=11, cmap='RdGy_r', vmin=0, vmax=1) plt.savefig('%s.jaccard.heatmap.threshold_%3.2f.pdf' %(outfile, threshold)) distance = np.array([[st.spearmanr(correlation_stength[i], correlation_stength[j])[0] for i in range(numSamples)] for j in range(numSamples)]) heatmapfun.plotHeatMap(distance, rowlabels=labels, columnlabels=labels, rowIndx=None, fontSize=None, columnIndx=None, cmap='Rd_Gy_r', vmin=0, vmax=0.5) plt.savefig('%s.correlation.heatmap.pdf' %(outfile)) """ for i in range(numSamples): main(footprintMats[i], sortIndx = sort_indx_nfi)
# Heat map of the enrichment matrix with columns / row labels in `reorder` order.
plt.figure(figsize=(20,5))
heatmapfun.plotCoverageHeatMap(enrichment[:, reorder], cluster=False, rowlabels = parameters.headers_noreplicates[reorder])
np.savetxt('%s.enrichment_values.iterations_%d.mat'%(outfile, numIterations), enrichment_all, delimiter='\t')

# save bed file of enrichment locations
# One line per window: chromosome, centre - stepSize/2, and centre + stepSize/2
# capped at genomeSize[chrm]. The `with` block closes the file even if a write
# raises, which the previous open()/close() pair did not.
with open('%s.windowed_locs.bed'%outfile, 'w') as f:
    for chrm in chrms:
        windowCenters = windowedLocsDict[chrm]
        for windowCenter in windowCenters:
            f.write('%s\t%d\t%d\n'%(chrm,
                                    (windowCenter-stepSize/2),
                                    min(genomeSize[chrm], (windowCenter+stepSize/2))))

# save correleogram, if possible
# NOTE(review): numSamples is read here but only reassigned from
# signalDensity further down -- it must already be defined before this excerpt.
distanceCorr = np.array([[st.spearmanr(enrichment_all[:, i], enrichment_all[:, j])[0]
                          for i in range(numSamples)]
                         for j in range(numSamples)])
try:
    plotHeatMap(distanceCorr, rowlabels=parameters.headers_noreplicates,
                columnlabels=parameters.headers_noreplicates,
                fontSize=11, cmap='PuOr_r', vmin=-1, vmax=1)
    plt.savefig('%s.enrichment_correlation.iterations_%d.pdf'%(outfile, numIterations))
except ValueError:
    # Best effort: a ValueError from plotting/saving is reported, not fatal.
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print('did not save correlelogram')

# save bigwig file
numSamples = signalDensity.shape[1]
for sample in range(numSamples):
    # save bigwig file of enrichment
    wigFileName = '%s.enrichment.track%d.wig'%(outfile, sample)
    # This handle is left as a plain open(): the code that finishes writing and
    # closes it lies beyond this excerpt.
    f = open(wigFileName, 'w')
    f.write("track name='enrichment score' description='enrichment in %s'\n"%(wigFileName))
    logFoldChangeAll = np.empty(0)
    for chrm in chrms:
        windowedLocs = windowedLocsDict[chrm]
        # Signal relative to the per-column mean for this chromosome.
        foldChange = signalDensityDict[chrm]/np.mean(signalDensityDict[chrm], 0)