Пример #1
0
            dotproducts[i, j] = dotproducts[j, i] = np.sum(xcorr)
            dotproducts_triangle[i, j] = dotproducts_triangle[j, i] = np.sum(getTriangle(xcorr))
            plot_vplot(getTriangle(xcorr))
            plt.title('%s vs %s'%(motifs[i], motifs[j]))
            plot_vplot(xcorr)
            plt.title('%s vs %s'%(motifs[i], motifs[j]))
            plot_vplot(smoothmat2)
            plt.title('%s vs %s'%(motifs[i], motifs[j]))
        else:
            dotproducts[i, j] = dotproducts[j, i] = np.nan
            dotproducts_triangle[i, j] = dotproducts_triangle[j, i] = np.nan

# Keep only the rows and the columns of dotproducts in which every entry is
# finite, so NaN/inf entries never reach the heat map.
rowindices = np.isfinite(dotproducts).all(axis=1)
columnindices = np.isfinite(dotproducts).all(axis=0)

# The colour range is taken from the full matrix (NaNs ignored), not from the
# filtered sub-matrix that is actually drawn.
heatmapfun.plotHeatMap(
    dotproducts[rowindices][:, columnindices],
    rowlabels=motifs[:num_to_test][rowindices],
    columnlabels=motifs[:num_to_test][columnindices],
    fontSize=11,
    vmin=np.nanmin(dotproducts),
    vmax=np.nanmax(dotproducts),
    cmap='RdGy_r',
)

# Preview mat1 (mean-subtracted, then Gaussian-smoothed) at several sigmas,
# one figure per sigma.
for sigma in [0.5, 0.6, 0.7, 0.8]:
    smoothmat1 = scipy.ndimage.gaussian_filter(mat1 - np.mean(mat1), sigma)
    plt.figure()
    # imshow only accepts origin='upper' or origin='lower'; 'lower' places
    # element [0, 0] in the lower-left corner of the axes.
    plt.imshow(smoothmat1, aspect='equal', origin='lower')

# Raw second matrix.
plt.figure()
plt.imshow(mat2, aspect='equal', origin='lower')

# Cross-correlation matrix computed earlier in the script.
plt.figure()
plt.imshow(xcorr, aspect='equal', origin='lower')

# Element-wise product of the two mean-subtracted matrices.
plt.figure()
plt.imshow((mat1 - np.mean(mat1)) * (mat2 - np.mean(mat2)), aspect='equal', origin='lower')
Пример #2
0
# Save the normalized peak counts. The output prefix is options.o when it is
# set; otherwise it is the options.c path with its extension stripped.
outputfile = options.o if options.o else os.path.splitext(options.c)[0]
np.savetxt(outputfile + '.normalize.peakCount', readCountNorm)

# Also save a replicate-reduced table: collapse replicates first, then scale
# each column by 10 / (its mean over the noExprChange rows).
readCountRed = filefun.reduceByReplicates(
    readCount, parameters.indices_for_reduce_by_replicates)
columnMeans = np.mean(readCountRed[noExprChange], 0)
readCountNorm = readCountRed / columnMeans * 10
np.savetxt(outputfile + '.normalize.norepicates.peakCount', readCountNorm)

# Plot and save sample-by-sample correlation heat maps.
numSamples = readCount.shape[1]
readCountNorm = readCount / np.mean(readCount[noExprChange], 0) * 10


def _sampleDistanceMatrix(rows):
    """Return pairwise getDistanceSpearmanr values between sample columns.

    Entry [b][a] compares columns a and b of readCountNorm, using only the
    rows selected by `rows`.
    """
    return np.array([
        [getDistanceSpearmanr(readCountNorm[rows, a], readCountNorm[rows, b])
         for a in range(numSamples)]
        for b in range(numSamples)
    ])


# All rows.
distanceCorr = _sampleDistanceMatrix(slice(None))
plotHeatMap(distanceCorr, rowlabels=parameters.headers, columnlabels=parameters.headers,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile + '.correlation_heatmap.all.pdf')

# Only the rows selected by noExprChange.
distanceCorr = _sampleDistanceMatrix(noExprChange)
plotHeatMap(distanceCorr, rowlabels=parameters.headers, columnlabels=parameters.headers,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile + '.correlation_heatmap.no_change.pdf')

# All rows again, but only the samples listed in parameters.indices_old are drawn.
distanceCorr = _sampleDistanceMatrix(slice(None))
plotHeatMap(distanceCorr[parameters.indices_old][:, parameters.indices_old],
            rowlabels=parameters.headers_old, columnlabels=parameters.headers_old,
            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
plt.savefig(outputfile + '.correlation_heatmap.tissue.pdf')

#plot scatterplot of replicates
for i,j in parameters.indices_for_reduce_by_replicates:
    xvalues = readCountNorm[:, i]
    yvalues = readCountNorm[:, j]
Пример #3
0
     reorder = np.array(bound_clusters['leaves'])
     
     plotfun.plotAggregateSignal(footprintVecs_norm_bound[reorder], labels=labels[reorder])
     plt.title('threshold %3.2f; %d bound by all'%(threshold, np.sum(np.all(boundMatrix, 0))))
     plt.savefig('%s.threshold_%3.2f.normalizedbyinsertions.bound.pdf'%(outfile, threshold))
     
     # plot unbound profiles
     footprintVecs_norm_unbound = np.array([np.mean(footprintMats[i, unboundMatrix[i]], 0)*
                                            np.mean(footprintMats[unboundMatrix])/np.mean(footprintMats[i, unboundMatrix[i]]) for i in range(numSamples)])
     plotfun.plotAggregateSignal(footprintVecs_norm_unbound, labels=labels)
     plt.title('threshold %3.2f; %d unbound'%(threshold, np.sum(np.all(np.logical_not(boundMatrix), 0))))
     plt.savefig('%s.threshold_%3.2f.normalizedbyinsertions.unbound.pdf'%(outfile, threshold))
     
     #  do Jaccard distance heat map
     distance = np.array([[1-jaccard(boundMatrix[i], boundMatrix[j]) for i in range(numSamples)] for j in range(numSamples)])
     plotHeatMap(distance, rowlabels=labels, columnlabels=labels, fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
     plt.savefig('%s.jaccard.heatmap.threshold_%3.2f.pdf'%(outfile, threshold))
         
 # One tall figure per sample, saved to its own PDF.
 for i in range(numSamples):
     fig = plt.figure(figsize=(4, 10))
     # Site order by descending strengthMatrix value for this sample.
     sortIndx = np.argsort(strengthMatrix[i])[::-1]
     # Difference of the 'r' fields; the threshold argument receives the number
     # of sites whose difference exceeds each cutoff.
     rDifference = correlationToIdeal[i]['r'] - correlationToBackground[i]['r']
     cutoffCounts = [np.sum(rDifference > j) for j in [0, 0.05, 0.1, 0.15, 0.2]]
     plot_heatmap_bar(footprintMats[i, sortIndx],
                      strengthMatrix[i, sortIndx],
                      threshold=cutoffCounts,
                      label=labels[i])
     plt.savefig('%s.footprint.%s.heatmap.pdf' % (outfile, labels[i]))

 # Plot side by side bound and unbound.
 threshold = 0.05
 fig = plt.figure(figsize=(4, 10))
 # Sites flagged in every row of boundMatrix.
 boundInAll = np.all(boundMatrix, 0)
 # NOTE(review): `i` is whatever the loop above left behind (the last sample) - confirm this is intended.
 plot_heatmap_bar(footprintMats[i, boundInAll], strengthMatrix[i, boundInAll])
 
     
# Scale every column so that its mean over the noExprChange rows equals 10.
referenceMeans = np.mean(readCount[noExprChange], 0)
readCountNorm = readCount / referenceMeans * 10

# Save the normalized peak counts. The output prefix is options.o when it is
# set; otherwise it is the options.c path with its extension stripped.
outputfile = options.o if options.o else os.path.splitext(options.c)[0]
np.savetxt(outputfile + '.normalize.peakCount', readCountNorm)

# Also save a replicate-reduced table, normalized the same way. Columns
# (0, 1) and (2, 3) are passed to reduceByReplicates as the replicate groups.
replicateGroups = np.array([[0, 1], [2, 3]])
readCountRed = filefun.reduceByReplicates(readCount, replicateGroups)
np.savetxt(
    outputfile + '.normalize.norepicates.peakCount',
    readCountRed / np.mean(readCountRed[noExprChange], 0) * 10,
)

# Plot and save the sample-by-sample correlation heat map.
numSamples = readCount.shape[1]
sampleColumns = [readCountNorm[:, k] for k in range(numSamples)]
# Entry [b][a] is getDistanceSpearmanr(column a, column b).
distanceCorr = np.array([
    [getDistanceSpearmanr(columnA, columnB) for columnA in sampleColumns]
    for columnB in sampleColumns
])
# NOTE(review): row labels use parameters.headers while column labels use
# parameters.headers_human - confirm the mismatch is intended.
plotHeatMap(
    distanceCorr,
    rowlabels=parameters.headers,
    columnlabels=parameters.headers_human,
    fontSize=11,
    cmap='RdGy_r',
    vmin=0,
    vmax=1,
)
plt.savefig(outputfile + '.correlation_heatmap.all.pdf')

"""
The remainder of the script makes a lot of plots. It requires calls of significant versus non-significant peaks.
Optional: run the Python script 'find_significant_peaks.py'.

os.system('python %s -b %s -p %s --indx %s'%('scoring/140815_peaks.coverageCorr.all.bed', outputfile+'.normalize.replicate_red.peakCount', options.b))
"""



# plot histogram and scatterplot
# Visit every unordered pair (i, j) with i < j among the indices 0..3; the
# lines visible here only print the pair.
for i in range(4):
    for j in range(i + 1, 4):
        # Call form of print: valid on Python 3 and, with a single argument,
        # prints the same text on Python 2.
        print('%d\t%d' % (i, j))
 
 # Save the current figure (drawn before this point) under the output prefix.
 plt.savefig('%s.all.normalizedbyinsertions.pdf'%outfile)
 
 # Row i of each array receives the two values main() returns for sample i.
 sort_indx = np.zeros((numSamples, numSites))
 correlation_stength = np.zeros((numSamples, numSites))
 # find correlations
 for i in range(numSamples):
     # Background signal: mean over axis 0 of this sample's backgroundMats entry.
     backgroundSignal = np.mean(backgroundMats[i], 0)    
     # idealSignal is footprintMats averaged over axis 1, then over axis 0;
     # it is recomputed on every iteration.
     sort_indx[i], correlation_stength[i] = main(footprintMats[i], idealSignal = np.mean(np.mean(footprintMats, 1), 0), backgroundSignal = backgroundSignal )
     # NOTE(review): presumably main() draws the figure that the title and
     # layout calls below apply to - confirm against main().
     plt.title(labels[i])
     plt.tight_layout()
 
 # For each cutoff: flag entries whose correlation strength exceeds it, build
 # the matrix whose [j][i] entry is 1 - jaccard(boundMatrix[i], boundMatrix[j]),
 # and save it as a heat map named after the cutoff.
 for threshold in (0, 0.05):
     boundMatrix = correlation_stength > threshold
     distance = np.array([
         [1 - jaccard(boundMatrix[i], boundMatrix[j]) for i in range(numSamples)]
         for j in range(numSamples)
     ])
     heatmapfun.plotHeatMap(distance, rowlabels=labels, columnlabels=labels,
                            fontSize=11, cmap='RdGy_r', vmin=0, vmax=1)
     plt.savefig('%s.jaccard.heatmap.threshold_%3.2f.pdf' % (outfile, threshold))
 
 
 # Entry [j][i] is element 0 of st.spearmanr() for the correlation-strength
 # rows of samples i and j (st is presumably scipy.stats - confirm import).
 distance = np.array([[st.spearmanr(correlation_stength[i], correlation_stength[j])[0] for i in range(numSamples)] for j in range(numSamples)])
 # The colormap name is 'RdGy_r' (reversed red-grey), as used by the other
 # heat maps in this script; 'Rd_Gy_r' is not a matplotlib colormap name.
 heatmapfun.plotHeatMap(distance, rowlabels=labels, columnlabels=labels, rowIndx=None, fontSize=None, columnIndx=None, cmap='RdGy_r', vmin=0, vmax=0.5)
 plt.savefig('%s.correlation.heatmap.pdf' %(outfile))
 """
 for i in range(numSamples):
     main(footprintMats[i], sortIndx = sort_indx_nfi)
        plt.figure(figsize=(20,5))
        heatmapfun.plotCoverageHeatMap(enrichment[:, reorder], cluster=False, rowlabels = parameters.headers_noreplicates[reorder])
# Save the full enrichment matrix as tab-delimited text.
np.savetxt('%s.enrichment_values.iterations_%d.mat'%(outfile, numIterations), enrichment_all, delimiter='\t')

# Save a BED file of the enrichment windows: one line per window centre,
# spanning stepSize/2 either side, with the end capped at the chromosome size.
# The with-block closes the file even if a write fails part-way through.
with open('%s.windowed_locs.bed'%outfile, 'w') as f:
    for chrm in chrms:
        windowCenters = windowedLocsDict[chrm]
        for windowCenter in windowCenters:
            f.write('%s\t%d\t%d\n'%(chrm, (windowCenter-stepSize/2), min(genomeSize[chrm], (windowCenter+stepSize/2))))

# Save a correlogram, if possible. Entry [j][i] is element 0 of
# st.spearmanr() for columns i and j of enrichment_all.
distanceCorr = np.array([[st.spearmanr(enrichment_all[:, i], enrichment_all[:, j])[0] for i in range(numSamples)] for j in range(numSamples)])
try:
    plotHeatMap(distanceCorr, rowlabels=parameters.headers_noreplicates, columnlabels=parameters.headers_noreplicates, fontSize=11, cmap='PuOr_r', vmin=-1, vmax=1)
    plt.savefig('%s.enrichment_correlation.iterations_%d.pdf'%(outfile, numIterations))
except ValueError:
    # Best effort: a ValueError from plotting or saving only skips the figure.
    # Call form of print works on both Python 2 and Python 3.
    print('did not save correlelogram')

# save bigwig file
numSamples = signalDensity.shape[1]
for sample in range(numSamples):
    
    # save bigwig file of enrichment
    wigFileName = '%s.enrichment.track%d.wig'%(outfile, sample)
    f = open(wigFileName, 'w')
    f.write("track name='enrichment score' description='enrichment in %s'\n"%(wigFileName))
    logFoldChangeAll = np.empty(0)
    for chrm in chrms:
        windowedLocs = windowedLocsDict[chrm]
        foldChange = signalDensityDict[chrm]/np.mean(signalDensityDict[chrm], 0)