def plotAggregateSignal(footprintMats, backgroundMats, labels=None, plotMatrix=None):
    """Plot one normalized aggregate insertion profile per sample.

    For each sample, the insertion profiles of the selected sites are averaged
    over sites, then rescaled so that every sample's aggregate profile has the
    same mean: the mean insertion value over all selected sites of all
    samples.  The resulting curves are drawn on a new figure with
    ``plot_manylines``.

    Parameters
    ----------
    footprintMats : array indexed as [sample, site, position]
        Insertion values; its first three dimensions give the number of
        samples, the number of sites and the profile length.
    backgroundMats : unused
        Not read by this function; kept so existing callers keep working.
    labels : optional
        Passed through unchanged to ``plot_manylines``.
    plotMatrix : boolean array of shape (numSamples, numSites), optional
        Selects which sites contribute to each sample's aggregate.  Defaults
        to all sites of all samples.

    Returns
    -------
    None
        The figure is left as the current matplotlib figure.
    """
    # useful constants
    numSamples, numSites, insertionLength = footprintMats.shape[:3]

    # plotMatrix gives what rows to plot. Default is all rows.
    if plotMatrix is None:
        plotMatrix = np.ones((numSamples, numSites), dtype=bool)

    # x axis centred on the motif; floor division keeps the positions integral
    # regardless of the interpreter's '/' semantics.
    xvalues = np.arange(-insertionLength // 2, insertionLength // 2)

    # normalize by number of insertions
    print("Normalizing by total insertions in intervals...")
    # The mask is (sample, site), so it must index the two leading axes.
    # (Indexing the trailing axes with the transposed mask, as was done
    # before, mismatches the array's shape.)
    averageInsertions = np.mean(footprintMats[plotMatrix])

    vecs = np.zeros((numSamples, insertionLength))
    for i in range(numSamples):
        # mean profile over the selected sites of sample i
        aggregateInsertions = np.mean(footprintMats[i, plotMatrix[i]], 0)
        # rescale so the profile's mean equals the global average
        vecs[i] = aggregateInsertions * averageInsertions / np.mean(aggregateInsertions)

    # plot Aggregate Signal
    print("plotting aggregated signals..")
    fig = plt.figure(figsize=(6, 5))
    ax1 = fig.add_subplot(111)
    plot_manylines(vecs, x=xvalues, labels=labels)
    ax1.set_xlim((np.min(xvalues), np.max(xvalues)))
    ax1.set_ylabel('normalized insertions per bp', color='r')
    ax1.set_xlabel('distance from motif center (bp)')
    for tl in ax1.get_yticklabels():
        tl.set_color('r')
    plt.subplots_adjust(bottom=0.15, right=0.85, left=0.15, top=0.9)
    return
def subtractBackgroundSignal(vector, subtractVector, windowSize):
    """Fit a background vector to the centre of a signal and subtract it.

    A degree-1 polynomial (slope and intercept) is fitted so that
    ``subtractVector`` best matches ``vector`` inside a window of roughly
    ``windowSize`` points centred on the middle of ``vector``.  The scaled and
    shifted background is then subtracted from the whole of ``vector``.

    Parameters
    ----------
    vector : array
        Signal to correct.  Presumably a 1-D numpy array; it is indexed with
        an integer index array.
    subtractVector : array
        Background signal; must be the same size as ``vector``.
    windowSize : int or None
        Width of the central fitting window.  ``None`` means 10.

    Returns
    -------
    array
        ``vector`` minus the fitted background
        (``subtractVector * slope + intercept``).

    Notes
    -----
    A diagnostic figure is created, drawn with ``plot_manylines`` and closed
    again without being saved.
    """
    print("using background to subtract off Tn5 bias...")

    if windowSize is None:
        windowSize = 10

    # Central window of indices used for the fit, clipped to the valid range
    # so that an oversized window cannot wrap around (negative indices) or
    # run past the end of the vector.
    numPoints = len(vector)
    start = max(int(np.floor((numPoints - windowSize) * 0.5)), 0)
    stop = min(int(np.ceil((numPoints + windowSize) * 0.5)), numPoints)
    window = np.arange(start, stop)

    # least-squares line mapping background values onto signal values
    slope, intercept = np.polyfit(subtractVector[window], vector[window], deg=1)
    background = subtractVector * slope + intercept

    # Subtract the whole fitted background, intercept included.  (The
    # intercept used to be added back because of a missing parenthesis.)
    vectorFitted = vector - background

    plt.figure()
    plot_manylines([vector, background, vectorFitted],
                   labels=['before fit', 'subtracted', 'final'])
    plt.close()
    return vectorFitted
else: print "skipping finding correlation to background" correlationToBackground = np.copy(correlationToIdeal) correlationToBackground['r'] = np.zeros(correlationToIdeal['r'].shape) np.save(outfile+'.footprint.correlation.ideal', correlationToIdeal) else: print "Find correlation to aggregate" correlationToIdeal = findCorrelationCoefficients(footprintMats, findIdealSignal(np.mean(footprintMats, 0))) print "Skip finding correlation to background" correlationToBackground = np.copy(correlationToIdeal) correlationToBackground['r'] = np.zeros(correlationToIdeal['r'].shape) # call bound vs unbound fig = plt.figure() plot_manylines([np.sort(correlationToIdeal[i]['r']-correlationToBackground[i]['r'])[::-1] for i in range(numSamples)], labels = labels) ax = plt.gca() ax.set_xlabel('motif sites') ax.set_ylabel('correlation coefficient to ideal minus background') plt.tight_layout() plt.savefig('%s.footprint.correlationCoefficient.pdf'%outfile) # define bound matrix for threshold in [0, 0.05]: boundMatrix = np.zeros(footprintMats.shape[0:2], dtype=bool) strengthMatrix = np.zeros(footprintMats.shape[0:2]) for i in range(numSamples): boundMatrix[i] = correlationToIdeal[i]['r']-correlationToBackground[i]['r'] > threshold strengthMatrix[i] = correlationToIdeal[i]['r']-correlationToBackground[i]['r'] unboundMatrix = np.logical_not(boundMatrix)