Пример #1
0
def profileTargetsHistoAS(tccList, cName, name='boxplot', pRange=50):
    """Box-plot profiles around the top sense peak and its antisense image.

    For each tcc in ``tccList`` the highest peak is looked up on the tcc
    itself and on its antisense counterpart (``cg.convertToAS``). A target is
    skipped entirely when either lookup yields ``None``, so both histograms
    are built from the same set of targets. The profiles returned by
    ``svs.profileAroundPoint`` are pooled per profile coordinate and handed
    to ``plot.boxPlotHistoAS``.

    Args:
        tccList: iterable of tcc values accepted by ``cg.tccSplit``.
        cName: configuration name forwarded to ``cgPeaks.stretch`` and
            ``svs.profileAroundPoint``.
        name: forwarded to ``plot.boxPlotHistoAS`` as ``name``.
        pRange: second argument of ``svs.profileAroundPoint``. Defaults to
            50, the value that used to be hard-coded in a local named
            ``range`` (which shadowed the builtin).
    """
    histDict = {}  # {coord: [profile value of every target at coord]}
    histDictAS = {}
    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Highest peak on the sense strand.
        senseStretch = cgPeaks.stretch(tcc, cName)
        senseStretch.createPeaks(span=2)
        highestCoord = senseStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Highest peak on the antisense strand.
        antisenseStretch = cgPeaks.stretch(cg.convertToAS(tcc), cName)
        antisenseStretch.createPeaks(span=2)
        highestCoordAS = antisenseStretch.getHighestPeak()
        if highestCoordAS is None:
            continue

        # Profile around the sense peak.
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, pRange, cName, ratio=True)
        for coord in cProfile:
            # setdefault replaces the former bare try/except initialisation,
            # which would also have swallowed unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

        # Profile around the antisense image of the same point; ratioCoord
        # is the antisense strand's own highest peak.
        zPoint = cg.convertToAS(zPoint)
        cProfile = svs.profileAroundPoint(zPoint,
                                          pRange,
                                          cName,
                                          ratio=True,
                                          ratioCoord=highestCoordAS)
        for coord in cProfile:
            histDictAS.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
Пример #2
0
def extendPeakTest(tcc, pRange, minVal, maxAvgNoise, minPeakLength, maxPeakLength, cName):
    """Extend a peak outward from its centre and keep it if it looks clean.

    A profile around ``tcc`` is taken with
    ``stepVectorScan.profileAroundPoint(..., ratio=True)``. Starting next to
    offset 0, the peak is extended left and right for as long as the profile
    value exceeds ``minVal``. The peak is accepted when its length lies
    strictly between ``minPeakLength`` and ``maxPeakLength`` and the mean
    profile value of all offsets outside the peak is below ``maxAvgNoise``.
    Progress is printed to stdout.

    Args:
        tcc: coordinate accepted by ``cg.tccSplit``; its third field is used
            as the peak position.
        pRange: offsets ``1 - pRange`` .. ``pRange - 1`` are examined.
        minVal: profile value an offset must exceed to extend the peak.
        maxAvgNoise: exclusive upper bound on the mean profile value outside
            the peak.
        minPeakLength: exclusive lower bound on the peak length.
        maxPeakLength: exclusive upper bound on the peak length.
        cName: configuration name forwarded to ``profileAroundPoint``.

    Returns:
        The tcc built by ``cg.makeTcc`` for the extended peak, or ``False``
        when the peak is rejected or no offsets remain outside it.
    """
    chrom, strand, peakPosition, _end = cg.tccSplit(tcc)
    cProfile = stepVectorScan.profileAroundPoint(tcc, pRange, cName, ratio=True)

    # Left: walk from offset -1 outward. The default is the outermost offset
    # and is computed directly instead of indexing a reversed range list,
    # which only worked on Python 2 and raised IndexError for pRange <= 1.
    startFinal = 1 - pRange
    for i in range(-1, -pRange, -1):
        if cProfile[i] > minVal:
            print(' extending stretch')
        else:
            print(' end of stretch L')
            startFinal = i + 1
            break

    # Right: walk from offset +1 outward.
    endFinal = pRange - 1
    for i in range(1, pRange):
        if cProfile[i] > minVal:
            print(' extending stretch')
        else:
            print(' end of stretch R')
            endFinal = i - 1
            break

    peakLength = endFinal - startFinal + 1

    # Average profile value over every offset outside the peak.
    low = startFinal
    high = endFinal
    lowRange = list(range(1 - pRange, low))
    highRange = list(range(high + 1, pRange))
    totalLength = len(lowRange) + len(highRange)
    print('%s %s %s %s %s %s' % (totalLength, pRange, low, high, lowRange, highRange))
    if totalLength == 0:
        # Explicit check replaces a bare ``except:`` around the division,
        # which also hid unrelated errors.
        return False
    noiseExpression = 0
    for i in lowRange + highRange:
        noiseExpression += cProfile[i]
    avgNoise = noiseExpression / float(totalLength)

    # Keep only peaks with an acceptable length and a quiet surrounding.
    if (minPeakLength < peakLength < maxPeakLength) and (avgNoise < maxAvgNoise):
        goodTcc = cg.makeTcc(chrom, strand, peakPosition + startFinal, peakPosition + endFinal)
        print('*KEEPER')
        return goodTcc
    return False
Пример #3
0
def profileTargetsHistoAS(tccList, cName, name='boxplot', pRange=50):
    """Box-plot profiles around the top sense peak and its antisense image.

    For each tcc in ``tccList`` the highest peak is looked up on the tcc
    itself and on its antisense counterpart (``cg.convertToAS``). A target is
    skipped entirely when either lookup yields ``None``, so both histograms
    are built from the same set of targets. The profiles returned by
    ``svs.profileAroundPoint`` are pooled per profile coordinate and handed
    to ``plot.boxPlotHistoAS``.

    Args:
        tccList: iterable of tcc values accepted by ``cg.tccSplit``.
        cName: configuration name forwarded to ``cgPeaks.stretch`` and
            ``svs.profileAroundPoint``.
        name: forwarded to ``plot.boxPlotHistoAS`` as ``name``.
        pRange: second argument of ``svs.profileAroundPoint``. Defaults to
            50, the value that used to be hard-coded in a local named
            ``range`` (which shadowed the builtin).
    """
    histDict = {}  # {coord: [profile value of every target at coord]}
    histDictAS = {}
    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Highest peak (sense).
        senseStretch = cgPeaks.stretch(tcc, cName)
        senseStretch.createPeaks(span=2)
        highestCoord = senseStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Highest peak (antisense).
        antisenseStretch = cgPeaks.stretch(cg.convertToAS(tcc), cName)
        antisenseStretch.createPeaks(span=2)
        highestCoordAS = antisenseStretch.getHighestPeak()
        if highestCoordAS is None:
            continue

        # Profile around the sense peak.
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, pRange, cName, ratio=True)
        for coord in cProfile:
            # setdefault replaces the former bare try/except initialisation,
            # which would also have swallowed unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

        # Profile around the antisense image of the same point; ratioCoord
        # is the antisense strand's own highest peak.
        zPoint = cg.convertToAS(zPoint)
        cProfile = svs.profileAroundPoint(zPoint, pRange, cName, ratio=True,
                                          ratioCoord=highestCoordAS)
        for coord in cProfile:
            histDictAS.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHistoAS(histDict, histDictAS, name=name)
Пример #4
0
def profileTargetsHisto(tccList, cName, name='boxplot', pRange=200):
    """Box-plot the profiles around the highest peak of each target.

    For each tcc in ``tccList`` the highest peak is looked up via
    ``cgPeaks.stretch``; targets for which the lookup yields ``None`` are
    skipped. The profile returned by ``svs.profileAroundPoint`` is pooled per
    profile coordinate and the pooled values are passed to
    ``plot.boxPlotHisto``.

    Args:
        tccList: iterable of tcc values accepted by ``cg.tccSplit``.
        cName: configuration name forwarded to ``cgPeaks.stretch`` and
            ``svs.profileAroundPoint``.
        name: forwarded to ``plot.boxPlotHisto`` as ``name``.
        pRange: second argument of ``svs.profileAroundPoint``. Defaults to
            200, the value that used to be hard-coded.
    """
    histDict = {}  # {coord: [profile value of every target at coord]}
    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Highest peak inside the target.
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Profile around that peak.
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, pRange, cName, ratio=True)
        for coord in cProfile:
            # setdefault replaces the former bare try/except initialisation,
            # which would also have swallowed unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHisto(histDict, name=name)
Пример #5
0
def profileTargetsHisto(tccList, cName, name='boxplot', pRange=200):
    """Box-plot the profiles around the highest peak of each target.

    For each tcc in ``tccList`` the highest peak is looked up via
    ``cgPeaks.stretch``; targets for which the lookup yields ``None`` are
    skipped. The profile returned by ``svs.profileAroundPoint`` is pooled per
    profile coordinate and the pooled values are passed to
    ``plot.boxPlotHisto``.

    Args:
        tccList: iterable of tcc values accepted by ``cg.tccSplit``.
        cName: configuration name forwarded to ``cgPeaks.stretch`` and
            ``svs.profileAroundPoint``.
        name: forwarded to ``plot.boxPlotHisto`` as ``name``.
        pRange: second argument of ``svs.profileAroundPoint``. Defaults to
            200, the value that used to be hard-coded.
    """
    histDict = {}  # {coord: [profile value of every target at coord]}
    for tcc in tccList:
        chrom, strand, _start, end = cg.tccSplit(tcc)

        # Get highest peak.
        tccStretch = cgPeaks.stretch(tcc, cName)
        tccStretch.createPeaks(span=2)
        highestCoord = tccStretch.getHighestPeak()
        if highestCoord is None:
            continue

        # Profile around point.
        zPoint = cg.makeTcc(chrom, strand, highestCoord, end)
        cProfile = svs.profileAroundPoint(zPoint, pRange, cName, ratio=True)
        for coord in cProfile:
            # setdefault replaces the former bare try/except initialisation,
            # which would also have swallowed unrelated errors.
            histDict.setdefault(coord, []).append(cProfile[coord])

    plot.boxPlotHisto(histDict, name=name)
Пример #6
0
def parallelMakePeaks(tcc, cName, minExpression):
    """Find short roof-shaped peaks inside one tcc and write them to a file.

    Candidate peaks come from ``cgPeaks.stretch(tcc, cName)`` after
    ``createPeaks(span=1, minVal=int(minExpression))``. Around each candidate
    ``x`` a profile is taken (``ratio=True``) and scanned for the longest run
    of consecutive offsets whose value exceeds 0.70 (the "roof"). A roof is
    kept when it is 1 to 4 offsets long, the profile one offset beyond each
    edge is below 0.20, and the mean profile value outside those two offsets
    is below 0.3. Kept peaks are written one tcc per line to
    ``out/peakData.<tcc>.<minExpression>.<assembly>``. Progress is printed to
    stdout.

    Args:
        tcc: coordinate accepted by ``cg.tccSplit`` and ``cgPeaks.stretch``.
        cName: configuration name; its ``conf['assembly']`` entry becomes
            part of the output file name.
        minExpression: converted with ``int()`` and used as ``minVal`` for
            ``createPeaks``; also part of the output file name.
    """
    conf = c.getConfig(cName)
    outName = 'out/peakData.%s.%s.%s' % (tcc, minExpression, conf.conf['assembly'])

    pRange = 40   # offsets 1 - pRange .. pRange - 1 around each peak are scanned
    minVal = .70  # profile value an offset must exceed to count as roof
    extend = 1    # distance beyond each roof edge where the drop is sampled

    # ``with`` closes the file even if the scan raises part-way through.
    with open(outName, 'w') as f:
        print('scanning range %s' % (tcc,))
        chrom, strand, _start, _end = cg.tccSplit(tcc)
        peaks = cgPeaks.stretch(tcc, cName)
        peaks.createPeaks(span=1, minVal=int(minExpression))

        print('len peaks %s' % len(peaks.peaks))
        # The "skip peaks before endCheck" filter was disabled in the
        # original; endCheck is still tracked because it is printed.
        endCheck = 0
        for x in peaks.peaks:
            print('%s %s' % (x, endCheck))

            rTcc = cg.makeTcc(chrom, strand, x, x + 1)
            cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

            # Longest run of offsets above minVal. A run still open when the
            # scan ends is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                print('  %s %s' % (x + i, cProfile[i]))
                if cProfile[i] > minVal:
                    print('  extending stretch')
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                else:
                    if stretch > 0:
                        print('end of stretch')
                        if stretch > highest:  # longer than any previous run
                            highest = stretch
                            endFinal = i - 1
                            startFinal = startCurrent
                    startCurrent = None
                    stretch = 0

            # Offset 0 is a valid roof edge, so compare against None; the old
            # truthiness test dropped roofs starting or ending on the peak.
            if startFinal is None or endFinal is None:
                print('no start and end of peak')
                continue
            low = startFinal - extend
            high = endFinal + extend
            if not (low > (1 - pRange) and high < pRange):
                print('out of range')
                continue
            val = [float(cProfile[low]), float(cProfile[high])]
            print('%s %s %s %s' % (low, high, x, endFinal))
            endCheck = x + endFinal

            # Mean profile value over every offset outside [low, high].
            noiseOffsets = list(range(1 - pRange, low)) + list(range(high + 1, pRange))
            noiseExpression = 0
            for i in noiseOffsets:
                noiseExpression += cProfile[i]
            avgNoise = noiseExpression / float(len(noiseOffsets))

            # Keep short roofs with a sharp drop on both sides and low noise.
            print('%s %s %s %s' % (highest, val[0], val[1], avgNoise))
            if 0 < highest < 5 and val[0] < 0.20 and val[1] < .20 and avgNoise < .3:
                goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                print('*KEEPER')
                f.write('%s\n' % goodTcc)

    print('DONE %s' % (tcc,))
Пример #7
0
def makePeakInput(cName, minExpression=2000):
    """Scan every acceptable chromosome strand for roof-shaped peaks.

    Each chromosome listed by ``cg.returnChromLengthDict`` (and present in
    ``cg.acceptableChroms``) is walked on both strands in windows delimited
    by consecutive values of ``rangePoints(1, length, 1000)``. Peaks found in
    a window (``createPeaks(span=3, minVal=minExpression)``) are profiled
    with ``ratio=True`` and the longest run of offsets whose value exceeds
    0.80 (the "roof") is located. A peak is written to
    ``peakData.<minExpression>`` when the roof is 15 to 25 offsets long and
    the profile 4 offsets beyond each roof edge is below 0.2. Peaks lying
    before the end of the previously examined peak region are skipped.

    Args:
        cName: configuration name; its ``conf['assembly']`` entry selects the
            chromosome lengths.
        minExpression: ``minVal`` passed to ``createPeaks``; also the suffix
            of the output file name.
    """
    # Result unused here; the call is kept in case loading Main.conf has
    # side effects -- TODO confirm and drop if it does not.
    c.getConfig('Main.conf')
    conf = c.getConfig(cName)
    assembly = conf.conf['assembly']
    chromLens = cg.returnChromLengthDict(assembly)

    pRange = 30   # offsets 1 - pRange .. pRange - 1 around each peak are scanned
    minVal = .80  # profile value an offset must exceed to count as roof
    margin = 4    # distance beyond each roof edge where the drop is sampled

    # ``with`` closes the output file even if the scan raises part-way.
    with open('peakData.%s' % minExpression, 'w') as f:
        for chrom in chromLens:
            if chrom not in cg.acceptableChroms:
                continue
            for strand in ['1', '-1']:
                print('Getting Peaks for  %s %s' % (chrom, strand))
                prevI = 0
                endCheck = 0
                for boundary in rangePoints(1, chromLens[chrom], 1000):
                    if boundary == 1:
                        prevI = boundary
                        continue

                    start = prevI
                    end = boundary
                    prevI = boundary

                    tcc = cg.makeTcc(chrom, strand, start, end)
                    peaks = cgPeaks.stretch(tcc, cName)
                    peaks.createPeaks(span=3, minVal=minExpression)

                    for x in peaks.peaks:
                        if x < endCheck:
                            continue

                        rTcc = cg.makeTcc(chrom, strand, x, x + 1)
                        cProfile = stepVectorScan.profileAroundPoint(
                            rTcc, pRange, cName, ratio=True)

                        # Longest run of offsets above minVal. The loop
                        # variable no longer reuses the outer loop's name.
                        highest = 0
                        stretch = 0
                        startCurrent = None
                        startFinal = None
                        endFinal = None
                        for offset in range(1 - pRange, pRange):
                            if cProfile[offset] > minVal:
                                stretch += 1
                                if startCurrent is None:
                                    startCurrent = offset
                            else:
                                if stretch > highest:  # longer than any previous run
                                    highest = stretch
                                    endFinal = offset - 1
                                    startFinal = startCurrent
                                startCurrent = None
                                stretch = 0

                        # Offset 0 is a valid roof edge, so compare against
                        # None; the old truthiness test dropped roofs that
                        # started or ended exactly on the peak.
                        if startFinal is None or endFinal is None:
                            continue
                        low = startFinal - margin
                        high = endFinal + margin
                        if not (low > (1 - pRange) and high < pRange):
                            continue
                        val = [float(cProfile[low]), float(cProfile[high])]

                        endCheck = x + high

                        # Roof length and drop values must both qualify.
                        if 14 < highest < 26 and val[0] < 0.2 and val[1] < .2:
                            goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                            f.write('%s\n' % goodTcc)
def findPeaks(pType, cName=None):
    """Pick the best roof-shaped peak per hairpin and record it in the prediction file.

    The prediction file is ``conf['resultsExonsSorted']`` when ``pType`` is
    ``'E'`` and ``conf['resultsIntronsSorted']`` otherwise. For every hairpin
    returned by ``getHairpins.getHairpins`` the peaks inside its tcc are
    profiled; for each peak the longest run of offsets whose ratio profile
    exceeds 0.80 (the "roof") and the profile values 4 offsets beyond the
    roof edges (the "drop" values) are measured. Among peaks with a roof
    length of 15 to 25 and a larger drop value strictly between 0.0 and 0.2,
    the one whose roof length is closest to 22 wins. Finally every line of
    the prediction file is rewritten with the peak information (or
    ``'None'``) appended via ``cg.appendToLine(line, peakInfo, 13)``.

    Args:
        pType: ``'E'`` selects the exon results file; anything else selects
            the intron results file.
        cName: configuration name passed to ``c.getConfig`` and to the peak
            and profile helpers.
    """
    # Result unused here; the call is kept in case constructing the main
    # config has side effects -- TODO confirm and drop if it does not.
    c.cgConfig('Main.conf')
    conf = c.getConfig(cName)

    if pType == 'E':
        predName = conf.conf['resultsExonsSorted']
    else:
        predName = conf.conf['resultsIntronsSorted']
    print(predName)

    # CID -> [hairpin tcc, best combo or the string 'None']
    cHairs = getHairpins.getHairpins(predName)
    peakDict = {}
    for CID in cHairs:
        peakDict[CID] = [cHairs[CID], 'None']

    timer = cg.cgTimer()
    timer.start()

    # Put peaks in memory: chrom -> strand -> {peak coordinate: 0}
    print('Creating peak data')
    peaks = {}
    for CID in cHairs:
        tcc = cHairs[CID]
        chrom, strand, _start, _end = cg.tccSplit(tcc)
        strandPeaks = peaks.setdefault(chrom, {}).setdefault(strand, {})

        stretch = cgPeaks.stretch(tcc, cName)
        stretch.createPeaks()
        for peakCoord in stretch.peaks:
            strandPeaks[peakCoord] = 0
    print(timer.split())

    print('finding best combos')
    pRange = 30   # offsets 1 - pRange .. pRange - 1 around each peak are scanned
    minVal = .80  # profile value an offset must exceed to count as roof
    bestCombos = []
    for CID in peakDict:
        tcc = peakDict[CID][0]
        fields = cg.ss(tcc, ':')
        chrom = fields[0]
        strand = fields[1]
        start = int(fields[2])
        end = int(fields[3])

        # All known peaks inside this hairpin.
        tccPeaks = [i for i in range(start, end + 1) if i in peaks[chrom][strand]]

        # Measure roof length and drop values for every peak.
        peakCombos = []
        for x in tccPeaks:
            rTcc = cg.makeTcc(chrom, strand, x, x + 1)

            # Value at the peak itself (ratio=False). Kept in a local that
            # no longer shadows the builtin ``max``.
            cProfile = stepVectorScan.profileAroundPoint(rTcc, 1, cName, ratio=False)
            peakValue = cProfile[0]

            # Profile used to measure the roof.
            cProfile = stepVectorScan.profileAroundPoint(rTcc, pRange, cName, ratio=True)

            # Longest run of offsets above minVal. A run still open when the
            # scan ends is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                if cProfile[i] > minVal:
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                else:
                    if stretch > highest:  # longer than any previous run
                        highest = stretch
                        endFinal = i - 1
                        startFinal = startCurrent
                    startCurrent = None
                    stretch = 0

            # Drop values 4 offsets beyond each roof edge; they stay at 1.0
            # (which fails the filter below) when they cannot be sampled.
            # Offset 0 is a valid roof edge, so compare against None; the
            # old truthiness test ignored roofs that started or ended
            # exactly on the peak.
            val = [1.0, 1.0]
            if startFinal is not None and endFinal is not None:
                low = startFinal - 4
                high = endFinal + 4
                if low > (1 - pRange) and high < pRange:
                    val[0] = float(cProfile[low])
                    val[1] = float(cProfile[high])

            # Slots 2-4 are the placeholder 'S'.
            peakCombos.append([tcc, x, 'S', 'S', 'S', peakValue, highest, val])

        # Best combo: qualifying roof whose length is closest to 22.
        # NOTE(review): the original comment said "nearest 20" but the code
        # has always compared against 22 -- confirm which is intended.
        topCombo = None
        for combo in peakCombos:
            roofLength = combo[6]
            dropValue = max(combo[7][0], combo[7][1])
            if 14 < roofLength < 26 and 0.0 < dropValue < 0.2:
                if topCombo is None or math.fabs(22 - roofLength) < math.fabs(22 - topCombo[6]):
                    topCombo = combo

        if topCombo:
            peakDict[CID][1] = topCombo
            bestCombos.append(topCombo)
            print(bestCombos[-1])

    print(timer.split())

    # Now update the prediction file (slot 13). All lines are read before the
    # file is reopened for writing.
    newLines = []
    with open(predName, 'r') as predFile:
        for line in predFile:
            CID = cg.ss(line)[7]
            best = peakDict[CID][1]
            if best == 'None':
                peakInfo = 'None'
            else:
                peakInfo = '%s:%s:%s:%s:%s:%s' % (str(best[1])[-3:], 'S', str(best[4]).split('.')[0], best[5], best[6], best[7])
            newLines.append(cg.appendToLine(line, peakInfo, 13))

    with open(predName, 'w') as predFile:
        predFile.writelines(newLines)
Пример #9
0
def roofPeakTest(tcc, pRange, minRoofVal, maxAvgNoise, maxDropVal, extend, minPeakLength, maxPeakLength, cName):
    """Test whether the peak at ``tcc`` has a roof with a drop on both sides.

    A profile around ``tcc`` is taken with
    ``stepVectorScan.profileAroundPoint(..., ratio=True)``. Starting next to
    offset 0 the roof is extended left and right for as long as the profile
    value exceeds ``minRoofVal``. The peak passes when, on each side, at
    least one of the ``extend`` offsets just beyond the roof edge is below
    ``maxDropVal``, and the roof length lies within
    ``[minPeakLength, maxPeakLength]``.

    Note: ``extend`` does not extend the coordinates of the returned tcc; it
    is only used for declaring the peak. The average-noise computation is
    disabled (``avgNoise`` is fixed at 0.0), so the ``maxAvgNoise`` test only
    fails when ``maxAvgNoise <= 0``. Diagnostics are printed to stdout.

    Args:
        tcc: coordinate accepted by ``cg.tccSplit``; its third field is used
            as the peak position.
        pRange: offsets ``1 - pRange`` .. ``pRange - 1`` are examined.
        minRoofVal: profile value an offset must exceed to belong to the roof.
        maxAvgNoise: exclusive upper bound compared against ``avgNoise``.
        maxDropVal: exclusive upper bound for a drop value.
        extend: number of offsets beyond each roof edge that are inspected;
            converted with ``int()``.
        minPeakLength: inclusive lower bound on the roof length.
        maxPeakLength: inclusive upper bound on the roof length.
        cName: configuration name forwarded to ``profileAroundPoint``.

    Returns:
        The tcc built by ``cg.makeTcc`` for the roof, or ``False``.
    """
    chrom, strand, peakPosition, _end = cg.tccSplit(tcc)
    cProfile = stepVectorScan.profileAroundPoint(tcc, pRange, cName, ratio=True)

    # Left edge: walk from offset -1 outward. The default (roof reaches the
    # end of the range) is computed directly instead of indexing a reversed
    # range list, which only worked on Python 2 and raised IndexError for
    # pRange <= 1.
    startFinal = 1 - pRange
    startFinalE = 0  # profile value at the first offset left of the roof
    for i in range(-1, -pRange, -1):
        if not (cProfile[i] > minRoofVal):
            startFinal = i + 1
            startFinalE = cProfile[i]
            break

    # Right edge: walk from offset +1 outward.
    endFinal = pRange - 1
    endFinalE = 0  # profile value at the first offset right of the roof
    for i in range(1, pRange):
        if not (cProfile[i] > minRoofVal):
            endFinal = i - 1
            endFinalE = cProfile[i]
            break

    peakLength = endFinal - startFinal + 1

    extend = int(extend)
    low = startFinal - extend
    high = endFinal + extend
    if not (low > (1 - pRange) and high < pRange):
        print('out of range')
        return False

    # At least one value in each extended range must be below maxDropVal.
    leftDrop = [float(cProfile[startFinal - x]) for x in range(1, extend + 1)]
    rightDrop = [float(cProfile[endFinal + x]) for x in range(1, extend + 1)]
    leftDropPass = [x < maxDropVal for x in leftDrop]
    rightDropPass = [x < maxDropVal for x in rightDrop]
    if not (any(leftDropPass) and any(rightDropPass)):
        print('dropVal Fail dropLeft dropRight')
        print('%s %s %s %s %s %s' % (startFinal, endFinal, leftDrop, rightDrop, leftDropPass, rightDropPass))
        return False

    # The noise average around the peak is not computed in this version.
    avgNoise = 0.0

    # One output line: the roof summary followed by the verdict.
    summary = '%s %s %s %s %s' % (startFinal, startFinalE, endFinal, endFinalE, peakLength)
    goodTcc = cg.makeTcc(chrom, strand, peakPosition + startFinal, peakPosition + endFinal)
    if (minPeakLength <= peakLength <= maxPeakLength) and (avgNoise < maxAvgNoise):
        print('%s  *KEEPER %s %s %s %s' % (summary, goodTcc, peakLength, avgNoise, maxAvgNoise))
        return goodTcc
    print('%s  reason %s %s %s %s' % (summary, goodTcc, peakLength, avgNoise, maxAvgNoise))
    return False
Пример #10
0
def parallelMakePeaks(tcc, cName, minExpression):
    """Find short roof-shaped peaks inside one tcc and write them to a file.

    Candidate peaks come from ``cgPeaks.stretch(tcc, cName)`` after
    ``createPeaks(span=1, minVal=int(minExpression))``. Around each candidate
    ``x`` a profile is taken (``ratio=True``) and scanned for the longest run
    of consecutive offsets whose value exceeds 0.70 (the "roof"). A roof is
    kept when it is 1 to 4 offsets long, the profile one offset beyond each
    edge is below 0.20, and the mean profile value outside those two offsets
    is below 0.3. Kept peaks are written one tcc per line to
    ``out/peakData.<tcc>.<minExpression>.<assembly>``. Progress is printed to
    stdout.

    Args:
        tcc: coordinate accepted by ``cg.tccSplit`` and ``cgPeaks.stretch``.
        cName: configuration name; its ``conf['assembly']`` entry becomes
            part of the output file name.
        minExpression: converted with ``int()`` and used as ``minVal`` for
            ``createPeaks``; also part of the output file name.
    """
    conf = c.getConfig(cName)
    outName = 'out/peakData.%s.%s.%s' % (tcc, minExpression, conf.conf['assembly'])

    pRange = 40   # offsets 1 - pRange .. pRange - 1 around each peak are scanned
    minVal = .70  # profile value an offset must exceed to count as roof
    extend = 1    # distance beyond each roof edge where the drop is sampled

    # ``with`` closes the file even if the scan raises part-way through.
    with open(outName, 'w') as f:
        print('scanning range %s' % (tcc,))
        chrom, strand, _start, _end = cg.tccSplit(tcc)
        peaks = cgPeaks.stretch(tcc, cName)
        peaks.createPeaks(span=1, minVal=int(minExpression))

        print('len peaks %s' % len(peaks.peaks))
        # The "skip peaks before endCheck" filter was disabled in the
        # original; endCheck is still tracked because it is printed.
        endCheck = 0
        for x in peaks.peaks:
            print('%s %s' % (x, endCheck))

            rTcc = cg.makeTcc(chrom, strand, x, x + 1)
            cProfile = stepVectorScan.profileAroundPoint(rTcc,
                                                         pRange,
                                                         cName,
                                                         ratio=True)

            # Longest run of offsets above minVal. A run still open when the
            # scan ends is never recorded.
            highest = 0
            stretch = 0
            startCurrent = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                print('  %s %s' % (x + i, cProfile[i]))
                if cProfile[i] > minVal:
                    print('  extending stretch')
                    stretch += 1
                    if startCurrent is None:
                        startCurrent = i
                else:
                    if stretch > 0:
                        print('end of stretch')
                        if stretch > highest:  # longer than any previous run
                            highest = stretch
                            endFinal = i - 1
                            startFinal = startCurrent
                    startCurrent = None
                    stretch = 0

            # Offset 0 is a valid roof edge, so compare against None; the old
            # truthiness test dropped roofs starting or ending on the peak.
            if startFinal is None or endFinal is None:
                print('no start and end of peak')
                continue
            low = startFinal - extend
            high = endFinal + extend
            if not (low > (1 - pRange) and high < pRange):
                print('out of range')
                continue
            val = [float(cProfile[low]), float(cProfile[high])]
            print('%s %s %s %s' % (low, high, x, endFinal))
            endCheck = x + endFinal

            # Mean profile value over every offset outside [low, high].
            noiseOffsets = list(range(1 - pRange, low)) + list(range(high + 1, pRange))
            noiseExpression = 0
            for i in noiseOffsets:
                noiseExpression += cProfile[i]
            avgNoise = noiseExpression / float(len(noiseOffsets))

            # Keep short roofs with a sharp drop on both sides and low noise.
            print('%s %s %s %s' % (highest, val[0], val[1], avgNoise))
            if 0 < highest < 5 and val[0] < 0.20 and val[1] < .20 and avgNoise < .3:
                goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                print('*KEEPER')
                f.write('%s\n' % goodTcc)

    print('DONE %s' % (tcc,))
Пример #11
0
def makePeakInput(cName, minExpression=2000):
    """Scan every acceptable chromosome strand for roof-shaped peaks.

    Each chromosome listed by ``cg.returnChromLengthDict`` (and present in
    ``cg.acceptableChroms``) is walked on both strands in windows delimited
    by consecutive values of ``rangePoints(1, length, 1000)``. Peaks found in
    a window (``createPeaks(span=3, minVal=minExpression)``) are profiled
    with ``ratio=True`` and the longest run of offsets whose value exceeds
    0.80 (the "roof") is located. A peak is written to
    ``peakData.<minExpression>`` when the roof is 15 to 25 offsets long and
    the profile 4 offsets beyond each roof edge is below 0.2. Peaks lying
    before the end of the previously examined peak region are skipped.

    Args:
        cName: configuration name; its ``conf['assembly']`` entry selects the
            chromosome lengths.
        minExpression: ``minVal`` passed to ``createPeaks``; also the suffix
            of the output file name.
    """
    # Result unused here; the call is kept in case loading Main.conf has
    # side effects -- TODO confirm and drop if it does not.
    c.getConfig('Main.conf')
    conf = c.getConfig(cName)
    assembly = conf.conf['assembly']
    chromLens = cg.returnChromLengthDict(assembly)

    pRange = 30   # offsets 1 - pRange .. pRange - 1 around each peak are scanned
    minVal = .80  # profile value an offset must exceed to count as roof
    margin = 4    # distance beyond each roof edge where the drop is sampled

    # ``with`` closes the output file even if the scan raises part-way.
    with open('peakData.%s' % minExpression, 'w') as f:
        for chrom in chromLens:
            if chrom not in cg.acceptableChroms:
                continue
            for strand in ['1', '-1']:
                print('Getting Peaks for  %s %s' % (chrom, strand))
                prevI = 0
                endCheck = 0
                for boundary in rangePoints(1, chromLens[chrom], 1000):
                    if boundary == 1:
                        prevI = boundary
                        continue

                    start = prevI
                    end = boundary
                    prevI = boundary

                    tcc = cg.makeTcc(chrom, strand, start, end)
                    peaks = cgPeaks.stretch(tcc, cName)
                    peaks.createPeaks(span=3, minVal=minExpression)

                    for x in peaks.peaks:
                        if x < endCheck:
                            continue

                        rTcc = cg.makeTcc(chrom, strand, x, x + 1)
                        cProfile = stepVectorScan.profileAroundPoint(
                            rTcc, pRange, cName, ratio=True)

                        # Longest run of offsets above minVal. The loop
                        # variable no longer reuses the outer loop's name.
                        highest = 0
                        stretch = 0
                        startCurrent = None
                        startFinal = None
                        endFinal = None
                        for offset in range(1 - pRange, pRange):
                            if cProfile[offset] > minVal:
                                stretch += 1
                                if startCurrent is None:
                                    startCurrent = offset
                            else:
                                if stretch > highest:  # longer than any previous run
                                    highest = stretch
                                    endFinal = offset - 1
                                    startFinal = startCurrent
                                startCurrent = None
                                stretch = 0

                        # Offset 0 is a valid roof edge, so compare against
                        # None; the old truthiness test dropped roofs that
                        # started or ended exactly on the peak.
                        if startFinal is None or endFinal is None:
                            continue
                        low = startFinal - margin
                        high = endFinal + margin
                        if not (low > (1 - pRange) and high < pRange):
                            continue
                        val = [float(cProfile[low]), float(cProfile[high])]

                        endCheck = x + high

                        # Roof length and drop values must both qualify.
                        if 14 < highest < 26 and val[0] < 0.2 and val[1] < .2:
                            goodTcc = cg.makeTcc(chrom, strand, x + low, x + high)
                            f.write('%s\n' % goodTcc)
Пример #12
0
def roofPeakTest(tcc, pRange, minRoofVal, maxAvgNoise, maxDropVal, extend, minPeakLength, maxPeakLength, cName):
    """Test whether the peak at ``tcc`` has a roof, a sharp drop and low noise.

    A profile around ``tcc`` is taken with
    ``stepVectorScan.profileAroundPoint(..., ratio=True)``. Starting next to
    offset 0 the roof is extended left and right for as long as the profile
    value exceeds ``minRoofVal``. The peak passes when the profile value
    ``extend`` offsets beyond each roof edge is below ``maxDropVal``, the
    roof length lies strictly between ``minPeakLength`` and
    ``maxPeakLength``, and the mean profile value outside the extended roof
    is below ``maxAvgNoise``.

    Note: ``extend`` does not extend the coordinates of the returned tcc; it
    is only used for declaring the peak. Progress is printed to stdout.

    Args:
        tcc: coordinate accepted by ``cg.tccSplit``; its third field is used
            as the peak position.
        pRange: offsets ``1 - pRange`` .. ``pRange - 1`` are examined.
        minRoofVal: profile value an offset must exceed to belong to the roof.
        maxAvgNoise: exclusive upper bound on the mean profile value outside
            the extended roof.
        maxDropVal: exclusive upper bound for both drop values.
        extend: distance beyond each roof edge where the drop is sampled;
            converted with ``int()``.
        minPeakLength: exclusive lower bound on the roof length.
        maxPeakLength: exclusive upper bound on the roof length.
        cName: configuration name forwarded to ``profileAroundPoint``.

    Returns:
        The tcc built by ``cg.makeTcc`` for the roof, or ``False``.
    """
    chrom, strand, peakPosition, _end = cg.tccSplit(tcc)
    cProfile = stepVectorScan.profileAroundPoint(tcc, pRange, cName, ratio=True)

    # Left edge: walk from offset -1 outward. The default (roof reaches the
    # end of the range) is computed directly instead of indexing a reversed
    # range list, which only worked on Python 2 and raised IndexError for
    # pRange <= 1.
    startFinal = 1 - pRange
    for i in range(-1, -pRange, -1):
        if cProfile[i] > minRoofVal:
            print(' extending stretch')
        else:
            print(' end of stretch L')
            startFinal = i + 1
            break

    # Right edge: walk from offset +1 outward.
    endFinal = pRange - 1
    for i in range(1, pRange):
        if cProfile[i] > minRoofVal:
            print(' extending stretch')
        else:
            print(' end of stretch R')
            endFinal = i - 1
            break

    peakLength = endFinal - startFinal + 1

    extend = int(extend)
    low = startFinal - extend
    high = endFinal + extend
    if not (low > (1 - pRange) and high < pRange):
        print('out of range')
        return False

    # Both drop values must be below maxDropVal.
    val = [float(cProfile[low]), float(cProfile[high])]
    if not (val[0] < maxDropVal and val[1] < maxDropVal):
        return False

    # Mean profile value over every offset outside [low, high]; the range
    # check above guarantees at least one offset on the left.
    noiseOffsets = list(range(1 - pRange, low)) + list(range(high + 1, pRange))
    noiseExpression = 0
    for i in noiseOffsets:
        noiseExpression += cProfile[i]
    avgNoise = noiseExpression / float(len(noiseOffsets))

    # Keep only peaks with an acceptable length and a quiet surrounding.
    if (minPeakLength < peakLength < maxPeakLength) and (avgNoise < maxAvgNoise):
        goodTcc = cg.makeTcc(chrom, strand, peakPosition + startFinal, peakPosition + endFinal)
        print('*KEEPER')
        return goodTcc
    return False
Пример #13
0
def findPeaks(pType, cName=None):
    """Pick the best "roof" peak for every hairpin and record it in the
    sorted prediction file (field 13).

    pType -- 'E' selects conf.conf['resultsExonsSorted']; any other value
             selects conf.conf['resultsIntronsSorted'].
    cName -- configuration name, passed to c.getConfig, cgPeaks.stretch and
             stepVectorScan.profileAroundPoint.

    For each hairpin returned by getHairpins.getHairpins, every peak inside
    the hairpin coordinates is profiled:
      * the value at the peak itself (ratio=False profile of range 1),
      * the longest run of positions within +/- pRange whose ratio profile
        exceeds minVal (the "roof"),
      * the ratio values dropOffset positions outside each end of the roof.
    A peak qualifies when 14 < roof length < 26 and the larger of the two
    outside values lies strictly between 0.0 and 0.2; among qualifying peaks
    the one whose roof length is closest to 22 wins (first one on ties).

    The prediction file is then rewritten in place with the peak info (or
    'None') passed to cg.appendToLine(line, peakInfo, 13).  Returns None.
    """
    #tuning constants (same values the code has always used)
    pRange = 30  #half-width of the window scanned around each peak
    minVal = .80  #ratio a position must exceed to count as part of a roof
    dropOffset = 4  #distance outside the roof where the drop is sampled
    targetRoofLength = 22  #preferred roof length when several peaks qualify

    #init
    #NOTE(review): mConf is never read in this function; the call is kept in
    #case constructing the config has side effects -- confirm and remove.
    mConf = c.cgConfig('Main.conf')
    conf = c.getConfig(cName)

    if pType == 'E':
        predName = conf.conf['resultsExonsSorted']
    else:
        predName = conf.conf['resultsIntronsSorted']

    print(predName)
    #make CID:hairpin:peak dictionary
    cHairs = getHairpins.getHairpins(predName)
    peakDict = {}
    for CID in cHairs:
        peakDict[CID] = [cHairs[CID], 'None']

    timer = cg.cgTimer()
    timer.start()

    #put peaks in memory
    print('Creating peak data')
    peaks = {}  # chr:strand:peak:value
    for CID in cHairs:
        tcc = cHairs[CID]
        chrom, strand, _start, _end = cg.tccSplit(tcc)

        #create peaks for tcc and add to peak dictionary
        strandPeaks = peaks.setdefault(chrom, {}).setdefault(strand, {})
        stretch = cgPeaks.stretch(tcc, cName)
        stretch.createPeaks()
        for peakCoord in stretch.peaks:
            strandPeaks[peakCoord] = 0
    print(timer.split())

    print('finding best combos')
    bestCombos = []
    for CID in peakDict:
        tcc = peakDict[CID][0]
        tccFields = cg.ss(tcc, ':')
        chrom = tccFields[0]
        strand = tccFields[1]
        start = int(tccFields[2])
        end = int(tccFields[3])

        #get all peaks that fall inside this hairpin
        strandPeaks = peaks[chrom][strand]
        tccPeaks = [i for i in range(start, end + 1) if i in strandPeaks]

        #Calculate parameters...
        peakCombos = []
        for x in tccPeaks:
            rTcc = cg.makeTcc(chrom, strand, x, x + 1)

            #value at the peak itself: a range-1 profile only has offset 0
            cProfile = stepVectorScan.profileAroundPoint(rTcc,
                                                         1,
                                                         cName,
                                                         ratio=False)
            peakValue = cProfile[0]

            #now make profile for roof...
            cProfile = stepVectorScan.profileAroundPoint(rTcc,
                                                         pRange,
                                                         cName,
                                                         ratio=True)

            #longest run of offsets above minVal.  A run is only recorded
            #when a position at or below minVal terminates it, so a run still
            #open at the last offset of the window is not counted.
            roofLength = 0
            runLength = 0
            runStart = None
            startFinal = None
            endFinal = None
            for i in range(1 - pRange, pRange):
                if cProfile[i] > minVal:
                    if runStart is None:
                        runStart = i
                    runLength += 1
                    continue
                if runLength > roofLength:
                    roofLength = runLength
                    startFinal = runStart
                    endFinal = i - 1
                runStart = None
                runLength = 0

            #get +/- dropOffset value...
            #Offsets are integers and 0 (the peak position) is a valid roof
            #boundary, so test against None rather than truthiness.
            val = [1.0, 1.0]
            if startFinal is not None and endFinal is not None:
                low = startFinal - dropOffset
                high = endFinal + dropOffset
                if low > (1 - pRange) and high < pRange:
                    val[0] = float(cProfile[low])
                    val[1] = float(cProfile[high])

            #fill in other details ('S' placeholders for y, dist and ratio)
            peakCombos.append(
                [tcc, x, 'S', 'S', 'S', peakValue, roofLength, val])

        #find best combo...
        topCombo = None
        for combo in peakCombos:
            comboRoof = combo[6]
            dropValue = max(combo[7])  #the worse (larger) of the two drops

            if not (14 < comboRoof < 26):
                continue
            if not (0.0 < dropValue < 0.2):
                continue
            #keep the roof length nearest targetRoofLength; first one wins ties
            if topCombo is None or (abs(targetRoofLength - comboRoof) <
                                    abs(targetRoofLength - topCombo[6])):
                topCombo = combo

        if topCombo is not None:
            peakDict[CID][1] = topCombo
            bestCombos.append(topCombo)
            print(bestCombos[-1])

    print(timer.split())

    #now update predFile (SLOT 13)
    #All lines are built before the file is reopened for writing, so a
    #failure while formatting leaves the original file untouched.
    newLines = []
    with open(predName, 'r') as predFile:
        for line in predFile:
            CID = cg.ss(line)[7]
            combo = peakDict[CID][1]
            if combo == 'None':
                peakInfo = 'None'
            else:
                peakInfo = '%s:%s:%s:%s:%s:%s' % (
                    str(combo[1])[-3:], 'S', str(combo[4]).split('.')[0],
                    combo[5], combo[6], combo[7])
            newLines.append(cg.appendToLine(line, peakInfo, 13))

    with open(predName, 'w') as predFile:
        predFile.writelines(newLines)