Пример #1
0
import compareData as compare
import bioLibCG as cg

exonList = compare.tccFileToList('allExons.tcc', 0)

print cg.getTccListTotalLength(exonList)
nonOverlap = compare.collapseOverlaps(exonList)
print cg.getTccListTotalLength(nonOverlap)

o = open('mouseExons.tcc', 'w')
for tcc in nonOverlap:
    o.write(tcc + '\n')
o.close()
Пример #2
0
import compareData as compare

tccList = compare.tccFileToList('snos.tcc', 0)

collapsed = compare.collapseOverlaps(tccList)

for tcc in collapsed:
	print tcc
	
Пример #3
0
def intronNoisy(cName = None):
	mConf = c.cgConfig('Main.conf')
	conf = c.getConfig(cName)
	
	#init
	cHairs = getHairpins.getHairpins(conf.conf['resultsIntrons']) #CID: HAIRPIN
	organism = conf.conf['organism']
	exonList = compare.tccFileToList('%sExons.tcc' % organism, 0)
	slide = 1000
	
	#make prediction overlap hitmap
	predMap = {}
	predList = []
	for CID in cHairs:
		hPin = cHairs[CID]
		predList.append(hPin)
	
	#collapse Overlaps
	print ' collapsing predictions'
	predList = compare.collapseOverlaps(predList)
	print ' collapsing exons'
	exonList = compare.collapseOverlaps(exonList)
	
	
	#collect levels for each hairpin region
	cidLevels = {}
	for CID in cHairs:
		print CID
		hPin = cHairs[CID]
		chrom = ss(hPin, ':')[0]
		strand = ss(hPin, ':')[1]
		start = int(ss(hPin, ':')[2])
		end = int(ss(hPin, ':')[3])
		
		scanStart = start - slide
		scanEnd = end + slide
		
		scanRange = []
		scanRange.append('%s:%s:%s:%s' % (chrom, strand, scanStart, start))
		scanRange.append('%s:%s:%s:%s' % (chrom, strand, end, scanEnd))
		
		print scanRange
		scanRange = compare.subtractTwoTccLists(scanRange, predList)
		scanRange = compare.subtractTwoTccLists(scanRange, exonList)
			
		levels = []
		
		print '  Retrieving Expression levels:', cg.getTccListTotalLength(scanRange)
		levels = []
		
		
		hPinLevels = stepVectorScan.scanVectorsHist(scanRange, cName)
		for hPin in hPinLevels:
			levels.extend(hPinLevels[hPin])
		
			
		cidLevels[CID] = levels
		
	#output levels to file
	
	#find longest
	longest = 0
	for CID in cidLevels:
		length = len(cidLevels[CID])
		if length > longest:
			longest = length
	
	sortedKeys = cidLevels.keys()
	sortedKeys.sort()
	
	newLines = []
	for j in range(0, longest): #how many lines are there
		newLine = []
		for CID in sortedKeys:
			if len(cidLevels[CID]) > j:# add it
				newLine.append(str(cidLevels[CID][j]))
			else:
				newLine.append('NA')
	
		newLines.append('\t'.join(newLine) + '\n')
	
	outFileN = conf.conf['intronNoiseData']
	outFile = open(outFileN, 'w')
	outFile.write('\t'.join(sortedKeys) + '\n')
	outFile.writelines(newLines)
	outFile.close()
Пример #4
0
import compareData as compare

tccList = compare.tccFileToList('snos.tcc', 0)

collapsed = compare.collapseOverlaps(tccList)

for tcc in collapsed:
    print tcc
Пример #5
0
def exonNoisy(cName = None):
	#init
	mConf = c.cgConfig('Main.conf')
	conf = c.getConfig(cName)
	cHairs = getHairpins.getHairpins(conf.conf['resultsExons']) #CID: HAIRPIN
	organism = conf.conf['organism']
	geneSetFolder = mConf.conf['geneSets%s' % organism]
	
	#make prediction overlap hitmap
	print 'Making prediction list'
	predList = [] 
	for CID in cHairs:
		hPin = cHairs[CID]
		predList.append(hPin)
	
	if compare.checkIfOverlaps(predList):
		predList = compare.collapseOverlaps(predList)
	
	
	#make genes for Ensemble/make list of tccs for exons.
	print 'Creating gene set'
	
	ensGenes = cgGenes.createGeneSetFromFile(geneSetFolder + '/ensemblAllExons.tsv')
	print '  loaded # genes:', len(ensGenes.set)
	
	
	#collect levels for each haipin region
	print '[Checking all levels]'
	cidLevels = {}
	for CID in cHairs:
		print CID
		hPin = cHairs[CID]
			
		#for each hairpin, --> find overlapping transcripts in same gene
		overlappingGenes = ensGenes.geneOverlaps([hPin])
		if len(overlappingGenes) > 0:
			gIDs = [gene.id for gene in overlappingGenes]
			allTccs = ensGenes.getTccsFromGIDs(gIDs)
			if compare.checkIfOverlaps:
				print '  Overlaps...collapsing'
				allTccs = compare.collapseOverlaps(allTccs)
		else:
			print 'NO GENE OVERLAPS!!!!!', CID, hPin
		
		
		#filter out my predictions.
		print '  Filtering out predictions'
		checkList = compare.subtractTwoTccLists(allTccs, predList)
			
		
		#Get Expression level for gene.
		print '  Retrieving Expression levels:', cg.getTccListTotalLength(checkList)
		levels = []
		
		
		hPinLevels = stepVectorScan.scanVectorsHist(checkList, cName)
		for hPin in hPinLevels:
			levels.extend(hPinLevels[hPin])
		
			
		cidLevels[CID] = levels
		
	
	
	
	#output levels to file
	print 'Outputting to file'
	#find longest
	longest = 0
	for CID in cidLevels:
		length = len(cidLevels[CID])
		if length > longest:
			longest = length
	
	sortedKeys = cidLevels.keys()
	sortedKeys.sort()
	#print sortedKeys
	
	newLines = []
	for j in range(0, longest): #how many lines are there
		newLine = []
		for CID in sortedKeys:
			if len(cidLevels[CID]) > j:# add it
				newLine.append(str(cidLevels[CID][j]))
			else:
				newLine.append('NA')
	
		newLines.append('\t'.join(newLine) + '\n')
	
	outFileN = conf.conf['exonNoiseData']
	outFile = open(outFileN, 'w')
	outFile.write('\t'.join(sortedKeys) + '\n')
	outFile.writelines(newLines)
	outFile.close()
Пример #6
0
def intronNoisy(cName=None):
    mConf = c.cgConfig('Main.conf')
    conf = c.getConfig(cName)

    #init
    cHairs = getHairpins.getHairpins(
        conf.conf['resultsIntrons'])  #CID: HAIRPIN
    organism = conf.conf['organism']
    exonList = compare.tccFileToList('%sExons.tcc' % organism, 0)
    slide = 1000

    #make prediction overlap hitmap
    predMap = {}
    predList = []
    for CID in cHairs:
        hPin = cHairs[CID]
        predList.append(hPin)

    #collapse Overlaps
    print ' collapsing predictions'
    predList = compare.collapseOverlaps(predList)
    print ' collapsing exons'
    exonList = compare.collapseOverlaps(exonList)

    #collect levels for each hairpin region
    cidLevels = {}
    for CID in cHairs:
        print CID
        hPin = cHairs[CID]
        chrom = ss(hPin, ':')[0]
        strand = ss(hPin, ':')[1]
        start = int(ss(hPin, ':')[2])
        end = int(ss(hPin, ':')[3])

        scanStart = start - slide
        scanEnd = end + slide

        scanRange = []
        scanRange.append('%s:%s:%s:%s' % (chrom, strand, scanStart, start))
        scanRange.append('%s:%s:%s:%s' % (chrom, strand, end, scanEnd))

        print scanRange
        scanRange = compare.subtractTwoTccLists(scanRange, predList)
        scanRange = compare.subtractTwoTccLists(scanRange, exonList)

        levels = []

        print '  Retrieving Expression levels:', cg.getTccListTotalLength(
            scanRange)
        levels = []

        hPinLevels = stepVectorScan.scanVectorsHist(scanRange, cName)
        for hPin in hPinLevels:
            levels.extend(hPinLevels[hPin])

        cidLevels[CID] = levels

    #output levels to file

    #find longest
    longest = 0
    for CID in cidLevels:
        length = len(cidLevels[CID])
        if length > longest:
            longest = length

    sortedKeys = cidLevels.keys()
    sortedKeys.sort()

    newLines = []
    for j in range(0, longest):  #how many lines are there
        newLine = []
        for CID in sortedKeys:
            if len(cidLevels[CID]) > j:  # add it
                newLine.append(str(cidLevels[CID][j]))
            else:
                newLine.append('NA')

        newLines.append('\t'.join(newLine) + '\n')

    outFileN = conf.conf['intronNoiseData']
    outFile = open(outFileN, 'w')
    outFile.write('\t'.join(sortedKeys) + '\n')
    outFile.writelines(newLines)
    outFile.close()
Пример #7
0
import compareData as compare
import bioLibCG as cg

exonList = compare.tccFileToList('allExons.tcc', 0)

print cg.getTccListTotalLength(exonList)
nonOverlap = compare.collapseOverlaps(exonList)
print cg.getTccListTotalLength(nonOverlap)

o = open('mouseExons.tcc', 'w')
for tcc in nonOverlap:
	o.write(tcc + '\n')
o.close()