def updatePolySeqs(mFN, readsFN, alignFN):
    """Print a row for every aligned read that extends its target by a homopolymer tail.

    For each alignment the read sequence and the target sequence are looked
    up.  If the read contains the full target sequence, or the target with
    1-7 bases removed from its end, the part of the read that follows is
    the "tail".  When that tail is a run of a single letter (A, G, T or C)
    of length 1-19, a row is printed via ``tabIt`` with the arguments
    (target sequence, read, bases trimmed, tail length, tail letter).
    A trimmed hit is followed by a line containing ``TRIMMED``.

    Args:
        mFN: file handed to ``cgNexusFlat.Nexus`` together with ``miR``;
            its ``sequence`` attribute is read, indexed by ``tID``.
        readsFN: file handed to ``cgNexusFlat.Nexus`` with a one-field
            ``read`` table; indexed by ``sID``.
        alignFN: file handed to ``cgNexusFlat.Nexus`` together with
            ``cgAlignment``; supplies ``sID`` and ``tID`` per alignment.

    Returns:
        None.  All results go to standard output.
    """
    # Started for ad-hoc profiling (tim.split()); nothing below reads it.
    tim = bioLibCG.cgTimer()
    tim.start()

    # Every single-letter run of length 1..19, mapped to its letter.  A tail
    # can match at most one entry, so one lookup replaces four list scans.
    letterByTail = dict((let * n, let) for let in "AGTC" for n in range(1, 20))
    checkRange = range(1, 8)

    NX = cgNexusFlat.Nexus(mFN, miR)
    # 'polySeqs' is loaded as before although it is not read in this function.
    NX.load(['sequence', 'polySeqs'])

    reads = cgNexusFlat.quickTable(('read', 'string', '.', 1))
    rNX = cgNexusFlat.Nexus(readsFN, reads)
    rNX.load(['read'])

    aNX = cgNexusFlat.Nexus(alignFN, cgAlignment)
    aNX.load(['sID', 'tID'])

    for alignID in aNX.ids:
        theRead = rNX.read[aNX.sID[alignID]]
        microSeq = NX.sequence[aNX.tID[alignID]]

        # A read that lies entirely inside the target may matter for
        # expression, but it cannot carry a tail, so it is skipped.
        if theRead in microSeq:
            continue

        # NOTE(review): this branch cannot run, because an exact match is
        # already skipped by the `continue` above.  Kept so the intended
        # "N" row stays visible; confirm which of the two is wanted.
        if microSeq == theRead:
            print(tabIt(microSeq, theRead, 0, 0, "N"))

        # First try the untrimmed sequence.  The read is not contained in
        # microSeq here, so a hit always leaves extra bases in the read.
        elif microSeq in theRead:
            # partition keeps everything after the first hit; split()[1]
            # would stop at a second occurrence and under-report the tail.
            tail = theRead.partition(microSeq)[2]
            let = letterByTail.get(tail)
            if let is not None:
                print(tabIt(microSeq, theRead, 0, len(tail), let))

        # Then try trimmed versions (microSeq[:-0] would be empty, hence
        # the range starts at 1).
        else:
            for i in checkRange:
                anchor = microSeq[:-i]
                if not anchor:
                    # Nothing left to search for; an empty anchor would
                    # make the tail extraction raise ValueError.
                    break
                if anchor in theRead:
                    tail = theRead.partition(anchor)[2]
                    let = letterByTail.get(tail)
                    if let is not None:
                        print(tabIt(microSeq, theRead, i, len(tail), let))
                        print("TRIMMED")
                    # Don't trim further once a trimmed prefix is found.
                    # NOTE(review): the original nesting of this break was
                    # lost with the file's indentation; confirm.
                    break
# NOTE(review): a second definition of plotBar follows this one in the file;
# the later definition is the one bound to the name after import.
def plotBar(analysisFN):
    """Show an 8 x 7 grid of bar charts of tail-letter counts.

    Rows of ``analysisFN`` are tallied by (letter, trimAmount, tailLength).
    One subplot is drawn for each trim amount 0-7 (grid row) and tail
    length 0-6 (grid column).  Each subplot has four bars, in the order
    A, T, C, G, scaled so the largest count in that subplot is about 1.
    The subplot title is ``"<trim>,<tailLength> <largest raw count>"``.

    Args:
        analysisFN: file handed to ``cgNexusFlat.Nexus`` with a five-field
            table (microSeq, readSeq, trimAmount, tailLength, letter).

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    polyTable = cgNexusFlat.quickTable(
        ('microSeq', 'string', '.', 0),
        ('readSeq', 'string', '.', 1),
        ('trimAmount', 'int', 0, 2),
        ('tailLength', 'int', 0, 3),
        ('letter', 'string', 'N', 4))
    polyNX = cgNexusFlat.Nexus(analysisFN, polyTable, ids=False)
    polyNX.load(['trimAmount', 'tailLength', 'letter'])

    # Flat tally keyed by (letter, trim, tail length).  Reading it with
    # .get() means a letter that never occurs counts as 0 instead of
    # raising KeyError, and lookups no longer insert empty entries.
    counts = {}
    for rowID in polyNX.ids:
        key = (polyNX.letter[rowID],
               polyNX.trimAmount[rowID],
               polyNX.tailLength[rowID])
        counts[key] = counts.get(key, 0) + 1

    letters = ["A", "T", "C", "G"]
    ind = list(range(1, 5))
    width = 0.35  # the width of the bars
    numTrims = 8
    numLengths = 7

    for trim in range(numTrims):
        for tLen in range(numLengths):
            # Subplot indices are 1-based and run row by row, so the
            # first cell is 1, not 0.
            plt.subplot(numTrims, numLengths, trim * numLengths + tLen + 1)

            letAmount = [counts.get((let, trim, tLen), 0) for let in letters]

            # Normalize to the tallest bar; the .01 keeps zero counts
            # visible as a sliver when the subplot has any data.
            theMax = max(letAmount)
            if theMax == 0:
                heights = [0, 0, 0, 0]
            else:
                heights = [float(x + .01) / theMax for x in letAmount]

            plt.bar(ind, heights, width)
            plt.title('%s,%s %s' % (trim, tLen, theMax))
            # Hide both axes' ticks.  Tick and label lists must have the
            # same length, so both are passed empty.
            plt.xticks([], [])
            plt.yticks([], [])

    plt.show()
# NOTE(review): an earlier definition of plotBar precedes this one in the
# file; this later definition is the one bound to the name after import.
def plotBar(analysisFN):
    """Plot, per trim amount and tail length, how often each tail letter occurs.

    The analysis file is loaded through ``cgNexusFlat`` and its rows are
    counted by (letter, trimAmount, tailLength).  The figure is a grid of
    8 rows (trim 0-7) by 7 columns (tail length 0-6).  Every cell holds
    four bars for A, T, C and G, divided by the largest count in that cell,
    and is titled ``"<trim>,<tailLength> <largest raw count>"``.

    Args:
        analysisFN: file handed to ``cgNexusFlat.Nexus`` with a five-field
            table (microSeq, readSeq, trimAmount, tailLength, letter).

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    polyTable = cgNexusFlat.quickTable(
        ('microSeq', 'string', '.', 0),
        ('readSeq', 'string', '.', 1),
        ('trimAmount', 'int', 0, 2),
        ('tailLength', 'int', 0, 3),
        ('letter', 'string', 'N', 4))
    polyNX = cgNexusFlat.Nexus(analysisFN, polyTable, ids=False)
    polyNX.load(['trimAmount', 'tailLength', 'letter'])

    # Count rows under a (letter, trim, tail length) key.  Missing keys
    # are read back as 0 below, so a letter absent from the file no
    # longer raises KeyError.
    tally = {}
    for rowID in polyNX.ids:
        combo = (polyNX.letter[rowID],
                 polyNX.trimAmount[rowID],
                 polyNX.tailLength[rowID])
        tally[combo] = tally.get(combo, 0) + 1

    letters = ["A", "T", "C", "G"]
    ind = list(range(1, 5))
    width = 0.35  # the width of the bars
    gridRows = 8
    gridCols = 7

    for trim in range(gridRows):
        for tLen in range(gridCols):
            # Subplot numbering starts at 1 (top-left) and increases
            # to the right, so shift the zero-based cell number by one.
            cell = trim * gridCols + tLen
            plt.subplot(gridRows, gridCols, cell + 1)

            letAmount = [tally.get((let, trim, tLen), 0) for let in letters]

            # Normalize against the largest count; the added .01 gives
            # zero counts a minimal bar whenever the cell has data.
            theMax = max(letAmount)
            if theMax == 0:
                scaled = [0, 0, 0, 0]
            else:
                scaled = [float(x + .01) / theMax for x in letAmount]

            plt.bar(ind, scaled, width)
            plt.title('%s,%s %s' % (trim, tLen, theMax))
            # Remove ticks on both axes; ticks and labels are passed
            # as equally sized (empty) lists.
            plt.xticks([], [])
            plt.yticks([], [])

    plt.show()