Пример #1
0
        def rows():
            '''Yield the lowest energy seen for each candidate/sequence pair.

            Reads hit records from ``B.ncbiXmlHitGenerator(iFile)``; ``iFile``
            comes from the enclosing scope. For each record only the first
            HSP is used: the query sequence is reverse-complemented and
            passed with the hit sequence to ``A.energy``. The smallest value
            per ``(canID, seqID)`` pair is kept.

            Yields:
                ``[canID, seqID, energy]`` lists, one per distinct pair.
                Nothing is yielded until every record has been consumed.
            '''
            pairCache = {}
            # Candidate name -> ID, so each name is resolved only once.
            canCache = {}

            for rec in B.ncbiXmlHitGenerator(iFile):
                # The hit definition holds the sequence ID.
                seqID = int(rec['def'])
                # The query definition holds the candidate name.
                canName = rec['query-def']

                # Energy of the first HSP only.
                firstHsp = rec['hsps'][0]
                rqseq = B.reverseComplement(firstHsp['qseq'])
                en = A.energy(firstHsp['hseq'], rqseq)

                if canName not in canCache:
                    canCache[canName] = Candidate(canName).ID()
                canID = canCache[canName]

                # Keep the most negative value for the pair.
                key = (canID, seqID)
                if key not in pairCache or en < pairCache[key]:
                    pairCache[key] = en

            # Iterate the dict directly: same keys as iterkeys(), but also
            # valid on Python 3, where iterkeys() no longer exists.
            for canID, seqID in pairCache:
                yield [canID, seqID, pairCache[canID, seqID]]
Пример #2
0
        def rows():
            '''Yield energy statistics for each taxon/candidate pair.

            Reads hit records from ``B.ncbiXmlHitGenerator(iFile)``; ``iFile``
            comes from the enclosing scope. For each kept record the energy
            of the first HSP is computed with ``A.energy`` and appended to a
            list for every taxon returned by ``Sequence(seqID).taxa()``.

            Accumulated lists are summarised and cleared whenever the query
            name changes, and once more after the last record.
            NOTE(review): this presumably relies on records for one query
            arriving contiguously -- confirm against the generator.

            Yields:
                ``[canID, tID, min, max, mean, std, count, n<=-30, n<=-40,
                n<=-50, n<=-100]`` lists, where ``mean`` and ``std`` come
                from ``N.mean`` and ``N.std``.
            '''
            def _statRows(cache):
                # Build one summary row per (tID, canID) key in cache.
                for tID, canID in cache:
                    energies = cache[tID, canID]
                    yield [
                        canID,
                        tID,
                        min(energies),
                        max(energies),
                        N.mean(energies),
                        N.std(energies),
                        len(energies),
                        sum(1 for e in energies if e <= -30),
                        sum(1 for e in energies if e <= -40),
                        sum(1 for e in energies if e <= -50),
                        sum(1 for e in energies if e <= -100),
                    ]

            # Candidate name -> ID, so each name is resolved only once.
            canCache = {}
            # (tID, canID) -> list of energies for the current query name.
            pairCache = {}
            lastCanName = ''

            for rec in B.ncbiXmlHitGenerator(iFile):
                # The hit definition holds the sequence ID.
                seqID = int(rec['def'])
                # The query definition holds the candidate name.
                canName = rec['query-def']

                if canName != lastCanName:
                    # Moved on to a new query: emit what was collected for
                    # the previous one and start a fresh accumulator.
                    for rv in _statRows(pairCache):
                        yield rv
                    pairCache = {}
                    lastCanName = canName

                if canName not in canCache:
                    canCache[canName] = Candidate(canName).ID()
                canID = canCache[canName]

                # NOTE(review): only candidates whose ID is a multiple of 4
                # are processed; the reason is not visible here -- confirm.
                if canID % 4 != 0:
                    continue

                # Energy of the first HSP only.
                firstHsp = rec['hsps'][0]
                rqseq = B.reverseComplement(firstHsp['qseq'])
                en = A.energy(firstHsp['hseq'], rqseq)

                # Record the energy under every taxon of the hit sequence.
                for t in Sequence(seqID).taxa():
                    pairCache.setdefault((t.ID(), canID), []).append(en)

            # Emit whatever was collected for the final query name.
            for rv in _statRows(pairCache):
                yield rv