def convertMali2Alignlib( mali ):
    '''Build an alignlib multiple alignment from a :class:`Mali` object.

    A new, empty alignlib multiple alignment is created.  Each identifier
    reported by ``mali.getIdentifiers()`` is then looked up in *mali*,
    wrapped with ``alignlib.makeAlignatum`` and appended, in iteration
    order.  The populated alignlib object is returned.
    '''
    # imported lazily so that alignlib is only required when this
    # conversion is actually requested.
    import alignlib

    result = alignlib.makeMultipleAlignment()
    for key in mali.getIdentifiers():
        result.add( alignlib.makeAlignatum( mali[key] ) )
    return result
def applyMethod(self, neighbours):
    """Realign every match of a query and compare it to its reference.

    For each entry of ``neighbours.mMatches`` (self-matches of the query
    are skipped) the aligned segments of query and subject are realigned
    with a global dynamic-programming aligner, and the result is compared
    with the alignment stored on the match (``n.getAlignment()``).

    One tab-separated line is written to ``self.mOutfile`` per match:
    query token, sbjct token, the value returned by
    ``alignlib.getAlignmentIdentity``, the number of aligned positions in
    the reference alignment and the number in the realignment.
    ``self.mNIdentical`` is incremented when the identity value equals the
    number of aligned reference positions, ``self.mNDifferent`` otherwise.

    :param neighbours: object providing ``mQueryToken`` and ``mMatches``.
    :raises ValueError: if a match carries no reference alignment.
    """
    query_nid = neighbours.mQueryToken
    qseq = alignlib.makeSequence(self.mFasta.getSequence(query_nid))

    # NOTE(review): -10.0 / -1.0 are presumably the gap-open and
    # gap-extension penalties and the four flags end-gap settings --
    # confirm against the alignlib documentation.
    alignator = alignlib.makeAlignatorDPFull(
        alignlib.ALIGNMENT_GLOBAL, -10.0, -1.0, True, True, True, True)

    for n in neighbours.mMatches:
        # do not realign the query against itself
        if n.mSbjctToken == query_nid:
            continue

        sequence = self.mFasta.getSequence(n.mSbjctToken)

        blast_query2sbjct = n.getAlignment()
        if blast_query2sbjct is None:
            raise ValueError(
                "AddaRealignment.py needs a reference alignment.")

        sseq = alignlib.makeSequence(sequence)
        # restrict both sequences to the segments reported for this match
        qseq.useSegment(n.mQueryFrom, n.mQueryTo)
        sseq.useSegment(n.mSbjctFrom, n.mSbjctTo)

        realign_query2sbjct = alignlib.makeAlignmentVector()
        alignator.align(realign_query2sbjct, qseq, sseq)

        nidentical = alignlib.getAlignmentIdentity(
            realign_query2sbjct, blast_query2sbjct, alignlib.RR)
        nblast = blast_query2sbjct.getNumAligned()
        nrealigned = realign_query2sbjct.getNumAligned()

        self.mOutfile.write(
            "%s\t%s\t%i\t%i\t%i\n" %
            (n.mQueryToken, n.mSbjctToken, nidentical, nblast, nrealigned))

        if nidentical == nblast:
            self.mNIdentical += 1
        else:
            self.mNDifferent += 1
def applyMethod(self, neighbours ):
    """Compare stored match alignments against fresh global realignments.

    Iterates over ``neighbours.mMatches``; matches whose sbjct token equals
    the query token are ignored.  For every remaining match the query and
    sbjct sequences are fetched from ``self.mFasta``, restricted to the
    match coordinates and realigned.  The realignment is compared with the
    alignment returned by ``n.getAlignment()``.

    Output: one line per match on ``self.mOutfile`` with the tab-separated
    fields query token, sbjct token, identity value (from
    ``alignlib.getAlignmentIdentity``), aligned positions in the reference
    alignment, aligned positions in the realignment.

    Side effects: increments ``self.mNIdentical`` if the identity value
    equals the number of aligned reference positions, else
    ``self.mNDifferent``.

    :param neighbours: object providing ``mQueryToken`` and ``mMatches``.
    :raises ValueError: if ``n.getAlignment()`` returns ``None``.
    """
    query_nid = neighbours.mQueryToken
    query_sequence = self.mFasta.getSequence( query_nid )
    qseq = alignlib.makeSequence( query_sequence )

    # NOTE(review): the numeric arguments look like gap-open/gap-extension
    # penalties; the meaning of the boolean flags is not visible here.
    alignator = alignlib.makeAlignatorDPFull( alignlib.ALIGNMENT_GLOBAL,
                                              -10.0, -1.0,
                                              True, True, True, True )

    for n in neighbours.mMatches:

        if n.mSbjctToken == query_nid:
            # self-match: nothing to compare
            continue

        sequence = self.mFasta.getSequence( n.mSbjctToken )

        blast_query2sbjct = n.getAlignment()
        if blast_query2sbjct is None:
            raise ValueError( "AddaRealignment.py needs a reference alignment.")

        sseq = alignlib.makeSequence( sequence )

        # only the matched segments take part in the realignment
        qseq.useSegment( n.mQueryFrom, n.mQueryTo )
        sseq.useSegment( n.mSbjctFrom, n.mSbjctTo )

        # a single fresh container per match receives the realignment
        realign_query2sbjct = alignlib.makeAlignmentVector()
        alignator.align( realign_query2sbjct, qseq, sseq )

        nidentical = alignlib.getAlignmentIdentity( realign_query2sbjct,
                                                    blast_query2sbjct,
                                                    alignlib.RR )
        nblast = blast_query2sbjct.getNumAligned()
        nrealigned = realign_query2sbjct.getNumAligned()

        self.mOutfile.write( "%s\t%s\t%i\t%i\t%i\n" %
                             (n.mQueryToken, n.mSbjctToken,
                              nidentical, nblast, nrealigned ) )

        if nidentical == nblast:
            self.mNIdentical += 1
        else:
            self.mNDifferent += 1
continue if use_cds_id: identifier = cds_identifier else: identifier = peptide_identifier if options.output_format =="alignment": options.stdout.write("\t".join( map(str, (identifier, alignlib.AlignmentFormatEmissions( map_p2c ), len(cur_record.sequence), len(cds_sequences[identifier])) ) )+"\n") elif options.output_format == "fasta": map_p2c.switchRowCol() alignatum = alignlib.makeAlignatum( c ) alignatum.mapOnAlignment( map_p2c, len(p) * 3 ) s = alignatum.getString() if len(s) != len(p) * 3: raise ValueError ("incomplete aligned string for %s: %s, cds=%s" % (cur_record.title, s, c )) options.stdout.write( ">%s\n%s\n" % (identifier, s )) noutput += 1 sys.stdout.flush() E.info( "ninput=%i, noutput=%i, nnosequence=%i, nskipped=%i" % (ninput, noutput, nnosequence, nskipped) ) E.Stop()
def buildMali(self, query_nid, neighbours):
    """Build a multiple alignment from a set of neighbours.

    The query sequence (fetched from ``self.mFasta``) is added first.  At
    most ``self.mMaxNumNeighbours`` neighbours are then considered; each
    accepted neighbour is added using its query-to-sbjct alignment.  If a
    neighbour carries no alignment (``n.getAlignment()`` is ``None``), one
    is computed by locally aligning the matched segments.

    A neighbour is not added when

    * it is the query itself (not counted as skipped),
    * its ``mEvalue`` exceeds ``self.mMaxEvalue``,
    * its alignment is empty,
    * its alignment extends beyond the query or the sbjct sequence, or
    * alignlib raises ``RuntimeError`` while adding it.

    All but the first case increment ``nskipped``; the last three also emit
    a warning through ``self.warn``.

    :param query_nid: identifier of the query sequence.
    :param neighbours: sliceable sequence of match records.
    """
    # build multiple alignment
    mali = alignlib.makeMultipleAlignment()

    query_sequence = self.mFasta.getSequence(query_nid)
    mali.add(alignlib.makeAlignatum(query_sequence))

    qseq = alignlib.makeSequence(query_sequence)
    # NOTE(review): -10 / -2 are presumably gap-open and gap-extension
    # penalties -- confirm against the alignlib documentation.
    alignator = alignlib.makeAlignatorDPFull(
        alignlib.ALIGNMENT_LOCAL, -10, -2)

    nskipped = 0

    for n in neighbours[:self.mMaxNumNeighbours]:

        if n.mSbjctToken == query_nid:
            continue

        if n.mEvalue > self.mMaxEvalue:
            nskipped += 1
            continue

        sequence = self.mFasta.getSequence(n.mSbjctToken)

        E.debug("adding %s" % str(n))

        map_query2sbjct = n.getAlignment()

        if map_query2sbjct is None:
            # no stored alignment: realign the matched segments
            sseq = alignlib.makeSequence(sequence)
            qseq.useSegment(n.mQueryFrom, n.mQueryTo)
            sseq.useSegment(n.mSbjctFrom, n.mSbjctTo)
            map_query2sbjct = alignlib.makeAlignmentVector()
            alignator.align(map_query2sbjct, qseq, sseq)

        if map_query2sbjct.getLength() == 0:
            self.warn("empty alignment: %s" % str(n))
            nskipped += 1
            continue

        # reject alignments that point past the end of either sequence
        if map_query2sbjct.getRowTo() > len(query_sequence):
            self.warn(
                "alignment out of bounds for query: %i>%i, line=%s" %
                (map_query2sbjct.getRowTo(), len(query_sequence), str(n)))
            nskipped += 1
            continue
        elif map_query2sbjct.getColTo() > len(sequence):
            self.warn(
                "alignment out of bounds for sbjct: %i>%i, line=%s" %
                (map_query2sbjct.getColTo(), len(sequence), str(n)))
            nskipped += 1
            continue

        try:
            mali.add(alignlib.makeAlignatum(sequence),
                     map_query2sbjct,
                     mali_is_in_row=True,
                     insert_gaps_mali=False,
                     insert_gaps_alignatum=True,
                     use_end_mali=True,
                     use_end_alignatum=False)
        except RuntimeError as msg:
            # "as" form is valid on Python 2.6+ and Python 3
            self.warn("problem when building alignment for %s: msg=%s" %
                      (str(n), msg))
            nskipped += 1
            continue

    # NOTE(review): no return of ``mali`` / use of ``nskipped`` is visible
    # in this block -- confirm whether the definition continues.
def buildMali(self, query_nid, neighbours ):
    """Build a multiple alignment from a set of neighbours.

    Starts a new alignlib multiple alignment with the query sequence and
    adds up to ``self.mMaxNumNeighbours`` neighbours to it.  Each neighbour
    is added through its query-to-sbjct alignment; when the neighbour has
    none (``n.getAlignment()`` returns ``None``) the matched segments are
    aligned locally to obtain one.

    Neighbours are left out if they are the query itself, if ``n.mEvalue``
    is larger than ``self.mMaxEvalue``, if the alignment is empty or runs
    past the end of the query/sbjct sequence, or if alignlib raises
    ``RuntimeError`` on insertion.  Except for self-matches, every omission
    is counted in ``nskipped``.

    :param query_nid: identifier of the query sequence in ``self.mFasta``.
    :param neighbours: sliceable sequence of match records.
    """
    # build multiple alignment
    mali = alignlib.makeMultipleAlignment()

    query_sequence = self.mFasta.getSequence( query_nid )
    mali.add( alignlib.makeAlignatum( query_sequence ) )

    qseq = alignlib.makeSequence( query_sequence )
    # NOTE(review): the two numbers are presumably gap penalties -- verify.
    alignator = alignlib.makeAlignatorDPFull( alignlib.ALIGNMENT_LOCAL,
                                              -10, -2 )

    nskipped = 0

    for n in neighbours[:self.mMaxNumNeighbours]:

        # the query is already part of the alignment
        if n.mSbjctToken == query_nid:
            continue

        if n.mEvalue > self.mMaxEvalue:
            nskipped += 1
            continue

        sequence = self.mFasta.getSequence( n.mSbjctToken )

        E.debug( "adding %s" % str(n) )

        map_query2sbjct = n.getAlignment()

        if map_query2sbjct is None:
            # fall back to aligning the matched segments ourselves
            sseq = alignlib.makeSequence( sequence )
            qseq.useSegment( n.mQueryFrom, n.mQueryTo )
            sseq.useSegment( n.mSbjctFrom, n.mSbjctTo )
            map_query2sbjct = alignlib.makeAlignmentVector()
            alignator.align( map_query2sbjct, qseq, sseq )

        if map_query2sbjct.getLength() == 0:
            self.warn( "empty alignment: %s" % str( n ) )
            nskipped += 1
            continue

        # guard against coordinates beyond either sequence
        if map_query2sbjct.getRowTo() > len(query_sequence):
            self.warn( "alignment out of bounds for query: %i>%i, line=%s" %
                       (map_query2sbjct.getRowTo(),
                        len(query_sequence),
                        str(n)) )
            nskipped += 1
            continue
        elif map_query2sbjct.getColTo() > len(sequence):
            self.warn( "alignment out of bounds for sbjct: %i>%i, line=%s" %
                       (map_query2sbjct.getColTo(),
                        len(sequence),
                        str(n)) )
            nskipped += 1
            continue

        try:
            mali.add( alignlib.makeAlignatum( sequence ),
                      map_query2sbjct,
                      mali_is_in_row = True,
                      insert_gaps_mali = False,
                      insert_gaps_alignatum = True,
                      use_end_mali = True,
                      use_end_alignatum = False )
        except RuntimeError as msg:
            # "except ... as" parses on Python 2.6+ as well as Python 3
            self.warn( "problem when building alignment for %s: msg=%s" %
                       (str(n), msg) )
            nskipped += 1
            continue

    # NOTE(review): neither ``mali`` nor ``nskipped`` is returned in the
    # code visible here -- confirm whether the definition continues.