def isWellFormed(self, seq):
    """Return True if this transcript has no overlapping exons and ends its
    reading frame on a stop codon.

    Two checks are performed, in order:

    1. Each exon is compared with the one immediately before it in
       ``self.exons``; if any such pair overlaps, the transcript is
       rejected.
    2. All codons are collected with a ``CodonIterator`` built from this
       transcript, ``seq`` and ``self.stopCodons``.  The transcript is
       accepted only if at least one codon was produced and the triplet of
       the last one has a truthy entry in ``self.stopCodons``.

    Parameters:
        seq: the sequence handed to ``CodonIterator``.

    Returns:
        bool: ``True`` when both checks pass, ``False`` otherwise.
    """
    stopCodons = self.stopCodons

    # 1. Check whether any exons overlap each other (adjacent pairs only).
    exons = self.exons
    for i in range(1, len(exons)):
        if exons[i].overlaps(exons[i - 1]):
            # Always return a real bool so every exit has the same type.
            return False

    # 2. Check that there is an in-frame stop codon.
    codons = CodonIterator(self, seq, stopCodons).getAllCodons()
    if not codons:
        return False
    lastCodon = codons[-1]
    return bool(stopCodons.get(lastCodon.triplet))
def trimUTR(self, axisSequenceRef):
    """Trim untranslated regions so the transcript spans only start..stop.

    Steps, in order:

    1. Leading exons whose length is no greater than the remaining
       ``startCodon`` offset are deleted; the first exon that is longer is
       trimmed by the remaining offset, retyped as ``"initial-exon"`` (or
       ``"single-exon"`` when it is the only exon left) and
       ``self.startCodon`` is reset to 0.
    2. Codons are read from a ``CodonIterator`` until one whose triplet has
       a truthy entry in ``self.stopCodons`` is found.  The exon holding
       the end of that codon is trimmed and every later exon is deleted.
    3. If no stop codon was found, the three bases just beyond the last
       exon are inspected (reverse-complemented on the minus strand) and a
       warning is printed unless they spell TAG, TAA or TGA.
    4. ``self.recomputeBoundaries()`` is called.

    Parameters:
        axisSequenceRef: the sequence handed to ``CodonIterator``; it is
            also sliced directly, so it must support ``seq[a:b]``.

    Raises:
        Exception: if ``self.startCodon`` is ``None``, or if a stop codon
            straddles an exon boundary and the exon has no successor.
    """
    self.adjustOrders()
    stopCodons = self.stopCodons
    strand = self.strand
    numExons = self.numExons()
    startCodon = self.startCodon
    # An offset of 0 is legitimate (it is what this method itself stores),
    # so only a missing value counts as "not set".
    if startCodon is None:
        raise Exception("can't trim UTR, because startCodon is not set")

    # Drop or trim the 5' UTR.  The index only advances implicitly:
    # deleting exon j shifts the next exon into slot j, and the first exon
    # that is kept ends the loop.
    j = 0
    while j < numExons:
        exon = self.getIthExon(j)
        length = exon.getLength()
        if length <= startCodon:
            # Exon lies entirely before the start codon.
            self.deleteExon(j)
            numExons -= 1
            startCodon -= length
            self.adjustOrders()
            continue
        # NOTE(review): the original author marked the minus-strand
        # handling here as uncertain ("???"); both strands trim the same
        # way and differ only in which transcript boundary is updated.
        exon.trimInitialPortion(startCodon)
        if strand == "+":
            self.begin = exon.begin
        else:
            self.end = exon.end
        exon.type = "initial-exon" if numExons > 1 else "single-exon"
        self.startCodon = 0
        break

    # Find in-frame stop codon
    codonIterator = CodonIterator(self, axisSequenceRef, stopCodons)
    stopCodonFound = False
    while True:
        codon = codonIterator.nextCodon()
        if not codon:
            break
        if not stopCodons.get(codon.triplet, None):
            continue
        exon = codon.exon
        coord = codon.absoluteCoord
        # Bases left in this exon after the stop codon; a negative value
        # means the codon runs over into the next exon.
        if strand == "+":
            trimBases = exon.end - coord - 3
        else:
            trimBases = coord - exon.begin - 3
        if trimBases >= 0:
            exon.trimFinalPortion(trimBases)
            exon.type = "single-exon" if exon.order == 0 else "final-exon"
            finalExon = exon
        else:
            # codon is interrupted; trim the next exon
            nextExon = self.getSuccessor(exon)
            if not nextExon:
                raise Exception("exon successor not found")
            nextExon.trimFinalPortion(nextExon.getLength() + trimBases)
            nextExon.type = "final-exon"
            finalExon = nextExon
        # Remove every exon that follows the one now ending at the stop.
        j = numExons - 1
        while j > finalExon.order:
            self.deleteExon(j)
            j -= 1
        stopCodonFound = True
        break

    if not stopCodonFound:
        ### sometimes the GFF coords don't include the stop codon...
        numExons = self.numExons()
        lastExon = self.getIthExon(numExons - 1)
        seq = axisSequenceRef
        if strand == "+":
            stopCodonBegin = lastExon.getEnd()
            stopCodon = seq[stopCodonBegin:stopCodonBegin + 3]
        else:  # strand="-"
            stopCodonBegin = lastExon.getBegin() - 3
            stopCodon = seq[stopCodonBegin:stopCodonBegin + 3]
            stopCodon = Translation.reverseComplement(stopCodon)
        if stopCodon not in ("TAG", "TAA", "TGA"):
            print("Warning! No stop codon found for",
                  self.transcriptId, self.strand,
                  "strand , unable to trim UTR")
    self.recomputeBoundaries()