def removeRestrictionSites(self, sourceSeq, optimizedSeq, restrictionSites):
    '''
    Get the best sequence that does not contain any restriction sites by
    substituting codons in optimizedSeq in a shortest-paths manner.

    Candidates are kept in a heap keyed by
    self.scoreSequence(sourceSeq, candidate), so the candidate with the
    lowest score is always examined next.  For a candidate that still
    contains restriction sites, every sequence returned by
    self.getPossibleOneStepChanges for the affected codons is queued.

    sourceSeq        -- sequence passed through to scoreSequence and
                        getPossibleOneStepChanges
    optimizedSeq     -- starting sequence for the search
    restrictionSites -- sites to avoid; expanded with
                        SeqUtils.expandAmbiguousMult before searching

    Returns the first examined sequence for which
    SeqUtils.getCodonsForRanges reports no codons, or None when the
    worklist runs empty first.
    '''
    restrictionSites = SeqUtils.expandAmbiguousMult(restrictionSites)

    # Every sequence that has ever been queued.  Recording a sequence when
    # it is pushed (rather than when it is popped) keeps duplicates out of
    # the heap, so no sequence is scored or expanded more than once.
    # NOTE(review): assumes scoreSequence is deterministic per sequence.
    queuedSeqs = {optimizedSeq}
    worklist = [(self.scoreSequence(sourceSeq, optimizedSeq), optimizedSeq)]

    while worklist:
        _, tSeq = heapq.heappop(worklist)
        restrictionLocations = SeqUtils.searchSubseqs(tSeq, restrictionSites)
        restrictionCodons = SeqUtils.getCodonsForRanges(restrictionLocations)
        if not restrictionCodons:
            return tSeq

        possibleChanges = self.getPossibleOneStepChanges(
            sourceSeq, tSeq, restrictionCodons)
        for tNewSeq in possibleChanges or ():
            if tNewSeq not in queuedSeqs:
                queuedSeqs.add(tNewSeq)
                heapq.heappush(
                    worklist,
                    (self.scoreSequence(sourceSeq, tNewSeq), tNewSeq))

    # Search space exhausted without finding a site-free sequence.
    return None
def __init__(self, parent, urls):
    '''
    Set up an unpaired entry from the first element of urls.

    Stores the parent, wraps urls[0] in SeqUtils.Filename, derives
    basename / aln / dir from it and the parent's attributes, and creates
    the directory when nonexist() returns a true value for it.
    '''
    self.pairs = False
    self.parent = parent

    self.files = SeqUtils.Filename(urls[0])
    entryId = self.files.ID
    self.basename = entryId

    # "<parent basename>_<own basename>"
    self.aln = "_".join((self.parent.basename, entryId))

    entryDir = os.path.join(self.parent.dir, entryId)
    self.dir = entryDir
    # presumably nonexist() reports that the path is missing; verify helper
    if nonexist(entryDir):
        os.mkdir(entryDir)
def getBestSequence(self, sourceSequence):
    '''
    Build a sequence in which every complete three-character codon of
    sourceSequence is replaced by self.getBestCodon(codon).

    The value returned by SeqUtils.getRemainderSuffix(sourceSequence) is
    appended unchanged after the replaced codons.
    '''
    triplets = re.findall('...', sourceSequence)
    tail = SeqUtils.getRemainderSuffix(sourceSequence)
    bestCodons = [self.getBestCodon(triplet) for triplet in triplets]
    return "".join(bestCodons) + tail
def SequenceToPrint(self, seq, restrictionSites, source=True):
    '''
    Return a list of (codon, usage, isRestrictionSite) tuples for seq.

    usage comes from self.sourceCU.getCodonRelativeUsage when source is
    true and from self.targetCU.getCodonRelativeUsage otherwise.
    isRestrictionSite tells whether the codon index is among the codons
    SeqUtils.getCodonsForRanges reports for the restriction-site hits.
    A non-empty remainder is appended as (remainder, None, None).
    '''
    codonList = re.findall('...', seq)
    tail = SeqUtils.getRemainderSuffix(seq)

    expandedSites = SeqUtils.expandAmbiguousMult(restrictionSites)
    siteRanges = SeqUtils.searchSubseqs(seq, expandedSites)
    siteCodons = SeqUtils.getCodonsForRanges(siteRanges)

    rows = []
    for index, codon in enumerate(codonList):
        if source:
            usage = self.sourceCU.getCodonRelativeUsage(codon)
        else:
            usage = self.targetCU.getCodonRelativeUsage(codon)
        rows.append((codon, usage, index in siteCodons))

    if tail:
        rows.append((tail, None, None))
    return rows
def __init__(self, parent, urls):
    '''
    Set up a paired entry from all elements of urls.

    Wraps every url in SeqUtils.Filename, stores the absolute paths,
    derives basename / aln / dir from the first file and the parent's
    attributes, and creates the directory when nonexist() returns a true
    value for it.
    '''
    self.files = [SeqUtils.Filename(url) for url in urls]
    self.pairs = True
    self.urls = [os.path.abspath(url) for url in urls]
    self.parent = parent

    # The first file names the whole entry.
    entryId = self.files[0].ID
    self.basename = entryId

    # "<parent basename>_<own basename>"
    self.aln = "_".join((self.parent.basename, entryId))

    entryDir = os.path.join(self.parent.dir, entryId)
    self.dir = entryDir
    # presumably nonexist() reports that the path is missing; verify helper
    if nonexist(entryDir):
        os.mkdir(entryDir)
def removeRestrictionSites(self, sourceSeq, optimizedSeq, restrictionSites):
    '''
    Get a sequence that does not contain any restriction sites by
    re-randomizing codons until no restriction site remains.

    Only the codons that overlap a restriction site in the initial
    optimizedSeq are re-drawn (via self.getRandomOptimizedCodon); the same
    codon positions are re-drawn on every attempt.  sourceSeq is not used
    by this implementation.

    Returns the first site-free sequence found, or None if none turned up
    within ITERMAX attempts.
    '''
    codonList = re.findall('...', optimizedSeq)
    tail = SeqUtils.getRemainderSuffix(optimizedSeq)

    expandedSites = SeqUtils.expandAmbiguousMult(restrictionSites)
    initialHits = SeqUtils.searchSubseqs(optimizedSeq, expandedSites)
    codonsToRedraw = SeqUtils.getCodonsForRanges(initialHits)

    # try to re-randomize a finite amount of times
    ITERMAX = 10000
    for _ in range(ITERMAX):
        for idx in codonsToRedraw:
            codonList[idx] = self.getRandomOptimizedCodon(codonList[idx])

        candidate = "".join(codonList) + tail
        remainingHits = SeqUtils.searchSubseqs(candidate, expandedSites)
        if not SeqUtils.getCodonsForRanges(remainingHits):
            return candidate

    return None
def getPossibleOneStepChanges(self, sourceSeq, optimizedSeq, codonsToConsider):
    '''
    Get all possible sequences resulting from substituting one of the
    codons to consider in optimizedSeq with its next best codon.

    sourceSeq        -- source sequence; its codon at the same index is
                        passed to self.getNextBestCodon
    optimizedSeq     -- sequence whose codons are substituted
    codonsToConsider -- container of codon indices eligible for change

    Returns a list of sequences, in ascending codon-index order.  Each one
    differs from optimizedSeq in exactly one codon.  Indices for which
    getNextBestCodon returns a false value contribute no candidate.
    '''
    sourceCodons = re.findall('...', sourceSeq)
    optCodons = re.findall("...", optimizedSeq)
    remainder = SeqUtils.getRemainderSuffix(optimizedSeq)

    res = list()
    for i, optCodon in enumerate(optCodons):
        if i not in codonsToConsider:
            continue
        nextBestCodon = self.getNextBestCodon(sourceCodons[i], optCodon)
        if not nextBestCodon:
            continue
        # Work on a copy: assigning into optCodons itself would leak this
        # substitution into every candidate built afterwards, so later
        # results would no longer be single-step changes.
        tCodons = list(optCodons)
        tCodons[i] = nextBestCodon
        res.append("".join(tCodons) + remainder)
    return res
args = vars(ap.parse_args())


def GC_chunk(size, step):
    """
    Create a random sequence, chunk it, then calculate the GC content of
    each chunk.

    The sequence has args["LENGTH"] bases drawn at random from 'ATGC' and
    is cut into consecutive chunks of `step` bases (the last chunk may be
    shorter).  The overall SeqUtils.GC value of the sequence is printed.

    size -- currently unused; kept so existing call sites keep working
    step -- chunk length in bases; range() raises ValueError for 0

    Returns a list with one float per chunk: (G + C) / (A + C + G + T),
    or 0.0 for a chunk that contains none of those four bases.
    """
    bases = 'ATGC'
    seq = ''.join(random.choice(bases) for _ in range(args["LENGTH"]))

    # NOTE(review): SeqUtils.GC is presumably Biopython's Bio.SeqUtils.GC;
    # confirm it is available in the installed version.
    print(SeqUtils.GC(seq))

    gc = []
    # For every step, count the nucleotides, then divide the G+C count by
    # the count of all four nucleotides.
    for i in range(0, len(seq), step):
        s = seq[i:i + step].upper()
        gcCount = s.count('G') + s.count('C')
        total = gcCount + s.count('A') + s.count('T')
        if total > 0:
            gc.append(gcCount / float(total))
        else:
            gc.append(0.0)
    return gc