def testRegionSpecificRandomization2(codon, taxId):
    """Pool one codon position across a species' sequences and randomize the pool.

    The three nucleotides at zero-based codon index `codon` are sliced out of
    every sequence yielded by nativeSequencesSource(taxId, 0, 1) that is long
    enough to contain the whole codon. The slices are concatenated and the
    resulting string is passed to SynonymousCodonPermutingRandomization.randomize().

    Args:
        codon: zero-based codon index to extract from each sequence.
        taxId: species taxonomy id; must be a plain int.

    Returns:
        Tuple (taxId, codon, len(pool), totalPermutationsCount) where `codon`
        is the index that was passed in and totalPermutationsCount is the
        first element returned by the last randomize() call.

    Raises:
        AssertionError: if taxId is not an int.
        Exception: anything raised by randomize() is printed and re-raised.
    """
    assert(type(taxId)==type(0))

    shuffler = SynonymousCodonPermutingRandomization(getSpeciesTranslationTable(taxId))

    codonStart = codon * 3
    codonEnd = (codon + 1) * 3

    poolCodons = []
    for (seqId, seq) in nativeSequencesSource(taxId, 0, 1):
        # Skip sequences that end before the requested codon is complete.
        # (Slicing past the end would silently return fewer than 3 nt.)
        if len(seq) < codonEnd:
            continue

        # Use a separate name for the slice: the `codon` parameter must stay
        # an integer index for the following iterations and the return value.
        codonSeq = seq[codonStart:codonEnd]
        assert(len(codonSeq)==3)
        poolCodons.append(codonSeq)

    pool = ''.join(poolCodons)

    # NOTE(review): the randomization is always run three times and only the
    # last result is reported; the reason for the repetition is not visible
    # here - confirm before changing the count.
    numAttempts = 3
    totalPermutationsCountForSeq = None
    for _ in range(numAttempts):
        try:
            totalPermutationsCountForSeq, _identity, _shuffledSeq = shuffler.randomize(pool)
        except Exception as e:
            print(e)
            raise

    return (taxId, codon, len(pool), totalPermutationsCountForSeq)
def createRandomizedSeqs(cds, newShuffleIds, shuffleType=db.Sources.ShuffleCDSv2_python):
    """Create one randomized copy of a CDS sequence per requested shuffle id.

    Args:
        cds: object providing getTranslationTable() and sequence().
        newShuffleIds: iterable of shuffle ids; only the number of items is
            used - one randomized sequence is generated per item.
        shuffleType: accepted for interface compatibility; not read by this
            function.

    Returns:
        List of randomized sequences, in the order of newShuffleIds.

    Raises:
        AssertionError: if the reported identity is outside (0.0, 1.0].
        Exception: if fewer than 500 permutations are possible, or anything
            raised by the randomizer (printed, then re-raised).
    """
    randomizer = SynonymousCodonPermutingRandomization(cds.getTranslationTable())
    original = cds.sequence()

    results = []
    for _ in newShuffleIds:
        try:
            permCount, identity, shuffled = randomizer.randomize(original)
        except Exception as err:
            print(err)
            raise

        assert 0.0 < identity <= 1.0

        identityPercent = identity * 100.0

        # A nearly unchanged sequence is only reported, not rejected.
        if identity > 0.95:
            print(
                "Warning: Identity of randomized sequence is high - %.3g%% (length=%d nt, total permutations=%.2g)"
                % (identityPercent, len(shuffled), permCount))

        # Too few possible permutations is treated as a hard failure.
        if permCount < 500:
            raise Exception(
                "Low number of possible permutations %.2g (length=%d nt, identity=%.3g%%)"
                % (permCount, len(shuffled), identityPercent))

        results.append(shuffled)

    return results
def getRandomizedSequenceCacheForVerticalPermutations(taxId):
    """Return the vertical-permutation randomization cache for a species.

    Caches are memoized in the module-level `_caches` dict under the key
    (taxId, db.Sources.ShuffleCDS_vertical_permutation_1nt). On a miss, all
    CDSs of the species whose length is a multiple of 3 are loaded and a new
    VerticalRandomizationCache is built, stored and returned.

    Args:
        taxId: species taxonomy id.

    Returns:
        The VerticalRandomizationCache instance for taxId.
    """
    global _caches

    if (taxId, db.Sources.ShuffleCDS_vertical_permutation_1nt) in _caches:
        return _caches[(taxId, db.Sources.ShuffleCDS_vertical_permutation_1nt)]

    # Cache miss: read all native sequences, keyed by protein id.
    nativeSeqs = {}
    for protId in SpeciesCDSSource(taxId):
        helper = CDSHelper(taxId, protId)
        if helper.length() % 3 == 0:
            # Only CDSs whose length is a whole number of codons are used.
            nativeSeqs[protId] = helper.sequence()

    translationTable = getSpeciesTranslationTable(taxId)
    permuter = SynonymousCodonPermutingRandomization(translationTable)

    def randomizer(cdss):
        return permuter.verticalPermutation(cdss)

    newCache = VerticalRandomizationCache(
        shuffleType=db.Sources.ShuffleCDS_vertical_permutation_1nt,
        taxId=taxId,
        nativeSeqsMap=nativeSeqs,
        geneticCode=translationTable,
        randomizer=randomizer)

    _caches[(taxId, db.Sources.ShuffleCDS_vertical_permutation_1nt)] = newCache
    print(_caches.keys())

    return newCache
def createRandomizedSeqs_CDS_with_3UTR(
        cds,
        newShuffleIds,
        shuffleType=db.Sources.
        ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation,
        taxId=None):
    """Create randomized copies of a CDS together with its 3'UTR.

    The CDS part is handled by a SynonymousCodonPermutingRandomization and the
    UTR part by a NucleotidePermutationRandomization; `shuffleType` selects
    the object that combines the two.

    Args:
        cds: object providing getTranslationTable(), getTaxId(), sequence(),
            CDSlength() and getProtId().
        newShuffleIds: iterable of shuffle ids; one randomized sequence is
            generated per item (the ids themselves are not used).
        shuffleType: one of the three db.Sources CDS+3'UTR shuffle types.
        taxId: forwarded to CDSand3UTRRandomizationIncludingNextCDS for the
            two "Including_Next_CDS" shuffle types.

    Returns:
        List of randomized sequences, in the order of newShuffleIds.

    Raises:
        AssertionError: if the reported identity is outside (0.0, 1.0].
        Exception: for an unknown shuffleType, when fewer than 500
            permutations are possible, or anything raised by the randomizer
            (printed, then re-raised).
    """
    codingRandomizer = SynonymousCodonPermutingRandomization(cds.getTranslationTable())
    utrRandomizer = NucleotidePermutationRandomization()

    if shuffleType == db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation:
        combined = CDSand3UTRRandomization(codingRandomizer, utrRandomizer)
    else:
        # The remaining supported types differ only in the constantOverlaps flag.
        if shuffleType == db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation_Including_Next_CDS:
            useConstantOverlaps = False
        elif shuffleType == db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation_Including_Next_CDS_Constant_Overlaps:
            useConstantOverlaps = True
        else:
            raise Exception("Unknown shuffleType={}".format(shuffleType))

        combined = CDSand3UTRRandomizationIncludingNextCDS(
            codingRandomizer,
            utrRandomizer,
            taxId=taxId,
            constantOverlaps=useConstantOverlaps)

    # NOTE(review): the next results are not used below; the calls are kept
    # because they may have side effects (e.g. populating a cache) - confirm.
    genomeModel = getGenomeModelFromCache(cds.getTaxId())
    original = cds.sequence()
    stopCodonPos = cds.CDSlength()

    results = []
    for _ in newShuffleIds:
        try:
            permCount, identity, shuffled = combined.randomize(original, cds.getProtId())
        except Exception as err:
            print(err)
            raise

        assert 0.0 < identity <= 1.0

        identityPercent = identity * 100.0

        # A nearly unchanged sequence is only reported, not rejected.
        if identity > 0.95:
            print(
                "Warning: Identity of randomized sequence is high - %.3g%% (length=%d nt, total permutations=%.2g)"
                % (identityPercent, len(shuffled), permCount))

        # Too few possible permutations is treated as a hard failure.
        if permCount < 500:
            raise Exception(
                "Low number of possible permutations %.2g (length=%d nt, identity=%.3g%%)"
                % (permCount, len(shuffled), identityPercent))

        results.append(shuffled)

    return results
def testRegionSpecificRandomization(fraction, taxId, numFractions):
    """Run masked randomization on a random subset of one fraction of a species.

    Iterates over nativeSequencesSource(taxId, fraction, numFractions), skips
    each sequence whenever random.randint(0, 1) returns 1, and for every
    remaining sequence calls randomizeWithMask() three times with the mask
    from getCodonMaskForSeq(seq, 0, 22), recording the permutation count of
    the last call.

    Args:
        fraction: index of the fraction to process; 0 <= fraction < numFractions.
        taxId: species taxonomy id; must be a plain int.
        numFractions: total number of fractions.

    Returns:
        Tuple (taxId, fraction, numSeqsDone, data) where data is a list of
        (sequence length, total permutations count) pairs and numSeqsDone is
        the number of entries in it.

    Raises:
        AssertionError: if the arguments violate the constraints above.
        Exception: anything raised by randomizeWithMask() is printed and
            re-raised.
    """
    assert fraction >= 0
    assert fraction < numFractions
    assert type(taxId) == int

    startTime = time()  # not read below; retained from the original

    randomizer = SynonymousCodonPermutingRandomization(getSpeciesTranslationTable(taxId))

    results = []
    for _seqId, nativeSeq in nativeSequencesSource(taxId, fraction, numFractions):
        # Randomly skip sequences (randint(0, 1) yields 0 or 1).
        if random.randint(0, 1) > 0:
            continue

        permCount = None
        # Three attempts are always made; only the last result is kept.
        for _attempt in range(3):
            try:
                permCount, _identity, _shuffled = randomizer.randomizeWithMask(
                    nativeSeq, getCodonMaskForSeq(nativeSeq, 0, 22))
            except Exception as err:
                print(err)
                raise err

        results.append((len(nativeSeq), permCount))

    return (taxId, fraction, len(results), results)