def testRegionSpecificRandomization2(codon, taxId):
    """Randomize a pool built from one codon position across all native sequences.

    The codon at index `codon` (0-based, in units of 3 nt) is taken from every
    sequence yielded by nativeSequencesSource(taxId, 0, 1) that is long enough
    to contain it. The collected codons are concatenated into a single "pool"
    string, which is then passed to the shuffler's randomize() three times.

    Args:
        codon: Non-negative integer codon index to sample from each sequence.
        taxId: Integer taxonomy id; used to look up the translation table and
            the native sequences.

    Returns:
        A tuple (taxId, codon, len(pool), totalPermutationsCount), where
        `codon` is the index that was passed in and totalPermutationsCount is
        the first element returned by the last randomize() call.

    Raises:
        AssertionError: If taxId is not an int or codon is negative.
        Exception: Whatever shuffler.randomize() raises (printed, then
            re-raised unchanged).
    """
    assert isinstance(taxId, int)
    # A negative index would silently produce empty slices below.
    assert codon >= 0

    shuffler = SynonymousCodonPermutingRandomization(getSpeciesTranslationTable(taxId))

    # Nucleotide span of the requested codon.  Kept in separate names so the
    # `codon` argument is never overwritten (the previous code rebound it to
    # the sliced string, which broke the second loop iteration and the
    # returned tuple).
    codonStart = codon * 3
    codonEnd = codonStart + 3

    # Only sequences that contain the *whole* codon contribute to the pool
    # (len(seq) >= codonEnd guarantees a 3-nt slice).
    poolCodons = [seq[codonStart:codonEnd]
                  for (_seqId, seq) in nativeSequencesSource(taxId, 0, 1)
                  if len(seq) >= codonEnd]

    pool = ''.join(poolCodons)

    totalPermutationsCountForSeq = None

    # The randomization is repeated a fixed number of times; only the
    # permutation count of the last run is reported.
    numAttempts = 3
    for _ in range(numAttempts):
        try:
            totalPermutationsCountForSeq, _identity, _shuffledSeq = shuffler.randomize(pool)
        except Exception as e:
            print(e)
            raise

    return (taxId, codon, len(pool), totalPermutationsCountForSeq)
def createRandomizedSeqs(cds,
                         newShuffleIds,
                         shuffleType=db.Sources.ShuffleCDSv2_python):
    """Produce one randomized version of the CDS sequence per requested id.

    Args:
        cds: Object providing getTranslationTable() and sequence().
        newShuffleIds: Iterable of shuffle ids; only its length matters here,
            one randomized sequence is generated per element.
        shuffleType: Accepted for interface compatibility; not used by this
            function.

    Returns:
        List of randomized sequences, in the order they were generated.

    Raises:
        AssertionError: If the reported identity is outside (0.0, 1.0].
        Exception: If the randomizer reports fewer than 500 possible
            permutations, or if randomize() itself fails (the error is
            printed and re-raised).
    """
    randomizer = SynonymousCodonPermutingRandomization(cds.getTranslationTable())
    original = cds.sequence()

    results = []
    for _ in newShuffleIds:
        try:
            permCount, ident, shuffled = randomizer.randomize(original)
        except Exception as err:
            print(err)
            raise

        # Identity is expected to be a fraction in (0, 1].
        assert ident <= 1.0 and ident > 0.0

        if ident > 0.95:
            # Non-fatal: the shuffled sequence is nearly identical to the input.
            print(
                "Warning: Identity of randomized sequence is high - %.3g%% (length=%d nt, total permutations=%.2g)"
                % (ident * 100.0, len(shuffled), permCount))

        if permCount < 500:
            # Too few distinct permutations for the result to be useful.
            raise Exception(
                "Low number of possible permutations %.2g (length=%d nt, identity=%.3g%%)"
                % (permCount, len(shuffled), ident * 100.0))

        results.append(shuffled)

    return results
# Example #3
# 0
def getRandomizedSequenceCacheForVerticalPermutations(taxId):
    """Return the vertical-permutation randomization cache for a species.

    Caches are memoized in the module-level `_caches` dict, keyed by
    (taxId, db.Sources.ShuffleCDS_vertical_permutation_1nt). On a miss, all
    CDS sequences of the species whose length is a multiple of 3 are loaded
    and a new VerticalRandomizationCache is built, stored, and returned.

    Args:
        taxId: Species identifier passed to SpeciesCDSSource, CDSHelper and
            getSpeciesTranslationTable.

    Returns:
        The (possibly newly created) VerticalRandomizationCache.
    """
    shuffleKind = db.Sources.ShuffleCDS_vertical_permutation_1nt
    cacheKey = (taxId, shuffleKind)

    # Fast path: already built for this species.
    if cacheKey in _caches:
        return _caches[cacheKey]

    # Collect the native sequences, skipping CDSs whose length is not a
    # whole number of codons.
    nativeSeqs = {}
    for protId in SpeciesCDSSource(taxId):
        helper = CDSHelper(taxId, protId)
        if helper.length() % 3 == 0:
            nativeSeqs[protId] = helper.sequence()

    geneticCode = getSpeciesTranslationTable(taxId)
    codonShuffler = SynonymousCodonPermutingRandomization(geneticCode)

    def permuteVertically(cdss):
        # Thin adapter so the cache only sees a plain one-argument callable.
        return codonShuffler.verticalPermutation(cdss)

    newCache = VerticalRandomizationCache(shuffleType=shuffleKind,
                                          taxId=taxId,
                                          nativeSeqsMap=nativeSeqs,
                                          geneticCode=geneticCode,
                                          randomizer=permuteVertically)
    _caches[cacheKey] = newCache
    print(_caches.keys())

    return newCache
def createRandomizedSeqs_CDS_with_3UTR(
        cds,
        newShuffleIds,
        shuffleType=db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation,
        taxId=None):
    """Produce randomized CDS+3'UTR sequences, one per requested shuffle id.

    The CDS part is handled by a SynonymousCodonPermutingRandomization and
    the UTR part by a NucleotidePermutationRandomization; `shuffleType`
    selects which combining randomizer wraps the two.

    Args:
        cds: Object providing getTranslationTable(), getTaxId(), sequence(),
            CDSlength() and getProtId().
        newShuffleIds: Iterable of shuffle ids; one randomized sequence is
            generated per element.
        shuffleType: One of the three db.Sources values handled below.
        taxId: Forwarded to CDSand3UTRRandomizationIncludingNextCDS for the
            two "Including_Next_CDS" shuffle types.

    Returns:
        List of randomized sequences, in the order they were generated.

    Raises:
        Exception: For an unknown shuffleType, when fewer than 500
            permutations are possible, or when randomize() fails (the error
            is printed and re-raised).
        AssertionError: If the reported identity is outside (0.0, 1.0].
    """
    codingRandomizer = SynonymousCodonPermutingRandomization(cds.getTranslationTable())
    utrRandomizer = NucleotidePermutationRandomization()

    # Pick the combined randomizer matching the requested shuffle type.
    if shuffleType == db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation:
        combined = CDSand3UTRRandomization(codingRandomizer, utrRandomizer)
    elif shuffleType == db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation_Including_Next_CDS:
        combined = CDSand3UTRRandomizationIncludingNextCDS(
            codingRandomizer, utrRandomizer, taxId=taxId, constantOverlaps=False)
    elif shuffleType == db.Sources.ShuffleCDS_synon_perm_and_3UTR_nucleotide_permutation_Including_Next_CDS_Constant_Overlaps:
        combined = CDSand3UTRRandomizationIncludingNextCDS(
            codingRandomizer, utrRandomizer, taxId=taxId, constantOverlaps=True)
    else:
        raise Exception("Unknown shuffleType={}".format(shuffleType))

    # NOTE(review): the results of the next call and of cds.CDSlength() below
    # are not used here; the calls are kept in case they have side effects
    # (e.g. warming a cache) - confirm before removing.
    getGenomeModelFromCache(cds.getTaxId())

    original = cds.sequence()
    cds.CDSlength()

    results = []
    for _ in newShuffleIds:
        try:
            permCount, ident, shuffled = combined.randomize(
                original, cds.getProtId())
        except Exception as err:
            print(err)
            raise

        # Identity is expected to be a fraction in (0, 1].
        assert ident <= 1.0 and ident > 0.0

        if ident > 0.95:
            # Non-fatal: the shuffled sequence is nearly identical to the input.
            print(
                "Warning: Identity of randomized sequence is high - %.3g%% (length=%d nt, total permutations=%.2g)"
                % (ident * 100.0, len(shuffled), permCount))

        if permCount < 500:
            # Too few distinct permutations for the result to be useful.
            raise Exception(
                "Low number of possible permutations %.2g (length=%d nt, identity=%.3g%%)"
                % (permCount, len(shuffled), ident * 100.0))

        results.append(shuffled)

    return results
def testRegionSpecificRandomization(fraction, taxId, numFractions):
    """Run masked randomization on a random subset of one fraction of sequences.

    Iterates the sequences yielded by
    nativeSequencesSource(taxId, fraction, numFractions), skips each one when
    random.randint(0, 1) returns 1, and for every remaining sequence calls
    shuffler.randomizeWithMask() three times with the mask from
    getCodonMaskForSeq(seq, 0, 22).

    Args:
        fraction: Index of the fraction to process; must satisfy
            0 <= fraction < numFractions.
        taxId: Integer taxonomy id.
        numFractions: Total number of fractions.

    Returns:
        A tuple (taxId, fraction, numSeqsDone, data), where data is a list of
        (len(seq), totalPermutationsCount) pairs - the count being the one
        reported by the last of the three runs - and numSeqsDone is the
        number of entries in data.

    Raises:
        AssertionError: If the argument checks fail.
        Exception: Whatever randomizeWithMask() raises (printed, then
            re-raised).
    """
    assert fraction >= 0 and fraction < numFractions
    assert type(taxId) is int

    # Not read anywhere at the moment; the call is retained so the sequence
    # of external calls matches the earlier implementation.
    startedAt = time()

    randomizer = SynonymousCodonPermutingRandomization(getSpeciesTranslationTable(taxId))

    results = []
    for _seqId, seq in nativeSequencesSource(taxId, fraction, numFractions):
        # Coin flip: process only the sequences for which randint gives 0.
        if random.randint(0, 1) > 0:
            continue

        permCount = None
        # Fixed number of repetitions; only the last count is recorded.
        for _attempt in range(3):
            try:
                permCount, _ident, _shuffled = randomizer.randomizeWithMask(
                    seq, getCodonMaskForSeq(seq, 0, 22))
            except Exception as err:
                print(err)
                raise err

        results.append((len(seq), permCount))

    return (taxId, fraction, len(results), results)