def betterClumpFinding(Genome, k, t, L):
    """Find every k-mer that occurs at least t times in some window of length L.

    The frequency array of the first window ``Genome[0:L]`` is computed once
    with ``computingFreqs``.  Each later window is derived from the previous
    one by sliding one position to the right: the k-mer that started the old
    window loses one occurrence and the k-mer that ends the new window gains
    one.

    Args:
        Genome: sequence to scan; it is sliced and the slices are handed to
            ``computingFreqs`` and ``patternToNumber``.
        k: pattern length; ``4 ** k`` pattern numbers are tracked.
        t: minimum number of occurrences required inside a single window.
        L: window length.

    Returns:
        List of ``numberToPattern(index, k)`` for every pattern number whose
        count reached ``t`` in at least one window, in increasing index order.
    """
    patternCount = 4 ** k
    clump = [0] * patternCount

    frequencyArray = computingFreqs(Genome[0:L], k)
    for index in range(patternCount):
        if frequencyArray[index] >= t:
            clump[index] = 1

    # Start at 1: window 0 is fully handled above.  Starting at 0 would slice
    # Genome[-1:k - 1] as the "departing" k-mer and would count the last
    # k-mer of the first window a second time.
    for i in range(1, len(Genome) - L + 1):
        firstPattern = Genome[i - 1:i - 1 + k]
        index = patternToNumber(firstPattern)
        frequencyArray[index] -= 1
        lastPattern = Genome[i + L - k:i + L]
        index = patternToNumber(lastPattern)
        frequencyArray[index] += 1
        # Only the k-mer that just entered the window can newly reach t.
        if frequencyArray[index] >= t:
            clump[index] = 1

    return [numberToPattern(index, k)
            for index in range(patternCount) if clump[index] == 1]
def betterClumpFinding(Genome, k, t, L):
    """Return the k-mers that appear at least t times in some length-L window.

    Only the first window's frequency array is built from scratch (via
    ``computingFreqs``).  For each subsequent window the array is updated
    incrementally: one occurrence is removed for the k-mer at the start of
    the previous window and one is added for the k-mer at the end of the
    current window.

    Args:
        Genome: sequence to scan; slices of it are passed to
            ``computingFreqs`` and ``patternToNumber``.
        k: pattern length; ``4 ** k`` pattern numbers are tracked.
        t: occurrence threshold within one window.
        L: window length.

    Returns:
        List of ``numberToPattern(index, k)`` for each pattern number that
        met the threshold in at least one window, in increasing index order.
    """
    patternCount = 4 ** k
    clump = [0] * patternCount

    frequencyArray = computingFreqs(Genome[0:L], k)
    for index in range(patternCount):
        if frequencyArray[index] >= t:
            clump[index] = 1

    # The slide begins at window 1.  An iteration for i == 0 would take
    # Genome[-1:k - 1] as the outgoing k-mer and re-count the final k-mer of
    # the first window, corrupting the frequency array.
    for i in range(1, len(Genome) - L + 1):
        firstPattern = Genome[i - 1:i - 1 + k]
        index = patternToNumber(firstPattern)
        frequencyArray[index] -= 1
        lastPattern = Genome[i + L - k:i + L]
        index = patternToNumber(lastPattern)
        frequencyArray[index] += 1
        # The incoming k-mer is the only one whose count just increased.
        if frequencyArray[index] >= t:
            clump[index] = 1

    return [numberToPattern(index, k)
            for index in range(patternCount) if clump[index] == 1]
def frequencyArray(Close, Text, k, d):
    """Compute approximate occurrence counts for the patterns flagged in Close.

    Args:
        Close: sequence indexed by pattern number; an entry equal to 1 marks
            a pattern whose count is computed.
        Text: text passed to ``aM.approximate_count``.
        k: pattern length; the result has ``4 ** k`` entries.
        d: passed through to ``aM.approximate_count`` (presumably the maximum
            number of mismatches allowed -- confirm against that helper).

    Returns:
        List of length ``4 ** k`` holding
        ``aM.approximate_count(pattern, Text, d)`` at the index of every
        flagged pattern and 0 at every other index.
    """
    patternsCount = pow(4, k)
    fArray = [0] * patternsCount
    # range() rather than the Python-2-only xrange(), so the function also
    # runs under Python 3.
    for i in range(patternsCount):
        if Close[i] == 1:
            Pattern = nTp.numberToPattern(i, k)
            fArray[i] = aM.approximate_count(Pattern, Text, d)
    return fArray
def computingFreqs(Text, k):
    """Return the most frequent k-mers of Text.

    The previous body obtained its frequency table by calling
    ``computingFreqs`` itself with the same arguments, so it recursed until
    Python raised RecursionError.  The occurrence counts are now gathered
    directly inside this function.

    NOTE(review): other callers in this file index the result of
    ``computingFreqs`` as a per-pattern frequency array, whereas this
    definition returns pattern strings -- confirm which one is intended.

    Args:
        Text: string (or list of single characters) to scan.
        k: pattern length.

    Returns:
        List of the k-mers over the alphabet ``ACGT`` whose occurrence count
        in Text equals the maximum count, in lexicographic order.  When no
        k-mer occurs at all, every pattern ties at 0 and all ``4 ** k``
        patterns are returned.
    """
    from itertools import product

    # Occurrence count of each k-mer that actually appears in Text.
    counts = {}
    for start in range(len(Text) - k + 1):
        # join() lets the window be either a str slice or a list of chars.
        kmer = "".join(Text[start:start + k])
        counts[kmer] = counts.get(kmer, 0) + 1

    # Every candidate pattern, ordered A < C < G < T position by position
    # (presumably the same order the pattern numbers follow).
    patterns = ["".join(symbols) for symbols in product("ACGT", repeat=k)]
    maxCount = max(counts.get(pattern, 0) for pattern in patterns)
    return [pattern for pattern in patterns
            if counts.get(pattern, 0) == maxCount]
# Example #5
# 0
def computingFreqs(Text, k):
    """Return the k-mers that occur most often in Text.

    The earlier implementation asked ``computingFreqs`` (i.e. itself) for
    the frequency table, which never terminated and ended in RecursionError.
    This version tallies the k-mers on its own.

    NOTE(review): elsewhere in this file the result of ``computingFreqs`` is
    indexed like a per-pattern frequency array, while this definition
    returns pattern strings -- confirm which behaviour is wanted.

    Args:
        Text: string (or list of single characters) to scan.
        k: pattern length.

    Returns:
        List, in lexicographic order, of the k-mers over ``ACGT`` whose
        occurrence count in Text equals the maximum count.  If nothing
        occurs, all ``4 ** k`` patterns tie at 0 and are all returned.
    """
    from itertools import product

    # Tally every k-mer that is present in Text.
    counts = {}
    for start in range(len(Text) - k + 1):
        # join() accepts both a str slice and a list-of-characters slice.
        kmer = "".join(Text[start:start + k])
        counts[kmer] = counts.get(kmer, 0) + 1

    # All candidate patterns, ordered A < C < G < T position by position
    # (presumably matching the pattern-number order).
    patterns = ["".join(symbols) for symbols in product("ACGT", repeat=k)]
    maxCount = max(counts.get(pattern, 0) for pattern in patterns)
    return [pattern for pattern in patterns
            if counts.get(pattern, 0) == maxCount]
def numberToPattern(index, k):
    """Convert a pattern number into its k-symbol pattern.

    ``index`` is read as a base-4 number: each remainder modulo 4 yields one
    symbol (via ``numberToSymbol``), least significant symbol last.

    Args:
        index: pattern number.
        k: length of the pattern to produce.

    Returns:
        The pattern of length ``k``; the empty string when ``k`` is 0.

    Raises:
        ValueError: if ``k`` is negative.  (Previously any ``k`` below 1
            skipped the ``k == 1`` base case and recursed until
            RecursionError.)
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    if k == 0:
        return ""

    # Symbols are produced from the last position backwards.
    trailing = []
    while k > 1:
        prefixIndex, r = divmod(index, 4)
        # A zero index contributes 'A' directly, without consulting
        # numberToSymbol (same shortcut as before).
        trailing.append('A' if index == 0 else numberToSymbol(r))
        index = prefixIndex
        k -= 1
    # Whatever is left of the index determines the leading symbol.
    trailing.append(numberToSymbol(index))
    return "".join(reversed(trailing))
def frequencyArrayAndReverse(Close, Text, k, d):
    """Count flagged patterns together with their reverse complements.

    For every pattern number ``i`` with ``Close[i] == 1`` the result stores
    ``approximate_count(pattern) + approximate_count(reverse complement)``
    both at ``i`` and at the reverse complement's own pattern number.

    Args:
        Close: sequence indexed by pattern number; an entry equal to 1 marks
            a pattern to evaluate.
        Text: text passed to ``aM.approximate_count``.
        k: pattern length; the result has ``4 ** k`` entries.
        d: passed through to ``aM.approximate_count`` (presumably the maximum
            number of mismatches allowed -- confirm against that helper).

    Returns:
        List of length ``4 ** k`` with the combined counts; entries that were
        never written stay 0.
    """
    patternsCount = pow(4, k)
    fArray = [0] * patternsCount
    # range() rather than the Python-2-only xrange(), so the function also
    # runs under Python 3.
    for i in range(patternsCount):
        # Skip unflagged patterns, and patterns whose slot already holds a
        # positive total written while processing their reverse complement.
        if Close[i] != 1 or fArray[i] > 0:
            continue
        Pattern = nTp.numberToPattern(i, k)
        ReversePattern = rC.reverse_complement(Pattern)
        anotherIndex = nTp.patternToNumber(ReversePattern)
        totalSum = (aM.approximate_count(Pattern, Text, d)
                    + aM.approximate_count(ReversePattern, Text, d))
        fArray[i] = totalSum
        # The reverse complement's slot is filled even if Close does not
        # flag it.
        fArray[anotherIndex] = totalSum
    return fArray
# Example #8
# 0
def clumpFinding(Genome, k, t, L):
    """Find every k-mer that occurs at least t times in some window of length L.

    Each window ``Genome[i:i + L]`` gets its own frequency array from
    ``computingFreqs``; a pattern number is marked as soon as its count in
    any window reaches ``t``.

    Args:
        Genome: sequence to scan.
        k: pattern length; ``4 ** k`` pattern numbers are tracked.
        t: minimum number of occurrences required inside a single window.
        L: window length.

    Returns:
        List of ``numberToPattern(index, k)`` for every marked pattern
        number, in increasing index order.  Empty when Genome is shorter
        than L, because no window is examined.
    """
    patternCount = 4 ** k
    # Pre-sized flag list: assigning clump[i] on an empty list, as the code
    # did before, raised IndexError on the very first index.
    clump = [0] * patternCount

    for i in range(len(Genome) - L + 1):
        # Slice Genome itself (as betterClumpFinding does) so a string
        # genome is handed on as a string window.
        frequencyArray = computingFreqs(Genome[i:i + L], k)
        for index in range(patternCount):
            if frequencyArray[index] >= t:
                clump[index] = 1

    return [numberToPattern(index, k)
            for index in range(patternCount) if clump[index] == 1]
def clumpFinding(Genome, k, t, L):
    """Return the k-mers that appear at least t times in some length-L window.

    A fresh frequency array is requested from ``computingFreqs`` for every
    window ``Genome[i:i + L]``; any pattern number whose count reaches ``t``
    in a window is flagged.

    Args:
        Genome: sequence to scan.
        k: pattern length; ``4 ** k`` pattern numbers are tracked.
        t: occurrence threshold within one window.
        L: window length.

    Returns:
        List of ``numberToPattern(index, k)`` for each flagged pattern
        number, in increasing index order.  Empty when Genome is shorter
        than L, since no window exists.
    """
    patternCount = 4 ** k
    # The flags must exist before they can be assigned by index; the former
    # ``clump = []`` followed by ``clump[i] = 0`` raised IndexError.
    clump = [0] * patternCount

    for i in range(len(Genome) - L + 1):
        # Windows are sliced from Genome directly, matching
        # betterClumpFinding, so a string genome yields string windows.
        frequencyArray = computingFreqs(Genome[i:i + L], k)
        for index in range(patternCount):
            if frequencyArray[index] >= t:
                clump[index] = 1

    return [numberToPattern(index, k)
            for index in range(patternCount) if clump[index] == 1]
def maxPatterns(fArray, k):
    """Return the set of patterns whose entry in fArray equals its maximum.

    Args:
        fArray: sequence of counts indexed by pattern number.
        k: pattern length passed to ``nTp.numberToPattern``.

    Returns:
        Set of ``nTp.numberToPattern(index, k)`` for every index holding the
        largest value found in ``fArray``.
    """
    highest = max(fArray)
    return {
        nTp.numberToPattern(position, k)
        for position, count in enumerate(fArray)
        if count == highest
    }
# Example #11
# 0
def closeNeighborsLines(Text, k, d):
    """Return the set of patterns with a non-zero entry from closeNeighbors.

    Args:
        Text: passed through to ``closeNeighbors``.
        k: pattern length, passed to ``closeNeighbors`` and
            ``nTp.numberToPattern``.
        d: passed through to ``closeNeighbors``.

    Returns:
        Set of ``nTp.numberToPattern(index, k)`` for every index at which
        the sequence returned by ``closeNeighbors(Text, k, d)`` is non-zero.
    """
    flags = closeNeighbors(Text, k, d)
    return {
        nTp.numberToPattern(position, k)
        for position, value in enumerate(flags)
        if value != 0
    }