def betterClumpFinding(Genome, k, t, L):
    """Find the k-mers that form an (L, t)-clump in Genome with a sliding window.

    A k-mer is reported when some window of length L in Genome contains it
    at least t times. The frequency array of the first window is computed
    once with computingFreqs; each later window is derived from the previous
    one by removing the k-mer that left on the left edge and adding the
    k-mer that entered on the right edge.

    Parameters:
        Genome: the sequence to scan (sliced and measured with len()).
        k: length of the patterns.
        t: minimum number of occurrences inside one window.
        L: window length.

    Returns:
        A list of patterns, ordered by their numeric index, as produced by
        numberToPattern. Empty when Genome is shorter than L.
    """
    # No window of length L fits, so no clump can exist; this matches what
    # the window-by-window clumpFinding returns for the same input.
    if len(Genome) < L:
        return []

    patternCount = 4 ** k
    clump = [0] * patternCount

    # Seed the flags from the first window.
    frequencyArray = computingFreqs(Genome[0:L], k)
    for index in range(patternCount):
        if frequencyArray[index] >= t:
            clump[index] = 1

    # Start at 1: window 0 is already counted above. Starting at 0 would
    # look up Genome[-1:k-1] and count the last k-mer of window 0 twice.
    for i in range(1, len(Genome) - L + 1):
        firstPattern = Genome[i - 1:i - 1 + k]
        frequencyArray[patternToNumber(firstPattern)] -= 1

        lastPattern = Genome[i + L - k:i + L]
        index = patternToNumber(lastPattern)
        frequencyArray[index] += 1
        # Only the k-mer that just entered can have newly reached t.
        if frequencyArray[index] >= t:
            clump[index] = 1

    return [
        numberToPattern(index, k)
        for index in range(patternCount)
        if clump[index] == 1
    ]
def betterClumpFinding(Genome, k, t, L):
    """Find the k-mers that form an (L, t)-clump in Genome with a sliding window.

    A k-mer is reported when some window of length L in Genome contains it
    at least t times. The frequency array of the first window is computed
    once with computingFreqs; each later window is derived from the previous
    one by removing the k-mer that left on the left edge and adding the
    k-mer that entered on the right edge.

    Parameters:
        Genome: the sequence to scan (sliced and measured with len()).
        k: length of the patterns.
        t: minimum number of occurrences inside one window.
        L: window length.

    Returns:
        A list of patterns, ordered by their numeric index, as produced by
        numberToPattern. Empty when Genome is shorter than L.
    """
    # No window of length L fits, so no clump can exist; this matches what
    # the window-by-window clumpFinding returns for the same input.
    if len(Genome) < L:
        return []

    patternCount = 4 ** k
    clump = [0] * patternCount

    # Seed the flags from the first window.
    frequencyArray = computingFreqs(Genome[0:L], k)
    for index in range(patternCount):
        if frequencyArray[index] >= t:
            clump[index] = 1

    # Start at 1: window 0 is already counted above. Starting at 0 would
    # look up Genome[-1:k-1] and count the last k-mer of window 0 twice.
    for i in range(1, len(Genome) - L + 1):
        firstPattern = Genome[i - 1:i - 1 + k]
        frequencyArray[patternToNumber(firstPattern)] -= 1

        lastPattern = Genome[i + L - k:i + L]
        index = patternToNumber(lastPattern)
        frequencyArray[index] += 1
        # Only the k-mer that just entered can have newly reached t.
        if frequencyArray[index] >= t:
            clump[index] = 1

    return [
        numberToPattern(index, k)
        for index in range(patternCount)
        if clump[index] == 1
    ]
def frequencyArray(Close, Text, k, d):
    """Build a 4**k-long array of approximate counts for the flagged patterns.

    For every index i in range(4**k) whose flag Close[i] equals 1, the
    pattern nTp.numberToPattern(i, k) is looked up and entry i is set to
    aM.approximate_count(pattern, Text, d). All other entries stay 0.

    Parameters:
        Close: indexable flags, read at every index in range(4**k).
        Text: passed through to aM.approximate_count.
        k: pattern length; determines the array size 4**k.
        d: passed through to aM.approximate_count (presumably the allowed
            number of mismatches -- confirm against that module).

    Returns:
        A list of length 4**k.
    """
    patternsCount = 4 ** k
    fArray = [0] * patternsCount
    # range rather than xrange: xrange does not exist on Python 3, and the
    # other functions in this file already use range.
    for i in range(patternsCount):
        if Close[i] == 1:
            Pattern = nTp.numberToPattern(i, k)
            fArray[i] = aM.approximate_count(Pattern, Text, d)
    return fArray
def computingFreqs(Text, k):
    """Return the frequency array of the k-mers in Text.

    Entry i of the result is the number of (overlapping) occurrences in
    Text of the k-mer whose base-4 index is i. The clump-finding callers in
    this file index the result with every value in range(4**k) and compare
    each entry with a threshold, so a list of exactly 4**k counts is
    returned.

    This function used to call itself unconditionally with the same
    arguments and therefore never returned; the counts are now computed
    directly.

    Parameters:
        Text: a string, or a list of single-character symbols.
        k: length of the k-mers to count.

    Returns:
        A list of 4**k non-negative integers.

    Raises:
        KeyError: if Text contains a symbol other than A, C, G or T.
    """
    # NOTE(review): the encoding A=0, C=1, G=2, T=3 (most significant symbol
    # first) is assumed to match patternToNumber / numberToPattern -- confirm.
    symbolValues = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    frequencyArray = [0] * (4 ** k)
    for start in range(len(Text) - k + 1):
        index = 0
        for symbol in Text[start:start + k]:
            index = 4 * index + symbolValues[symbol]
        frequencyArray[index] += 1
    return frequencyArray
def computingFreqs(Text, k):
    """Return the frequency array of the k-mers in Text.

    Entry i of the result is the number of (overlapping) occurrences in
    Text of the k-mer whose base-4 index is i. The clump-finding callers in
    this file index the result with every value in range(4**k) and compare
    each entry with a threshold, so a list of exactly 4**k counts is
    returned.

    This function used to call itself unconditionally with the same
    arguments and therefore never returned; the counts are now computed
    directly.

    Parameters:
        Text: a string, or a list of single-character symbols.
        k: length of the k-mers to count.

    Returns:
        A list of 4**k non-negative integers.

    Raises:
        KeyError: if Text contains a symbol other than A, C, G or T.
    """
    # NOTE(review): the encoding A=0, C=1, G=2, T=3 (most significant symbol
    # first) is assumed to match patternToNumber / numberToPattern -- confirm.
    symbolValues = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    frequencyArray = [0] * (4 ** k)
    for start in range(len(Text) - k + 1):
        index = 0
        for symbol in Text[start:start + k]:
            index = 4 * index + symbolValues[symbol]
        frequencyArray[index] += 1
    return frequencyArray
def numberToPattern(index, k):
    """Convert a numeric index into its pattern of k symbols.

    The pattern is built recursively: the last symbol comes from
    index % 4, and the preceding k - 1 symbols come from index // 4.

    Parameters:
        index: numeric index of the pattern.
        k: number of symbols in the pattern.

    Returns:
        The concatenation of the prefix pattern and the last symbol.
    """
    # Base case: a single symbol is looked up directly.
    if k == 1:
        return numberToSymbol(index)

    quotient, remainder = divmod(index, 4)
    # An index of exactly zero yields 'A' without consulting numberToSymbol.
    lastSymbol = 'A' if index == 0 else numberToSymbol(remainder)
    return numberToPattern(quotient, k - 1) + lastSymbol
def frequencyArrayAndReverse(Close, Text, k, d):
    """Build a 4**k-long array of combined counts for patterns and their reverse complements.

    For every index i in range(4**k) whose flag Close[i] equals 1 and whose
    entry is still 0, the pattern nTp.numberToPattern(i, k) and its
    rC.reverse_complement are each passed to aM.approximate_count; the sum
    of the two results is stored both at i and at the reverse complement's
    index (nTp.patternToNumber).

    Parameters:
        Close: indexable flags, read at every index in range(4**k).
        Text: passed through to aM.approximate_count.
        k: pattern length; determines the array size 4**k.
        d: passed through to aM.approximate_count (presumably the allowed
            number of mismatches -- confirm against that module).

    Returns:
        A list of length 4**k.
    """
    patternsCount = 4 ** k
    fArray = [0] * patternsCount
    # range rather than xrange: xrange does not exist on Python 3, and the
    # other functions in this file already use range.
    for i in range(patternsCount):
        if Close[i] != 1:
            continue
        # A positive entry was already filled in, either for this pattern or
        # as the reverse complement of an earlier one; skip the recount.
        if fArray[i] > 0:
            continue
        Pattern = nTp.numberToPattern(i, k)
        ReversePattern = rC.reverse_complement(Pattern)
        anotherIndex = nTp.patternToNumber(ReversePattern)
        totalSum = (aM.approximate_count(Pattern, Text, d)
                    + aM.approximate_count(ReversePattern, Text, d))
        fArray[i] = totalSum
        fArray[anotherIndex] = totalSum
    return fArray
def clumpFinding(Genome, k, t, L):
    """Find the k-mers that form an (L, t)-clump in Genome, window by window.

    Every window of length L in Genome is passed to computingFreqs; a k-mer
    is reported when its count reaches t in at least one window.

    Parameters:
        Genome: the sequence to scan (sliced and measured with len()).
        k: length of the patterns.
        t: minimum number of occurrences inside one window.
        L: window length.

    Returns:
        A list of patterns, ordered by their numeric index, as produced by
        numberToPattern. Empty when Genome is shorter than L.
    """
    patternCount = 4 ** k
    # Preallocate the flags: assigning clump[i] on an empty list raises
    # IndexError on the very first iteration.
    clump = [0] * patternCount

    for start in range(len(Genome) - L + 1):
        # Slice Genome itself so the helper receives the same kind of
        # sequence that betterClumpFinding passes to it.
        window = Genome[start:start + L]
        frequencyArray = computingFreqs(window, k)
        for index in range(patternCount):
            if frequencyArray[index] >= t:
                clump[index] = 1

    return [
        numberToPattern(index, k)
        for index in range(patternCount)
        if clump[index] == 1
    ]
def clumpFinding(Genome, k, t, L):
    """Find the k-mers that form an (L, t)-clump in Genome, window by window.

    Every window of length L in Genome is passed to computingFreqs; a k-mer
    is reported when its count reaches t in at least one window.

    Parameters:
        Genome: the sequence to scan (sliced and measured with len()).
        k: length of the patterns.
        t: minimum number of occurrences inside one window.
        L: window length.

    Returns:
        A list of patterns, ordered by their numeric index, as produced by
        numberToPattern. Empty when Genome is shorter than L.
    """
    patternCount = 4 ** k
    # Preallocate the flags: assigning clump[i] on an empty list raises
    # IndexError on the very first iteration.
    clump = [0] * patternCount

    for start in range(len(Genome) - L + 1):
        # Slice Genome itself so the helper receives the same kind of
        # sequence that betterClumpFinding passes to it.
        window = Genome[start:start + L]
        frequencyArray = computingFreqs(window, k)
        for index in range(patternCount):
            if frequencyArray[index] >= t:
                clump[index] = 1

    return [
        numberToPattern(index, k)
        for index in range(patternCount)
        if clump[index] == 1
    ]
def maxPatterns(fArray, k):
    """Return the set of patterns whose entry in fArray equals its maximum.

    Parameters:
        fArray: sequence of counts, indexed by pattern number.
        k: pattern length, forwarded to nTp.numberToPattern.

    Returns:
        A set containing nTp.numberToPattern(i, k) for every index i at
        which fArray holds its maximum value.
    """
    highest = max(fArray)
    return {
        nTp.numberToPattern(position, k)
        for position, count in enumerate(fArray)
        if count == highest
    }
def closeNeighborsLines(Text, k, d):
    """Return the set of patterns whose entry from closeNeighbors is non-zero.

    Parameters:
        Text, k, d: forwarded unchanged to closeNeighbors; k is also used
            to convert each index back into a pattern.

    Returns:
        A set containing nTp.numberToPattern(i, k) for every index i at
        which closeNeighbors(Text, k, d) holds a non-zero value.
    """
    flags = closeNeighbors(Text, k, d)
    return {
        nTp.numberToPattern(position, k)
        for position, value in enumerate(flags)
        if value != 0
    }