def pattern_matching_mismatches(text, k, d):
    """Return the most frequently hit patterns over all k-length windows of text.

    For every window of length ``k`` in ``text``, the d-neighbourhood of the
    window and the d-neighbourhood of its reverse complement are merged into
    one set, and each pattern in that merged set receives one hit.

    Args:
        text: Sequence to scan.
        k: Window length.
        d: Value forwarded to ``neighbours`` (presumably the maximum number
            of mismatches -- ``neighbours`` is defined elsewhere).

    Returns:
        A list of the patterns whose hit count equals the highest hit count
        seen; empty when ``text`` has no window of length ``k``.
    """
    hits = defaultdict(int)
    for start in range(len(text) - k + 1):
        window = text[start:start + k]
        forward = neighbours(window, d)
        backward = neighbours(reverse_complement(window), d)
        # NOTE(review): because the two neighbourhoods are merged into a set,
        # a pattern present in both gets a single hit for this window, not
        # two -- confirm that this is the intended scoring.
        for candidate in set.union(forward, backward):
            hits[candidate] += 1

    # Every recorded count is at least 1, so 0 only occurs when nothing was
    # counted at all (and then the result is empty anyway).
    best = max(hits.values()) if hits else 0
    return [pattern for pattern, total in hits.items() if total == best]
def pattern_matching_mismatches_copy_pseudocode(text, k, d):
    """Return the patterns with the highest approximate count in text.

    Uses two tables indexed by pattern number (``4**k`` entries each): one
    flags patterns that lie in the d-neighbourhood of at least one k-length
    window of ``text``; the other stores, for each flagged pattern, the value
    of ``approx_pattern_count(text, pattern, d)``.

    Args:
        text: Sequence to scan.
        k: Window / pattern length.
        d: Value forwarded to ``neighbours`` and ``approx_pattern_count``
            (presumably the maximum number of mismatches -- both helpers are
            defined elsewhere).

    Returns:
        A set of the flagged patterns whose count equals the maximum count.
        Empty when ``text`` has no window of length ``k``.
    """
    table_size = 4 ** k
    close = [False] * table_size
    frequency_array = [0] * table_size

    # The upper bound is len(text) - k + 1 so that the final window
    # text[len(text) - k:] is visited too; stopping at len(text) - k
    # silently skipped it.
    for i in range(len(text) - k + 1):
        for pattern in neighbours(text[i:i + k], d):
            close[pattern_to_number(pattern)] = True

    # Only patterns near some window can have a non-trivial count, so the
    # expensive count is computed for those alone.
    for i in range(table_size):
        if close[i]:
            frequency_array[i] = approx_pattern_count(
                text, number_to_pattern(i, k), d
            )

    max_count = max(frequency_array)

    # Requiring close[i] keeps never-seen patterns out of the result; without
    # it a text with no windows (all counts tied at 0) would return every
    # possible k-mer.
    frequent_patterns = set()
    for i in range(table_size):
        if close[i] and frequency_array[i] == max_count:
            frequent_patterns.add(number_to_pattern(i, k))
    return frequent_patterns
def motif_enumeration(dna_strings, k, d):
    """Collect every neighbour of a k-length window that all other strings accept.

    For each string in ``dna_strings`` and each k-length window of it, every
    pattern returned by ``neighbours(window, d)`` is tested with
    ``hamming_in_all`` against the strings that are not equal to the current
    one; patterns that pass are collected.

    Args:
        dna_strings: Collection of sequences to search.
        k: Window / pattern length.
        d: Value forwarded to ``neighbours`` and ``hamming_in_all``
            (presumably the maximum number of mismatches -- both helpers are
            defined elsewhere).

    Returns:
        A set of the patterns for which ``hamming_in_all`` returned a truthy
        value.
    """
    # Original author's note: "Epic runtime!" -- this is a brute-force search
    # over every neighbour of every window of every string.
    patterns = set()
    for dna in dna_strings:
        # Depends only on the current string, so build it once here rather
        # than once per candidate pattern.
        others = [x for x in dna_strings if x != dna]
        # `range` instead of `xrange`: works on Python 2 and 3 and matches
        # the other functions in this file.
        for i in range(len(dna) - k + 1):
            for n in neighbours(dna[i:i + k], d):
                if hamming_in_all(others, n, d):
                    patterns.add(n)
    return patterns