def approx_pattern_count(text, pattern, d):
    """Count approximate occurrences of ``pattern`` inside ``text``.

    Every window of ``text`` that has the same length as ``pattern`` is
    compared against ``pattern`` with ``hamming_distance`` (a helper defined
    outside this function; presumably it returns the number of mismatching
    positions -- confirm against its definition). Overlapping windows are
    counted independently.

    Args:
        text: The sequence to scan.
        pattern: The sequence to look for.
        d: Largest distance at which a window still counts as a match.

    Returns:
        The number of windows whose distance to ``pattern`` is at most ``d``.
        When ``text`` is shorter than ``pattern`` there are no windows and
        the result is 0.
    """
    width = len(pattern)
    last_start = len(text) - width
    # A negative ``last_start`` yields an empty range, hence a sum of 0.
    return sum(
        1
        for start in range(last_start + 1)
        if hamming_distance(pattern, text[start:start + width]) <= d
    )
def hamming_in_all(dna_strings, kmer, d):
    """Check whether ``kmer`` approximately occurs in every given string.

    A string "contains" ``kmer`` when at least one of its windows of
    ``len(kmer)`` characters is within distance ``d`` of ``kmer`` according
    to ``hamming_distance`` (a helper defined outside this function;
    presumably it counts mismatching positions -- confirm against its
    definition).

    Args:
        dna_strings: Iterable of sequences to search.
        kmer: The sequence to look for.
        d: Largest distance at which a window still counts as a match.

    Returns:
        ``True`` if every string has at least one matching window (also
        when ``dna_strings`` is empty), ``False`` as soon as one string has
        none. A string shorter than ``kmer`` has no windows and therefore
        yields ``False``.
    """
    # The window width does not depend on the string being scanned.
    k = len(kmer)
    for dna in dna_strings:
        # ``range`` instead of ``xrange`` so the function also runs on
        # Python 3, matching the other functions in this file.
        windows = (dna[j:j + k] for j in range(len(dna) - k + 1))
        # ``any`` stops at the first matching window, like the old ``break``.
        if not any(hamming_distance(kmer, window) <= d for window in windows):
            return False
    return True
def neighbours(pattern, d):
    """Build the set of strings within distance ``d`` of ``pattern``.

    The neighbourhood is built recursively from the neighbourhood of the
    pattern's suffix (everything after the first character). Distances are
    measured with ``hamming_distance`` (a helper defined outside this
    function; presumably it counts mismatching positions -- confirm against
    its definition).

    Args:
        pattern: The sequence whose neighbourhood is wanted.
        d: Largest allowed distance from ``pattern``.

    Returns:
        A set of strings over the alphabet ``A``, ``C``, ``G``, ``T``, each
        the same length as ``pattern``. For ``d == 0`` or an empty
        ``pattern`` the set contains only ``pattern`` itself.
    """
    nucleotides = ['A', 'C', 'G', 'T']
    # An empty pattern has no positions to mutate. Without this guard the
    # recursion on ``pattern[1:]`` would never reach a base case.
    if d == 0 or not pattern:
        return {pattern}
    if len(pattern) == 1:
        return set(nucleotides)

    head, suffix = pattern[0], pattern[1:]
    neighbourhood = set()
    for tail in neighbours(suffix, d):
        if hamming_distance(suffix, tail) < d:
            # The suffix has distance to spare, so the first character may
            # be any nucleotide.
            neighbourhood.update(n + tail for n in nucleotides)
        else:
            # The suffix already uses the whole budget; keep the original
            # first character.
            neighbourhood.add(head + tail)
    return neighbourhood