def pattern_matching_mismatches(text, k, d):
    """Return the candidate k-mers that collect the most votes across ``text``.

    Every length-``k`` window of ``text`` votes once for each distinct member
    of the union of ``neighbours(window, d)`` and
    ``neighbours(reverse_complement(window), d)``.  The candidates holding the
    highest vote total are returned.

    Args:
        text: Sequence to scan; it is sliced into windows of length ``k``.
        k: Window (k-mer) length.
        d: Passed straight through to ``neighbours``; presumably the maximum
            number of mismatches allowed -- confirm against that helper.

    Returns:
        A list of the candidates with the highest vote total.  The list is
        empty when ``text`` is shorter than ``k`` (no windows, no votes).
    """
    tally = {}
    for start in range(len(text) - k + 1):
        window = text[start:start + k]
        forward = neighbours(window, d)
        backward = neighbours(reverse_complement(window), d)
        # The union means a candidate found in both neighbourhoods still
        # receives only one vote from this window.
        for candidate in set.union(forward, backward):
            tally[candidate] = tally.get(candidate, 0) + 1

    best = max(tally.values()) if tally else 0
    return [candidate for candidate, votes in tally.items() if votes == best]
def pattern_matching_mismatches_copy_pseudocode(text, k, d):
    """Find the most frequent k-mers of ``text`` using a frequency array.

    The function works in three passes over an array indexed by
    ``pattern_to_number``:

    1. Mark as "close" every pattern returned by ``neighbours(window, d)``
       for each length-``k`` window of ``text``.
    2. For each close pattern, store ``approx_pattern_count(text, pattern, d)``.
    3. Collect the close patterns whose stored count equals the maximum.

    Args:
        text: Sequence to scan.
        k: Window (k-mer) length; the arrays have ``4 ** k`` slots.
        d: Passed through to ``neighbours`` and ``approx_pattern_count``;
            presumably the maximum number of mismatches -- confirm against
            those helpers.

    Returns:
        A set of the close patterns with the highest count.  The set is empty
        when ``text`` has no length-``k`` window, instead of reporting every
        possible pattern as tied at a count of zero.
    """
    total = 4 ** k
    close = [False] * total
    frequency_array = [0] * total

    # The last valid window starts at len(text) - k, so the range bound must
    # be len(text) - k + 1; the previous bound skipped that final window.
    for start in range(len(text) - k + 1):
        for pattern in neighbours(text[start:start + k], d):
            close[pattern_to_number(pattern)] = True

    for index in range(total):
        if close[index]:
            pattern = number_to_pattern(index, k)
            frequency_array[index] = approx_pattern_count(text, pattern, d)

    max_count = max(frequency_array)
    # Only close patterns are eligible: slots that were never counted hold 0
    # and must not be reported as winners when nothing was close.
    return {
        number_to_pattern(index, k)
        for index in range(total)
        if close[index] and frequency_array[index] == max_count
    }
# Example #3
0
def motif_enumeration(dna_strings, k, d):
    """Collect candidate motifs that ``hamming_in_all`` accepts for the other strings.

    For every length-``k`` window of every string in ``dna_strings``, each
    pattern returned by ``neighbours(window, d)`` is tested with
    ``hamming_in_all`` against the strings that are not equal to the one the
    window came from.  Patterns that pass are collected.

    NOTE: this is a brute-force search; the work grows with the number of
    windows times the neighbourhood size times the number of strings.

    Args:
        dna_strings: Collection of sequences to search.
        k: Window (k-mer) length.
        d: Passed through to ``neighbours`` and ``hamming_in_all``; presumably
            the maximum number of mismatches -- confirm against those helpers.

    Returns:
        A set of the accepted patterns.  The set is empty when
        ``dna_strings`` is empty or every string is shorter than ``k``.
    """
    patterns = set()
    for dna in dna_strings:
        # This list depends only on ``dna``, so build it once per string
        # rather than once per neighbour.
        others = [x for x in dna_strings if x != dna]
        # ``range`` instead of ``xrange`` so the function also runs on Python 3.
        for i in range(len(dna) - k + 1):
            for n in neighbours(dna[i:i + k], d):
                if hamming_in_all(others, n, d):
                    patterns.add(n)
    return patterns