def count_approx_pattern(text, pattern, d):
    """Count the windows of ``text`` that approximately match ``pattern``.

    Every substring of ``text`` whose length equals ``len(pattern)`` is
    passed to ``hamming_dist`` together with ``pattern``; a window is
    counted when the result is at most ``d``.  Overlapping windows are
    counted separately.  Returns 0 when ``text`` is shorter than
    ``pattern`` because there is no window to examine.
    """
    width = len(pattern)
    last_start = len(text) - width
    return sum(
        1
        for start in range(last_start + 1)
        if hamming_dist(text[start:start + width], pattern) <= d
    )
示例#2
0
def distance(pattern, text):
    """Return the smallest ``hamming_dist`` between ``pattern`` and a window of ``text``.

    Each substring of ``text`` with length ``len(pattern)`` is compared to
    ``pattern``; the minimum result is returned.  When ``text`` is shorter
    than ``pattern`` there are no windows and ``float("Inf")`` is returned.
    """
    width = len(pattern)
    # Seed with infinity so that an empty set of windows still has a minimum.
    candidates = [float("Inf")]
    candidates.extend(
        hamming_dist(text[start:start + width], pattern)
        for start in range(len(text) - width + 1)
    )
    return min(candidates)
示例#3
0
def neighbors(pattern, d):
    """Return the ``d``-neighborhood of ``pattern`` over the alphabet A, C, G, T.

    The neighborhood is built recursively: the neighborhood of the suffix
    ``pattern[1:]`` is computed first, then each suffix neighbor is extended
    with a leading nucleotide.  A suffix neighbor whose ``hamming_dist`` to
    the original suffix is still below ``d`` may take any leading nucleotide;
    otherwise the mismatch budget is spent and only ``pattern[0]`` is allowed.

    Args:
        pattern: The string to generate neighbors for.
        d: Maximum number of mismatches allowed.

    Returns:
        A set of strings.  The result is always a set (including for
        ``d == 0`` and for single-character patterns) so that callers can
        iterate over it uniformly.
    """
    nucleotides = ('A', 'C', 'G', 'T')

    # With no mismatches allowed (or nothing to mutate) the only neighbor is
    # the pattern itself.  It is wrapped in a set: returning the bare string
    # would make callers iterate over its individual characters.  The empty
    # pattern check also stops the recursion below from never terminating.
    if d == 0 or not pattern:
        return {pattern}
    if len(pattern) == 1:
        return set(nucleotides)

    first, rest = pattern[0], pattern[1:]
    neighborhood = set()
    for suffix in neighbors(rest, d):
        if hamming_dist(rest, suffix) < d:
            # Budget left over: the first position may be any nucleotide.
            neighborhood.update(nuc + suffix for nuc in nucleotides)
        else:
            # Budget exhausted by the suffix: the first position must match.
            neighborhood.add(first + suffix)
    return neighborhood
示例#4
0
def main(k, d, data):
    """Return the k-mers with the highest approximate-match count in ``data``.

    ``get_all_kmers(k)`` supplies a mapping from candidate k-mer to a counter,
    which is updated in place.  For every length-``k`` window of ``data`` a
    candidate's counter is incremented once if ``hamming_dist`` between the
    candidate and the window is at most ``d``, and once more if the same holds
    for ``rev_complement(candidate)`` (presumably its reverse complement).

    Returns:
        A list of every candidate whose counter equals the maximum counter.

    Raises:
        ValueError: from ``max`` if ``get_all_kmers(k)`` yields no candidates.
    """
    variants = get_all_kmers(k)

    for offset in range(len(data) - k + 1):
        window = data[offset:offset + k]
        for kmer in variants:
            # Forward-strand match.
            if hamming_dist(kmer, window) <= d:
                variants[kmer] += 1
            # Match via rev_complement; counted independently of the above.
            if hamming_dist(rev_complement(kmer), window) <= d:
                variants[kmer] += 1

    best = max(variants.values())
    return [kmer for kmer, hits in variants.items() if hits == best]
示例#5
0
def main(k, d, data):
    """Find the most frequent k-mers in ``data``, allowing mismatches.

    The candidates and their counters come from ``get_all_kmers(k)``, used
    here as a mapping that is mutated in place.  Each length-``k`` window of
    ``data`` adds one to a candidate's counter when ``hamming_dist`` between
    candidate and window is at most ``d``, and adds another one when
    ``rev_complement(candidate)`` (presumably the reverse complement) is
    within ``d`` of the window.

    Returns:
        A list of all candidates that share the maximum counter value.

    Raises:
        ValueError: from ``max`` if there are no candidates at all.
    """
    variants = get_all_kmers(k)
    window_count = len(data) - k + 1

    for begin in range(window_count):
        chunk = data[begin:begin + k]
        for candidate in variants:
            if hamming_dist(candidate, chunk) <= d:
                variants[candidate] += 1
            if hamming_dist(rev_complement(candidate), chunk) <= d:
                variants[candidate] += 1

    top = max(variants.values())
    return [candidate for candidate in variants if variants[candidate] == top]
示例#6
0
def hamming_dist_approx(pattern, string, d):
    """Return the start indices of all approximate occurrences of ``pattern``.

    A window of ``string`` with length ``len(pattern)`` is an approximate
    occurrence when ``hamming_dist(pattern, window)`` is at most ``d``.

    Args:
        pattern: The string to search for.
        string: The text to search in.
        d: Maximum number of mismatches allowed.

    Returns:
        A list of 0-based start positions in increasing order; empty when
        ``string`` is shorter than ``pattern``.
    """
    pat_len = len(pattern)
    # The "+ 1" includes the final window, the one that ends exactly at the
    # end of ``string``; a strict "start + pat_len < len(string)" bound
    # would silently skip it.
    return [
        start
        for start in range(len(string) - pat_len + 1)
        if hamming_dist(pattern, string[start:start + pat_len]) <= d
    ]
示例#7
0
def gen_d_neighb(d, fileName, data):
    """Return the d-neighborhood of ``data`` and write it to ``fileName``.

    Every candidate produced by ``get_all_kmers(len(data))`` is kept when
    ``hamming_dist(data, candidate)`` is at most ``d``.  The kept candidates
    are written to ``fileName`` one per line, in the order they were
    produced, and also returned.

    Args:
        d: Maximum number of mismatches allowed.
        fileName: Path of the output file; it is created or truncated.
        data: The string whose neighborhood is generated.

    Returns:
        A list of the candidates within distance ``d`` of ``data``.
    """
    answers = [
        candidate
        for candidate in get_all_kmers(len(data))
        if hamming_dist(data, candidate) <= d
    ]

    # The context manager guarantees the handle is flushed and closed even if
    # a write fails; previously the file was left open.
    with open(fileName, 'w+') as out:
        out.writelines(str(item) + "\n" for item in answers)

    return answers
def CalculateScore(Motifs):
    """Return the total ``hamming_dist`` between the consensus and each motif.

    ``FormProfile(Motifs)`` is indexed as ``profile[nucleotide][position]``.
    The consensus string takes, at each position, the nucleotide with the
    largest profile entry (ties go to the earliest of A, C, G, T).  The score
    is the sum of ``hamming_dist(consensus, motif)`` over all motifs.

    Args:
        Motifs: A non-empty sequence of strings; the consensus length is
            taken from ``Motifs[0]``.

    Returns:
        The summed distance of all motifs to the consensus.

    Raises:
        IndexError: if ``Motifs`` is empty.
    """
    nucleotides = ('A', 'C', 'G', 'T')
    k = len(Motifs[0])
    profile = FormProfile(Motifs)

    # ``max`` always selects a nucleotide for every column.  A manual
    # "greater than 0" scan would leave the choice unset (or reuse the
    # previous column's choice) when no entry in a column is positive.
    consensus = ''.join(
        max(nucleotides, key=lambda nuc: profile[nuc][i])
        for i in range(k)
    )

    return sum(hamming_dist(consensus, motif) for motif in Motifs)
示例#9
0
def hamming_dist_approx(pattern, string, d):
    """Return the start indices of all approximate occurrences of ``pattern``.

    A length-``len(pattern)`` window of ``string`` counts as an approximate
    occurrence when ``hamming_dist(pattern, window)`` does not exceed ``d``.

    Args:
        pattern: The string to search for.
        string: The text to search in.
        d: Maximum number of mismatches allowed.

    Returns:
        A list of 0-based start positions in increasing order; empty when
        ``string`` is shorter than ``pattern``.
    """
    pat_len = len(pattern)
    str_len = len(string)
    answer = []
    # Inclusive upper bound (``<=``): the window that ends exactly at the end
    # of ``string`` is a valid candidate and must not be skipped.
    for start in range(str_len - pat_len + 1):
        sub = string[start:start + pat_len]
        if hamming_dist(pattern, sub) <= d:
            answer.append(start)

    return answer