-
Notifications
You must be signed in to change notification settings - Fork 0
/
ba1i_KC.py
51 lines (39 loc) · 1.13 KB
/
ba1i_KC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/python
# File created on Nov 05, 2015
__author__ = "Kenneth Cheng"
__credits__ = ["Kenneth Cheng"]
__version__ = "0.0.1-dev"
__maintainer__ = "Kenneth Cheng"
__email__ = ""
"""
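Definition:
A k-mer occurs in Text with up to d mismatches when some length-k
substring of Text is within Hamming distance d of it.
Problem Description:
Find the most frequent words with mismatches in a string.
Given: A string Text along with integers k and d.
Return: All most frequent k-mers with up to d mismatches in Text.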
"""
import sys
from rosalind_utils import readdat
from ba1g_KC import hamming_dist
from ba1k_KC import permutdna
from collections import defaultdict
# Approach 1 (dummy's approach):
def maxfreq_mm(seq, k, d):
    """Print the most frequent k-mers of ``seq`` allowing up to ``d`` mismatches.

    Brute-force approach: every candidate produced by ``permutdna(k)`` is
    compared against every length-``k`` window of ``seq`` using
    ``hamming_dist``; a window counts as an occurrence of the candidate when
    the reported distance is at most ``d``.

    Args:
        seq: Text to scan; must support ``len()`` and slicing.
        k: k-mer length; anything accepted by ``int()`` (e.g. a numeric
            string).
        d: Maximum number of mismatches; anything accepted by ``int()``.

    Returns:
        None. The candidates sharing the highest occurrence count are
        written to standard output on one line, separated by spaces.
        Nothing is written when no candidate matches any window (for
        example when ``seq`` is shorter than ``k``).
    """
    k = int(k)
    d = int(d)

    # Slice the windows once up front instead of once per candidate k-mer.
    windows = [seq[i:i + k] for i in range(len(seq) - k + 1)]

    freq_dict = defaultdict(int)
    for kmer in permutdna(k):
        for window in windows:
            if hamming_dist(kmer, window) <= d:
                freq_dict[kmer] += 1

    # max() raises ValueError on an empty sequence, so stop here when
    # nothing matched rather than crashing.
    if not freq_dict:
        return

    maxfreq = max(freq_dict.values())
    best = [kmer for kmer in freq_dict if freq_dict[kmer] == maxfreq]
    # str() mirrors the conversion the print statement applied to each item;
    # sys.stdout.write keeps the function usable on both Python 2 and 3.
    sys.stdout.write(" ".join(str(kmer) for kmer in best) + "\n")
def main(filename):
    """Load the problem input from ``filename`` and report the result.

    The value returned by ``readdat(filename)`` is unpacked positionally
    into ``maxfreq_mm``, so it is presumably a (text, k, d) triple --
    verify against ``rosalind_utils.readdat``.
    """
    maxfreq_mm(*readdat(filename))
if __name__ == '__main__':
    # Exit with a usage hint instead of an IndexError traceback when the
    # input-file argument is missing. Extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <input_file>" % sys.argv[0])
    filename = sys.argv[1]
    main(filename)