-
Notifications
You must be signed in to change notification settings - Fork 0
/
ba1h_KC.py
50 lines (37 loc) · 1.26 KB
/
ba1h_KC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python
# File created on Oct 1, 2015
__author__ = "Kenneth Cheng"
__credits__ = ["Kenneth Cheng"]
__version__ = "0.0.1-dev"
__maintainer__ = "Kenneth Cheng"
__email__ = ""
"""
Definition:
We say that a k-mer Pattern appears as a substring of Text with at most d mismatches
if there is some k-mer substring Pattern' of Text having d or fewer mismatches with Pattern,
i.e., HammingDistance(Pattern, Pattern') <= d.
Problem Description:
Approximate Pattern Matching Problem
Given: Strings Pattern and Text along with an integer d.
Return: All starting positions where Pattern appears as a substring of Text with at most d mismatches.
"""
import sys
from rosalind_utils import readdat
from ba1g_KC import hamming_dist
# Approach 1:
def approx_pat_posi(pattern, text, d):
    """Return the start positions where pattern approximately occurs in text.

    Every window of text that has the same length as pattern is compared
    against pattern with hamming_dist(); a window is reported when that
    value is at most d.

    Args:
        pattern: The k-mer to look for.
        text: The string that is scanned.
        d: Largest accepted hamming_dist() value. It is passed through
            int(), so a numeric string is accepted as well.

    Returns:
        The 0-based start indices of all accepted windows, in increasing
        order, joined by single spaces. The result is an empty string when
        no window is accepted or when pattern is longer than text.
    """
    window_len = len(pattern)
    text_len = len(text)
    max_mismatches = int(d)
    # range() instead of the Python 2-only xrange() keeps this runnable on
    # both Python 2 and Python 3. A non-positive bound (pattern longer than
    # text) simply yields no candidate positions.
    candidate_starts = range(text_len - window_len + 1)
    return ' '.join(
        str(start)
        for start in candidate_starts
        if hamming_dist(text[start:start + window_len], pattern) <= max_mismatches
    )
def main(filename):
    """Solve the problem for the input in filename and save the answer.

    The values returned by readdat(filename) are unpacked as the positional
    arguments of approx_pat_posi(), and the resulting string is written to
    'ba1h_out.txt' (opened in write mode, so existing content is replaced).
    """
    problem_args = readdat(filename)
    with open('ba1h_out.txt', 'w') as out_handle:
        # Computed after the output file is opened, as in the original flow.
        answer = approx_pat_posi(*problem_args)
        out_handle.write(answer)
if __name__ == '__main__':
    # The first command-line argument is the path of the input data file.
    main(sys.argv[1])