def TestCase(self):
    """Check the match positions reported by ``KMP.search_MP``.

    Each case is ``(text, pattern, expected)``, where ``expected`` is the
    list of indices the search must return. The first case has overlapping
    occurrences; the second has a single occurrence.
    """
    cases = (
        ("abcabcabcabc", "cabc", [2,5,8]),
        ("ABC ABCDAB ABCDABCDABDE", "ABCDABD", [15]),
    )
    matcher = KMP()
    for text, pattern, expected in cases:
        found = matcher.search_MP(text, pattern)
        self.assertListEqual(found, expected)
def TestCase(self):
    """Verify ``KMP.search_MP`` against two known text/pattern pairs.

    The expected value of each pair is the list of indices that the search
    has to report, in order.
    """
    searcher = KMP()

    # Pattern occurs three times, with overlaps.
    overlapping = searcher.search_MP("abcabcabcabc", "cabc")
    self.assertListEqual(overlapping, [2, 5, 8])

    # Pattern occurs exactly once, after several partial matches.
    single = searcher.search_MP("ABC ABCDAB ABCDABCDABDE", "ABCDABD")
    self.assertListEqual(single, [15])
def __init__(self, p):
    """Store the pattern and build the row and column matchers for it.

    Attributes set here:
        p:   the pattern rows, copied into a list (described by the
             original author as an m-by-m pattern).
        m:   ``len(p)``.
        dr:  first value returned by ``self.construct()``; its keys are
             handed to ``AC``. Described by the original author as the
             mapping from pattern rows to distinct row numbers.
        pp:  second value returned by ``self.construct()``; handed to
             ``KMP``. Described by the original author as the pattern
             rewritten as a sequence of distinct row numbers.
        ac:  ``AC`` instance used for row matching.
        kmp: ``KMP`` instance used for column matching.
    """
    self.p = list(p)
    self.m = len(p)

    # construct() runs only after p and m are in place.
    distinct_rows, encoded_pattern = self.construct()
    self.dr = distinct_rows
    self.pp = encoded_pattern

    row_keys = self.dr.keys()
    self.ac = AC(row_keys)
    self.kmp = KMP(self.pp)
def do_kmp(spam_text, data_resp):
    """Run ``KMP.match_string`` on every entry and attach its profile fields.

    Args:
        spam_text: Value passed as the second argument of
            ``KMP.match_string`` for every entry.
        data_resp: Iterable of mappings, each providing the keys ``'text'``,
            ``'profile_img'``, ``'name'`` and ``'screen_name'``.

    Returns:
        A list with one match result per entry, in input order. Each result
        is whatever ``KMP.match_string`` returned, with the three profile
        fields copied onto it.
    """
    copied_keys = ('profile_img', 'name', 'screen_name')
    matched = []
    for entry in data_resp:
        outcome = KMP.match_string(entry['text'], spam_text)
        for key in copied_keys:
            outcome[key] = entry[key]
        matched.append(outcome)
    return matched
def __init__(self, stream, pattern_len):
    """Initialise the Baker-Bird matcher from the pattern rows in a stream.

    The pattern rows are read three times, with ``stream.set_seek()``
    called between the passes.

    :param stream: object providing ``next()`` (returns one row) and
        ``set_seek()``
    :param pattern_len: number of pattern rows to read in each pass
    """
    # Pass 1: register every pattern row with the Aho-Corasick automaton.
    automaton = AhoCorasick()
    self.ac = automaton
    for _ in range(pattern_len):
        automaton.add_patterns(stream.next())
    automaton.build()

    # Pass 2: give each distinct row a label "1", "2", ... in order of
    # first appearance.
    labels = {}
    self.r = labels
    stream.set_seek()
    for _ in range(pattern_len):
        labels.setdefault(stream.next(), str(len(labels) + 1))

    # Pass 3: spell the pattern as its row labels and give that to KMP.
    stream.set_seek()
    encoded = []
    for _ in range(pattern_len):
        encoded.append(labels[stream.next()])
    self.kmp = KMP("".join(encoded))
class BakerBird(object):
    """Two-dimensional pattern matcher in the style of Baker-Bird.

    Rows of the text are scanned with an Aho-Corasick automaton built from
    the pattern rows. Each column then advances its own KMP state over the
    row labels found at that column.
    """

    def __init__(self, stream, pattern_len):
        """Initialise the matcher from the pattern rows in ``stream``.

        The pattern rows are read three times, with ``stream.set_seek()``
        called between the passes.

        :param stream: object providing ``next()`` (returns one row) and
            ``set_seek()``
        :param pattern_len: number of pattern rows to read in each pass
        """
        # Pass 1: register every pattern row with the automaton.
        self.ac = AhoCorasick()
        for _ in range(pattern_len):
            line = stream.next()
            self.ac.add_patterns(line)
        self.ac.build()

        # Pass 2: label each distinct row "1", "2", ... in order of first
        # appearance. "0" is used by __call__ to mean "no row matched".
        self.r = {}
        idx = 1
        stream.set_seek()
        for _ in range(pattern_len):
            row = stream.next()
            if row not in self.r:
                self.r[row] = str(idx)
                idx += 1

        # Pass 3: spell the pattern as its row labels for column matching.
        # NOTE(review): labels are decimal strings joined into one keyword,
        # so with 10 or more distinct rows a label spans several characters
        # while __call__ feeds whole labels to kmp.step -- confirm that KMP
        # copes with this.
        stream.set_seek()
        self.kmp = KMP("".join([str(self.r[stream.next()]) for _ in range(pattern_len)]))

    def __call__(self, stream, text_len):
        """Search a ``text_len`` x ``text_len`` text read row by row.

        To limit extra space, no full two-dimensional label table is built.
        Each row is scanned with Aho-Corasick, then every column's KMP state
        is advanced by one step, so only one state per column is kept.

        :param stream: object whose ``next()`` returns the next text row
        :param text_len: number of rows to read, and number of columns
            tracked per row
        :return: list of ``(row, column)`` tuples giving the position where
            each match ends
        """
        ret = []
        # One KMP state per text column.
        position = [0] * text_len
        pattern_rows = len(self.kmp.keyword)
        for i in range(text_len):
            row = stream.next()
            # Label of the pattern row ending at each column, "0" if none.
            row_R = ["0"] * text_len
            for start, end, keyword in self.ac(row):
                row_R[end - 1] = self.r[keyword]
            for idx, R in enumerate(row_R):
                position[idx] = self.kmp.step(R, position[idx])
                # Compare by value: identity comparison of ints misses
                # matches once the pattern has more than 256 rows.
                if position[idx] == pattern_rows:
                    ret.append((i, idx))
                    # Fall back so that overlapping matches are still found.
                    position[idx] = self.kmp.pi[position[idx] - 1]
        return ret
# Close the timing section that was started before this fragment: `tick`
# is set earlier, outside the visible code. The printed label suggests it
# was the `re`-based search -- TODO confirm.
tock = time.time()
print('re search time:', tock - tick)

# Time bf_search: every keyword is searched in every stripped text.
tick = time.time()
for text in text_iterator():
    for keyword in keywords:
        result = bf_search(text.strip(), keyword)
        # if result:
        #     print(keyword, result)
tock = time.time()
print('bf search time:', tock - tick)

# Time KMP: same loops as above. A new KMP object is created for every
# (text, keyword) pair, so construction is part of the measured time.
tick = time.time()
for text in text_iterator():
    for keyword in keywords:
        result = KMP().search(text.strip(), keyword)
        # if result:
        #     print(keyword, result)
tock = time.time()
print('kmp search time:', tock - tick)

# Time ac_automation: one search call per text, with no keyword loop.
tick = time.time()
for text in text_iterator():
    result = ac_automation.search(text.strip())
tock = time.time()
# Disabled debugging output, placed after `tock` so it would not be timed:
# word2pos = defaultdict(list)
# for start, end in result:
#     word2pos[line[start: end+1]].append((start, end))
# for word, pos in word2pos.items():
#     print(word, pos)
print('ac search time:', tock - tick)
# Command-line driver: search for a pattern in a text file with either the
# KMP matcher or the finite-automaton matcher.
#
# Arguments: <matcher> <pattern> <path>
#   matcher -- "KMP" selects KMP; any other value selects the automaton
#   pattern -- the pattern to search for
#   path    -- file whose lines are joined, without line breaks, into the
#              text to search
import sys

from kmp import KMP
from fa import AUTOMATA

matcher = sys.argv[1]
pattern = sys.argv[2]

# Read the whole file and drop the line breaks. The context manager closes
# the handle even if reading fails.
with open(sys.argv[3], "r") as file:
    text = "".join(file.read().splitlines())

if matcher == "KMP":
    print("wyszukiwanie wzorca za pomoca KMP")
    print("=================================")
    kmp = KMP(text, pattern)
    kmp.kmp()
else:
    print("wyszukiwanie wzorca za pomoca FA")
    print("================================")
    finite = AUTOMATA(text, pattern)
    finite.automata_matcher()
def kmp_array(fasta_list):
    """Build the KMP table for the first entry of ``fasta_list`` and print it.

    The value used is ``fasta_list[0][1]``, presumably the sequence string
    of the first FASTA record -- TODO confirm against the caller. The table
    is built with ``print_table=True``. Nothing is returned.
    """
    build_table = KMP().build_kmp_table
    build_table(fasta_list[0][1], print_table=True)
def fifthw(self):
    """Show the input rules in a message box, then create the KMP demo.

    The dialog is titled "规则" and has a single Yes button; its text
    describes the expected input format. After the dialog call returns,
    ``KMP`` is imported and a new instance is stored in ``self.demo5``.
    """
    dialog_title = "规则"
    dialog_text = "请输入 用半角逗号间隔的字符串 第一串最少6个字符最多20个 第二串最少一个 第二串字符数不超过第一串 回车完成输入"
    QMessageBox.information(self, dialog_title, dialog_text, QMessageBox.Yes)

    # Imported here, after the dialog, as in the original.
    from kmp import KMP
    self.demo5 = KMP()