Пример #1
0
def main(query):
    """Run the search routine selected on the command line.

    ``sys.argv[1]`` is parsed as an integer that picks the routine
    (1 -> ``kmp.kmp``, 2 -> ``bm.bm``, 3 -> ``regex.regex``) and
    ``sys.argv[2]`` is handed to it unchanged.  Any other number does
    nothing.  The ``query`` parameter is accepted but not used.
    """
    choice = int(sys.argv[1])
    if choice not in (1, 2, 3):
        return

    argument = sys.argv[2]
    if choice == 1:
        kmp.kmp(argument)
    elif choice == 2:
        bm.bm(argument)
    else:
        regex.regex(argument)
Пример #2
0
 def test_kmp(self):
     """Check ``kmp`` on texts with one, several and no matches."""
     # (text, pattern, expected list returned by kmp)
     expectations = (
         ("THIS IS A TEST TEXT", "TEST", [10]),
         ("AABAACAADAABAAABAA", "AABA", [0, 9, 13]),
         ("AAAAAAAAAAAAAAAAAB", "AAAAB", [13]),
         ("ABABABCABABABCABABABC", "ABABAC", []),
         ("ABC ABCDAB ABCDABCDABDE", "ABCDABD", [15]),
     )
     for text, pattern, expected in expectations:
         self.assertListEqual(kmp(text, pattern), expected)
Пример #3
0
 def __init__(self, uploaded_files, text, option):
     """Search ``uploaded_files`` for ``text`` and store the outcome in ``self.hasil``.

     Args:
         uploaded_files: Passed to the matcher's ``convertText`` (KMP and
             Boyer-Moore) or to the ``regex.regex`` constructor.
         text: Pattern to look for.  It is lower-cased before KMP and
             Boyer-Moore matching and passed unchanged to the regex matcher.
         option: ``"pilihan1"`` selects KMP, ``"pilihan2"`` selects
             Boyer-Moore, any other value selects the regex matcher.
     """
     if option == "pilihan1":
         matcher = kmp.kmp()
         matcher.convertText(uploaded_files)
         index = matcher.kmpMatch(text.lower())
     elif option == "pilihan2":
         matcher = boyce.boyce()
         matcher.convertText(uploaded_files)
         index = matcher.bmMatch(text.lower())
     else:
         # The regex matcher builds the whole result itself.
         reg = regex.regex(uploaded_files)
         self.hasil = reg.regexMatch(text)
         return

     # The search runs once; its result drives both the "not found" check
     # and the message, instead of searching the file a second time.
     if index == -1:
         message = "Tidak ditemukan " + text + " pada file."
     else:
         message = "indeks pada file: " + str(index)
     self.hasil = [['', message, '']]
Пример #4
0
def extractor(keyword, sentences, option):
    """Collect time and amount information for sentences mentioning ``keyword``.

    Args:
        keyword: Text to search for; interpreted as a regular expression
            when ``option`` is ``'regex'``.
        sentences: Indexable pair; ``sentences[0]`` is copied into every
            result row and ``sentences[1]`` is the iterable of sentences
            to scan.
        option: Search backend to use: ``'kmp'``, ``'bm'`` or ``'regex'``.

    Returns:
        A list of ``[keyword, sentences[0], time, amount, sentence]`` rows,
        one for each sentence in which the keyword was found.  ``amount``
        is ``''`` when ``find_amount_info`` reports nothing.

    Raises:
        ValueError: If a sentence has to be searched and ``option`` is not
            one of the supported backends.
    """
    informations = []
    for sentence in sentences[1]:
        # The search backend is chosen by the caller through `option`.
        if option == 'kmp':
            positions = kmp(keyword, sentence)
        elif option == 'bm':
            positions = boyer_moore(keyword, sentence)
        elif option == 'regex':
            positions = [m.start(0) for m in re.finditer(keyword, sentence)]
        else:
            # Fail with a clear message instead of hitting an unbound local.
            raise ValueError("unknown search option: %r" % (option,))
        if not positions:
            continue

        times = find_time(sentence)
        if len(times) == 0:
            # No time marker here: borrow the first one found in any sentence.
            for other_sentence in sentences[1]:
                times = find_time(other_sentence)
                if times:
                    break
        if len(times) == 0:
            times = ['tidak ada penanda waktu']

        amounts = find_amount_info(sentence)
        closest_amount = ''
        if amounts:
            # Pick the amount nearest to the first keyword hit (ties keep the
            # earliest candidate).
            # NOTE(review): assumes each entry is (offset into sentence, text)
            # -- confirm against find_amount_info.
            first_hit = positions[0]
            closest_amount = min(
                amounts, key=lambda candidate: abs(candidate[0] - first_hit)
            )[1]

        informations.append(
            [keyword, sentences[0], times[0], closest_amount, sentence])

    return informations
Пример #5
0
def analyseMsg(recvMsg):
    """Split ``recvMsg`` at ``DELIMITER`` and hand each piece to ``formatMsg``.

    ``kmp.kmp`` is asked for the delimiter position (presumably the index of
    its first occurrence -- confirm against the kmp module).  When that
    position is exactly at the end of the message, the whole message is
    formatted.  Otherwise the part up to and including the delimiter is
    formatted and the function recurses on what follows, skipping
    ``IGNORE_LEN`` items after the delimiter.
    """
    position = kmp.kmp(recvMsg, DELIMITER, KMP_TABLE)
    delimiter_len = len(DELIMITER)
    if position == len(recvMsg) - delimiter_len:
        formatMsg(recvMsg)
        return

    cut = position + delimiter_len
    formatMsg(recvMsg[:cut])
    # analyse the remainder of the buffer
    analyseMsg(recvMsg[cut + IGNORE_LEN:])
Пример #6
0
def analyseMsg(recvMsg, roomid):
    """Split ``recvMsg`` at ``DELIMITER`` and hand each piece to ``formatMsg``.

    ``kmp.kmp`` is asked for the delimiter position (presumably the index of
    its first occurrence -- confirm against the kmp module).  When that
    position is exactly at the end of the message, the whole message is
    formatted.  Otherwise the part up to and including the delimiter is
    formatted and the function recurses on what follows, skipping
    ``IGNORE_LEN`` items after the delimiter.  ``roomid`` is forwarded
    unchanged to every ``formatMsg`` call and to the recursive call.
    """
    position = kmp.kmp(recvMsg, DELIMITER, KMP_TABLE)
    delimiter_len = len(DELIMITER)
    if position == len(recvMsg) - delimiter_len:
        formatMsg(recvMsg, roomid)
        return

    cut = position + delimiter_len
    formatMsg(recvMsg[:cut], roomid)
    # analyse the remainder of the buffer
    analyseMsg(recvMsg[cut + IGNORE_LEN:], roomid)
Пример #7
0
def naive_kmp(s1, s2):
    """KMP-based counterpart of `naive` (which compares with `_strcmp`).

    Returns False when the lengths differ.  Otherwise returns True as soon
    as `kmp.kmp` reports a hit (a non-None result) for `s1` in
    `Shifted(s2, i)` for some offset `i`, and False if no offset matches.
    """
    if len(s1) != len(s2):
        return False

    # The shift table depends only on s1, so it is built a single time.
    shift_table = kmp.get_shift_table(s1)
    return any(
        kmp.kmp(Shifted(s2, offset), s1, T=shift_table) is not None
        for offset in range(len(s2))
    )
Пример #8
0
            i += j - pmt(small[:j])
        else:
            i += 1
    return False


if __name__ == "__main__":
    words = read_file('words.txt')
    # NOTE(review): with n = -1 the slice words[:n] leaves out the last
    # word -- confirm that this is intended.
    n = -1
    # Build the tree once and reuse it for every line.
    tree = build_tree(words[:n])
    lines = read_file('1984.txt')

    # Benchmark 1: match each line against the prebuilt tree.
    start = time.time()
    for line in lines:
        match_w = match_doc(tree, line)
        if match_w != "":
            print("{} \t {}\n".format(match_w, line))

    end = time.time()
    print(end-start)  # 0.10075116157531738

    # Benchmark 2: test kmp, one search per (line, word) pair.
    start = time.time()
    for line in lines:
        for word in words:
            result = kmp(line, word)
            if result:
                print(word)
    end = time.time()
    # > 30min. After 8 minutes only about 100 matches had been found, while
    # the previous algorithm found 400+, so the run was not left to finish.
    print(end - start)
Пример #9
0
if __name__ == "__main__":
    # Command line: <flag: -kmp | -bm | -re> <search query> <pattern>
    searchKey = sys.argv[2].lower()
    query = api.search(q=searchKey, count=100)
    tweet = [tweetapi.text for tweetapi in query]

    data = {
        'query': sys.argv[2],
        'pattern': sys.argv[3],
        'tweets': [],
    }

    # Pick the matcher named by the flag; an unknown flag leaves the
    # tweet list empty but the output file is still written.
    mode = sys.argv[1]
    if mode == '-kmp':
        matcher = kmp.kmp
    elif mode == '-bm':
        matcher = boyer_moore.bmMatch
    elif mode == '-re':
        matcher = regex.regexMatch
    else:
        matcher = None

    if matcher is not None:
        for isi in tweet:
            spamidx = matcher(isi.lower(), sys.argv[3])
            data['tweets'].append({'tweet': isi, 'spamidx': spamidx})

    with open('data.txt', "w") as outfile:
        json.dump(data, outfile)
Пример #10
0
def is_rotation(s1, s2):
    """Tell whether `s2` is a rotation of `s1` with a single KMP search.

    Inputs of different length are rejected immediately; otherwise `s1` is
    searched for inside `s2 + s2` and any non-None result counts as a hit.
    """
    same_length = len(s1) == len(s2)
    return same_length and kmp.kmp(s2 + s2, s1) is not None
Пример #11
0
sys.path.append("..")
from lib import spacedPrint
from kmp import kmp

###############################
######## PATTERN MATCHING #####


def findOccurrences(pattern, text):
    """Return the start indices of all occurrences of `pattern` in `text`.

    Every window of `len(pattern)` items is compared with the pattern, so
    overlapping occurrences are reported too; indices come out in
    ascending order.
    """
    width = len(pattern)
    last_start = len(text) - width
    return [
        start
        for start in range(last_start + 1)
        if text[start:start + width] == pattern
    ]


if __name__ == "__main__":
    pattern_pm = "TGTTCATTG"
    text_pm = "TATGTTCATATGTGTTCATGTTCTTACACTAATGCAGTGTTCATTGTTCATGCCTTGTTCATTGTTCATTGTTCATGTTGTTCATCTGTTCATCACTGTTCATTCTTGTTCATATCTTTGGTGTTCATACTGGCCGTGTTCATGCAGCTCCGCTGTGTTCATATGTTCATGCAGTGTTCATTGTTCATATGTTCATATGTTCATTTGTTCATGGGACGATTTGTTCATTAGATTGTGTGTTCATTGTTCATAATGTTCATAATGTTCATCCGATGTTCATATGTTCATCTGTTCATCGTGTTCATACTGAAAAACTTGTTCATACAGTGTTCATTCTGTTCATCAAACAAGATGGTTTGTTCATTGTTCATACGAGGCTGTTCATCTCTGTTCATTTGTTCATTAGGTGTTCATTGCTGTTCATATGTTCATTGTTCATGTGTTCATAAGTGTTCATCGTTTCGTTTACGGGATCTGTTCATTTGTGTTCATATTGTTCATGATTTCTGGTTGAATGTTCATTGTTCATCTCGTGTACCTGTTCATATGTTCATCCGTGTTCATGTGTTCATTGTTCATGCCTGTTCATCGTGTTCATGTGTGTTCATTATAATGTTCATATGAATGTTCATAAATGACTTGTTCATCGTGTTCATCTGTGTTCATTTCGCTGTTCATACTGTTCATAAGCGTAATGTTCATTCTGTTCATTGTTCATTGTTCATGCTACCTTGTTCATATCACGTGTTCATCCGTGTTCATTGTTCATTCTCATTGTTCATTTTATGTGTTCATTGTTCATGCTTGATGTTCATTTGTTCATTCTGTTCATGTGTTCATTGTTCATTTACGTGTTCATTTGTTCATCGCCTGTTCATTGTTCATAGATGTTCATTTGTTCATCCGTGTTCATTACGGTGTTCATAGGACTGTTCATCTATGTTCATATGTTCATTGTTCATGTGTTCATTAAGGTGTTCATGACCTTGTTCATCCTTGTTCATCTTACTGTTCATCAAGTGTTCATATGTTCATTAACTGATGTTCATTGCTGTTCATCGTTGTTCATTGTTCATTCTGTTCATGGTGTTCATACCATGTTCATCGGCGGTTGTTCATGTCTGTTCATGATTCTGTTCATTTGTTCATTGTTCATGCTGTTCATAGTGTTCATTGTTCATTTGTTCATATTGTTCATGATTTGTTGTTCATGATTGTTCATTTACAATGTTCATTGTTCATAGTGTTCATTTGTTCATATGTTCATATGTTCATTCTTGTTCATTGTTCATTAGCCATTGTTCATTGTTCATTGTTCATATGATGTTCATTGTTCATTGTTCATTTTGTTCATTGTTCATTTGTTCATACCTGTTCATGACCTGTCCTTTGTTCATCCTGTTCATGTAGGTGTTCATGGGTGTTCATCTTGTTCATAATGTTCATTCAGTGTTCATAGCTATTGTTCATATTTGTTCATTTTGTTCATACTGTTCATTTGTTCATTGTTCATATGTTCATTGTTCATGGTCGCTTCCGGTTTAAGATGTTCATGTACAATTGTTCATACATGTTCATTTGTTCATTGTTCATTTGTTCATTAAGTGTGTTCATCTGTTCATTGTTCATACTATGTTCATTGTTCATTGTTCATCTACGATGTTCATTTTATGTTCATGATGTTCATTGTTCATGCTCCATCGTTTTGTTCATCTGTTCATGTGTTCATCGGCTGTTCATCTGTTCATAAAGCTTGTTCATTTGTTCATTGTTCATTTTATTTGTTCATGCTGTTCATAGTGTTCATTGTTCATCCCTGTTCATCCAATGTGTTCATGTGTTCATGCCGATGTGGTGTTCATGTGTTCATCAGTGGATGTTCATTCCTGTTCATCTGTTCATCCCGGTGTTCATACTGTTCATGAGTGTTCATGCACAACGTAAGCCTATGTTCATATTGTTCATTGCACGTACTCTGTTCATTT
GTTCATATGTGTTCATATTGTTCATTGTTCATGTCGTTGTTCATCTGTTCATAACAGTGTTCATTGTTCATTGTTCATTGTTCATAGCTGTTCATTGTTCATATTGTTCATGATGTTCATTGTTCATATGTTCATCGTGTTCATCTGTTCATCACGTTAGGGTGTGTTCATTGTTCATGCCTGTTCATTGTTCATTATTGTGTTCATCAATGTATGTTCATTGTTCATCCATTCTGTTCATGGTGTTCATTGTTCATCCTTTAGATGTATGTTCATCACTGTTCATCATGTTCATGTGTTCATTTTGTTCATCTGTTGTTCATATGTTCATGCCTGTTCATATGTTCATCTGTTCATTGTTCATGCACCTGGTGTTCATTGTTCATTACTCCTGTTCATGCTGTTCATTCTGTTCATAGTGTGTTCATTGTTCATCTGTTCATCTGCTGTTCATATGTTCATTTGTTCATAACTGTTCATTTGTTCATCAGACTGTTCATAGTGTTCATCTGTTCATTGTTCATACTTGTTCATGTTGTTCATCACAGGTGTTCATTGTTCATTGTTCATCACGATAATGCTTATATTGATGTTCATACCTATTATGTTCATTGATGTTCATCCATGTGTTCATCAAGGGTTCTGTTCATTGTCATGTTCATAGCCAGTGTTTGTTCATTATGTTCATGGTGTTCATAAACGGAGTGTTCATGTAACATGTTCATTGTTCATTATGTGTTCATTGTTCATCCCCATTGTTCATCGTGTTCATGTTCCCCTATGTTCATTAGTGGTAGTCTGTTCATTTGTTCATCTGTTCATTGTTCATATGTTCATTGTTCATCTAGTGTTCATTGTTCATAATGTTCATACTGCCTGTTCATAAATGTTCATAAGACATAGCTGTTCATTGTTCATTGTTCATATCCTGTTCATGTGTTCATTCATTGTTCATATGCAATGTTCATGGATGTTCATCAAGTGTTCATATGTTCATAAATGTTCATGTGTTCATTTCTGTTCATTGTTCATTCAATACGCAGGTATGTGTTCATAATGTTCATATGTTCATACGATTGTTCATACCTGTTCATAGTGTTCATTGTCCTGTTCATACTATGTTCATTTTGTTCATTGTTCATGAAGTGCATTACTGATGTTCATTGTTCATGCTGTGTTCATTGTTCATGTTGTTCATTCTGTTCATGAGCTGAACTGTTCATGTGTTCATCCCTGTTCATGCTATGTTCATTGGGATGTTCATATGTTCATATTGTCAGCGATGTTCATCGTCATGTTCATATGTTCATATGTTCATCACGATGTTCATAGTGTTCATTGTTCATTGTTCATCGTGCTGTTCATACGTGTTCATTGTTCATGCCTGTTCATTGTTCATTGTTCATCTGTTCATGTTGTTCATTTGCCGTGTTCATATGTTCATGTTAGTTGTTCATGGTGTTCATTGTTCATTGTTCATTGTTCATTGTTCATTCCGTACACTGTTCATATGTTCATTGTTCATAGTCTTTGTTCATGCACGATGTTCATTGTTCATTGTTCATATGTTCATGAATGTTCATCCTGCGTAGATATGTTCATATGTTCATGTGTTCATTGTTCATTACTTGTTCATAATGTTCATTGGTTGTTCATCCAGTGTTCATCTGTTCATCTAATGTTCATTGTTCATTGTTCATAGTGTTCATGGCGTGTTCATTGTTCATGGATCGTGTTCATCGTGTTCATTTAGTTTTGTACATAGTGCTGTTCATTGTTCATGGGATTGTGTTCATATATGTTCATTGTTCATTGTTCATTGTTCATAATGTTCATTATCCTCTTGTTCATTGGAACCATGTTCATAGCACAGTGTTCATATGTTCATTTGTTCATCCCTGTTCATACGTCACTGTTCATTGAATGTTCATGGCTGTTCATGATATGTTCATGCCGATGTTCATGGGAACTGTTCATTATGTTCATCTGTAGCGTGCGAG
TTGTTCATGTGCAAATGTTCATCACGATGGCCGGAGGTGTTCATCTGTGTTCATTGTTCATTGTTCATTGTTCATGATGTTCATATGTTCATCTATGTTCATCGGTCTGTTCATGGGTGTTCATGAAATTGTTCATGATGTTCATTGTTCATTGCTCTGTTCATTTGTTCATCTGTTCATGCTGTTGTTCATTCAGGCATGGTTGTTCATGATCCCCGTGTTCATGTGTTCATGTGTTCATCTTGTGTTCATTGTTCATGTGTTCATTGTTCATATGGTATACGCAAATGTTCATTGTTCATTCATGTTCATGTGTTCATCAAGCTCTGTTCATAGCTGTTCATGGTGTTCATAAGTGTTCATAGGTGTTCATCATGTTCATTGTTCATGTTAGTGTTCATGTCTGTTCATACCTGTTCATGTTGTTCATTGTTCATCCTGTTCATGTTGTTCATCTTGTTCATCGTTGTTCATCTTGTTCATACTGTTCATTGTTCATATTGTTCATATAGTCGTTTGTTCATCATGTTCATGGTTGTTCATTGTTCATGCATGTTGTTCATACAAGACCTAAACTGTTCATTTTGTTCATGTGTTCATCTTTGTTCATGTGTTCATCGGTGTTCATGTTGTTCATTGAGGTGTTCATACACGGTGTTCATATCTGTTCATTGTTCATATGTTCATTGTTCATTCGTGTTCATGTGTTCATAAAATGTTCATTTATGTTCATATGTTCATTTGTGTTCATGGTATTGTTCATATGTTCATGTGTTCATTGCGTGTTCATTGGTTTGTTCATTGGAGAAATGTTCATCTGTTCATATGTTCATTGTTCATTGTTCATTCGTGTTCATCCTTGTTCATCTTGTTCATGTTGTAGGATGTTCATTGTTCATTGTTCATAACTGTTCATAAATTGTTCATTGCTGTTCATGTTTGTTCATTGTTCATTTGTTCATGTGTTCATCATGTTCATAAGAGTGTTATATGTTCATTGTTCATTCTGTTCATTGTTCATGTTGTTCATGCTGTTCATCTGTTCATTGTTCATTGTGTTCATGTGTTCATAGTCTGTTCATCTGATGTTCATGATCAGCTTTGTTCATCATGCTAGTGTTCATTGTTCATAATGTTCATTATGTTGTTCATGCTGTTCATTGTTCATTGTTCATCATTATGTTCATAATGTTCATCATGTTCATATGTTCATACGTGTTCATGGTTGTTCATTTGTTCATAACTGTTCATTGTTCATGTGTTCATGATGTTCATTGTCGTGTCCCTGGTTGTTCATTGTTCATCGTTAATATGTTGTTCATATGTCGAGAGATGTTCATGCGGCGCTGTTCATTTGTTCATTGTTCATTCCTATGTTCATTGTTCATATGTTCATCTGTTCATATGTTCATGTGTTCATCACCTGTTCATTGTTCATTGTTCATTGTTCATATGAATGTTTGTTCATAGTATGTTCATAGGTGAATGATGTTCATATTGTTCATCTGTTCATTGTTCATGTGTTCATGGTGTTCATTGTTCATAGTGTTCATCGTGTTCATGGTGTTCATTTGTTCATAGTCCGTGTTCATTGTTCATCCTGTTCATGATGTTCATTATTTTATATGTTCATTTGTTCATTTAGTGTTCATTGTTCATAGTTGTTCATTGTTCATTGCGCTTGTTCATCACGTGTTCATGCTGTTCATAGGGCCTTTTATACTCGCTGTTCATGTCCCACTGTTCATGCCTGTTCATGCGTTGTTCATCGAGTGTTCATGAAGTGTTCATTGTTCATGTTGTTCATTATGTTCATTGTTCATGTTGTTCATATGTTCATGCAACTGTTCATGCACGTGTTCATTGTTCATCTGTTCATCTGTTCATTGTTCATGTGTTCATTGTTCATGTTGTTCATTTGTTCATTTGTTCATTTGTTCATATGTTCATCAGGGCCATGTTCATATGTTCATGTGTTCATGCCCTGTTCATTGTTCATG
TGTTCATTGTTCATCTTGTTCATTGTTCATTGTTCATTCTGGTGTTCATGATGTTCATTAGTGTTCATTGTTCATGTGCAGGAATTGTTCATGTACCTGTTCATTCTGTTCATTATGTTCATGAGTCTCGTCTGGCCTCAAAACTTGTTCATATGTTCATTAAGTGTTCATATGTTCATTTATGTTCATTCGTTTGTTCATTGTTCATCGTGTTCATGGTATTGTTCATGGTTGTTCATTGTTCATATTGTTCATTTGTTCATCTCACCCACGCTGTTCATGCCTGTTCATGCATGTTCATATGTTCATCATGTCTGTTCATATGTTCATTGTGTTCATTGTTCATACTGTTCATTTGTTCATCTTGTTCATGCCCATTACGGGTGTTCATGCTCGTGTTCATTACTATGTTCATATGCGTGTTCATTGTTCATTGTTCATGGGATGTTCATTACTGTTCATTGTTCATTGTTCATAGCGAGTGTTCATGGGATGTTCATCTTGTTCATTGTTCATCACTGTTCATTTGTTCATGCCTGTTCATTCCTGTTCATGTGTTCATTGTTCATTCCTACAAAATCATACGCGATGATGTTCATGGTGTTCATAGTGTTCATACTGTTCATTCTTGTTCATCTGTTCATTGTTCATTGTTCATAGTGTTCATTGTTCATGAATGTTCATTGTTCATCTATGCTGTTTGTTCATCTAACCTGTTCATTGTTCATTTGTTCATCGTGGCGTGTTCATTAGCGCGGTGTTCATCCATGCGGTGTTCATTGTTCATATGTTCATAGCTTCCCTATGTTCATTGTTCATCTTAATATGTTGTTCATAATGAACTTGACCAGAGTATGTTCATAGTTTCCAGAATGTTCATTCCCGTGTTCATTGTTCATTTTCTGATGTTCATTGTTCATTGTTCATTGTTCATAGGTGTTCATTCTGTTCATTGTTCATGCCCATGTTCATCTCCGATTGTTCATTGTTCATTGTTCATAGTGTTCATTGTTCATCTGTTCATGTGTTCATGAGATGCTGTTCATTCAGACTCGTGTTCATACACATACGATGTTCATTGTTCATACAATGTTCATCTGTTCATTCGCTGTTCATTCTGTTCATAAGTCCTAAGCATGTTCATATAATGTTCATCCTGTTCATCCCCCGTGTTCATTTGTTGTTCATCCGCATAGCGCCGGGTATGTTCATTGTTCATGGCATTTGTTCATGTGTTCATCTTCTGTTCATCGTGTTCATTGTTCATGTCTGTGTTCATGTGTTCATATCTGTGTTCATTGTTCATTCAGCCTGTTCATACTGTTCATACAGATGGAATGTTCATGTGTTCATATTTGATTGCTCGGGGATGTTCATTTGACCTGTTCATTTGTTCATACGTGTTCATCATAGTATTTCTGTTCATTGTTCATTGTTCATGTGTTCATTCTTTACGCTGTTCATTCGGTACTGATATGTTCATGTTCGCACTAACCCTGTTCATAACGTGTTCATTGTTCATCTTATGTTCATGGCCGGGTCCGGCATTGTTCATCGGCATGTTCATATGTTCATCGATTGCGGGTGTGTTCATAAGGATGTTCATTTCCAAAGAATGTTCATCTCTGTTCATGTGTTCATTGTTCATTGCTGTTCATGAGTTGTTCATATTGTTCATTGTTCATCGGCATGTATGTTCATATGTTCATTGTTCATAATGTTCATTGTTCATGATGTTCATCAGGGAAATGTTCATCTGTTCATTGTTCATAACTGTTCATTGTTCATTGTTCATTGTTCATTGTTCATAGTTGTTCATAAGCGCTGTTCATCATGTTCATTGTTCATCGGGTGTTCATATCGACTCACTTGTTCATTGTTCATATGTTCATACTGTTCATGCTGTTCATATATGTTCATAATTGTTCATGGGTGTTCATTGTTCATCATGTTCATATGTTCATTGTTCATATGTTCATCTGTTCATTGTTCATCGGGCC
GAGGTGAATGTTCATTGTTCATTCATTGTTCATAACCAGCGTTGTTCATTTGTTCATGTGTTCATTGTTCATATTGTTCATTGTTCATTGTTCATGCCAATGTTCATATGATTTTTGTTCATGTGTTCATTGTTCATTGTTCATTTACACTGTTCATCCCCCTGTTCATCTGTTCATTAAGGCCACTTCATGTTCATGATGTTCATTGTTCATCTTCAGCTGATGTTCATTGTTCATCCATTTGTTCATGATCATGTTCATATTGTTCATAGTGATGCTGTTCATACAACTGTTCATGGAGTGTTCATTGCTGTTCATGTGTTCATCACTGTTCATCGTGTTTGTTCATATGTTCATTGTTCATGGTTTGTGTTCATTGTTCATATGTTCATTGTTCATCCCCAGCATGTTCATAATGTTCATCCCTGTTCATTGTTCATTGTTCATTCACAAATTCTGTTCATAGATGTTCATTGTTCATAGTGTTCATTGTTCATTGTTCATAGGATGTTCATCATGTTCATAATCATGTTCATTGTTCATTACATTGTTCATTGTTCATCCGTTCAGCCATTGTTCATTGTTGTTCATTGTTCATCTGTTCATTTTGTTTCTGTTCATCTCCCTGTTCATACTGCTGTTCATTATGTTCATGTTCGTGTATTTGTTCATTGTTCATTGTTCATTTCTGTTCATCCAGGTGTTCATGTGTTCATTGTTCATGTGTTCATCTGTTCATATGTTCATATGTTCATCTCTGTTCATTGTTCATTGTTCATGGCTGTTCATTGTTCATACTTGTTCATTTGTTGTTCATTGTTCATGGCATCATGTTCATGTGTTCATTGTGTTCATCCTGTTCATCTGTTCATACAGAATTGTTCAT"
    # Print the matches found by the window-by-window scan ...
    spacedPrint(findOccurrences(pattern_pm, text_pm))

    print("-------------\n")
    # ... and the result of kmp on the same input, for comparison.
    spacedPrint(kmp(text_pm, pattern_pm))

    ## EXERCISE BREAK
    # Read the input inside a context manager so the handle is always
    # closed, and avoid naming the handle `file`.
    with open("data/Vibrio_cholerae.txt", 'r') as input_file:
        text_vc = input_file.read()
    pattern_vc = "CTTGATCAT"
    spacedPrint(findOccurrences(pattern_vc, text_vc))
Пример #12
0
import time

# Benchmark: time KMP and Boyer-Moore searches on random lowercase
# haystacks of growing length and plot both timing series.
alphabet = list(string.ascii_lowercase)
bm_meas = []
kmp_meas = []
k = 1000
# Haystack lengths to measure: 10*k, 20*k, ..., k*k.
x = list(range(10 * k, 1 * k * k + 1, 10 * k))
needle_len = k

for hs_len in x:
    # The needle is cut out of the middle of the haystack, so it is
    # guaranteed to occur in it.
    haystack = random.choices(alphabet, k=hs_len)
    entry_index = hs_len // 2
    needle = haystack[entry_index:entry_index + needle_len]

    # perf_counter() is the clock meant for measuring short intervals;
    # time.time() follows the wall clock and may jump or be coarse.

    # Measure KMP
    # NOTE(review): kmp is called as (needle, haystack) while the BM search
    # below is called as (haystack, needle) -- confirm kmp_search.kmp's
    # parameter order.
    start = time.perf_counter()
    kmp_search.kmp(needle, haystack)
    kmp_meas.append(time.perf_counter() - start)

    # Measure BM
    start = time.perf_counter()
    bm_search.search(haystack, needle)
    bm_meas.append(time.perf_counter() - start)

plt.grid(True)
plt.ylabel("Время, сек.")
plt.xlabel("Длина текста для поиска паттерна")
plt.plot(x, kmp_meas, "o--")
plt.plot(x, bm_meas, "o--")
plt.legend(["Алгоритм Кнута-Морриса-Пратта", "Алгоритм Бойера-Мура"])
plt.show()