def test_suffix_binary_search_tobeornottobe_contains_tobeornottobe(self):
    """Searching for the entire text (terminator included) must yield 0."""
    text = "tobeornottobe$"
    # Precomputed suffix array for `text`: start offsets of its suffixes in
    # lexicographic order, with the '$' terminator sorting first.
    suffix_array = [13, 11, 2, 12, 3, 6, 10, 1, 4, 7, 5, 9, 0, 8]
    substring = "tobeornottobe$"
    expected = 0

    computed = suffix_binary_search(text, suffix_array, substring)

    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that no longer exists in recent unittest releases.
    self.assertEqual(computed, expected)
def test_suffix_binary_search_abracadabra_doesnt_contain_abrax(self):
    """Searching for "abrax", which does not occur in the text, must yield -1."""
    text = "abracadabra$"
    # Precomputed suffix array for `text`: start offsets of its suffixes in
    # lexicographic order, with the '$' terminator sorting first.
    suffix_array = [11, 10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]
    # Shares the prefix "abra" with the text but diverges on the final 'x'.
    substring = "abrax"
    expected = -1

    computed = suffix_binary_search(text, suffix_array, substring)

    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that no longer exists in recent unittest releases.
    self.assertEqual(computed, expected)
def search_dataset(search_type):
    """Time substring searches over every dataset and print per-key totals.

    Reads the module-level ``dataset_dict``, ``text``, ``suffix_array``,
    ``llcp``, ``rlcp`` and ``LIST_SIZE``. Datasets are visited in ascending
    key order; for each one the wall-clock time spent inside the search
    calls is accumulated and reported.

    Args:
        search_type: ``"lcp"`` selects ``lcp_search``; any other value
            selects ``suffix_binary_search``.

    Raises:
        AssertionError: if the slice of ``text`` starting at the returned
            position does not equal the searched substring.
    """
    # Parenthesised single-argument print produces the same output under
    # Python 2's print statement and Python 3's print function.
    print("Searching dataset...")
    for key in sorted(dataset_dict):
        total_time = 0
        for substring in dataset_dict[key]:
            start = time.time()
            if search_type == "lcp":
                answer = lcp_search(text, suffix_array, substring, llcp, rlcp)
            else:
                answer = suffix_binary_search(text, suffix_array, substring)
            end = time.time()
            # Sanity check kept outside the timed region so it does not
            # skew the measurement.
            assert substring == text[answer: answer + len(substring)], \
                "search returned %r for substring %r" % (answer, substring)
            total_time += end - start
        # NOTE(review): the average divides by LIST_SIZE, presumably the
        # number of substrings in each dataset -- confirm against the code
        # that builds dataset_dict.
        print(" key: %2d | total time: %f | average time: %f"
              % (key, total_time, total_time / LIST_SIZE))
    print("Done.")