def test_words_with_shared_prefix_should_retain_counts(self):
    """Check that two words sharing a prefix each keep their own count.

    "pipe" (count 5) and "pips" (count 10) are registered, then three
    queries are run with ``Verbosity.ALL`` and a max edit distance of 1.
    Every query must return exactly both words, in the expected order and
    with the counts they were registered with.
    """
    speller = SymSpell(1, 3)
    speller.create_dictionary_entry("pipe", 5)
    speller.create_dictionary_entry("pips", 10)

    # query -> expected (term, count) pairs, in result order.
    scenarios = (
        ("pipe", (("pipe", 5), ("pips", 10))),
        ("pips", (("pips", 10), ("pipe", 5))),
        ("pip", (("pips", 10), ("pipe", 5))),
    )
    for query, expected in scenarios:
        suggestions = speller.lookup(query, Verbosity.ALL, 1)
        self.assertEqual(2, len(suggestions))
        for position, (term, count) in enumerate(expected):
            self.assertEqual(term, suggestions[position].term)
            self.assertEqual(count, suggestions[position].count)
def test_delete_dictionary_entry(self):
    """Check that deleting entries updates lookups, max length and word count.

    Starts with "stea", "steama" and "steem", then deletes "steama" and
    "stea" in turn. After each step the top suggestion for "steama",
    ``max_length()`` and ``word_count()`` are verified.
    """
    speller = SymSpell()
    for term, count in (("stea", 1), ("steama", 2), ("steem", 3)):
        speller.create_dictionary_entry(term, count)

    def assert_top_suggestion(expected_term):
        # The query and parameters are the same at every stage of the test.
        suggestions = speller.lookup("steama", Verbosity.TOP, 2)
        self.assertEqual(1, len(suggestions))
        self.assertEqual(expected_term, suggestions[0].term)

    # Initial state: the exact word is present.
    assert_top_suggestion("steama")
    self.assertEqual(len("steama"), speller.max_length())
    self.assertEqual(3, speller.word_count())

    # Removing the longest word shrinks max_length to the next longest.
    self.assertTrue(speller.delete_dictionary_entry("steama"))
    self.assertEqual(len("steem"), speller.max_length())
    self.assertEqual(2, speller.word_count())
    assert_top_suggestion("steem")

    # Removing a shorter word leaves max_length untouched.
    self.assertTrue(speller.delete_dictionary_entry("stea"))
    self.assertEqual(len("steem"), speller.max_length())
    self.assertEqual(1, speller.word_count())
    assert_top_suggestion("steem")
def test_lookup_should_not_return_non_word_delete(self):
    """Check that character-deleted forms of a word are not suggestions.

    Only "pawn" is in the dictionary. "paw" and "awn" (each "pawn" with one
    character removed) are looked up with a max edit distance of 0 and must
    yield no results.
    """
    speller = SymSpell(2, 7, 10)
    speller.create_dictionary_entry("pawn", 10)

    for fragment in ("paw", "awn"):
        suggestions = speller.lookup(fragment, Verbosity.TOP, 0)
        self.assertEqual(0, len(suggestions))
def test_lookup_transfer_casing(self):
    """Check that ``transfer_casing=True`` carries the input casing over.

    Each case builds a fresh single-word dictionary, looks up a differently
    cased input, and expects the top suggestion in the listed casing.
    """
    # (dictionary word, lookup input, expected top suggestion)
    cases = (
        ("steam", "Stream", "Steam"),
        ("steam", "StreaM", "SteaM"),
        ("steam", "STREAM", "STEAM"),
        ("i", "I", "I"),
    )
    for dictionary_word, typed, expected in cases:
        speller = SymSpell()
        speller.create_dictionary_entry(dictionary_word, 4)
        suggestions = speller.lookup(
            typed, Verbosity.TOP, 2, transfer_casing=True)
        self.assertEqual(expected, suggestions[0].term)
def test_add_additional_counts_should_increase_count(self):
    """Check that re-adding an existing word accumulates its count.

    "hello" is added with count 11 and then again with count 3; the lookup
    must report 11 and then 14.
    """
    speller = SymSpell()
    word = "hello"

    def reported_count():
        # Anything other than exactly one match is treated as count 0.
        matches = speller.lookup(word, Verbosity.ALL)
        if len(matches) == 1:
            return matches[0].count
        return 0

    speller.create_dictionary_entry(word, 11)
    self.assertEqual(11, reported_count())

    speller.create_dictionary_entry(word, 3)
    self.assertEqual(11 + 3, reported_count())
def test_verbosity_should_control_lookup_results(self):
    """Check how many suggestions each verbosity level returns.

    With "steam", "steams" and "steem" in the dictionary, looking up
    "steems" at max edit distance 2 must give 1 result for ``TOP``, 2 for
    ``CLOSEST`` and 3 for ``ALL``.
    """
    speller = SymSpell()
    for term, count in (("steam", 1), ("steams", 2), ("steem", 3)):
        speller.create_dictionary_entry(term, count)

    expected_sizes = (
        (Verbosity.TOP, 1),
        (Verbosity.CLOSEST, 2),
        (Verbosity.ALL, 3),
    )
    for verbosity, expected_size in expected_sizes:
        suggestions = speller.lookup("steems", verbosity, 2)
        self.assertEqual(expected_size, len(suggestions))
def test_add_additional_counts_should_not_overflow(self):
    """Check that accumulated counts are capped at ``sys.maxsize``.

    "hello" starts at ``sys.maxsize - 10``; adding 11 more must leave the
    reported count at exactly ``sys.maxsize``.
    """
    speller = SymSpell()
    word = "hello"

    def reported_count():
        # Anything other than exactly one match is treated as count 0.
        matches = speller.lookup(word, Verbosity.ALL)
        if len(matches) == 1:
            return matches[0].count
        return 0

    speller.create_dictionary_entry(word, sys.maxsize - 10)
    self.assertEqual(sys.maxsize - 10, reported_count())

    speller.create_dictionary_entry(word, 11)
    self.assertEqual(sys.maxsize, reported_count())
def test_pickle_compressed(self):
    """Round-trip a loaded dictionary through ``save_pickle``/``load_pickle``.

    The dictionary at ``self.dictionary_path`` is loaded, pickled to
    ``dictionary.pickle`` under ``self.fortests_path`` and loaded into a
    second instance. Both instances must agree on ``max_length()`` and on
    the top suggestion for "flam".
    """
    pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
    edit_distance_max = 2
    prefix_length = 7
    sym_spell = SymSpell(edit_distance_max, prefix_length)
    sym_spell.load_dictionary(self.dictionary_path, 0, 1)
    try:
        sym_spell.save_pickle(pickle_path)

        sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
        sym_spell_2.load_pickle(pickle_path)
        self.assertEqual(sym_spell.max_length(), sym_spell_2.max_length())
        self.assertEqual(
            sym_spell.lookup("flam", Verbosity.TOP, 0, True)[0].term,
            sym_spell_2.lookup("flam", Verbosity.TOP, 0, True)[0].term)
    finally:
        # Clean up even when an assertion or the load fails, so a broken
        # run does not leave a stale pickle in the fixtures directory.
        if os.path.exists(pickle_path):
            os.remove(pickle_path)
def test_lookup_include_unknown(self):
    """Check the lookup of "flam" when the fourth lookup argument is True.

    "flame" (count 20) and "flam" (count 1) are registered on an instance
    built with ``SymSpell(2, 7, 10)``. Looking up "flam" at max edit
    distance 0 with the extra ``True`` argument must return exactly one
    result whose term is "flam".
    """
    speller = SymSpell(2, 7, 10)
    for term, count in (("flame", 20), ("flam", 1)):
        speller.create_dictionary_entry(term, count)

    suggestions = speller.lookup("flam", Verbosity.TOP, 0, True)

    self.assertEqual(1, len(suggestions))
    top = suggestions[0]
    self.assertEqual("flam", top.term)
def test_lookup_should_not_return_low_count_word_that_are_also_delete_word(
        self):
    """Check that a low-count word equal to a delete form is not returned.

    On an instance built with ``SymSpell(2, 7, 10)``, "flame" is added with
    count 20 and "flam" with count 1. Looking up "flam" at max edit
    distance 0 must return nothing.
    """
    speller = SymSpell(2, 7, 10)
    for term, count in (("flame", 20), ("flam", 1)):
        speller.create_dictionary_entry(term, count)

    suggestions = speller.lookup("flam", Verbosity.TOP, 0)

    self.assertEqual(0, len(suggestions))
def test_lookup_should_find_exact_match(self):
    """Check that "streama" resolves to "steama" among close siblings.

    "steama", "steamb" and "steamc" are registered with counts 4, 6 and 2.
    A ``TOP`` lookup of "streama" at max edit distance 2 must return the
    single suggestion "steama".
    """
    speller = SymSpell()
    for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
        speller.create_dictionary_entry(term, count)

    suggestions = speller.lookup("streama", Verbosity.TOP, 2)

    self.assertEqual(1, len(suggestions))
    top = suggestions[0]
    self.assertEqual("steama", top.term)
def test_lookup_should_return_most_frequent(self):
    """Check that ``TOP`` returns the highest-count candidate.

    "steama", "steamb" and "steamc" are registered with counts 4, 6 and 2.
    A ``TOP`` lookup of "stream" at max edit distance 2 must return only
    "steamb" with count 6.
    """
    speller = SymSpell()
    for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
        speller.create_dictionary_entry(term, count)

    suggestions = speller.lookup("stream", Verbosity.TOP, 2)

    self.assertEqual(1, len(suggestions))
    top = suggestions[0]
    self.assertEqual("steamb", top.term)
    self.assertEqual(6, top.count)
def test_deletes(self):
    """Check the top suggestion and that ``entry_count()`` is truthy.

    Uses "steama", "steamb" and "steamc" with counts 4, 6 and 2. A ``TOP``
    lookup of "stream" at max edit distance 2 must return only "steamb"
    with count 6, and ``entry_count()`` must evaluate as true afterwards.
    """
    speller = SymSpell()
    for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
        speller.create_dictionary_entry(term, count)

    suggestions = speller.lookup("stream", Verbosity.TOP, 2)

    self.assertEqual(1, len(suggestions))
    top = suggestions[0]
    self.assertEqual("steamb", top.term)
    self.assertEqual(6, top.count)
    self.assertTrue(speller.entry_count())
def test_load_dictionary_encoding(self):
    """Check that a dictionary with non-ASCII terms loads and is searchable.

    Loads ``non_en_dict.txt`` from ``self.fortests_path`` and expects a
    ``TOP`` lookup of "АБ" at max edit distance 2 to return the single
    suggestion "АБИ".
    """
    non_english_path = os.path.join(self.fortests_path, "non_en_dict.txt")
    max_distance, prefix_len = 2, 7

    speller = SymSpell(max_distance, prefix_len)
    speller.load_dictionary(non_english_path, 0, 1)

    suggestions = speller.lookup("АБ", Verbosity.TOP, 2)

    self.assertEqual(1, len(suggestions))
    top = suggestions[0]
    self.assertEqual("АБИ", top.term)
def test_pickle_bytes(self):
    """Round-trip a dictionary through the in-memory pickle byte API.

    Three words are registered, serialized with ``save_pickle_bytes()`` and
    restored into a fresh instance via ``load_pickle_bytes()``. The
    restored instance must still correct "tst" to "test" and "boll" to
    "ball", and report distance 2 for "c0d3".
    """
    max_distance, prefix_len = 2, 7

    original = SymSpell(max_distance, prefix_len)
    for term, count in (("test", 123), ("ball", 4), ("code", 56)):
        original.create_dictionary_entry(term, count)
    payload = original.save_pickle_bytes()

    restored = SymSpell(max_distance, prefix_len)
    restored.load_pickle_bytes(payload)

    closest = restored.lookup("tst", Verbosity.CLOSEST)[0]
    self.assertEqual("test", closest.term)

    closest = restored.lookup("boll", Verbosity.CLOSEST)[0]
    self.assertEqual("ball", closest.term)

    closest = restored.lookup("c0d3", Verbosity.CLOSEST)[0]
    self.assertEqual(2, closest.distance)
def test_save_load(self):
    """Check that a pickle save/load round trip preserves lookup results.

    ``self.symSpell`` is pickled to a file and loaded into a fresh
    ``SymSpell``. The closest suggestion for "tke" and ``max_length()``
    must be the same before saving and after loading.
    """
    # Local import keeps this block self-contained; only this test needs it.
    import tempfile

    before_save = self.symSpell.lookup("tke", Verbosity.CLOSEST)[0].term
    before_max_length = self.symSpell.max_length()

    # A unique temporary directory avoids clashing with an existing "temp"
    # directory in the working directory, and is removed even if saving,
    # loading or the lookup raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        pickle_path = os.path.join(temp_dir, "temp.bin")
        self.symSpell.save_pickle(pickle_path)

        load_sym_spell = SymSpell()
        load_sym_spell.load_pickle(pickle_path)
        after_load = load_sym_spell.lookup("tke", Verbosity.CLOSEST)[0].term
        after_max_length = load_sym_spell.max_length()

    assert before_save == after_load
    assert before_max_length == after_max_length
def test_load_pickle_symspellcpppy(benchmark):
    """Benchmark ``SymSpellCpp.load_pickle`` and sanity-check the result.

    Reads ``temp_cpppy/temp.bin``, which this function does not create
    itself, so it must already exist when the test runs. The file and its
    directory are removed afterwards, and the loaded dictionary must
    correct "tke" to "the".
    """
    pickle_dir = "temp_cpppy"
    pickle_file = "temp_cpppy/temp.bin"

    speller = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    benchmark(speller.load_pickle, pickle_file)

    # Clean up before asserting so a failed check does not leave files behind.
    os.remove(pickle_file)
    os.rmdir(pickle_dir)

    closest = speller.lookup("tke", VerbosityCpp.CLOSEST)[0]
    assert closest.term == "the"