def test_add_spelling():
    """Check that ``add_spelling`` adds word graphs to a committed index.

    The schema uses plain ``TEXT`` fields, and the test first asserts that
    the reader reports no word graph for either field. After
    ``add_spelling`` is applied to both fields, the reader must report a
    word graph for each, and ``ReaderCorrector`` must return the expected
    suggestions for both fields.
    """
    from whoosh.writing import add_spelling

    schema = fields.Schema(text1=fields.TEXT, text2=fields.TEXT)
    ix = RamStorage().create_index(schema)

    # Use the writer as a context manager (as other tests in this file do)
    # so it is not left open if adding a document raises.
    with ix.writer() as w:
        w.add_document(text1=u("render zorro kaori postal"), text2=u("alfa"))
        w.add_document(text1=u("reader zebra koala pastry"), text2=u("alpa"))
        w.add_document(text1=u("leader libra ooala paster"), text2=u("alpha"))
        w.add_document(text1=u("feeder lorry zoala baster"), text2=u("olfo"))

    # Before add_spelling: neither field has a word graph.
    with ix.reader() as r:
        assert not r.has_word_graph("text1")
        assert not r.has_word_graph("text2")

    add_spelling(ix, ["text1", "text2"])

    # After add_spelling: both fields have graphs and can be used for
    # suggestions.
    with ix.reader() as r:
        assert r.has_word_graph("text1")
        assert r.has_word_graph("text2")

        sp = spelling.ReaderCorrector(r, "text1")
        assert sp.suggest(u("kaola"), maxdist=1) == [u('koala')]
        assert sp.suggest(u("kaola"), maxdist=2) == [
            u('koala'), u('kaori'), u('ooala'), u('zoala')
        ]

        sp = spelling.ReaderCorrector(r, "text2")
        assert sp.suggest(u("alfo"), maxdist=1) == [u("alfa"), u("olfo")]
def test_reader_corrector_nograph():
    """Check ``ReaderCorrector`` suggestions on a plain ``TEXT`` field.

    The field is declared without any spelling option, and the test asserts
    the suggestions returned for the misspelling "kaola" at edit distances
    1 and 2.
    """
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    # Use the writer as a context manager (as other tests in this file do)
    # so it is not left open if adding a document raises.
    with ix.writer() as w:
        w.add_document(text=u("render zorro kaori postal"))
        w.add_document(text=u("reader zebra koala pastry"))
        w.add_document(text=u("leader libra ooala paster"))
        w.add_document(text=u("feeder lorry zoala baster"))

    with ix.reader() as r:
        sp = spelling.ReaderCorrector(r, "text")
        assert sp.suggest(u("kaola"), maxdist=1) == ['koala']
        assert sp.suggest(u("kaola"), maxdist=2) == ['koala', 'kaori',
                                                     'ooala', 'zoala']
def test_reader_corrector():
    """Check ``ReaderCorrector`` suggestions against a temporary index.

    Four short documents are indexed into a ``TEXT`` field; the corrector is
    built from the reader, the field name and the field object, and its
    suggestions are compared with fixed expected lists.
    """
    # NOTE(review): another function named test_reader_corrector appears in
    # this source; if both live in one module the later one shadows the
    # earlier -- confirm which is intended.
    schema = fields.Schema(text=fields.TEXT())
    documents = (
        u"render zorro kaori postal",
        u"reader zebra koala pastry",
        u"leader libra oola paster",
        u"feeder lorry zoala baster",
    )

    with TempIndex(schema) as ix:
        with ix.writer() as writer:
            for body in documents:
                writer.add_document(text=body)

        with ix.reader() as reader:
            corrector = spelling.ReaderCorrector(reader, "text",
                                                 schema["text"])

            # A correctly spelled query: the word itself is expected first.
            near = corrector.suggest(u"koala", maxdist=1)
            assert near == [u'koala', u"zoala"]

            expected = [u'kaori', u'koala', u'oola']
            far = corrector.suggest(u"kaola", maxdist=2)
            assert far == expected
def test_unicode_spelling():
    """Check that ``ReaderCorrector`` handles non-ASCII terms.

    Three terms in different scripts are indexed into an ``ID`` field. The
    query is the second term with one extra character inserted in the
    middle, and the only expected suggestion is that second term.
    """
    # NOTE(review): another function named test_unicode_spelling appears in
    # this source; if both live in one module the later one shadows the
    # earlier -- confirm which is intended.
    schema = fields.Schema(text=fields.ID())
    domain = [
        u"\u0924\u092a\u093e\u0907\u0939\u0930\u0941",  # Devanagari
        u"\u65e5\u672c",                                # CJK ideographs
        u"\uc774\uc124\ud76c",                          # Hangul
    ]

    with TempIndex(schema) as ix:
        with ix.writer() as writer:
            for term in domain:
                writer.add_document(text=term)

        with ix.reader() as reader:
            corrector = spelling.ReaderCorrector(reader, "text",
                                                 schema["text"])
            suggestions = corrector.suggest(u"\u65e5\u672e\u672c")
            assert suggestions == [u"\u65e5\u672c"]
def test_reader_corrector():
    """Check ``ReaderCorrector`` suggestions on a ``spelling=True`` field.

    The ``TEXT`` field is declared with ``spelling=True``. After indexing,
    the test asserts that the reader reports a word graph for the field,
    then checks the suggestions for the misspelling "kaola" at edit
    distances 1 and 2.
    """
    # NOTE(review): another function named test_reader_corrector appears in
    # this source; if both live in one module the later one shadows the
    # earlier -- confirm which is intended.
    schema = fields.Schema(text=fields.TEXT(spelling=True))
    ix = RamStorage().create_index(schema)

    # Use the writer as a context manager (as other tests in this file do)
    # so it is not left open if adding a document raises.
    with ix.writer() as w:
        w.add_document(text=u("render zorro kaori postal"))
        w.add_document(text=u("reader zebra koala pastry"))
        w.add_document(text=u("leader libra ooala paster"))
        w.add_document(text=u("feeder lorry zoala baster"))

    with ix.reader() as r:
        assert r.has_word_graph("text")

        sp = spelling.ReaderCorrector(r, "text")
        assert_equal(sp.suggest(u("kaola"), maxdist=1), [u('koala')])
        assert_equal(
            sp.suggest(u("kaola"), maxdist=2),
            [u('koala'), u('kaori'), u('ooala'), u('zoala')])
def test_unicode_spelling():
    """Check word-graph contents and suggestions for non-ASCII terms.

    Three terms in different scripts are indexed into an ``ID`` field
    declared with ``spelling=True``. The test asserts that the reader has a
    word graph for the field, that flattening the graph (via both the raw
    graph cursor and ``word_graph``) yields exactly ``domain``, and that a
    query with one extra character yields the second term as the only
    suggestion.
    """
    # NOTE(review): another function named test_unicode_spelling appears in
    # this source; if both live in one module the later one shadows the
    # earlier -- confirm which is intended.
    schema = fields.Schema(text=fields.ID(spelling=True))

    # The assertions below compare the flattened graph against this list
    # as-is, so the order of the entries matters.
    domain = [
        u("\u0924\u092a\u093e\u0907\u0939\u0930\u0941"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        for term in domain:
            writer.add_document(text=term)

    with ix.reader() as reader:
        assert reader.has_word_graph("text")

        # Walk the graph through the low-level cursor...
        cursor = reader._get_graph().cursor("text")
        from_cursor = list(cursor.flatten_strings())
        assert from_cursor == domain

        # ...and through the reader's word_graph accessor.
        from_word_graph = list(reader.word_graph("text").flatten_strings())
        assert from_word_graph == domain

        corrector = spelling.ReaderCorrector(reader, "text")
        suggestions = corrector.suggest(u("\u65e5\u672e\u672c"))
        assert suggestions == [u("\u65e5\u672c")]