def testThreeElementPipeline(self):
    # Build a lexicon from a splitter followed by three pipeline elements,
    # feed it one phrase, and check that looking up the term 'hsif' yields
    # exactly the word id 2.
    lexicon = Lexicon(
        Splitter(),
        StopWordPipelineElement({'and': 1}),
        StupidPipelineElement('dogs', 'fish'),
        WackyReversePipelineElement('fish'),
    )
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('hsif')
    self.assertEqual(wids, [2])
def testTwoElementPipeline(self):
    # Build a lexicon from a splitter followed by two pipeline elements,
    # feed it one phrase, and check that looking up the term 'hsif' yields
    # exactly the word id 1.
    lexicon = Lexicon(
        Splitter(),
        StupidPipelineElement('cats', 'fish'),
        WackyReversePipelineElement('fish'),
    )
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('hsif')
    self.assertEqual(wids, [1])
def testTermToWordIdsWithProcess_post_glob(self):
    """This test is for added process_post_glob"""

    class AddedSplitter(Splitter):
        # Splitter subclass that adds a process_post_glob hook; the hook
        # checks the list it is handed and returns the same single term.
        def process_post_glob(self, lst):
            assert lst == ['dogs']
            return ['dogs']

    lexicon = Lexicon(AddedSplitter())
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('dogs')
    self.assertEqual(wids, [3])
class TestLexiconConflict(unittest.TestCase):
    """Add words to one stored Lexicon through two separately opened roots."""

    # Remains None until openDB() has run, which lets tearDown() tell
    # whether there is a database (and storage) to release.
    db = None

    def tearDown(self):
        # Nothing was opened, so there is nothing to close or clean up.
        if self.db is None:
            return
        self.db.close()
        self.storage.cleanup()

    def openDB(self):
        # Imported here rather than at module level, as in the original.
        from ZODB.FileStorage import FileStorage
        from ZODB.DB import DB

        # The file name embeds the current process id.
        self.storage = FileStorage('fs_tmp__%s' % os.getpid())
        self.db = DB(self.storage)

    def testAddWordConflict(self):
        self.l = Lexicon(Splitter())
        self.openDB()

        # Store the lexicon under the first root and commit it.
        first_root = self.db.open().root()
        first_root['l'] = self.l
        transaction.commit()

        # Fetch the stored lexicon again through a second open() call.
        second_root = self.db.open().root()
        other = second_root['l']

        # Make sure the data is loaded
        list(other._wids.items())
        list(other._words.items())
        other.length()

        # Both objects must start from the same serial.
        self.assertEqual(self.l._p_serial, other._p_serial)

        # Add words through the original object and commit ...
        self.l.sourceToWordIds('mary had a little lamb')
        transaction.commit()

        # ... then add different words through the second object and
        # commit again.
        for phrase in ('whose fleece was', 'white as snow'):
            other.sourceToWordIds(phrase)
        transaction.commit()

        # The test expects the second object to end up with 11 words and
        # a length that agrees with its _words mapping.
        self.assertEqual(other.length(), 11)
        self.assertEqual(other.length(), len(other._words))
def testMissingTermToWordIds(self):
    # Look up a term ('boxes') that does not occur in the phrase given to
    # the lexicon; the expected result is the single word id 0.
    lexicon = Lexicon(Splitter())
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('boxes')
    self.assertEqual(wids, [0])
def testTermToWordIds(self):
    # Look up a term ('dogs') that occurs in the phrase given to the
    # lexicon; the expected result is the single word id 3.
    lexicon = Lexicon(Splitter())
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('dogs')
    self.assertEqual(wids, [3])
def testUpgradeLength(self):
    # Remove the instance's `length` attribute, add some words, and check
    # that `length` is afterwards an instance of BTrees' Length class.
    from BTrees.Length import Length
    lexicon = Lexicon(Splitter())
    del lexicon.length # Older instances don't override length
    lexicon.sourceToWordIds('how now brown cow')
    # assertTrue replaces the deprecated alias assert_, which no longer
    # exists on TestCase in Python 3.12 and later.
    self.assertTrue(lexicon.length.__class__ is Length)
def testSplitterAdaptorNofold(self):
    # The phrase is indexed with an upper-case 'CATS' but queried with a
    # lower-case 'cats'; the expected ids are [0, 2, 3], i.e. the first
    # query term gets id 0 while the other two get ids 2 and 3.
    lexicon = Lexicon(Splitter())
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('CATS and dogs')
    wids = lexicon.termToWordIds('cats and dogs')
    self.assertEqual(wids, [0, 2, 3])
def testOnePipelineElement(self):
    # Build a lexicon from a splitter followed by a single pipeline
    # element, feed it one phrase, and check that looking up the term
    # 'fish' yields exactly the word id 3.
    lexicon = Lexicon(Splitter(), StupidPipelineElement('dogs', 'fish'))
    # The returned ids are deliberately not bound: this call is made only
    # for its effect on the lexicon before the lookup below.
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('fish')
    self.assertEqual(wids, [3])