Пример #1
0
 def testThreeElementPipeline(self):
     # Splitter followed by three pipeline elements, built in this order.
     pipeline = (
         Splitter(),
         StopWordPipelineElement({'and': 1}),
         StupidPipelineElement('dogs', 'fish'),
         WackyReversePipelineElement('fish'),
     )
     lex = Lexicon(*pipeline)
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # The test expects the term 'hsif' to resolve to word id 2.
     found = lex.termToWordIds('hsif')
     self.assertEqual(found, [2])
Пример #2
0
 def testTwoElementPipeline(self):
     # Splitter followed by two pipeline elements, built in this order.
     pipeline = (
         Splitter(),
         StupidPipelineElement('cats', 'fish'),
         WackyReversePipelineElement('fish'),
     )
     lex = Lexicon(*pipeline)
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # The test expects the term 'hsif' to resolve to word id 1.
     found = lex.termToWordIds('hsif')
     self.assertEqual(found, [1])
Пример #3
0
    def testTermToWordIdsWithProcess_post_glob(self):
        """This test is for added process_post_glob"""
        class AddedSplitter(Splitter):
            # Splitter subclass that adds a process_post_glob hook; the hook
            # checks the list it receives and hands back the same single term.
            def process_post_glob(self, lst):
                assert lst == ['dogs']
                return ['dogs']

        lex = Lexicon(AddedSplitter())
        # Index the source text; the ids returned here are not inspected.
        lex.sourceToWordIds('cats and dogs')
        # The test expects the term 'dogs' to resolve to word id 3.
        found = lex.termToWordIds('dogs')
        self.assertEqual(found, [3])
Пример #4
0
class TestLexiconConflict(unittest.TestCase):
    """Add words to one stored Lexicon through two ZODB connections."""

    # Stays None until openDB() runs, so tearDown can tell whether there is
    # anything to release.
    db = None

    def tearDown(self):
        # Nothing to release when the test never opened a database.
        if self.db is None:
            return
        self.db.close()
        self.storage.cleanup()

    def openDB(self):
        # Create a FileStorage named after the current process id and wrap
        # it in a DB; both are kept on the instance for tearDown.
        from ZODB.FileStorage import FileStorage
        from ZODB.DB import DB
        storage_name = 'fs_tmp__%s' % os.getpid()
        self.storage = FileStorage(storage_name)
        self.db = DB(self.storage)

    def testAddWordConflict(self):
        self.l = Lexicon(Splitter())
        self.openDB()

        # Store the lexicon through a first connection.
        first_root = self.db.open().root()
        first_root['l'] = self.l
        transaction.commit()

        # Fetch the same lexicon through a second connection.
        second_root = self.db.open().root()
        other = second_root['l']
        # Make sure the data is loaded
        list(other._wids.items())
        list(other._words.items())
        other.length()

        # Both views must start from the same stored revision.
        self.assertEqual(self.l._p_serial, other._p_serial)

        # Add words through the first view and commit ...
        self.l.sourceToWordIds('mary had a little lamb')
        transaction.commit()

        # ... then add different words through the second view and commit.
        other.sourceToWordIds('whose fleece was')
        other.sourceToWordIds('white as snow')
        transaction.commit()

        # The test expects 11 words in total, matching the _words mapping.
        self.assertEqual(other.length(), 11)
        self.assertEqual(other.length(), len(other._words))
Пример #5
0
class TestLexiconConflict(unittest.TestCase):
    """Add words to one stored Lexicon through two ZODB connections."""

    # Stays None until openDB() runs, so tearDown can tell whether there is
    # anything to release.
    db = None

    def tearDown(self):
        # Nothing to release when the test never opened a database.
        if self.db is None:
            return
        self.db.close()
        self.storage.cleanup()

    def openDB(self):
        # Create a FileStorage named after the current process id and wrap
        # it in a DB; both are kept on the instance for tearDown.
        from ZODB.FileStorage import FileStorage
        from ZODB.DB import DB
        storage_name = 'fs_tmp__%s' % os.getpid()
        self.storage = FileStorage(storage_name)
        self.db = DB(self.storage)

    def testAddWordConflict(self):
        self.l = Lexicon(Splitter())
        self.openDB()

        # Store the lexicon through a first connection.
        first_root = self.db.open().root()
        first_root['l'] = self.l
        transaction.commit()

        # Fetch the same lexicon through a second connection.
        second_root = self.db.open().root()
        other = second_root['l']
        # Make sure the data is loaded
        list(other._wids.items())
        list(other._words.items())
        other.length()

        # Both views must start from the same stored revision.
        self.assertEqual(self.l._p_serial, other._p_serial)

        # Add words through the first view and commit ...
        self.l.sourceToWordIds('mary had a little lamb')
        transaction.commit()

        # ... then add different words through the second view and commit.
        other.sourceToWordIds('whose fleece was')
        other.sourceToWordIds('white as snow')
        transaction.commit()

        # The test expects 11 words in total, matching the _words mapping.
        self.assertEqual(other.length(), 11)
        self.assertEqual(other.length(), len(other._words))
Пример #6
0
 def testMissingTermToWordIds(self):
     lex = Lexicon(Splitter())
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # 'boxes' is not part of the indexed text; the test expects id 0.
     found = lex.termToWordIds('boxes')
     self.assertEqual(found, [0])
Пример #7
0
 def testTermToWordIds(self):
     lex = Lexicon(Splitter())
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # The test expects the term 'dogs' to resolve to word id 3.
     found = lex.termToWordIds('dogs')
     self.assertEqual(found, [3])
Пример #8
0
 def testUpgradeLength(self):
     # Check that a lexicon whose instance-level ``length`` was removed has a
     # BTrees ``Length`` object again after new text is indexed.
     from BTrees.Length import Length
     lexicon = Lexicon(Splitter())
     del lexicon.length  # Older instances don't override length
     lexicon.sourceToWordIds('how now brown cow')
     # assertIs replaces the deprecated ``assert_`` alias (removed in
     # Python 3.12) and reports both operands when the check fails.
     self.assertIs(lexicon.length.__class__, Length)
Пример #9
0
 def testSplitterAdaptorNofold(self):
     lex = Lexicon(Splitter())
     # Index text whose first word is upper-case; returned ids are unused.
     lex.sourceToWordIds('CATS and dogs')
     # Looking up the lower-case spelling is expected to miss 'cats'
     # (id 0) while 'and' and 'dogs' resolve to ids 2 and 3.
     found = lex.termToWordIds('cats and dogs')
     self.assertEqual(found, [0, 2, 3])
Пример #10
0
 def testOnePipelineElement(self):
     # Splitter followed by a single pipeline element.
     pipeline = (Splitter(), StupidPipelineElement('dogs', 'fish'))
     lex = Lexicon(*pipeline)
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # The test expects the term 'fish' to resolve to word id 3.
     found = lex.termToWordIds('fish')
     self.assertEqual(found, [3])
Пример #11
0
 def testMissingTermToWordIds(self):
     lex = Lexicon(Splitter())
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # 'boxes' is not part of the indexed text; the test expects id 0.
     found = lex.termToWordIds('boxes')
     self.assertEqual(found, [0])
Пример #12
0
 def testTermToWordIds(self):
     lex = Lexicon(Splitter())
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # The test expects the term 'dogs' to resolve to word id 3.
     found = lex.termToWordIds('dogs')
     self.assertEqual(found, [3])
Пример #13
0
 def testUpgradeLength(self):
     # Check that a lexicon whose instance-level ``length`` was removed has a
     # BTrees ``Length`` object again after new text is indexed.
     from BTrees.Length import Length
     lexicon = Lexicon(Splitter())
     del lexicon.length  # Older instances don't override length
     lexicon.sourceToWordIds('how now brown cow')
     # assertIs replaces the deprecated ``assert_`` alias (removed in
     # Python 3.12) and reports both operands when the check fails.
     self.assertIs(lexicon.length.__class__, Length)
Пример #14
0
 def testSplitterAdaptorNofold(self):
     lex = Lexicon(Splitter())
     # Index text whose first word is upper-case; returned ids are unused.
     lex.sourceToWordIds('CATS and dogs')
     # Looking up the lower-case spelling is expected to miss 'cats'
     # (id 0) while 'and' and 'dogs' resolve to ids 2 and 3.
     found = lex.termToWordIds('cats and dogs')
     self.assertEqual(found, [0, 2, 3])
Пример #15
0
 def testOnePipelineElement(self):
     # Splitter followed by a single pipeline element.
     pipeline = (Splitter(), StupidPipelineElement('dogs', 'fish'))
     lex = Lexicon(*pipeline)
     # Index the source text; the ids returned here are not inspected.
     lex.sourceToWordIds('cats and dogs')
     # The test expects the term 'fish' to resolve to word id 3.
     found = lex.termToWordIds('fish')
     self.assertEqual(found, [3])