Пример #1
0
 def test_lsa(self):
     # Assert LSA properties: build an LSA over the shared model and check
     # the attributes it exposes (model, vectors/u, terms, sigma, vt).
     k = 100
     lsa = vector.LSA(self.model, k)
     self.assertEqual(lsa.model, self.model)
     # `vectors` is expected to be the same mapping as `u`.
     self.assertEqual(lsa.vectors, lsa.u)
     self.assertEqual(set(lsa.terms), set(self.model.vector.keys()))
     self.assertTrue(isinstance(lsa.u, dict))
     self.assertTrue(isinstance(lsa.sigma, list))
     self.assertTrue(isinstance(lsa.vt, list))
     # One entry in `u` per document. The original used
     # assertTrue(len(lsa.u), len(self.model)), which passes the second
     # value as the failure message and therefore never compared the two.
     self.assertEqual(len(lsa.u), len(self.model))
     # NOTE(review): the original also passed `len(self.model) - k` as the
     # assertTrue message for sigma and vt, so that value was never
     # compared. It is unconfirmed as the expected length (the loop below
     # expects at most k entries per vector), so only the check that was
     # effectively performed -- non-emptiness -- is kept until the intended
     # length is verified.
     self.assertTrue(len(lsa.sigma) > 0)
     self.assertTrue(len(lsa.vt) > 0)
     for document in self.model:
         # Every document must have a vector, looked up by document id.
         v = lsa.vectors[document.id]
         self.assertTrue(isinstance(v, vector.Vector))
         self.assertTrue(len(v) <= k)
     print("pattern.vector.LSA")
Пример #2
0
class TestLSA(unittest.TestCase):
    """Tests for vector.LSA, run against a corpus shared by all test methods."""

    # Class-level cache for the test corpus; filled on the first setUp() call.
    corpus = None

    def setUp(self):
        # Test spam corpus for reduction.
        # It is built once and stored on the class so later tests reuse it.
        # Note: `corpus(top=250)` below resolves to the module-level name,
        # not to the class attribute of the same name.
        cls = self.__class__
        if cls.corpus is None:
            cls.corpus = corpus(top=250)
        self.corpus = cls.corpus
        # Seed the random generator with a fixed value for each test.
        random.seed(0)

    def tearDown(self):
        # Re-seed the random generator without a fixed value.
        random.seed()

    def test_lsa(self):
        # Without numpy the test prints the import error and returns early
        # (it is then reported as passed, not skipped).
        try:
            import numpy
        except ImportError as e:
            print(e)
            return
        # Assert LSA properties.
        k = 100
        lsa = vector.LSA(self.corpus, k)
        self.assertEqual(lsa.corpus, self.corpus)
        # `vectors` is expected to be the same mapping as `u`.
        self.assertEqual(lsa.vectors, lsa.u)
        self.assertEqual(set(lsa.terms), set(self.corpus.vector.keys()))
        self.assertTrue(isinstance(lsa.u, dict))
        self.assertTrue(isinstance(lsa.sigma, list))
        self.assertTrue(isinstance(lsa.vt, list))
        # One entry in `u` per document. The original used
        # assertTrue(len(lsa.u), len(self.corpus)), which passes the second
        # value as the failure message and therefore never compared the two.
        self.assertEqual(len(lsa.u), len(self.corpus))
        # NOTE(review): the original also passed `len(self.corpus) - k` as
        # the assertTrue message for sigma and vt, so that value was never
        # compared. It is unconfirmed as the expected length (each vector is
        # asserted to have exactly k entries below), so only the check that
        # was effectively performed -- non-emptiness -- is kept until the
        # intended length is verified.
        self.assertTrue(len(lsa.sigma) > 0)
        self.assertTrue(len(lsa.vt) > 0)
        for document in self.corpus:
            # Every document must have a vector, looked up by document id.
            v = lsa.vectors[document.id]
            self.assertTrue(isinstance(v, vector.Vector))
            self.assertTrue(len(v) == k)
        print("pattern.vector.LSA")
Пример #3
0
class TestLSA(unittest.TestCase):
    """Tests for vector.LSA, run against a model shared by all test methods."""

    # Class-level cache for the test model; filled on the first setUp() call.
    model = None

    def setUp(self):
        # Test spam model for reduction.
        # It is built once and stored on the class so later tests reuse it.
        # Note: `model(top=250)` below resolves to the module-level name,
        # not to the class attribute of the same name.
        cls = self.__class__
        if cls.model is None:
            cls.model = model(top=250)
        self.model = cls.model
        # Seed the random generator with a fixed value for each test.
        random.seed(0)

    def tearDown(self):
        # Re-seed the random generator without a fixed value.
        random.seed()

    def test_lsa(self):
        # Without numpy the test prints the import error and returns early
        # (it is then reported as passed, not skipped).
        try:
            import numpy
        except ImportError as e:
            print(e)
            return
        # Assert LSA properties.
        k = 100
        lsa = vector.LSA(self.model, k)
        self.assertEqual(lsa.model, self.model)
        # `vectors` is expected to be the same mapping as `u`.
        self.assertEqual(lsa.vectors, lsa.u)
        self.assertEqual(set(lsa.terms), set(self.model.vector.keys()))
        self.assertTrue(isinstance(lsa.u, dict))
        self.assertTrue(isinstance(lsa.sigma, list))
        self.assertTrue(isinstance(lsa.vt, list))
        # One entry in `u` per document. The original used
        # assertTrue(len(lsa.u), len(self.model)), which passes the second
        # value as the failure message and therefore never compared the two.
        self.assertEqual(len(lsa.u), len(self.model))
        # NOTE(review): the original also passed `len(self.model) - k` as
        # the assertTrue message for sigma and vt, so that value was never
        # compared. It is unconfirmed as the expected length (the loop below
        # expects at most k entries per vector), so only the check that was
        # effectively performed -- non-emptiness -- is kept until the
        # intended length is verified.
        self.assertTrue(len(lsa.sigma) > 0)
        self.assertTrue(len(lsa.vt) > 0)
        for document in self.model:
            # Every document must have a vector, looked up by document id.
            v = lsa.vectors[document.id]
            self.assertTrue(isinstance(v, vector.Vector))
            self.assertTrue(len(v) <= k)
        print("pattern.vector.LSA")