Exemplo n.º 1
0
 def testPersistence(self):
     """Round-trip a LogEntropyModel through ``save`` and ``load``.

     The reloaded model must carry the same ``entr`` attribute as the
     original and return matching output for the probe vector.
     """
     # Resolve the path once so that save and load are guaranteed to use
     # the same file, however ``testfile`` computes its result.
     fname = testfile()
     model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
     model.save(fname)
     model2 = logentropy_model.LogEntropyModel.load(fname)

     # assertEqual reports both operands on failure; assertTrue(a == b)
     # would only say "False is not true".
     self.assertEqual(model.entr, model2.entr)

     # NOTE(review): the probe vector is empty, so this comparison is
     # presumably trivially true -- consider probing with a real document.
     tstvec = []
     self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))
Exemplo n.º 2
0
 def testPersistenceCompressed(self):
     """Round-trip a LogEntropyModel through a ``.gz``-suffixed file.

     The model is saved to a path ending in ``.gz`` and loaded back with
     ``mmap=None``; the reloaded model must carry the same ``entr``
     attribute and return matching output for the probe vector.
     """
     # NOTE(review): the '.gz' suffix presumably selects compressed
     # persistence inside save/load -- confirm against the library.
     fname = testfile() + '.gz'
     model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
     model.save(fname)
     model2 = logentropy_model.LogEntropyModel.load(fname, mmap=None)

     # assertEqual reports both operands on failure; assertTrue(a == b)
     # would only say "False is not true".
     self.assertEqual(model.entr, model2.entr)

     # NOTE(review): the probe vector is empty, so this comparison is
     # presumably trivially true -- consider probing with a real document.
     tstvec = []
     self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))
 def testPersistenceCompressed(self):
     """Round-trip a LogEntropyModel through a ``.gz``-suffixed temp file.

     The model is saved to the path returned by ``get_tmpfile`` and loaded
     back with ``mmap=None``; the reloaded model must carry the same
     ``entr`` attribute and return matching output for the probe vector.
     """
     # NOTE(review): the '.gz' suffix presumably selects compressed
     # persistence inside save/load -- confirm against the library.
     fname = get_tmpfile('gensim_models_logentry.tst.gz')
     model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
     model.save(fname)
     model2 = logentropy_model.LogEntropyModel.load(fname, mmap=None)

     # assertEqual reports both operands on failure; assertTrue(a == b)
     # would only say "False is not true".
     self.assertEqual(model.entr, model2.entr)

     # NOTE(review): the probe vector is empty, so this comparison is
     # presumably trivially true -- consider probing with a real document.
     tstvec = []
     self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))
Exemplo n.º 4
0
    def test_corpus_validity(self):
        """
        Smoke-test model construction on ``self.corpus_small``.

        The test passes as long as building the model raises nothing.

        Author's rationale: words that occur in only one context cause
        trouble in the log entropy normalization and should therefore be
        filtered out, i.e. context diversity should be > 1.
        """
        small_corpus = self.corpus_small
        logentropy_model.LogEntropyModel(small_corpus)
Exemplo n.º 5
0
    def testTransform(self):
        """Compare the un-normalized transform of the first document
        against stored reference values."""
        # reference output for the first document of the corpus
        expected = [
            (0, 0.056633012265132537),
            (1, 0.024757785476437949),
            (3, 0.62707564002906502),
        ]

        # build the model with normalization switched off
        weighting = logentropy_model.LogEntropyModel(
            self.corpus_ok, normalize=False
        )

        # run the first corpus document through the model
        documents = list(self.corpus_ok)
        first_doc = documents[0]
        actual = weighting[first_doc]

        matches = numpy.allclose(actual, expected)
        self.assertTrue(matches)
Exemplo n.º 6
0
    def testTransform(self):
        """Transform the first corpus document without normalization and
        check the result against stored reference values."""
        # model under test, normalization disabled
        log_ent = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False)

        # only the first document of the corpus is transformed
        all_docs = list(self.corpus_ok)
        result = log_ent[all_docs[0]]

        # reference output for that document
        reference = [
            (0, 0.29155145321295795),
            (1, 0.024757785476437949),
            (3, 1.0569257878828748),
        ]
        self.assertTrue(numpy.allclose(result, reference))
Exemplo n.º 7
0
    def test_transform(self):
        """Verify the un-normalized model output for the first document
        of ``self.corpus_ok`` against stored reference values."""
        # reference output the transformed document must match
        want = [
            (0, 0.3748900964125389),
            (1, 0.30730215324230725),
            (3, 1.20941755462856),
        ]

        # construct the model with normalize=False
        transformer = logentropy_model.LogEntropyModel(
            self.corpus_ok,
            normalize=False,
        )

        # materialize the corpus and transform its first document
        corpus_docs = list(self.corpus_ok)
        got = transformer[corpus_docs[0]]

        close_enough = np.allclose(got, want)
        self.assertTrue(close_enough)