Exemplo n.º 1
0
    def test_LdaCgsSeq_SeedTypes(self):
        """Check that save/load preserves the types of seed and RNG state.

        Regression test for issue #74. A small ``LdaCgsSeq`` model is
        trained, saved to a temporary ``.npz`` file and loaded back; the
        type of ``seed`` and of each of the first five entries of
        ``_mtrand_state`` must be the same on both models.
        """

        from tempfile import NamedTemporaryFile
        import os

        c = random_corpus(1000, 50, 6, 100)
        tmp = NamedTemporaryFile(delete=False, suffix='.npz')
        # Only the path is needed. Closing the handle immediately avoids
        # leaking it and lets the file be reopened/removed on platforms
        # that do not allow that while the handle is still open.
        tmp.close()
        try:
            m0 = LdaCgsSeq(c, 'document', K=10)
            m0.train(n_iterations=20)
            m0.save(tmp.name)
            m1 = LdaCgsSeq.load(tmp.name)

            self.assertIs(type(m0.seed), type(m1.seed))
            print("seed types:", type(m0._mtrand_state[0]),
                  type(m1._mtrand_state[0]))
            for i in range(5):
                self.assertIs(
                    type(m0._mtrand_state[i]), type(m1._mtrand_state[i]),
                    "_mtrand_state[%d] changed type after reload" % i)
        finally:
            # Best-effort cleanup. OSError is defined on every platform
            # (WindowsError is not, so naming it in an except clause
            # raises NameError elsewhere) and covers WindowsError.
            try:
                os.remove(tmp.name)
            except OSError:
                pass
Exemplo n.º 2
0
    def test_LdaCgsSeq_SeedTypes(self):
        """Check that save/load preserves the types of seed and RNG state.

        Regression test for issue #74. A small ``LdaCgsSeq`` model is
        trained, saved to a temporary ``.npz`` file and loaded back; the
        type of ``seed`` and of each of the first five entries of
        ``_mtrand_state`` must be the same on both models.
        """

        from tempfile import NamedTemporaryFile
        import os

        c = random_corpus(1000, 50, 6, 100)
        tmp = NamedTemporaryFile(delete=False, suffix='.npz')
        # Only the path is needed. Closing the handle immediately avoids
        # leaking it and lets the file be reopened/removed on platforms
        # that do not allow that while the handle is still open.
        tmp.close()
        try:
            m0 = LdaCgsSeq(c, 'document', K=10)
            m0.train(n_iterations=20)
            m0.save(tmp.name)
            m1 = LdaCgsSeq.load(tmp.name)

            self.assertIs(type(m0.seed), type(m1.seed))
            print("seed types:", type(m0._mtrand_state[0]), type(m1._mtrand_state[0]))
            for i in range(5):
                self.assertIs(
                    type(m0._mtrand_state[i]), type(m1._mtrand_state[i]),
                    "_mtrand_state[%d] changed type after reload" % i)
        finally:
            # Best-effort cleanup. OSError is defined on every platform
            # (WindowsError is not, so naming it in an except clause
            # raises NameError elsewhere) and covers WindowsError.
            try:
                os.remove(tmp.name)
            except OSError:
                pass
Exemplo n.º 3
0
    def test_LdaCgsSeq_IO(self):
        """Check that an ``LdaCgsSeq`` model survives a save/load round trip.

        A small model is trained, saved to a temporary ``.npz`` file and
        loaded back. Scalar attributes, array attributes (compared
        element-wise), the seed and the ``_mtrand_state`` entries of the
        reloaded model must equal those of the original. A second
        train/save/load cycle then checks that the reloaded model has no
        ``log_prob`` attribute.
        """

        from tempfile import NamedTemporaryFile
        import os

        import numpy as np

        c = random_corpus(1000, 50, 6, 100)
        tmp = NamedTemporaryFile(delete=False, suffix='.npz')
        # Only the path is needed. Closing the handle immediately avoids
        # leaking it and lets the file be reopened/removed on platforms
        # that do not allow that while the handle is still open.
        tmp.close()
        try:
            m0 = LdaCgsSeq(c, 'document', K=10)
            m0.train(n_iterations=20)
            m0.save(tmp.name)
            m1 = LdaCgsSeq.load(tmp.name)

            self.assertEqual(m0.context_type, m1.context_type)
            self.assertEqual(m0.K, m1.K)
            self.assertTrue(np.array_equal(m0.alpha, m1.alpha))
            self.assertTrue(np.array_equal(m0.beta, m1.beta))
            self.assertTrue(m0.log_probs == m1.log_probs)
            self.assertEqual(m0.V, m1.V)
            self.assertEqual(m0.iteration, m1.iteration)

            # Compare contents element-wise. ``a.all() == b.all()`` only
            # compares two reduced booleans and would accept arrays that
            # differ, so it cannot detect a corrupted round trip.
            self.assertEqual(len(m0.corpus), len(m1.corpus))
            for doc0, doc1 in zip(m0.corpus, m1.corpus):
                self.assertTrue(np.array_equal(doc0, doc1))
            self.assertEqual(len(m0.Z), len(m1.Z))
            for z0, z1 in zip(m0.Z, m1.Z):
                self.assertTrue(np.array_equal(z0, z1))
            self.assertTrue(np.array_equal(m0.top_doc, m1.top_doc))
            self.assertTrue(np.array_equal(m0.word_top, m1.word_top))
            self.assertTrue(
                np.array_equal(m0.inv_top_sums, m1.inv_top_sums))

            self.assertEqual(m0.seed, m1.seed)
            self.assertEqual(m0._mtrand_state[0], m1._mtrand_state[0])
            self.assertTrue(
                np.array_equal(m0._mtrand_state[1], m1._mtrand_state[1]))
            for s1, s2 in zip(m0._mtrand_state[2:], m1._mtrand_state[2:]):
                self.assertEqual(s1, s2)

            m0 = LdaCgsSeq(c, 'document', K=10)
            m0.train(n_iterations=20)
            m0.save(tmp.name)
            m1 = LdaCgsSeq.load(tmp.name)
            # NOTE(review): the attribute compared above is ``log_probs``;
            # this checks the singular ``log_prob``. Confirm that the
            # singular name is the one intended here.
            self.assertFalse(hasattr(m1, 'log_prob'))
        finally:
            # Best-effort cleanup. OSError is defined on every platform
            # (WindowsError is not, so naming it in an except clause
            # raises NameError elsewhere) and covers WindowsError.
            try:
                os.remove(tmp.name)
            except OSError:
                pass
Exemplo n.º 4
0
    def test_LdaCgsSeq_IO(self):
        """Check that an ``LdaCgsSeq`` model survives a save/load round trip.

        A small model is trained, saved to a temporary ``.npz`` file and
        loaded back. Scalar attributes, array attributes (compared
        element-wise), the seed and the ``_mtrand_state`` entries of the
        reloaded model must equal those of the original. A second
        train/save/load cycle then checks that the reloaded model has no
        ``log_prob`` attribute.
        """

        from tempfile import NamedTemporaryFile
        import os

        import numpy as np

        c = random_corpus(1000, 50, 6, 100)
        tmp = NamedTemporaryFile(delete=False, suffix='.npz')
        # Only the path is needed. Closing the handle immediately avoids
        # leaking it and lets the file be reopened/removed on platforms
        # that do not allow that while the handle is still open.
        tmp.close()
        try:
            m0 = LdaCgsSeq(c, 'document', K=10)
            m0.train(n_iterations=20)
            m0.save(tmp.name)
            m1 = LdaCgsSeq.load(tmp.name)

            self.assertEqual(m0.context_type, m1.context_type)
            self.assertEqual(m0.K, m1.K)
            self.assertTrue(np.array_equal(m0.alpha, m1.alpha))
            self.assertTrue(np.array_equal(m0.beta, m1.beta))
            self.assertTrue(m0.log_probs == m1.log_probs)
            self.assertEqual(m0.V, m1.V)
            self.assertEqual(m0.iteration, m1.iteration)

            # Compare contents element-wise. ``a.all() == b.all()`` only
            # compares two reduced booleans and would accept arrays that
            # differ, so it cannot detect a corrupted round trip.
            self.assertEqual(len(m0.corpus), len(m1.corpus))
            for doc0, doc1 in zip(m0.corpus, m1.corpus):
                self.assertTrue(np.array_equal(doc0, doc1))
            self.assertEqual(len(m0.Z), len(m1.Z))
            for z0, z1 in zip(m0.Z, m1.Z):
                self.assertTrue(np.array_equal(z0, z1))
            self.assertTrue(np.array_equal(m0.top_doc, m1.top_doc))
            self.assertTrue(np.array_equal(m0.word_top, m1.word_top))
            self.assertTrue(
                np.array_equal(m0.inv_top_sums, m1.inv_top_sums))

            self.assertEqual(m0.seed, m1.seed)
            self.assertEqual(m0._mtrand_state[0], m1._mtrand_state[0])
            self.assertTrue(
                np.array_equal(m0._mtrand_state[1], m1._mtrand_state[1]))
            for s1, s2 in zip(m0._mtrand_state[2:], m1._mtrand_state[2:]):
                self.assertEqual(s1, s2)

            m0 = LdaCgsSeq(c, 'document', K=10)
            m0.train(n_iterations=20)
            m0.save(tmp.name)
            m1 = LdaCgsSeq.load(tmp.name)
            # NOTE(review): the attribute compared above is ``log_probs``;
            # this checks the singular ``log_prob``. Confirm that the
            # singular name is the one intended here.
            self.assertFalse(hasattr(m1, 'log_prob'))
        finally:
            # Best-effort cleanup. OSError is defined on every platform
            # (WindowsError is not, so naming it in an except clause
            # raises NameError elsewhere) and covers WindowsError.
            try:
                os.remove(tmp.name)
            except OSError:
                pass