示例#1
0
文件: test_cluster.py 项目: votti/fcm
    def testListFitting(self):
        true1, data1 = self.generate_data()
        true2, data2 = self.generate_data()


        model = DPMixtureModel(3, 2000, 100, 1, type='BEM')
        rs = model.fit([data1, data2])
        assert(len(rs) == 2)
        for r in rs:
            print 'mu ', r.mus
            diffs = {}
            for i in gen_mean:

                diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
                # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
                assert(np.vdot(diffs[i], diffs[i]) < 2)

        fcm1 = FCMdata('test_fcm1', data1, ['fsc', 'ssc'], [0, 1])
        fcm2 = FCMdata('test_fcm2', data2, ['fsc', 'ssc'], [0, 1])

        c = FCMcollection('fcms', [fcm1, fcm2])

        rs = model.fit(c)
        assert(len(rs) == 2)
        for r in rs:
            print 'mu ', r.mus
            diffs = {}
            for i in gen_mean:

                diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
                # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
                assert(np.vdot(diffs[i], diffs[i]) < 2)
示例#2
0
    def testListFitting(self):
        true1, data1 = self.generate_data()
        true2, data2 = self.generate_data()

        model = DPMixtureModel(3, 2000, 100, 1, type="BEM")
        rs = model.fit([data1, data2])
        assert len(rs) == 2
        for r in rs:
            print "mu ", r.mus
            diffs = {}
            for i in gen_mean:

                diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
                # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
                assert np.vdot(diffs[i], diffs[i]) < 2

        fcm1 = FCMdata("test_fcm1", data1, ["fsc", "ssc"], [0, 1])
        fcm2 = FCMdata("test_fcm2", data2, ["fsc", "ssc"], [0, 1])

        c = FCMcollection("fcms", [fcm1, fcm2])

        rs = model.fit(c)
        assert len(rs) == 2
        for r in rs:
            print "mu ", r.mus
            diffs = {}
            for i in gen_mean:

                diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
                # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
                assert np.vdot(diffs[i], diffs[i]) < 4
示例#3
0
def fit_one(args):
    x, name = args
    print "fitting", name, "of size", x.shape
    m = DPMixtureModel(nclusts=8, iter=100, burnin=0, last=1)
    r = m.fit(x, verbose=10)
    print "done fitting", name
    return r
示例#4
0
    def test_mcmc_fitting(self):
        """Fit with the default model type and check the recovered means.

        For every entry of ``gen_mean`` the per-coordinate minimum absolute
        offset to the fitted means must have a squared norm below 1.
        """
        _, samples = self.generate_data()

        sampler = DPMixtureModel(3, 100, 100, 1)
        sampler.seed = 1
        fitted = sampler.fit(samples, verbose=10)

        gaps = {}
        for key in gen_mean:
            offsets = np.abs(fitted.mus - gen_mean[key])
            gaps[key] = np.min(offsets, axis=0)
            assert np.vdot(gaps[key], gaps[key]) < 1
示例#5
0
 def setUp(self):
     """Prepare a 2-D sample of 1000 points and a model for each test.

     The sample is drawn from a normal distribution with zero mean and
     identity covariance; the model is built from ``self.k`` and
     ``self.niter`` followed by the constants 0 and 1.
     """
     self.k, self.niter = 16, 10
     self.mu = array([0, 0])
     self.sig = eye(2)
     self.pnts = multivariate_normal(self.mu, self.sig, 1000)
     self.model = DPMixtureModel(self.k, self.niter, 0, 1)
示例#6
0
    def testBEMFitting(self):
        print "starting BEM"
        true, data = self.generate_data()
        m = data.mean(0)
        s = data.std(0)
        #        true_mean = {}
        #        for i in gen_mean:
        #            true_mean[i] = (gen_mean[i]-m)/s

        model = DPMixtureModel(3, 2000, 100, 1, type="BEM")
        model.seed = 1
        start = time()
        r = model.fit(data, verbose=False)

        end = time() - start

        diffs = {}
        for i in gen_mean:
            diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
            # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        print "BEM fitting took %0.3f" % (end)
示例#7
0
    def testMCMCFitting(self):
        print "starting mcmc"
        true, data = self.generate_data()
        m = data.mean(0)
        s = data.std(0)
        #        true_mean = {}
        #        for i in gen_mean:
        #            true_mean[i] = (gen_mean[i]-m)/s

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1
        start = time()
        r = model.fit(data, verbose=10)
        end = time() - start

        diffs = {}
        # print 'r.mus:', r.mus
        for i in gen_mean:
            diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
            # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        # print diffs
        print r.classify(data)
        print "MCMC fitting took %0.3f" % (end)
示例#8
0
    def test_reference(self):
        """Fit twice with a loaded reference and compare means index-wise.

        The first fit uses the true labels as reference, the second uses
        the result of the first. After each fit, ``mus[i]`` must lie
        within a squared distance of 1 of ``gen_mean[i]``.
        """
        labels, samples = self.generate_data()

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1

        # ``reference`` starts as the label array and is then replaced by
        # the fit result, which doubles as the reference of the next round.
        reference = array(labels)
        for _ in range(2):
            model.load_ref(reference)
            reference = model.fit(samples, verbose=True)

            offsets = {}
            for key in gen_mean:
                offsets[key] = np.abs(reference.mus[key] - gen_mean[key])
                assert np.vdot(offsets[key], offsets[key]) < 1
示例#9
0
    def testRefernce(self):
        print "starting mcmc"
        true, data = self.generate_data()
        m = data.mean(0)
        s = data.std(0)
        #        true_mean = {}
        #        for i in gen_mean:
        #            true_mean[i] = (gen_mean[i]-m)/s

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1
        model.load_ref(array(true))
        start = time()
        r = model.fit(data, verbose=True)
        end = time() - start

        diffs = {}
        # print 'r.mus:', r.mus
        for i in gen_mean:
            # diffs[i] = np.min(np.abs(r.mus-gen_mean[i]),0)
            diffs[i] = np.abs(r.mus[i] - gen_mean[i])
            # print i, gen_mean[i],r.mus[i], diffs[i],
            # np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        # print diffs
        print "MCMC fitting took %0.3f" % (end)

        model.load_ref(r)
        start = time()
        r = model.fit(data, verbose=True)
        end = time() - start

        diffs = {}
        # print 'r.mus:', r.mus
        for i in gen_mean:
            # diffs[i] = np.min(np.abs(r.mus-gen_mean[i]),0)
            diffs[i] = np.abs(r.mus[i] - gen_mean[i])
            # print i, gen_mean[i],r.mus[i], diffs[i],
            # np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
示例#10
0
class DPMixtureModel_TestCase(unittest.TestCase):
    def generate_data(self, n=1e4, k=2, ncomps=3, seed=1):

        npr.seed(seed)
        data_concat = []
        labels_concat = []

        for j in xrange(ncomps):
            mean = gen_mean[j]
            sd = gen_sd[j]
            corr = gen_corr[j]

            cov = np.empty((k, k))
            cov.fill(corr)
            cov[np.diag_indices(k)] = 1
            cov *= np.outer(sd, sd)

            num = int(n * group_weights[j])
            rvs = multivariate_normal(mean, cov, size=num)

            data_concat.append(rvs)
            labels_concat.append(np.repeat(j, num))

        return (np.concatenate(labels_concat), np.concatenate(data_concat, axis=0))

    def testListFitting(self):
        true1, data1 = self.generate_data()
        true2, data2 = self.generate_data()

        model = DPMixtureModel(3, 2000, 100, 1, type="BEM")
        rs = model.fit([data1, data2])
        assert len(rs) == 2
        for r in rs:
            print "mu ", r.mus
            diffs = {}
            for i in gen_mean:

                diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
                # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
                assert np.vdot(diffs[i], diffs[i]) < 2

        fcm1 = FCMdata("test_fcm1", data1, ["fsc", "ssc"], [0, 1])
        fcm2 = FCMdata("test_fcm2", data2, ["fsc", "ssc"], [0, 1])

        c = FCMcollection("fcms", [fcm1, fcm2])

        rs = model.fit(c)
        assert len(rs) == 2
        for r in rs:
            print "mu ", r.mus
            diffs = {}
            for i in gen_mean:

                diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
                # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
                assert np.vdot(diffs[i], diffs[i]) < 4

    def testBEMFitting(self):
        print "starting BEM"
        true, data = self.generate_data()
        m = data.mean(0)
        s = data.std(0)
        #        true_mean = {}
        #        for i in gen_mean:
        #            true_mean[i] = (gen_mean[i]-m)/s

        model = DPMixtureModel(3, 2000, 100, 1, type="BEM")
        model.seed = 1
        start = time()
        r = model.fit(data, verbose=False)

        end = time() - start

        diffs = {}
        for i in gen_mean:
            diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
            # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        print "BEM fitting took %0.3f" % (end)

    #
    def testMCMCFitting(self):
        print "starting mcmc"
        true, data = self.generate_data()
        m = data.mean(0)
        s = data.std(0)
        #        true_mean = {}
        #        for i in gen_mean:
        #            true_mean[i] = (gen_mean[i]-m)/s

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1
        start = time()
        r = model.fit(data, verbose=10)
        end = time() - start

        diffs = {}
        # print 'r.mus:', r.mus
        for i in gen_mean:
            diffs[i] = np.min(np.abs(r.mus - gen_mean[i]), 0)
            # print i, gen_mean[i], diffs[i], np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        # print diffs
        print r.classify(data)
        print "MCMC fitting took %0.3f" % (end)

    def testRefernce(self):
        print "starting mcmc"
        true, data = self.generate_data()
        m = data.mean(0)
        s = data.std(0)
        #        true_mean = {}
        #        for i in gen_mean:
        #            true_mean[i] = (gen_mean[i]-m)/s

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1
        model.load_ref(array(true))
        start = time()
        r = model.fit(data, verbose=True)
        end = time() - start

        diffs = {}
        # print 'r.mus:', r.mus
        for i in gen_mean:
            # diffs[i] = np.min(np.abs(r.mus-gen_mean[i]),0)
            diffs[i] = np.abs(r.mus[i] - gen_mean[i])
            # print i, gen_mean[i],r.mus[i], diffs[i], np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        # print diffs
        print "MCMC fitting took %0.3f" % (end)

        model.load_ref(r)
        start = time()
        r = model.fit(data, verbose=True)
        end = time() - start

        diffs = {}
        # print 'r.mus:', r.mus
        for i in gen_mean:
            # diffs[i] = np.min(np.abs(r.mus-gen_mean[i]),0)
            diffs[i] = np.abs(r.mus[i] - gen_mean[i])
            # print i, gen_mean[i],r.mus[i], diffs[i], np.vdot(diffs[i],diffs[i])
            assert np.vdot(diffs[i], diffs[i]) < 1
        # print diffs

    def setUp(self):
        self.mu = array([0, 0])
        self.sig = eye(2)
        self.pnts = multivariate_normal(self.mu, self.sig, 1000)
        self.k = 16
        self.niter = 10
        self.model = DPMixtureModel(self.k, self.niter, 0, 1)

    def testModel(self):
        r = self.model.fit(self.pnts, verbose=False)
        assert isinstance(r, DPMixture)
        mus = r.mus
        assert mus.shape == (16, 2)

    def testModel_prior(self):
        self.model.load_mu(self.mu.reshape(1, 2))
        self.model.load_sigma(self.sig.reshape(1, 2, 2))
        r = self.model.fit(self.pnts, verbose=False)
        assert isinstance(r, DPMixture)
        mus = r.mus
        assert mus.shape == (16, 2)

    def testModel_datatypes(self):

        r = self.model.fit(self.pnts.astype("int"))
        self.assertIsInstance(r, DPMixture, "failed to fit integer data")

        r = self.model.fit(self.pnts.astype("float"))
        self.assertIsInstance(r, DPMixture, "failed to fit float data")

        r = self.model.fit(self.pnts.astype("double"))
        self.assertIsInstance(r, DPMixture, "failed to fit double data")
示例#11
0
 # Fill rows 200-599 of xs in blocks of 100, each block drawn around one
 # row of mus with the shared covariance sigma.
 # NOTE(review): xs, mus, sigma and the fill of rows 0-199 are defined
 # before this excerpt and are not visible here.
 xs[200:300, :] = numpy.random.multivariate_normal(mus[2, :], sigma, 100)
 xs[300:400, :] = numpy.random.multivariate_normal(mus[3, :], sigma, 100)
 xs[400:500, :] = numpy.random.multivariate_normal(mus[4, :], sigma, 100)
 xs[500:600, :] = numpy.random.multivariate_normal(mus[5, :], sigma, 100)
 # Labels 0-5, one per block of 100 consecutive rows.
 true_z = numpy.zeros((600,))
 true_z[0:100] = 0
 true_z[100:200] = 1
 true_z[200:300] = 2
 true_z[300:400] = 3
 true_z[400:500] = 4
 true_z[500:600] = 5
 # Panel 1 of 3: the points coloured by the labels assigned above.
 pylab.subplot(1, 3, 1)
 pylab.scatter(xs[:, 0], xs[:, 1], c=true_z)
 pylab.title("True")
 # pylab.show()
 # NOTE(review): here the data is passed to the constructor and fit() gets
 # no data argument -- presumably a different DPMixtureModel API version
 # than the one taking data in fit(); confirm before reusing.
 model = DPMixtureModel(xs, 6, 5000, 100, 1)
 model.gamma = 1.0
 model.fit(verbose=True)
 # Panel 2 of 3: the points coloured by the fitted results' classification,
 # with the fitted means overlaid in red.
 pylab.subplot(1, 3, 2)
 # model_z is not used in the visible lines.
 model_z = model.get_class()
 results = model.get_results()
 model_class = results.classify(xs)
 pylab.scatter(xs[:, 0], xs[:, 1], c=model_class)
 pylab.scatter(results.mus()[:, 0], results.mus()[:, 1], c='red')
 pylab.title("component")
 # Panel 3 of 3: the points coloured by the classification of the object
 # returned by make_modal(); the label range is printed first.
 modal = results.make_modal()
 z = modal.classify(xs)
 print z.min(), z.max()
 pylab.subplot(1, 3, 3)
 pylab.scatter(xs[:, 0], xs[:, 1], c=z)
 pylab.title("modal")
示例#12
0
class DPMixtureModelTestCase(unittest.TestCase):
    """Fit DPMixtureModel to synthetic Gaussian mixtures and check the results.

    The mixture parameters are read from the module-level ``gen_mean``,
    ``gen_sd``, ``gen_corr`` and ``group_weights``.
    """

    def setUp(self):
        """Create a 2-D sample of 1000 points and a model with 16 clusters.

        The sample is drawn from a normal distribution with zero mean and
        identity covariance.
        """
        self.mu = array([0, 0])
        self.sig = eye(2)
        self.pnts = multivariate_normal(self.mu, self.sig, size=(1000,))
        self.k = 16
        self.niter = 10
        self.model = DPMixtureModel(self.k, self.niter, 0, 1)

    def generate_data(self, n=1e4, k=2, ncomps=3, seed=1):
        """Draw a labelled sample from the module-level mixture definition.

        n: nominal total sample size; component ``j`` receives
            ``int(n * group_weights[j])`` points.
        k: dimension of each point.
        ncomps: number of components to draw, indexed ``0 .. ncomps - 1``.
        seed: value passed to ``np.random.seed`` so repeated calls return
            the same data.

        Returns a ``(labels, data)`` tuple of concatenated arrays, with one
        label per data row.
        """
        np.random.seed(seed)
        data_concat = []
        labels_concat = []

        for j in xrange(ncomps):
            mean = gen_mean[j]
            sd = gen_sd[j]
            corr = gen_corr[j]

            # Constant-correlation matrix with unit diagonal, scaled by the
            # outer product of the standard deviations.
            cov = np.empty((k, k))
            cov.fill(corr)
            cov[np.diag_indices(k)] = 1
            cov *= np.outer(sd, sd)

            num = int(n * group_weights[j])
            data_concat.append(multivariate_normal(mean, cov, size=(num,)))
            labels_concat.append(np.repeat(j, num))

        return (np.concatenate(labels_concat),
                np.concatenate(data_concat, axis=0))

    def _assert_means_recovered(self, mus, matched=False):
        """Assert that every entry of ``gen_mean`` is close to ``mus``.

        mus: fitted component means, one row per component.
        matched: when True, ``gen_mean[i]`` is compared with ``mus[i]``;
            otherwise the per-coordinate minimum absolute difference over
            all rows of ``mus`` is used.

        The squared norm of the difference must be strictly below 1.
        """
        for i in gen_mean:
            if matched:
                diff = np.abs(mus[i] - gen_mean[i])
            else:
                diff = np.min(np.abs(mus - gen_mean[i]), 0)
            self.assertLess(
                np.vdot(diff, diff), 1,
                'generating mean %r not recovered' % (i,))

    def test_list_fitting(self):
        """Fit two data sets at once, as a list and as an FCMCollection."""
        _, data1 = self.generate_data()
        _, data2 = self.generate_data()

        model = DPMixtureModel(3, 2000, 100, 1, type='BEM')
        rs = model.fit([data1, data2])
        self.assertEqual(len(rs), 2)
        for r in rs:
            self._assert_means_recovered(r.mus)

        fcm1 = FCMdata('test_fcm1', data1, ['fsc', 'ssc'], [0, 1])
        fcm2 = FCMdata('test_fcm2', data2, ['fsc', 'ssc'], [0, 1])

        c = FCMCollection('fcms', [fcm1, fcm2])

        rs = model.fit(c)
        self.assertEqual(len(rs), 2)
        for r in rs:
            self._assert_means_recovered(r.mus)

    def test_bem_fitting(self):
        """Check that a seeded BEM fit recovers the generating means."""
        _, data = self.generate_data()

        model = DPMixtureModel(3, 2000, 100, 1, type='BEM')
        model.seed = 1
        r = model.fit(data, verbose=False)

        self._assert_means_recovered(r.mus)

    def test_mcmc_fitting(self):
        """Check that a seeded default-type fit recovers the generating means."""
        _, data = self.generate_data()

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1
        r = model.fit(data, verbose=10)

        self._assert_means_recovered(r.mus)

    def test_reference(self):
        """Fit with a loaded reference and compare the means index-wise.

        The first fit uses the true labels as reference, the second uses
        the first result.
        """
        true, data = self.generate_data()

        model = DPMixtureModel(3, 100, 100, 1)
        model.seed = 1
        model.load_ref(array(true))
        r = model.fit(data, verbose=True)
        self._assert_means_recovered(r.mus, matched=True)

        # Second round: reuse the first result as the reference.
        model.load_ref(r)
        r = model.fit(data, verbose=True)
        self._assert_means_recovered(r.mus, matched=True)

    def test_model(self):
        """A plain fit returns a DPMixture with one mean per cluster."""
        r = self.model.fit(self.pnts, verbose=False)
        self.assertIsInstance(r, DPMixture)
        self.assertEqual(r.mus.shape, (self.k, 2))

    def test_model_prior(self):
        """A fit after loading mu and sigma still returns the full mixture."""
        self.model.load_mu(self.mu.reshape(1, 2))
        self.model.load_sigma(self.sig.reshape(1, 2, 2))
        r = self.model.fit(self.pnts, verbose=False)
        self.assertIsInstance(r, DPMixture)
        self.assertEqual(r.mus.shape, (self.k, 2))

    def test_model_datatypes(self):
        """Fitting works for integer, float and double input arrays."""
        r = self.model.fit(self.pnts.astype('int'))
        self.assertIsInstance(r, DPMixture, 'failed to fit integer data')

        r = self.model.fit(self.pnts.astype('float'))
        self.assertIsInstance(r, DPMixture, 'failed to fit float data')

        r = self.model.fit(self.pnts.astype('double'))
        self.assertIsInstance(r, DPMixture, 'failed to fit double data')
示例#13
0
def fit_one(args):
    """Fit a DPMixtureModel to the data of one ``(data, name)`` pair.

    The name part of the pair is unpacked but not used. Returns whatever
    ``DPMixtureModel.fit`` returns.
    """
    data, _name = args
    mixture_model = DPMixtureModel(nclusts=8, niter=100, burnin=0, last=1)
    return mixture_model.fit(data, verbose=10)