示例#1
0
文件: cluster.py 项目: whitews/fcm
    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Standardize the datasets, fit an HDP normal mixture and return
        the results.

        datasets: an FCMCollection (converted with to_list()) or a sequence
            of 2-D arrays that all have the same number of columns.
        verbose: forwarded to HDPNormalMixture.
        tune_interval: forwarded to HDPNormalMixture.sample().

        Returns whatever self.get_results() returns.
        Raises RuntimeError when the datasets differ in column count.
        """
        if isinstance(datasets, FCMCollection):
            datasets = datasets.to_list()
        self.d = datasets[0].shape[1]

        # Check the widths before stacking.  The check used to run after
        # vstack() (presumably numpy's), which itself fails on arrays of
        # unequal width, so this RuntimeError never reached the caller.
        for dataset in datasets:
            if dataset.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled per-column mean and standard deviation over all datasets;
        # kept on self so get_results() can undo the standardization.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        # NOTE(review): a constant column gives a zero in self.s and a
        # division by zero here - confirm inputs never contain one.
        standardized = [(i - self.m) / self.s for i in datasets]

        # Prior hooks run only when the corresponding prior was supplied.
        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        # Use the configured seed, otherwise the microsecond part of the
        # current time.
        if self.seed is not None:
            seed(self.seed)
        else:
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(standardized,
                                    ncomp=self.nclusts,
                                    gamma0=self.gamma0,
                                    m0=self.m0,
                                    nu0=self.nu0,
                                    Phi0=self.Phi0,
                                    e0=self.e0,
                                    f0=self.f0,
                                    g0=self.g0,
                                    h0=self.h0,
                                    mu0=self._prior_mu,
                                    Sigma0=self._prior_sigma,
                                    weights0=self._prior_pi,
                                    alpha0=self.alpha0,
                                    gpu=self.device,
                                    parallel=self.parallel,
                                    verbose=verbose)
        self.hdp.sample(niter=self.niter,
                        nburn=self.burnin,
                        thin=1,
                        ident=self.ident,
                        tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()
示例#2
0
    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Standardize the datasets, fit an HDP normal mixture and return
        the results.

        datasets: an FCMcollection (converted with to_list()) or a sequence
            of 2-D arrays that all have the same number of columns.
        verbose: forwarded to HDPNormalMixture.
        tune_interval: forwarded to HDPNormalMixture.sample().

        Returns whatever self.get_results() returns.
        Raises RuntimeError when the datasets differ in column count.
        """
        if isinstance(datasets, FCMcollection):
            datasets = datasets.to_list()
        self.d = datasets[0].shape[1]

        # Validate first: stacking arrays of unequal width fails inside
        # vstack() (presumably numpy's) before a later check could raise
        # the intended RuntimeError.
        for dataset in datasets:
            if dataset.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled per-column statistics used to standardize every dataset.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        # Prior hooks run only when the corresponding prior was supplied.
        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        # Use the configured seed, otherwise the microsecond part of the
        # current time.
        if self.seed is not None:
            seed(self.seed)
        else:
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(
            standardized,
            ncomp=self.nclusts,
            gamma0=self.gamma0,
            m0=self.m0,
            nu0=self.nu0,
            Phi0=self.Phi0,
            e0=self.e0,
            f0=self.f0,
            g0=self.g0,
            h0=self.h0,
            mu0=self._prior_mu,
            Sigma0=self._prior_sigma,
            weights0=self._prior_pi,
            alpha0=self.alpha0,
            gpu=self.device,
            parallel=self.parallel,
            verbose=verbose)
        self.hdp.sample(
            niter=self.niter,
            nburn=self.burnin,
            thin=1,
            ident=self.ident,
            tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()
示例#3
0
import numpy.random as npr
from dpmix import HDPNormalMixture


if __name__ == '__main__':
    # Repeatedly fit freshly generated uniform data and release both
    # samplers at the end of every round.
    nclust = 256
    niter = 10
    burnin = 10
    device = 1
    max_events = 50000
    num_files = 10

    seed = 9
    # Uncomment to make the generated data reproducible.
    #npr.seed(seed)
    for it in range(10, 20):
        # One (max_events, 5) block of uniform draws in [-5, 5) per file.
        xs = [npr.uniform(-5, 5, (max_events, 5)) for _ in range(num_files)]
        # A single-argument print() writes the same "0 1 ... 9" line the
        # old print statements produced and also parses on Python 3.
        print(' '.join(str(i) for i in range(num_files)))

        mcmc = HDPNormalMixture(xs,
                                ncomp=nclust,
                                gpu=device,
                                parallel=True,
                                verbose=2)
        mcmc.sample(burnin, nburn=0, tune_interval=5)
        # Second sampler built from the first one, sampled with ident=True.
        imcmc = HDPNormalMixture(mcmc, verbose=2)
        imcmc.sample(niter, nburn=0, ident=True)

        del mcmc
        del imcmc
示例#4
0
from dpmix import HDPNormalMixture

#import gpustats as gs

if __name__ == '__main__':

    N = int(1e5)
    K = 2
    J = 4
    ncomps = 3
    true_labels, data = generate_data(n=N, k=K, ncomps=ncomps)

    # Standardize each column to zero mean and unit standard deviation.
    data = data - data.mean(0)
    data = data / data.std(0)

    # Shuffle the rows, then cut them into J equally sized datasets.
    ind = np.arange(N)
    np.random.shuffle(ind)
    all_data = data[ind].copy()
    # Floor division keeps the slice bounds integral on Python 3 as well;
    # on Python 2 it equals the former integer "/".
    chunk = N // J
    data = [all_data[chunk * i:chunk * (i + 1)].copy() for i in range(J)]

    mcmc = HDPNormalMixture(data,
                            ncomp=100,
                            gpu=[0, 1, 2],
                            parallel=True,
                            verbose=100)
    mcmc.sample(2, nburn=5, tune_interval=100)
    # Second sampler built from the first one, sampled with ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(2, nburn=0, ident=True)

    # Last stored sample of each quantity; single-argument print() behaves
    # identically on Python 2 and Python 3.
    print(imcmc.mu[-1])
    print(imcmc.weights[-1])
    print(imcmc.beta[-1])
示例#5
0
class HDPMixtureModel(DPMixtureModel):

    '''
    HDPMixtureModel(nclusts, niter=1000, burnin= 100, last= None)
    nclusts = number of clusters to fit
    niter = number of mcmc iterations
    burnin = number of mcmc burnin iterations
    last = number of mcmc iterations to draw samples from. if None last = niter

    '''

    def __init__(self, *args, **kwargs):
        '''
        Initialize the base model, then set g0 and h0, which fit() passes
        to HDPNormalMixture.
        '''
        super(HDPMixtureModel, self).__init__(*args, **kwargs)
        self.g0 = 0.1
        self.h0 = 0.1

    def fit(self, datasets, verbose=False, tune_interval=100):
        '''
        Standardize the datasets, fit an HDP normal mixture and return
        the results.

        datasets: an FCMcollection (converted with to_list()) or a sequence
            of 2-D arrays that all have the same number of columns.
        verbose: forwarded to HDPNormalMixture.
        tune_interval: forwarded to HDPNormalMixture.sample().

        Returns the value of get_results().
        Raises RuntimeError when the datasets differ in column count.
        '''
        if isinstance(datasets, FCMcollection):
            datasets = datasets.to_list()
        self.d = datasets[0].shape[1]

        # Check the widths before stacking.  The check used to run after
        # vstack() (presumably numpy's), which itself fails on arrays of
        # unequal width, so this RuntimeError never reached the caller.
        for dataset in datasets:
            if dataset.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled per-column statistics; get_results() uses them to map the
        # fitted parameters back to the original scale.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        # Prior hooks run only when the corresponding prior was supplied.
        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        # Use the configured seed, otherwise the microsecond part of the
        # current time.
        if self.seed is not None:
            seed(self.seed)
        else:
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(
            standardized,
            ncomp=self.nclusts,
            gamma0=self.gamma0,
            m0=self.m0,
            nu0=self.nu0,
            Phi0=self.Phi0,
            e0=self.e0,
            f0=self.f0,
            g0=self.g0,
            h0=self.h0,
            mu0=self._prior_mu,
            Sigma0=self._prior_sigma,
            weights0=self._prior_pi,
            alpha0=self.alpha0,
            gpu=self.device,
            parallel=self.parallel,
            verbose=verbose)
        self.hdp.sample(
            niter=self.niter,
            nburn=self.burnin,
            thin=1,
            ident=self.ident,
            tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()

    def get_results(self):
        """
        get the results of the fitted mixture model

        Builds an HDPMixture from the last self.last stored samples
        (self.last is set to self.niter when it is None).  Returns None
        while self._run is false.
        """

        if self.last is None:
            self.last = self.niter

        if not self._run:
            return None

        last = self.last
        flat = self.nclusts * last

        # One row of flattened weights per dataset.
        pis = array([self.hdp.weights[-last:, k, :].flatten()
                     for k in range(self.ndatasets)])
        # Undo the standardization applied in fit(): means are rescaled
        # and shifted, covariances are rescaled by outer(s, s).
        mus = self.hdp.mu[-last:].reshape(flat, self.d) * self.s + self.m
        sigmas = (self.hdp.Sigma[-last:].reshape(flat, self.d, self.d) *
                  outer(self.s, self.s))
        return HDPMixture(pis, mus, sigmas, last, self.m, self.s, self.ident)
示例#6
0
import numpy.random as npr
from dpmix import HDPNormalMixture

if __name__ == '__main__':
    # Repeatedly fit freshly generated uniform data and release both
    # samplers at the end of every round.
    nclust = 256
    niter = 10
    burnin = 10
    device = 1
    max_events = 50000
    num_files = 10

    seed = 9
    # Uncomment to make the generated data reproducible.
    #npr.seed(seed)
    for it in range(10, 20):
        # One (max_events, 5) block of uniform draws in [-5, 5) per file.
        xs = [npr.uniform(-5, 5, (max_events, 5)) for _ in range(num_files)]
        # A single-argument print() writes the same "0 1 ... 9" line the
        # old print statements produced and also parses on Python 3.
        print(' '.join(str(i) for i in range(num_files)))

        mcmc = HDPNormalMixture(xs,
                                ncomp=nclust,
                                gpu=device,
                                parallel=True,
                                verbose=2)
        mcmc.sample(burnin, nburn=0, tune_interval=5)
        # Second sampler built from the first one, sampled with ident=True.
        imcmc = HDPNormalMixture(mcmc, verbose=2)
        imcmc.sample(niter, nburn=0, ident=True)

        del mcmc
        del imcmc
示例#7
0
class HDPMixtureModel(DPMixtureModel):
    '''
    HDPMixtureModel(nclusts, niter=1000, burnin= 100, last= None)
    nclusts = number of clusters to fit
    niter = number of mcmc iterations
    burnin = number of mcmc burnin iterations
    last = number of mcmc iterations to draw samples from. if None last = niter

    '''
    def fit(self, datasets, verbose=False, tune_interval=100):
        '''
        Standardize the datasets, fit an HDP normal mixture and return
        the results.

        datasets: an FCMcollection (converted with to_list()) or a sequence
            of 2-D arrays that all have the same number of columns.
        verbose: forwarded to HDPNormalMixture.
        tune_interval: forwarded to HDPNormalMixture.sample().

        Returns the value of get_results().
        Raises RuntimeError when the datasets differ in column count.
        '''
        if isinstance(datasets, FCMcollection):
            datasets = datasets.to_list()
        self.d = datasets[0].shape[1]

        # Check the widths before stacking.  The check used to run after
        # vstack() (presumably numpy's), which itself fails on arrays of
        # unequal width, so this RuntimeError never reached the caller.
        for dataset in datasets:
            if dataset.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy() for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled per-column statistics; get_results() uses them to map the
        # fitted parameters back to the original scale.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        # Prior hooks run only when the corresponding prior was supplied.
        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        # Use the configured seed, otherwise the microsecond part of the
        # current time.
        if self.seed is not None:
            seed(self.seed)
        else:
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(standardized,
                                    ncomp=self.nclusts,
                                    gamma0=self.gamma0,
                                    m0=self.m0,
                                    nu0=self.nu0,
                                    Phi0=self.Phi0,
                                    e0=self.e0,
                                    f0=self.f0,
                                    mu0=self._prior_mu,
                                    Sigma0=self._prior_sigma,
                                    weights0=self._prior_pi,
                                    alpha0=self.alpha0,
                                    gpu=self.device,
                                    parallel=self.parallel,
                                    verbose=verbose)
        self.hdp.sample(niter=self.niter,
                        nburn=self.burnin,
                        thin=1,
                        ident=self.ident,
                        tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()

    def get_results(self):
        """
        get the results of the fitted mixture model

        Returns a list with one DPMixture per dataset, each built from the
        last self.last stored samples (self.last is set to self.niter when
        it is None).  Returns None while self._run is false.
        """

        if self.last is None:
            self.last = self.niter

        if not self._run:
            return None

        # Covariance rescaling factor; it is the same for every cluster,
        # so compute it once instead of inside the innermost loop.
        cov_scale = outer(self.s, self.s)

        allresults = []
        for k in range(self.ndatasets):
            rslts = []
            # Walk the stored samples backwards from the most recent one.
            for i in range(self.last):
                for j in range(self.nclusts):
                    # Cluster parameters mapped back to the original scale.
                    tmp = DPCluster(
                        self.hdp.weights[-(i + 1), k, j],
                        (self.hdp.mu[-(i + 1), j] * self.s) + self.m,
                        self.hdp.Sigma[-(i + 1), j] * cov_scale)
                    # Keep the standardized parameters alongside.
                    tmp.nmu = self.hdp.mu[-(i + 1), j]
                    tmp.nsigma = self.hdp.Sigma[-(i + 1), j]
                    rslts.append(tmp)
            allresults.append(
                DPMixture(rslts, self.last, self.m, self.s, self.ident))
        return allresults
示例#8
0
    all_data = np.r_[all_data, data[i]]

# Pooled per-column mean and standard deviation of all_data.
dmean = all_data.mean(0)
dstd = all_data.std(0)

# Standardize every dataset in place with the pooled statistics.
for d in data:
    d -= dmean
    d /= dstd

## run some benchmarks!
if __name__ == '__main__':

    # Timed run with gpu=[0, 1, 2] and parallel=True.
    t1 = time()
    mcmc = HDPNormalMixture(data,
                            ncomp=100,
                            gpu=[0, 1, 2],
                            parallel=True,
                            verbose=100)
    mcmc.sample(1000, nburn=2000, tune_interval=50)
    # Second sampler built from the first one, sampled with ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(1000, nburn=0, ident=True)
    t1 = time() - t1
    # Single-argument print() gives the same output on Python 2 and also
    # parses on Python 3.
    print('ALL GPU: ' + str(t1))

    # Timed run with gpu=[0] and parallel=False.
    t2 = time()
    mcmc = HDPNormalMixture(data,
                            ncomp=100,
                            gpu=[0],
                            parallel=False,
                            verbose=100)
    mcmc.sample(1000, nburn=2000, tune_interval=50)
示例#9
0
文件: cluster.py 项目: whitews/fcm
class HDPMixtureModel(DPMixtureModel):
    """
    HDPMixtureModel(nclusts, niter=1000, burnin= 100, last= None)
    nclusts = number of clusters to fit
    niter = number of mcmc iterations
    burnin = number of mcmc burnin iterations
    last = number of mcmc iterations to draw samples from. if None last = niter

    """
    def __init__(self, *args, **kwargs):
        """
        Initialize the base model, then set g0 and h0, which fit() passes
        to HDPNormalMixture.
        """
        super(HDPMixtureModel, self).__init__(*args, **kwargs)
        self.g0 = 0.1
        self.h0 = 0.1

    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Standardize the datasets, fit an HDP normal mixture and return
        the results.

        datasets: an FCMCollection (converted with to_list()) or a sequence
            of 2-D arrays that all have the same number of columns.
        verbose: forwarded to HDPNormalMixture.
        tune_interval: forwarded to HDPNormalMixture.sample().

        Returns the value of get_results().
        Raises RuntimeError when the datasets differ in column count.
        """
        if isinstance(datasets, FCMCollection):
            datasets = datasets.to_list()
        self.d = datasets[0].shape[1]

        # Check the widths before stacking.  The check used to run after
        # vstack() (presumably numpy's), which itself fails on arrays of
        # unequal width, so this RuntimeError never reached the caller.
        for dataset in datasets:
            if dataset.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled per-column statistics; get_results() uses them to map the
        # fitted parameters back to the original scale.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        # Prior hooks run only when the corresponding prior was supplied.
        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        # Use the configured seed, otherwise the microsecond part of the
        # current time.
        if self.seed is not None:
            seed(self.seed)
        else:
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(standardized,
                                    ncomp=self.nclusts,
                                    gamma0=self.gamma0,
                                    m0=self.m0,
                                    nu0=self.nu0,
                                    Phi0=self.Phi0,
                                    e0=self.e0,
                                    f0=self.f0,
                                    g0=self.g0,
                                    h0=self.h0,
                                    mu0=self._prior_mu,
                                    Sigma0=self._prior_sigma,
                                    weights0=self._prior_pi,
                                    alpha0=self.alpha0,
                                    gpu=self.device,
                                    parallel=self.parallel,
                                    verbose=verbose)
        self.hdp.sample(niter=self.niter,
                        nburn=self.burnin,
                        thin=1,
                        ident=self.ident,
                        tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()

    def get_results(self):
        """
        get the results of the fitted mixture model

        Builds an HDPMixture from the last self.last stored samples
        (self.last is set to self.niter when it is None).  Returns None
        while self._run is false.
        """
        if self.last is None:
            self.last = self.niter

        if not self._run:
            return None

        last = self.last
        flat = self.nclusts * last

        # One row of flattened weights per dataset.
        pis = array([
            self.hdp.weights[-last:, k, :].flatten()
            for k in range(self.ndatasets)
        ])
        # Undo the standardization applied in fit(): means are rescaled
        # and shifted, covariances are rescaled by outer(s, s).
        mus = self.hdp.mu[-last:].reshape(flat, self.d) * self.s + self.m
        sigmas = (self.hdp.Sigma[-last:].reshape(flat, self.d, self.d) *
                  outer(self.s, self.s))
        return HDPMixture(pis, mus, sigmas, last, self.m, self.s,
                          self.ident)
示例#10
0
文件: test_hdp.py 项目: brodyh/dpmix
from dpmix import HDPNormalMixture

#import gpustats as gs

if __name__ == '__main__':

    N = int(1e5)
    K = 2
    J = 4
    ncomps = 3
    true_labels, data = generate_data(n=N, k=K, ncomps=ncomps)

    # Standardize each column to zero mean and unit standard deviation.
    data = data - data.mean(0)
    data = data / data.std(0)

    # Shuffle the rows, then cut them into J equally sized datasets.
    ind = np.arange(N)
    np.random.shuffle(ind)
    all_data = data[ind].copy()
    # Floor division keeps the slice bounds integral on Python 3 as well;
    # on Python 2 it equals the former integer "/".
    chunk = N // J
    data = [all_data[chunk * i:chunk * (i + 1)].copy() for i in range(J)]

    mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0, 1, 2], parallel=True,
                            verbose=100)
    mcmc.sample(2, nburn=5, tune_interval=100)
    # Second sampler built from the first one, sampled with ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(2, nburn=0, ident=True)

    # Last stored sample of each quantity; single-argument print() behaves
    # identically on Python 2 and Python 3.
    print(imcmc.mu[-1])
    print(imcmc.weights[-1])
    print(imcmc.beta[-1])



    

示例#11
0
# Append the datasets at index 1 and 2 to the pooled array.
for i in range(1, 3):
    all_data = np.r_[all_data, data[i]]

# Pooled per-column mean and standard deviation.
dmean = all_data.mean(0)
dstd = all_data.std(0)

# Standardize every dataset in place with the pooled statistics.
for d in data:
    d -= dmean
    d /= dstd


## run some benchmarks!
if __name__ == "__main__":

    # Timed run with gpu=[0, 1, 2] and parallel=True.
    t1 = time()
    mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0, 1, 2], parallel=True, verbose=100)
    mcmc.sample(1000, nburn=2000, tune_interval=50)
    # Second sampler built from the first one, sampled with ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(1000, nburn=0, ident=True)
    t1 = time() - t1
    # Single-argument print() gives the same output on Python 2 and also
    # parses on Python 3.
    print("ALL GPU: " + str(t1))

    # Timed run with gpu=[0] and parallel=False.
    t2 = time()
    mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0], parallel=False, verbose=100)
    mcmc.sample(1000, nburn=2000, tune_interval=50)
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(1000, nburn=0, ident=True)
    t2 = time() - t2
    print("One GPU: " + str(t2))

    t4 = time()