Example #1
    def _update_beta(self, stick_beta, beta, stick_weights, alpha0, alpha):
        old_stick_beta = stick_beta.copy()
        old_beta = beta.copy()
        for k in xrange(self.ncomp - 1):
            # get initial logpost
            lpost = beta_post(stick_beta, beta, stick_weights, float(alpha0),
                              float(alpha))

            # sample new beta from reflected normal
            prop = stats.norm.rvs(stick_beta[k], self.prop_scale[k])
            while prop > (1 - 1e-9) or prop < 1e-9:
                if prop > 1 - 1e-9:
                    prop = 2 * (1 - 1e-9) - prop
                else:
                    prop = 2 * 1e-9 - prop
            stick_beta[k] = prop
            beta = break_sticks(stick_beta)

            # get new posterior
            lpost_new = beta_post(stick_beta, beta, stick_weights,
                                  float(alpha0), float(alpha))

            # accept or reject
            if stats.expon.rvs() > lpost - lpost_new:
                #accept
                self.AR[k] += 1
            else:
                stick_beta[k] = old_stick_beta[k]
                beta = break_sticks(stick_beta)
        return stick_beta, beta
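The helper break_sticks is called above but not shown in any of these listings. As a rough sketch only (an assumption about what a stick-breaking helper does, not dpmix's actual implementation), it maps the ncomp-1 stick proportions in (0, 1) to a weight vector of length ncomp via the usual stick-breaking construction, with the last component taking whatever mass is left:

import numpy as np

def break_sticks_sketch(stick_beta):
    # hypothetical stand-in for dpmix's break_sticks: ncomp-1 proportions -> ncomp weights
    v = np.asarray(stick_beta, dtype=float)
    beta = np.empty(len(v) + 1)
    remaining = 1.0
    for k, vk in enumerate(v):
        beta[k] = vk * remaining      # take a fraction of the remaining stick
        remaining *= 1.0 - vk         # shrink what is left
    beta[-1] = remaining              # last component absorbs the leftover mass
    return beta

# break_sticks_sketch([0.5, 0.5]) -> array([0.5, 0.25, 0.25])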
Example #2
File: hdp.py  Project: jfrelinger/dpmix
    def _update_beta(self, stick_beta, beta, stick_weights, alpha0, alpha):

        old_stick_beta = stick_beta.copy()
        old_beta = beta.copy()
        for k in xrange(self.ncomp-1):
            # get initial logpost
            lpost = beta_post(stick_beta, beta, stick_weights, float(alpha0), float(alpha))

            # sample new beta from reflected normal
            prop = stats.norm.rvs(stick_beta[k], self.prop_scale[k])
            while prop > (1-1e-9) or prop < 1e-9:
                if prop > 1-1e-9:
                    prop = 2*(1-1e-9) - prop
                else:
                    prop = 2*1e-9 - prop
            stick_beta[k] = prop
            beta = break_sticks(stick_beta)

            # get new posterior
            lpost_new = beta_post(stick_beta, beta, stick_weights, float(alpha0), float(alpha))

            # accept or reject
            if stats.expon.rvs() > lpost - lpost_new:
                # accept
                self.AR[k] += 1
            else:
                stick_beta[k] = old_stick_beta[k]
                beta = break_sticks(stick_beta)
        return stick_beta, beta
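The accept/reject line uses a standard identity: if E ~ Exp(1) then P(E > d) = exp(-d) for d >= 0, so accepting when stats.expon.rvs() > lpost - lpost_new accepts the proposal with probability min(1, exp(lpost_new - lpost)), i.e. an ordinary random-walk Metropolis step expressed on the log scale. A small numerical check of that equivalence, with an illustrative (hypothetical) log ratio:

import numpy as np
from scipy import stats

log_ratio = -0.7   # hypothetical lpost_new - lpost; target acceptance prob exp(-0.7) ~= 0.497
n = 200000

# the form used above: accept if an Exp(1) draw exceeds the drop in log posterior
accept_expon = stats.expon.rvs(size=n) > -log_ratio
# the textbook form: accept if log(U) < lpost_new - lpost with U ~ Uniform(0, 1)
accept_uniform = np.log(stats.uniform.rvs(size=n)) < log_ratio

print(accept_expon.mean(), accept_uniform.mean(), np.exp(log_ratio))
# the three numbers agree up to Monte Carlo error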
Example #3
    def __init__(self,
                 data,
                 ncomp=256,
                 gamma0=10,
                 m0=None,
                 nu0=None,
                 Phi0=None,
                 e0=5,
                 f0=0.1,
                 g0=0.1,
                 h0=0.1,
                 mu0=None,
                 Sigma0=None,
                 weights0=None,
                 alpha0=1,
                 gpu=None,
                 parallel=False,
                 verbose=False):

        if not issubclass(type(data), HDPNormalMixture):
            # check for functioning gpu
            if _has_gpu:
                self.dev_list = np.asarray((0), dtype=np.int)
                self.dev_list.shape = 1
                if gpu is not None:
                    if type(gpu) is bool:
                        self.gpu = gpu
                    else:
                        self.gpu = True
                        self.dev_list = np.asarray(np.abs(gpu), dtype=np.int)
                        if self.dev_list.shape == ():
                            self.dev_list.shape = 1
                        self.dev_list = np.unique(self.dev_list)
                else:
                    self.gpu = True
            else:
                self.gpu = False

            self.parallel = parallel
            # get the data .. should add checks here later
            self.data = [np.asarray(d) for d in data]
            self.ngroups = len(self.data)
            self.ndim = self.data[0].shape[1]
            self.nobs = tuple([d.shape[0] for d in self.data])
            # need for ident code
            self.cumobs = np.zeros(self.ngroups + 1)
            self.cumobs[1:] = np.asarray(self.nobs).cumsum()
            self.ncomp = ncomp

            if m0 is not None:
                if len(m0) == self.ndim:
                    self.mu_prior_mean = m0.copy()
                elif len(m0) == 1:
                    self.mu_prior_mean = m0 * np.ones(self.ndim)
            else:
                self.mu_prior_mean = np.zeros(self.ndim)

                self.gamma = gamma0 * np.ones(ncomp)

            self._set_initial_values(alpha0, nu0, Phi0, mu0, Sigma0, weights0,
                                     e0, f0)
            # initialize hdp specific vars
            if weights0 is None:
                self._weights0 = np.zeros((self.ngroups, self.ncomp),
                                          dtype=np.float)
                self._weights0.fill(1 / self.ncomp)
            else:
                self._weights0 = weights0.copy()
            self._stick_beta0 = stats.beta.rvs(1,
                                               self._alpha0,
                                               size=self.ncomp - 1)
            self._beta0 = break_sticks(self._stick_beta0)
            self._alpha00 = 1.0
            self.e0, self.f0 = g0, h0
            self.prop_scale = 0.05 * np.ones(self.ncomp)
            self.prop_scale[-1] = 1.

        else:
            # get all important vars from input class
            self.data = data.data
            self.ngroups, self.nobs, self.ndim, self.ncomp = data.ngroups, data.nobs, data.ndim, data.ncomp
            self.cumobs = data.cumobs.copy()
            self._weights0 = data.weights[-1].copy()
            self._stick_beta0 = data.stick_beta.copy()
            self._beta0 = break_sticks(self._stick_beta0)
            self.e0, self.f0 = data.e0, data.f0
            self.e, self.f = data.e, data.f
            self._nu0 = data._nu0
            self._Phi0 = data._Phi0
            self.mu_prior_mean = data.mu_prior_mean.copy()
            self.gamma = data.gamma.copy()
            self._alpha0 = data.alpha[-1].copy()
            self._alpha00 = data.alpha0[-1].copy()
            self._weights0 = data.weights[-1].copy()
            self._mu0 = data.mu[-1].copy()
            self._Sigma0 = data.Sigma[-1].copy()
            self.prop_scale = data.prop_scale.copy()
            self.gpu = data.gpu
            if self.gpu:
                self.dev_list = np.unique(data.dev_list)
            self.parallel = data.parallel

        self.AR = np.zeros(self.ncomp)
        # verbosity
        self.verbose = verbose
        # data working var
        self.data_shared_mem = multiprocessing.RawArray(
            'd',
            sum(self.nobs) * self.ndim)
        self.alldata = np.frombuffer(self.data_shared_mem).reshape(
            sum(self.nobs), self.ndim)
        for i in xrange(self.ngroups):
            self.alldata[
                self.cumobs[i]:self.cumobs[i + 1], :] = self.data[i].copy()

        if self.parallel:
            self.num_cores = min(min(multiprocessing.cpu_count(), self.ncomp),
                                 16)
            compsperdev = self.ncomp / self.num_cores
            self.work_queue = [
                multiprocessing.Queue() for i in xrange(self.num_cores)
            ]
            self.result_queue = [
                multiprocessing.Queue() for i in xrange(self.num_cores)
            ]
            self.workers = [
                CPUWorker(np.zeros(
                    (1, 1)), self.gamma, self.mu_prior_mean, self._Phi0,
                          self._nu0, self.work_queue[i], self.result_queue[i])
                for i in xrange(self.num_cores)
            ]
            self.compsdevmap = {}
            cumcomps = 0
            for i in xrange(self.num_cores):
                self.compsdevmap[i] = [
                    int(cumcomps),
                    int(min(cumcomps + compsperdev, self.ncomp))
                ]
                cumcomps += compsperdev
            self.compsdevmap[self.num_cores - 1][1] = self.ncomp

            for thd in self.workers:
                thd.set_data(self.data_shared_mem, sum(self.nobs), self.ndim)
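The compsdevmap loop above gives each worker a contiguous [start, end) block of component indices, with the last worker absorbing the remainder of the integer division (in this Python 2 code, / between two ints already floors). A standalone illustration of the same partition with hypothetical sizes:

ncomp, num_cores = 10, 4
compsperdev = ncomp // num_cores          # 2 components per worker (floored)
compsdevmap = {}
cumcomps = 0
for i in range(num_cores):
    compsdevmap[i] = [int(cumcomps), int(min(cumcomps + compsperdev, ncomp))]
    cumcomps += compsperdev
compsdevmap[num_cores - 1][1] = ncomp     # last worker picks up the leftover components
print(compsdevmap)                        # {0: [0, 2], 1: [2, 4], 2: [4, 6], 3: [6, 10]}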
Example #4
File: hdp.py  Project: jfrelinger/dpmix
    def __init__(self, data, ncomp=256, gamma0=10, m0=None,
                 nu0=None, Phi0=None, e0=5, f0=0.1, g0=0.1, h0=0.1,
                 mu0=None, Sigma0=None, weights0=None, alpha0=1,
                 gpu=None, parallel=False, verbose=False):

        if not issubclass(type(data), HDPNormalMixture):
            # check for functioning gpu
            if _has_gpu:
                import os
                self.dev_list = np.asarray(0, dtype=np.int)
                self.dev_list.shape = 1
                self.dev_list = {os.uname()[1]: self.dev_list}
                if gpu is not None:
                    if type(gpu) is bool:
                        self.gpu = gpu
                    elif type(gpu) is dict:
                        self.gpu = True
                        self.dev_list = gpu.copy()
                        for host in self.dev_list:
                            self.dev_list[host] = np.asarray(
                                self.dev_list[host],
                                dtype=np.int)
                            if self.dev_list[host].shape == ():
                                self.dev_list[host].shape = 1

                    else:
                        self.gpu = True
                        self.dev_list = np.asarray(np.abs(gpu), dtype=np.int)
                        if self.dev_list.shape == ():
                            self.dev_list.shape = 1
                        self.dev_list = np.unique(self.dev_list)
                        self.dev_list = {os.uname()[1]: self.dev_list}
                else:
                    self.gpu = True
            else:
                self.gpu = False

            self.parallel = parallel

            # get the data .. should add checks here later
            self.data = [np.asarray(d) for d in data]
            self.ngroups = len(self.data)
            self.ndim = self.data[0].shape[1]
            self.nobs = tuple([d.shape[0] for d in self.data])

            # need for ident code
            self.cumobs = np.zeros(self.ngroups+1)
            self.cumobs[1:] = np.asarray(self.nobs).cumsum()
            self.ncomp = ncomp

            if m0 is not None:
                if len(m0) == self.ndim:
                    self.mu_prior_mean = m0.copy()
                elif len(m0) == 1:
                    self.mu_prior_mean = m0*np.ones(self.ndim)
            else:
                self.mu_prior_mean = np.zeros(self.ndim)

                self.gamma = gamma0*np.ones(ncomp)

            self._set_initial_values(alpha0, nu0, Phi0, mu0, Sigma0,
                                     weights0, e0, f0)
            # initialize hdp specific vars
            if weights0 is None:
                self._weights0 = np.zeros(
                    (self.ngroups, self.ncomp),
                    dtype=np.float)
                self._weights0.fill(1/self.ncomp)
            else:
                self._weights0 = weights0.copy()
            self._stick_beta0 = stats.beta.rvs(
                1,
                self._alpha0,
                size=self.ncomp-1)
            self._beta0 = break_sticks(self._stick_beta0)
            self._alpha00 = 1.0
            self.e0, self.f0 = g0, h0
            # start out small? more accepts?
            self.prop_scale = 0.01 * np.ones(self.ncomp)
            self.prop_scale[-1] = 1.

        else:
            # get all important vars from input class
            self.data = data.data
            self.ngroups = data.ngroups
            self.nobs = data.nobs
            self.ndim = data.ndim
            self.ncomp = data.ncomp
            self.cumobs = data.cumobs.copy()
            self._weights0 = data.weights[-1].copy()
            self._stick_beta0 = data.stick_beta.copy()
            self._beta0 = break_sticks(self._stick_beta0)
            self.e0, self.f0 = data.e0, data.f0
            self.e, self.f = data.e, data.f
            self._nu0 = data._nu0
            self._Phi0 = data._Phi0
            self.mu_prior_mean = data.mu_prior_mean.copy()
            self.gamma = data.gamma.copy()
            self._alpha0 = data.alpha[-1].copy()
            self._alpha00 = data.alpha0[-1].copy()
            self._weights0 = data.weights[-1].copy()
            self._mu0 = data.mu[-1].copy()
            self._Sigma0 = data.Sigma[-1].copy()
            self.prop_scale = data.prop_scale.copy()
            self.gpu = data.gpu
            if self.gpu:
                self.dev_list = data.dev_list
            self.parallel = data.parallel

        self.AR = np.zeros(self.ncomp)
        self.verbose = verbose

        # data working var
        self.alldata = np.empty((sum(self.nobs), self.ndim), dtype=np.double)
        for i in xrange(self.ngroups):
            self.alldata[self.cumobs[i]:self.cumobs[i+1], :] = self.data[i].copy()
Example #5
File: hdp.py  Project: brodyh/dpmix
    def __init__(self, data, ncomp=256, gamma0=10, m0=None,
                 nu0=None, Phi0=None, e0=5, f0=0.1, g0=0.1, h0=0.1, 
                 mu0=None, Sigma0=None, weights0=None, alpha0=1,
                 gpu=None, parallel=False, verbose=False):

        if not issubclass(type(data), HDPNormalMixture):
            # check for functioning gpu
            if _has_gpu:
                self.dev_list = np.asarray((0), dtype=np.int)
                self.dev_list.shape = 1
                if gpu is not None:
                    if type(gpu) is bool:
                        self.gpu = gpu
                    else:
                        self.gpu = True
                        self.dev_list = np.asarray(np.abs(gpu), dtype=np.int)
                        if self.dev_list.shape == ():
                            self.dev_list.shape = 1
                        self.dev_list = np.unique(self.dev_list)
                else:
                    self.gpu = True
            else:
                self.gpu = False

            self.parallel = parallel
            # get the data .. should add checks here later
            self.data = [np.asarray(d) for d in data]
            self.ngroups = len(self.data)
            self.ndim = self.data[0].shape[1]
            self.nobs = tuple([d.shape[0] for d in self.data])
            # need for ident code
            self.cumobs = np.zeros(self.ngroups+1)
            self.cumobs[1:] = np.asarray(self.nobs).cumsum()
            self.ncomp = ncomp

            if m0 is not None:
                if len(m0)==self.ndim:
                    self.mu_prior_mean = m0.copy()
                elif len(m0)==1:
                    self.mu_prior_mean = m0*np.ones(self.ndim)
            else:
                self.mu_prior_mean = np.zeros(self.ndim)

                self.gamma = gamma0*np.ones(ncomp)
        
            
            self._set_initial_values(alpha0, nu0, Phi0, mu0, Sigma0,
                                     weights0, e0, f0)
            # initialize hdp specific vars
            if weights0 is None:
                self._weights0 = np.zeros((self.ngroups, self.ncomp), dtype=np.float)
                self._weights0.fill(1/self.ncomp)
            else:
                self._weights0 = weights0.copy()
            self._stick_beta0 = stats.beta.rvs(1,self._alpha0, size=self.ncomp-1)
            self._beta0 = break_sticks(self._stick_beta0)
            self._alpha00 = 1.0
            self.e0, self.f0 = g0, h0
            self.prop_scale = 0.05 * np.ones(self.ncomp)
            self.prop_scale[-1] = 1.

        else:
            # get all important vars from input class
            self.data = data.data
            self.ngroups, self.nobs, self.ndim, self.ncomp = data.ngroups, data.nobs, data.ndim, data.ncomp
            self.cumobs = data.cumobs.copy()
            self._weights0 = data.weights[-1].copy()
            self._stick_beta0 = data.stick_beta.copy()
            self._beta0 = break_sticks(self._stick_beta0)
            self.e0, self.f0 = data.e0, data.f0
            self.e, self.f = data.e, data.f
            self._nu0 = data._nu0
            self._Phi0 = data._Phi0
            self.mu_prior_mean = data.mu_prior_mean.copy()
            self.gamma = data.gamma.copy()
            self._alpha0 = data.alpha[-1].copy()
            self._alpha00 = data.alpha0[-1].copy()
            self._weights0 = data.weights[-1].copy()
            self._mu0 = data.mu[-1].copy()
            self._Sigma0 = data.Sigma[-1].copy()
            self.prop_scale = data.prop_scale.copy()
            self.gpu = data.gpu
            if self.gpu:
                self.dev_list = np.unique(data.dev_list)
            self.parallel = data.parallel

        
        self.AR = np.zeros(self.ncomp)
        # verbosity
        self.verbose = verbose
        # data working var
        self.data_shared_mem = multiprocessing.RawArray('d', sum(self.nobs)*self.ndim)
        self.alldata = np.frombuffer(self.data_shared_mem).reshape(sum(self.nobs), self.ndim)
        for i in xrange(self.ngroups):
            self.alldata[self.cumobs[i]:self.cumobs[i+1],:] = self.data[i].copy()

        if self.parallel:
            self.num_cores = min(min(multiprocessing.cpu_count(), self.ncomp), 16)
            compsperdev = self.ncomp / self.num_cores
            self.work_queue = [ multiprocessing.Queue() for i in xrange(self.num_cores) ]
            self.result_queue = [ multiprocessing.Queue() for i in xrange(self.num_cores) ]
            self.workers = [ CPUWorker(np.zeros((1,1)), self.gamma, self.mu_prior_mean, 
                                       self._Phi0, self._nu0, self.work_queue[i], self.result_queue[i])
                             for i in xrange(self.num_cores) ]
            self.compsdevmap = {}
            cumcomps = 0
            for i in xrange(self.num_cores):
                self.compsdevmap[i] = [int(cumcomps), int(min(cumcomps+compsperdev, self.ncomp))]
                cumcomps += compsperdev
            self.compsdevmap[self.num_cores-1][1] = self.ncomp

            for thd in self.workers:
                thd.set_data(self.data_shared_mem, sum(self.nobs), self.ndim)
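The RawArray/frombuffer pair above packs all observations into one flat block of shared memory and exposes it as a NumPy view, so the CPUWorker processes can read the data without each receiving its own pickled copy. A minimal self-contained sketch of that pattern (not dpmix code; what CPUWorker.set_data does with the buffer it receives is an assumption):

import multiprocessing
import numpy as np

nobs, ndim = 4, 2
shared = multiprocessing.RawArray('d', nobs * ndim)   # flat buffer of C doubles
view = np.frombuffer(shared).reshape(nobs, ndim)      # NumPy view, no copy
view[:] = np.arange(nobs * ndim, dtype=np.double).reshape(nobs, ndim)
print(shared[5], view[2, 1])                          # both print 5.0: same memory
# a worker that inherits `shared` at fork time can rebuild the same view with
# np.frombuffer(shared).reshape(nobs, ndim) - presumably what set_data arranges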
Example #6
    def __init__(self, data, ncomp=256, gamma0=10, m0=None,
                 nu0=None, Phi0=None, e0=5, f0=0.1, g0=0.1, h0=0.1,
                 mu0=None, Sigma0=None, weights0=None, alpha0=1,
                 gpu=None, parallel=False, verbose=False):

        if not issubclass(type(data), HDPNormalMixture):
            # check for functioning gpu
            if _has_gpu:
                import os
                self.dev_list = np.asarray(0, dtype=np.int)
                self.dev_list.shape = 1
                self.dev_list = {os.uname()[1]: self.dev_list}
                if gpu is not None:
                    if type(gpu) is bool:
                        self.gpu = gpu
                    elif type(gpu) is dict:
                        self.gpu = True
                        self.dev_list = gpu.copy()
                        for host in self.dev_list:
                            self.dev_list[host] = np.asarray(
                                self.dev_list[host],
                                dtype=np.int)
                            if self.dev_list[host].shape == ():
                                self.dev_list[host].shape = 1

                    else:
                        self.gpu = True
                        self.dev_list = np.asarray(np.abs(gpu), dtype=np.int)
                        if self.dev_list.shape == ():
                            self.dev_list.shape = 1
                        self.dev_list = np.unique(self.dev_list)
                        self.dev_list = {os.uname()[1]: self.dev_list}
                else:
                    self.gpu = True
            else:
                self.gpu = False

            self.parallel = parallel

            # get the data .. should add checks here later
            self.data = [np.asarray(d) for d in data]
            self.ngroups = len(self.data)
            self.ndim = self.data[0].shape[1]
            self.nobs = tuple([d.shape[0] for d in self.data])

            # need for ident code
            self.cumobs = np.zeros(self.ngroups+1)
            self.cumobs[1:] = np.asarray(self.nobs).cumsum()
            self.ncomp = ncomp

            if m0 is not None:
                if len(m0) == self.ndim:
                    self.mu_prior_mean = m0.copy()
                elif len(m0) == 1:
                    self.mu_prior_mean = m0*np.ones(self.ndim)
            else:
                self.mu_prior_mean = np.zeros(self.ndim)

                self.gamma = gamma0*np.ones(ncomp)

            self._set_initial_values(alpha0, nu0, Phi0, mu0, Sigma0,
                                     weights0, e0, f0)
            # initialize hdp specific vars
            if weights0 is None:
                self._weights0 = np.zeros(
                    (self.ngroups, self.ncomp),
                    dtype=np.float)
                self._weights0.fill(1/self.ncomp)
            else:
                self._weights0 = weights0.copy()
            self._stick_beta0 = stats.beta.rvs(
                1,
                self._alpha0,
                size=self.ncomp-1)
            self._beta0 = break_sticks(self._stick_beta0)
            self._alpha00 = 1.0
            self.e0, self.f0 = g0, h0
            # start out small? more accepts?
            self.prop_scale = 0.01 * np.ones(self.ncomp)
            self.prop_scale[-1] = 1.

        else:
            # get all important vars from input class
            self.data = data.data
            self.ngroups = data.ngroups
            self.nobs = data.nobs
            self.ndim = data.ndim
            self.ncomp = data.ncomp
            self.cumobs = data.cumobs.copy()
            self._weights0 = data.weights[-1].copy()
            self._stick_beta0 = data.stick_beta.copy()
            self._beta0 = break_sticks(self._stick_beta0)
            self.e0, self.f0 = data.e0, data.f0
            self.e, self.f = data.e, data.f
            self._nu0 = data._nu0
            self._Phi0 = data._Phi0
            self.mu_prior_mean = data.mu_prior_mean.copy()
            self.gamma = data.gamma.copy()
            self._alpha0 = data.alpha[-1].copy()
            self._alpha00 = data.alpha0[-1].copy()
            self._weights0 = data.weights[-1].copy()
            self._mu0 = data.mu[-1].copy()
            self._Sigma0 = data.Sigma[-1].copy()
            self.prop_scale = data.prop_scale.copy()
            self.gpu = data.gpu
            if self.gpu:
                self.dev_list = data.dev_list
            self.parallel = data.parallel

        self.AR = np.zeros(self.ncomp)
        self.verbose = verbose

        # data working var
        self.alldata = np.empty((sum(self.nobs), self.ndim), dtype=np.double)
        for i in xrange(self.ngroups):
            self.alldata[self.cumobs[i]:self.cumobs[i+1], :] = self.data[i].copy()
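For reference, a minimal construction sketch under stated assumptions: the class is HDPNormalMixture (as the issubclass check implies), it is importable from dpmix's hdp module (an assumption based on the file/project headers above), and only the constructor is exercised, since the sampling routine itself is not part of these listings:

import numpy as np
from dpmix.hdp import HDPNormalMixture   # import path is an assumption

# two groups of 2-D observations, matching the expected list-of-arrays input
rng = np.random.RandomState(0)
data = [rng.randn(500, 2), rng.randn(400, 2) + 3.0]

model = HDPNormalMixture(data, ncomp=32, gamma0=10, alpha0=1,
                         gpu=False, parallel=False, verbose=True)
# after construction: model.ngroups == 2, model.ndim == 2, model.nobs == (500, 400)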