Пример #1
0
 def log_marginal_likelihood(self,data):
     """Return the log marginal likelihood of ``data``.

     The value is assembled from the hyperparameters stored on ``self``
     (``kappa_0``, ``sigmasq_0``, ``nu_0``) and the updated hyperparameters
     returned by ``_posterior_hypparams`` for the statistics of ``data``.
     """
     num_obs = getdatasize(data)
     # mu_0 / mu_n are read for parity with the hyperparameter tuple but do
     # not enter the expression below.
     _mu_0, kappa_0, sigmasq_0, nu_0 = \
             self.mu_0, self.kappa_0, self.sigmasq_0, self.nu_0
     stats = self._get_statistics(data)
     _mu_n, kappa_n, sigmasq_n, nu_n = self._posterior_hypparams(*stats)

     # difference of log-gamma terms
     gamma_part = special.gammaln(nu_n/2) - special.gammaln(nu_0/2)

     # bracketed sum, accumulated left to right
     bracket = np.log(kappa_0) - np.log(kappa_n)
     bracket = bracket + nu_0 * (np.log(nu_0) + np.log(sigmasq_0))
     bracket = bracket - nu_n * (np.log(nu_n) + np.log(sigmasq_n))
     bracket = bracket - num_obs*np.log(np.pi)

     return gamma_part + 0.5*bracket
Пример #2
0
    def max_likelihood(self,data,weights=None):
        """Fit this object to ``data`` by running ``EM_fit``.

        ``data`` must be a list (joined with ``np.concatenate``) or an
        ``np.ndarray``.  Weighted fitting is unsupported: any ``weights``
        other than ``None`` raises ``NotImplementedError``.  Empty data is
        a no-op; otherwise the data is added, EM is run and ``labels_list``
        is reset to an empty list.
        """
        if weights is not None:
            raise NotImplementedError
        assert isinstance(data,(list,np.ndarray))
        if isinstance(data,list):
            data = np.concatenate(data)

        if not getdatasize(data) > 0:
            return

        self.add_data(data)
        self.EM_fit()
        # forget the labels again once fitting is done
        self.labels_list = []
Пример #3
0
    def max_likelihood(self, data, weights=None):
        """Run ``EM_fit`` on ``data`` (a list or an ``np.ndarray``).

        A list is first joined with ``np.concatenate``.  Passing any
        ``weights`` other than ``None`` raises ``NotImplementedError``.
        Nothing happens when the data is empty; otherwise the data is
        added, EM is run, and ``labels_list`` is replaced by an empty list.
        """
        if weights is not None:
            raise NotImplementedError
        assert isinstance(data, (list, np.ndarray))
        if isinstance(data, list):
            data = np.concatenate(data)

        has_data = getdatasize(data) > 0
        if has_data:
            self.add_data(data)
            self.EM_fit()
            # clear the stored labels after fitting
            self.labels_list = []
Пример #4
0
    def _get_statistics(self,data):
        """Return ``(n, xbar)``: the data size and the mean of all entries.

        ``data`` is an ``np.ndarray`` or a list of ``np.ndarray``.  When the
        data size is not positive, ``xbar`` is ``None``.
        """
        is_array = isinstance(data,np.ndarray)
        assert is_array or \
                (isinstance(data,list) and all(isinstance(d,np.ndarray) for d in data))

        n = getdatasize(data)
        if not n > 0:
            return n, None
        if is_array:
            return n, data.mean()
        # list of arrays: pool the per-array sums, then divide by the total size
        total = sum(d.sum() for d in data)
        return n, total/n
Пример #5
0
 def _get_statistics(data,D):
     """Return ``(n, xbar, sumsq)`` for ``data`` viewed as rows of length ``D``.

     Parameters
     ----------
     data : np.ndarray or iterable of np.ndarray
         Observations; each array is reshaped to ``(-1, D)`` before use.
     D : int
         Row length used for the reshape.

     Returns
     -------
     n : data size as reported by ``getdatasize``
     xbar : column-wise mean of the rows, or ``None`` when ``n`` is not positive
     sumsq : ``D x D`` matrix ``sum_i (x_i - xbar)(x_i - xbar)^T`` over the
         rows, or ``None`` when ``n`` is not positive
     """
     n = getdatasize(data)
     if n > 0:
         if isinstance(data,np.ndarray):
             # Reshape once and centre the reshaped rows, so the mean and the
             # scatter matrix are computed from the same (-1, D) view (the
             # list branch below does the same per chunk).
             rows = np.reshape(data,(-1,D))
             xbar = rows.mean(0)
             centered = rows - xbar
             sumsq = np.dot(centered.T,centered)
         else:
             # Reshape every chunk a single time and reuse it for both passes.
             chunks = [np.reshape(d,(-1,D)) for d in data]
             xbar = sum(c.sum(0) for c in chunks) / n
             centered_chunks = [c - xbar for c in chunks]
             sumsq = sum(np.dot(c.T,c) for c in centered_chunks)
     else:
         xbar, sumsq = None, None
     return n, xbar, sumsq
Пример #6
0
 def resample(self,data=[],niter=20):
     """Resample ``self.r`` and ``self.p``.

     With no data, ``p`` is drawn from ``Beta(alpha_0, beta_0)`` and ``r``
     from ``Gamma(k_0, theta_0)``.  Otherwise ``niter`` sweeps are run, each
     one redrawing ``r`` and then ``p`` given the flattened data.
     """
     if getdatasize(data) == 0:
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
         return

     counts = flattendata(data)
     num_obs = len(counts)
     for _ in range(niter):
         ### resample r
         msum = 0.
         for count in counts:
             uniforms = np.random.rand(count)
             msum += (uniforms < self.r/(np.arange(count)+self.r)).sum()
         inv_scale = 1/self.theta_0 - num_obs*np.log(1-self.p)
         self.r = np.random.gamma(self.k_0 + msum, 1/inv_scale)
         ### resample p
         self.p = np.random.beta(self.alpha_0 + counts.sum(),
                                 self.beta_0 + num_obs*self.r)
Пример #7
0
    def resample(self,data,niter=25,temp=None):
        """Resample the model, conditioning on ``data`` when it is non-empty.

        ``data`` is a list (joined with ``np.concatenate``) or an
        ``np.ndarray``.  With data: the data is added, ``resample_model`` is
        called ``niter`` times with ``temp``, and the last entry of
        ``labels_list`` is popped.  Without data: ``resample_model`` is
        called once.
        """
        # doesn't keep a reference to the data like a model would
        assert isinstance(data,(list,np.ndarray))

        if not getdatasize(data) > 0:
            self.resample_model(temp=temp)
            return

        if not isinstance(data,np.ndarray):
            data = np.concatenate(data)

        self.add_data(data)

        for _ in range(niter):
            self.resample_model(temp=temp)

        # presumably removes the entry add_data created above -- confirm
        self.labels_list.pop()
Пример #8
0
    def resample(self, data):
        """Resample the model, conditioning on ``data`` when it is non-empty.

        ``data`` is a list (joined with ``np.concatenate``) or an
        ``np.ndarray``.  With data: it is added with
        ``initialize_from_prior=False``, ``resample_model`` runs
        ``self.niter`` times, and the last entry of ``labels_list`` is
        popped.  Without data: ``resample_model`` runs once.
        """
        # doesn't keep a reference to the data like a model would
        assert isinstance(data, (list, np.ndarray))

        if not getdatasize(data) > 0:
            self.resample_model()
            return

        if not isinstance(data, np.ndarray):
            data = np.concatenate(data)

        self.add_data(data, initialize_from_prior=False)

        for _ in range(self.niter):
            self.resample_model()

        # presumably removes the entry add_data created above -- confirm
        self.labels_list.pop()
Пример #9
0
    def resample(self,data):
        """Resample the model, using ``data`` temporarily if there is any.

        Accepts a list (joined with ``np.concatenate``) or an
        ``np.ndarray``.  Non-empty data is added with
        ``initialize_from_prior=False``, followed by ``self.niter`` calls to
        ``resample_model`` and a final ``labels_list.pop()``.  Empty data
        results in a single ``resample_model`` call.
        """
        # doesn't keep a reference to the data like a model would
        assert isinstance(data,(list,np.ndarray))

        have_data = getdatasize(data) > 0
        if not have_data:
            self.resample_model()
            return

        if isinstance(data,list):
            data = np.concatenate(data)

        self.add_data(data,initialize_from_prior=False)

        sweeps = self.niter
        for _ in range(sweeps):
            self.resample_model()

        # presumably discards the entry add_data created above -- confirm
        self.labels_list.pop()
Пример #10
0
 def _resample_logseriesaug(self,data=[],niter=20):
     """Alternative sampler for ``self.r`` and ``self.p``.

     With no data, ``p`` is drawn from ``Beta(alpha_0, beta_0)`` and ``r``
     from ``Gamma(k_0, theta_0)``.  Otherwise ``niter`` sweeps are run; each
     sweep draws an auxiliary value ``L_i`` for every strictly positive
     observation, then redraws ``r`` and ``p``.

     NOTE(review): judging by the name this is a log-series augmentation
     scheme; the exact meaning of ``self.logF`` is not visible here.
     """
     # an alternative algorithm, kind of opaque and no advantages...
     if getdatasize(data) == 0:
         self.p = np.random.beta(self.alpha_0,self.beta_0)
         self.r = np.random.gamma(self.k_0,self.theta_0)
     else:
         data = flattendata(data)
         N = data.shape[0]
         # 2-D table read from the instance; indexed below as [row, column]
         logF = self.logF
         # auxiliary variables, one per observation; stay 0 where data == 0
         L_i = np.zeros(N)
         # strictly positive observations only
         data_nz = data[data > 0]
         for itr in range(niter):
             # add k*log(r) to column k-1 of logF (k = 1 .. number of columns)
             logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF
             # one row per positive observation x (row x-1), truncated to the
             # first max(x) columns; the +1 presumably maps a 0-based sampled
             # index to a 1-based value -- confirm against sample_discrete_from_log
             L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1
             # redraw r given the auxiliary totals, then p given r
             self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N))
             self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
Пример #11
0
    def resample(self, data, niter=25, temp=None):
        """Resample the model, using ``data`` temporarily if there is any.

        Accepts a list (joined with ``np.concatenate``) or an
        ``np.ndarray``.  Non-empty data is added, ``resample_model`` is
        called ``niter`` times with ``temp``, and ``labels_list.pop()`` is
        called afterwards.  Empty data results in one ``resample_model``
        call.
        """
        # doesn't keep a reference to the data like a model would
        assert isinstance(data, (list, np.ndarray))

        have_data = getdatasize(data) > 0
        if not have_data:
            self.resample_model(temp=temp)
            return

        if isinstance(data, list):
            data = np.concatenate(data)

        self.add_data(data)

        for _ in range(niter):
            self.resample_model(temp=temp)

        # presumably discards the entry add_data created above -- confirm
        self.labels_list.pop()
Пример #12
0
    def _get_statistics(self,data):
        """Return ``(n, xbar, sumsq)`` for ``data`` viewed as rows of length ``self.D``.

        Parameters
        ----------
        data : np.ndarray or list of np.ndarray
            Observations; each array is reshaped to ``(-1, D)`` before use.

        Returns
        -------
        n : data size as reported by ``getdatasize``
        xbar : column-wise mean of the rows, or ``None`` when ``n`` is not positive
        sumsq : scalar sum of squared deviations of every entry from ``xbar``,
            or ``None`` when ``n`` is not positive
        """
        assert isinstance(data,np.ndarray) or \
                (isinstance(data,list) and all(isinstance(d,np.ndarray) for d in data))

        D = self.D
        n = getdatasize(data)
        if n > 0:
            if isinstance(data,np.ndarray):
                data = np.reshape(data,(-1,D))
                xbar = data.mean(0)
                sumsq = ((data-xbar)**2).sum()
            else:
                # Reshape each chunk once and reuse it for both passes.
                chunks = [np.reshape(d,(-1,D)) for d in data]
                xbar = sum(c.sum(0) for c in chunks) / n
                # Accumulate per chunk: each chunk must contribute only its
                # own rows, otherwise the total is overcounted.
                sumsq = sum(((c - xbar)**2).sum() for c in chunks)
        else:
            xbar, sumsq = None, None
        return n, xbar, sumsq
Пример #13
0
    def resample(self,data=[],niter=None):
        """Resample ``self.mu`` and ``self.sigmasq``.

        With data, ``niter`` sweeps (default ``self.niter``) alternate
        between drawing the mean given the current variance and drawing the
        variance given the new mean.  Without data, both are drawn using
        only the ``*_0`` hyperparameters.  If both ``mubin`` and
        ``sigmasqbin`` are set, the new values are written into them in
        place.
        """
        n = getdatasize(data)
        if niter is None:
            niter = self.niter

        if not n > 0:
            self.mu = np.sqrt(self.tausq_0) * np.random.normal() + self.mu_0
            self.sigmasq = self.sigmasq_0*self.nu_0/np.random.chisquare(self.nu_0)
        else:
            data = flattendata(data)
            datasum = data.sum()
            nu_n = self.nu_0 + n
            for _ in range(niter):
                # resample mean
                post_var = 1/(1/self.tausq_0 + n/self.sigmasq)
                post_mean = post_var*(self.mu_0/self.tausq_0 + datasum/self.sigmasq)
                self.mu = np.sqrt(post_var)*np.random.normal() + post_mean
                # resample variance
                sq_resid = ((data-self.mu)**2).sum()
                sigmasq_n = (self.nu_0*self.sigmasq_0 + sq_resid)/(nu_n)
                self.sigmasq = sigmasq_n*nu_n/np.random.chisquare(nu_n)

        # mirror the new values into the externally provided buffers, if any
        if self.mubin is not None and self.sigmasqbin is not None:
            self.mubin[...] = self.mu
            self.sigmasqbin[...] = self.sigmasq
Пример #14
0
    def _get_statistics(self,data):
        """Return ``(n, ybar, sumsqc)`` for ``data``.

        ``data`` may be an ``np.ndarray``, a list of ``np.ndarray``, or a
        single ``int``/``float``.  ``ybar`` is the mean of all entries and
        ``sumsqc`` the sum of squared deviations from it; a lone scalar
        yields ``(n, data, 0)``.  When the data size is not positive, both
        statistics are ``None``.
        """
        assert isinstance(data,np.ndarray) or \
                (isinstance(data,list) and
                    all(isinstance(d,np.ndarray) for d in data)) or \
                isinstance(data,(int,float))

        n = getdatasize(data)
        if not n > 0:
            return n, None, None

        if isinstance(data,np.ndarray):
            ybar = data.mean()
            sumsqc = ((data-ybar)**2).sum()
        elif isinstance(data,list):
            ybar = sum(d.sum() for d in data)/n
            sumsqc = sum(np.sum((d-ybar)**2) for d in data)
        else:
            # single scalar observation: it is its own mean
            ybar, sumsqc = data, 0

        return n, ybar, sumsqc
Пример #15
0
 def log_marginal_likelihood(self,data):
     """Return the log marginal likelihood of ``data``.

     Computed as the log partition function at the posterior
     hyperparameters, minus the log partition function at the prior
     hyperparameters, minus ``n*D/2 * log(2*pi)``.
     """
     n, D = getdatasize(data), self.D
     stats = self._get_statistics(data,self.D)
     posterior_logZ = self._log_partition_function(*self._posterior_hypparams(*stats))
     prior_logZ = self._log_partition_function(
             self.mu_0,self.sigma_0,self.kappa_0,self.nu_0)
     return posterior_logZ - prior_logZ - n*D/2 * np.log(2*np.pi)
Пример #16
0
 def meanfieldupdate(self,data,weights):
     """Update the mean-field hyperparameters from weighted ``data``.

     Stores the result of ``_posterior_hypparams`` on the weighted
     statistics in ``_mu_mf``, ``_sigma_mf``, ``_kappa_mf`` and ``_nu_mf``,
     then sets ``mu`` and ``sigma`` from them.  ``data`` must be non-empty.
     """
     assert getdatasize(data) > 0
     # update
     weighted_stats = self._get_weighted_statistics(data,weights,self.D)
     hypparams = self._posterior_hypparams(*weighted_stats)
     self._mu_mf, self._sigma_mf, self._kappa_mf, self._nu_mf = hypparams
     # for plotting
     plot_mu = self._mu_mf
     plot_sigma = self._sigma_mf/(self._nu_mf - self.D - 1)
     self.mu = plot_mu
     self.sigma = plot_sigma