def log_marginal_likelihood(self, data):
    """Log marginal likelihood of scalar `data` under the conjugate prior.

    Closed form: a ratio of the posterior and prior normal-inverse-chi-squared
    normalizers, minus the (n/2)*log(pi) base-measure term.
    """
    n = getdatasize(data)
    mu_0, kappa_0, sigmasq_0, nu_0 = self.mu_0, self.kappa_0, self.sigmasq_0, self.nu_0
    mu_n, kappa_n, sigmasq_n, nu_n = \
        self._posterior_hypparams(*self._get_statistics(data))
    # gamma-function ratio plus log of the kappa and (nu*sigmasq) ratios
    gamma_term = special.gammaln(nu_n/2) - special.gammaln(nu_0/2)
    kappa_term = np.log(kappa_0) - np.log(kappa_n)
    nu_term = nu_0*(np.log(nu_0) + np.log(sigmasq_0)) \
        - nu_n*(np.log(nu_n) + np.log(sigmasq_n))
    return gamma_term + 0.5*(kappa_term + nu_term - n*np.log(np.pi))
def max_likelihood(self, data, weights=None):
    """Maximum-likelihood fit via EM; weighted data is not supported."""
    if weights is not None:
        raise NotImplementedError
    assert isinstance(data, (list, np.ndarray))
    if isinstance(data, list):
        data = np.concatenate(data)
    if getdatasize(data) > 0:
        self.add_data(data)
        self.EM_fit()
        # drop the labels created by add_data; this object keeps no data refs
        self.labels_list = []
def max_likelihood(self, data, weights=None):
    """Fit parameters by EM on the (concatenated) data; weights unsupported."""
    if weights is not None:
        raise NotImplementedError
    assert isinstance(data, (np.ndarray, list))
    dataset = np.concatenate(data) if isinstance(data, list) else data
    if getdatasize(dataset) > 0:
        self.add_data(dataset)
        self.EM_fit()
        # discard the temporary labels attached by add_data
        self.labels_list = []
def _get_statistics(self, data):
    """Return (n, xbar): sample size and sample mean (xbar is None if n == 0)."""
    assert isinstance(data, np.ndarray) or \
        (isinstance(data, list) and all(isinstance(d, np.ndarray) for d in data))
    n = getdatasize(data)
    if n == 0:
        return n, None
    if isinstance(data, np.ndarray):
        return n, data.mean()
    # list of arrays: pool the sums and divide by the total count
    return n, sum(d.sum() for d in data) / n
def _get_statistics(data, D):
    """Return (n, xbar, sumsq) for D-dimensional data.

    n is the number of observations, xbar the (D,) sample mean, and sumsq the
    (D, D) centered scatter matrix. All three are None-filled when n == 0.

    Bug fix: the ndarray branch previously centered the *unreshaped* `data`
    while computing xbar from the reshaped view, so a flat input (e.g. a
    single length-D vector) produced a scalar sumsq instead of a (D, D)
    matrix, or a broadcasting error — inconsistent with the list branch.
    """
    n = getdatasize(data)
    if n > 0:
        if isinstance(data, np.ndarray):
            data = np.reshape(data, (-1, D))  # ensure (n, D) before centering
            xbar = data.mean(0)
            centered = data - xbar
            sumsq = np.dot(centered.T, centered)
        else:
            xbar = sum(np.reshape(d, (-1, D)).sum(0) for d in data) / n
            sumsq = sum(np.dot((np.reshape(d, (-1, D)) - xbar).T,
                               (np.reshape(d, (-1, D)) - xbar))
                        for d in data)
    else:
        xbar, sumsq = None, None
    return n, xbar, sumsq
def resample(self, data=None, niter=20):
    """Gibbs-resample (r, p) of a negative binomial from count data.

    With no data, (p, r) are drawn from the beta/gamma prior. Otherwise runs
    `niter` sweeps of an auxiliary-variable sampler: `msum` accumulates the
    CRT-style table counts used to resample r, then p is resampled from its
    conjugate beta conditional.

    Fix: the default for `data` was a shared mutable list (`[]`); replaced
    with None (behavior unchanged).
    """
    data = [] if data is None else data
    if getdatasize(data) == 0:
        # no observations: sample from the prior
        self.p = np.random.beta(self.alpha_0,self.beta_0)
        self.r = np.random.gamma(self.k_0,self.theta_0)
    else:
        data = flattendata(data)
        N = len(data)
        for itr in range(niter):
            ### resample r
            msum = 0.
            for n in data:
                # Bernoulli draws with probs r/(r+j) — counts auxiliary tables
                msum += (np.random.rand(n) < self.r/(np.arange(n)+self.r)).sum()
            self.r = np.random.gamma(self.k_0 + msum, 1/(1/self.theta_0 - N*np.log(1-self.p)))
            ### resample p
            self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
def resample(self, data, niter=25, temp=None):
    # doesn't keep a reference to the data like a model would
    assert isinstance(data, (list, np.ndarray))
    if getdatasize(data) == 0:
        self.resample_model(temp=temp)
        return
    if not isinstance(data, np.ndarray):
        data = np.concatenate(data)
    self.add_data(data)
    for _ in range(niter):
        self.resample_model(temp=temp)
    # forget the labels attached by add_data above
    self.labels_list.pop()
def resample(self, data):
    # doesn't keep a reference to the data like a model would
    assert isinstance(data, (list, np.ndarray))
    if getdatasize(data) == 0:
        self.resample_model()
        return
    arr = data if isinstance(data, np.ndarray) else np.concatenate(data)
    self.add_data(arr, initialize_from_prior=False)
    for _ in range(self.niter):
        self.resample_model()
    # drop the labels object added for this temporary dataset
    self.labels_list.pop()
def resample(self, data):
    # doesn't keep a reference to the data like a model would
    assert isinstance(data, (list, np.ndarray))
    if getdatasize(data) > 0:
        if isinstance(data, list):
            data = np.concatenate(data)
        self.add_data(data, initialize_from_prior=False)
        for _itr in range(self.niter):
            self.resample_model()
        # remove the temporary labels so no data reference survives
        self.labels_list.pop()
    else:
        self.resample_model()
def _resample_logseriesaug(self, data=None, niter=20):
    """Alternative Gibbs sampler using a logarithmic-series augmentation.

    Kept for reference — "kind of opaque and no advantages" over `resample`.
    L_i are per-datum auxiliary variables sampled from a table of
    precomputed log-factors (`self.logF`); r and p are then resampled from
    their conjugate conditionals.

    Fix: the default for `data` was a shared mutable list (`[]`); replaced
    with None (behavior unchanged).
    """
    data = [] if data is None else data
    if getdatasize(data) == 0:
        # no observations: sample from the prior
        self.p = np.random.beta(self.alpha_0,self.beta_0)
        self.r = np.random.gamma(self.k_0,self.theta_0)
    else:
        data = flattendata(data)
        N = data.shape[0]
        logF = self.logF
        L_i = np.zeros(N)
        data_nz = data[data > 0]
        for itr in range(niter):
            # sample the auxiliary L_i for the nonzero observations
            logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF
            L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1
            self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N))
            self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
def resample(self, data, niter=25, temp=None):
    # doesn't keep a reference to the data like a model would
    assert isinstance(data, (np.ndarray, list))
    if getdatasize(data) > 0:
        arr = np.concatenate(data) if not isinstance(data, np.ndarray) else data
        self.add_data(arr)
        for _sweep in range(niter):
            self.resample_model(temp=temp)
        # detach the labels created for this call
        self.labels_list.pop()
    else:
        self.resample_model(temp=temp)
def _get_statistics(self, data):
    """Return (n, xbar, sumsq) for D-dimensional data with scalar scatter.

    n is the observation count, xbar the (D,) sample mean, and sumsq the
    total centered sum of squares (a scalar). All None when n == 0.

    Bug fix: the list branch previously wrote `np.reshape(data, (-1, D))`
    inside the generator instead of `np.reshape(d, (-1, D))` — the loop
    variable `d` was never used, producing a wrong (or crashing) sumsq for
    list inputs.
    """
    assert isinstance(data, np.ndarray) or \
        (isinstance(data, list) and all(isinstance(d, np.ndarray) for d in data))
    D = self.D
    n = getdatasize(data)
    if n > 0:
        if isinstance(data, np.ndarray):
            data = np.reshape(data, (-1, D))
            xbar = data.mean(0)
            sumsq = ((data - xbar)**2).sum()
        else:
            xbar = sum(np.reshape(d, (-1, D)).sum(0) for d in data) / n
            sumsq = sum(((np.reshape(d, (-1, D)) - xbar)**2).sum() for d in data)
    else:
        xbar, sumsq = None, None
    return n, xbar, sumsq
def resample(self, data=None, niter=None):
    """Gibbs-resample (mu, sigmasq) under the conjugate normal / scaled-inv-chi^2 prior.

    With data: alternates conjugate draws of the mean given the variance and
    the variance given the mean, for `niter` sweeps (default `self.niter`).
    Without data: draws both parameters from the prior. Mirrors the samples
    into `self.mubin` / `self.sigmasqbin` when those buffers are set.

    Fix: the default for `data` was a shared mutable list (`[]`); replaced
    with None (behavior unchanged).
    """
    data = [] if data is None else data
    n = getdatasize(data)
    niter = self.niter if niter is None else niter
    if n > 0:
        data = flattendata(data)
        datasum = data.sum()
        nu_n = self.nu_0 + n
        for itr in range(niter):
            # resample mean | variance (normal conditional)
            tausq_n = 1/(1/self.tausq_0 + n/self.sigmasq)
            mu_n = tausq_n*(self.mu_0/self.tausq_0 + datasum/self.sigmasq)
            self.mu = np.sqrt(tausq_n)*np.random.normal() + mu_n
            # resample variance | mean (scaled inverse chi-squared conditional)
            sigmasq_n = (self.nu_0*self.sigmasq_0 + ((data-self.mu)**2).sum())/(nu_n)
            self.sigmasq = sigmasq_n*nu_n/np.random.chisquare(nu_n)
    else:
        # no data: sample straight from the prior
        self.mu = np.sqrt(self.tausq_0) * np.random.normal() + self.mu_0
        self.sigmasq = self.sigmasq_0*self.nu_0/np.random.chisquare(self.nu_0)
    if self.mubin is not None and self.sigmasqbin is not None:
        self.mubin[...] = self.mu
        self.sigmasqbin[...] = self.sigmasq
def _get_statistics(self, data):
    """Return (n, ybar, sumsqc): count, sample mean, centered sum of squares.

    Accepts an ndarray, a list of ndarrays, or a bare int/float (treated as a
    single observation). All stats are None when n == 0.
    """
    assert isinstance(data, np.ndarray) or \
        (isinstance(data, list) and all(isinstance(d, np.ndarray) for d in data)) or \
        isinstance(data, (int, float))
    n = getdatasize(data)
    if n == 0:
        return n, None, None
    if isinstance(data, np.ndarray):
        ybar = data.mean()
        sumsqc = ((data - ybar)**2).sum()
    elif isinstance(data, list):
        ybar = sum(d.sum() for d in data) / n
        sumsqc = sum(np.sum((d - ybar)**2) for d in data)
    else:
        # a bare scalar: one observation, zero scatter
        ybar, sumsqc = data, 0
    return n, ybar, sumsqc
def log_marginal_likelihood(self, data):
    """Log marginal likelihood of `data`: ratio of posterior to prior
    log-partition functions minus the Gaussian base-measure constant."""
    n, D = getdatasize(data), self.D
    post_hypparams = self._posterior_hypparams(*self._get_statistics(data, self.D))
    log_Z_post = self._log_partition_function(*post_hypparams)
    log_Z_prior = self._log_partition_function(
        self.mu_0, self.sigma_0, self.kappa_0, self.nu_0)
    return log_Z_post - log_Z_prior - n*D/2 * np.log(2*np.pi)
def meanfieldupdate(self, data, weights):
    """Mean-field update of the variational natural parameters from weighted data."""
    assert getdatasize(data) > 0
    # update variational parameters from the weighted sufficient statistics
    stats = self._get_weighted_statistics(data, weights, self.D)
    self._mu_mf, self._sigma_mf, self._kappa_mf, self._nu_mf = \
        self._posterior_hypparams(*stats)
    # point estimates for plotting
    self.mu = self._mu_mf
    self.sigma = self._sigma_mf/(self._nu_mf - self.D - 1)