def _get_statistics(self, data):
    """Collect sufficient statistics from *data*.

    Returns (n, xss): the observation count (via the project helper
    ``getdatasize``) and the total sum of squares. ``data`` may be a
    single ndarray or a list of ndarrays; with no data, xss is 0.
    """
    count = getdatasize(data)
    if count == 0:
        return count, 0
    if isinstance(data, np.ndarray):
        sumsq = (data**2).sum()
    else:
        # list of arrays: accumulate per-array sums of squares
        sumsq = np.sum([(arr**2).sum() for arr in data])
    return count, sumsq
def _get_statistics(self, data):
    """Return (n, xss): observation count and total sum of squares.

    ``data`` is either one ndarray or a list of ndarrays; an empty
    dataset yields xss == 0.
    """
    n = getdatasize(data)
    xss = 0
    if n > 0:
        # normalize to a list of arrays, then sum squares over all of them
        chunks = [data] if isinstance(data, np.ndarray) else data
        xss = np.sum([(chunk**2).sum() for chunk in chunks])
    return n, xss
def resample(self, data=[], niter=None):
    """Gibbs-resample the regression parameters (self.A, self.sigma).

    With no data, (A, sigma) are drawn from the prior (J_0/h_0 for A,
    inverse-Wishart S_0/nu_0 for sigma). Otherwise the conditional
    resampling of A and sigma alternates for ``niter`` sweeps
    (default: self.niter).
    """
    sweeps = niter if niter else self.niter
    if getdatasize(data) == 0:
        # no data: sample both parameters from the prior
        self.A = sample_gaussian(
            J=self.J_0, h=self.h_0.ravel()).reshape(self.h_0.shape)
        self.sigma = sample_invwishart(self.S_0, self.nu_0)
    else:
        yyT, yxT, xxT, n = self._get_statistics(data)
        for _ in range(sweeps):
            self._resample_A(xxT, yxT, self.sigma)
            self._resample_sigma(xxT, yxT, yyT, n, self.A)
def max_likelihood(self, data, weights=None):
    """Fit parameters by EM on ``data`` (list of arrays or one ndarray).

    Weighted fitting is not supported; the temporary labels added for
    the EM run are discarded afterwards.
    """
    if weights is not None:
        raise NotImplementedError
    assert isinstance(data, (list, np.ndarray))
    if isinstance(data, list):
        data = np.concatenate(data)
    if getdatasize(data) > 0:
        self.add_data(data)
        self.EM_fit()
        # drop the labels created by add_data; this call keeps no state
        self.labels_list = []
def resample(self, data=[], niter=None):
    """Gibbs-resample (self.A, self.sigma).

    Empty data triggers a prior draw; otherwise A and sigma are
    alternately resampled from their conditionals for the requested
    number of sweeps (``niter`` or self.niter).
    """
    sweeps = niter or self.niter
    if getdatasize(data) == 0:
        # prior draw for both parameters
        self.A = sample_gaussian(J=self.J_0,
                                 h=self.h_0.ravel()).reshape(self.h_0.shape)
        self.sigma = sample_invwishart(self.S_0, self.nu_0)
        return
    yyT, yxT, xxT, n = self._get_statistics(data)
    for _ in range(sweeps):
        self._resample_A(xxT, yxT, self.sigma)
        self._resample_sigma(xxT, yxT, yyT, n, self.A)
def max_likelihood(self, data, weights=None):
    """EM-based maximum-likelihood fit; ``weights`` must be None.

    Accepts a single ndarray or a list of ndarrays (concatenated
    before fitting). The labels created for the fit are cleared.
    """
    if weights is not None:
        raise NotImplementedError
    is_list = isinstance(data, list)
    assert is_list or isinstance(data, np.ndarray)
    if is_list:
        data = np.concatenate(data)
    if getdatasize(data) > 0:
        self.add_data(data)
        self.EM_fit()
        self.labels_list = []
def _get_statistics(self, data):
    """Collect sufficient statistics over the feasible r support.

    Returns (n, data_sum, normalizers, feasible) where ``feasible``
    masks the entries of self.r_support not exceeding the smallest
    observation, and ``normalizers`` are the summed log binomial-type
    normalizing terms for each feasible r. With no data, returns
    (0, None, None, None).
    """
    n = getdatasize(data)
    if n == 0:
        return n, None, None, None
    flat = flattendata(data)
    # only r values <= min(data) are feasible for this likelihood
    feasible = self.r_support <= flat.min()
    assert np.any(feasible)
    rs = self.r_support[feasible]
    col = flat[:, na]
    normalizers = (special.gammaln(col)
                   - special.gammaln(col - rs + 1)
                   - special.gammaln(rs)).sum(0)
    return n, flat.sum(), normalizers, feasible
def resample(self, data=[], niter=20):
    """Gibbs-resample (self.r, self.p) via the CRP augmentation.

    Empty data: prior draws (Beta for p, Gamma for r). Otherwise
    alternate the auxiliary table-count update for r with the
    conjugate Beta update for p, ``niter`` times. Returns self.
    """
    if getdatasize(data) == 0:
        # prior draws
        self.p = np.random.beta(self.alpha_0, self.beta_0)
        self.r = np.random.gamma(self.k_0, self.theta_0)
        return self
    data = atleast_2d(flattendata(data))
    N = len(data)
    for _ in range(niter):
        # resample r given the CRP table counts
        msum = sample_crp_tablecounts(self.r, data).sum()
        self.r = np.random.gamma(self.k_0 + msum,
                                 1/(1/self.theta_0 - N*np.log(1-self.p)))
        # resample p given r (conjugate Beta update)
        self.p = np.random.beta(self.alpha_0 + data.sum(),
                                self.beta_0 + N*self.r)
    return self
def resample(self, data=[], niter=20):
    """Gibbs-resample (self.r, self.p) via the CRP augmentation.

    Empty data: prior draws (Beta for p, Gamma for r). Otherwise
    alternate the auxiliary table-count update for r with the
    conjugate Beta update for p, ``niter`` times. Returns self.

    Bug fix: ``N`` (the number of observations) was used in both the
    r and p updates but never defined, so any call with non-empty
    data raised NameError. After ``np.atleast_2d(flattendata(data))``
    the data has shape (1, N) — consistent with the ``ones`` vector
    of length ``data.shape[1]`` and with ``N = len(data)`` in the
    sibling pure-python sampler — so N = data.shape[1].
    """
    if getdatasize(data) == 0:
        # prior draws
        self.p = np.random.beta(self.alpha_0, self.beta_0)
        self.r = np.random.gamma(self.k_0, self.theta_0)
    else:
        data = np.atleast_2d(flattendata(data))
        N = data.shape[1]  # number of observations (row vector layout)
        ones = np.ones(data.shape[1], dtype=float)
        for itr in range(niter):
            ### resample r
            msum = sample_crp_tablecounts(float(self.r), data, ones).sum()
            self.r = np.random.gamma(self.k_0 + msum,
                    1/(1/self.theta_0 - N*np.log(1-self.p)))
            ### resample p
            self.p = np.random.beta(self.alpha_0 + data.sum(),
                    self.beta_0 + N*self.r)
    return self
def resample(self, data):
    """Resample model parameters conditioned on ``data``.

    Does not keep a reference to the data: it is added, self.niter
    resampling sweeps are run, and the temporary labels are popped.
    With no data, a single prior-driven resample is performed.
    """
    assert isinstance(data, (list, np.ndarray))
    if getdatasize(data) == 0:
        self.resample_model()
        return
    if not isinstance(data, np.ndarray):
        data = np.concatenate(data)
    self.add_data(data, initialize_from_prior=False)
    for _ in range(self.niter):
        self.resample_model()
    # discard the labels added above so no reference to data remains
    self.labels_list.pop()
def resample_python(self, data=[], niter=20):
    """Pure-Python Gibbs sampler for (self.r, self.p); returns self.

    Empty data: prior draws. Otherwise, each sweep resamples r from
    its Gamma conditional using per-datum Bernoulli "table" counts
    (drawn in the same order as a plain loop over the data would),
    then p from its conjugate Beta conditional.
    """
    if getdatasize(data) == 0:
        # prior draws
        self.p = np.random.beta(self.alpha_0, self.beta_0)
        self.r = np.random.gamma(self.k_0, self.theta_0)
        return self
    flat = flattendata(data)
    N = len(flat)
    for _ in range(niter):
        ### resample r
        msum = sum(
            (np.random.rand(count) < self.r/(np.arange(count)+self.r)).sum()
            for count in flat)
        self.r = np.random.gamma(self.k_0 + msum,
                                 1/(1/self.theta_0 - N*np.log(1-self.p)))
        ### resample p
        self.p = np.random.beta(self.alpha_0 + flat.sum(),
                                self.beta_0 + N*self.r)
    return self
def resample(self, data):
    """One conditional resampling pass; keeps no reference to ``data``.

    Non-empty data is concatenated (if a list), attached temporarily,
    resampled self.niter times, and its labels popped afterwards.
    Empty data falls back to a single plain resample.
    """
    assert isinstance(data, (list, np.ndarray))
    has_data = getdatasize(data) > 0
    if has_data:
        arr = data if isinstance(data, np.ndarray) else np.concatenate(data)
        self.add_data(arr, initialize_from_prior=False)
        for _ in range(self.niter):
            self.resample_model()
        # remove the temporary labels so the data is not retained
        self.labels_list.pop()
    else:
        self.resample_model()
def _get_statistics(self, data):
    """Return np.array([n, tot]): observation count and data total.

    Accepts an ndarray, a list of ndarrays, or a single scalar; all
    values must be non-negative. Empty data yields [0, 0].
    """
    n = tot = 0
    if getdatasize(data) != 0:
        if isinstance(data, np.ndarray):
            assert np.all(data >= 0)
            arr = np.atleast_1d(data)
            n, tot = arr.shape[0], arr.sum()
        elif isinstance(data, list):
            assert all(np.all(d >= 0) for d in data)
            n = sum(d.shape[0] for d in data)
            tot = sum(d.sum() for d in data)
        else:
            # a bare scalar counts as one observation
            assert np.isscalar(data)
            n, tot = 1, data
    return np.array([n, tot])
def resample_logseriesaug(self, data=[], niter=20):
    """Gibbs-resample (self.r, self.p) using a log-series augmentation.

    Alternative to the CRP-based sampler; the original author notes it
    is "kind of opaque and no advantages". With empty data, (p, r) are
    drawn from the prior; otherwise each sweep samples auxiliary counts
    L_i for the positive observations, then r and p from their
    conditionals. Returns self.
    """
    # an alternative algorithm, kind of opaque and no advantages...
    if getdatasize(data) == 0:
        # no data: prior draws for p and r
        self.p = np.random.beta(self.alpha_0,self.beta_0)
        self.r = np.random.gamma(self.k_0,self.theta_0)
    else:
        data = flattendata(data)
        N = data.shape[0]
        # logF: precomputed table of log-factors; presumably rows are indexed
        # by (observation value - 1) and columns by the auxiliary count — TODO
        # confirm against wherever self.logF is built.
        logF = self.logF
        L_i = np.zeros(N)
        data_nz = data[data > 0]  # auxiliary counts are sampled only for positive data
        for itr in range(niter):
            # unnormalized log-probabilities over auxiliary count values 1..logF.shape[1]
            logR = np.arange(1,logF.shape[1]+1)*np.log(self.r) + logF
            # NOTE(review): mixes fancy indexing (rows data_nz-1) with a column
            # slice up to data_nz.max(); verify the column cutoff is intended.
            L_i[data > 0] = sample_discrete_from_log(logR[data_nz-1,:data_nz.max()],axis=1)+1
            # Gamma conditional for r given the sampled auxiliary counts
            self.r = np.random.gamma(self.k_0 + L_i.sum(), 1/(1/self.theta_0 - np.log(1-self.p)*N))
            # conjugate Beta conditional for p
            self.p = np.random.beta(self.alpha_0 + data.sum(), self.beta_0 + N*self.r)
    return self