def sample(num_samples, initial_guesses, proposal_method,
           population_size=20, stop_flag=None, quiet=True):
    """Population Monte Carlo sampling loop.

    Parameters
    ==========
    num_samples: number of resampled samples to collect beyond the
        initial guesses
    initial_guesses: initial sample values used to seed the chain
    proposal_method: must provide gen_proposal(ancestor) and
        observe(population)
    population_size: number of proposals generated per iteration
    stop_flag: object whose stop() method can end sampling early;
        defaults to flags.NeverStopFlag()
    quiet: if False, print progress to stderr

    Returns
    =======
    (samples, lposts): numpy arrays of the drawn samples and their
        log posterior values (initial guesses excluded)
    """
    # Avoid a shared default instance evaluated once at def time:
    # construct the no-op stop flag per call.
    if stop_flag is None:
        stop_flag = flags.NeverStopFlag()
    num_initial = len(initial_guesses)
    rval = [PmcSample(sample=s) for s in initial_guesses]
    while len(rval) - num_initial < num_samples and not stop_flag.stop():
        # choose ancestor uniformly at random from (up to) the last
        # 2 * population_size samples
        anc_cand = np.min((len(rval), 2 * population_size))
        ancest_dist = categorical([1. / anc_cand] * anc_cand)
        pop = []
        for _ in range(population_size):
            # NOTE(review): a draw of 0 selects rval[0] (since -0 == 0),
            # not the most recent sample -- confirm this is intended.
            tmp = proposal_method.gen_proposal(rval[-int(ancest_dist.rvs())])
            if not hasattr(tmp, "__iter__"):
                tmp = [tmp]
            pop.extend(tmp)
        proposal_method.observe(pop)  # adapt proposal
        rval.extend(importance_resampling(population_size, pop))
        if not quiet:
            print(len(rval), "samples", file=sys.stderr)
    return (np.array([s.sample for s in rval[num_initial:]]),
            np.array([s.lpost for s in rval[num_initial:]]))
def observe(self, population): lweights = np.array([s.lweight for s in population]) #print(lweights) lweights = lweights - logsumexp(lweights) #+ 1000 #print(lweights) indices = np.array([self.prop2idx[s.prop_obj] for s in population]) for i in range(len(lweights)): prop_idx = indices[i] self.num_samp[prop_idx] = self.num_samp[prop_idx] + 1 self.sum[prop_idx] = logsumexp((self.sum[prop_idx], lweights[i])) self.sqr_sum[prop_idx] = logsumexp( (self.sqr_sum[prop_idx], 2 * lweights[i])) lnum_samp = log(self.num_samp) self.var = exp( logsumexp([self.sum, self.sqr_sum - lnum_samp], 0) - lnum_samp) #self.var = exp(self.var - logsumexp(self.var)) if self.var.size > 1: tmp = self.var.sum() if tmp == 0 or np.isnan(tmp): prop_prob = np.array([1. / self.var.size] * self.var.size) else: prop_prob = (self.var.sum() - self.var) prop_prob = prop_prob / prop_prob.sum( ) / 2 + np.random.dirichlet(1 + self.num_samp) / 2 else: prop_prob = np.array([1. / self.var.size] * self.var.size) self.prop_dist = categorical(prop_prob)
def observe(self, population):
    """Adapt the proposal-selection distribution from a freshly
    weighted population of samples."""
    log_w = np.array([s.lweight for s in population])
    log_w = log_w - logsumexp(log_w)  # normalize in log space
    which_prop = np.array([self.prop2idx[s.prop_obj] for s in population])
    # fold each sample's weight into its proposal's running statistics
    for lw, k in zip(log_w, which_prop):
        self.num_samp[k] = self.num_samp[k] + 1
        self.sum[k] = logsumexp((self.sum[k], lw))
        self.sqr_sum[k] = logsumexp((self.sqr_sum[k], 2 * lw))
    log_n = log(self.num_samp)
    self.var = exp(logsumexp([self.sum, self.sqr_sum - log_n], 0) - log_n)
    size = self.var.size
    uniform = np.array([1. / size] * size)
    if size <= 1:
        weights = uniform
    else:
        total = self.var.sum()
        if total == 0 or np.isnan(total):
            # degenerate statistics: choose uniformly
            weights = uniform
        else:
            # low-dispersion proposals get more mass; mix with a
            # Dirichlet draw for exploration
            weights = total - self.var
            weights = (weights / weights.sum() / 2
                       + np.random.dirichlet(1 + self.num_samp) / 2)
    self.prop_dist = categorical(weights)
def sample_lpost_based(num_samples, initial_particles, proposal_method,
                       population_size=20):
    """Population sampling where each ancestor is chosen with probability
    proportional to its normalized posterior density.

    Returns (samples, lposts) as numpy arrays.
    """
    collected = []
    ancestors = proposal_method.process_initial_samples(initial_particles)
    start_count = len(collected)  # always 0; kept for symmetry with sample()
    while len(collected) - start_count < num_samples:
        lposts = np.array([a.lpost for a in ancestors])
        anc_choice = categorical(lposts - logsumexp(lposts),
                                 p_in_logspace=True)
        proposals = [proposal_method.gen_proposal(ancestors[anc_choice.rvs()])
                     for _ in range(population_size)]
        weights = np.array([s.lweight for s in proposals])
        weights = exp(weights - logsumexp(weights))
        # Importance Resampling: multinomial() rejects weight vectors that
        # don't sum to exactly one, so renormalize until it accepts them
        while True:
            try:
                counts = np.random.multinomial(population_size, weights)
                break
            except ValueError:
                weights /= weights.sum()
        for j in range(len(counts)):
            collected.extend([proposals[j]] * counts[j])
            # every proposal becomes a future ancestor, drawn or not
            ancestors.append(proposals[j])
    return (np.array([s.sample for s in collected]),
            np.array([s.lpost for s in collected]))
def fit(self, samples):
    """Fit a Dirichlet-process Gaussian mixture to `samples` and store
    its weights, component distributions and dimensionality.

    Parameters
    ==========
    samples: array of shape (n_samples, dim)
    """
    import sklearn.mixture
    # sklearn.mixture.DPGMM was deprecated in 0.18 and removed in 0.20;
    # BayesianGaussianMixture with a Dirichlet-process prior replaces it
    # (precs_ became precisions_).
    m = sklearn.mixture.BayesianGaussianMixture(
        weight_concentration_prior_type="dirichlet_process",
        covariance_type="full")
    m.fit(samples)
    self.num_components = len(m.weights_)
    self.comp_lprior = log(m.weights_)
    self.dist_cat = categorical(exp(self.comp_lprior))
    # precisions_ holds the inverse covariances; pass them as Ki so the
    # inverse need not be recomputed downstream
    self.comp_dist = [mvnorm(m.means_[i],
                             np.linalg.inv(m.precisions_[i]),
                             Ki=m.precisions_[i])
                      for i in range(self.comp_lprior.size)]
    self.dim = m.means_[0].size
def fit(self, samples):
    """Fit a Gaussian mixture with self.num_components components to
    `samples` and store its weights, component distributions and
    dimensionality.

    Parameters
    ==========
    samples: array of shape (n_samples, dim)
    """
    import sklearn.mixture
    # sklearn.mixture.GMM was deprecated in 0.18 and removed in 0.20;
    # GaussianMixture is the replacement (covars_ became covariances_,
    # and covariance_type is keyword-only).
    m = sklearn.mixture.GaussianMixture(self.num_components,
                                        covariance_type="full")
    m.fit(samples)
    self.comp_lprior = log(m.weights_)
    self.dist_cat = categorical(exp(self.comp_lprior))
    self.comp_dist = [mvnorm(m.means_[i], m.covariances_[i])
                      for i in range(self.comp_lprior.size)]
    self.dim = m.means_[0].size
    # Removed a dead `if False:` block that ran a hand-rolled EM loop
    # (_e_step/_m_step); it was unreachable.
def fit(self, samples):
    """Fit a Dirichlet-process Gaussian mixture to `samples`, storing
    component weights, distributions and the data dimensionality.

    Parameters
    ==========
    samples: array of shape (n_samples, dim)
    """
    import sklearn.mixture
    # sklearn.mixture.DPGMM was removed in scikit-learn 0.20;
    # BayesianGaussianMixture with a Dirichlet-process weight prior is
    # the supported replacement (precs_ -> precisions_).
    m = sklearn.mixture.BayesianGaussianMixture(
        weight_concentration_prior_type="dirichlet_process",
        covariance_type="full")
    m.fit(samples)
    self.num_components = len(m.weights_)
    self.comp_lprior = log(m.weights_)
    self.dist_cat = categorical(exp(self.comp_lprior))
    # store both the covariance and its precomputed inverse (Ki)
    self.comp_dist = [
        mvnorm(m.means_[i], np.linalg.inv(m.precisions_[i]),
               Ki=m.precisions_[i])
        for i in range(self.comp_lprior.size)
    ]
    self.dim = m.means_[0].size
def __init__(self, data, dir_param, mean_prior, cov_prior, df_prior):
    """Initialize the mixture-model state.

    data: observation matrix, one row per observation
    dir_param: Dirichlet concentration parameters (one per component)
    mean_prior, cov_prior, df_prior: priors for each component's
        location, scale matrix and degrees of freedom
    """
    self.data = data
    self.num_obs = data.shape[0]
    self.dim_obs = data.shape[1]
    self.mean_prior = mean_prior
    self.cov_prior = cov_prior
    self.df_prior = df_prior
    self.dir_param = np.array(dir_param).flatten()
    # draw mixture weights, then a component indicator per observation
    self.cat_param = np.random.dirichlet(self.dir_param)
    self.comp_indic = dist.categorical(self.cat_param).rvs(size=self.num_obs,
                                                           indic=True)
    # initialize every component's parameters by sampling its priors
    initial_params = []
    for _ in range(len(dir_param)):
        initial_params.append([mean_prior.rvs(), cov_prior.rv(),
                               df_prior.rvs()])
    self.update_comp_dists(initial_params)
def lprior(self):
    """Log prior density of the current model state.

    Sums the Dirichlet prior on the mixture weights, the categorical
    prior on the per-observation component indicators, and the priors
    on every component's parameters.  Each contribution is asserted to
    be > -inf to catch degenerate states early.
    """
    rval = dist.dirichlet(self.dir_param).logpdf(self.cat_param)
    assert (rval != -np.inf)
    rval = rval + dist.categorical(self.cat_param).logpdf(
        self.comp_indic, indic=True).sum()
    assert (rval != -np.inf)
    # comp_param[i] holds (mean, covariance, degrees of freedom)
    for i in range(len(self.comp_param)):
        rval = rval + self.mean_prior.logpdf(self.comp_param[i][0])
        assert (rval != -np.inf)
        rval = rval + self.cov_prior.logpdf(self.comp_param[i][1])
        assert (rval != -np.inf)
        rval = rval + self.df_prior.logpdf(self.comp_param[i][2])
        assert (rval != -np.inf)
    return rval
def lprior(self):
    """Return the log prior: Dirichlet prior on weights, categorical
    prior on indicators, plus every component's parameter priors."""
    total = dist.dirichlet(self.dir_param).logpdf(self.cat_param)
    assert total != -np.inf
    indic_lp = dist.categorical(self.cat_param).logpdf(self.comp_indic,
                                                       indic=True).sum()
    total = total + indic_lp
    assert total != -np.inf
    for params in self.comp_param:
        # params holds [mean, covariance, degrees of freedom]
        total = total + self.mean_prior.logpdf(params[0])
        assert total != -np.inf
        total = total + self.cov_prior.logpdf(params[1])
        assert total != -np.inf
        total = total + self.df_prior.logpdf(params[2])
        assert total != -np.inf
    return total
def fit(self, samples):
    """Fit a Gaussian mixture with self.num_components components and
    store its weights, component distributions and dimensionality.

    Parameters
    ==========
    samples: array of shape (n_samples, dim)
    """
    import sklearn.mixture
    # sklearn.mixture.GMM was removed in scikit-learn 0.20;
    # GaussianMixture replaces it (covars_ -> covariances_, and
    # covariance_type must be passed by keyword).
    m = sklearn.mixture.GaussianMixture(self.num_components,
                                        covariance_type="full")
    m.fit(samples)
    self.comp_lprior = log(m.weights_)
    self.dist_cat = categorical(exp(self.comp_lprior))
    self.comp_dist = [
        mvnorm(m.means_[i], m.covariances_[i])
        for i in range(self.comp_lprior.size)
    ]
    self.dim = m.means_[0].size
    # Removed an unreachable `if False:` block containing a hand-rolled
    # _e_step/_m_step EM loop.
def importance_resampling(resampled_size, pop):
    """Draw `resampled_size` samples from `pop` with replacement, with
    probability proportional to each sample's importance weight."""
    weights = np.array([s.lweight for s in pop])
    weights = exp(weights - logsumexp(weights))
    # Importance Resampling: categorical() rejects weights that don't
    # sum to exactly one, so renormalize until construction succeeds
    picker = None
    while picker is None:
        try:
            picker = categorical(weights)
        except ValueError:
            weights /= weights.sum()
    return [pop[picker.rvs()] for _ in range(resampled_size)]
def __init__(self, data, dir_param, mean_prior, cov_prior, df_prior):
    """Initialize the mixture-model state.

    Parameters
    ==========
    data: observation matrix, one row per observation
    dir_param: Dirichlet concentration parameters, one per component
    mean_prior: prior over component locations (provides rvs/logpdf)
    cov_prior: prior over component scale matrices (provides rv/logpdf)
    df_prior: prior over degrees of freedom (provides rvs/logpdf)
    """
    self.data = data
    self.num_obs = data.shape[0]
    self.dim_obs = data.shape[1]
    self.dir_param = np.array(dir_param).flatten()
    # draw mixture weights from the Dirichlet prior, then a component
    # indicator for every observation
    self.cat_param = np.random.dirichlet(self.dir_param)
    self.comp_indic = dist.categorical(self.cat_param).rvs(
        size=self.num_obs, indic=True)
    self.mean_prior = mean_prior
    self.cov_prior = cov_prior
    self.df_prior = df_prior
    # initialize each component's (mean, covariance, df) from the priors
    self.update_comp_dists(
        [[mean_prior.rvs(), cov_prior.rv(),
          df_prior.rvs()] for _ in range(len(dir_param))])
def sample_sis(num_samples, initial_particles, proposal_method,
               stop_flag=flags.NeverStopFlag(), quiet=True):
    """Sequential importance sampling: each particle proposes, the pooled
    proposals are importance-resampled, and each particle advances to a
    posterior-weighted choice among its own candidates.

    Parameters
    ==========
    num_samples: number of resampled samples to collect
    initial_particles: starting particles for the proposal method
    proposal_method: provides process_initial_samples, gen_proposal and
        process_new_ancestors
    stop_flag: object whose stop() method can end sampling early
        (NOTE(review): the default instance is created once at def time
        and shared across calls -- confirm this is harmless)
    quiet: if False, print progress to stderr

    Returns
    =======
    (samples, lposts) as numpy arrays
    """
    part = proposal_method.process_initial_samples(initial_particles)
    rval = []
    num_initial = len(rval)
    while len(rval) - num_initial < num_samples and not stop_flag.stop():
        #print(len(rval))
        #choose ancestor uniformly at random from previous samples
        pop = []
        part_new = []
        for p in part:
            tmp = proposal_method.gen_proposal(p)
            if hasattr(tmp, "__iter__"):
                # several candidates: keep all in the population and
                # advance this particle to one drawn according to the
                # normalized log posteriors
                pop.extend(tmp)
                lposts = np.array([t.lpost for t in tmp])
                cd = categorical(lposts - logsumexp(lposts), True)
                part_new.append(tmp[cd.rvs()])
            else:
                pop.append(tmp)
                part_new.append(tmp)
        prop_w = np.array([s.lweight for s in pop])
        prop_w = exp(prop_w - logsumexp(prop_w))
        # Importance Resampling: multinomial() rejects weight vectors
        # that don't sum to exactly one, so renormalize until accepted
        while True:
            try:
                draws = np.random.multinomial(len(initial_particles), prop_w)
                break
            except ValueError:
                prop_w /= prop_w.sum()
        new_samp = []
        for idx in range(len(draws)):
            new_samp.extend([pop[idx]] * draws[idx])
        proposal_method.process_new_ancestors(new_samp)
        rval.extend(new_samp)
        if not quiet:
            print(len(rval), "samples", file=sys.stderr)
        part = part_new
    return (np.array([s.sample for s in rval]),
            np.array([s.lpost for s in rval]))
def test_DirCatTMM():
    """Smoke test for DirCatTMM: exercises lpost_comp_indic,
    llhood_comp_param and lprior for 2- and 3-dimensional data."""
    num_obs = 1000
    for dim in range(2,4):
        # synthetic observations from a known multivariate-t distribution
        mu = np.array([11 * (i+1) for i in range(dim)])
        K = np.eye(dim) * 5
        df = dim + 1
        obs_dist = dist.mvt(mu, K, df)
        obs = obs_dist.rvs(num_obs)
        dctmm = dis.DirCatTMM(obs, [1]*dim, obs_dist,
                              dist.invwishart(np.eye(dim) * 5, dim + 1),
                              stats.gamma(1, scale=1, loc=dim+1))
        orig_cat_param = dctmm.cat_param
        dctmm.cat_param = np.zeros(dim)
        for i in range(dim):
            # put all categorical mass on component i
            dctmm.cat_param[i] = 1
            ### Test DirCatTMM.lpost_comp_indic ###
            for j in range(dim):
                c_indic = np.zeros(dim)
                c_indic[j] = 1
                for o in range(obs.shape[0]):
                    if i == j:
                        # indicator matches the only allowed component
                        assert(dctmm.lpost_comp_indic(c_indic, o) > -np.inf)
                    else:
                        # zero-probability component must give -inf
                        assert(dctmm.lpost_comp_indic(c_indic, o) == -np.inf)
                c_indic[j] = 0
            ### Test DirCatTMM.llhood_comp_param ###
            # the true parameters should dominate perturbed ones
            highest = dctmm.llhood_comp_param((mu, K, df), i)
            assert(highest >= dctmm.llhood_comp_param((-mu, K, df), i))
            assert(highest >= dctmm.llhood_comp_param((mu, K*5, df), i))
            assert(highest >= dctmm.llhood_comp_param((mu, K/2, df), i))
            assert(highest >= dctmm.llhood_comp_param((mu, K, df+10), i))
            dctmm.cat_param[i] = 0
        ### Test DirCatTMM.lprior ###
        # prior mode of the weights should beat a perturbed weight vector
        dctmm.cat_param = np.array(dctmm.dir_param / dctmm.dir_param.sum())
        dctmm.comp_indic = dist.categorical(dctmm.cat_param).rvs(num_obs,
                                                                 indic = True)
        dctmm.update_comp_dists([(mu, K, df)] * dim)
        highest = dctmm.lprior()
        c_param = dctmm.dir_param + np.arange(dim)
        dctmm.cat_param = np.array(c_param / c_param.sum())
        ch_cat_param = dctmm.lprior()
        assert(highest > ch_cat_param)
        # shifting the component means away from the prior lowers lprior
        dctmm.update_comp_dists([(-mu, K, df)] * dim)
        assert(ch_cat_param > dctmm.lprior())
def sample_sis(num_samples, initial_particles, proposal_method,
               stop_flag = flags.NeverStopFlag(), quiet = True):
    """Sequential importance sampling: every particle proposes, the
    pooled proposals are importance-resampled into the output, and each
    particle advances to a posterior-weighted pick among its own
    candidates."""
    particles = proposal_method.process_initial_samples(initial_particles)
    collected = []
    baseline = len(collected)
    resample_count = len(initial_particles)
    while len(collected) - baseline < num_samples and not stop_flag.stop():
        generation = []
        advanced = []
        for particle in particles:
            candidates = proposal_method.gen_proposal(particle)
            if not hasattr(candidates, "__iter__"):
                # single candidate: it joins the pool and the particle
                generation.append(candidates)
                advanced.append(candidates)
                continue
            generation.extend(candidates)
            cand_lposts = np.array([c.lpost for c in candidates])
            chooser = categorical(cand_lposts - logsumexp(cand_lposts), True)
            advanced.append(candidates[chooser.rvs()])
        weights = np.array([s.lweight for s in generation])
        weights = exp(weights - logsumexp(weights))
        # renormalize until multinomial() accepts the weight vector
        while True:
            try:
                counts = np.random.multinomial(resample_count, weights)
                break
            except ValueError:
                weights /= weights.sum()
        resampled = []
        for k in range(len(counts)):
            resampled.extend([generation[k]] * counts[k])
        proposal_method.process_new_ancestors(resampled)
        collected.extend(resampled)
        if not quiet:
            print(len(collected), "samples", file=sys.stderr)
        particles = advanced
    return (np.array([s.sample for s in collected]),
            np.array([s.lpost for s in collected]))
def __init__(self, *predefined_proposals):
    """Choose proposals to decrease variance of weights

    Parameters
    ==========
    predefined_proposals: a set of given proposals among which to choose
    """
    count = len(predefined_proposals)
    # bidirectional mapping between proposal objects and their indices
    self.prop2idx = {}
    self.idx2prop = []
    for idx, proposal in enumerate(predefined_proposals):
        self.prop2idx[proposal] = idx
        self.idx2prop.append(proposal)
    # running per-proposal statistics; weight sums live in log space,
    # so -inf means "nothing accumulated yet"
    self.num_samp = np.zeros(count)          # number of samples for weights
    self.sum = -np.inf * np.ones(count)      # sum of weights (log space)
    self.sqr_sum = -np.inf * np.ones(count)  # sum of squares (log space)
    self.var = np.zeros(count)               # weight variance estimate
    # start by choosing among the proposals uniformly
    self.prop_dist = categorical(np.array([1. / count] * count))
def __init__(self, *predefined_proposals):
    """ Choose proposals to decrease variance of weights

    Parameters
    ==========
    predefined_proposals: a set of given proposals among which to choose
    """
    length = len(predefined_proposals)
    # bidirectional mapping between proposal objects and indices
    self.prop2idx = {}
    self.idx2prop = []
    for i in range(len(predefined_proposals)):
        self.prop2idx[predefined_proposals[i]] = i
        self.idx2prop.append(predefined_proposals[i])
    self.num_samp = np.zeros(length)  # number of samples for weights
    # sums are accumulated in log space, so -inf means "empty"
    self.sum = -np.inf * np.ones(length)  # sum of weights
    self.sqr_sum = -np.inf * np.ones(length)  # sum of squares
    self.var = np.zeros(length)  # weight variance estimate
    # initially all proposals are equally likely to be chosen
    self.prop_dist = categorical(np.array([1. / length] * length))
def importance_resampling(resampled_size, pop, ess = False):
    """Importance-resample `resampled_size` samples from `pop`.

    Parameters
    ==========
    resampled_size: number of samples to draw (with replacement)
    pop: population of samples carrying an `lweight` attribute
    ess: if True, also return the effective sample size of the weights

    Returns
    =======
    the resampled list, or (resampled list, ess) when `ess` is True
    """
    prop_w = np.array([s.lweight for s in pop])
    # normalize the log weights
    prop_w = prop_w - logsumexp(prop_w)
    if ess:
        # ESS is computed from the normalized log weights, before exp()
        rval_ess = compute_ess(prop_w)
    prop_w = exp(prop_w)
    # Importance Resampling
    # categorical() may reject weights that don't sum to exactly one;
    # renormalize until it accepts them.  NOTE(review): if the weights
    # contain NaN this loop never terminates -- confirm upstream
    # guarantees finite weights.
    while True:
        try:
            dist = categorical(prop_w)
            break
        except ValueError:
            prop_w /= prop_w.sum()
    new_samp = []
    for idx in range(resampled_size):
        new_samp.append(pop[dist.rvs()])
    if ess:
        return (new_samp, rval_ess)
    else:
        return new_samp
def importance_resampling(resampled_size, pop, ess=False):
    """Resample `resampled_size` elements of `pop` with replacement,
    proportionally to their importance weights; optionally also return
    the effective sample size of the weights."""
    log_w = np.array([s.lweight for s in pop])
    log_w = log_w - logsumexp(log_w)
    if ess:
        # ESS is derived from the normalized log weights
        effective = compute_ess(log_w)
    probs = exp(log_w)
    # keep renormalizing until categorical() accepts the probabilities
    picker = None
    while picker is None:
        try:
            picker = categorical(probs)
        except ValueError:
            probs /= probs.sum()
    resampled = [pop[picker.rvs()] for _ in range(resampled_size)]
    if ess:
        return (resampled, effective)
    return resampled
def lpost_comp_indic(self, x, observation_idx):
    """Log posterior for assigning one observation to the component
    selected by the indicator vector `x` (its argmax)."""
    assert observation_idx is not None
    component = np.argmax(x.flat)
    cat_lp = dist.categorical(self.cat_param).logpdf(component)
    obs_lp = self.comp_dist[component].logpdf(self.data[observation_idx])
    return cat_lp + obs_lp
def lpost_comp_indic(self, x, observation_idx):
    """Log posterior of a component-indicator vector for one observation.

    Parameters
    ==========
    x: indicator vector; the component is taken as argmax(x)
    observation_idx: index of the observation in self.data

    Returns
    =======
    log p(component) + log p(observation | component)
    """
    assert (observation_idx is not None)
    comp_idx = np.argmax(x.flat)
    return (dist.categorical(self.cat_param).logpdf(comp_idx) +
            self.comp_dist[comp_idx].logpdf(self.data[observation_idx]))