def test_InvWishartRandomWalkProposal():
    """Smoke-test InvWishartRandomWalkProposal: proposals center on the mean.

    For a few random scale matrices K, draw 3000 proposals with mean K and
    assert the mean squared error between K and the empirical mean of the
    proposals stays below a loose tolerance.
    """
    # NOTE: the original pre-loop `dim = 4; iw = invwishart(...)` was dead
    # code (immediately shadowed inside the loop) and has been removed.
    for dim in [4]:
        df = stats.poisson.rvs(2)
        iw = invwishart(np.eye(dim) * 5, dim + 1)
        for K in [iw.rv() for _ in range(3)]:
            pdist = pmc.InvWishartRandomWalkProposal(dim + 1 + df, dim)
            props = np.array(
                [pdist.gen_proposal(mean=K).sample for _ in range(3000)])
            mse = ((K - props.mean(0)) ** 2).mean()
            # Loose bound: random-walk proposals are noisy by design.
            assert np.abs(mse) < 5
def test_InvWishartRandomWalkProposal():
    """Smoke-test InvWishartRandomWalkProposal: proposals center on the mean.

    For a few random scale matrices K, draw 3000 proposals with mean K and
    assert the mean squared error between K and the empirical mean of the
    proposals stays below a loose tolerance.
    """
    # NOTE: the original pre-loop `dim = 4; iw = invwishart(...)` was dead
    # code (immediately shadowed inside the loop) and has been removed.
    for dim in [4]:
        df = stats.poisson.rvs(2)
        iw = invwishart(np.eye(dim) * 5, dim + 1)
        for K in [iw.rv() for _ in range(3)]:
            pdist = pmc.InvWishartRandomWalkProposal(dim + 1 + df, dim)
            props = np.array(
                [pdist.gen_proposal(mean=K).sample for _ in range(3000)])
            mse = ((K - props.mean(0)) ** 2).mean()
            # Loose bound: random-walk proposals are noisy by design.
            assert np.abs(mse) < 5
def approximate_mixture_data():
    """Fit a DirCatTMM to synthetic two-component data using PMC sampling.

    Draws observations from a two-component mixture of multivariate-t
    distributions, then runs PMC twice — once with local proposals ("infl")
    and once with standard proposals — counting lpost/llhood evaluations.

    Returns:
        dict with keys "infl" and "standard", each mapping to a
        (samples, log-posteriors) tuple as returned by pmc.sample.
    """
    num_loc_proposals = 2
    num_imp_samp = 1000
    n_comp = 2
    p_comp = np.array([0.7, 0.3])
    dim = 1
    num_obs = 100

    # Generate observations; component i is centered at 20*i in each dim.
    obs = None
    means = []
    for i in range(n_comp):
        means.append([20 * i] * dim)
        # FIX: `np.int` was removed in NumPy 1.24 — use the builtin int.
        n_i = int(np.round(num_obs * p_comp[i]))
        draws = dist.mvt(means[-1], np.eye(dim), 30).rvs(n_i)
        if obs is None:
            obs = draws
        else:
            obs = np.vstack([obs, draws])

    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}
    print(means)

    def count_closure(name):
        # Returns a zero-arg hook that bumps the named evaluation counter.
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            DirCatTMM(obs, [1] * n_comp,
                      dist.mvt(np.mean(means, 0), np.eye(dim) * 5, dim),
                      dist.invwishart(np.eye(dim) * 5, dim + 1),
                      stats.gamma(1, scale=1)))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                          lpost_count=count_closure("local_lpost"),
                          llhood_count=count_closure("local_llhood")),
        population_size=4)
    # Standard run gets num_imp_samp * num_loc_proposals draws so both
    # strategies spend a comparable proposal budget.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                          llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].comp_indic.sum(0)) + 1,
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].comp_indic.sum(0)) + 1,
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
def test_DirCatTMMProposal():
    """Exercise dis.DirCatTMMProposal on synthetic two-component data.

    Runs PMC with local proposals and with standard proposals, counting
    lpost/llhood evaluations, and prints summary diagnostics for each run.
    """
    num_loc_proposals = 2
    num_imp_samp = 1000
    n_comp = 2
    p_comp = np.array([0.7, 0.3])
    dim = 1
    num_obs = 100

    # Generate observations; component i is centered at 20*i in each dim.
    obs = None
    means = []
    for i in range(n_comp):
        means.append([20 * i] * dim)
        # FIX: `np.int` was removed in NumPy 1.24 — use the builtin int.
        n_i = int(np.round(num_obs * p_comp[i]))
        draws = dist.mvt(means[-1], np.eye(dim), 30).rvs(n_i)
        if obs is None:
            obs = draws
        else:
            obs = np.vstack([obs, draws])

    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}
    print(means)

    def count_closure(name):
        # Returns a zero-arg hook that bumps the named evaluation counter.
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            dis.DirCatTMM(obs, [1] * n_comp,
                          dist.mvt(np.mean(means, 0), np.eye(dim) * 5, dim),
                          dist.invwishart(np.eye(dim) * 5, dim + 1),
                          stats.gamma(1, scale=1)))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        dis.DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                              lpost_count=count_closure("local_lpost"),
                              llhood_count=count_closure("local_llhood")),
        population_size=4)
    # Standard run gets num_imp_samp * num_loc_proposals draws so both
    # strategies spend a comparable proposal budget.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        dis.DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                              llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].comp_indic.sum(0)) + 1,
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].comp_indic.sum(0)) + 1,
          count["standard_llhood"], count["standard_lpost"], "\n\n")
def test_DirCatTMM():
    """Unit-test the DirCatTMM model's component-indicator posterior,
    per-component likelihood, and prior.

    For each dimensionality, checks that:
    - lpost_comp_indic is -inf exactly when the indicated component has
      zero categorical weight;
    - llhood_comp_param is maximized at the true (mu, K, df) parameters;
    - lprior is highest at the Dirichlet-mean categorical parameter and
      decreases for perturbed categorical/component parameters.
    """
    num_obs = 1000
    for dim in range(2, 4):
        mu = np.array([11 * (i + 1) for i in range(dim)])
        K = np.eye(dim) * 5
        df = dim + 1
        obs_dist = dist.mvt(mu, K, df)
        obs = obs_dist.rvs(num_obs)
        dctmm = dis.DirCatTMM(obs, [1] * dim, obs_dist,
                              dist.invwishart(np.eye(dim) * 5, dim + 1),
                              stats.gamma(1, scale=1, loc=dim + 1))
        # FIX: removed unused local `orig_cat_param` (assigned, never read).
        dctmm.cat_param = np.zeros(dim)
        for i in range(dim):
            # Give all categorical mass to component i.
            dctmm.cat_param[i] = 1

            ### Test DirCatTMM.lpost_comp_indic ###
            for j in range(dim):
                c_indic = np.zeros(dim)
                c_indic[j] = 1
                for o in range(obs.shape[0]):
                    if i == j:
                        # Indicated component has positive weight.
                        assert dctmm.lpost_comp_indic(c_indic, o) > -np.inf
                    else:
                        # Zero-weight component must be impossible.
                        assert dctmm.lpost_comp_indic(c_indic, o) == -np.inf
                c_indic[j] = 0

            ### Test DirCatTMM.llhood_comp_param ###
            # True parameters should dominate all perturbations.
            highest = dctmm.llhood_comp_param((mu, K, df), i)
            assert highest >= dctmm.llhood_comp_param((-mu, K, df), i)
            assert highest >= dctmm.llhood_comp_param((mu, K * 5, df), i)
            assert highest >= dctmm.llhood_comp_param((mu, K / 2, df), i)
            assert highest >= dctmm.llhood_comp_param((mu, K, df + 10), i)
            dctmm.cat_param[i] = 0

        ### Test DirCatTMM.lprior ###
        dctmm.cat_param = np.array(dctmm.dir_param / dctmm.dir_param.sum())
        dctmm.comp_indic = dist.categorical(dctmm.cat_param).rvs(num_obs,
                                                                 indic=True)
        dctmm.update_comp_dists([(mu, K, df)] * dim)
        highest = dctmm.lprior()
        # Skewing the categorical parameter away from the Dirichlet mean
        # must lower the prior density.
        c_param = dctmm.dir_param + np.arange(dim)
        dctmm.cat_param = np.array(c_param / c_param.sum())
        ch_cat_param = dctmm.lprior()
        assert highest > ch_cat_param
        # Moving the component means away must lower it further.
        dctmm.update_comp_dists([(-mu, K, df)] * dim)
        assert ch_cat_param > dctmm.lprior()
def gen_proposal(self, ancestor=None, mean=None):
    """Generate a proposal from an inverse-Wishart centered at `mean`.

    Either `mean` or an ancestor carrying a sample with dim*dim entries
    must be supplied; when `mean` is omitted the ancestor's sample is used.
    Delegates sample construction to gen_sample_prototype.
    """
    mean_ok = mean is not None and np.prod(mean.shape) == self.dim ** 2
    anc_ok = (ancestor is not None and ancestor.sample is not None
              and np.prod(ancestor.sample.shape) == self.dim ** 2)
    assert mean_ok or anc_ok
    if mean is None and ancestor is not None:
        mean = ancestor.sample
    # Scale matrix chosen so the inverse-Wishart mean equals `mean`
    # (mean of IW(S, df) is S / (df - dim - 1)).
    scale_matr = mean * (self.df - self.dim - 1)
    pdist = invwishart(scale_matr, self.df)
    if not hasattr(pdist, "rvs"):
        # Alias rv as rvs so the object quacks like a scipy distribution.
        pdist.__dict__["rvs"] = pdist.rv
    return gen_sample_prototype(ancestor, self, prop_dist=pdist,
                                lpost_func=self.lpost)
def gen_proposal(self, ancestor=None, mean=None):
    """Generate a proposal from an inverse-Wishart centered at `mean`.

    Requires either an explicit `mean` or an ancestor whose sample has
    dim*dim entries; falls back to the ancestor's sample when `mean` is
    None. Delegates sample construction to gen_sample_prototype.
    """
    assert ((mean is not None and np.prod(mean.shape) == self.dim ** 2)
            or (ancestor is not None and ancestor.sample is not None
                and np.prod(ancestor.sample.shape) == self.dim ** 2))
    if mean is None and ancestor is not None:
        mean = ancestor.sample
    # Pick the scale so that the IW mean, scale/(df - dim - 1), equals mean.
    scale_matr = mean * (self.df - self.dim - 1)
    pdist = invwishart(scale_matr, self.df)
    if not hasattr(pdist, "rvs"):
        # Some trickery: expose rv under scipy's conventional name rvs.
        pdist.__dict__["rvs"] = pdist.rv
    return gen_sample_prototype(ancestor, self,
                                prop_dist=pdist, lpost_func=self.lpost)
def approximate_iris_mixture_data():
    """Fit a three-component DirCatTMM to the Iris dataset with PMC.

    Runs PMC with local proposals ("infl") and with standard proposals,
    counting lpost/llhood evaluations, and prints summary diagnostics.

    Returns:
        dict with keys "infl" and "standard", each mapping to a
        (samples, log-posteriors) tuple as returned by pmc.sample.
    """
    from sklearn.datasets import load_iris
    num_imp_samp = 100
    num_loc_proposals = 3
    n_comp = 3
    p_comp = np.array([1 / n_comp] * n_comp)
    dim = 4
    iris = load_iris()
    obs = iris["data"]
    # FIX: removed unused local `labels = iris["target"]` (never read).
    # Iris observations are grouped 50 per class; per-class empirical means.
    means = np.array([obs[i * 50:(i + 1) * 50].mean(0) for i in range(3)])

    count = {"local_lpost": 0, "local_llhood": 0,
             "naive_lpost": 0, "naive_llhood": 0,
             "standard_lpost": 0, "standard_llhood": 0}

    def count_closure(name):
        # Returns a zero-arg hook that bumps the named evaluation counter.
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            DirCatTMM(obs, [1] * n_comp,
                      dist.mvt(obs.mean(0), np.diag(obs.var(0)), 20),
                      dist.invwishart(np.eye(dim), 50),
                      stats.gamma(500, scale=0.1)))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                          lpost_count=count_closure("local_lpost"),
                          llhood_count=count_closure("local_llhood")),
        population_size=4)
    # Standard run gets num_imp_samp * num_loc_proposals draws so both
    # strategies spend a comparable proposal budget.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                          llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {"infl": (infl_samp, infl_lpost),
            "standard": (stand_samp, stand_lpost)}
def gen_proposal(self, ancestor=None, mean=None):
    """Build a PmcSample by drawing from an inverse-Wishart centered at `mean`.

    Requires either an explicit `mean` or an ancestor whose sample has
    dim*dim entries. When both `mean` and the ancestor's sample are absent,
    a zero matrix of the proposal distribution's sample shape is used.
    Fills in sample, lpost, lprop and lweight on the returned PmcSample.
    """
    assert ((mean is not None and np.prod(mean.shape) == self.dim ** 2)
            or (ancestor is not None and ancestor.sample is not None
                and np.prod(ancestor.sample.shape) == self.dim ** 2))
    rval = PmcSample(ancestor, prop_obj=self)
    if mean is None and ancestor is not None:
        if ancestor.sample is not None:
            mean = ancestor.sample
        else:
            # Fall back to a zero matrix matching the base distribution.
            mean = np.zeros(self.pdist.rvs().shape)
    # Scale matrix chosen so the inverse-Wishart mean equals `mean`.
    scale_matr = mean * (self.df - self.dim - 1)
    pdist = invwishart(scale_matr, self.df)
    rval.sample = pdist.rv()
    rval.lpost = self.lpost(rval.sample)
    rval.lprop = pdist.logpdf(rval.sample)
    # Importance weight = posterior / proposal density (in log space);
    # undefined when the posterior could not be evaluated.
    rval.lweight = (rval.lpost - rval.lprop
                    if rval.lpost is not None else None)
    return rval
stand_samp[-1].comp_indic.sum(0), stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()), count["standard_llhood"], count["standard_lpost"],"\n\n") return {"infl":(infl_samp, infl_lpost), "standard":(stand_samp, stand_lpost)} if __name__ == "__main__": import scipy.io as io of3 = io.loadmat("data/oilFlow3Class.mat") of3_lab = np.vstack((of3["DataTrnLbls"], of3["DataTstLbls"],of3["DataVdnLbls"],)) of3 = np.vstack((of3["DataTrn"], of3["DataTst"],of3["DataVdn"],))*100 initial = [DirCatTMM(of3, [1]*3, dist.mvnorm([0]*12, np.eye(12)), dist.invwishart(np.eye(12)*5, 12), stats.gamma(1,scale=1)) for _ in range(10)] count = {"local_lpost" :0, "local_llhood" :0, "naive_lpost" :0 ,"naive_llhood" :0} def count_closure(name): def rval(): count[name] = count[name] + 1 return rval samps = pmc.sample(50, initial, DirCatTMMProposal(lpost_count = count_closure("naive_lpost"), llhood_count = count_closure("naive_lpost")), population_size=5, quiet=False)
def approximate_iris_mixture_data():
    """Fit a three-component DirCatTMM to the Iris dataset with PMC.

    Runs PMC with local proposals ("infl") and with standard proposals,
    counting lpost/llhood evaluations, and prints summary diagnostics.

    Returns:
        dict with keys "infl" and "standard", each mapping to a
        (samples, log-posteriors) tuple as returned by pmc.sample.
    """
    from sklearn.datasets import load_iris
    num_imp_samp = 100
    num_loc_proposals = 3
    n_comp = 3
    p_comp = np.array([1 / n_comp] * n_comp)
    dim = 4
    iris = load_iris()
    obs = iris["data"]
    # FIX: removed unused local `labels = iris["target"]` (never read).
    # Iris observations are grouped 50 per class; per-class empirical means.
    means = np.array([obs[i * 50:(i + 1) * 50].mean(0) for i in range(3)])

    count = {
        "local_lpost": 0,
        "local_llhood": 0,
        "naive_lpost": 0,
        "naive_llhood": 0,
        "standard_lpost": 0,
        "standard_llhood": 0
    }

    def count_closure(name):
        # Returns a zero-arg hook that bumps the named evaluation counter.
        def rval():
            count[name] = count[name] + 1
        return rval

    initial_samples = []
    for _ in range(10):
        initial_samples.append(
            DirCatTMM(obs, [1] * n_comp,
                      dist.mvt(obs.mean(0), np.diag(obs.var(0)), 20),
                      dist.invwishart(np.eye(dim), 50),
                      stats.gamma(500, scale=0.1)))

    (infl_samp, infl_lpost) = pmc.sample(
        num_imp_samp, initial_samples,
        DirCatTMMProposal(num_local_proposals=num_loc_proposals,
                          lpost_count=count_closure("local_lpost"),
                          llhood_count=count_closure("local_llhood")),
        population_size=4)
    # Standard run gets num_imp_samp * num_loc_proposals draws so both
    # strategies spend a comparable proposal budget.
    (stand_samp, stand_lpost) = pmc.sample(
        num_imp_samp * num_loc_proposals, initial_samples,
        DirCatTMMProposal(lpost_count=count_closure("standard_lpost"),
                          llhood_count=count_closure("standard_llhood")),
        population_size=4)

    print("===============\n", p_comp, means,
          "\n\n--LOCAL--\n",
          infl_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, infl_samp[-1].cat_param.flatten()),
          count["local_llhood"], count["local_lpost"],
          "\n\n--STANDARD--\n",
          stand_samp[-1].comp_indic.sum(0),
          stats.entropy(p_comp, stand_samp[-1].cat_param.flatten()),
          count["standard_llhood"], count["standard_lpost"], "\n\n")
    return {
        "infl": (infl_samp, infl_lpost),
        "standard": (stand_samp, stand_lpost)
    }
# Load the three-class oil-flow dataset and set up DirCatTMM initial
# samples plus evaluation counters.
# NOTE(review): relies on `io` (scipy.io) being imported by earlier
# script code — confirm an `import scipy.io as io` precedes this block.
of3 = io.loadmat("data/oilFlow3Class.mat")
# Stack train/test/validation labels and data into single arrays.
of3_lab = np.vstack(
    (of3["DataTrnLbls"], of3["DataTstLbls"], of3["DataVdnLbls"]))
of3 = np.vstack((of3["DataTrn"], of3["DataTst"], of3["DataVdn"])) * 100

# Ten independent DirCatTMM initializations over the 12-dim observations.
initial = [
    DirCatTMM(of3, [1] * 3,
              dist.mvnorm([0] * 12, np.eye(12)),
              dist.invwishart(np.eye(12) * 5, 12),
              stats.gamma(1, scale=1))
    for _ in range(10)
]

# Evaluation counters shared by the count_closure hooks below.
count = {"local_lpost": 0, "local_llhood": 0,
         "naive_lpost": 0, "naive_llhood": 0}


def count_closure(name):
    # Returns a zero-arg hook that bumps the named evaluation counter.
    def rval():
        count[name] = count[name] + 1
    return rval