def resample(self, data=[], niter=None):
    """Resample ``self.A`` and ``self.sigma``.

    data : Observations handed to ``getdatasize`` and
           ``self._get_statistics``.  The default is never mutated here.

    niter : Number of alternating sweeps over A and sigma.  Any falsy
            value (None, 0) falls back to ``self.niter``.

    When ``getdatasize(data)`` is zero, A is drawn with ``sample_gaussian``
    from ``J_0`` / ``h_0`` and sigma with ``sample_invwishart`` from
    ``S_0`` / ``nu_0`` (presumably the prior -- confirm against the class).
    Otherwise the statistics are computed once and reused for every sweep.
    """
    niter = niter or self.niter

    if getdatasize(data) != 0:
        yyT, yxT, xxT, n = self._get_statistics(data)
        for _ in range(niter):
            # Each step conditions on the value the other step just set.
            self._resample_A(xxT, yxT, self.sigma)
            self._resample_sigma(xxT, yxT, yyT, n, self.A)
        return

    # No data: draw A as a flat vector, then restore the shape of h_0.
    flat_A = sample_gaussian(J=self.J_0, h=self.h_0.ravel())
    self.A = flat_A.reshape(self.h_0.shape)
    self.sigma = sample_invwishart(self.S_0, self.nu_0)
def resample(self, data=[], niter=None):
    """Update ``self.A`` and ``self.sigma`` by sampling.

    data : Passed to ``getdatasize`` and, when non-empty, to
           ``self._get_statistics``.  The default list is only read.

    niter : How many (A, sigma) sweeps to run when data is present;
            a falsy value means "use ``self.niter``".
    """
    sweeps = niter or self.niter
    have_data = getdatasize(data) != 0

    if not have_data:
        # Sample A from (J_0, h_0) in flattened form and reshape it to
        # match h_0; sample sigma from (S_0, nu_0).
        h_shape = self.h_0.shape
        draw = sample_gaussian(J=self.J_0, h=self.h_0.ravel())
        self.A = draw.reshape(h_shape)
        self.sigma = sample_invwishart(self.S_0, self.nu_0)
    else:
        # The statistics depend only on the data, so compute them once.
        yyT, yxT, xxT, n = self._get_statistics(data)
        for _ in range(sweeps):
            self._resample_A(xxT, yxT, self.sigma)
            self._resample_sigma(xxT, yxT, yyT, n, self.A)
def _resample_sigma(self, xxT, yxT, yyT, n, A):
    """Draw a new ``self.sigma`` with ``sample_invwishart``.

    The scale argument is
        S_0 + yyT - yxT A' - A yxT' + A xxT A'
    and the degrees-of-freedom argument is ``nu_0 + n``.
    (Presumably xxT / yxT / yyT are the values returned by
    ``_get_statistics`` and n the data count -- confirm with the caller.)
    """
    # Accumulate the scale term by term, left to right.
    scale = self.S_0 + yyT
    scale = scale - yxT.dot(A.T)
    scale = scale - A.dot(yxT.T)
    scale = scale + A.dot(xxT).dot(A.T)

    dof = self.nu_0 + n
    self.sigma = sample_invwishart(scale, dof)
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1,
         seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.  Used as the file prefix when `datafn`
               is None.

        datadir : Path to directory containing data.

        datafn : Prefix name to files that data and missing masks are stored
                 in (`datafn`_data.txt and `datafn`_fold*.txt inside
                 `datadir`).  Falls back to `name` when None.

        K : Number of components in HMM.

        expdir : Path to directory to store experiment results; forwarded to
                 ExpSeq as `exper_dir`.  Originally documented as: if None
                 (default), a directory `name`_results is made in the
                 current directory -- that would happen inside ExpSeq, not
                 in this function.

        nfolds : Number of folds to generate if datafn is None.
                 NOTE(review): not referenced anywhere in this function.

        nrestarts : Number of random initial parameters.

        seed : Random number seed.

        Raises RuntimeError if `datadir` is missing or is not a directory,
        or if the data file cannot be read.

        Relies on names defined outside this function: maxit, verbose, taus,
        kappas, reuse_msg, grow_buffer, Ls, correct_trans, run_exper, ExpSeq,
        Gaussian, mvnrand and sample_invwishart.
    """

    # Set seed for reproducibility
    np.random.seed(seed)

    # Load data and folds (missing masks)
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    elif not os.path.isdir(datadir):
        raise RuntimeError("datadir: %s exists but is not a directory" % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")

    try:
        X = np.loadtxt(dpath)
    except IOError:
        # Previously a missing data file was swallowed here and the function
        # crashed later with an unbound `X`; fail immediately instead.
        raise RuntimeError("Could not load data: %s" % (dpath,))

    masks = glob.glob(mpath)
    if not masks:
        # No fold files found: hand ExpSeq a single None mask.
        masks = [None]

    # Initialize parameter possibilities
    mu_0 = np.mean(X, axis=0)
    # Vague values that keeps covariance matrices p.d.
    sigma_0 = 0.75*np.cov(X.T)
    kappa_0 = 0.01
    nu_0 = 4
    prior_init = np.ones(K)
    prior_tran = np.ones((K, K))
    _, D = X.shape

    rand_starts = list()
    for _ in xrange(nrestarts):
        init_means = np.empty((K, D))
        # NOTE(review): init_cov is never read below (the emissions use
        # sigma_0).  The draws are kept because removing them would
        # presumably shift the random stream for a given seed.
        init_cov = list()
        for k in xrange(K):
            init_means[k, :] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))

        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k, :], sigma=sigma_0,
                                        mu_0=mu_0, sigma_0=sigma_0,
                                        kappa_0=kappa_0, nu_0=nu_0)
                               for k in xrange(K)])

        # Random initial state distribution, normalized to sum to one.
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)

        # Random transition matrix with each row normalized to sum to one.
        init_tran = np.random.rand(K, K)
        init_tran /= np.sum(init_tran, axis=1)[:, np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        start_pars = {'init_init': init_init, 'init_tran': init_tran,
                      'prior_init': prior_init, 'prior_tran': prior_tran,
                      'prior_emit': prior_emit, 'maxit': maxit,
                      'verbose': verbose}
        rand_starts.append(start_pars)

    # Compute Cartesian product of random starts with other possible parameter
    # values, make a generator to fill in entries in the par dicts created
    # above, and then construct the par_list by calling the generator with the
    # Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, reuse_msg,
                                      grow_buffer, Ls, correct_trans)

    def gen_par(par_tuple):
        # Shallow copy so every combination gets its own dict while still
        # sharing the arrays of the random start it was built from.
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['reuseMsg'] = par_tuple[3]
        d['growBuffer'] = par_tuple[4]
        d['metaobs_half'] = par_tuple[5]
        d['correctTrans'] = par_tuple[6]
        d['mb_sz'] = 100//(2*par_tuple[5]+1)
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.
    par_list = itertools.imap(gen_par, par_prod_iter)

    # Create ExperimentSequential and call run_exper
    exp = ExpSeq(datafn, dpath, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()
def test_wishart_correlated_pgm_rvs(K=10):
    """Run ``test_correlated_pgm_rvs`` on a randomly drawn covariance.

    The covariance comes from ``sample_invwishart`` with a (K-1) x (K-1)
    identity as its first argument and ``nu=K``.
    """
    from pybasicbayes.util.stats import sample_invwishart

    # Randomly generate a covariance matrix
    dim = K - 1
    scale = np.eye(dim)
    Sigma = sample_invwishart(scale, nu=K)

    test_correlated_pgm_rvs(Sigma)
def test_wishart_correlated_pgm_rvs(K=10):
    """Feed an inverse-Wishart sample to ``test_correlated_pgm_rvs``.

    K : ``nu`` passed to ``sample_invwishart``; the identity matrix passed
        alongside it has K - 1 rows and columns.
    """
    from pybasicbayes.util.stats import sample_invwishart

    # Randomly generate a covariance matrix
    identity = np.eye(K - 1)
    random_cov = sample_invwishart(identity, nu=K)
    test_correlated_pgm_rvs(random_cov)
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1,
         seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.  Used as the file prefix when `datafn`
               is None.

        datadir : Path to directory containing data.

        datafn : Prefix name to files that data and missing masks are stored
                 in (`datafn`_data.txt and `datafn`_fold*.txt inside
                 `datadir`).  Falls back to `name` when None.

        K : Number of components in HMM.

        expdir : Path to directory to store experiment results; forwarded to
                 ExpSeq as `exper_dir`.  Originally documented as: if None
                 (default), a directory `name`_results is made in the
                 current directory -- that would happen inside ExpSeq, not
                 in this function.

        nfolds : Number of folds to generate if datafn is None.
                 NOTE(review): not referenced anywhere in this function.

        nrestarts : Number of random initial parameters.

        seed : Random number seed.

        Raises RuntimeError if `datadir` is missing or is not a directory,
        or if the data file cannot be read.

        Relies on names defined outside this function: maxit, taus, kappas,
        Ls, run_exper, ExpSeq, Gaussian, mvnrand and sample_invwishart.
    """

    # Set seed for reproducibility
    np.random.seed(seed)

    # Load data and folds (missing masks)
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    elif not os.path.isdir(datadir):
        raise RuntimeError("datadir: %s exists but is not a directory" % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")

    try:
        X = np.loadtxt(dpath)
    except IOError:
        # Previously a missing data file was swallowed here and the function
        # crashed later with an unbound `X`; fail immediately instead.
        raise RuntimeError("Could not load data: %s" % (dpath,))

    masks = glob.glob(mpath)
    if not masks:
        # No fold files found: hand ExpSeq a single None mask.
        masks = [None]

    # Initialize parameter possibilities
    mu_0 = np.mean(X, axis=0)
    # Vague values that keeps covariance matrices p.d.
    sigma_0 = 0.75*np.cov(X.T)
    kappa_0 = 0.01
    nu_0 = 4
    prior_init = np.ones(K)
    prior_tran = np.ones((K, K))
    # D was used below without ever being assigned in this function; take
    # the observation dimension from the loaded data.
    _, D = X.shape

    rand_starts = list()
    for _ in xrange(nrestarts):
        init_means = np.empty((K, D))
        # NOTE(review): init_cov is never read below (the emissions use
        # sigma_0).  The draws are kept because removing them would
        # presumably shift the random stream for a given seed.
        init_cov = list()
        for k in xrange(K):
            init_means[k, :] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))

        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k, :], sigma=sigma_0,
                                        mu_0=mu_0, sigma_0=sigma_0,
                                        kappa_0=kappa_0, nu_0=nu_0)
                               for k in xrange(K)])

        # Random initial state distribution, normalized to sum to one.
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)

        # Random transition matrix with each row normalized to sum to one.
        init_tran = np.random.rand(K, K)
        init_tran /= np.sum(init_tran, axis=1)[:, np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        start_pars = {'init_init': init_init, 'init_tran': init_tran,
                      'prior_init': prior_init, 'prior_tran': prior_tran,
                      'prior_emit': prior_emit, 'maxit': maxit}
        rand_starts.append(start_pars)

    # Compute Cartesian product of random starts with other possible parameter
    # values, make a generator to fill in entries in the par dicts created
    # above, and then construct the par_list by calling the generator with the
    # Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, Ls)

    def gen_par(par_tuple):
        # Shallow copy so every combination gets its own dict while still
        # sharing the arrays of the random start it was built from.
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['metaobs_half'] = par_tuple[3]
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.
    par_list = itertools.imap(gen_par, par_prod_iter)

    # Create ExperimentSequential and call run_exper
    exp = ExpSeq('exper_synth_4statedd', dpath, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()