Пример #1
0
 def resample(self, data=[], niter=None):
     """Resample ``self.A`` and ``self.sigma``.

     data : Observations handed to ``getdatasize`` and
            ``self._get_statistics``.  When ``getdatasize(data)`` is 0,
            ``A`` is drawn with ``sample_gaussian`` from ``J_0``/``h_0``
            and ``sigma`` with ``sample_invwishart`` from ``S_0``/``nu_0``.

     niter : Number of alternating A/sigma sweeps when data is present.
             Any falsy value (``None``, ``0``) falls back to ``self.niter``.
     """
     niter = niter or self.niter

     if getdatasize(data) == 0:
         # No data: draw both parameters from the *_0 quantities.
         flat_A = sample_gaussian(J=self.J_0, h=self.h_0.ravel())
         self.A = flat_A.reshape(self.h_0.shape)
         self.sigma = sample_invwishart(self.S_0, self.nu_0)
         return

     # Statistics are computed once and reused by every sweep.
     yyT, yxT, xxT, n = self._get_statistics(data)
     for _ in range(niter):
         self._resample_A(xxT, yxT, self.sigma)
         self._resample_sigma(xxT, yxT, yyT, n, self.A)
Пример #2
0
 def resample(self, data=[], niter=None):
     """Draw new values for ``self.A`` and ``self.sigma``.

     data : Passed to ``getdatasize`` and ``self._get_statistics``.

     niter : How many times to alternate ``_resample_A`` and
             ``_resample_sigma`` when data is present.  ``self.niter`` is
             used when this is ``None`` (or any other falsy value).
     """
     if not niter:
         niter = self.niter

     have_no_data = getdatasize(data) == 0
     if have_no_data:
         # Sample A as a flat vector, then restore the shape of h_0.
         target_shape = self.h_0.shape
         self.A = sample_gaussian(
             J=self.J_0, h=self.h_0.ravel()).reshape(target_shape)
         self.sigma = sample_invwishart(self.S_0, self.nu_0)
     else:
         stats = self._get_statistics(data)
         yyT, yxT, xxT, n = stats
         sweep = 0
         while sweep < niter:
             # Each step uses the value produced by the previous one.
             self._resample_A(xxT, yxT, self.sigma)
             self._resample_sigma(xxT, yxT, yyT, n, self.A)
             sweep += 1
Пример #3
0
 def _resample_sigma(self, xxT, yxT, yyT, n, A):
     """Resample ``self.sigma`` via ``sample_invwishart``.

     The first argument is
     ``S_0 + yyT - yxT A^T - A yxT^T + A xxT A^T`` and the second is
     ``nu_0 + n``.

     xxT, yxT, yyT, n : Statistics, as returned by ``_get_statistics`` in
                        ``resample`` (presumably sums of outer products and
                        a count -- confirm against ``_get_statistics``).

     A : Matrix used in the cross and quadratic terms.
     """
     yx_At = yxT.dot(A.T)
     A_xy = A.dot(yxT.T)
     A_xx_At = A.dot(xxT).dot(A.T)
     # Terms are combined in the same left-to-right order as written above.
     scale = self.S_0 + yyT - yx_At - A_xy + A_xx_At
     self.sigma = sample_invwishart(scale, self.nu_0 + n)
Пример #4
0
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1, seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.  Used as the file prefix when `datafn`
               is None.

        datadir : Path to an existing directory containing the data.

        datafn : Prefix name to files that data (`<datafn>_data.txt`) and
                 missing masks (`<datafn>_fold*.txt`) are stored in.  If
                 None, `name` is used instead.

        K : Number of components in HMM.

        expdir : Path to directory to store experiment results; forwarded
                 to ExpSeq as `exper_dir`.  What happens when it is None is
                 decided by ExpSeq (presumably a `name`_results directory
                 is created -- confirm against ExpSeq).

        nfolds : Currently unused by this function.

        nrestarts : Number of random initial parameters.

        seed : Random number seed.

        Raises RuntimeError if `datadir` does not exist or is not a
        directory, or if the data file cannot be loaded.

        The names maxit, verbose, taus, kappas, reuse_msg, grow_buffer, Ls
        and correct_trans are read from the enclosing (module) scope.
    """

    # Set seed for reproducibility
    np.random.seed(seed)

    # Validate the data directory before touching any files.
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    if not os.path.isdir(datadir):
        raise RuntimeError("datadir: %s exists but is not a directory" % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")
    try:
        X = np.loadtxt(dpath)
    except IOError:
        # Any load failure is fatal: without X nothing below can run.
        # Previously a missing file fell through and surfaced later as a
        # NameError on X.
        raise RuntimeError("Could not load data: %s" % (dpath,))

    # With no fold files, use a single None mask entry.
    masks = glob.glob(mpath) or [None]

    # Initialize parameter possibilities

    obs_mean = np.mean(X, axis=0)
    mu_0 = obs_mean
    sigma_0 = 0.75*np.cov(X.T)

    # Vague values that keeps covariance matrices p.d.
    kappa_0 = 0.01
    nu_0 = 4

    prior_init = np.ones(K)
    prior_tran = np.ones((K,K))
    D = X.shape[1]

    rand_starts = list()
    for r in range(nrestarts):
        init_means = np.empty((K,D))
        # NOTE: init_cov is filled but not read afterwards.  The draws are
        # kept so that the sequence of random samples for a given seed is
        # unchanged (assuming sample_invwishart uses the global RNG).
        init_cov = list()
        for k in range(K):
            init_means[k,:] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))
        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k,:], sigma=sigma_0,
                                       mu_0=mu_0, sigma_0=sigma_0,
                                       kappa_0=kappa_0, nu_0=nu_0)
                              for k in range(K)])

        # Random initial distribution, normalized to sum to one.
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)

        # Random transition matrix with each row normalized to sum to one.
        init_tran = np.random.rand(K,K)
        init_tran /= np.sum(init_tran, axis=1)[:,np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        pd = {'init_init': init_init, 'init_tran': init_tran,
              'prior_init': prior_init, 'prior_tran': prior_tran,
              'prior_emit': prior_emit, 'maxit': maxit, 'verbose': verbose}
        rand_starts.append(pd)

    # Compute Cartesian product of random starts with other possible parameter
    # values, make a generator to fill in entries in the par dicts created
    # above, and then construct the par_list by calling the generator with the
    # Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, reuse_msg,
                                      grow_buffer, Ls, correct_trans)

    def gen_par(par_tuple):
        # Shallow copy so each combination gets its own dict while sharing
        # the (unmodified) initial-parameter arrays.
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['reuseMsg'] = par_tuple[3]
        d['growBuffer'] = par_tuple[4]
        d['metaobs_half'] = par_tuple[5]
        d['correctTrans'] = par_tuple[6]
        d['mb_sz'] = 100//(2*par_tuple[5]+1)
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.  A generator expression keeps this lazy on both Python 2
    # and Python 3 (itertools.imap only exists on Python 2).
    par_list = (gen_par(par_tuple) for par_tuple in par_prod_iter)

    # Create ExperimentSequential and call run_exper
    exp = ExpSeq(datafn, dpath, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()
Пример #5
0
 def _resample_sigma(self, xxT, yxT, yyT, n, A):
     """Update ``self.sigma`` with a draw from ``sample_invwishart``.

     xxT, yxT, yyT : Matrix statistics combined with ``A`` and ``self.S_0``
                     into the first argument of ``sample_invwishart``.

     n : Added to ``self.nu_0`` to form the second argument.

     A : Matrix appearing in the cross terms ``yxT A^T``, ``A yxT^T`` and
         the quadratic term ``A xxT A^T``.
     """
     nu = n + self.nu_0 if False else self.nu_0 + n
     S = self.S_0 + yyT
     S = S - yxT.dot(A.T)
     S = S - A.dot(yxT.T)
     S = S + A.dot(xxT).dot(A.T)
     self.sigma = sample_invwishart(S, nu)
Пример #6
0
def test_wishart_correlated_pgm_rvs(K=10):
    """Run ``test_correlated_pgm_rvs`` on a randomly generated covariance.

    K : The covariance is drawn with ``sample_invwishart`` using a
        ``(K-1) x (K-1)`` identity matrix and ``nu=K``.
    """
    from pybasicbayes.util.stats import sample_invwishart

    identity = np.eye(K-1)
    random_cov = sample_invwishart(identity, nu=K)
    test_correlated_pgm_rvs(random_cov)
Пример #7
0
def test_wishart_correlated_pgm_rvs(K=10):
    """Check ``test_correlated_pgm_rvs`` against a random covariance matrix.

    K : ``nu`` for ``sample_invwishart``; the identity matrix passed with
        it has ``K - 1`` rows and columns.
    """
    # Imported locally, as in the original, so the dependency is only
    # needed when this test runs.
    from pybasicbayes.util.stats import sample_invwishart

    dim = K - 1
    test_correlated_pgm_rvs(sample_invwishart(np.eye(dim), nu=K))
Пример #8
0
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1, seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.  Used as the file prefix when `datafn`
               is None.

        datadir : Path to an existing directory containing the data.

        datafn : Prefix name to files that data (`<datafn>_data.txt`) and
                 missing masks (`<datafn>_fold*.txt`) are stored in.  If
                 None, `name` is used instead.

        K : Number of components in HMM.

        expdir : Path to directory to store experiment results; forwarded
                 to ExpSeq as `exper_dir`.  What happens when it is None is
                 decided by ExpSeq (presumably a `name`_results directory
                 is created -- confirm against ExpSeq).

        nfolds : Currently unused by this function.

        nrestarts : Number of random initial parameters.

        seed : Random number seed.

        Raises RuntimeError if `datadir` does not exist or is not a
        directory, or if the data file cannot be loaded.

        The names maxit, taus, kappas and Ls are read from the enclosing
        (module) scope.
    """

    # Set seed for reproducibility
    np.random.seed(seed)

    # Validate the data directory before touching any files.
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    if not os.path.isdir(datadir):
        raise RuntimeError("datadir: %s exists but is not a directory" % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")
    try:
        X = np.loadtxt(dpath)
    except IOError:
        # Any load failure is fatal: without X nothing below can run.
        # Previously a missing file fell through and surfaced later as a
        # NameError on X.
        raise RuntimeError("Could not load data: %s" % (dpath,))

    # With no fold files, use a single None mask entry.
    masks = glob.glob(mpath) or [None]

    # Initialize parameter possibilities

    obs_mean = np.mean(X, axis=0)
    mu_0 = obs_mean
    sigma_0 = 0.75*np.cov(X.T)

    # Vague values that keeps covariance matrices p.d.
    kappa_0 = 0.01
    nu_0 = 4

    prior_init = np.ones(K)
    prior_tran = np.ones((K,K))
    # Observation dimension; this was previously used below without ever
    # being assigned in this function.
    D = X.shape[1]

    rand_starts = list()
    for r in range(nrestarts):
        init_means = np.empty((K,D))
        # NOTE: init_cov is filled but not read afterwards.  The draws are
        # kept so that the sequence of random samples for a given seed is
        # unchanged (assuming sample_invwishart uses the global RNG).
        init_cov = list()
        for k in range(K):
            init_means[k,:] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))
        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k,:], sigma=sigma_0,
                                       mu_0=mu_0, sigma_0=sigma_0,
                                       kappa_0=kappa_0, nu_0=nu_0)
                              for k in range(K)])

        # Random initial distribution, normalized to sum to one.
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)

        # Random transition matrix with each row normalized to sum to one.
        init_tran = np.random.rand(K,K)
        init_tran /= np.sum(init_tran, axis=1)[:,np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        pd = {'init_init': init_init, 'init_tran': init_tran,
              'prior_init': prior_init, 'prior_tran': prior_tran,
              'prior_emit': prior_emit, 'maxit': maxit}
        rand_starts.append(pd)

    # Compute Cartesian product of random starts with other possible parameter
    # values, make a generator to fill in entries in the par dicts created
    # above, and then construct the par_list by calling the generator with the
    # Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, Ls)

    def gen_par(par_tuple):
        # Shallow copy so each combination gets its own dict while sharing
        # the (unmodified) initial-parameter arrays.
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['metaobs_half'] = par_tuple[3]
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.  A generator expression keeps this lazy on both Python 2
    # and Python 3 (itertools.imap only exists on Python 2).
    par_list = (gen_par(par_tuple) for par_tuple in par_prod_iter)

    # Create ExperimentSequential and call run_exper
    exp = ExpSeq('exper_synth_4statedd', dpath, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()