def test_instantiate(self):
    """Wrap a stored ADVI output file in ``CmdStanVB`` and check its contents.

    The RunSet is never executed in this test; its csv file list is pointed
    at a stored output file before the ``CmdStanVB`` object is constructed.
    """
    fixture_dir = os.path.join(DATAFILES_PATH, 'variational')
    model = CmdStanModel(
        stan_file=os.path.join(fixture_dir, 'eta_should_be_big.stan')
    )
    cmdstan_args = CmdStanArgs(
        model_name=model.name,
        model_exe=model.exe_file,
        chain_ids=None,
        data={},
        method_args=VariationalArgs(algorithm='meanfield'),
    )
    runset = RunSet(args=cmdstan_args, chains=1)
    runset._csv_files = [os.path.join(fixture_dir, 'eta_big_output.csv')]
    variational = CmdStanVB(runset)

    # The textual summary names both the model and the inference method.
    summary = repr(variational)
    self.assertIn('CmdStanVB: model=eta_should_be_big', summary)
    self.assertIn('method=variational', summary)

    self.assertEqual(
        variational.column_names,
        ('lp__', 'log_p__', 'log_g__', 'mu[1]', 'mu[2]'),
    )
    estimates = variational.variational_params_dict
    self.assertAlmostEqual(estimates['mu[1]'], 31.0299, places=2)
    self.assertAlmostEqual(estimates['mu[2]'], 28.8141, places=2)
    self.assertEqual(variational.variational_sample.shape, (1000, 5))
def test_set_variational_attrs(self):
    """Check ``CmdStanVB`` state right after construction and after parsing.

    The object is built from a RunSet that is never executed, so its
    attributes start out empty; ``_set_variational_attrs`` is then called
    on a stored output file and the attributes are checked again.
    """
    fixture_dir = os.path.join(datafiles_path, 'variational')
    model = CmdStanModel(
        stan_file=os.path.join(fixture_dir, 'eta_should_be_big.stan')
    )
    cmdstan_args = CmdStanArgs(
        model_name=model.name,
        model_exe=model.exe_file,
        chain_ids=None,
        data={},
        method_args=VariationalArgs(algorithm='meanfield'),
    )
    vi = CmdStanVB(RunSet(args=cmdstan_args, chains=1))

    summary = repr(vi)
    self.assertIn('CmdStanVB: model=eta_should_be_big', summary)
    self.assertIn('method=variational', summary)

    # check CmdStanVB.__init__ state
    self.assertEqual(vi._column_names, ())
    self.assertEqual(vi._variational_mean, {})
    self.assertEqual(vi._variational_sample, None)

    # process csv file, check attrs
    vi._set_variational_attrs(
        os.path.join(fixture_dir, 'eta_big_output.csv')
    )
    self.assertEqual(
        vi.column_names,
        ('lp__', 'log_p__', 'log_g__', 'mu.1', 'mu.2'),
    )
    estimates = vi.variational_params_dict
    self.assertAlmostEqual(estimates['mu.1'], 31.0299, places=2)
    self.assertAlmostEqual(estimates['mu.2'], 28.8141, places=2)
    self.assertEqual(vi.variational_sample.shape, (1000, 5))
def test_args_bad(self):
    """Each invalid ``VariationalArgs`` setting must fail ``validate()``.

    The cases run in order; the first configuration that does not raise
    ``ValueError`` fails the test.
    """
    invalid_settings = [
        {'algorithm': 'no_such_algo'},
        {'iter': 0},
        {'iter': 1.1},
        {'grad_samples': 0},
        {'grad_samples': 1.1},
        {'elbo_samples': 0},
        {'elbo_samples': 1.1},
        {'eta': -0.00003},
        {'adapt_iter': 0},
        {'adapt_iter': 1.1},
        {'tol_rel_obj': 0},
        {'eval_elbo': 0},
        {'eval_elbo': 1.5},
        {'output_samples': 0},
    ]
    for setting in invalid_settings:
        # Construct outside the assertRaises context: only validate()
        # is expected to raise.
        args = VariationalArgs(**setting)
        with self.assertRaises(ValueError):
            args.validate()
def test_args_variational(self):
    """Valid ``VariationalArgs`` compose into the expected command words."""
    # Default construction should simply succeed.
    args = VariationalArgs()
    self.assertTrue(True)

    cases = (
        ({'output_samples': 1}, 'output_samples=1'),
        ({'tol_rel_obj': 1}, 'tol_rel_obj=1'),
    )
    for setting, fragment in cases:
        args = VariationalArgs(**setting)
        args.validate(chains=1)
        composed = ' '.join(args.compose(idx=0, cmd=[]))
        self.assertIn('method=variational', composed)
        self.assertIn(fragment, composed)
def test_args_variational(self):
    """Valid ``VariationalArgs`` compose into the expected command words.

    Each case pairs constructor keyword arguments with the fragments that
    must appear in the space-joined composed command.
    """
    # Default construction should simply succeed.
    args = VariationalArgs()
    self.assertTrue(True)

    cases = (
        ({'output_samples': 1}, ('method=variational', 'output_samples=1')),
        ({'tol_rel_obj': 0.01}, ('method=variational', 'tol_rel_obj=0.01')),
        (
            {'adapt_engaged': True, 'adapt_iter': 100},
            ('adapt engaged=1 iter=100',),
        ),
        ({'adapt_engaged': False}, ('adapt engaged=0',)),
        ({'eta': 0.1}, ('eta=0.1',)),
    )
    for setting, fragments in cases:
        args = VariationalArgs(**setting)
        args.validate(chains=1)
        composed = ' '.join(args.compose(idx=0, cmd=[]))
        for fragment in fragments:
            self.assertIn(fragment, composed)
def variational(
    self,
    data: Union[Dict, str] = None,
    seed: int = None,
    inits: float = None,
    output_dir: str = None,
    save_diagnostics: bool = False,
    algorithm: str = None,
    iter: int = None,
    grad_samples: int = None,
    elbo_samples: int = None,
    eta: Real = None,
    adapt_engaged: bool = True,
    adapt_iter: int = None,
    tol_rel_obj: Real = None,
    eval_elbo: int = None,
    output_samples: int = None,
    require_converged: bool = True,
) -> CmdStanVB:
    """
    Run CmdStan's variational inference algorithm to approximate
    the posterior distribution of the model conditioned on the data.

    This function validates the specified configuration, composes a call to
    the CmdStan ``variational`` method and spawns one subprocess to run the
    algorithm and waits for it to run to completion.
    Unspecified arguments are not included in the call to CmdStan, i.e.,
    those arguments will have CmdStan default values.

    The ``CmdStanVB`` object records the command, the return code,
    and the paths to the variational method output csv and console files.
    The output files are written either to a specified output directory
    or to a temporary directory which is deleted upon session exit.

    Output filenames correspond to the template
    '<model_name>-<YYYYMMDDHHMM>-<chain_id>' plus the file suffix which is
    either '.csv' for the CmdStan output or '.txt' for
    the console messages, e.g. 'bernoulli-201912081451-1.csv'.
    Output files written to the temporary directory contain an additional
    8-character random string, e.g. 'bernoulli-201912081451-1-5nm6as7u.csv'.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param seed: The seed for random number generator. Must be an integer
        between 0 and 2^32 - 1. If unspecified,
        ``numpy.random.RandomState()``
        is used to generate a seed which will be used for all chains.

    :param inits: Specifies how the algorithm initializes parameter values.
        Initialization is uniform random on a range centered on 0 with
        default range of 2. Specifying a single number n > 0 changes
        the initialization range to [-n, n].

    :param output_dir: Name of the directory to which CmdStan output
        files are written. If unspecified, output files will be written
        to a temporary directory which is deleted upon session exit.

    :param save_diagnostics: Whether or not to save diagnostics. If True,
        csv output files are written to an output file with filename
        template '<model_name>-<YYYYMMDDHHMM>-diagnostic-<chain_id>',
        e.g. 'bernoulli-201912081451-diagnostic-1.csv'.

    :param algorithm: Algorithm to use. One of: 'meanfield', 'fullrank'.

    :param iter: Maximum number of ADVI iterations.

    :param grad_samples: Number of MC draws for computing the gradient.

    :param elbo_samples: Number of MC draws for estimate of ELBO.

    :param eta: Stepsize scaling parameter.

    :param adapt_engaged: Whether eta adaptation is engaged.

    :param adapt_iter: Number of iterations for eta adaptation.

    :param tol_rel_obj: Relative tolerance parameter for convergence.

    :param eval_elbo: Number of iterations between ELBO evaluations.

    :param output_samples: Number of approximate posterior output draws
        to save.

    :param require_converged: Whether or not to raise an error if stan
        reports that "The algorithm may not have converged".

    :raises RuntimeError: If ``require_converged`` is True and the console
        output contains the non-convergence message, or if the run's
        return codes indicate failure.

    :return: CmdStanVB object
    """
    variational_args = VariationalArgs(
        algorithm=algorithm,
        iter=iter,
        grad_samples=grad_samples,
        elbo_samples=elbo_samples,
        eta=eta,
        adapt_engaged=adapt_engaged,
        adapt_iter=adapt_iter,
        tol_rel_obj=tol_rel_obj,
        eval_elbo=eval_elbo,
        output_samples=output_samples,
    )

    with MaybeDictToFilePath(data, inits) as (_data, _inits):
        args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=None,
            data=_data,
            seed=seed,
            inits=_inits,
            output_dir=output_dir,
            save_diagnostics=save_diagnostics,
            method_args=variational_args,
        )

        # A single run; the RunSet is indexed with a placeholder chain id.
        dummy_chain_id = 0
        runset = RunSet(args=args, chains=1)
        self._run_cmdstan(runset, dummy_chain_id)

    # treat failure to converge as failure
    # The message is fixed text, so a literal substring test is used; a
    # regex would treat the trailing '.' as a wildcard.
    not_converged_msg = 'The algorithm may not have converged.'
    transcript_file = runset.stdout_files[dummy_chain_id]
    with open(transcript_file, 'r') as transcript:
        converged = not_converged_msg not in transcript.read()
    if require_converged and not converged:
        raise RuntimeError(not_converged_msg)

    if not runset._check_retcodes():
        msg = 'Error during variational inference.\n{}'.format(
            runset.get_err_msgs()
        )
        raise RuntimeError(msg)

    return CmdStanVB(runset)
def variational(
    self,
    data: Union[Dict, str] = None,
    seed: int = None,
    inits: float = None,
    csv_basename: str = None,
    algorithm: str = None,
    iter: int = None,
    grad_samples: int = None,
    elbo_samples: int = None,
    eta: Real = None,
    adapt_iter: int = None,
    tol_rel_obj: Real = None,
    eval_elbo: int = None,
    output_samples: int = None,
) -> CmdStanVB:
    """
    Run CmdStan's variational inference algorithm to approximate
    the posterior distribution of the model conditioned on the data.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param seed: The seed for random number generator. Must be an integer
        between ``0`` and ``2^32 - 1``. If unspecified,
        ``numpy.random.RandomState()``
        is used to generate a seed which will be used for all chains.

    :param inits: Specifies how the algorithm initializes parameter values.
        Initialization is uniform random on a range centered on ``0`` with
        default range of ``2``. Specifying a single number ``n > 0`` changes
        the initialization range to ``[-n, n]``.

    :param csv_basename: A path or file name which will be used as the
        basename for the CmdStan output files. The csv output files
        are written to file ``<basename>-0.csv`` and the console output
        and error messages are written to file ``<basename>-0.txt``.

    :param algorithm: Algorithm to use. One of: "meanfield", "fullrank".

    :param iter: Maximum number of ADVI iterations.

    :param grad_samples: Number of MC draws for computing the gradient.

    :param elbo_samples: Number of MC draws for estimate of ELBO.

    :param eta: Stepsize scaling parameter.

    :param adapt_iter: Number of iterations for eta adaptation.

    :param tol_rel_obj: Relative tolerance parameter for convergence.

    :param eval_elbo: Number of iterations between ELBO evaluations.

    :param output_samples: Number of approximate posterior output draws
        to save.

    :raises RuntimeError: If the console output contains the
        non-convergence message, or if the run's return codes indicate
        failure.

    :return: CmdStanVB object
    """
    variational_args = VariationalArgs(
        algorithm=algorithm,
        iter=iter,
        grad_samples=grad_samples,
        elbo_samples=elbo_samples,
        eta=eta,
        adapt_iter=adapt_iter,
        tol_rel_obj=tol_rel_obj,
        eval_elbo=eval_elbo,
        output_samples=output_samples,
    )

    with MaybeDictToFilePath(data, inits) as (_data, _inits):
        args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=None,
            data=_data,
            seed=seed,
            inits=_inits,
            output_basename=csv_basename,
            method_args=variational_args,
        )

        # A single run; the RunSet is indexed with a placeholder chain id.
        dummy_chain_id = 0
        runset = RunSet(args=args, chains=1)
        self._run_cmdstan(runset, dummy_chain_id)

    # treat failure to converge as failure
    # The message is fixed text, so a literal substring test is used; a
    # regex would treat the trailing '.' as a wildcard.
    not_converged_msg = 'The algorithm may not have converged.'
    transcript_file = runset.console_files[dummy_chain_id]
    with open(transcript_file, 'r') as transcript:
        converged = not_converged_msg not in transcript.read()
    if not converged:
        raise RuntimeError(not_converged_msg)

    if not runset._check_retcodes():
        msg = 'Error during variational inference'
        retcode = runset._retcode(dummy_chain_id)
        if retcode != 0:
            msg = '{}, error code {}'.format(msg, retcode)
        raise RuntimeError(msg)

    # pylint: disable=invalid-name
    vb = CmdStanVB(runset)
    vb._set_variational_attrs(runset.csv_files[0])
    return vb