Пример #1
0
 def test_instantiate(self):
     stan = os.path.join(DATAFILES_PATH, 'variational',
                         'eta_should_be_big.stan')
     model = CmdStanModel(stan_file=stan)
     no_data = {}
     args = VariationalArgs(algorithm='meanfield')
     cmdstan_args = CmdStanArgs(
         model_name=model.name,
         model_exe=model.exe_file,
         chain_ids=None,
         data=no_data,
         method_args=args,
     )
     runset = RunSet(args=cmdstan_args, chains=1)
     runset._csv_files = [
         os.path.join(DATAFILES_PATH, 'variational', 'eta_big_output.csv')
     ]
     variational = CmdStanVB(runset)
     self.assertIn('CmdStanVB: model=eta_should_be_big',
                   variational.__repr__())
     self.assertIn('method=variational', variational.__repr__())
     self.assertEqual(
         variational.column_names,
         ('lp__', 'log_p__', 'log_g__', 'mu[1]', 'mu[2]'),
     )
     self.assertAlmostEqual(variational.variational_params_dict['mu[1]'],
                            31.0299,
                            places=2)
     self.assertAlmostEqual(variational.variational_params_dict['mu[2]'],
                            28.8141,
                            places=2)
     self.assertEqual(variational.variational_sample.shape, (1000, 5))
Пример #2
0
    def test_set_variational_attrs(self):
        stan = os.path.join(datafiles_path, 'variational',
                            'eta_should_be_big.stan')
        model = CmdStanModel(stan_file=stan)
        no_data = {}
        args = VariationalArgs(algorithm='meanfield')
        cmdstan_args = CmdStanArgs(model_name=model.name,
                                   model_exe=model.exe_file,
                                   chain_ids=None,
                                   data=no_data,
                                   method_args=args)
        runset = RunSet(args=cmdstan_args, chains=1)
        vi = CmdStanVB(runset)
        self.assertIn('CmdStanVB: model=eta_should_be_big', vi.__repr__())
        self.assertIn('method=variational', vi.__repr__())

        # check CmdStanVB.__init__ state
        self.assertEqual(vi._column_names, ())
        self.assertEqual(vi._variational_mean, {})
        self.assertEqual(vi._variational_sample, None)

        # process csv file, check attrs
        output = os.path.join(datafiles_path, 'variational',
                              'eta_big_output.csv')
        vi._set_variational_attrs(output)
        self.assertEqual(vi.column_names,
                         ('lp__', 'log_p__', 'log_g__', 'mu.1', 'mu.2'))
        self.assertAlmostEqual(vi.variational_params_dict['mu.1'],
                               31.0299,
                               places=2)
        self.assertAlmostEqual(vi.variational_params_dict['mu.2'],
                               28.8141,
                               places=2)
        self.assertEqual(vi.variational_sample.shape, (1000, 5))
Пример #3
0
    def test_args_bad(self):
        args = VariationalArgs(algorithm='no_such_algo')
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(iter=0)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(iter=1.1)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(grad_samples=0)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(grad_samples=1.1)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(elbo_samples=0)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(elbo_samples=1.1)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(eta=-0.00003)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(adapt_iter=0)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(adapt_iter=1.1)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(tol_rel_obj=0)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(eval_elbo=0)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(eval_elbo=1.5)
        with self.assertRaises(ValueError):
            args.validate()

        args = VariationalArgs(output_samples=0)
        with self.assertRaises(ValueError):
            args.validate()
Пример #4
0
    def test_args_variational(self):
        args = VariationalArgs()
        self.assertTrue(True)

        args = VariationalArgs(output_samples=1)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('method=variational', ' '.join(cmd))
        self.assertIn('output_samples=1', ' '.join(cmd))

        args = VariationalArgs(tol_rel_obj=1)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('method=variational', ' '.join(cmd))
        self.assertIn('tol_rel_obj=1', ' '.join(cmd))
Пример #5
0
    def test_args_variational(self):
        args = VariationalArgs()
        self.assertTrue(True)

        args = VariationalArgs(output_samples=1)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('method=variational', ' '.join(cmd))
        self.assertIn('output_samples=1', ' '.join(cmd))

        args = VariationalArgs(tol_rel_obj=0.01)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('method=variational', ' '.join(cmd))
        self.assertIn('tol_rel_obj=0.01', ' '.join(cmd))

        args = VariationalArgs(adapt_engaged=True, adapt_iter=100)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('adapt engaged=1 iter=100', ' '.join(cmd))

        args = VariationalArgs(adapt_engaged=False)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('adapt engaged=0', ' '.join(cmd))

        args = VariationalArgs(eta=0.1)
        args.validate(chains=1)
        cmd = args.compose(idx=0, cmd=[])
        self.assertIn('eta=0.1', ' '.join(cmd))
Пример #6
0
    def variational(
        self,
        data: Union[Dict, str] = None,
        seed: int = None,
        inits: float = None,
        output_dir: str = None,
        save_diagnostics: bool = False,
        algorithm: str = None,
        iter: int = None,
        grad_samples: int = None,
        elbo_samples: int = None,
        eta: Real = None,
        adapt_engaged: bool = True,
        adapt_iter: int = None,
        tol_rel_obj: Real = None,
        eval_elbo: int = None,
        output_samples: int = None,
        require_converged: bool = True,
    ) -> CmdStanVB:
        """
        Run CmdStan's variational inference algorithm to approximate
        the posterior distribution of the model conditioned on the data.

        This function validates the specified configuration, composes a call to
        the CmdStan ``variational`` method and spawns one subprocess to run the
        optimizer and waits for it to run to completion.
        Unspecified arguments are not included in the call to CmdStan, i.e.,
        those arguments will have CmdStan default values.

        The ``CmdStanVB`` object records the command, the return code,
        and the paths to the variational method output csv and console files.
        The output files are written either to a specified output directory
        or to a temporary directory which is deleted upon session exit.

        Output files are either written to a temporary directory or to the
        specified output directory.  Output filenames correspond to the template
        '<model_name>-<YYYYMMDDHHMM>-<chain_id>' plus the file suffix which is
        either '.csv' for the CmdStan output or '.txt' for
        the console messages, e.g. 'bernoulli-201912081451-1.csv'.
        Output files written to the temporary directory contain an additional
        8-character random string, e.g. 'bernoulli-201912081451-1-5nm6as7u.csv'.

        :param data: Values for all data variables in the model, specified
            either as a dictionary with entries matching the data variables,
            or as the path of a data file in JSON or Rdump format.

        :param seed: The seed for random number generator. Must be an integer
            between 0 and 2^32 - 1. If unspecified,
            ``numpy.random.RandomState()``
            is used to generate a seed which will be used for all chains.

        :param inits:  Specifies how the sampler initializes parameter values.
            Initialization is uniform random on a range centered on 0 with
            default range of 2. Specifying a single number n > 0 changes
            the initialization range to [-n, n].

        :param output_dir: Name of the directory to which CmdStan output
            files are written. If unspecified, output files will be written
            to a temporary directory which is deleted upon session exit.

        :param save_diagnostics: Whether or not to save diagnostics. If True,
            csv output files are written to an output file with filename
            template '<model_name>-<YYYYMMDDHHMM>-diagnostic-<chain_id>',
            e.g. 'bernoulli-201912081451-diagnostic-1.csv'.

        :param algorithm: Algorithm to use. One of: 'meanfield', 'fullrank'.

        :param iter: Maximum number of ADVI iterations.

        :param grad_samples: Number of MC draws for computing the gradient.

        :param elbo_samples: Number of MC draws for estimate of ELBO.

        :param eta: Stepsize scaling parameter.

        :param adapt_engaged: Whether eta adaptation is engaged.

        :param adapt_iter: Number of iterations for eta adaptation.

        :param tol_rel_obj: Relative tolerance parameter for convergence.

        :param eval_elbo: Number of iterations between ELBO evaluations.

        :param output_samples: Number of approximate posterior output draws
            to save.

        :param require_converged: Whether or not to raise an error if stan
            reports that "The algorithm may not have converged".

        :return: CmdStanVB object
        """
        variational_args = VariationalArgs(
            algorithm=algorithm,
            iter=iter,
            grad_samples=grad_samples,
            elbo_samples=elbo_samples,
            eta=eta,
            adapt_engaged=adapt_engaged,
            adapt_iter=adapt_iter,
            tol_rel_obj=tol_rel_obj,
            eval_elbo=eval_elbo,
            output_samples=output_samples,
        )

        with MaybeDictToFilePath(data, inits) as (_data, _inits):
            args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=None,
                data=_data,
                seed=seed,
                inits=_inits,
                output_dir=output_dir,
                save_diagnostics=save_diagnostics,
                method_args=variational_args,
            )

            dummy_chain_id = 0
            runset = RunSet(args=args, chains=1)
            self._run_cmdstan(runset, dummy_chain_id)

        # treat failure to converge as failure
        transcript_file = runset.stdout_files[dummy_chain_id]
        valid = True
        pat = re.compile(r'The algorithm may not have converged.', re.M)
        with open(transcript_file, 'r') as transcript:
            contents = transcript.read()
            errors = re.findall(pat, contents)
            if len(errors) > 0:
                valid = False
        if require_converged and not valid:
            raise RuntimeError('The algorithm may not have converged.')
        if not runset._check_retcodes():
            msg = 'Error during variational inference.\n{}'.format(
                runset.get_err_msgs())
            raise RuntimeError(msg)
        # pylint: disable=invalid-name
        vb = CmdStanVB(runset)
        return vb
Пример #7
0
    def variational(
        self,
        data: Union[Dict, str] = None,
        seed: int = None,
        inits: float = None,
        csv_basename: str = None,
        algorithm: str = None,
        iter: int = None,
        grad_samples: int = None,
        elbo_samples: int = None,
        eta: Real = None,
        adapt_iter: int = None,
        tol_rel_obj: Real = None,
        eval_elbo: int = None,
        output_samples: int = None,
    ) -> CmdStanVB:
        """
        Run CmdStan's variational inference algorithm to approximate
        the posterior distribution of the model conditioned on the data.

        :param data: Values for all data variables in the model, specified
            either as a dictionary with entries matching the data variables,
            or as the path of a data file in JSON or Rdump format.

        :param seed: The seed for random number generator. Must be an integer
            between ``0`` and ``2^32 - 1``. If unspecified,
            ``numpy.random.RandomState()``
            is used to generate a seed which will be used for all chains.

        :param inits:  Specifies how the sampler initializes parameter values.
            Initializiation is uniform random on a range centered on ``0`` with
            default range of ``2``. Specifying a single number ``n > 0`` changes
            the initialization range to ``[-n, n]``.

        :param csv_basename:  A path or file name which will be used as the
            basename for the CmdStan output files.  The csv output files
            are written to file ``<basename>-0.csv`` and the console output
            and error messages are written to file ``<basename>-0.txt``.

        :param algorithm: Algorithm to use. One of: "meanfield", "fullrank".

        :param iter: Maximum number of ADVI iterations.

        :param grad_samples: Number of MC draws for computing the gradient.

        :param elbo_samples: Number of MC draws for estimate of ELBO.

        :param eta: Stepsize scaling parameter.

        :param adapt_iter: Number of iterations for eta adaptation.

        :param tol_rel_obj: Relative tolerance parameter for convergence.

        :param eval_elbo: Number of interations between ELBO evaluations.

        :param output_samples: Number of approximate posterior output draws
            to save.

        :return: CmdStanVB object
        """
        variational_args = VariationalArgs(
            algorithm=algorithm,
            iter=iter,
            grad_samples=grad_samples,
            elbo_samples=elbo_samples,
            eta=eta,
            adapt_iter=adapt_iter,
            tol_rel_obj=tol_rel_obj,
            eval_elbo=eval_elbo,
            output_samples=output_samples,
        )

        with MaybeDictToFilePath(data, inits) as (_data, _inits):
            args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=None,
                data=_data,
                seed=seed,
                inits=_inits,
                output_basename=csv_basename,
                method_args=variational_args,
            )

            dummy_chain_id = 0
            runset = RunSet(args=args, chains=1)
            self._run_cmdstan(runset, dummy_chain_id)

        # treat failure to converge as failure
        transcript_file = runset.console_files[dummy_chain_id]
        valid = True
        pat = re.compile(r'The algorithm may not have converged.', re.M)
        with open(transcript_file, 'r') as transcript:
            contents = transcript.read()
            errors = re.findall(pat, contents)
            if len(errors) > 0:
                valid = False
        if not valid:
            raise RuntimeError('The algorithm may not have converged.')
        if not runset._check_retcodes():
            msg = 'Error during variational inference'
            if runset._retcode(dummy_chain_id) != 0:
                msg = '{}, error code {}'.format(
                    msg, runset._retcode(dummy_chain_id)
                )
                raise RuntimeError(msg)
        # pylint: disable=invalid-name
        vb = CmdStanVB(runset)
        vb._set_variational_attrs(runset.csv_files[0])
        return vb