def test_numpyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                         priors, expected):
    """Check the sample sites produced by the generated NumPyro model.

    ``expected`` is an iterable of ``(site, family_name, maybe_params)``
    triples. The test asserts that the unobserved sites of a traced run
    match the expected site names exactly, and that each site's
    distribution has the expected type name and parameter values. When
    ``maybe_params`` is falsy, ``default_params[family_name]`` is used.
    """
    # Describe the model using metadata derived from the column spec.
    parsed = parse(formula_str)
    cols = expand_columns(parsed, non_real_cols)
    metadata = metadata_from_cols(cols)
    desc = makedesc(parsed, metadata, family, priors, code_lengths(contrasts))

    # Generate the model function and some dummy data to run it on.
    modelfn = numpyro_backend.gen(desc).fn
    df = dummy_df(cols, N)
    data = data_from_numpy(numpyro_backend,
                           makedata(parsed, df, metadata, contrasts))

    # Trace a single seeded execution of the model.
    rng = random.PRNGKey(0)
    trace = numpyro.trace(numpyro.seed(modelfn, rng)).get_trace(**data)

    # The unobserved sites must be exactly the expected ones.
    expected_sites = {site for site, _, _ in expected}
    sample_sites = {
        name for name, node in trace.items() if not node['is_observed']
    }
    assert sample_sites == expected_sites

    # The 'LKJ' family is checked against the 'LKJCholesky' type name;
    # every other family name is compared as-is.
    family_aliases = {'LKJ': 'LKJCholesky'}
    for site, family_name, maybe_params in expected:
        numpyro_family_name = family_aliases.get(family_name, family_name)
        fn = trace[site]['fn']
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == numpyro_family_name
        for name, expected_val in params.items():
            if family_name == 'LKJ':
                # The only LKJ parameter, 'eta', is read from the
                # distribution's 'concentration' attribute.
                assert name == 'eta'
                attr = 'concentration'
            else:
                attr = name
            val = fn.__getattribute__(attr)
            assert_equal(val._value,
                         np.broadcast_to(expected_val, val.shape))
def test_pyro_codegen(N, formula_str, non_real_cols, contrasts, family,
                      priors, expected):
    """Check the sample sites produced by the generated Pyro model.

    ``expected`` is an iterable of ``(site, family_name, maybe_params)``
    triples. The test asserts that the stochastic nodes of a traced run
    (other than ``'obs'``) match the expected site names exactly, and
    that each site's distribution has the expected type name and
    parameter values. When ``maybe_params`` is falsy,
    ``default_params[family_name]`` is used.
    """
    parsed = parse(formula_str)
    cols = expand_columns(parsed, non_real_cols)

    # Generate the model from the column information rather than from
    # metadata extracted from `df`. Since N is small, metadata extracted
    # from `df` might lose information compared to the full metadata
    # derived from `cols` (e.g. levels of a categorical column), leading
    # to unexpected results. For example, missing levels might cause
    # correlations not to be modelled, even though they ought to be
    # given the full metadata.
    metadata = metadata_from_cols(cols)
    desc = makedesc(parsed, metadata, family, priors, code_lengths(contrasts))

    # Generate the model function and some dummy data to run it on.
    modelfn = pyro_backend.gen(desc).fn
    df = dummy_df(cols, N)
    data = data_from_numpy(pyro_backend,
                           makedata(parsed, df, metadata, contrasts))

    # The stochastic nodes, ignoring 'obs', must be exactly the
    # expected sites.
    trace = poutine.trace(modelfn).get_trace(**data)
    expected_sites = {site for site, _, _ in expected}
    assert set(trace.stochastic_nodes) - {'obs'} == expected_sites

    # The 'LKJ' family is checked against the 'LKJCorrCholesky' type
    # name; every other family name is compared as-is.
    family_aliases = {'LKJ': 'LKJCorrCholesky'}
    for site, family_name, maybe_params in expected:
        pyro_family_name = family_aliases.get(family_name, family_name)
        fn = unwrapfn(trace.nodes[site]['fn'])
        params = maybe_params or default_params[family_name]
        assert type(fn).__name__ == pyro_family_name
        for name, expected_val in params.items():
            val = fn.__getattribute__(name)
            assert_equal(val, torch.tensor(expected_val).expand(val.shape))
def fitted(fit, what='expectation', data=None):
    """Compute predictions from a fitted model.

    :param fit: The fit to predict from. Must be exactly of type ``Fit``.
    :param what: Which quantity to compute:

        * ``'expectation'`` - ``fit.model.expected_response_fn`` applied
          to the free parameters of the response family.
        * ``'sample'`` - ``fit.model.sample_response_fn`` applied to the
          same arguments.
        * ``'linear'`` - the location ``mu`` itself.
        * ``'response'`` - ``fit.model.inv_link_fn`` applied to ``mu``.
    :type what: str
    :param data: Data to predict for. When omitted, ``fit.data`` is used.
    :type data: pandas.DataFrame
    :return: The selected quantity, converted with
        ``fit.backend.to_numpy``.
    :raises ValueError: If ``what`` is not handled. This is only
        reachable when assertions are disabled (e.g. ``python -O``).
    """
    assert type(fit) == Fit
    assert what in ['sample', 'expectation', 'linear', 'response']
    assert data is None or type(data) is pd.DataFrame

    get_param = fit.samples.get_param
    location = fit.samples.location
    to_numpy = fit.backend.to_numpy
    expected_response = fit.model.expected_response_fn
    sample_response = fit.model.sample_response_fn
    inv_link = fit.model.inv_link_fn

    # Use the data the model was fit on unless new data is supplied, in
    # which case it is first encoded with the fit's formula, metadata
    # and contrasts.
    mu = location(fit.data if data is None else data_from_numpy(
        fit.backend,
        predictors(fit.formula, data, fit.metadata, fit.contrasts)))

    if what == 'sample' or what == 'expectation':
        # 'mu' is the location computed above; every other free
        # parameter of the response family comes from the samples.
        args = [
            mu if name == 'mu' else get_param(name)
            for name in free_param_names(fit.model_desc.response.family)
        ]
        response_fn = sample_response if what == 'sample' else expected_response
        return to_numpy(response_fn(*args))
    elif what == 'linear':
        return to_numpy(mu)
    elif what == 'response':
        return to_numpy(inv_link(mu))
    else:
        # Raising a bare string is itself a TypeError in Python 3, so a
        # real exception type is required here.
        raise ValueError(
            'Unhandled value of the `what` parameter encountered.')
def test_sampling_from_prior_smoke(N, backend, formula_str, non_real_cols,
                                   contrasts, family, priors, expected):
    """Smoke test: drawing from the prior yields a ``Samples`` object.

    The model is generated with ``backend`` and run on dummy data; only
    the type of the result is checked. ``expected`` is accepted but not
    used by this test.
    """
    parsed = parse(formula_str)
    cols = expand_columns(parsed, non_real_cols)

    # Use the full metadata derived from the column spec rather than
    # from the data frame, for the same reason given in the comment in
    # the codegen test.
    metadata = metadata_from_cols(cols)
    desc = makedesc(parsed, metadata, family, priors, code_lengths(contrasts))
    model = backend.gen(desc)

    df = dummy_df(cols, N, allow_non_exhaustive=True)
    encoded = makedata(parsed, df, metadata, contrasts)
    data = data_from_numpy(backend, encoded)

    samples = backend.prior(data, model, num_samples=10, seed=None)
    assert type(samples) == Samples
def generate(self, backend=_default_backend):
    """Generate backend-specific model code and data for this model.

    :param backend: The backend to generate for. Must be exactly of
        type ``Backend``.
    :return: A ``GenerateResult`` built from this object, the backend,
        the generated model and the converted data.
    """
    assert type(backend) == Backend
    return GenerateResult(
        self,
        backend,
        backend.gen(self.desc),
        data_from_numpy(backend, self.data),
    )
def fitted(self, what='expectation', data=None, seed=None):
    """
    Produces predictions from the fitted model.

    Predicted values are computed for each sample collected during
    inference, and for each row in the data set.

    :param what: The value to predict. Valid arguments and their
        effect are described below:

        .. list-table::
           :widths: auto

           * - ``'expectation'``
             - Computes the expected value of the response
               distribution.
           * - ``'sample'``
             - Draws a sample from the response distribution.
           * - ``'response'``
             - Computes the output of the model followed by any
               inverse link function. i.e. The value of the location
               parameter of the response distribution.
           * - ``'linear'``
             - Computes the output of the model prior to the
               application of any inverse link function.

    :type what: str
    :param data: The data from which to compute the predictions. When
        omitted, the data on which the model was fit is used.
    :type data: pandas.DataFrame
    :param seed: Random seed. Used only when ``'sample'`` is given as
        the ``'what'`` argument.
    :type seed: int
    :return: An array with shape ``(S, N)``. Where ``S`` is the number
        of samples taken during inference and ``N`` is the number of
        rows in the data set used for prediction.
    :rtype: numpy.ndarray
    """
    assert what in ['sample', 'expectation', 'linear', 'response']
    assert data is None or type(data) is pd.DataFrame
    assert seed is None or type(seed) == int

    samples = self.samples
    backend = self.backend
    assets = self.assets

    get_param = samples.get_param
    location = samples.location
    to_numpy = backend.to_numpy
    # Bind the generated assets (and the seed, for sampling) up front so
    # that the response helpers only need the distribution parameters.
    expected_response = partial(backend.expected_response, assets)
    sample_response = partial(backend.sample_response, assets, seed)
    inv_link = partial(backend.inv_link, assets)

    # Predict on the data the model was fit on unless new data is given,
    # in which case it is encoded with this fit's formula, metadata and
    # contrasts before being handed to the backend.
    if data is None:
        design = self.data
    else:
        design = data_from_numpy(
            backend,
            predictors(self.formula, data, self.metadata, self.contrasts))
    mu = location(design)

    if what == 'linear':
        return to_numpy(mu)

    if what == 'response':
        return to_numpy(inv_link(mu))

    if what in ('sample', 'expectation'):
        # 'mu' is the location computed above; every other free
        # parameter of the response family is read from the samples.
        args = [
            mu if name == 'mu' else get_param(name, False)
            for name in free_param_names(self.model_desc.response.family)
        ]
        if what == 'sample':
            response_fn = sample_response
        else:
            response_fn = expected_response
        return to_numpy(response_fn(*args))

    raise ValueError(
        'Unhandled value of the `what` parameter encountered.')
def encode(self, df):
    """Encode ``df`` with the model and convert it for this backend.

    :param df: The data to encode; passed to ``self.model.encode``.
    :return: The result of ``data_from_numpy`` applied to the encoded
        data for ``self.backend``.
    """
    encoded = self.model.encode(df)
    backend = self.backend
    return data_from_numpy(backend, encoded)
def run_algo(self, name, backend, *args, df=None, **kwargs):
    """Run the algorithm called ``name`` on the given backend.

    :param name: Name of the algorithm, forwarded to the generated
        assets' ``run_algo``.
    :param backend: The backend to run on. Must be exactly of type
        ``Backend``.
    :param df: Optional data to encode and use in place of
        ``self.data``.
    :param args: Extra positional arguments forwarded to the algorithm.
    :param kwargs: Extra keyword arguments forwarded to the algorithm.
    :return: Whatever the generated assets' ``run_algo`` returns.
    """
    assert type(backend) == Backend

    # Fall back to the stored data when no data frame is supplied.
    if df is None:
        data = self.data
    else:
        data = self.model.encode(df)

    run = self.model.gen(backend).run_algo
    return run(name, data_from_numpy(backend, data), *args, **kwargs)