Example #1
def test_uniform_normal():
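    # Fits an AutoDiagonalNormal guide with SVI on a reparameterized model where
    # loc ~ Uniform(0, alpha), then checks that the guide's median and quantiles
    # recover the true location of the data.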
    true_coef = 0.9
    data = true_coef + random.normal(random.PRNGKey(0), (1000,))

    def model(data):
        alpha = numpyro.sample("alpha", dist.Uniform(0, 1))
        with numpyro.handlers.reparam(config={"loc": TransformReparam()}):
            loc = numpyro.sample(
                "loc",
                dist.TransformedDistribution(
                    dist.Uniform(0, 1), transforms.AffineTransform(0, alpha)
                ),
            )
        numpyro.sample("obs", dist.Normal(loc, 0.1), obs=data)

    adam = optim.Adam(0.01)
    rng_key_init = random.PRNGKey(1)
    guide = AutoDiagonalNormal(model)
    svi = SVI(model, guide, adam, Trace_ELBO())
    svi_state = svi.init(rng_key_init, data)

    def body_fn(i, val):
        svi_state, loss = svi.update(val, data)
        return svi_state

    svi_state = fori_loop(0, 1000, body_fn, svi_state)
    params = svi.get_params(svi_state)
    median = guide.median(params)
    assert_allclose(median["loc"], true_coef, rtol=0.05)
    # test .quantiles method
    quantiles = guide.quantiles(params, [0.2, 0.5])
    assert_allclose(quantiles["loc"][1], true_coef, rtol=0.1)
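These snippets are test excerpts and omit their import statements; a minimal sketch of the imports they appear to rely on (module paths may differ between numpyro releases) follows.

# Sketch of the imports assumed by these test snippets (an assumption, not part
# of the original excerpts); the exact module layout varies across numpyro versions.
from numpy.testing import assert_allclose
from jax import lax, random
from jax.lax import fori_loop
import jax.numpy as jnp
import numpyro
import numpyro.distributions as dist
from numpyro.distributions import transforms
from numpyro import optim
from numpyro.infer import SVI, Trace_ELBO
from numpyro.infer.autoguide import AutoDiagonalNormal
from numpyro.infer.initialization import init_to_value
from numpyro.infer.reparam import TransformReparam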
Example #2
def test_init_to_scalar_value():
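    # init_to_value should accept a plain Python scalar (not just an array) as
    # the initial value of a latent site when building the AutoDiagonalNormal guide.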
    def model():
        numpyro.sample("x", dist.Normal(0, 1))

    guide = AutoDiagonalNormal(model, init_loc_fn=init_to_value(values={"x": 1.0}))
    svi = SVI(model, guide, optim.Adam(1.0), Trace_ELBO())
    svi.init(random.PRNGKey(0))
Example #3
def test_autoguide(deterministic):
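    # GLOBAL and model are assumed to be defined elsewhere in the test module;
    # the model presumably bumps GLOBAL["count"] at a number of sites that depends
    # on the flag. The deterministic kwarg given to SVI is forwarded to the model
    # as a static keyword argument.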
    GLOBAL["count"] = 0
    guide = AutoDiagonalNormal(model)
    svi = SVI(model,
              guide,
              optim.Adam(0.1),
              Trace_ELBO(),
              deterministic=deterministic)
    svi_state = svi.init(random.PRNGKey(0))
    svi_state = lax.fori_loop(0, 100, lambda i, val: svi.update(val)[0],
                              svi_state)
    params = svi.get_params(svi_state)
    guide.sample_posterior(random.PRNGKey(1), params, sample_shape=(100, ))

    if deterministic:
        assert GLOBAL["count"] == 5
    else:
        assert GLOBAL["count"] == 4
Example #4
def test_autocontinuous_local_error():
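    # AutoDiagonalNormal only supports global continuous latents, so initializing
    # SVI on a model with a subsampled plate (local latent "x") must raise a
    # ValueError mentioning "local latent variables".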
    def model():
        with numpyro.plate("N", 10, subsample_size=4):
            numpyro.sample("x", dist.Normal(0, 1))

    guide = AutoDiagonalNormal(model)
    svi = SVI(model, guide, optim.Adam(1.0), Trace_ELBO())
    with pytest.raises(ValueError, match="local latent variables"):
        svi.init(random.PRNGKey(0))
Example #5
def test_module():
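    # Registers a neural network inside the model via numpyro.module; Dense is
    # assumed to be a stax-style (init_fn, apply_fn) layer pair, e.g. from
    # jax.example_libraries.stax.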
    x = random.normal(random.PRNGKey(0), (100, 10))
    y = random.normal(random.PRNGKey(1), (100,))

    def model(x, y):
        nn = numpyro.module("nn", Dense(1), (10,))
        mu = nn(x).squeeze(-1)
        sigma = numpyro.sample("sigma", dist.HalfNormal(1))
        numpyro.sample("y", dist.Normal(mu, sigma), obs=y)

    guide = AutoDiagonalNormal(model)
    svi = SVI(model, guide, optim.Adam(0.003), Trace_ELBO(), x=x, y=y)
    svi_state = svi.init(random.PRNGKey(2))
    lax.scan(lambda state, i: svi.update(state), svi_state, jnp.zeros(1000))
Example #6
def test_improper():
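    # Flat ImproperUniform priors for the two location terms and the positive
    # scale; the autoguide still works because each site declares its support
    # constraint.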
    y = random.normal(random.PRNGKey(0), (100,))

    def model(y):
        lambda1 = numpyro.sample('lambda1', dist.ImproperUniform(dist.constraints.real, (), ()))
        lambda2 = numpyro.sample('lambda2', dist.ImproperUniform(dist.constraints.real, (), ()))
        sigma = numpyro.sample('sigma', dist.ImproperUniform(dist.constraints.positive, (), ()))
        mu = numpyro.deterministic('mu', lambda1 + lambda2)
        numpyro.sample('y', dist.Normal(mu, sigma), obs=y)

    guide = AutoDiagonalNormal(model)
    svi = SVI(model, guide, optim.Adam(0.003), Trace_ELBO(), y=y)
    svi_state = svi.init(random.PRNGKey(2))
    lax.scan(lambda state, i: svi.update(state), svi_state, jnp.zeros(10000))
Example #7
def test_dynamic_supports():
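    # Reparameterizing loc ~ Uniform(0, alpha) via TransformReparam should give
    # the same optimizer state, guide parameters, and ELBO as writing the
    # rescaling out by hand in expected_model.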
    true_coef = 0.9
    data = true_coef + random.normal(random.PRNGKey(0), (1000,))

    def actual_model(data):
        alpha = numpyro.sample("alpha", dist.Uniform(0, 1))
        with numpyro.handlers.reparam(config={"loc": TransformReparam()}):
            loc = numpyro.sample(
                "loc",
                dist.TransformedDistribution(
                    dist.Uniform(0, 1), transforms.AffineTransform(0, alpha)
                ),
            )
        with numpyro.plate("N", len(data)):
            numpyro.sample("obs", dist.Normal(loc, 0.1), obs=data)

    def expected_model(data):
        alpha = numpyro.sample("alpha", dist.Uniform(0, 1))
        loc = numpyro.sample("loc", dist.Uniform(0, 1)) * alpha
        with numpyro.plate("N", len(data)):
            numpyro.sample("obs", dist.Normal(loc, 0.1), obs=data)

    adam = optim.Adam(0.01)
    rng_key_init = random.PRNGKey(1)

    guide = AutoDiagonalNormal(actual_model)
    svi = SVI(actual_model, guide, adam, Trace_ELBO())
    svi_state = svi.init(rng_key_init, data)
    actual_opt_params = adam.get_params(svi_state.optim_state)
    actual_params = svi.get_params(svi_state)
    actual_values = guide.median(actual_params)
    actual_loss = svi.evaluate(svi_state, data)

    guide = AutoDiagonalNormal(expected_model)
    svi = SVI(expected_model, guide, adam, Trace_ELBO())
    svi_state = svi.init(rng_key_init, data)
    expected_opt_params = adam.get_params(svi_state.optim_state)
    expected_params = svi.get_params(svi_state)
    expected_values = guide.median(expected_params)
    expected_loss = svi.evaluate(svi_state, data)

    # test auto_loc, auto_scale
    check_eq(actual_opt_params, expected_opt_params)
    check_eq(actual_params, expected_params)
    # test latent values
    assert_allclose(actual_values["alpha"], expected_values["alpha"])
    assert_allclose(actual_values["loc_base"], expected_values["loc"])
    assert_allclose(actual_loss, expected_loss)
Example #8
def model_factory(twinify_args: argparse.Namespace, unparsed_args: Iterable[str], orig_data: pd.DataFrame) -> Tuple[TModelFunction, TGuideFunction]:
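    # twinify-style model factory: parses extra command-line arguments for the
    # prior mean, then returns a d-dimensional Normal model (with an InverseGamma
    # scale prior) together with an AutoDiagonalNormal guide.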
    model_args_parser = argparse.ArgumentParser()
    model_args_parser.add_argument('--prior_mu', type=float, default=0.)
    args = model_args_parser.parse_args(unparsed_args, twinify_args)

    d = orig_data.shape[-1]
    print(f"Model using prior mu = {args.prior_mu}")
    print(f"Privacy parameter epsilon is {args.epsilon}")

    def model(z = None, num_obs_total = None) -> None:
        batch_size = 1
        if z is not None:
            batch_size = z.shape[0]
        if num_obs_total is None:
            num_obs_total = batch_size

        mu = sample('mu', dists.Normal(args.prior_mu).expand_by((d,)).to_event(1))
        sigma = sample('sigma', dists.InverseGamma(1.).expand_by((d,)).to_event(1))
        with plate('batch', num_obs_total, batch_size):
            sample('x', dists.Normal(mu, sigma).to_event(1), obs=z)

    guide = AutoDiagonalNormal(model, prefix="guide")

    return model, guide
Example #9
def test_dynamic_supports():
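    # Appears to be an older-API variant of the test in Example #7 (ELBO instead
    # of Trace_ELBO, reparam imported directly, no plate over the observations).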
    true_coef = 0.9
    data = true_coef + random.normal(random.PRNGKey(0), (1000, ))

    def actual_model(data):
        alpha = numpyro.sample('alpha', dist.Uniform(0, 1))
        with reparam(config={'loc': TransformReparam()}):
            loc = numpyro.sample('loc', dist.Uniform(0, alpha))
        numpyro.sample('obs', dist.Normal(loc, 0.1), obs=data)

    def expected_model(data):
        alpha = numpyro.sample('alpha', dist.Uniform(0, 1))
        loc = numpyro.sample('loc', dist.Uniform(0, 1)) * alpha
        numpyro.sample('obs', dist.Normal(loc, 0.1), obs=data)

    adam = optim.Adam(0.01)
    rng_key_init = random.PRNGKey(1)

    guide = AutoDiagonalNormal(actual_model)
    svi = SVI(actual_model, guide, adam, ELBO())
    svi_state = svi.init(rng_key_init, data)
    actual_opt_params = adam.get_params(svi_state.optim_state)
    actual_params = svi.get_params(svi_state)
    actual_values = guide.median(actual_params)
    actual_loss = svi.evaluate(svi_state, data)

    guide = AutoDiagonalNormal(expected_model)
    svi = SVI(expected_model, guide, adam, ELBO())
    svi_state = svi.init(rng_key_init, data)
    expected_opt_params = adam.get_params(svi_state.optim_state)
    expected_params = svi.get_params(svi_state)
    expected_values = guide.median(expected_params)
    expected_loss = svi.evaluate(svi_state, data)

    # test auto_loc, auto_scale
    check_eq(actual_opt_params, expected_opt_params)
    check_eq(actual_params, expected_params)
    # test latent values
    assert_allclose(actual_values['alpha'], expected_values['alpha'])
    assert_allclose(actual_values['loc_base'], expected_values['loc'])
    assert_allclose(actual_loss, expected_loss)
Example #10
def main():
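    # End-to-end run: load a CSV, build the model either from a numpyro .py module
    # or from a txt model specification, fall back to an AutoDiagonalNormal guide
    # if none is provided, derive the DP noise scale, train, then sample and store
    # a synthetic data set (plus optional plots).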
    args = parser.parse_args()
    print(args)

    # read data
    df = pd.read_csv(args.data_path)

    # check whether we parse model from txt or whether we have a numpyro module
    try:
        if args.model_path[-3:] == '.py':
            spec = importlib.util.spec_from_file_location(
                "model_module", args.model_path)
            model_module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(model_module)

            model = model_module.model

            train_df = df
            if args.drop_na:
                train_df = train_df.dropna()

            ## AUTOMATIC PREPROCESSING CURRENTLY UNAVAILABLE
            # data preprocessing: determines number of categories for Categorical
            #   distribution and maps categorical values in the data to ints
            # for feature in features:
            #     train_df = feature.preprocess_data(train_df)

            ## ALTERNATIVE
            # we do allow the user to specify a preprocess/postprocess function pair
            # in the numpyro model file
            try:
                preprocess_fn = model_module.preprocess
            except AttributeError:
                preprocess_fn = None
            if preprocess_fn:
                train_df = preprocess_fn(train_df)

            try:
                postprocess_fn = model_module.postprocess
            except AttributeError:
                postprocess_fn = None

            try:
                guide = model_module.guide
            except AttributeError:
                guide = AutoDiagonalNormal(model)

        else:
            print(
                "Parsing model from txt file (was unable to read it as python module containing numpyro code)"
            )
            k = args.k
            # read model file
            with open(args.model_path, 'r') as model_handle:
                model_str = "".join(model_handle.readlines())
            features = automodel.parse_model(model_str)
            feature_names = [feature.name for feature in features]

            # pick features from data according to model file
            missing_features = set(feature_names).difference(df.columns)
            if missing_features:
                raise automodel.ParsingError(
                    "The model specifies features that are not present in the data:\n{}"
                    .format(", ".join(missing_features)))

            train_df = df.loc[:, feature_names]
            if args.drop_na:
                train_df = train_df.dropna()

            # TODO normalize?

            # data preprocessing: determines number of categories for Categorical
            #   distribution and maps categorical values in the data to ints
            for feature in features:
                train_df = feature.preprocess_data(train_df)

            # build model
            model = automodel.make_model(features, k)

            # build variational guide for optimization
            guide = AutoDiagonalNormal(model)

            # postprocessing for automodel
            def postprocess_fn(syn_df):
                for feature in features:
                    syn_df = feature.postprocess_data(syn_df)
                return syn_df

    except Exception as e:  # handling errors in py-file parsing
        print("\n#### FAILED TO PARSE THE MODEL SPECIFICATION ####")
        print("Here's the technical error description:")
        print(e)
        traceback.print_tb(e.__traceback__)
        print("\nAborting...")
        exit(3)

    # pick features from data according to model file
    num_data = train_df.shape[0]
    if args.drop_na:
        print(
            "After removing missing values, the data has {} entries with {} features"
            .format(*train_df.shape))
    else:
        print(
            "The data has {} entries with {} features".format(*train_df.shape))

    # compute DP values
    target_delta = args.delta
    if target_delta is None:
        target_delta = 1. / num_data
    if target_delta * num_data > 1.:
        print("!!!!! WARNING !!!!! The given value for privacy parameter delta ({:1.3e}) exceeds 1/(number of data) ({:1.3e}),\n" \
            "which the maximum value that is usually considered safe!".format(
                target_delta, 1. / num_data
            ))
        x = input("Continue? (type YES ): ")
        if x != "YES":
            print("Aborting...")
            exit(4)
        print("Continuing... (YOU HAVE BEEN WARNED!)")

    num_compositions = int(args.num_epochs / args.sampling_ratio)
    dp_sigma, epsilon, _ = approximate_sigma_remove_relation(
        args.epsilon, target_delta, args.sampling_ratio, num_compositions)
    batch_size = q_to_batch_size(args.sampling_ratio, num_data)
    sigma_per_sample = dp_sigma / q_to_batch_size(args.sampling_ratio,
                                                  num_data)
    print("Will apply noise with std deviation {:.2f} (~ {:.2f} per element in batch) to achieve privacy epsilon "\
        "of {:.3f} (for delta {:.2e}) ".format(dp_sigma, sigma_per_sample, epsilon, target_delta))

    # TODO: warn for high noise? but when is it too high? what is a good heuristic?

    inference_rng, sampling_rng = initialize_rngs(args.seed)

    # learn posterior distributions
    try:
        posterior_params = train_model_no_dp(
            inference_rng,
            model,
            guide,
            train_df.to_numpy(),
            batch_size=int(args.sampling_ratio * len(train_df)),
            num_epochs=args.num_epochs,
            dp_scale=dp_sigma,
            clipping_threshold=args.clipping_threshold)
    except (InferenceException, FloatingPointError):
        print(
            "################################## ERROR ##################################"
        )
        print(
            "!!!!! The inference procedure encountered a NaN value (not a number). !!!!!"
        )
        print(
            "This means the model has major difficulties in capturing the data and is"
        )
        print("likely to happen when the dataset is very small and/or sparse.")
        print("Try adapting (simplifying) the model.")
        print("Aborting...")
        exit(2)

    num_synthetic = args.num_synthetic
    if num_synthetic is None:
        num_synthetic = train_df.shape[0]

    predictive_model = lambda: model(None)
    posterior_samples = Predictive(
        predictive_model,
        guide=guide,
        params=posterior_params,
        num_samples=num_synthetic).get_samples(sampling_rng)

    # sample synthetic data from posterior predictive distribution
    # posterior_samples = sample_multi_posterior_predictive(sampling_rng,
    #         args.num_synthetic, model, (None,), guide, (), posterior_params)
    syn_data = posterior_samples['x']

    # save results
    syn_df = pd.DataFrame(syn_data, columns=train_df.columns)

    # postprocess: if preprocessing involved data mapping, it is mapped back here
    #   so that the synthetic twin looks like the original data
    encoded_syn_df = syn_df.copy()
    if postprocess_fn:
        encoded_syn_df = postprocess_fn(encoded_syn_df)

    encoded_syn_df.to_csv("{}.csv".format(args.output_path), index=False)
    pickle.dump(posterior_params, open("{}.p".format(args.output_path), "wb"))

    ## illustrate results
    if args.visualize != 'none':
        show_popups = args.visualize in ('popup', 'both')
        save_plots = args.visualize in ('store', 'both')
        # Missing value rate
        if not args.drop_na:
            missing_value_fig = plot_missing_values(syn_df,
                                                    train_df,
                                                    show=show_popups)
            if save_plots:
                missing_value_fig.savefig(args.output_path +
                                          "_missing_value_plots.svg")
        # Marginal violins
        margin_fig = plot_margins(syn_df, train_df, show=show_popups)
        # Covariance matrices
        cov_fig = plot_covariance_heatmap(syn_df, train_df, show=show_popups)
        if save_plots:
            margin_fig.savefig(args.output_path + "_marginal_plots.svg")
            cov_fig.savefig(args.output_path + "_correlation_plots.svg")
        if show_popups:
            plt.show()
Example #11
def main():
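    # Like the previous example, but with custom-model loading, DP/no-DP training
    # paths and synthetic-data postprocessing factored into helper functions, and
    # errors reported via return codes instead of exit().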
    args, unknown_args = parser.parse_known_args()
    print(args)
    if unknown_args:
        print(f"Additional received arguments: {unknown_args}")

    # read data
    try:
        df = pd.read_csv(args.data_path)
    except Exception as e:
        print("#### UNABLE TO READ DATA FILE ####")
        print(e)
        return 1
    print("Loaded data set has {} rows (entries) and {} columns (features).".format(*df.shape))
    num_data = len(df)

    try:
        # check whether we parse model from txt or whether we have a numpyro module
        if args.model_path[-3:] == '.py':

            train_df = df.copy()
            if args.drop_na:
                train_df = train_df.dropna()

            try:
                model, guide, preprocess_fn, postprocess_fn = load_custom_numpyro_model(args.model_path, args, unknown_args, train_df)
            except (ModuleNotFoundError, FileNotFoundError) as e:
                print("#### COULD NOT FIND THE MODEL FILE ####")
                print(e)
                return 1

            train_data, num_data, feature_names = preprocess_fn(train_df)
        else:
            print("Parsing model from txt file (was unable to read it as python module containing numpyro code)")
            k = args.k
            # read model file
            with open(args.model_path, 'r') as model_handle:
                model_str = "".join(model_handle.readlines())
            features = automodel.parse_model(model_str)
            feature_names = [feature.name for feature in features]

            # pick features from data according to model file
            missing_features = set(feature_names).difference(df.columns)
            if missing_features:
                raise automodel.ParsingError(
                    "The model specifies features that are not present in the data:\n{}".format(
                        ", ".join(missing_features)
                    )
                )

            df = df.loc[:, feature_names]

            train_df = df.copy() # TODO: this duplicates code with the other branch but cannot currently pull it out because we are manipulating df above
            if args.drop_na:
                train_df = train_df.dropna()

            # TODO normalize?

            # data preprocessing: determines number of categories for Categorical
            #   distribution and maps categorical values in the data to ints
            for feature in features:
                train_df = feature.preprocess_data(train_df)

            # build model
            model = automodel.make_model(features, k)

            # build variational guide for optimization
            guide = AutoDiagonalNormal(model)

            # postprocessing for automodel
            postprocess_fn = automodel.postprocess_function_factory(features)
            num_data = train_df.shape[0]
            train_data = (train_df,)

        assert isinstance(train_data, tuple)
        if len(train_data) == 1:
            print("After preprocessing, the data has {} entries with {} features each.".format(*train_data[0].shape))
        else:
            print("After preprocessing, the data was split into {} splits:".format(len(train_data)))
            for i, x in enumerate(train_data):
                print("\tSplit {} has {} entries with {} features each.".format(i, x.shape[0], 1 if x.ndim == 1 else x.shape[1]))

        # compute DP values
        # TODO need to make this fail safely
        batch_size = q_to_batch_size(args.sampling_ratio, num_data)

        if not args.no_privacy:
            target_delta = args.delta
            if target_delta is None:
                target_delta = 1. / num_data
            if target_delta * num_data > 1.:
                print("!!!!! WARNING !!!!! The given value for privacy parameter delta ({:1.3e}) exceeds 1/(number of data) ({:1.3e}),\n" \
                    "which the maximum value that is usually considered safe!".format(
                        target_delta, 1. / num_data
                    ))
                x = input("Continue? (type YES ): ")
                if x != "YES":
                    print("Aborting...")
                    return 4
                print("Continuing... (YOU HAVE BEEN WARNED!)")

            num_compositions = int(args.num_epochs / args.sampling_ratio)
            dp_sigma, epsilon, _ = approximate_sigma_remove_relation(
                args.epsilon, target_delta, args.sampling_ratio, num_compositions
            )
            sigma_per_sample = dp_sigma / q_to_batch_size(args.sampling_ratio, num_data)
            print("Will apply noise with std deviation {:.2f} (~ {:.2f} per element in batch) to achieve privacy epsilon "\
                "of {:.3f} (for delta {:.2e}) ".format(dp_sigma, sigma_per_sample, epsilon, target_delta))
            # TODO: warn for high noise? but when is it too high? what is a good heuristic?

            do_training = lambda inference_rng: train_model(
                inference_rng,
                d3p.random,
                model, guide,
                train_data,
                batch_size=batch_size,
                num_data=num_data,
                num_epochs=args.num_epochs,
                dp_scale=dp_sigma,
                clipping_threshold=args.clipping_threshold
            )
        else:
            print("!!!!! WARNING !!!!! PRIVACY FEATURES HAVE BEEN DISABLED!")
            do_training = lambda inference_rng: train_model_no_dp(
                inference_rng,
                model, guide,
                train_data,
                batch_size=batch_size,
                num_data=num_data,
                num_epochs=args.num_epochs
            )

        inference_rng, sampling_rng = initialize_rngs(args.seed)

        # learn posterior distributions
        try:
            posterior_params, elbo = do_training(inference_rng)
        except (InferenceException, FloatingPointError):
            print("################################## ERROR ##################################")
            print("!!!!! The inference procedure encountered a NaN value (not a number). !!!!!")
            print("This means the model has major difficulties in capturing the data and is")
            print("likely to happen when the dataset is very small and/or sparse.")
            print("Try adapting (simplifying) the model.")
            print("Aborting...")
            return 2

        # Store learned model parameters
        # TODO: we should have a mode for twinify that allows to rerun the sampling without training, using stored parameters
        store_twinify_run_result(f"{args.output_path}.p", posterior_params, elbo, args, unknown_args, __version__)

        # sample synthetic data
        print("Model learning complete; now sampling data!")
        num_synthetic = args.num_synthetic
        if num_synthetic is None:
            num_synthetic = num_data

        num_parameter_samples = int(np.ceil(num_synthetic / args.num_synthetic_records_per_parameter_sample))
        num_synthetic = num_parameter_samples * args.num_synthetic_records_per_parameter_sample
        print(f"Will sample {args.num_synthetic_records_per_parameter_sample} synthetic data records for each of "
              f"{num_parameter_samples} samples from the parameter posterior for a total of {num_synthetic} records.")
        if args.separate_output:
            print("They will be stored in separate data sets for each parameter posterior sample.")
        else:
            print("They will be stored in a single large data set.")
        posterior_samples = sample_synthetic_data(
            model, guide, posterior_params, sampling_rng, num_parameter_samples, args.num_synthetic_records_per_parameter_sample
        )

        # postprocess: so that the synthetic twin looks like the original data
        #   - extract samples from the posterior_samples dictionary and construct pd.DataFrame
        #   - if preprocessing involved data mapping, it is mapped back here
        conditioned_postprocess_fn = lambda posterior_samples: postprocess_fn(posterior_samples, df, feature_names)
        for i, (syn_df, encoded_syn_df) in enumerate(reshape_and_postprocess_synthetic_data(
            posterior_samples, conditioned_postprocess_fn, args.separate_output, num_parameter_samples
        )):
            if args.separate_output:
                filename = f"{args.output_path}.{i}.csv"
            else:
                filename = f"{args.output_path}.csv"
            encoded_syn_df.to_csv(filename, index=False)

        ### illustrate results TODO need to adopt new way of handling train_df
        #if args.visualize != 'none':
        #    show_popups = args.visualize in ('popup', 'both')
        #    save_plots = args.visualize in ('store', 'both')
        #    # Missing value rate
        #    if not args.drop_na:
        #        missing_value_fig = plot_missing_values(syn_df, train_df, show=show_popups)
        #        if save_plots:
        #            missing_value_fig.savefig(args.output_path + "_missing_value_plots.svg")
        #    # Marginal violins
        #    margin_fig = plot_margins(syn_df, train_df, show=show_popups)
        #    # Covariance matrices
        #    cov_fig = plot_covariance_heatmap(syn_df, train_df, show=show_popups)
        #    if save_plots:
        #        margin_fig.savefig(args.output_path + "_marginal_plots.svg")
        #        cov_fig.savefig(args.output_path + "_correlation_plots.svg")
        #    if show_popups:
        #        plt.show()
        return 0
    except ModelException as e:
        print(e.format_message(args.model_path))
    except AssertionError as e:
        raise e
    except Exception as e:
        print("#### AN UNCATEGORISED ERROR OCCURRED ####")
        raise e
    return 1
Example #12
def load_custom_numpyro_model(
    model_path: str, args: argparse.Namespace, unknown_args: Iterable[str],
    orig_data: pd.DataFrame
) -> Tuple[TModelFunction, TGuideFunction, TWrappedPreprocessFunction,
           TWrappedPostprocessFunction]:
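    # Loads a user-supplied numpyro model module: resolves 'model' (or calls
    # 'model_factory'), picks up an explicit 'guide' or falls back to
    # AutoDiagonalNormal, and wires in optional preprocess/postprocess hooks.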

    if not os.path.exists(model_path):
        raise FileNotFoundError(model_path)

    try:
        spec = importlib.util.spec_from_file_location("model_module",
                                                      model_path)
        model_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(model_module)
    except Exception as e:  # handling errors in py-file parsing
        raise NumpyroModelParsingException(
            "Unable to read the specified file as a Python module.", e) from e

    # load the model function from the module
    model = None
    guide = None
    try:
        model = model_module.model
    except AttributeError:
        # model file did not directly contain a model function; check if it has model_factory
        try:
            model_factory = model_module.model_factory
        except AttributeError:
            raise NumpyroModelParsingException(
                "Model module does neither specify a 'model' nor a 'model_factory' function."
            )
        try:
            model_factory_return = model_factory(args, unknown_args, orig_data)
        except TypeError as e:
            if str(e).find('positional argument') != -1:
                raise ModelException(
                    "FAILED IN MODEL FACTORY",
                    f"Custom model_factory functions must accept a namespace of parsed arguments, an iterable of unparsed arguments and a pandas.DataFrame as arguments."
                )
            raise e
        except Exception as e:
            raise ModelException('FAILED IN MODEL FACTORY',
                                 base_exception=e) from e

        # determine whether model_factory returned model function or (model, guide) tuple
        if (type(model_factory_return) is tuple
                and isinstance(model_factory_return[0], TModelFunction)
                and isinstance(model_factory_return[1], TGuideFunction)):
            model, guide = model_factory_return
        elif isinstance(model_factory_return, TModelFunction):
            model = model_factory_return
        else:
            raise ModelException(
                'FAILED IN MODEL FACTORY',
                f"Custom model_factory functions must return either a model function or a tuple consisting of model and guide function, but returned {type(model_factory_return)}."
            )
    except Exception as e:
        raise NumpyroModelParsingUnknownException('model', e) from e

    if not isinstance(model, Callable):
        raise NumpyroModelParsingException(
            f"'model' must be a function; got {type(model)}")
    model = guard_model(model)

    if guide is None:
        try:
            guide = model_module.guide
        except AttributeError:
            guide = AutoDiagonalNormal(model)
        except Exception as e:
            raise NumpyroModelParsingUnknownException('guide', e) from e

    # try to obtain preprocessing function from custom model
    try:
        preprocess_fn = guard_preprocess(model_module.preprocess)
    except AttributeError:
        preprocess_fn = default_preprocess
    except Exception as e:
        raise NumpyroModelParsingUnknownException('preprocess', e) from e

    # try to obtain postprocessing function from custom model
    try:
        postprocess_fn = guard_postprocess(model_module.postprocess)
    except AttributeError:
        print(
            "Warning: Your model does not specify a postprocessing function for generated samples."
        )
        print(
            "     Using default, which assumes that your model only produces samples at sample site 'x' and outputs samples as they are."
        )
        postprocess_fn = automodel.postprocess_function_factory([])
    except Exception as e:
        raise NumpyroModelParsingUnknownException('postprocess', e) from e

    return model, guide, preprocess_fn, postprocess_fn