def run_simulation_study(mi_in: MaudInput, true_params_raw):
    """Simulate measurements from known parameters, then fit the model to them.

    :param mi_in: A MaudInput object
    :param true_params_raw: dictionary of param name -> true param values
    :return: a SimulationStudyOutput holding the simulation input, the
        sampling input, the enriched true parameters, the simulation run,
        the MaudInput with simulated measurements and the final samples
    """
    # The Stan program sits at a fixed location relative to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    model = CmdStanModel(
        stan_file=os.path.join(module_dir, STAN_PROGRAM_RELATIVE_PATH)
    )

    # Step 1: simulate measurements, starting the chains at the true values
    # (enriched so that non-centered parameters are covered as well).
    input_data_sim = get_input_data(mi_in)
    true_params = enrich_true_values(true_params_raw, input_data_sim)
    sim = model.sample(data=input_data_sim, inits=true_params, **SIM_CONFIG)

    # Step 2: put the simulated measurements into a MaudInput and rebuild
    # the Stan input from it.
    mi = add_measurements_to_maud_input(mi_in, sim, input_data_sim)
    input_data_sample = get_input_data(mi)

    # Step 3: fit the model to the simulated measurements.
    samples = model.sample(
        data=input_data_sample,
        inits=true_params,
        **mi.config.cmdstanpy_config,
    )

    return SimulationStudyOutput(
        input_data_sim,
        input_data_sample,
        true_params,
        sim,
        mi,
        samples,
    )
def run_model():
    """Fit eight schools twice and save a comparison of `mu` and `tau`.

    The second fit uses doubled `sigma` values; one extra set of reference
    values is passed along for the comparison.
    """
    model = CmdStanModel(stan_file="eight_schools.stan")

    # Both fits share identical sampler settings.
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }

    data = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    original_fit = model.sample(data=data, **sampler_settings)

    # Increase the uncertainties (the same data dict is updated in place)
    data["sigma"] = [2 * value for value in data["sigma"]]
    wider_fit = model.sample(data=data, **sampler_settings)

    # Extra values to show next to the fits (optional)
    reference_values = [{"mu": 2.2, "tau": 1.3}]

    save_compare_parameters(
        [original_fit, wider_fit],
        labels=['Original', 'Larger uncertainties', 'Extra'],
        extra_values=reference_values,
        type=CompareParametersType.TEXT,  # or GITLAB_LATEX
        param_names=['mu', 'tau'],
    )
def main():
    """Sample the multi-normal model while a StanMonitor watches the run."""
    # Adjustable parameters
    rho = 0.9  # rho should be <1
    N = 400
    iter_warmup = 20  # Number of warmup iterations
    iter_sampling = 100  # Number of sampling iterations

    # The .stan file is expected next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    stan_path = f'{script_dir}/multi-normal.stan'

    # Parameters the monitor should track
    tracked = [
        "lp__",
        "accept_stat__",
        "stepsize__",
        "treedepth__",
        "n_leapfrog__",
        "divergent__",
        "energy__",
    ]

    # StanMonitor is used as a context manager around the whole run.
    with StanMonitor(
        label='multi-normal-example',
        parameter_names=tracked,
    ) as monitor:
        model = CmdStanModel(stan_file=stan_path)

        # The sampler writes into the monitor's own output directory.
        model_data = {'N': N, 'rho': rho}
        fit = model.sample(
            data=model_data,
            output_dir=monitor._output_dir,
            iter_sampling=iter_sampling,
            iter_warmup=iter_warmup,
            save_warmup=True,
        )
def main():
    """Fit the bernoulli model and print the mean of the `theta` draws.

    Any existing output directory is deleted first, so each run starts from
    an empty directory. Fitting and loading the draws are timed separately.
    """
    thisdir = os.path.dirname(os.path.realpath(__file__))
    model_fname = f'{thisdir}/bernoulli.stan'
    output_dir = f'{thisdir}/output_'
    num_draws = 400000

    # remove the output directory if it exists
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

    # Load the bernoulli model
    model = CmdStanModel(stan_file=model_fname)

    # Sample the posterior distribution conditioned on some data
    with Timer(label='fit', verbose=True):
        fit = model.sample(
            data={'N': 5, 'y': [0, 1, 0, 0, 0]},
            output_dir=output_dir,
            iter_sampling=num_draws,
        )

    # First dimension is the draw index, second is the chain index,
    # and third is the column index
    with Timer(label='Load draws', verbose=True):
        draws = fit.draws()
    print(f'{draws.shape[0]} draws; {draws.shape[1]} chains')

    # Find the theta column by name rather than by a hard-coded position:
    # the sampler diagnostic columns come first, and how many there are is
    # not something this script controls.
    theta_column = fit.column_names.index('theta')
    theta_draws = draws[:, :, theta_column]

    # Report the average value of the theta parameter
    print(np.mean(theta_draws))
def sample(
    stan_file: str,
    input_json: str,
    coords: dict,
    dims: dict,
    sample_kwargs: dict,
    cpp_options: Optional[dict],
    stanc_options: Optional[dict],
) -> InferenceData:
    """Run cmdstanpy.CmdStanModel.sample and return an InferenceData.

    :param stan_file: path to the Stan program
    :param input_json: path to the JSON input file; it is passed to the
        sampler and also read here for its `ix_train` and `ix_test` entries
    :param coords: coordinates for the InferenceData; not modified
    :param dims: dimensions for the InferenceData
    :param sample_kwargs: keyword arguments forwarded to `CmdStanModel.sample`
    :param cpp_options: forwarded to `CmdStanModel`
    :param stanc_options: forwarded to `CmdStanModel`
    :return: InferenceData built from the sampler's CSV output
    """
    model = CmdStanModel(
        stan_file=stan_file,
        cpp_options=cpp_options,
        stanc_options=stanc_options,
    )
    with open(input_json, "r") as f:
        stan_input = json.load(f)

    # Build a new coords dict rather than writing into the caller's one, so
    # calling this function twice with the same dict has no hidden effect.
    # The indices are shifted down by one (presumably from Stan's 1-based
    # indexing to 0-based -- confirm against the input generator).
    coords = {
        **coords,
        "ix_train": [i - 1 for i in stan_input["ix_train"]],
        "ix_test": [i - 1 for i in stan_input["ix_test"]],
    }

    mcmc = model.sample(data=input_json, **sample_kwargs)
    return az.from_cmdstan(
        posterior=mcmc.runset.csv_files,
        log_likelihood="llik",
        posterior_predictive="yrep",
        observed_data=input_json,
        coords=coords,
        dims=dims,
    )
def run_stan(output_dir, settings: AnalysisSettings):
    """
    Sample the Stan model described by `settings` and save an analysis.

    Parameters
    ----------

    output_dir: str
        Directory where Stan's output will be created

    settings: AnalysisSettings
        Analysis settings; the model path, the data and the maximum
        tree depth are read from it.

    Returns
    -------
    cmdstanpy.CmdStanMCMC
        Stan's output containing samples of posterior distribution
        of parameters.
    """
    # Fixed sampler configuration; only the tree depth comes from settings.
    sampler_settings = dict(
        seed=333,
        adapt_delta=0.99,
        max_treedepth=settings.max_treedepth,
        iter_sampling=4000,
        iter_warmup=1000,
        chains=4,
        cores=4,
        show_progress=True,
    )

    stan_model = CmdStanModel(stan_file=settings.stan_model_path)
    posterior = stan_model.sample(
        data=settings.data,
        output_dir=output_dir,
        **sampler_settings,
    )

    # Make summaries and plots of parameter distributions
    save_analysis(posterior, param_names=["r", "sigma"])

    return posterior
def main():
    """Sample the model, then move the CSV output to standardised names."""
    script_dir = os.path.dirname(os.path.realpath(__file__))

    def resolve(key):
        # Configured paths are relative to the directory of this script.
        return os.path.join(script_dir, RELATIVE_PATHS[key])

    stan_file = resolve("model")
    data_file = resolve("model_input")
    destination_dir = resolve("model_output_dir")

    mcmc = CmdStanModel(stan_file=stan_file).sample(
        data=data_file, **SAMPLE_ARGS
    )

    # Move every chain's CSV file to its standardised location.
    for csv_file in mcmc.runset.csv_files:
        target = standardise_csv_filename(csv_file, destination_dir)
        os.replace(csv_file, target)
def stan_sampler(model_code):
    """Sample `theta` from a Stan program and return the draws shuffled.

    `model_code` is passed to CmdStanModel as `stan_file`. The draw counts
    come from the surrounding scope: `iterations` is split evenly across
    `num_chains`, and `warmup` sets the warmup length.
    """
    stan_model = CmdStanModel(stan_file=model_code)
    draws_per_chain = iterations // num_chains
    fit = stan_model.sample(
        iter_sampling=draws_per_chain,
        iter_warmup=warmup,
        chains=num_chains,
    )

    theta = fit.stan_variable("theta").theta

    # Return the draws in random order with a fresh 0..n-1 index.
    shuffled_positions = np.random.permutation(len(theta))
    return theta.iloc[shuffled_positions].reset_index(drop=True)
def run_stan(output_dir, mydata):
    """Sample the eight schools model on `mydata` and return the fit."""
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }
    stan_model = CmdStanModel(stan_file="eight_schools.stan")

    # output_dir must be forwarded to the sampler -- make sure to pass it in.
    return stan_model.sample(
        data=mydata,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_model1_divorse_age(data, output_dir, sampling_iters, warmup_iters):
    """Sample the divorse/age model with 4 chains and a fixed seed.

    The iteration counts and the output directory come from the caller.
    """
    stan_model = CmdStanModel(
        stan_file=(
            "tarpan/testutils/a05_divorse/stan_model/divorse1_divorse_age.stan"
        )
    )
    sampler_settings = {
        "chains": 4,
        "cores": 2,
        "seed": 1,
        "sampling_iters": sampling_iters,
        "warmup_iters": warmup_iters,
    }
    return stan_model.sample(
        data=data,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_model():
    """Fit eight schools twice and save a tree plot comparing both fits.

    The second fit uses doubled `sigma` values. Extra marker values and
    legend labels are supplied to the plot.
    """
    model = CmdStanModel(stan_file="eight_schools.stan")

    # Both fits share identical sampler settings.
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }

    data = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    first_fit = model.sample(data=data, **sampler_settings)

    # Increase the uncertainties (the same data dict is updated in place)
    data["sigma"] = [2 * value for value in data["sigma"]]
    second_fit = model.sample(data=data, **sampler_settings)

    # Show extra markers in tree plot for comparison (optional)
    markers = [{"mu": 2.2, "tau": 1.3}]

    # Supply legend labels (optional)
    plot_settings = TreePlotParams()
    plot_settings.labels = ["Model 1", "Model 2", "Exact"]

    save_tree_plot(
        fits=[first_fit, second_fit],
        extra_values=markers,
        param_names=['mu', 'tau'],
        tree_params=plot_settings,
    )
def run_model():
    """Fit eight schools and save a tree plot of `mu`, `tau` and `eta`."""
    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }

    stan_model = CmdStanModel(stan_file="eight_schools.stan")
    posterior = stan_model.sample(data=schools, **sampler_settings)

    save_tree_plot([posterior], param_names=['mu', 'tau', 'eta'])
def run_model():
    """Fit eight schools and save the tree plot under several InfoPath setups.

    Each entry below overrides different parts of the output location. The
    expected paths in the comments come from the example's original
    annotations, not from anything checked here.
    """
    model = CmdStanModel(stan_file="eight_schools.stan")
    data = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    fit = model.sample(data=data, chains=4, cores=4, seed=1,
                       sampling_iters=1000, warmup_iters=1000)

    info_path_overrides = [
        # Change all path components:
        #   ~/tarpan/analysis/model1/normal.png
        dict(path='~/tarpan', dir_name="analysis", sub_dir_name="model1",
             base_name="normal", extension="png"),
        # Change the file name:
        #   model_info/custom_location/my_summary.pdf
        dict(base_name="my_summary"),
        # Change the file type:
        #   model_info/custom_location/summary.png
        dict(extension="png"),
        # Change the sub-directory name:
        #   model_info/custom/summary.pdf
        dict(sub_dir_name="custom"),
        # Do not create sub-directory:
        #   model_info/summary.pdf
        dict(sub_dir_name=InfoPath.DO_NOT_CREATE),
        # Change the default top directory name from `model_info`:
        #   my_files/custom_location/summary.pdf
        dict(dir_name='my_files'),
        # Change the root path to "tarpan" in your user's home directory
        #   ~/tarpan/model_info/custom_location/summary.pdf
        dict(path='~/tarpan'),
    ]

    # Build each InfoPath right before it is used, one plot per setup.
    for overrides in info_path_overrides:
        save_tree_plot([fit], info_path=InfoPath(**overrides))
def run_model():
    """Fit eight schools and save a summary with custom HPDI values."""
    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }

    stan_model = CmdStanModel(stan_file="eight_schools.stan")
    posterior = stan_model.sample(data=schools, **sampler_settings)

    # Make summary with custom HPDI values
    custom_intervals = SummaryParams(hpdis=[0.05, 0.99])
    save_summary(
        posterior,
        param_names=['mu', 'tau', 'eta.1'],
        summary_params=custom_intervals,
    )
def run_model(data, output_dir):
    """
    Samples the eight schools model on `data` and returns the fit.
    Sampler output is directed to `output_dir`.
    """
    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a01_eight_schools/eight_schools.stan"
    )
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }
    return stan_model.sample(
        data=data,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_model(data, output_dir):
    """
    Samples the cars model on `data` with a single chain and returns the
    fit. Sampler output is directed to `output_dir`.
    """
    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a03_cars/stan_model/cars.stan"
    )
    sampler_settings = {
        "chains": 1,
        "cores": 1,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 500,
    }
    return stan_model.sample(
        data=data,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_model3_treatment(data, output_dir, sampling_iters, warmup_iters):
    """
    Samples the height/treatment model on `data` with a single chain and
    returns the fit. Iteration counts come from the caller; sampler output
    is directed to `output_dir`.
    """
    stan_model = CmdStanModel(
        stan_file=(
            "tarpan/testutils/a04_height/stan_model/height3_treatment.stan"
        )
    )
    sampler_settings = {
        "chains": 1,
        "cores": 1,
        "seed": 1,
        "sampling_iters": sampling_iters,
        "warmup_iters": warmup_iters,
    }
    return stan_model.sample(
        data=data,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_stan_model(stan_file, data, **kwargs):
    """
    Convenience function to compile, sample and diagnose a Stan model.

    Parameters
    ----------
    stan_file
        Path to the Stan program.
    data
        Data passed to `CmdStanModel.sample`.
    **kwargs
        Any further keyword arguments for `CmdStanModel.sample`.

    Returns
    -------
    tuple
        The compiled model and the fit, in that order.

    Notes
    -----
    For prior predictive sampling (or to otherwise simulate data), pass
    `fixed_param=True`.
    https://cmdstanpy.readthedocs.io/en/latest/sample.html#example-generate-data-fixed-param-true
    """
    model = CmdStanModel(stan_file=stan_file)
    model.compile()
    fit = model.sample(data=data, **kwargs)

    # Print the diagnostic report when diagnose() returns it. Without this
    # the report is lost on cmdstanpy versions that return it as a string.
    # If diagnose() returns None, nothing extra is printed.
    diagnosis = fit.diagnose()
    if diagnosis:
        print(diagnosis)

    return model, fit
def myfunc(output_dir, data):
    """Increment the caller's `count` entry, then fit eight schools.

    The incoming `data` is only used for the `count` entry, which is bumped
    on every call. The model itself is always sampled on the fixed eight
    schools data set defined below.
    """
    data["count"] += 1

    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a01_eight_schools/eight_schools.stan"
    )
    sampler_settings = {
        "chains": 1,
        "cores": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }
    return stan_model.sample(
        data=schools,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_stan(observed_values, uncertainties):
    """Sample the gaussian mixture model for the given observations.

    The Stan input holds the observations, their uncertainties and the
    number of observations. Returns the fit produced by the sampler.
    """
    stan_input = {
        "y": observed_values,
        "uncertainties": uncertainties,
        "N": len(observed_values)
    }
    sampler_settings = {
        "seed": 333,
        "adapt_delta": 0.90,
        "max_treedepth": 5,
        "sampling_iters": 500,
        "warmup_iters": 500,
        "chains": 4,
        "cores": 4,
    }

    stan_model = CmdStanModel(stan_file="stan_model/gaussian_mixture.stan")
    return stan_model.sample(data=stan_input, **sampler_settings)
def run_model(data, output_dir):
    """
    Samples the gaussian mixture model on `data` and returns the fit.
    Sampler output is directed to `output_dir`.
    """
    stan_model = CmdStanModel(
        stan_file=(
            "tarpan/testutils/a02_gaussian_mixture/stan_model/"
            "gaussian_mixture.stan"
        )
    )
    sampler_settings = {
        "seed": 333,
        "adapt_delta": 0.90,
        "max_treedepth": 5,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
        "chains": 4,
        "cores": 4,
    }
    return stan_model.sample(
        data=data,
        output_dir=output_dir,
        **sampler_settings,
    )
def run_stan_model(*, posterior, config):
    """
    Compile and run the stan model of `posterior`.

    The sampler is configured from `config` (warmups, iterations, thin,
    chains, seed). Returns the fit object produced by the sampler.
    """
    stan_model = CmdStanModel(
        stan_file=posterior.model.code_file_path(framework="stan")
    )
    sampler_settings = {
        "iter_warmup": config.warmups,
        "iter_sampling": config.iterations,
        "thin": config.thin,
        "chains": config.chains,
        "seed": config.seed,
    }
    return stan_model.sample(
        data=posterior.data.values(),
        **sampler_settings,
    )
def run_bernoulli_fit():
    """Fit the bernoulli example shipped with CmdStan and print a summary."""
    # The example model is located inside the CmdStan installation.
    example_dir = os.path.join(cmdstan_path(), 'examples', 'bernoulli')
    stan_model = CmdStanModel(
        stan_file=os.path.join(example_dir, 'bernoulli.stan')
    )
    stan_model.compile()

    # Ten binary observations
    observations = {'N': 10, 'y': [0, 1, 0, 0, 0, 0, 0, 0, 0, 1]}

    # Fit the model with a progress display
    fit = stan_model.sample(
        chains=4,
        cores=2,
        data=observations,
        show_progress=True,
    )

    # summarize the results (wraps CmdStan `bin/stansummary`):
    print(fit.summary())
def run_model():
    """Fit eight schools and save the full analysis of the fit."""
    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18]
    }
    sampler_settings = {
        "chains": 4,
        "cores": 4,
        "seed": 1,
        "sampling_iters": 1000,
        "warmup_iters": 1000,
    }

    stan_model = CmdStanModel(stan_file="eight_schools.stan")
    posterior = stan_model.sample(data=schools, **sampler_settings)

    # Creates summaries, traceplots and histograms in `model_info` directory
    save_analysis(posterior)
def generate_samples(
    study_name: str,
    measurements: pd.DataFrame,
    model_configurations: List[ModelConfiguration],
) -> None:
    """Fit every configured model, print diagnostics and save the results.

    For each model configuration this writes the Stan input as JSON, runs
    cmdstanpy.CmdStanModel.sample, prints the diagnostics and an arviz
    summary, and saves the inference data and the psis-loo results. When
    more than one model was fitted, a loo comparison is printed and written
    to csv.

    :param study_name: a string, used as prefix of every fit name
    :param measurements: dataframe handed to each configuration's input and
        inference-data functions
    :param model_configurations: the model configurations to fit
    """
    inference_by_fit = {}

    for config in model_configurations:
        fit_name = f"{study_name}-{config.name}"
        print(f"Fitting model {fit_name}...")

        # Output locations for this fit
        loo_file = os.path.join(LOO_DIR, f"loo_{fit_name}.pkl")
        infd_file = os.path.join(INFD_DIR, f"infd_{fit_name}.ncdf")
        json_file = os.path.join(JSON_DIR, f"input_data_{fit_name}.json")

        # Build the Stan input and keep a JSON copy of it
        stan_input = config.stan_input_function(measurements)
        print(f"Writing input data to {json_file}")
        jsondump(json_file, stan_input)

        # Sample
        stan_model = CmdStanModel(
            model_name=fit_name,
            stan_file=config.stan_file,
        )
        print(f"Writing csv files to {SAMPLES_DIR}...")
        mcmc = stan_model.sample(
            data=stan_input,
            output_dir=SAMPLES_DIR,
            **config.sample_kwargs,
        )

        # Diagnostics, with doubled newlines collapsed
        diagnostics = mcmc.diagnose()
        print(diagnostics.replace("\n\n", "\n"))

        # Convert to inference data and report
        infd_kwargs = config.infd_kwargs_function(measurements)
        infd = az.from_cmdstanpy(mcmc, **infd_kwargs)
        print(az.summary(infd))
        inference_by_fit[fit_name] = infd

        print(f"Writing inference data to {infd_file}")
        infd.to_netcdf(infd_file)

        print(f"Writing psis-loo results to {loo_file}\n")
        loo_result = az.loo(infd, pointwise=True)
        loo_result.to_pickle(loo_file)

    # A comparison needs at least two fitted models.
    if len(inference_by_fit) > 1:
        comparison = az.compare(inference_by_fit)
        print(f"Loo comparison:\n{comparison}")
        comparison.to_csv(os.path.join(LOO_DIR, "loo_comparison.csv"))
def test_ordered_ragged_array():
    """Check the output of the ordered_ragged_array Stan function.

    The test ragged array:
        [[2],
         [3, 2],
         [5, 3, 2]]
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    stan_file = os.path.join(
        test_dir, "stan/ordered_ragged_array_test_model.stan"
    )
    include_path = os.path.join(test_dir, "../src/stan")

    stan_input = {
        "N": 3,
        "O": 6,
        "first_vals": [2, 3, 5],
        "n_elems": [1, 2, 3],
        "diffs": [1, 2, 1],
    }
    # The ragged array from the docstring, flattened row by row
    expected = [2.0, 3.0, 2.0, 5.0, 3.0, 2.0]

    stan_model = CmdStanModel(
        stan_file=stan_file,
        stanc_options={"include_paths": [include_path]},
    )
    # A single fixed-parameter draw is requested.
    samples = stan_model.sample(
        data=stan_input, fixed_param=True, iter_sampling=1
    )

    first_draw = samples.get_drawset(params=["actual"]).loc[0]
    actual = first_draw.tolist()
    assert actual == expected
def generate_fake_measurements(real_data: pd.DataFrame) -> pd.DataFrame:
    """Fake a table of measurements by simulating from the true model.

    You will need to customise this function to make sure it matches the
    data generating process you want to simulate from.

    :param real_data: dataframe of real data to copy
    :return: a copy of `real_data` with a `true_ability` column added and
        `score` replaced by the first `yrep` draw minus 6
    """
    # Draw one standard-normal ability per distinct name.
    true_values = TRUE_PARAM_VALUES.copy()
    n_names = real_data["name"].nunique()
    true_values["ability"] = np.random.normal(0, 1, n_names)

    simulated = real_data.copy()
    simulated["score"] = 0

    # Pair the distinct names (in factorize order) with the drawn abilities.
    distinct_names = pd.factorize(simulated["name"])[1]
    ability_by_name = dict(zip(distinct_names, true_values["ability"]))
    simulated["true_ability"] = simulated["name"].map(ability_by_name)

    # One fixed-parameter draw, started at the true parameter values
    stan_model = CmdStanModel(stan_file=TRUE_MODEL_CONFIG.stan_file)
    stan_input = TRUE_MODEL_CONFIG.stan_input_function(simulated)
    mcmc = stan_model.sample(
        stan_input,
        inits=true_values,
        fixed_param=True,
        iter_sampling=1,
    )

    fake_scores = mcmc.stan_variable("yrep")[0] - 6
    return simulated.assign(score=fake_scores)
from cmdstanpy import CmdStanModel
from tarpan.cmdstanpy.summary import print_summary

# Sample the ex02.06 model on the two data values and print a summary.
data = {"w": 6, "l": 3}
model = CmdStanModel(stan_file="stan_model/ex02.06.stan")
fit = model.sample(data=data, show_progress=True)
print_summary(fit)
# Stan input: the slice Start..J of the distance / success / attempt arrays.
# J, Start and the arrays are defined outside this fragment.
json_data = {
    "J": J - Start,
    "x_distance": x_distance[Start:J].tolist(),
    "y_successes": y_successes_shrunk[Start:J].tolist(),
    "n_attempts": n_attempts_shrunk[Start:J].tolist()
}

# Two side-by-side axes: `ax2` (left) receives the histograms below and
# `ax` (right) is handed to runGolf for both models.
fig, (ax2, ax) = plt.subplots(1, 2)
ax.set_facecolor('grey')
ax.set_ylabel('chance in 1')
ax.set_xlabel('putt distance (feet)')

# First model: stan/logist.stan
stan_program = CmdStanModel(stan_file='stan/logist.stan')
stan_program.compile()
fit = stan_program.sample(data=json_data, csv_basename='./puttbetlog')
#print(fit.summary())
logistic_color = 'y'
runGolf(fit, 'stan/logist.stan', ax, logistic_color)

# Histograms of the intercept and slope draws of the first model
a_draws = fit.get_drawset(['a_intercept']).to_numpy()
b_draws = fit.get_drawset(['b_slope']).to_numpy()
ax2.hist(a_draws, color=logistic_color, label='a_intercept')
ax2.hist(b_draws, label='b_slope')
ax2.legend(loc='upper right')
ax2.set_ylabel('number of draws')

# Second model: stan/mechai.stan, sampled on the same data and passed to
# runGolf with the same right-hand axes
stan_program = CmdStanModel(stan_file='stan/mechai.stan')
stan_program.compile()
fit2 = stan_program.sample(data=json_data, csv_basename='./puttbetmech')
runGolf(fit2, 'stan/mechai.stan', ax, 'g')
sigma_draws = fit2.get_drawset(['sigma_error_in_degrees']).to_numpy()
# maximum likelihood estimation optim = sm.optimize(data=mdl_data).optimized_params_pd optim[optim.columns[~optim.columns.str.startswith("lp")]] # variational inference vb = sm.variational(data=mdl_data) vb.variational_sample.columns = vb.variational_params_dict.keys() vb_name = vb.variational_params_pd.columns[~vb.variational_params_pd.columns. str.startswith(("lp", "log_"))] vb.variational_params_pd[vb_name] vb.variational_sample[vb_name] # Markov chain Monte Carlo fit = sm.sample(data=mdl_data, show_progress=True, chains=4, iter_sampling=50000, iter_warmup=10000, thin=5) fit.draws().shape # iterations, chains, parameters fit.summary().loc[vb_name] # pandas DataFrame print(fit.diagnose()) posterior = fit.stan_variables() az_trace = az.from_cmdstanpy(fit) az.summary(az_trace).loc[vb_name] # pandas DataFrame az.plot_trace(az_trace)