def test_rpy2(sampler):
    """Run the R notebook example, then resume the stored run.

    A first analysis is built from the functions defined in the R script
    and run for at most two populations. A second ``ABCSMC`` object is then
    loaded from the same database and run id and continued.
    """
    # run the notebook example
    r_script = pyabc.external.R("doc/examples/myRModel.R")
    r_script.display_source_ipython()
    r_model = r_script.model("myModel")
    r_distance = r_script.distance("myDistance")
    r_sum_stat = r_script.summary_statistics("mySummaryStatistics")
    observed = r_script.observation("mySumStatData")

    uniform_prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10),
    )
    first_abc = pyabc.ABCSMC(
        r_model,
        uniform_prior,
        r_distance,
        summary_statistics=r_sum_stat,
        sampler=sampler,
        population_size=5,
    )
    db_id = pyabc.create_sqlite_db_id(file_="test_external.db")
    first_abc.new(db_id, observed)
    history = first_abc.run(minimum_epsilon=0.9, max_nr_populations=2)

    # Access the stored "cars" summary statistic of the first particle.
    sum_stats = history.get_weighted_sum_stats_for_model(m=0, t=1)[1]
    sum_stats[0]["cars"].head()

    # try load
    run_id = history.id
    resumed_abc = pyabc.ABCSMC(
        r_model,
        uniform_prior,
        r_distance,
        summary_statistics=r_sum_stat,
        sampler=sampler,
        population_size=6,
    )
    # the observed data is not passed again when loading
    resumed_abc.load(db_id, run_id)
    resumed_abc.run(minimum_epsilon=0.1, max_nr_populations=2)
def test_stochastic_acceptor():
    """Smoke-test ``StochasticAcceptor`` in two configurations.

    First with ``pdf_norm_max_found`` and an initial temperature of 1,
    then with all-default acceptor and temperature settings.
    """
    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    # first configuration: explicit pdf norm method and initial temperature
    fixed_acceptor = pyabc.StochasticAcceptor(
        pdf_norm_method=pyabc.pdf_norm_max_found)
    fixed_temperature = pyabc.Temperature(initial_temperature=1)
    kernel = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
    observed = {'s0': np.array([0.4, -0.6])}

    # just run
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))
    first_abc = pyabc.ABCSMC(
        model,
        prior,
        kernel,
        eps=fixed_temperature,
        acceptor=fixed_acceptor,
        population_size=10,
    )
    first_abc.new(pyabc.create_sqlite_db_id(), observed)
    first_abc.run(max_nr_populations=1, minimum_epsilon=1.0)

    # use no initial temperature and adaptive c
    default_acceptor = pyabc.StochasticAcceptor()
    default_temperature = pyabc.Temperature()
    second_abc = pyabc.ABCSMC(
        model,
        prior,
        kernel,
        eps=default_temperature,
        acceptor=default_acceptor,
        population_size=10,
    )
    second_abc.new(pyabc.create_sqlite_db_id(), observed)
    second_abc.run(max_nr_populations=3, minimum_epsilon=1.0)
def test_default_eps():
    """Check ``minimum_epsilon`` after runs that do not set it explicitly.

    The plain setup is expected to end with ``minimum_epsilon == 0.0``;
    the setup with a stochastic acceptor and a temperature scheme is
    expected to end with ``minimum_epsilon == 1.0``.
    """
    def model(par):
        return {'s0': par['p0'] + np.random.random(),
                's1': np.random.random()}

    observed = {'s0': 0.4, 's1': 0.6}
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

    # usual setting
    plain_abc = pyabc.ABCSMC(model, prior, population_size=10)
    plain_abc.new(pyabc.create_sqlite_db_id(), observed)
    plain_abc.run(max_nr_populations=3)
    assert plain_abc.minimum_epsilon == 0.0

    # noisy setting
    noisy_acceptor = pyabc.StochasticAcceptor()
    temperature = pyabc.Temperature()
    kernel = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
    noisy_abc = pyabc.ABCSMC(
        model,
        prior,
        kernel,
        eps=temperature,
        acceptor=noisy_acceptor,
        population_size=10,
    )
    noisy_abc.new(pyabc.create_sqlite_db_id(), observed)
    noisy_abc.run(max_nr_populations=3)
    assert noisy_abc.minimum_epsilon == 1.0
def test_pipeline(db_file):
    """Test whole pipeline using a learned summary statistic.

    Three analyses are run against the same database file: an adaptive
    p-norm distance on a predictor-based summary statistic, a plain p-norm
    distance, and an info-weighted p-norm distance. The absolute offset of
    the weighted posterior mean of ``p0`` from 0.1 is compared across them.
    """
    rng = np.random.Generator(np.random.PCG64(0))

    def model(p):
        return {"s0": p["p0"] + 1e-2 * rng.normal(size=2),
                "s1": rng.normal()}

    def run_analysis(distance):
        """Run one analysis with the given distance; return its history."""
        abc = pyabc.ABCSMC(model, prior, distance, population_size=100)
        abc.new("sqlite:///" + db_file, data)
        return abc.run(max_total_nr_simulations=1000)

    def offset(frame, weights):
        """Absolute offset of the weighted mean of ``p0`` from 0.1."""
        return abs(pyabc.weighted_mean(frame.p0, weights) - 0.1)

    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 1))
    learned_distance = pyabc.AdaptivePNormDistance(
        sumstat=PredictorSumstat(LinearPredictor(), fit_ixs={1, 3}),
    )
    data = {"s0": np.array([0.1, 0.105]), "s1": 0.5}

    # run a little analysis
    h = run_analysis(learned_distance)

    # first iteration
    off0 = offset(*h.get_distribution(t=0))
    # last iteration
    off = offset(*h.get_distribution())
    assert off0 > off

    # alternative run with simple distance
    h = run_analysis(pyabc.PNormDistance())
    off_comp = offset(*h.get_distribution())
    assert off_comp > off

    # alternative run with info weighting
    info_distance = pyabc.InfoWeightedPNormDistance(
        predictor=LinearPredictor(),
        fit_info_ixs={1, 3},
    )
    h = run_analysis(info_distance)
    off_info = offset(*h.get_distribution())
    assert off_comp > off_info
def setup_module():
    """Run an analysis and create a database.

    Called once at the beginning. The run is stored under the module-level
    ``db_path``.
    """
    def model(p):
        return {
            'ss0': p['p0'] + 0.1 * np.random.uniform(),
            'ss1': p['p1'] + 0.1 * np.random.uniform(),
        }

    true_par = {'p0': 3, 'p1': 4}
    observed = {'ss0': true_par['p0'], 'ss1': true_par['p1']}
    bounds = {'p0': (0, 5), 'p1': (1, 8)}

    # One uniform random variable per parameter, spanning its bounds.
    prior_rvs = {}
    for name in true_par:
        lower, upper = bounds[name]
        prior_rvs[name] = pyabc.RV('uniform', lower, upper - lower)
    prior = pyabc.Distribution(**prior_rvs)

    euclidean = pyabc.PNormDistance(p=2)
    analysis = pyabc.ABCSMC(model, prior, euclidean, population_size=50)
    analysis.new(db_path, observed)
    analysis.run(minimum_epsilon=0.1, max_nr_populations=4)
def test_redis_catch_error():
    """Run the redis sampler with a model that randomly raises.

    The model raises ``ValueError`` whenever a uniform draw falls below
    0.1. The sampler is shut down regardless of the outcome.
    """
    def model(pars):
        if np.random.uniform() < 0.1:
            raise ValueError("error")
        return {'s0': pars['p0'] + 0.2 * np.random.uniform()}

    def distance(s0, s1):
        return abs(s0['s0'] - s1['s0'])

    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 10))
    sampler = RedisEvalParallelSamplerServerStarter(
        batch_size=3,
        workers=1,
        processes_per_worker=1,
    )
    try:
        analysis = pyabc.ABCSMC(
            model, prior, distance, sampler=sampler, population_size=10)
        db_url = "sqlite:///" + os.path.join(
            tempfile.gettempdir(), "test.db")
        observed = {'s0': 2.8}
        analysis.new(db_url, observed)
        analysis.run(minimum_epsilon=.1, max_nr_populations=3)
    finally:
        sampler.shutdown()
def test_redis_look_ahead_error():
    """Test whether the look-ahead mode fails as expected.

    For each argument set, an ``AssertionError`` whose message contains
    "cannot be used in look-ahead mode" is expected.
    """
    model, prior, distance, obs = basic_testcase()
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as log_fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=log_fh.name,
        )
        arg_sets = [
            {'eps': pyabc.MedianEpsilon()},
            {'distance_function': pyabc.AdaptivePNormDistance()},
        ]
        for kwargs in arg_sets:
            # Fall back to the basic test case's distance if none is given.
            kwargs.setdefault('distance_function', distance)
            try:
                with pytest.raises(AssertionError) as exc_info:
                    analysis = pyabc.ABCSMC(
                        model,
                        prior,
                        sampler=sampler,
                        population_size=10,
                        **kwargs,
                    )
                    analysis.new(pyabc.create_sqlite_db_id(), obs)
                    analysis.run(max_nr_populations=3)
                message = str(exc_info.value)
                assert "cannot be used in look-ahead mode" in message
            finally:
                sampler.shutdown()
def test_redis_look_ahead_delayed():
    """Test the look-ahead sampler with delayed evaluation in an adaptive
    setup.

    After the run, the sampler's CSV log is checked: some look-ahead
    samples must have been generated and accepted, and every look-ahead
    sample must be counted as preliminary.
    """
    model, prior, distance, obs = basic_testcase()
    # spice things up with an adaptive population size
    adaptive_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50,
        mean_cv=0.5,
        max_population_size=50,
    )
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as log_fh:
        sampler = RedisEvalParallelSamplerLookAheadDelayWrapper(
            log_file=log_fh.name)
        try:
            analysis = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=adaptive_size,
            )
            analysis.new(pyabc.create_sqlite_db_id(), obs)
            analysis.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        # read log file
        log = pd.read_csv(log_fh.name, sep=',')
        assert (log.n_lookahead > 0).any()
        assert (log.n_lookahead_accepted > 0).any()
        # in delayed mode, all look-aheads must have been preliminary
        assert (log.n_lookahead == log.n_preliminary).all()
def test_export():
    """Test database export.

    Runs a small analysis into a temporary SQLite file, exports it in each
    supported format, and checks that every export produces a non-empty
    file. All temporary files are removed afterwards.
    """
    # simple problem
    def model(p):
        return {"y": p["p"] + 0.1 * np.random.normal()}

    prior = pyabc.Distribution(p=pyabc.RV("uniform", -1, 2))
    distance = pyabc.PNormDistance()

    # mkstemp returns an already-open OS-level descriptor next to the path.
    # Only the path is needed, so close the descriptor right away instead
    # of leaking it. Creating the file before the ``try`` guarantees that
    # ``db_file`` is bound when the ``finally`` clause runs.
    db_fd, db_file = tempfile.mkstemp(suffix=".db")
    os.close(db_fd)
    try:
        # run
        abc = pyabc.ABCSMC(model, prior, distance, population_size=100)
        abc.new("sqlite:///" + db_file, model({"p": 0}))
        abc.run(max_nr_populations=3)

        # export history
        for fmt in ["csv", "json", "html", "stata"]:
            out_fd, out_file = tempfile.mkstemp()
            os.close(out_fd)
            try:
                pyabc.storage.export(db_file, out=out_file, out_format=fmt)
                assert os.path.exists(out_file)
                assert os.stat(out_file).st_size > 0
            finally:
                if os.path.exists(out_file):
                    os.remove(out_file)
    finally:
        if os.path.exists(db_file):
            os.remove(db_file)
def test_early_stopping():
    """Basic test whether an early stopping pipeline works.

    Heavily inspired by the `early_stopping` notebook.
    """
    prior = pyabc.Distribution(step_size=pyabc.RV("uniform", 0, 10))

    n_steps = 30
    gt_step_size = 5
    reference_trajectory = simulate(n_steps, gt_step_size)
    process_model = MyStochasticProcess(
        n_steps=n_steps,
        gt_step_size=gt_step_size,
        gt_trajectory=reference_trajectory,
    )

    # Initializing eps manually is necessary as we only have an integrated
    # model.
    # TODO: automatically initializing would be possible, e.g. using
    # eps = inf.
    median_eps = pyabc.MedianEpsilon(300, median_multiplier=0.7)
    analysis = pyabc.ABCSMC(
        models=process_model,
        parameter_priors=prior,
        distance_function=pyabc.NoDistance(),
        population_size=30,
        transitions=pyabc.LocalTransition(k_fraction=0.2),
        eps=median_eps,
    )

    # No observed data is passed to ``new`` here.
    analysis.new(pyabc.create_sqlite_db_id())
    analysis.run(max_nr_populations=3)
def test_simple_function_acceptor():
    """Test ``SimpleFunctionAcceptor`` directly and inside an ABCSMC run."""
    def distance(x, x_0):
        return sum(abs(x[key] - x_0[key]) for key in x_0)

    def dummy_accept(dist, eps, x, x_0, t, par):
        d = dist(x, x_0)
        return AcceptorResult(d, d < eps(t))

    simulated = {'s0': 1, 's1': 0}
    observed = {'s0': 2, 's1': 2}

    # direct call: |1 - 2| + |0 - 2| == 3
    acceptor = pyabc.SimpleFunctionAcceptor(dummy_accept)
    result = acceptor(
        distance_function=distance,
        eps=lambda t: 0.1,
        x=simulated,
        x_0=observed,
        t=0,
        par=None,
    )
    assert isinstance(result, AcceptorResult)
    assert result.distance == 3

    # test integration
    def model(par):
        return {'s0': par['p0'] + 1, 's1': 42}

    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -5, 10))
    analysis = pyabc.ABCSMC(model, prior, distance, population_size=2)
    analysis.new(pyabc.create_sqlite_db_id(), model({'p0': 1}))
    history = analysis.run(max_nr_populations=2)

    distances = history.get_weighted_distances()
    assert np.isfinite(distances['distance']).all()
def setup_module():
    """Set up module. Called before all tests here.

    Runs two analyses into the module-level ``db_path`` and fills the
    module-level ``histories`` and ``labels`` lists with one entry per run.
    """
    # create and run some model
    observed = {'ss0': p_true['p0'], 'ss1': p_true['p1']}

    # One uniform random variable per parameter, spanning its limits.
    prior_rvs = {}
    for name in p_true:
        lower, upper = limits[name]
        prior_rvs[name] = pyabc.RV('uniform', lower, upper - lower)
    prior = pyabc.Distribution(**prior_rvs)

    euclidean = pyabc.PNormDistance(p=2)
    n_history = 2
    sampler = pyabc.sampler.MulticoreEvalParallelSampler(n_procs=2)

    for _ in range(n_history):
        analysis = pyabc.ABCSMC(
            model, prior, euclidean, population_size=100, sampler=sampler)
        analysis.new(db_path, observed)
        analysis.run(minimum_epsilon=.1, max_nr_populations=3)

    for index in range(n_history):
        stored = pyabc.History(db_path)
        # history ids are set to 1, 2, ...; labels count from 0
        stored.id = index + 1
        histories.append(stored)
        labels.append(f"Some run {index}")
def test_pdf_norm_methods_integration():
    """Test integration of pdf normalization methods in ABCSMC.

    Each normalization method is plugged into a ``StochasticAcceptor`` and
    a three-population run is executed; the test only checks that the run
    completes.
    """
    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    observed = {'s0': np.array([0.4, -0.6])}

    norm_methods = (
        pyabc.pdf_norm_max_found,
        pyabc.pdf_norm_from_kernel,
        pyabc.ScaledPDFNorm(),
    )
    for norm_method in norm_methods:
        # just run; all components are created fresh for every method
        acceptor = pyabc.StochasticAcceptor(pdf_norm_method=norm_method)
        temperature = pyabc.Temperature()
        kernel = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
        prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))
        analysis = pyabc.ABCSMC(
            model,
            prior,
            kernel,
            eps=temperature,
            acceptor=acceptor,
            population_size=20,
        )
        analysis.new(pyabc.create_sqlite_db_id(), observed)
        analysis.run(max_nr_populations=3)
def test_redis_look_ahead():
    """Test the redis sampler in look-ahead mode.

    Runs three populations with a fixed epsilon list, then checks the
    sampler's CSV log: some look-ahead samples must have been generated
    and accepted, and none may be counted as preliminary.
    """
    model, prior, distance, obs = basic_testcase()
    eps_schedule = pyabc.ListEpsilon([20, 10, 5])
    # spice things up with an adaptive population size
    adaptive_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50,
        mean_cv=0.5,
        max_population_size=50,
    )
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as log_fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=log_fh.name,
        )
        try:
            analysis = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=adaptive_size,
                eps=eps_schedule,
            )
            analysis.new(pyabc.create_sqlite_db_id(), obs)
            history = analysis.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        assert history.n_populations == 3

        # read log file
        log = pd.read_csv(log_fh.name, sep=',')
        assert (log.n_lookahead > 0).any()
        assert (log.n_lookahead_accepted > 0).any()
        assert (log.n_preliminary == 0).all()
def load_history(self, dbpath, id):
    """Load a stored run from an SQLite file through a throwaway ABCSMC.

    The ``ABCSMC`` object is built with a placeholder model, no prior, no
    distance and a single-core sampler.

    :param dbpath: Path of the SQLite file; "sqlite:///" is prepended.
    :param id: Id of the run to load.
    :return: Whatever ``ABCSMC.load`` returns for that database and id.
    """
    def fakesim(p):
        return {"null": p}

    single_core = pyabc.sampler.SingleCoreSampler()
    loader = pyabc.ABCSMC(fakesim, None, None, sampler=single_core)
    db_url = "sqlite:///" + dbpath
    return loader.load(db_url, id)
def test_stochastic_acceptor():
    """Test the stochastic acceptor's features.

    The first run logs the pdf norms to a temporary JSON file and checks
    the logged content; the second run uses all-default acceptor and
    temperature settings. The temporary log file is removed afterwards.
    """
    # Function-scope imports so the block does not depend on the
    # surrounding file's import list.
    import contextlib
    import os

    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    x_0 = {'s0': np.array([0.4, -0.6])}
    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

    # store pnorms
    # mkstemp hands back an open descriptor alongside the path; only the
    # path is needed, so close the descriptor instead of leaking it.
    pnorm_fd, pnorm_file = tempfile.mkstemp(suffix=".json")
    os.close(pnorm_fd)
    try:
        acceptor = pyabc.StochasticAcceptor(
            pdf_norm_method=pyabc.pdf_norm_max_found, log_file=pnorm_file)
        eps = pyabc.Temperature(initial_temperature=1)

        # just run
        abc = pyabc.ABCSMC(model, prior, distance, eps=eps,
                           acceptor=acceptor, population_size=10)
        abc.new(pyabc.create_sqlite_db_id(), x_0)
        h = abc.run(max_nr_populations=1, minimum_epsilon=1.)

        # check pnorms
        pnorms = pyabc.storage.load_dict_from_json(pnorm_file)
        assert len(pnorms) == h.max_t + 2  # +1 t0, +1 one final update
        assert isinstance(list(pnorms.keys())[0], int)
        assert isinstance(pnorms[0], float)
    finally:
        # Best-effort cleanup: a failed removal must not mask the test
        # result.
        with contextlib.suppress(OSError):
            os.remove(pnorm_file)

    # use no initial temperature and adaptive c
    acceptor = pyabc.StochasticAcceptor()
    eps = pyabc.Temperature()
    abc = pyabc.ABCSMC(model, prior, distance, eps=eps,
                       acceptor=acceptor, population_size=20)
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=3)
def test_progressbar(sampler):
    """Test whether using a progress bar gives any errors.

    Runs three populations of the basic test case with the given sampler.
    """
    # NOTE(review): no progress-bar option is passed explicitly here;
    # presumably the bar is configured on the sampler fixture or enabled
    # by default - confirm.
    model, prior, distance, obs = basic_testcase()
    analysis = pyabc.ABCSMC(
        model,
        prior,
        distance,
        sampler=sampler,
        population_size=20,
    )
    analysis.new(db=pyabc.create_sqlite_db_id(), observed_sum_stat=obs)
    analysis.run(max_nr_populations=3)
def test_sensitivity_sankey():
    """Test `pyabc.visualization.plot_sensitivity_sankey`.

    Runs an analysis with an info-weighted distance that writes its scale,
    info and info-sample logs to temporary files, then calls the plotting
    function on those logs. All temporary files are removed afterwards.
    """
    # Function-scope imports so the block does not depend on the
    # surrounding file's import list.
    import contextlib
    import os

    sigmas = {"p1": 0.1}

    def model(p):
        return {
            "y1": p["p1"] + 1 + sigmas["p1"] * np.random.normal(),
            "y2": 2 + 0.1 * np.random.normal(size=3),
        }

    gt_par = {"p1": 3}
    data = {"y1": gt_par["p1"] + 1, "y2": 2 * np.ones(shape=3)}
    prior_bounds = {"p1": (0, 10)}
    prior = pyabc.Distribution(
        **{
            key: pyabc.RV("uniform", lb, ub - lb)
            for key, (lb, ub) in prior_bounds.items()
        },
    )
    total_sims = 1000

    tmp_files = []

    def make_tmp_file(suffix=None):
        """Create a temp file, close its descriptor, return its path."""
        # mkstemp returns an open descriptor; only the path is needed, so
        # close it right away instead of leaking it.
        fd, path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        tmp_files.append(path)
        return path

    try:
        # tmp files
        db_file = make_tmp_file(suffix=".db")
        scale_log_file = make_tmp_file(suffix=".json")
        info_log_file = make_tmp_file(suffix=".json")
        info_sample_log_file = make_tmp_file()

        distance = pyabc.InfoWeightedPNormDistance(
            p=1,
            scale_function=pyabc.distance.mad,
            predictor=pyabc.predictor.LinearPredictor(),
            fit_info_ixs=pyabc.util.EventIxs(sims=int(0.4 * total_sims)),
            scale_log_file=scale_log_file,
            info_log_file=info_log_file,
            info_sample_log_file=info_sample_log_file,
        )
        abc = pyabc.ABCSMC(model, prior, distance, population_size=100)
        # The return value of ``new`` is what gets passed on as ``h``.
        h = abc.new(db="sqlite:///" + db_file, observed_sum_stat=data)
        abc.run(max_total_nr_simulations=total_sims)

        pyabc.visualization.plot_sensitivity_sankey(
            info_sample_log_file=info_sample_log_file,
            t=info_log_file,
            h=h,
            predictor=pyabc.predictor.LinearPredictor(),
        )
    finally:
        # Best-effort cleanup: a file that cannot be removed (e.g. still
        # held open) must not mask the test result.
        for path in tmp_files:
            with contextlib.suppress(OSError):
                os.remove(path)
def train_patient(
    pat_df,
    training_time_range=(0, 10),
    prediction_time_range=(-10, 40),
    n_predictions=1,
    treatments=None,
    frequency=0.1,
    n_percent=10,
    epsilon=1,
    populations=20,
):
    """Fit the ABC model to one patient's data; return the best parameters.

    :param pat_df: Data frame with at least the columns ``"t"`` and
        ``"mtd"``.
    :param training_time_range: Inclusive ``(start, end)`` bounds on ``t``
        selecting the training rows.
    :param prediction_time_range: Forwarded to ``abc_model``.
    :param n_predictions: Number of highest-weight particles to return.
    :param treatments: Forwarded to ``abc_model``; ``None`` (the default)
        stands for an empty list.
    :param frequency: Forwarded to ``abc_model``.
    :param n_percent: Forwarded to ``prior``.
    :param epsilon: Passed to the run as ``minimum_epsilon``.
    :param populations: Passed to the run as ``max_nr_populations``.
    :return: Tuple ``(best_params, history)``: the rows of the final
        distribution with the largest weights, in descending weight order,
        and the history object returned by the run.
    """
    # A literal ``[]`` default would be a single list shared across all
    # calls; create a fresh one per call instead.
    if treatments is None:
        treatments = []

    train_start, train_end = training_time_range
    train_df = pat_df[(pat_df["t"] >= train_start)
                      & (pat_df["t"] <= train_end)]
    print(len(train_df))

    abc = pyabc.ABCSMC(
        abc_model(prediction_time_range, training_time_range, frequency,
                  treatments),
        prior(n_percent),
        abs_distance,
    )
    db_path = "sqlite:///test.db"
    abc.new(db_path, {"data": train_df["mtd"]})
    history = abc.run(minimum_epsilon=epsilon,
                      max_nr_populations=populations)

    df, w = history.get_distribution()
    # argpartition places the n_predictions largest weights at the end,
    # in no particular order; they are sorted explicitly below.
    best_inds = np.argpartition(w, -n_predictions)[-n_predictions:]
    print(w)
    print(best_inds, w[best_inds])
    weights = sorted(zip(best_inds, w[best_inds]),
                     key=operator.itemgetter(1), reverse=True)
    print(weights)
    best_inds_sorted = [index for index, _ in weights]
    print(best_inds_sorted)
    best_params = df.iloc[best_inds_sorted]
    return best_params, history
def test_basic(sampler: pyabc.sampler.Sampler):
    """Some basic tests.

    Runs four populations with the given sampler on a model that adds
    four standard-normal draws to the parameter.
    """
    def model(par):
        return {'s0': par['p0'] + np.random.randn(4)}

    def distance(x, y):
        return np.sum(x['s0'] - y['s0'])

    # The observation is a simulation at p0 = 2.
    observed = model({'p0': 2})
    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 10))

    analysis = pyabc.ABCSMC(
        model,
        prior,
        distance,
        sampler=sampler,
        population_size=50,
    )
    analysis.new(pyabc.create_sqlite_db_id(), observed)
    analysis.run(max_nr_populations=4)
def test_r(sampler):
    """Run the R example script through ABCSMC with the given sampler."""
    r_script = pyabc.external.R("doc/examples/myRModel.R")
    r_script.display_source_ipython()
    r_model = r_script.model("myModel")
    r_distance = r_script.distance("myDistance")
    r_sum_stat = r_script.summary_statistics("mySummaryStatistics")

    uniform_prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10),
    )
    analysis = pyabc.ABCSMC(
        r_model,
        uniform_prior,
        r_distance,
        summary_statistics=r_sum_stat,
        sampler=sampler,
    )
    db_url = "sqlite:///" + os.path.join(gettempdir(), "test_external.db")
    analysis.new(db_url, r_script.observation("mySumStatData"))
    history = analysis.run(minimum_epsilon=0.9, max_nr_populations=2)

    # Access the stored "cars" summary statistic of the first particle.
    sum_stats = history.get_weighted_sum_stats_for_model(m=0, t=1)[1]
    sum_stats[0]["cars"].head()
def test_reference_parameter():
    """Check that the ground-truth parameter passed to ``new`` can be read
    back from the history.
    """
    def model(parameter):
        return {"data": parameter["mean"] + 0.5 * np.random.randn()}

    def distance(x, y):
        return abs(x["data"] - y["data"])

    prior = pyabc.Distribution(
        p0=pyabc.RV("uniform", 0, 5),
        p1=pyabc.RV("uniform", 0, 1),
    )
    analysis = pyabc.ABCSMC(model, prior, distance, population_size=2)

    db_url = "sqlite:///" + os.path.join(tempfile.gettempdir(), "test.db")
    observed_value = 2.5
    gt_par = {'p0': 1, 'p1': 0.25}
    analysis.new(db_url, {"data": observed_value}, gt_par=gt_par)

    # No run is started; the parameter is read straight from the history.
    stored_par = analysis.history.get_ground_truth_parameter()
    assert stored_par == gt_par
def test_redis_look_ahead():
    """Test the redis sampler in look-ahead mode.

    Runs three populations with a fixed epsilon list, then checks the
    sampler's CSV log and the proposal ids stored in the history: for each
    generation, the number of particles with ``proposal_id == -1`` must
    equal the logged number of accepted look-ahead samples, capped at the
    population size.
    """
    model, prior, distance, obs = basic_testcase()
    eps = pyabc.ListEpsilon([20, 10, 5])
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50, mean_cv=0.5, max_population_size=50)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=fh.name,
        )
        try:
            abc = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=pop_size,
                eps=eps,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        assert h.n_populations == 3

        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        assert (df.n_preliminary == 0).all()

        # check history proposal ids
        for t in range(0, h.max_t + 1):
            pop = h.get_population(t=t)
            # A separate name keeps the population-size strategy above
            # from being rebound to an int.
            n_particles = len(pop)
            n_lookahead_pop = len(
                [p for p in pop.particles if p.proposal_id == -1])
            # ``.item()`` extracts the single matching log entry and
            # raises unless exactly one row matches; calling ``int()`` on
            # a Series directly is deprecated in pandas.
            n_accepted = int(
                df.loc[df.t == t, 'n_lookahead_accepted'].item())
            assert min(n_particles, n_accepted) == n_lookahead_pop
def test_r():
    """
    This is basically just the using_R notebook.

    The R script is taken from the module-level ``r_file``; sampling uses
    a multicore sampler with two processes.
    """
    r_script = R(r_file)
    r_script.display_source_ipython()
    r_model = r_script.model("myModel")
    r_distance = r_script.distance("myDistance")
    r_sum_stat = r_script.summary_statistics("mySummaryStatistics")

    uniform_prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10),
    )
    two_proc_sampler = pyabc.sampler.MulticoreEvalParallelSampler(n_procs=2)
    analysis = pyabc.ABCSMC(
        r_model,
        uniform_prior,
        r_distance,
        summary_statistics=r_sum_stat,
        sampler=two_proc_sampler,
    )
    db_url = "sqlite:///" + os.path.join(gettempdir(), "test_external.db")
    analysis.new(db_url, r_script.observation("mySumStatData"))
    history = analysis.run(minimum_epsilon=0.9, max_nr_populations=2)

    # Access the stored "cars" summary statistic of the first particle.
    sum_stats = history.get_weighted_sum_stats_for_model(m=0, t=1)[1]
    sum_stats[0]["cars"].head()
def test_pipeline(db_path):
    """Run a three-population analysis on a deterministic ``BasicoModel``.

    The observation is the model output at ``TRUE_PAR``. The distance is
    the mean absolute difference of column 1 of ``"X"`` between two
    outputs, evaluated at 20 evenly spaced test times.
    """
    model = BasicoModel(MODEL1_PATH, duration=MAX_T, method="deterministic")
    observed = model(TRUE_PAR)
    prior = pyabc.Distribution(rate=pyabc.RV("uniform", 0, 100))

    t_test_times = np.linspace(0, MAX_T, 20)

    def distance(x, y):
        # Index of the last stored time point before each test time.
        x_idx = np.searchsorted(x["t"], t_test_times) - 1
        y_idx = np.searchsorted(y["t"], t_test_times) - 1
        abs_diff = np.absolute(x["X"][:, 1][x_idx] - y["X"][:, 1][y_idx])
        return abs_diff.sum() / t_test_times.size

    analysis = pyabc.ABCSMC(model, prior, distance)
    analysis.new(db_path, observed)
    analysis.run(max_nr_populations=3)
def execute(self):
    """Run this task's analysis unless its result database already exists.

    The database file name is derived from the model id, analysis id, data
    index and repetition index. If that file is already present the task
    is skipped.
    """
    result_id = f"{self.model_id}__{self.analysis_id}__{self.i_data}__{self.i_rep}"
    db_file = f"db_{result_id}.db"
    print("Result id: ", result_id)

    if os.path.isfile(db_file):
        print("Skipping since exists already.")
        return

    analysis = pyabc.ABCSMC(
        models=self.model,
        parameter_priors=self.prior,
        distance_function=self.distance,
        population_size=self.n_acc,
        transitions=self.transition,
        eps=self.eps,
        acceptor=self.acceptor,
        sampler=self.sampler,
    )
    analysis.new("sqlite:///" + db_file, self.y_obs, gt_par=self.p_true)
    analysis.run(
        minimum_epsilon=self.eps_min,
        min_acceptance_rate=self.min_acc_rate,
        max_nr_populations=self.n_pop,
    )
def test_redis_subprocess():
    """Test whether the instructed redis sampler allows worker
    subprocesses.

    The model spawns a child process for every evaluation and receives the
    result through a pipe.
    """
    # print worker output
    worker_logger = logging.getLogger("Redis-Worker")
    worker_logger.addHandler(logging.StreamHandler())

    def model_process(p, pipe):
        """The actual model."""
        pipe.send({"y": p['p0'] + 0.1 * np.random.randn(10)})

    def model(p):
        """Model calling a subprocess."""
        parent, child = multiprocessing.Pipe()
        proc = multiprocessing.Process(target=model_process, args=(p, child))
        proc.start()
        res = parent.recv()
        proc.join()
        return res

    def distance(y1, y2):
        return np.abs(y1['y'] - y2['y']).sum()

    prior = pyabc.Distribution(
        p0=pyabc.RV('uniform', -5, 10),
        p1=pyabc.RV('uniform', -2, 2),
    )
    observed = {'y': 1}

    # False as daemon argument is ok, True and None are not allowed
    sampler = RedisEvalParallelSamplerServerStarter(
        workers=1,
        processes_per_worker=2,
        daemon=False,
    )
    try:
        analysis = pyabc.ABCSMC(
            model, prior, distance, sampler=sampler, population_size=10)
        analysis.new(pyabc.create_sqlite_db_id(), observed)
        # would just never return if model evaluation fails
        analysis.run(max_nr_populations=3)
    finally:
        sampler.shutdown()
def test_redis_look_ahead_delayed():
    """Test the look-ahead sampler with delayed evaluation in an adaptive
    setup.

    After the run, the sampler's CSV log and the proposal ids stored in
    the history are checked: for each generation, the number of particles
    with ``proposal_id == -1`` must equal the logged number of accepted
    look-ahead samples, capped at the population size.
    """
    model, prior, distance, obs = basic_testcase()
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50, mean_cv=0.5, max_population_size=50)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerLookAheadDelayWrapper(
            log_file=fh.name, wait_for_all_samples=True)
        try:
            abc = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=pop_size,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        # in delayed mode, all look-aheads must have been preliminary
        assert (df.n_lookahead == df.n_preliminary).all()
        print(df)

        # check history proposal ids
        for t in range(0, h.max_t + 1):
            pop = h.get_population(t=t)
            # A separate name keeps the population-size strategy above
            # from being rebound to an int.
            n_particles = len(pop)
            n_lookahead_pop = len(
                [p for p in pop.particles if p.proposal_id == -1])
            # ``.item()`` extracts the single matching log entry and
            # raises unless exactly one row matches; calling ``int()`` on
            # a Series directly is deprecated in pandas.
            n_accepted = int(
                df.loc[df.t == t, 'n_lookahead_accepted'].item())
            assert min(n_particles, n_accepted) == n_lookahead_pop
def calibrate(observed: dict, hostname: str = None):
    """Calibrate the model against observed data and print the results.

    Runs up to ten ABC-SMC populations, stores them in
    ``data/calibration.db`` under the current working directory, and
    prints the weighted mean of each parameter over the final
    distribution. Nothing is returned.

    :param observed: Dictionary with keys as in
        ``calibration_statistic.statistics``, containing the real data.
    :param hostname: If given, sampling goes through a mapping sampler
        built from ``get_sm_map(hostname)``; otherwise a local multicore
        sampler is used.
    """
    db_url = "sqlite:///" + os.path.join(
        os.getcwd(), "data", "calibration.db")

    if hostname is None:
        # No hostname: run locally with the normal sampler.
        sampler = pyabc.sampler.MulticoreEvalParallelSampler()
    else:
        # Given a hostname, use the mapping wrapper for it.
        sampler = pyabc.sampler.MappingSampler(
            map_=get_sm_map(hostname), mapper_pickles=True)

    # Adaptive distance based on Prangle (2017) (also acceptor below)
    adaptive_distance = pyabc.distance.AdaptivePNormDistance(
        p=2, adaptive=True)
    prior = pyabc.Distribution(**get_prior())
    adaptive_size = pyabc.populationstrategy.AdaptivePopulationSize(
        start_nr_particles=32,
        max_population_size=256,
        min_population_size=4,
    )

    analysis = pyabc.ABCSMC(
        model,
        parameter_priors=prior,
        distance_function=adaptive_distance,
        population_size=adaptive_size,
        sampler=sampler,
        acceptor=pyabc.accept_use_complete_history,
    )
    run_id = analysis.new(db=db_url, observed_sum_stat=observed)
    print(f"Run ID is {run_id}")

    history = analysis.run(max_nr_populations=10)
    df, w = history.get_distribution()

    # Weighted mean of each parameter over the final distribution.
    weights = list(w)
    results = {
        param: np.dot(list(df[param]), weights)
        for param in df.columns.values
    }
    print("Done! The results are:")
    print(results)
def run_smc(priors, data, epsilon, max_episodes, smc_population_size,
            sequence_sample_size, pop_size, distance_function=l1_distance):
    """Run ABC-SMC on ``data`` and return the final weighted distribution.

    The run is stored in a throwaway SQLite file under ``.temp_smc_dbs``,
    which is removed again before returning, including when the run
    raises.

    :param priors: Parameter priors passed to ``pyabc.ABCSMC``.
    :param data: Data frame whose columns are generation labels; the
        smallest label is the initial generation.
    :param epsilon: Passed to the run as ``minimum_epsilon``.
    :param max_episodes: Passed to the run as ``max_nr_populations``.
    :param smc_population_size: Population size passed to
        ``pyabc.ABCSMC``.
    :param sequence_sample_size: Forwarded to ``smc_model``.
    :param pop_size: Forwarded to ``smc_model``.
    :param distance_function: Distance passed to ``pyabc.ABCSMC``.
    :return: Data frame of the final distribution with an added
        ``'weights'`` column.
    """
    start = time.time()
    initial_gen = data.columns.min()
    gen_num = data.columns.max() - initial_gen
    model = partial(smc_model,
                    intial_freq=data[initial_gen].values,
                    sequence_sample_size=sequence_sample_size,
                    pop_size=pop_size,
                    gen_num=gen_num,
                    initial_gen=initial_gen)
    # functools.partial objects carry no __name__ of their own; per the
    # original author's note, the SMC machinery needs one.
    model.__name__ = 'model with params'
    abc = pyabc.ABCSMC(model, priors, distance_function, smc_population_size)

    # TODO: add a readme to the temp_smc_dbs folder.
    dbs_dir = '.temp_smc_dbs'
    os.makedirs(dbs_dir, exist_ok=True)
    random_num = random.randint(0, 9999)
    db_path = os.path.join(dbs_dir, f"db_{random_num}.db")
    sql_path = f"sqlite:///{db_path}"

    try:
        abc.new(sql_path, {'a': data})
        smc_post = abc.run(minimum_epsilon=epsilon,
                           max_nr_populations=max_episodes)
        print("SMC run time: ", round(time.time() - start, 2))
        print("Total number of SMC simulations: ",
              smc_post.total_nr_simulations)
        df, ws = smc_post.get_distribution()
        df['weights'] = ws
    finally:
        # Remove the database on errors too, so failed runs do not pile
        # up files. The existence check keeps a failure that happened
        # before the file was created from being masked by
        # FileNotFoundError.
        if os.path.exists(db_path):
            os.remove(db_path)
    return df