Exemplo n.º 1
0
def test_pipeline(transition: Transition):
    """Run a short ABC-SMC analysis end to end with the given transition.

    The test succeeds if a run of at most three populations completes
    without raising.
    """
    def model(p):
        # Offset ``a`` plus standard-normal noise scaled by ``b``.
        simulated = p['a'] + p['b'] * np.random.normal()
        return {'s0': simulated}

    prior = Distribution(
        a=RV('uniform', -5, 10),
        b=RV('uniform', 0.01, 0.09),
    )
    observation = {'s0': 3.5}

    smc = ABCSMC(model, prior, transitions=transition, population_size=10)
    smc.new(create_sqlite_db_id(), observation)
    smc.run(max_nr_populations=3)
Exemplo n.º 2
0
def test_two_competing_gaussians_multiple_population(db_path, sampler):
    """Model selection between two Gaussian models over several populations.

    Both models simulate ``y ~ N(x, sigma)`` and differ only in the prior
    mean of ``x``.  The test asserts that ``history.max_t`` equals
    ``nr_populations - 1`` and that comparing the estimated model
    probabilities with the analytic ones yields a finite number.

    :param db_path: database identifier passed to ``abc.new``.
    :param sampler: sampler passed to ``ABCSMC``.
    """
    # Local import: ``math`` replaces ``sp.sqrt`` / ``sp.inf``, NumPy aliases
    # that SciPy deprecated in its top-level namespace.
    import math

    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma))
    ]

    # We plug all the ABC setup together
    nr_populations = 3
    population_size = ConstantPopulationStrategy(40)
    abc = ABCSMC(models,
                 parameter_given_model_prior_distribution,
                 PercentileDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)

    # y_observed is the important piece here: our actual observation.
    # ``abc.new`` also defines where the results are stored.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with at most ``nr_populations`` populations
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        # Marginal density of y: the prior and the model noise both have
        # standard deviation sigma, so their variances add up.
        return st.norm(mu_x_model,
                       math.sqrt(sigma**2 + sigma**2)).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    normalization = p1_expected_unnormalized + p2_expected_unnormalized
    p1_expected = p1_expected_unnormalized / normalization
    p2_expected = p2_expected_unnormalized / normalization
    assert history.max_t == nr_populations - 1
    # the next line only tests if we obtain correct numerical types
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < math.inf
Exemplo n.º 3
0
def test_pyjulia_pipeline(sampler: Sampler):
    """Test that a pipeline with Julia calls runs through.

    Model, distance and observation are all taken from the Julia module
    ``Normal``.  Results go to a temporary SQLite file that is removed
    again afterwards, also when the run raises.
    """
    jl = pyabc.external.julia.Julia(
        source_file="doc/examples/model_julia/Normal.jl",
        module_name="Normal",
    )
    # just call it
    assert jl.display_source_ipython()  # noqa: S101

    model = jl.model()
    distance = jl.distance()
    obs = jl.observation()

    prior = Distribution(p=RV("uniform", -5, 10))

    if not isinstance(sampler, SingleCoreSampler):
        # Call model and distance once up front; presumably this triggers
        # Julia's compilation before a parallel sampler is used.
        distance(model(prior.rvs()), model(prior.rvs()))

    # mkstemp returns an *open* OS-level descriptor together with the path.
    # Only the path is needed, so close the descriptor instead of leaking it.
    fd, db_file = tempfile.mkstemp(suffix=".db")
    os.close(fd)
    try:
        abc = ABCSMC(
            model, prior, distance, population_size=100, sampler=sampler)
        abc.new("sqlite:///" + db_file, obs)
        abc.run(max_nr_populations=2)
    finally:
        # Remove the database file even if the run above failed.
        if os.path.exists(db_file):
            os.remove(db_file)
Exemplo n.º 4
0
def test_resume(db_path, gt_model):
    """Check that a stored ABC-SMC run can be loaded and continued.

    A first ``ABCSMC`` object runs one population.  A second one loads the
    same run from ``db_path`` and runs one more.  Afterwards both history
    objects must report two populations.

    :param db_path: database identifier passed to ``new`` and ``load``.
    :param gt_model: forwarded to ``abc.new`` as ``gt_model``.
    """
    # Local import: ``np.random.randn`` replaces ``sp.randn``, a NumPy alias
    # that SciPy deprecated in its top-level namespace.
    import numpy as np

    def model(parameter):
        return {"data": parameter["mean"] + np.random.randn()}

    prior = Distribution(mean=RV("uniform", 0, 5))

    def distance(x, y):
        x_data = x["data"]
        y_data = y["data"]
        return abs(x_data - y_data)

    abc = ABCSMC(model, prior, distance, population_size=10)
    history = abc.new(db_path, {"data": 2.5}, gt_model=gt_model)
    run_id = history.id
    print("Run ID:", run_id)
    hist_new = abc.run(minimum_epsilon=0, max_nr_populations=1)
    assert hist_new.n_populations == 1

    # A fresh ABCSMC object picks the stored run up by its id.
    abc_continued = ABCSMC(model, prior, distance)
    run_id_continued = abc_continued.load(db_path, run_id)
    print("Run ID continued:", run_id_continued)
    hist_contd = abc_continued.run(minimum_epsilon=0, max_nr_populations=1)

    assert hist_contd.n_populations == 2
    # The first history object is expected to see the added population too.
    assert hist_new.n_populations == 2
Exemplo n.º 5
0
def test_beta_binomial_different_priors_initial_epsilon_from_sample(
        db_path, sampler):
    """Compare ABC-SMC model probabilities with the beta-binomial result.

    Two identical binomial models receive different beta priors on
    ``theta``.  The estimated model probabilities must lie within 0.08
    (summed absolute error) of the analytic ones.
    """
    n_trials = 5
    observed_successes = 2
    shapes_1 = (1, 1)
    shapes_2 = (10, 1)

    def model(args):
        return {"result": st.binom(n_trials, args.theta).rvs()}

    wrapped_models = [FunctionModel(model) for _ in range(2)]
    pop_size = ConstantPopulationSize(800)
    priors = [
        Distribution(theta=RV("beta", *shapes_1)),
        Distribution(theta=RV("beta", *shapes_2)),
    ]
    abc = ABCSMC(
        wrapped_models,
        priors,
        MinMaxDistance(measures_to_use=["result"]),
        pop_size,
        eps=MedianEpsilon(median_multiplier=0.9),
        sampler=sampler,
    )
    abc.new(db_path, {"result": observed_successes})

    # A negative minimum epsilon is passed, so the run is limited by
    # max_nr_populations.
    history = abc.run(-1, max_nr_populations=5)
    mp = history.get_model_probabilities(history.max_t)

    def beta_function(a, b):
        return gamma(a) * gamma(b) / gamma(a + b)

    def marginal_likelihood(a, b, k):
        # Beta-binomial probability of k successes in n_trials draws.
        numerator = beta_function(a + k, b + n_trials - k)
        return binom(n_trials, k) * numerator / beta_function(a, b)

    evidence_1 = marginal_likelihood(*shapes_1, observed_successes)
    evidence_2 = marginal_likelihood(*shapes_2, observed_successes)
    total_evidence = evidence_1 + evidence_2
    p1_expected = evidence_1 / total_evidence
    p2_expected = evidence_2 / total_evidence

    total_error = abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected)
    assert total_error < 0.08
Exemplo n.º 6
0
def priors_from_kde(df, w):
    """Build a ``Distribution`` of beta priors, one per column of ``df``.

    For every column a ``MultivariateNormalTransition`` is fitted to the
    column with weights ``w``, 1000 samples are drawn from it, and a beta
    distribution is fitted to those samples.
    """
    fitted_priors = {}
    for column in df.columns:
        transition = MultivariateNormalTransition(scaling=1)
        transition.fit(df[[column]], w)
        samples = transition.rvs(1000)
        # The four fitted values are forwarded to RV("beta", ...) unchanged.
        shape_a, shape_b, loc, scale = scst.beta.fit(samples[column])
        fitted_priors[column] = RV("beta", shape_a, shape_b, loc, scale)
    return Distribution(**fitted_priors)
Exemplo n.º 7
0
def test_two_competing_gaussians_multiple_population_adaptive_populatin_size(db_path, sampler):
    """Model selection between two Gaussian models, adaptive population size.

    Both models simulate ``y ~ N(x, sigma)`` and differ only in the prior
    mean of ``x``.  The estimated model probabilities must lie within 0.07
    (summed absolute error) of the analytic ones, and ``history.max_t``
    must equal ``nr_populations - 1``.

    :param db_path: database identifier passed to ``abc.new``.
    :param sampler: sampler passed to ``ABCSMC``.
    """
    # Local import: ``math.sqrt`` replaces ``sp.sqrt``, a NumPy alias that
    # SciPy deprecated in its top-level namespace.
    import math

    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # The prior over the model classes is uniform
    model_prior = RV("randint", 0, 2)

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(mu_x_1, sigma)),
        Distribution(x=st.norm(mu_x_2, sigma)),
    ]

    # We plug all the ABC setup together
    nr_populations = 3
    population_size = AdaptivePopulationSize(400, mean_cv=0.05,
                                             max_population_size=1000)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 model_prior=model_prior,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)

    # y_observed is the important piece here: our actual observation.
    # ``abc.new`` also defines where the results are stored.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # Run with at most ``nr_populations`` populations; the same name is used
    # in the assertion below so the two cannot drift apart.
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        # Marginal density of y: the prior and the model noise both have
        # standard deviation sigma, so their variances add up.
        return st.norm(mu_x_model,
                       math.sqrt(sigma ** 2 + sigma ** 2)).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    normalization = p1_expected_unnormalized + p2_expected_unnormalized
    p1_expected = p1_expected_unnormalized / normalization
    p2_expected = p2_expected_unnormalized / normalization
    assert history.max_t == nr_populations - 1
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .07
Exemplo n.º 8
0
def abc_setup():
    """
    Assemble the ABCSMC object from the settings in ``simtools.PARAMS``.

    Six parameters ('s', 'c', 'w', 'n', 'm', 'r') each get a uniform prior
    whose arguments are unpacked from the matching ``abc_limits_*`` entry.
    Returns the configured ``ABCSMC`` instance.
    """
    params = simtools.PARAMS

    # Prior parameter name -> key of its limits in PARAMS.
    limit_keys = {
        's': 'abc_limits_shape',
        'c': 'abc_limits_center',
        'w': 'abc_limits_width',
        'n': 'abc_limits_noise_sigma',
        'm': 'abc_limits_normal_maximum_rate',
        'r': 'abc_limits_maximum_rate_ratio',
    }
    abc_prior_dict = {
        name: RV("uniform", *params[key])
        for name, key in limit_keys.items()
    }
    abc_priors = Distribution(abc_prior_dict)

    population_size = AdaptivePopulationSize(
        params['abc_initial_population_size'],
        mean_cv=params['abc_population_size_epsilon'],
        max_population_size=params['abc_population_size_maximum'])
    sampler = MulticoreEvalParallelSampler(
        params['abc_parallel_simulations'])

    return ABCSMC(abc_model,
                  abc_priors,
                  abc_distance,
                  population_size=population_size,
                  sampler=sampler)
Exemplo n.º 9
0
def test_model_gets_parameter(transition: Transition):
    """Verify that the model is always called with a ``Parameter`` object.

    The check lives inside the model, so it is applied to every parameter
    the run passes to it over up to three populations.
    """
    def model(p):
        assert isinstance(p, Parameter)
        noise = 0.1 * np.random.normal()
        return {'s0': p['p0'] + noise}

    observation = {'s0': 3.5}
    prior = Distribution(p0=RV('uniform', -5, 10))

    smc = ABCSMC(model, prior, transitions=transition, population_size=10)
    smc.new(create_sqlite_db_id(), observation)
    smc.run(max_nr_populations=3)
Exemplo n.º 10
0
def test_progressbar(sampler):
    """Smoke-test a short run with the given sampler.

    Going by its name the test is meant to exercise the progress bar;
    nothing in the body configures one explicitly.
    """
    def model(p):
        # Ten noisy copies of the parameter value.
        return {"y": p['p0'] + 0.1 * np.random.randn(10)}

    def distance(y1, y2):
        # Summed absolute difference over all entries of "y".
        difference = y1['y'] - y2['y']
        return np.abs(difference).sum()

    observation = {'y': 1}
    prior = Distribution(p0=RV('uniform', -5, 10))

    smc = ABCSMC(model, prior, distance, sampler=sampler, population_size=20)
    smc.new(db=create_sqlite_db_id(), observed_sum_stat=observation)
    smc.run(max_nr_populations=3)
Exemplo n.º 11
0
def test_gaussian_single_population(db_path, sampler):
    """Compare a one-population ABC posterior with the analytic Gaussian one.

    A normal prior on ``x`` is combined with a normal observation model.
    The weighted empirical CDF, mean and standard deviation of the sampled
    posterior must be close to the conjugate-normal closed form.
    """
    sigma_prior = 1
    sigma_ground_truth = 1
    observed_data = 1
    nr_populations = 1

    def model(args):
        return {"y": st.norm(args['x'], sigma_ground_truth).rvs()}

    abc = ABCSMC(
        [FunctionModel(model)],
        [Distribution(x=RV("norm", 0, sigma_prior))],
        MinMaxDistance(measures_to_use=["y"]),
        ConstantPopulationSize(600),
        eps=MedianEpsilon(0.1),
        sampler=sampler,
    )
    abc.new(db_path, {"y": observed_data})

    abc.do_not_stop_when_only_single_model_alive()
    # A negative minimum epsilon is passed, so the run is limited by
    # max_nr_populations.
    history = abc.run(-1, max_nr_populations=nr_populations)

    frame, posterior_weight = history.get_distribution(0, None)
    posterior_x = frame["x"].values

    # Weighted empirical CDF, padded far outside the sample range so the
    # interpolation is defined for every evaluation point below.
    order = np.argsort(posterior_x)
    cdf_support = np.hstack((-200, posterior_x[order], 200))
    cdf_values = np.hstack((0, np.cumsum(posterior_weight[order]), 1))
    f_empirical = sp.interpolate.interp1d(cdf_support, cdf_values)

    # Closed-form posterior for a normal prior and a normal likelihood.
    precision = 1 / sigma_prior**2 + 1 / sigma_ground_truth**2
    sigma_x_given_y = 1 / np.sqrt(precision)
    mu_x_given_y = (sigma_x_given_y**2 * observed_data / sigma_ground_truth**2)
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)

    grid = np.linspace(-8, 8)
    cdf_gap = np.absolute(f_empirical(grid) - expected_posterior_x.cdf(grid))
    assert cdf_gap.max() < 0.12
    assert history.max_t == nr_populations - 1

    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < 0.07
    assert abs(std_emp - sigma_x_given_y) < 0.1
Exemplo n.º 12
0
def test_all_in_one_model(db_path, sampler):
    """Two ``AllInOneModel`` instances with equal priors should tie.

    After at most three populations both model probabilities must be
    within 0.08 (summed absolute error) of 0.5.
    """
    n_models = 2
    candidates = [AllInOneModel() for _ in range(n_models)]
    pop_size = ConstantPopulationSize(800)
    priors = [
        Distribution(theta=RV("beta", 1, 1)) for _ in range(n_models)
    ]
    abc = ABCSMC(candidates, priors,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 pop_size,
                 eps=MedianEpsilon(.1),
                 sampler=sampler)
    abc.new(db_path, {"result": 2})

    history = abc.run(.2, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)
    deviation = abs(mp.p[0] - .5) + abs(mp.p[1] - .5)
    assert deviation < .08
Exemplo n.º 13
0
def command():
    """Command-line entry point: fit Kappa model parameters with ABC-SMC.

    Parses the command line, builds the model with the fixed parameters
    (``NAME=VALUE`` entries of ``-fix`` plus ``N`` and ``INIT_I``), puts a
    uniform prior on every ``NAME:A:B`` entry of ``-fit``, runs up to 15
    populations against the target ``R`` and prints the particle with the
    largest weight.

    Raises ``ValueError`` if a ``-fix`` / ``-fit`` entry does not have the
    expected shape or contains a non-numeric value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model",
                        nargs="+",
                        help="Kappa model files to simulate")
    # List-valued options default to an empty list and integer options to
    # integers, so the defaults have the same type as parsed values.
    parser.add_argument("-fit",
                        nargs="*",
                        default=[],
                        help="Parameters to fit")
    parser.add_argument("-fix",
                        nargs="*",
                        default=[],
                        help="Parameters to fix")
    parser.add_argument("-N",
                        type=int,
                        default=10000,
                        help="Population size")
    parser.add_argument("-I", type=int, default=10, help="Initial infected")
    parser.add_argument("-tmax",
                        default=365,
                        type=int,
                        help="Simulation max time")
    parser.add_argument("-db",
                        default="sqlite:///abc.db",
                        help="Database for ABC MCMC results")
    parser.add_argument("-R", default=1.0, type=float, help="Target R(t)")
    args = parser.parse_args()

    # "-fix NAME=VALUE ..." -> {NAME: float(VALUE)}
    fixed = {
        name: float(value)
        for name, value in (entry.split("=") for entry in args.fix)
    }
    fixed["N"] = args.N
    fixed["INIT_I"] = args.I
    m = Model(args.model, fixed=fixed, tmax=args.tmax)

    # "-fit NAME:A:B ..." -> uniform prior per name.
    # NOTE(review): if RV("uniform", a, b) follows scipy's (loc, scale)
    # convention, B is a width rather than an upper bound - confirm that
    # this is what users are expected to pass.
    priors = {
        name: RV("uniform", float(lb), float(ub))
        for name, lb, ub in (entry.split(":") for entry in args.fit)
    }
    prior = Distribution(priors)

    abc = ABCSMC(m, prior, distance_target_R)
    abc.new(args.db, {"R": args.R})
    history = abc.run(max_nr_populations=15)

    # Report the particle with the largest weight.
    df, w = history.get_distribution()
    best = np.argmax(w)
    print(df.iloc[best])
Exemplo n.º 14
0
def abc_setup(birthrate_groups):
    """
    Build the ABCSMC object with one candidate model per curve resolution.

    Settings are read from the global ``simtools.PARAMS['abc_params']``
    dict.  For every resolution between the two ``resolution_limits``
    (inclusive) a set of uniform rate priors ``r0 .. r<resolution-1>`` is
    created.  ``birthrate_groups`` is accepted but not used in this body.
    """
    settings = simtools.PARAMS['abc_params']
    resolution_limits = settings['resolution_limits']

    for curve_resolution in resolution_limits:
        assert 0 < curve_resolution <= 9

    rate_limits = settings['rate_limits']
    rate_start = rate_limits[0]
    rate_width = abs(rate_limits[1] - rate_limits[0])

    abc_priors = []
    for resolution in range(resolution_limits[0], resolution_limits[1] + 1):
        rate_priors = {
            'r' + str(index): RV("uniform", rate_start, rate_width)
            for index in range(resolution)
        }
        abc_priors.append(Distribution(birthrate=copy.deepcopy(rate_priors)))

    print('priors', abc_priors)

    #abc = ABCSMC([abc_model for __ in abc_priors], abc_priors, abc_distance,
    #             population_size=AdaptivePopulationSize(
    #                 int(simtools.PARAMS['abc_params']['starting_population_size']),
    #                 0.15,
    #                 max_population_size=int(simtools.PARAMS['abc_params']['max_population_size']),
    #                 min_population_size=int(simtools.PARAMS['abc_params']['min_population_size'])),
    #             sampler=MulticoreEvalParallelSampler(
    #                 simtools.PARAMS['abc_params']['parallel_simulations']))
    # The same model function is used for every prior; only the number of
    # rate parameters differs between the candidates.
    candidate_models = [abc_model] * len(abc_priors)
    population_size = ConstantPopulationSize(
        int(settings['starting_population_size']))
    sampler = MulticoreEvalParallelSampler(settings['parallel_simulations'])

    return ABCSMC(
        candidate_models,
        abc_priors,
        abc_distance,
        population_size=population_size,
        sampler=sampler)
Exemplo n.º 15
0
def test_continuous_non_gaussian(db_path, sampler):
    """Compare an ABC posterior with a non-Gaussian closed-form CDF.

    The model returns a uniform random number scaled by ``u``, with a
    uniform prior on ``u``.  The weighted empirical CDF of the sampled
    posterior must stay within 0.12 of the expected CDF on a grid.

    :param db_path: database identifier passed to ``abc.new``.
    :param sampler: sampler passed to ``ABCSMC``.
    """
    # Local import: the NumPy functions are called through ``np`` instead of
    # the aliases (``sp.rand``, ``sp.hstack``, ...) that SciPy deprecated in
    # its top-level namespace.  ``sp.interpolate`` is genuine SciPy and stays.
    import numpy as np

    def model(args):
        return {"result": np.random.rand() * args['u']}

    models = [model]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(250)
    parameter_given_model_prior_distribution = [
        Distribution(u=RV("uniform", 0, 1))
    ]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)
    d_observed = .5
    abc.new(db_path, {"result": d_observed})
    abc.do_not_stop_when_only_single_model_alive()

    # A negative minimum epsilon is passed, so the run is limited by
    # max_nr_populations.
    minimum_epsilon = -1
    history = abc.run(minimum_epsilon, max_nr_populations=2)
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["u"].values

    # Weighted empirical CDF, padded far outside the sample range so the
    # interpolation is defined for every evaluation point below.
    sort_indices = np.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        np.hstack((-200, posterior_x[sort_indices], 200)),
        np.hstack((0, np.cumsum(posterior_weight[sort_indices]), 1)))

    @np.vectorize
    def f_expected(u):
        # Expected CDF: zero up to d_observed, logarithmic above it.
        return (np.log(u) - np.log(d_observed)) / (- np.log(d_observed)) * \
               (u > d_observed)

    x = np.linspace(0.1, 1)
    max_distribution_difference = np.absolute(f_empirical(x) -
                                              f_expected(x)).max()
    assert max_distribution_difference < 0.12
Exemplo n.º 16
0
def test_redis_catch_error():
    """Run ABC-SMC on a Redis-based sampler with a model that sometimes fails.

    The model raises ``ValueError`` in roughly one out of ten calls.  The
    test passes if the run still completes.  ``sampler.cleanup()`` is
    called in every case, also when setup or the run raises.
    """

    def model(pars):
        if np.random.uniform() < 0.1:
            raise ValueError("error")
        return {'s0': pars['p0'] + 0.2 * np.random.uniform()}

    def distance(s0, s1):
        return abs(s0['s0'] - s1['s0'])

    prior = Distribution(p0=RV("uniform", 0, 10))
    sampler = RedisEvalParallelSamplerServerStarter(
        batch_size=3, workers=1, processes_per_worker=1, port=8775)

    try:
        abc = ABCSMC(
            model, prior, distance, sampler=sampler, population_size=10)

        db_file = "sqlite:///" + os.path.join(tempfile.gettempdir(), "test.db")
        data = {'s0': 2.8}
        abc.new(db_file, data)

        abc.run(minimum_epsilon=.1, max_nr_populations=3)
    finally:
        # Always release what the sampler started (presumably the Redis
        # server and worker processes), so a failing run does not leave them
        # behind for later tests.
        sampler.cleanup()
Exemplo n.º 17
0

def sum_stat_sim(parameters):
    """Simulate a price path and return its summary statistics.

    The path produced by ``preisSim(parameters)`` is passed to
    ``all_summary_stats`` together with the reference price series, which
    is read from a fixed CSV file on every call.
    """
    simulated_path = preisSim(parameters)

    reference_csv = os.path.join("/home/gsnkel001/master_dissertation/",
                                 "Log_Original_Price_Bars_2300.csv")
    reference_prices = pd.read_csv(reference_csv, header=None)

    return all_summary_stats(simulated_path, reference_prices)


# Parameters as Random Variables: one uniform prior per model parameter.
# NOTE(review): if RV("uniform", a, b) follows scipy's (loc, scale)
# convention, the second argument is a width, not an upper bound - confirm
# that the *_max values are meant that way.
prior = Distribution(delta=RV("uniform", delta_min, delta_max),
                     mu=RV("uniform", mu_min, mu_max),
                     alpha=RV("uniform", alpha_min, alpha_max),
                     lambda0=RV("uniform", lambda0_min, lambda0_max),
                     C_lambda=RV("uniform", C_lambda_min, C_lambda_max),
                     delta_S=RV("uniform", deltaS_min, deltaS_max))

# define "true" parameters to calibrate
# param_true = {"delta": delta_true,
#               "mu": mu_true,
#               "alpha": alpha_true,
#               "lambda0": lambda0_true,
#               "C_lambda": C_lambda_true,
#               "delta_S": delta_S_true}

# define distance function
Exemplo n.º 18
0
import numpy as np
import os
import matplotlib.pyplot as plt
from param_inference import MyStochasticProcess, distance, distance2
from pyabc import ABCSMC, RV, Distribution, AggregatedTransition, DiscreteJumpTransition, MultivariateNormalTransition
from pyabc.visualization import plot_kde_matrix
from pyabc.transition import GridSearchCV

# Script entry point: set up an ABC-SMC analysis for the early-growth data.
if __name__ == '__main__':
    # Value passed as first argument to MyStochasticProcess below.
    n = int(3e5)
    # Candidate values 1..16 for the discrete parameter 'k'.
    k_domain = np.arange(1, 17)
    data = np.load('cases_de_feb26_mar16.npy') #* n / 1e6
    # print(data[0])
    # assert data < 3
    # Number of entries in the loaded data array.
    tmax = len(data)
    # Priors: the ranges of n01 and n02 are tied to the first data point;
    # 'k' uses randint with k_domain.max()+1 as second bound (presumably an
    # exclusive upper bound, so that every value of k_domain can be drawn).
    prior = Distribution(n01=RV('uniform', 0, 3 * round(data[0])),
                         n02=RV('uniform', 0, 10 * round(data[0])),
                         k=RV('randint', k_domain.min(), k_domain.max()+1),
                         log_p=RV('uniform', 0, 6),
                         p_inf=RV('uniform', 0.01, 0.07))
    model = MyStochasticProcess(n, tmax, data)
    # 'k' gets its own discrete jump transition; the remaining parameters
    # are handled jointly by GridSearchCV().
    transition = AggregatedTransition(mapping={
        # 'n01': DiscreteJumpTransition(domain=np.arange(int(data.max()))),
        # 'n02': DiscreteJumpTransition(domain=np.arange(10 * int(data.max()))),
        'k': DiscreteJumpTransition(domain=k_domain, p_stay=0.7),
        ('n01', 'n02', 'log_p', 'p_inf'): GridSearchCV()
    })

    # SQLite database file in the current working directory.
    db = "sqlite:///" + os.path.join(os.getcwd(), "early_growth.db")

    abc = ABCSMC(model, prior, distance, transitions=transition)
Exemplo n.º 19
0
    # Value used to rescale the loaded case numbers (data * n / 1e6) and
    # passed as first argument to MyStochasticProcess below.
    n = int(3e5)
    data_prev = np.load('casespm_de_feb26_mar16.npy') * n / 1e6
    data = np.load('casespm_de_mar16_jun6.npy') * n / 1e6
    data_ext = np.load('casespm_de_mar16_jun21.npy') * n / 1e6
    tmax = len(data)
    # Discrete domain of 'k': kmin..kmax, both included.
    kmin, kmax = 1, 11
    k_domain = np.arange(kmin, kmax + 1)
    # Bounds for n01 and n02 are derived from the first data points.
    n01min, n01max = round(data[0]), round(data[:4].sum())

    n01_domain = np.arange(n01min, n01max + 1)
    n02min, n02max = round(50 * data[0] / 10), round(50 * data[2])
    n02_domain = np.arange(n02min, n02max + 1)
    delaymin, delaymax = 0, 10
    delay_domain = np.arange(delaymin, delaymax + 1)
    # NOTE(review): if RV('randint', low, high) follows scipy's convention,
    # `high` is exclusive, so the prior on 'k' would never produce kmax even
    # though k_domain contains it - confirm whether kmax + 1 is intended.
    # NOTE(review): likewise, if 'uniform' takes (loc, scale), the second
    # argument of the n01/n02 priors is a width, not an upper bound.
    prior = Distribution(
        n01=RV('uniform', n01min, n01max + 0.5),
        n02=RV('uniform', n02min, n02max + 0.5),
        k=RV('randint', kmin, kmax),
        # delay=RV('randint', delaymin, delaymax+1),
        log_p=RV('uniform', 0, 7),
        p_inf=RV('uniform', 0.01, 0.03))
    model = MyStochasticProcess(n, tmax, data)
    # 'k' gets its own discrete jump transition; the remaining parameters
    # are handled jointly by GridSearchCV().
    transition = AggregatedTransition(
        mapping={
            # 'delay': DiscreteJumpTransition(domain=delay_domain),
            # 'n01': DiscreteJumpTransition(domain=n01_domain),
            # 'n02': DiscreteJumpTransition(domain=n02_domain),
            'k': DiscreteJumpTransition(domain=k_domain, p_stay=.8),
            ('n01', 'n02', 'log_p', 'p_inf'): GridSearchCV()
        })
    # Identifier string for this analysis (note: shadows the builtin `id`).
    id = 'n=3e5_2'
Exemplo n.º 20
0
    delay = round(x['delay'])
    tmax = len(x['cases'])
    data = y['cases']
    data = data[delay:tmax + delay]

    return np.sum(np.abs(x['cases'] - data))


# Script entry point: build prior, model and transition for the analysis.
if __name__ == '__main__':
    # os.environ['NUMEXPR_MAX_THREADS'] = '12'
    # Value used to rescale the loaded case numbers (data * n / 1e6) and
    # passed as first argument to MyStochasticProcess below.
    n = int(3e5)
    # Values 1..10 that the transition on 'k' may propose.
    k_domain = np.arange(1, 11)
    data = np.load('casespm_de_mar16_jun6.npy') * n / 1e6
    tmax = len(data)

    # Priors: the ranges of n01 and n02 are tied to the maximum of the data.
    # NOTE(review): the prior on 'k' uses randint(1, 7) while k_domain spans
    # 1..10 - confirm that the narrower prior range is intentional.
    prior = Distribution(n01=RV('uniform', 0, int(data.max())),
                         n02=RV('uniform', 0, 10 * int(data.max())),
                         k=RV('randint', 1, 7),
                         log_p=RV('uniform', 0, 6),
                         p_inf=RV('uniform', 0.01, 0.03))
    model = MyStochasticProcess(n, tmax, data)
    # 'k' gets a discrete jump transition; the continuous parameters share
    # one multivariate normal transition.
    transition = AggregatedTransition(
        mapping={
            # 'n01': DiscreteJumpTransition(domain=np.arange(int(data.max()))),
            # 'n02': DiscreteJumpTransition(domain=np.arange(10 * int(data.max()))),
            'k':
            DiscreteJumpTransition(domain=k_domain, p_stay=0.7),
            ('n01', 'n02', 'log_p', 'p_inf'):
            MultivariateNormalTransition(scaling=0.8)
        })
Exemplo n.º 21
0
    # Initial proportion of tissue with lesion-removing clones.
    (0, 1),

    # growth rate of clones. Limited by the growth rate of MAML clones against WT tissue (Alcolea et al 2014).
    (0, 0.04)
]

# Parameter names, in the same order as the entries of BOUNDS.
param_order = [
    'division_rate', 'delta', 'lesion_starting_cells',
    'initial_lesion_density', 'sensitive_lesion_proportion',
    'starting_mutant_proportion', 'k'
]
# One uniform prior per parameter.  Each bound is a (lower, upper) pair; the
# second RV argument is the width of the interval.
priors = {}
for p, b in zip(param_order, BOUNDS):
    priors[p] = RV("uniform", b[0], b[1] - b[0])

priors = Distribution(priors)

NUM_CORES = 3
POPULATION_SIZE = 10000
DB_PATH = "full_model_abc.db"

###### ABC ######
distance = PNormDistance(p=1)
sampler = MulticoreEvalParallelSampler(n_procs=NUM_CORES)

# Pass the sampler explicitly: it was configured with NUM_CORES above but
# never handed to ABCSMC, so that setting had no effect on the run.
abc = ABCSMC(run_model, priors, distance, population_size=POPULATION_SIZE,
             sampler=sampler)

db_path = ("sqlite:///" + DB_PATH)
Exemplo n.º 22
0
        return {'cases': newcases_seir(nw.n_t)}


# Script entry point: load a stored run and set up a model that continues
# from its history with a second data set.
if __name__ == '__main__':
    n = int(3e5)
    # Scaling factor applied to both loaded data sets.
    rel = n / 1e6
    data1 = np.load('casespm_de_mar16_jun6.npy') * rel
    data2 = np.load('casespm_de_jun7_sep15.npy') * rel
    t1 = len(data1)
    print(t1)
    # NOTE(review): this assertion fails whenever data1 is non-empty, which
    # stops the script right here. It looks like a leftover debugging stop -
    # confirm and remove.
    assert t1 == 0
    t2 = len(data2)
    kmin, kmax = 1, 11
    k_domain = np.arange(kmin, kmax + 1)
    # print(len(data_ext[delaymax:tmax+delaymax]), tmax)
    # NOTE(review): if RV('randint', low, high) follows scipy's convention,
    # `high` is exclusive, so 'k' would never be drawn as kmax even though
    # k_domain contains it - confirm.
    prior = Distribution(k=RV('randint', kmin, kmax),
                         log_p=RV('uniform', 0, 7),
                         p_inf=RV('uniform', 0.01, 0.03))
    model = MyStochasticProcess(n, t1, data1)
    # NOTE(review): `transition` is not passed to ABCSMC in the lines visible
    # here; check whether it is used further down.
    transition = AggregatedTransition(
        mapping={
            'k': DiscreteJumpTransition(domain=k_domain),
            ('log_p', 'p_inf'): GridSearchCV()
        })
    # Load the earlier run; its id is read from a .npy file named after it.
    id_old = 'n=3e5_new'
    db_old = "sqlite:///" + os.path.join(os.getcwd(), id_old + ".db")
    abc_old = ABCSMC(model, prior, distance)
    abc_old.load(db_old, int(np.load('run_id_' + id_old + '.npy')))
    # The continuation model receives the history of the loaded run.
    model = ContinuedSpread(n, t1, t2, data2, abc_old.history)
    id = 'n=3e5_late'
    db = 'sqlite:///' + os.path.join(os.getcwd(), id + '.db')
# One simulated data set per model: Model1 at `true_rate`, Model2 at rate 30.
observations = [Model1()({"rate": true_rate}), Model2()({"rate": 30})]

# Number of time points at which trajectories are compared.
N_TEST_TIMES = 20

# Evenly spaced comparison times from 0 to MAX_T (both included).
t_test_times = np.linspace(0, MAX_T, N_TEST_TIMES)


def distance(x, y):
    """Mean absolute difference between two trajectories at the test times.

    For every entry of the module-level ``t_test_times`` the state of the
    last event at or before that time is looked up in each trajectory, and
    column 1 of the states is compared.

    :param x: mapping with event times under ``"t"`` and a 2-D state array
        under ``"X"``; the times are assumed to be sorted ascending, as
        ``np.searchsorted`` requires.
    :param y: second trajectory, same layout as ``x``.
    :return: summed absolute difference divided by the number of test times.
    """
    # side="right" makes searchsorted return the number of event times that
    # are <= the test time, so subtracting one gives the last event at or
    # before it.  With the default side="left", a test time equal to the
    # first event time (e.g. both 0) produced index -1, which silently wraps
    # around to the *last* state of the trajectory.
    # NOTE(review): a test time earlier than the first event time still
    # yields -1; confirm that trajectories always start at or before
    # t_test_times[0].
    xt_ind = np.searchsorted(x["t"], t_test_times, side="right") - 1
    yt_ind = np.searchsorted(y["t"], t_test_times, side="right") - 1
    error = (np.absolute(x["X"][:, 1][xt_ind] - y["X"][:, 1][yt_ind]).sum() /
             t_test_times.size)
    return error


# Both models share the same uniform prior on "rate".
prior = Distribution(rate=RV("uniform", 0, 100))

# Model selection between Model1 and Model2 with an adaptive population size.
abc = ABCSMC([Model1(), Model2()], [prior, prior],
             distance,
             population_size=AdaptivePopulationSize(500, 0.15))

# The first observation (the one simulated with Model1) is used as data.
abc_id = abc.new("sqlite:////tmp/mjp.db", observations[0])

history = abc.run(minimum_epsilon=0.7, max_nr_populations=15)

# Bar plot of the model probabilities returned by the history.
ax = history.get_model_probabilities().plot.bar()
ax.set_ylabel("Probability")
ax.set_xlabel("Generation")
ax.legend([1, 2],
          title="Model",
          ncol=2,
Exemplo n.º 24
0
# true_trajectory = model({"theta1": theta1_true,
#                          "theta2": theta2_true})["X_2"]

# plt.plot(true_trajectory, color="C0", label='Simulation')
# plt.scatter(measurement_times, measurement_data,
#             color="C1", label='Data')
# plt.xlabel('Time $t$')
# plt.ylabel('Measurement $Y$')
# plt.title('Conversion reaction: True parameters fit')
# plt.legend()
# plt.show()

# def distance(simulation, data):
#     return np.absolute(data["X_2"] - simulation["X_2"]).sum()

# Uniform priors for the four model parameters r, C, d and g.
# NOTE(review): if RV("uniform", a, b) follows scipy's (loc, scale)
# convention, the second argument is a width, not an upper bound - confirm
# the intended ranges.
parameter_prior = Distribution(r=RV("uniform", 0.1, 4.0),
                               C=RV("uniform", 6.0, 10.0),
                               d=RV("uniform", 0.01, 4.0),
                               g=RV("uniform", 0.01, 4.0))

# The return value is discarded here; presumably left over from an
# interactive session where it was displayed.
parameter_prior.get_parameter_names()

# Noisy model
# sigma=0.02
# acceptor = pyabc.StochasticAcceptor()
# kernel = pyabc.IndependentNormalKernel(var=sigma**2)
# eps = pyabc.Temperature()

# abc = pyabc.ABCSMC(deterministic_run, parameter_prior, kernel, eps=eps, acceptor=acceptor,population_size=100)
# abc.new(db_path,{"Contamination": measurement_data}) #This distance model assumes the name of the predicited and confirmed are the same
# history_acceptor = abc.run(max_nr_populations=10,minimum_epsilon=10)
Exemplo n.º 25
0
    return price_path


def sum_stat_sim(parameters):
    """Simulate a price path and return its summary statistics.

    The path produced by ``preisSim(parameters)`` is passed to
    ``all_summary_stats`` together with the reference series, which is read
    from ``p_true.csv`` in ``temp_output_folder`` on every call.
    """
    simulated_path = preisSim(parameters)

    reference_csv = os.path.join(temp_output_folder, "p_true.csv")
    reference_prices = pd.read_csv(reference_csv, header=None)

    return all_summary_stats(simulated_path, reference_prices)


# Parameters as Random Variables: one uniform prior per model parameter.
# NOTE(review): if RV("uniform", a, b) follows scipy's (loc, scale)
# convention, the second argument is a width, not an upper bound - confirm
# that the *_MAX constants are meant that way.
prior = Distribution(delta=RV("uniform", DELTA_MIN, DELTA_MAX),
                     mu=RV("uniform", MU_MIN, MU_MAX),
                     alpha=RV("uniform", ALPHA_MIN, ALPHA_MAX),
                     lambda0=RV("uniform", LAMBDA0_MIN, LAMBDA0_MAX),
                     C_lambda=RV("uniform", C_LAMBDA_MIN, C_LAMBDA_MAX),
                     delta_S=RV("uniform", DELTAS_MIN, DELTAS_MAX))

# define "true" parameters to calibrate
# The keys match the parameter names of the prior above.
param_true = {
    "delta": DELTA_TRUE,
    "mu": MU_TRUE,
    "alpha": ALPHA_TRUE,
    "lambda0": LAMBDA0_TRUE,
    "C_lambda": C_LAMBDA_TRUE,
    "delta_S": DELTA_S_TRUE
}
Exemplo n.º 26
0
Arquivo: main.py Projeto: Ajris/miss
def main1():
    """Fit ``eta`` and ``alpha`` of the ODE system ``f`` with ABC-SMC.

    The measurements from ``get_from_csv()`` are divided by 40000000.  The
    model integrates ``f`` from a fixed initial state over one time step per
    measurement and reports column 4 of the solution as ``"X_2"``.  After
    at most three populations the final distribution is printed.
    """
    scale = 40000000
    measurement_data = np.array(get_from_csv()) / scale
    measurement_times = np.arange(len(measurement_data))

    # Initial state in the order (u, w, h, v, q, r, d).
    init = np.array([
        39999999 / scale,  # u
        0.0,               # w
        0,                 # h
        1 / scale,         # v
        0,                 # q
        0,                 # r
        0,                 # d
    ])

    # Full parameter list of the underlying system:
    # beta, gamma, alpha, mi, theta, theta_0, sigma, eta, kappa_1, kappa_2
    # Only eta and alpha are currently estimated and forwarded to f.
    def model(pars):
        ode_args = (pars["eta"], pars["alpha"])
        sol = sp.integrate.odeint(f, init, measurement_times, args=ode_args)
        return {"X_2": sol[:, 4]}

    parameter_prior = Distribution(
        eta=RV("uniform", 0, 1),
        alpha=RV("uniform", 0, 1),
    )

    transition = LocalTransition(k_fraction=.3)
    epsilon = MedianEpsilon(500, median_multiplier=0.7)
    abc = ABCSMC(models=model,
                 parameter_priors=parameter_prior,
                 distance_function=distance,
                 population_size=5,
                 transitions=transition,
                 eps=epsilon)

    db_path = "sqlite:///" + os.path.join("./", "test.db")
    abc.new(db_path, {"X_2": measurement_data})
    history = abc.run(minimum_epsilon=0.1, max_nr_populations=3)
    print(*history.get_distribution(m=0, t=history.max_t))
Exemplo n.º 27
0
def two_competing_gaussians_multiple_population(db_path, sampler, n_sim):
    """Run a two-model Gaussian ABC-SMC and check the sampler's bookkeeping.

    Two identical Gaussian simulators are compared; only their priors on
    ``x`` differ (means 0 and 1). After the run the function asserts that
    the expected number of generations was stored, that the model
    probabilities are numeric, and that the calibration iteration recorded
    at least ``nr_particles`` evaluations. Excess calibration evaluations
    only trigger a warning.

    :param db_path: database identifier passed to ``abc.new``.
    :param sampler: sampler instance handed to ``ABCSMC``; an optional
        ``batch_size`` attribute is taken into account.
    :param n_sim: forwarded as ``nr_samples_per_parameter`` to
        ``ConstantPopulationSize``.
    """
    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [SimpleModel(model), SimpleModel(model)]

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma)),
    ]

    # We plug all the ABC setup together
    nr_populations = 2
    pop_size = ConstantPopulationSize(23, nr_samples_per_parameter=n_sim)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 PercentileDistance(measures_to_use=["y"]),
                 pop_size,
                 eps=MedianEpsilon(),
                 sampler=sampler)

    # Finally we add meta data such as model names and
    # define where to store the results
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with at most nr_populations populations
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        # Density of y_observed with the prior and model variances added
        return st.norm(
            mu_x_model, np.sqrt(sigma**2 + sigma**2)).pdf(y_observed)

    def model_probability(m):
        # A model without an entry counts as probability 0
        try:
            return mp.p[m]
        except KeyError:
            return 0

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    normalization = p1_expected_unnormalized + p2_expected_unnormalized
    p1_expected = p1_expected_unnormalized / normalization
    p2_expected = p2_expected_unnormalized / normalization
    assert history.max_t == nr_populations-1

    # the next lines only test if we obtain correct numerical types
    mp0 = model_probability(0)
    mp1 = model_probability(1)
    assert abs(mp0 - p1_expected) + abs(mp1 - p2_expected) < np.inf

    # check that sampler only did nr_particles samples in first round
    pops = history.get_all_populations()
    # since we had calibration (of epsilon), check that was saved
    calibration_samples = \
        pops[pops['t'] == History.PRE_TIME]['samples'].values
    # Exactly one calibration row is expected; take its count as a plain
    # int so the comparisons and the log message work on a scalar instead
    # of a one-element array.
    assert len(calibration_samples) == 1
    pre_evals = int(calibration_samples[0])
    assert pre_evals >= pop_size.nr_particles
    # our samplers should not have overhead in calibration, except batching
    batch_size = getattr(sampler, 'batch_size', 1)
    max_expected = pop_size.nr_particles + batch_size - 1
    if pre_evals > max_expected:
        # Violations have been observed occasionally for the redis server
        # due to runtime conditions with the increase of the evaluations
        # counter. This could be overcome, but as it usually only happens
        # for low-runtime models, this should not be a problem. Thus, only
        # print a warning here.
        logger.warning(
            f"Had {pre_evals} simulations in the calibration iteration, "
            f"but a maximum of {max_expected} would have been sufficient for "
            f"the population size of {pop_size.nr_particles}.")
Exemplo n.º 28
0
    return {
        "barcodeFrequency":
        np.array([primaryNorm, secondaryNorm, tertiaryNorm])
    }


# Generate synthetic data
# simBarFreq = determineTestParameters({"Omega":0.15,"Probability":0.15,"Lambda":1.0,"Gamma":0.48})

# Parameter inference using approximate Bayesian computation (pyABC)
# (lower, upper) bound for every inferred parameter; also reused as the
# axis limits of the KDE matrix plot below.
limits = {
    "Omega": (0, 0.3),
    "Probability": (0, 0.2),
    "Lambda": (0, 1.5),
    "Gamma": (0, 3),
}
# Each uniform RV receives the lower bound and the interval width.
parameter_prior = Distribution(**{
    name: RV("uniform", lower, upper - lower)
    for name, (lower, upper) in limits.items()
})
db_path = pyabc.create_sqlite_db_id(file_="glioblatomaLanModel_syn.db")
abc = ABCSMC(
    models=determineTestParameters,
    parameter_priors=parameter_prior,
    distance_function=DistanceAfterBinning,
    population_size=160,
    sampler=sampler.MulticoreParticleParallelSampler(),
    transitions=transition.LocalTransition(k_fraction=0.3),
)
abc.new(db_path, expBarFreq)
h = abc.run(minimum_epsilon=0.1, max_nr_populations=10)

# Plot the distribution returned for model 0 and save it to disk.
df, w = h.get_distribution(m=0)
plot_kde_matrix(df, w, limits=limits)
plt.savefig('infer_result.pdf')
plt.clf()
Exemplo n.º 29
0
                              init,
                              measurement_times,
                              args=(pars["theta1"], pars["theta2"]))
    pause_ms = np.random.lognormal(0, 1) / 50
    time.sleep(pause_ms)
    return {"X_2": sol[:, 1]}


# Reference data: the "X_2" output of the model at the true parameters.
true_trajectory = model(dict(theta1=theta1_true, theta2=theta2_true))["X_2"]


def distance(simulation, data):
    """Return the summed absolute difference between the ``"X_2"`` entries.

    Both arguments are mappings holding the trajectory under ``"X_2"``.
    """
    residual = data["X_2"] - simulation["X_2"]
    return np.abs(residual).sum()


# Independent uniform priors for both model parameters.
parameter_prior = Distribution(theta1=RV("uniform", 0, 1),
                               theta2=RV("uniform", 0, 1))
# The return value is discarded here (looks like notebook residue).
parameter_prior.get_parameter_names()

resultfilepath = "/p/home/jusers/reck1/juwels/scripts/Batch_pyABC/programs/results/sleeptimeresults32.txt"
# Truncate the file and write the header inside a context manager, so this
# handle is flushed and closed before the file is reopened for appending.
with open(resultfilepath, "w") as header_file:
    header_file.write(
        "Pop size, Look_ahead, Repetitions, Runtime Expectation, Runtime Variance, total Walltime\n"
    )
# Left open in append mode on purpose; presumably the code that follows
# writes result rows through this handle.
resultfile = open(resultfilepath, "a")

redis_sampler = sampler.RedisEvalParallelSampler(host=host,
                                                 port=port,
                                                 look_ahead=False)

for psize in pop_sizes:
Exemplo n.º 30
0
from pyabc import ABCSMC, RV, Distribution
from pyabc.visualization import plot_kde_1d, plot_kde_2d

# In[20]:


def model(params):
    """Run a single ``gillespie_ssa`` simulation for the given parameters.

    Reads ``b``, ``h``, ``ϵ`` and ``d`` from ``params`` and returns the
    first three entries of the simulation result under the keys
    ``times``, ``hare`` and ``lynx``.
    """
    trajectory = gillespie_ssa(
        params.b, params.h, params.ϵ, params.d, t_steps=15)
    return {
        "times": trajectory[0],
        "hare": trajectory[1],
        "lynx": trajectory[2],
    }


# Prior over the four parameters that `model` reads from `params`
# (b, h, ϵ, d).
# NOTE(review): if RV forwards its positional arguments to scipy.stats
# (TODO confirm), the 1 in RV("expon", 1) is the `loc` shift rather than
# the scale -- verify that this is the intended prior for b and d.
prior = Distribution(  ###
    # set the parameter prior distributions here
    h=RV("uniform", 0, 1),
    ϵ=RV("uniform", 0, 1),
    b=RV("expon", 1),
    d=RV("expon", 1))  ###


def mse(x, y):
    """Return the mean squared difference over the hare and lynx series.

    ``x`` and ``y`` are mappings with ``'hare'`` and ``'lynx'`` entries;
    other keys are ignored. The distance is symmetric, so it does not
    matter which argument is the data and which is the simulation.
    """
    first = np.asarray([x['hare'], x['lynx']])
    second = np.asarray([y['hare'], y['lynx']])
    squared_error = (first - second) ** 2
    return squared_error.mean()


# Create the ABC-SMC object from the simulator, the prior and the distance;
# it still has to be initialised with its `new` method.
abc = ABCSMC(models=model, parameter_priors=prior, distance_function=mse)