Example #1
    def test_sample(self):
        # use the PMCABC scheme for T = 1
        T, n_sample, n_simulate, eps_arr, eps_percentile = 1, 10, 1, [10], 10
        sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
        journal = sampler.sample([self.observation], T, eps_arr, n_sample,
                                 n_simulate, eps_percentile)
        mu_post_sample = np.array(journal.get_parameters()['mu'])
        sigma_post_sample = np.array(journal.get_parameters()['sigma'])
        post_weights = np.array(journal.get_weights())

        # Compute posterior mean
        mu_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0)
        sigma_post_mean = np.average(sigma_post_sample, weights=post_weights, axis=0)

        # test shape of sample
        mu_sample_shape = np.shape(mu_post_sample)
        sigma_sample_shape = np.shape(sigma_post_sample)
        weights_sample_shape = np.shape(post_weights)
        self.assertEqual(mu_sample_shape, (10, 1))
        self.assertEqual(sigma_sample_shape, (10, 1))
        self.assertEqual(weights_sample_shape, (10, 1))
        self.assertLess(mu_post_mean - 0.03713, 10e-2)
        self.assertLess(sigma_post_mean - 7.727, 10e-2)

        # use the PMCABC scheme for T = 2
        T, n_sample, n_simulate, eps_arr, eps_percentile = 2, 10, 1, [10, 5], 10
        sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
        sampler.sample_from_prior(rng=np.random.RandomState(1))
        journal = sampler.sample([self.observation], T, eps_arr, n_sample,
                                 n_simulate, eps_percentile)
        mu_post_sample = np.array(journal.get_parameters()['mu'])
        sigma_post_sample = np.array(journal.get_parameters()['sigma'])
        post_weights = np.array(journal.get_weights())

        # Compute posterior mean
        mu_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0)
        sigma_post_mean = np.average(sigma_post_sample, weights=post_weights, axis=0)

        # test shape of sample
        mu_sample_shape = np.shape(mu_post_sample)
        sigma_sample_shape = np.shape(sigma_post_sample)
        weights_sample_shape = np.shape(post_weights)
        self.assertEqual(mu_sample_shape, (10, 1))
        self.assertEqual(sigma_sample_shape, (10, 1))
        self.assertEqual(weights_sample_shape, (10, 1))
        self.assertLess(mu_post_mean - 0.9356, 10e-2)
        self.assertLess(sigma_post_mean - 7.819, 10e-2)

        self.assertFalse(journal.number_of_simulations == 0)
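For context, the fixtures used above (self.model, self.dist_calc, self.backend, self.observation) come from the test's setUp. A minimal sketch of a compatible setUp follows; the prior ranges and the observation value are illustrative assumptions, not taken from the original test suite.

import unittest

import numpy as np

from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform
from abcpy.distances import Euclidean
from abcpy.statistics import Identity


class PMCABCTests(unittest.TestCase):
    def setUp(self):
        # dummy (sequential) backend
        self.backend = BackendDummy()
        # uniform priors on the Gaussian parameters (illustrative ranges)
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.1], [10.0]], name='sigma')
        # Gaussian model with hierarchical parameters
        self.model = Normal([mu, sigma])
        # Euclidean distance on identity statistics
        self.dist_calc = Euclidean(Identity(degree=2, cross=False))
        # a fixed observation (illustrative value)
        self.observation = [np.array(9.8)]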
Example #2
    def test_sample(self):
        # use the PMCABC scheme for T = 1
        T, n_sample, n_simulate, eps_arr, eps_percentile = 1, 10, 1, [.1], 10
        sampler = PMCABC(self.model,
                         self.dist_calc,
                         self.kernel,
                         self.backend,
                         seed=1)
        journal = sampler.sample(self.observation, T, eps_arr, n_sample,
                                 n_simulate, eps_percentile)
        samples = (journal.get_parameters(), journal.get_weights())

        # Compute posterior mean
        mu_post_sample = np.asarray(samples[0][:, 0])
        sigma_post_sample = np.asarray(samples[0][:, 1])
        post_weights = np.asarray(samples[1][:, 0])
        mu_post_mean = np.average(mu_post_sample, weights=post_weights)
        sigma_post_mean = np.average(sigma_post_sample, weights=post_weights)

        # test shape of sample
        mu_sample_shape = np.shape(mu_post_sample)
        sigma_sample_shape = np.shape(sigma_post_sample)
        weights_sample_shape = np.shape(post_weights)
        self.assertEqual(mu_sample_shape, (10, ))
        self.assertEqual(sigma_sample_shape, (10, ))
        self.assertEqual(weights_sample_shape, (10, ))

        # use the PMCABC scheme for T = 2
        T, n_sample, n_simulate, eps_arr, eps_percentile = 2, 10, 1, [.1, .05], 10
        sampler = PMCABC(self.model,
                         self.dist_calc,
                         self.kernel,
                         self.backend,
                         seed=1)
        journal = sampler.sample(self.observation, T, eps_arr, n_sample,
                                 n_simulate, eps_percentile)
        samples = (journal.get_parameters(), journal.get_weights())

        # Compute posterior mean
        mu_post_sample = np.asarray(samples[0][:, 0])
        sigma_post_sample = np.asarray(samples[0][:, 1])
        post_weights = np.asarray(samples[1][:, 0])
        mu_post_mean = np.average(mu_post_sample, weights=post_weights)
        sigma_post_mean = np.average(sigma_post_sample, weights=post_weights)

        # test shape of sample
        mu_sample_shape = np.shape(mu_post_sample)
        sigma_sample_shape = np.shape(sigma_post_sample)
        weights_sample_shape = np.shape(post_weights)
        self.assertEqual(mu_sample_shape, (10, ))
        self.assertEqual(sigma_sample_shape, (10, ))
        self.assertEqual(weights_sample_shape, (10, ))
        self.assertLess(mu_post_mean - 3.80593164247, 10e-2)
        self.assertLess(sigma_post_mean - 7.21421951262, 10e-2)
Example #3
def infer_parameters_pmcabc():
    # define observation for true parameter means (170, 65)
    rng = np.random.RandomState(seed=1)
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]], name="mu0")
    mu1 = Uniform([[25], [100]], name="mu1")
    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme (`backend` is assumed to be defined at module level)
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1)
    # sample from scheme
    T, n_sample, n_samples_per_param = 2, 10, 1
    eps_arr = np.array([10000])
    epsilon_percentile = 95
    print('PMCABC Inferring')
    journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
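A quick way to inspect the returned journal (a sketch; posterior_mean, get_parameters and the configuration dict are standard abcpy.output.Journal attributes, and `backend` and NestedBivariateGaussian are assumed to be defined as in the surrounding script):

journal = infer_parameters_pmcabc()
print(journal.posterior_mean())              # weighted posterior means, keyed by parameter name
print(journal.get_parameters().keys())       # accepted parameter samples
print(journal.configuration["epsilon_arr"])  # the epsilon schedule actually used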
Example #4
def infer_parameters():
    # define observation for true parameters mean=170, std=15
    y_obs = [
        160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812,
        140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934,
        176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886,
        166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379,
        170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047,
        178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
        146.67058471763457, 179.41946565658628, 238.02751620619537,
        206.22458790620766, 220.89530574344568, 221.04082532837026,
        142.25301427453394, 261.37656571434275, 171.63761180867033,
        210.28121820385866, 237.29130237612236, 175.75558340169619,
        224.54340549862235, 197.42448680731226, 165.88273684581381,
        166.55094082844519, 229.54308602661584, 222.99844054358519,
        185.30223966014586, 152.69149367593846, 206.94372818527413,
        256.35498655339154, 165.43140916577741, 250.19273595481803,
        148.87781549665536, 223.05547559193792, 230.03418198709608,
        146.13611923127021, 138.24716809523139, 179.26755740864527,
        141.21704876815426, 170.89587081800852, 222.96391329259626,
        188.27229523693822, 202.67075179617672, 211.75963110985992,
        217.45423324370509
    ]

    # define prior (this example uses the older abcpy `distributions` interface)
    from abcpy.distributions import Uniform
    prior = Uniform([150, 5], [200, 25])

    # define the model
    model = Gaussian(prior)

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator)

    # define kernel
    from abcpy.distributions import MultiStudentT
    mean, cov, df = np.array([.0, .0]), np.eye(2), 3.
    kernel = MultiStudentT(mean, cov, df)

    # define backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC(model, distance_calculator, kernel, backend)

    # sample from scheme
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample(y_obs, T, eps_arr, n_sample, n_samples_per_param,
                             epsilon_percentile)

    return journal
Example #5
def infer_parameters(backend,
                     steps=3,
                     n_sample=250,
                     n_samples_per_param=10,
                     logging_level=logging.WARN):
    logging.basicConfig(level=logging_level)
    # define observation for true parameters mean=170, std=15
    height_obs = [
        160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812,
        140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934,
        176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886,
        166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379,
        170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047,
        178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
        146.67058471763457, 179.41946565658628, 238.02751620619537,
        206.22458790620766, 220.89530574344568, 221.04082532837026,
        142.25301427453394, 261.37656571434275, 171.63761180867033,
        210.28121820385866, 237.29130237612236, 175.75558340169619,
        224.54340549862235, 197.42448680731226, 165.88273684581381,
        166.55094082844519, 229.54308602661584, 222.99844054358519,
        185.30223966014586, 152.69149367593846, 206.94372818527413,
        256.35498655339154, 165.43140916577741, 250.19273595481803,
        148.87781549665536, 223.05547559193792, 230.03418198709608,
        146.13611923127021, 138.24716809523139, 179.26755740864527,
        141.21704876815426, 170.89587081800852, 222.96391329259626,
        188.27229523693822, 202.67075179617672, 211.75963110985992,
        217.45423324370509
    ]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name='mu')
    sigma = Uniform([[5], [25]], name='sigma')

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma], name='height')

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator, seed=42)

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, seed=1)

    # sample from scheme
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], steps, eps_arr, n_sample,
                             n_samples_per_param, epsilon_percentile)

    return journal
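A minimal driver for this function (a sketch; any abcpy backend can be passed, e.g. BackendMPI for a parallel run):

import logging

from abcpy.backends import BackendDummy

backend = BackendDummy()  # replace with BackendMPI() to parallelize
journal = infer_parameters(backend, steps=3, logging_level=logging.INFO)
print(journal.posterior_mean())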
Example #6
    def test_restart_from_journal(self):
        # test with value of eps_arr_2 > percentile of distances
        n_sample, n_simulate, eps_arr, eps_percentile = 10, 1, [10, 5], 10
        # 2 steps with intermediate journal:
        sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
        sampler.sample_from_prior(rng=np.random.RandomState(1))
        journal_intermediate = sampler.sample([self.observation], 1,
                                              [eps_arr[0]], n_sample,
                                              n_simulate, eps_percentile)
        journal_intermediate.save("tmp.jnl")
        journal_final_1 = sampler.sample([self.observation],
                                         1, [eps_arr[1]],
                                         n_sample,
                                         n_simulate,
                                         eps_percentile,
                                         journal_file="tmp.jnl")
        # 2 steps directly
        sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
        sampler.sample_from_prior(rng=np.random.RandomState(1))
        journal_final_2 = sampler.sample([self.observation], 2, eps_arr,
                                         n_sample, n_simulate, eps_percentile)

        self.assertEqual(journal_final_1.configuration["epsilon_arr"],
                         journal_final_2.configuration["epsilon_arr"])
        self.assertEqual(journal_final_1.posterior_mean()['mu'],
                         journal_final_2.posterior_mean()['mu'])

        # test with value of eps_arr_2 < percentile of distances
        n_sample, n_simulate, eps_arr, eps_percentile = 10, 1, [10, 1], 10
        # 2 steps with intermediate journal:
        sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
        sampler.sample_from_prior(rng=np.random.RandomState(1))
        journal_intermediate = sampler.sample([self.observation], 1,
                                              [eps_arr[0]], n_sample,
                                              n_simulate, eps_percentile)
        journal_intermediate.save("tmp.jnl")
        journal_final_1 = sampler.sample([self.observation],
                                         1, [eps_arr[1]],
                                         n_sample,
                                         n_simulate,
                                         eps_percentile,
                                         journal_file="tmp.jnl")
        # 2 steps directly
        sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
        sampler.sample_from_prior(rng=np.random.RandomState(1))
        journal_final_2 = sampler.sample([self.observation], 2, eps_arr,
                                         n_sample, n_simulate, eps_percentile)

        self.assertEqual(journal_final_1.configuration["epsilon_arr"],
                         journal_final_2.configuration["epsilon_arr"])
        self.assertEqual(journal_final_1.posterior_mean()['mu'],
                         journal_final_2.posterior_mean()['mu'])
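For reference, a journal saved with journal.save("tmp.jnl") as above can be reloaded directly; a minimal sketch using abcpy.output.Journal, including cleanup of the temporary file:

import os

from abcpy.output import Journal

journal = Journal.fromFile("tmp.jnl")
print(journal.configuration["epsilon_arr"])  # epsilons used so far
print(journal.posterior_mean())              # weighted posterior means
os.remove("tmp.jnl")                         # remove the temporary file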
Example #7
def infer_parameters():
    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [
        160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812,
        140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934,
        176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886,
        166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379,
        170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047,
        178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
        146.67058471763457, 179.41946565658628, 238.02751620619537,
        206.22458790620766, 220.89530574344568, 221.04082532837026,
        142.25301427453394, 261.37656571434275, 171.63761180867033,
        210.28121820385866, 237.29130237612236, 175.75558340169619,
        224.54340549862235, 197.42448680731226, 165.88273684581381,
        166.55094082844519, 229.54308602661584, 222.99844054358519,
        185.30223966014586, 152.69149367593846, 206.94372818527413,
        256.35498655339154, 165.43140916577741, 250.19273595481803,
        148.87781549665536, 223.05547559193792, 230.03418198709608,
        146.13611923127021, 138.24716809523139, 179.26755740864527,
        141.21704876815426, 170.89587081800852, 222.96391329259626,
        188.27229523693822, 202.67075179617672, 211.75963110985992,
        217.45423324370509
    ]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]])
    sigma = Uniform([[5], [25]])

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=3, cross=True)

    # Learn the optimal summary statistics using Semiautomatic summary selection
    from abcpy.summaryselections import Semiautomatic
    summary_selection = Semiautomatic([height],
                                      statistics_calculator,
                                      backend,
                                      n_samples=1000,
                                      n_samples_per_param=1,
                                      seed=1)

    # Redefine the statistics function: compose the learned transformation with the
    # original statistics (the default arguments bind the current function objects)
    statistics_calculator.statistics = lambda x, f2=summary_selection.transformation, \
                                              f1=statistics_calculator.statistics: f2(f1(x))

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    # sample from scheme
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], T, eps_arr, n_sample,
                             n_samples_per_param, epsilon_percentile)

    return journal
Example #8
    def run(self,
            jobID,
            n_sample,
            steps,
            epsilon_init,
            epsilon_percentile,
            save_output=True,
            parallelize=False):

        assert self._prior_set is True

        if parallelize:
            backend = BackendMPI()
        else:
            backend = BackendDummy()

        steps_minus_1 = steps - 1
        epsilon_init = [epsilon_init] + [None] * steps_minus_1

        sim = Simulator(self, self.to_sample_list, self.priors_over_hood)
        sampler = PMCABC([sim], [self._distance_calc], backend, seed=1)

        journal_filename = self.output_folder + 'journal_' + jobID

        if os.path.exists(journal_filename):
            f = open(journal_filename, 'rb')
            journal_init = pickle.load(f)
            f.close()
            print('loading from journal file..')
            stat = journal_init.get_distances()
            print(
                str(epsilon_percentile) +
                'th percentile of initial distances: ',
                np.percentile(stat, epsilon_percentile))
        else:
            print('first_iteration...')
            journal_init = None

        journal = sampler.sample([self._obs],
                                 steps,
                                 epsilon_init,
                                 n_sample,
                                 1,
                                 epsilon_percentile,
                                 journal_class=journal_init)

        stat = journal.get_distances()
        print(
            str(epsilon_percentile) + 'th percentile of new distances: ',
            np.percentile(stat, epsilon_percentile))
        print('obtained ' + str(n_sample) + ' samples from ' +
              str(journal.number_of_simulations[0]) + ' realizations')

        if save_output:
            f = open(journal_filename, 'wb')
            pickle.dump(journal, f)
            f.close()

        self._prior_set = False

        return journal
Example #9
def infer_parameters(steps=2, n_sample=50, n_samples_per_param=1, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 2.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 50.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)
    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084,
                  172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017,
                  183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515,
                  197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633,
                  165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537,
                  206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235,
                  197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519,
                  185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741,
                  250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626,
                  188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # 1) generate simulations from prior
    from abcpy.inferences import DrawFromPrior
    draw_from_prior = DrawFromPrior([height], backend=backend)

    # notice the use of the `.sample_par_sim_pairs` method rather than `.sample` to obtain data suitably formatted
    # for the summary statistics learning routines
    parameters, simulations = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # if you want to use the test loss to do early stopping in the training:
    parameters_val, simulations_val = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # discard the middle dimension (n_samples_per_param), since the StatisticsLearning classes expect it to be 1
    simulations = simulations.reshape(simulations.shape[0], simulations.shape[2])
    simulations_val = simulations_val.reshape(simulations_val.shape[0], simulations_val.shape[2])

    # 2) now train the NNs with the different methods with the generated data
    from abcpy.statistics import Identity
    identity = Identity()  # to apply before computing the statistics

    logging.info("semiNN")
    from abcpy.statisticslearning import SemiautomaticNN, TripletDistanceLearning
    semiNN = SemiautomaticNN([height], identity, backend=backend, parameters=parameters,
                             simulations=simulations, parameters_val=parameters_val, simulations_val=simulations_val,
                             early_stopping=True,  # early stopping
                             seed=1, n_epochs=10, scale_samples=False, use_tqdm=False)
    logging.info("triplet")
    triplet = TripletDistanceLearning([height], identity, backend=backend, parameters=parameters,
                                      simulations=simulations, parameters_val=parameters_val,
                                      simulations_val=simulations_val,
                                      early_stopping=True,  # early stopping
                                      seed=1, n_epochs=10, scale_samples=True, use_tqdm=False)

    # 3) save and re-load NNs:
    # get the statistics from the already fit StatisticsLearning object 'semiNN':
    learned_seminn_stat = semiNN.get_statistics()
    learned_triplet_stat = triplet.get_statistics()

    # this has a save net method:
    learned_seminn_stat.save_net("seminn_net.pth")
    # if you used `scale_samples=True` when learning the NN, you must also provide a path where pickle stores the scaler:
    learned_triplet_stat.save_net("triplet_net.pth", path_to_scaler="scaler.pkl")

    # to reload, use the NeuralEmbedding statistics' fromFile method; this needs to know which kind of NN it is
    # using, so you must pass either the input/output size (i.e. the data size and the number of parameters) or the
    # network class, if that was specified explicitly in the StatisticsLearning class. Check the docstring of
    # NeuralEmbedding.fromFile for more details.
    from abcpy.statistics import NeuralEmbedding
    learned_seminn_stat_loaded = NeuralEmbedding.fromFile("seminn_net.pth", input_size=1, output_size=2)
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl")

    # 4) you can optionally rescale the different summary statistics by their standard deviation on a reference
    # dataset of simulations. To do this, it is enough to pass the reference dataset at initialization; the rescaling
    # will then be applied every time the statistics are computed on some simulation or observation.
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl",
                                                           reference_simulations=simulations_val)

    # 5) perform inference
    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(learned_seminn_stat_loaded)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by simulations at previous
    # iteration from the observation
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
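A minimal driver for this example, plus cleanup of the files the run writes to disk (a sketch; the file names match those used above):

import logging
import os

journal = infer_parameters(logging_level=logging.INFO)
print(journal.posterior_mean())
for f in ["seminn_net.pth", "triplet_net.pth", "scaler.pkl"]:
    if os.path.exists(f):
        os.remove(f)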
Example #10
def infer_parameters(steps=3,
                     n_sample=250,
                     n_samples_per_param=10,
                     logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 3.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 250.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 10.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)
    # define observation for true parameters mean=170, std=15
    y_obs = [
        160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812,
        140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934,
        176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886,
        166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379,
        170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047,
        178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
        146.67058471763457, 179.41946565658628, 238.02751620619537,
        206.22458790620766, 220.89530574344568, 221.04082532837026,
        142.25301427453394, 261.37656571434275, 171.63761180867033,
        210.28121820385866, 237.29130237612236, 175.75558340169619,
        224.54340549862235, 197.42448680731226, 165.88273684581381,
        166.55094082844519, 229.54308602661584, 222.99844054358519,
        185.30223966014586, 152.69149367593846, 206.94372818527413,
        256.35498655339154, 165.43140916577741, 250.19273595481803,
        148.87781549665536, 223.05547559193792, 230.03418198709608,
        146.13611923127021, 138.24716809523139, 179.26755740864527,
        141.21704876815426, 170.89587081800852, 222.96391329259626,
        188.27229523693822, 202.67075179617672, 211.75963110985992,
        217.45423324370509
    ]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    model = Gaussian([mu, sigma], name='height')

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator, seed=42)

    # define backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([model], [distance_calculator], backend, seed=1)

    # sample from scheme
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample([y_obs], steps, eps_arr, n_sample,
                             n_samples_per_param, epsilon_percentile)

    return journal
Example #11
def infer_parameters(steps=3,
                     n_sample=250,
                     n_samples_per_param=10,
                     logging_level=logging.WARN):
    """Perform inference for this example.
    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 3.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 250.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 10.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)
    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [
        160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812,
        140.70658699, 169.59102084, 172.81041696, 187.38782738, 179.66358934,
        176.63417241, 189.16082803, 181.98288443, 170.18565017, 183.78493886,
        166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379,
        170.29847515, 197.96767899, 153.36646527, 162.22710198, 158.70012047,
        178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
        146.67058471763457, 179.41946565658628, 238.02751620619537,
        206.22458790620766, 220.89530574344568, 221.04082532837026,
        142.25301427453394, 261.37656571434275, 171.63761180867033,
        210.28121820385866, 237.29130237612236, 175.75558340169619,
        224.54340549862235, 197.42448680731226, 165.88273684581381,
        166.55094082844519, 229.54308602661584, 222.99844054358519,
        185.30223966014586, 152.69149367593846, 206.94372818527413,
        256.35498655339154, 165.43140916577741, 250.19273595481803,
        148.87781549665536, 223.05547559193792, 230.03418198709608,
        146.13611923127021, 138.24716809523139, 179.26755740864527,
        141.21704876815426, 170.89587081800852, 222.96391329259626,
        188.27229523693822, 202.67075179617672, 211.75963110985992,
        217.45423324370509
    ]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=3, cross=True)

    # Learn the optimal summary statistics using Semiautomatic summary selection
    from abcpy.statisticslearning import Semiautomatic
    statistics_learning = Semiautomatic([height],
                                        statistics_calculator,
                                        backend,
                                        n_samples=1000,
                                        n_samples_per_param=1,
                                        seed=1)

    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # Learn the optimal summary statistics using SemiautomaticNN summary selection;
    # we use 200 samples as a validation set for early stopping:
    from abcpy.statisticslearning import SemiautomaticNN
    statistics_learning = SemiautomaticNN([height],
                                          statistics_calculator,
                                          backend,
                                          n_samples=1000,
                                          n_samples_val=200,
                                          n_samples_per_param=1,
                                          seed=1,
                                          early_stopping=True)

    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(new_statistics_calculator)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by simulations at previous
    # iteration from the observation
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], steps, eps_arr, n_sample,
                             n_samples_per_param, epsilon_percentile)

    return journal
Example #12
def infer_parameters():
    # The data corresponding to model_1 defined below
    grades_obs = [3.872486707973337, 4.6735380808674405, 3.9703538990858376, 4.11021272048805, 4.211048655421368, 4.154817956586653, 4.0046893064392695, 4.01891381384729, 4.123804757702919, 4.014941267301294, 3.888174595940634, 4.185275142948246, 4.55148774469135, 3.8954427675259016, 4.229264035335705, 3.839949451328312, 4.039402553532825, 4.128077814241238, 4.361488645531874, 4.086279074446419, 4.370801602256129, 3.7431697332475466, 4.459454162392378, 3.8873973643008255, 4.302566721487124, 4.05556051626865, 4.128817316703757, 3.8673704442215984, 4.2174459453805015, 4.202280254493361, 4.072851400451234, 3.795173229398952, 4.310702877332585, 4.376886328810306, 4.183704734748868, 4.332192463368128, 3.9071312388426587, 4.311681374107893, 3.55187913252144, 3.318878360783221, 4.187850500877817, 4.207923106081567, 4.190462065625179, 4.2341474252986036, 4.110228694304768, 4.1589891480847765, 4.0345604687633045, 4.090635481715123, 3.1384654393449294, 4.20375641386518, 4.150452690356067, 4.015304457401275, 3.9635442007388195, 4.075915739179875, 3.5702080541929284, 4.722333310410388, 3.9087618197155227, 4.3990088006390735, 3.968501165774181, 4.047603645360087, 4.109184340976979, 4.132424805281853, 4.444358334346812, 4.097211737683927, 4.288553086265748, 3.8668863066511303, 3.8837108501541007]

    # The prior: the class size and the teacher-student ratio both depend on the yearly budget of the school
    from abcpy.continuousmodels import Uniform, Normal
    school_budget = Uniform([[1], [10]], name = 'school_budget')

    # The average class size of a certain school
    class_size = Normal([[800*school_budget], [1]], name = 'class_size')

    # The number of teachers in the school
    no_teacher = Normal([[20*school_budget], [1]], name = 'no_teacher')

    # The grade a student would receive without any bias
    grade_without_additional_effects = Normal([[4.5], [0.25]], name = 'grade_without_additional_effects')

    # The grade a student of a certain school receives
    final_grade = grade_without_additional_effects - .001 * class_size + .02 * no_teacher

    # The data corresponding to model_2 defined below
    scholarship_obs = [2.7179657436207805, 2.124647285937229, 3.07193407853297, 2.335024761813643, 2.871893855192, 3.4332002458233837, 3.649996835818173, 3.50292335102711, 2.815638168018455, 2.3581613289315992, 2.2794821846395568, 2.8725835459926503, 3.5588573782815685, 2.26053126526137, 1.8998143530749971, 2.101110815311782, 2.3482974964831573, 2.2707679029919206, 2.4624550491079225, 2.867017757972507, 3.204249152084959, 2.4489542437714213, 1.875415915801106, 2.5604889644872433, 3.891985093269989, 2.7233633223405205, 2.2861070389383533, 2.9758813233490082, 3.1183403287267755, 2.911814060853062, 2.60896794303205, 3.5717098647480316, 3.3355752461779824, 1.99172284546858, 2.339937680892163, 2.9835630207301636, 2.1684912355975774, 3.014847335983034, 2.7844122961916202, 2.752119871525148, 2.1567428931391635, 2.5803629307680644, 2.7326646074552103, 2.559237193255186, 3.13478196958166, 2.388760269933492, 3.2822443541491815, 2.0114405441787437, 3.0380056368041073, 2.4889680313769724, 2.821660164621084, 3.343985964873723, 3.1866861970287808, 4.4535037154856045, 3.0026333138006027, 2.0675706089352612, 2.3835301730913185, 2.584208398359566, 3.288077633446465, 2.6955853384148183, 2.918315169739928, 3.2464814419322985, 2.1601516779909433, 3.231003347780546, 1.0893224045062178, 0.8032302688764734, 2.868438615047827]

    # A quantity that determines whether a student will receive a scholarship
    scholarship_without_additional_effects = Normal([[2], [0.5]], name = 'schol_without_additional_effects')

    # A quantity determining whether a student receives a scholarship, including the effect of the number of teachers
    final_scholarship = scholarship_without_additional_effects + .03 * no_teacher

    # Define a summary statistics for final grade and final scholarship
    from abcpy.statistics import Identity
    statistics_calculator_final_grade = Identity(degree = 2, cross = False)
    statistics_calculator_final_scholarship = Identity(degree = 3, cross = False)

    # Define a distance measure for final grade and final scholarship
    from abcpy.distances import Euclidean
    distance_calculator_final_grade = Euclidean(statistics_calculator_final_grade)
    distance_calculator_final_scholarship = Euclidean(statistics_calculator_final_scholarship)

    # Define a backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # Define kernels
    from abcpy.perturbationkernel import MultivariateNormalKernel, MultivariateStudentTKernel
    kernel_1 = MultivariateNormalKernel([school_budget,
                                         scholarship_without_additional_effects,
                                         grade_without_additional_effects])
    kernel_2 = MultivariateStudentTKernel([class_size, no_teacher], df=3)

    # Join the defined kernels
    from abcpy.perturbationkernel import JointPerturbationKernel
    kernel = JointPerturbationKernel([kernel_1, kernel_2])


    # Define sampling parameters
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10

    # Define sampler
    from abcpy.inferences import PMCABC
    sampler = PMCABC([final_grade, final_scholarship],
                     [distance_calculator_final_grade, distance_calculator_final_scholarship],
                     backend, kernel)

    # Sample
    journal = sampler.sample([grades_obs, scholarship_obs], T, eps_arr, n_sample,
                             n_samples_per_param, epsilon_percentile)

    return journal
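A minimal driver for this hierarchical example (a sketch; posterior means are reported per named parameter):

journal = infer_parameters()
print(journal.posterior_mean())  # e.g. keys 'school_budget', 'class_size', 'no_teacher', ...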
Example #13
from time import time

import numpy as np

from abcpy.inferences import APMCABC, PMCABC, RejectionABC, SABC, SMCABC


def ABC_inference(algorithm,
                  model,
                  observation,
                  distance_calculator,
                  eps,
                  n_samples,
                  n_steps,
                  backend,
                  seed=None,
                  full_output=0,
                  **kwargs):
    """NB: eps represents initial value of epsilon for PMCABC and SABC; represents the single eps value for RejectionABC
     and represents the final value for SMCABC."""
    start = time()
    if algorithm == "PMCABC":
        sampler = PMCABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]],
                              n_steps,
                              np.array([eps]),
                              n_samples=n_samples,
                              full_output=full_output,
                              **kwargs)
    if algorithm == "APMCABC":
        sampler = APMCABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]],
                              n_steps,
                              n_samples=n_samples,
                              full_output=full_output,
                              **kwargs)
    elif algorithm == "SABC":
        sampler = SABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]],
                              n_steps,
                              eps,
                              n_samples=n_samples,
                              full_output=full_output,
                              **kwargs)
    elif algorithm == "RejectionABC":
        sampler = RejectionABC([model], [distance_calculator],
                               backend,
                               seed=seed)
        jrnl = sampler.sample([[observation]],
                              eps,
                              n_samples=n_samples,
                              full_output=full_output,
                              **kwargs)
    elif algorithm == "SMCABC":
        # for this usually a larger number of steps is required. alpha can be left to 0.95, covFactor=2 and
        # resample=None. epsilon_final is instead important to fix!
        sampler = SMCABC([model], [distance_calculator], backend, seed=seed)
        # sample(observations, steps, n_samples=10000, n_samples_per_param=1, epsilon_final=0.1, alpha=0.95,
        #        covFactor=2, resample=None, full_output=0, which_mcmc_kernel=0, journal_file=None)
        jrnl = sampler.sample([[observation]],
                              n_steps,
                              n_samples=n_samples,
                              full_output=full_output,
                              epsilon_final=eps,
                              **kwargs)

    print("It took ", time() - start, " seconds.")

    return jrnl
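A possible invocation (a sketch; `model`, `observation` and `distance_calculator` are assumed to be built as in the earlier examples, and epsilon_percentile is forwarded to PMCABC.sample via **kwargs):

from abcpy.backends import BackendDummy

backend = BackendDummy()
jrnl = ABC_inference("PMCABC", model, observation, distance_calculator,
                     eps=100, n_samples=100, n_steps=3, backend=backend,
                     seed=42, full_output=1, epsilon_percentile=10)
print(jrnl.posterior_mean())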