def test_sample(self):
    # use the PMCABC scheme for T = 1
    T, n_sample, n_simulate, eps_arr, eps_percentile = 1, 10, 1, [10], 10
    sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
    journal = sampler.sample([self.observation], T, eps_arr, n_sample, n_simulate, eps_percentile)
    mu_post_sample = np.array(journal.get_parameters()['mu'])
    sigma_post_sample = np.array(journal.get_parameters()['sigma'])
    post_weights = np.array(journal.get_weights())

    # compute the weighted posterior means
    mu_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0)
    sigma_post_mean = np.average(sigma_post_sample, weights=post_weights, axis=0)

    # test the shape of the samples
    mu_sample_shape = np.shape(mu_post_sample)
    sigma_sample_shape = np.shape(sigma_post_sample)
    weights_sample_shape = np.shape(post_weights)
    self.assertEqual(mu_sample_shape, (10, 1))
    self.assertEqual(sigma_sample_shape, (10, 1))
    self.assertEqual(weights_sample_shape, (10, 1))
    self.assertLess(mu_post_mean - 0.03713, 10e-2)
    self.assertLess(sigma_post_mean - 7.727, 10e-2)
    # self.assertEqual((mu_post_mean, sigma_post_mean), (,))

    # use the PMCABC scheme for T = 2
    T, n_sample, n_simulate, eps_arr, eps_percentile = 2, 10, 1, [10, 5], 10
    sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
    sampler.sample_from_prior(rng=np.random.RandomState(1))
    journal = sampler.sample([self.observation], T, eps_arr, n_sample, n_simulate, eps_percentile)
    mu_post_sample = np.array(journal.get_parameters()['mu'])
    sigma_post_sample = np.array(journal.get_parameters()['sigma'])
    post_weights = np.array(journal.get_weights())

    # compute the weighted posterior means
    mu_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0)
    sigma_post_mean = np.average(sigma_post_sample, weights=post_weights, axis=0)

    # test the shape of the samples
    mu_sample_shape = np.shape(mu_post_sample)
    sigma_sample_shape = np.shape(sigma_post_sample)
    weights_sample_shape = np.shape(post_weights)
    self.assertEqual(mu_sample_shape, (10, 1))
    self.assertEqual(sigma_sample_shape, (10, 1))
    self.assertEqual(weights_sample_shape, (10, 1))
    self.assertLess(mu_post_mean - 0.9356, 10e-2)
    self.assertLess(sigma_post_mean - 7.819, 10e-2)
    self.assertFalse(journal.number_of_simulations == 0)
def test_sample(self):
    # use the PMCABC scheme for T = 1
    T, n_sample, n_simulate, eps_arr, eps_percentile = 1, 10, 1, [.1], 10
    sampler = PMCABC(self.model, self.dist_calc, self.kernel, self.backend, seed=1)
    journal = sampler.sample(self.observation, T, eps_arr, n_sample, n_simulate, eps_percentile)
    samples = (journal.get_parameters(), journal.get_weights())

    # compute the weighted posterior means
    mu_post_sample = np.asarray(samples[0][:, 0])
    sigma_post_sample = np.asarray(samples[0][:, 1])
    post_weights = np.asarray(samples[1][:, 0])
    mu_post_mean = np.average(mu_post_sample, weights=post_weights)
    sigma_post_mean = np.average(sigma_post_sample, weights=post_weights)

    # test the shape of the samples
    mu_sample_shape = np.shape(mu_post_sample)
    sigma_sample_shape = np.shape(sigma_post_sample)
    weights_sample_shape = np.shape(post_weights)
    self.assertEqual(mu_sample_shape, (10,))
    self.assertEqual(sigma_sample_shape, (10,))
    self.assertEqual(weights_sample_shape, (10,))
    # self.assertEqual((mu_post_mean, sigma_post_mean), (,))

    # use the PMCABC scheme for T = 2
    T, n_sample, n_simulate, eps_arr, eps_percentile = 2, 10, 1, [.1, .05], 10
    sampler = PMCABC(self.model, self.dist_calc, self.kernel, self.backend, seed=1)
    journal = sampler.sample(self.observation, T, eps_arr, n_sample, n_simulate, eps_percentile)
    samples = (journal.get_parameters(), journal.get_weights())

    # compute the weighted posterior means
    mu_post_sample = np.asarray(samples[0][:, 0])
    sigma_post_sample = np.asarray(samples[0][:, 1])
    post_weights = np.asarray(samples[1][:, 0])
    mu_post_mean = np.average(mu_post_sample, weights=post_weights)
    sigma_post_mean = np.average(sigma_post_sample, weights=post_weights)

    # test the shape of the samples
    mu_sample_shape = np.shape(mu_post_sample)
    sigma_sample_shape = np.shape(sigma_post_sample)
    weights_sample_shape = np.shape(post_weights)
    self.assertEqual(mu_sample_shape, (10,))
    self.assertEqual(sigma_sample_shape, (10,))
    self.assertEqual(weights_sample_shape, (10,))
    self.assertLess(mu_post_mean - 3.80593164247, 10e-2)
    self.assertLess(sigma_post_mean - 7.21421951262, 10e-2)
def infer_parameters_pmcabc():
    # define observation for true parameters mean = (170, 65)
    rng = np.random.RandomState(seed=1)
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]], name="mu0")
    mu1 = Uniform([[25], [100]], name="mu1")

    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1)

    # sample from scheme
    T, n_sample, n_samples_per_param = 2, 10, 1
    eps_arr = np.array([10000])
    epsilon_percentile = 95
    print('PMCABC Inferring')
    journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
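# Usage sketch (not part of the original snippet): `infer_parameters_pmcabc` above relies on a module-level
# `backend` and on the `NestedBivariateGaussian` model being defined elsewhere; assuming both exist, the
# returned journal can be summarised like this.
if __name__ == "__main__":
    journal = infer_parameters_pmcabc()
    print(journal.posterior_mean())  # weighted posterior means of mu0 and mu1
    print(journal.configuration)     # sampler settings recorded in the journal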
def infer_parameters():
    # define observation for true parameters mean=170, std=15
    y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
             169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
             181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
             156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
             158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
             146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
             220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
             171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
             224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
             229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
             206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
             148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
             138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
             222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
             217.45423324370509]

    # define prior
    from abcpy.distributions import Uniform
    prior = Uniform([150, 5], [200, 25])

    # define the model
    model = Gaussian(prior)

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator)

    # define kernel
    from abcpy.distributions import MultiStudentT
    mean, cov, df = np.array([.0, .0]), np.eye(2), 3.
    kernel = MultiStudentT(mean, cov, df)

    # define backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC(model, distance_calculator, kernel, backend)

    # sample from scheme
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample(y_obs, T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
def infer_parameters(backend, steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN):
    logging.basicConfig(level=logging_level)

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name='mu')
    sigma = Uniform([[5], [25]], name='sigma')

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma], name='height')

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator, seed=42)

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, seed=1)

    # sample from scheme
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
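# Minimal usage sketch (an assumption, not part of the original example): run the inference above on the
# sequential dummy backend and summarise the resulting journal.
if __name__ == "__main__":
    from abcpy.backends import BackendDummy

    journal = infer_parameters(BackendDummy(), steps=2, n_sample=50)
    print(journal.posterior_mean())  # weighted posterior means of 'mu' and 'sigma'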
def test_restart_from_journal(self):
    # test with a value of eps_arr[1] larger than the percentile of the distances
    n_sample, n_simulate, eps_arr, eps_percentile = 10, 1, [10, 5], 10

    # 2 steps with an intermediate journal:
    sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
    sampler.sample_from_prior(rng=np.random.RandomState(1))
    journal_intermediate = sampler.sample([self.observation], 1, [eps_arr[0]],
                                          n_sample, n_simulate, eps_percentile)
    journal_intermediate.save("tmp.jnl")
    journal_final_1 = sampler.sample([self.observation], 1, [eps_arr[1]], n_sample,
                                     n_simulate, eps_percentile, journal_file="tmp.jnl")

    # 2 steps directly:
    sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
    sampler.sample_from_prior(rng=np.random.RandomState(1))
    journal_final_2 = sampler.sample([self.observation], 2, eps_arr, n_sample,
                                     n_simulate, eps_percentile)
    self.assertEqual(journal_final_1.configuration["epsilon_arr"],
                     journal_final_2.configuration["epsilon_arr"])
    self.assertEqual(journal_final_1.posterior_mean()['mu'],
                     journal_final_2.posterior_mean()['mu'])

    # test with a value of eps_arr[1] smaller than the percentile of the distances
    n_sample, n_simulate, eps_arr, eps_percentile = 10, 1, [10, 1], 10

    # 2 steps with an intermediate journal:
    sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
    sampler.sample_from_prior(rng=np.random.RandomState(1))
    journal_intermediate = sampler.sample([self.observation], 1, [eps_arr[0]],
                                          n_sample, n_simulate, eps_percentile)
    journal_intermediate.save("tmp.jnl")
    journal_final_1 = sampler.sample([self.observation], 1, [eps_arr[1]], n_sample,
                                     n_simulate, eps_percentile, journal_file="tmp.jnl")

    # 2 steps directly:
    sampler = PMCABC([self.model], [self.dist_calc], self.backend, seed=1)
    sampler.sample_from_prior(rng=np.random.RandomState(1))
    journal_final_2 = sampler.sample([self.observation], 2, eps_arr, n_sample,
                                     n_simulate, eps_percentile)
    self.assertEqual(journal_final_1.configuration["epsilon_arr"],
                     journal_final_2.configuration["epsilon_arr"])
    self.assertEqual(journal_final_1.posterior_mean()['mu'],
                     journal_final_2.posterior_mean()['mu'])
def infer_parameters():
    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]])
    sigma = Uniform([[5], [25]])

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=3, cross=True)

    # Learn the optimal summary statistics using Semiautomatic summary selection
    from abcpy.summaryselections import Semiautomatic
    summary_selection = Semiautomatic([height], statistics_calculator, backend,
                                      n_samples=1000, n_samples_per_param=1, seed=1)

    # Redefine the statistics function: compose the learned transformation with the original statistics
    statistics_calculator.statistics = lambda x, f2=summary_selection.transformation, \
        f1=statistics_calculator.statistics: f2(f1(x))

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    # sample from scheme
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
def run(self, jobID, n_sample, steps, epsilon_init, epsilon_percentile, save_output=True, parallelize=False):
    assert self._prior_set is True
    backend = BackendMPI() if parallelize else BackendDummy()

    # only the first generation uses the initial epsilon; the later ones are derived from the
    # epsilon_percentile of the previous generation's distances
    epsilon_init = [epsilon_init] + [None] * (steps - 1)

    sim = Simulator(self, self.to_sample_list, self.priors_over_hood)
    sampler = PMCABC([sim], [self._distance_calc], backend, seed=1)

    journal_filename = self.output_folder + 'journal_' + jobID
    if os.path.exists(journal_filename):
        with open(journal_filename, 'rb') as f:
            journal_init = pickle.load(f)
        print('loading from journal file..')
        stat = journal_init.get_distances()
        print(str(epsilon_percentile) + 'th percentile of initial distances: ',
              np.percentile(stat, epsilon_percentile))
    else:
        print('first iteration...')
        journal_init = None

    # resume from the previously loaded journal, if any
    journal = sampler.sample([self._obs], steps, epsilon_init, n_sample, 1, epsilon_percentile,
                             journal_class=journal_init)

    stat = journal.get_distances()
    print(str(epsilon_percentile) + 'th percentile of new distances: ',
          np.percentile(stat, epsilon_percentile))
    print('obtained ' + str(n_sample) + ' samples from ' + str(journal.number_of_simulations[0])
          + ' realizations')

    if save_output:
        with open(journal_filename, 'wb') as f:
            pickle.dump(journal, f)

    self._prior_set = False
    return journal
def infer_parameters(steps=2, n_sample=50, n_samples_per_param=1, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 2.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 50.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # 1) generate simulations from the prior
    from abcpy.inferences import DrawFromPrior
    draw_from_prior = DrawFromPrior([height], backend=backend)

    # notice the use of the `.sample_par_sim_pairs` method rather than `.sample` to obtain data suitably
    # formatted for the summary statistics learning routines
    parameters, simulations = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)

    # if you want to use the test loss to do early stopping in the training:
    parameters_val, simulations_val = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)

    # discard the middle dimension (n_samples_per_param), as the StatisticsLearning classes require it to be 1
    simulations = simulations.reshape(simulations.shape[0], simulations.shape[2])
    simulations_val = simulations_val.reshape(simulations_val.shape[0], simulations_val.shape[2])

    # 2) now train the NNs with the different methods on the generated data
    from abcpy.statistics import Identity
    identity = Identity()  # applied before computing the statistics

    logging.info("semiNN")
    from abcpy.statisticslearning import SemiautomaticNN, TripletDistanceLearning
    semiNN = SemiautomaticNN([height], identity, backend=backend, parameters=parameters,
                             simulations=simulations, parameters_val=parameters_val,
                             simulations_val=simulations_val,
                             early_stopping=True,  # early stopping
                             seed=1, n_epochs=10, scale_samples=False, use_tqdm=False)
    logging.info("triplet")
    triplet = TripletDistanceLearning([height], identity, backend=backend, parameters=parameters,
                                      simulations=simulations, parameters_val=parameters_val,
                                      simulations_val=simulations_val,
                                      early_stopping=True,  # early stopping
                                      seed=1, n_epochs=10, scale_samples=True, use_tqdm=False)

    # 3) save and re-load the NNs:
    # get the statistics from the already fitted StatisticsLearning object 'semiNN':
    learned_seminn_stat = semiNN.get_statistics()
    learned_triplet_stat = triplet.get_statistics()

    # this has a save_net method:
    learned_seminn_stat.save_net("seminn_net.pth")
    # if you used `scale_samples=True` when learning the NNs, you also need to provide a path where pickle
    # stores the scaler:
    learned_triplet_stat.save_net("triplet_net.pth", path_to_scaler="scaler.pkl")

    # to reload, use NeuralEmbedding.fromFile; this needs to know which kind of NN it is using, so you must
    # pass either the input/output size (i.e. the data size and the number of parameters) or the network
    # class, if that was specified explicitly in the StatisticsLearning class. Check the docstring of
    # NeuralEmbedding.fromFile for more details.
    from abcpy.statistics import NeuralEmbedding
    learned_seminn_stat_loaded = NeuralEmbedding.fromFile("seminn_net.pth", input_size=1, output_size=2)
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl")

    # 4) you can optionally rescale the different summary statistics by their standard deviation on a
    # reference dataset of simulations. To do this, it is enough to pass the reference dataset at
    # initialization; the rescaling is then applied every time the statistics are computed on a simulation
    # or observation.
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl",
                                                           reference_simulations=simulations_val)

    # 5) perform inference
    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(learned_seminn_stat_loaded)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by the
    # simulations at the previous iteration from the observation
    epsilon_percentile = 10

    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
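# Usage sketch (an assumption, not part of the original example): journals returned by the function above
# can be written to disk with `save` and read back with `Journal.fromFile`, mirroring the restart pattern
# used elsewhere in this section.
if __name__ == "__main__":
    from abcpy.output import Journal

    journal = infer_parameters(steps=1, n_sample=10)
    journal.save("pmcabc_nn.jnl")                 # pickle the journal to disk
    reloaded = Journal.fromFile("pmcabc_nn.jnl")  # and reload it later
    print(reloaded.posterior_mean())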
def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 3.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 250.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 10.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # define observation for true parameters mean=170, std=15
    y_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
             169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
             181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
             156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
             158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
             146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
             220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
             171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
             224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
             229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
             206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
             148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
             138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
             222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
             217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    model = Gaussian([mu, sigma], name='height')

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import LogReg
    distance_calculator = LogReg(statistics_calculator, seed=42)

    # define backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([model], [distance_calculator], backend, seed=1)

    # sample from scheme
    eps_arr = np.array([.75])
    epsilon_percentile = 10
    journal = sampler.sample([y_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 3.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 250.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 10.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=3, cross=True)

    # Learn the optimal summary statistics using Semiautomatic summary selection
    from abcpy.statisticslearning import Semiautomatic
    statistics_learning = Semiautomatic([height], statistics_calculator, backend,
                                        n_samples=1000, n_samples_per_param=1, seed=1)

    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # Learn the optimal summary statistics using SemiautomaticNN summary selection;
    # we use 200 samples as a validation set for early stopping:
    from abcpy.statisticslearning import SemiautomaticNN
    statistics_learning = SemiautomaticNN([height], statistics_calculator, backend,
                                          n_samples=1000, n_samples_val=200,
                                          n_samples_per_param=1, seed=1, early_stopping=True)

    # Redefine the statistics function (this overwrites the previous one; both approaches are shown)
    new_statistics_calculator = statistics_learning.get_statistics()

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(new_statistics_calculator)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by the
    # simulations at the previous iteration from the observation
    epsilon_percentile = 10

    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
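# Usage sketch (an assumption; `plot_posterior_distr` is available on Journal objects in recent ABCpy
# versions): run a short inference with the function above and write a posterior plot to disk.
if __name__ == "__main__":
    journal = infer_parameters(steps=1, n_sample=50)
    journal.plot_posterior_distr(path_to_save="posterior.png")  # corner plot of mu and sigma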
def infer_parameters():
    # The data corresponding to model_1 defined below
    grades_obs = [3.872486707973337, 4.6735380808674405, 3.9703538990858376, 4.11021272048805,
                  4.211048655421368, 4.154817956586653, 4.0046893064392695, 4.01891381384729,
                  4.123804757702919, 4.014941267301294, 3.888174595940634, 4.185275142948246,
                  4.55148774469135, 3.8954427675259016, 4.229264035335705, 3.839949451328312,
                  4.039402553532825, 4.128077814241238, 4.361488645531874, 4.086279074446419,
                  4.370801602256129, 3.7431697332475466, 4.459454162392378, 3.8873973643008255,
                  4.302566721487124, 4.05556051626865, 4.128817316703757, 3.8673704442215984,
                  4.2174459453805015, 4.202280254493361, 4.072851400451234, 3.795173229398952,
                  4.310702877332585, 4.376886328810306, 4.183704734748868, 4.332192463368128,
                  3.9071312388426587, 4.311681374107893, 3.55187913252144, 3.318878360783221,
                  4.187850500877817, 4.207923106081567, 4.190462065625179, 4.2341474252986036,
                  4.110228694304768, 4.1589891480847765, 4.0345604687633045, 4.090635481715123,
                  3.1384654393449294, 4.20375641386518, 4.150452690356067, 4.015304457401275,
                  3.9635442007388195, 4.075915739179875, 3.5702080541929284, 4.722333310410388,
                  3.9087618197155227, 4.3990088006390735, 3.968501165774181, 4.047603645360087,
                  4.109184340976979, 4.132424805281853, 4.444358334346812, 4.097211737683927,
                  4.288553086265748, 3.8668863066511303, 3.8837108501541007]

    # The prior information, changing the class size and the number of teachers depending on the
    # yearly budget of the school
    from abcpy.continuousmodels import Uniform, Normal
    school_budget = Uniform([[1], [10]], name='school_budget')

    # The average class size of a certain school
    class_size = Normal([[800 * school_budget], [1]], name='class_size')

    # The number of teachers in the school
    no_teacher = Normal([[20 * school_budget], [1]], name='no_teacher')

    # The grade a student would receive without any bias
    grade_without_additional_effects = Normal([[4.5], [0.25]], name='grade_without_additional_effects')

    # The grade a student of a certain school receives
    final_grade = grade_without_additional_effects - .001 * class_size + .02 * no_teacher

    # The data corresponding to model_2 defined below
    scholarship_obs = [2.7179657436207805, 2.124647285937229, 3.07193407853297, 2.335024761813643,
                       2.871893855192, 3.4332002458233837, 3.649996835818173, 3.50292335102711,
                       2.815638168018455, 2.3581613289315992, 2.2794821846395568, 2.8725835459926503,
                       3.5588573782815685, 2.26053126526137, 1.8998143530749971, 2.101110815311782,
                       2.3482974964831573, 2.2707679029919206, 2.4624550491079225, 2.867017757972507,
                       3.204249152084959, 2.4489542437714213, 1.875415915801106, 2.5604889644872433,
                       3.891985093269989, 2.7233633223405205, 2.2861070389383533, 2.9758813233490082,
                       3.1183403287267755, 2.911814060853062, 2.60896794303205, 3.5717098647480316,
                       3.3355752461779824, 1.99172284546858, 2.339937680892163, 2.9835630207301636,
                       2.1684912355975774, 3.014847335983034, 2.7844122961916202, 2.752119871525148,
                       2.1567428931391635, 2.5803629307680644, 2.7326646074552103, 2.559237193255186,
                       3.13478196958166, 2.388760269933492, 3.2822443541491815, 2.0114405441787437,
                       3.0380056368041073, 2.4889680313769724, 2.821660164621084, 3.343985964873723,
                       3.1866861970287808, 4.4535037154856045, 3.0026333138006027, 2.0675706089352612,
                       2.3835301730913185, 2.584208398359566, 3.288077633446465, 2.6955853384148183,
                       2.918315169739928, 3.2464814419322985, 2.1601516779909433, 3.231003347780546,
                       1.0893224045062178, 0.8032302688764734, 2.868438615047827]

    # A quantity that determines whether a student will receive a scholarship
    scholarship_without_additional_effects = Normal([[2], [0.5]], name='schol_without_additional_effects')

    # A quantity determining whether a student receives a scholarship, adjusted for the number of
    # teachers in the school
    final_scholarship = scholarship_without_additional_effects + .03 * no_teacher

    # Define summary statistics for final grade and final scholarship
    from abcpy.statistics import Identity
    statistics_calculator_final_grade = Identity(degree=2, cross=False)
    statistics_calculator_final_scholarship = Identity(degree=3, cross=False)

    # Define a distance measure for final grade and final scholarship
    from abcpy.distances import Euclidean
    distance_calculator_final_grade = Euclidean(statistics_calculator_final_grade)
    distance_calculator_final_scholarship = Euclidean(statistics_calculator_final_scholarship)

    # Define a backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # Define kernels
    from abcpy.perturbationkernel import MultivariateNormalKernel, MultivariateStudentTKernel
    kernel_1 = MultivariateNormalKernel([school_budget,
                                         scholarship_without_additional_effects,
                                         grade_without_additional_effects])
    kernel_2 = MultivariateStudentTKernel([class_size, no_teacher], df=3)

    # Join the defined kernels
    from abcpy.perturbationkernel import JointPerturbationKernel
    kernel = JointPerturbationKernel([kernel_1, kernel_2])

    # Define sampling parameters
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10

    # Define sampler; one distance calculator per observed data set
    from abcpy.inferences import PMCABC
    sampler = PMCABC([final_grade, final_scholarship],
                     [distance_calculator_final_grade, distance_calculator_final_scholarship],
                     backend, kernel)

    # Sample
    journal = sampler.sample([grades_obs, scholarship_obs], T, eps_arr,
                             n_sample, n_samples_per_param, epsilon_percentile)

    return journal
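# Usage sketch (an assumption, not part of the original example): with the hierarchical model above, the
# journal stores one marginal per named model parameter.
if __name__ == "__main__":
    journal = infer_parameters()
    print(journal.get_parameters().keys())  # e.g. school_budget, class_size, no_teacher, ...
    print(journal.posterior_mean())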
def ABC_inference(algorithm, model, observation, distance_calculator, eps, n_samples, n_steps, backend,
                  seed=None, full_output=0, **kwargs):
    """NB: `eps` is the initial value of epsilon for PMCABC and SABC, the single epsilon value for
    RejectionABC, and the final value for SMCABC."""
    start = time()
    if algorithm == "PMCABC":
        sampler = PMCABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]], n_steps, np.array([eps]), n_samples=n_samples,
                              full_output=full_output, **kwargs)
    elif algorithm == "APMCABC":
        sampler = APMCABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]], n_steps, n_samples=n_samples, full_output=full_output, **kwargs)
    elif algorithm == "SABC":
        sampler = SABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]], n_steps, eps, n_samples=n_samples, full_output=full_output,
                              **kwargs)
    elif algorithm == "RejectionABC":
        sampler = RejectionABC([model], [distance_calculator], backend, seed=seed)
        jrnl = sampler.sample([[observation]], eps, n_samples=n_samples, full_output=full_output, **kwargs)
    elif algorithm == "SMCABC":
        # this usually requires a larger number of steps. alpha can be left at 0.95, covFactor=2 and
        # resample=None. epsilon_final, instead, is important to fix!
        sampler = SMCABC([model], [distance_calculator], backend, seed=seed)
        # signature: sample(observations, steps, n_samples=10000, n_samples_per_param=1, epsilon_final=0.1,
        #                   alpha=0.95, covFactor=2, resample=None, full_output=0, which_mcmc_kernel=0,
        #                   journal_file=None)
        jrnl = sampler.sample([[observation]], n_steps, n_samples=n_samples, full_output=full_output,
                              epsilon_final=eps, **kwargs)
    print("It took ", time() - start, " seconds.")
    return jrnl
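# Usage sketch (an assumption, not from the original source): drive the dispatcher above with a simple
# Normal model, identity statistics and the dummy backend; the model, observation and epsilon value here
# are illustrative only.
if __name__ == "__main__":
    import numpy as np
    from abcpy.backends import BackendDummy
    from abcpy.continuousmodels import Normal, Uniform
    from abcpy.distances import Euclidean
    from abcpy.statistics import Identity

    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")
    model = Normal([mu, sigma])
    distance_calculator = Euclidean(Identity(degree=1, cross=False))
    observation = np.array([170.0])  # a single (illustrative) observed data point

    jrnl = ABC_inference("PMCABC", model, observation, distance_calculator, eps=50,
                         n_samples=20, n_steps=2, backend=BackendDummy(), seed=42)
    print(jrnl.posterior_mean())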