class EuclideanTests(unittest.TestCase):
    def setUp(self):
        self.stat_calc = Identity(degree=1, cross=0)
        self.distancefunc = Euclidean(self.stat_calc)

    def test_distance(self):
        # test simple distance computation
        a = [[0, 0, 0], [0, 0, 0]]
        b = [[0, 0, 0], [0, 0, 0]]
        c = [[1, 1, 1], [1, 1, 1]]

        # check whether a wrong input type raises an error
        self.assertRaises(TypeError, self.distancefunc.distance, 3.4, b)
        self.assertRaises(TypeError, self.distancefunc.distance, a, 3.4)

        # test inputs with mismatched dimensionality
        self.assertRaises(BaseException, self.distancefunc.distance, a, np.array([[0, 0], [1, 2]]))
        self.assertRaises(BaseException, self.distancefunc.distance, a,
                          np.array([[0, 0, 0], [1, 2, 3], [4, 5, 6]]))

        # test whether correct values are computed
        self.assertTrue(self.distancefunc.distance(a, b) == np.array([0]))
        self.assertTrue(self.distancefunc.distance(a, c) == np.array([1.7320508075688772]))

    def test_dist_max(self):
        self.assertTrue(self.distancefunc.dist_max() == np.inf)
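# For reference, the expected value 1.7320508075688772 in test_distance above is just the
# Euclidean norm of the element-wise difference between one sample of `a` and one of `c`;
# a minimal standalone check (plain NumPy, independent of abcpy):
import numpy as np

# each sample of c differs from the corresponding sample of a by (1, 1, 1),
# whose Euclidean norm is sqrt(3) ~= 1.7320508075688772
diff = np.array([1, 1, 1]) - np.array([0, 0, 0])
assert np.isclose(np.linalg.norm(diff), np.sqrt(3))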
def setUp(self):
    self.stat_calc1 = Identity(degree=1, cross=0)
    self.stat_calc2 = Identity(degree=1, cross=0)
    self.distancefunc1 = Euclidean(self.stat_calc1)
    self.distancefunc2 = Euclidean(self.stat_calc2)

    ## Define Models
    # define a uniform prior distribution
    mu = Uniform([[-5.0], [5.0]], name='mu')
    sigma = Uniform([[0.0], [10.0]], name='sigma')
    # define a Gaussian model
    self.model1 = Normal([mu, sigma])
    self.model2 = Normal([mu, sigma])

    # check whether a wrongly sized list of distance functions raises an error
    self.assertRaises(ValueError, LinearCombination, [self.model1, self.model2],
                      [self.distancefunc1], [1.0, 1.0])
    # check whether a wrongly sized list of weights raises an error
    self.assertRaises(ValueError, LinearCombination, [self.model1, self.model2],
                      [self.distancefunc1, self.distancefunc2], [1.0, 1.0, 1.0])

    self.jointdistancefunc = LinearCombination([self.model1, self.model2],
                                               [self.distancefunc1, self.distancefunc2],
                                               [1.0, 1.0])
def test_sample(self):
    # setup backend
    dummy = BackendDummy()

    # define a uniform prior distribution
    mu = Uniform([[-5.0], [5.0]], name='mu')
    sigma = Uniform([[0.0], [10.0]], name='sigma')
    # define a Gaussian model
    self.model = Normal([mu, sigma])

    # define sufficient statistics for the model
    stat_calc = Identity(degree=2, cross=0)

    # define a distance function
    dist_calc = Euclidean(stat_calc)

    # create fake observed data
    y_obs = [np.array(9.8)]

    # use the rejection sampling scheme
    sampler = RejectionABC([self.model], [dist_calc], dummy, seed=1)
    journal = sampler.sample([y_obs], 10, 1, 10)
    mu_sample = np.array(journal.get_parameters()['mu'])
    sigma_sample = np.array(journal.get_parameters()['sigma'])

    # test shape of samples
    self.assertEqual(np.shape(mu_sample), (10, 1))
    self.assertEqual(np.shape(sigma_sample), (10, 1))

    # Compute posterior mean
    # self.assertAlmostEqual(np.average(np.asarray(samples[:, 0])), 1.22301, 10e-2)
    self.assertLess(np.average(mu_sample) - 1.22301, 1e-2)
    self.assertLess(np.average(sigma_sample) - 6.992218, 10e-2)

    self.assertFalse(journal.number_of_simulations == 0)
def infer_parameters_smcabc():
    # define observation for true parameters mean=170, 65
    rng = np.random.RandomState()
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]])
    mu1 = Uniform([[25], [100]])

    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import SMCABC
    sampler = SMCABC([height_weight_model], [distance_calculator], backend, seed=1)
    steps, n_samples, n_samples_per_param, epsilon = 2, 10, 1, 2000
    journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, epsilon, full_output=1)

    return journal
def infer_parameters_abcsubsim():
    # define observation for true parameters mean=170, 65
    rng = np.random.RandomState(seed=1)
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]], name="mu0")
    mu1 = Uniform([[25], [100]], name="mu1")

    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import ABCsubsim
    sampler = ABCsubsim([height_weight_model], [distance_calculator], backend, seed=1)
    steps, n_samples, n_samples_per_param, chain_length = 2, 10, 1, 2
    print('ABCsubsim Inferring')
    journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, chain_length)

    return journal
def infer_parameters_sabc():
    # define observation for true parameters mean=170, 65
    rng = np.random.RandomState(seed=1)
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]], name="mu0")
    mu1 = Uniform([[25], [100]], name="mu1")

    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import SABC
    sampler = SABC([height_weight_model], [distance_calculator], backend, seed=1)
    steps, epsilon, n_samples, n_samples_per_param, beta, delta, v = 2, 40000, 10, 1, 2, 0.2, 0.3
    ar_cutoff, resample, n_update, full_output = 0.1, None, None, 1
    print('SABC Inferring')
    journal = sampler.sample([y_obs], steps, epsilon, n_samples, n_samples_per_param, beta, delta, v,
                             ar_cutoff, resample, n_update, full_output)

    return journal
def test_sample(self):
    # setup backend
    dummy = BackendDummy()

    # define a uniform prior distribution
    lb = np.array([-5, 0])
    ub = np.array([5, 10])
    prior = Uniform(lb, ub, seed=1)

    # define a Gaussian model
    model = Gaussian(prior, mu=2.1, sigma=5.0, seed=1)

    # define sufficient statistics for the model
    stat_calc = Identity(degree=2, cross=0)

    # define a distance function
    dist_calc = Euclidean(stat_calc)

    # create fake observed data
    y_obs = model.simulate(1)

    # use the rejection sampling scheme
    sampler = RejectionABC(model, dist_calc, dummy, seed=1)
    journal = sampler.sample(y_obs, 10, 1, 0.1)
    samples = journal.get_parameters()

    # test shape of samples
    samples_shape = np.shape(samples)
    self.assertEqual(samples_shape, (10, 2))

    # Compute posterior mean
    self.assertEqual((np.average(np.asarray(samples[:, 0])), np.average(np.asarray(samples[:, 1]))),
                     (1.6818856447333246, 8.4384177826766518))
class DefaultJointDistance(Distance):
    """
    This class implements a default distance to be used when multiple root models exist.
    It uses Euclidean as the distance calculator for each root model, and averages the
    individual distances.

    Parameters
    ----------
    statistics : abcpy.statistics object
        The statistics calculator to be used
    """

    def __init__(self, statistics):
        self.statistics_calc = statistics
        self.distance_calc = Euclidean(self.statistics_calc)

    def distance(self, d1, d2):
        total_distance = 0
        for observed_data, simulated_data in zip(d1, d2):
            total_distance += self.distance_calc.distance([observed_data], [simulated_data])
        total_distance /= len(d2)
        return total_distance

    def dist_max(self):
        return np.inf
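# A minimal usage sketch of DefaultJointDistance; the data below are made-up placeholders,
# and Identity is the statistics calculator already used throughout this section:
stat_calc = Identity(degree=1, cross=0)
joint_dist = DefaultJointDistance(stat_calc)

obs = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]  # one dataset per root model
sim = [np.array([1.5, 2.5]), np.array([2.0, 5.0])]
print(joint_dist.distance(obs, sim))  # average of the two per-model Euclidean distances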
def infer_parameters_apmcabc():
    # define observation for true parameters mean=170, 65
    rng = np.random.RandomState(seed=1)
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]], name="mu0")
    mu1 = Uniform([[25], [100]], name="mu1")

    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import APMCABC
    sampler = APMCABC([height_weight_model], [distance_calculator], backend, seed=1)
    steps, n_samples, n_samples_per_param, alpha = 2, 100, 1, 0.2
    acceptance_cutoff, covFactor, full_output, journal_file = 0.03, 2.0, 1, None
    print('APMCABC Inferring')
    journal = sampler.sample([y_obs], steps, n_samples, n_samples_per_param, alpha,
                             acceptance_cutoff, covFactor, full_output, journal_file)

    return journal
def infer_parameters_pmcabc():
    # define observation for true parameters mean=170, 65
    rng = np.random.RandomState(seed=1)
    y_obs = [np.array(rng.multivariate_normal([170, 65], np.eye(2), 1).reshape(2, ))]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu0 = Uniform([[150], [200]], name="mu0")
    mu1 = Uniform([[25], [100]], name="mu1")

    # define the model
    height_weight_model = NestedBivariateGaussian([mu0, mu1])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=2, cross=False)

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height_weight_model], [distance_calculator], backend, seed=1)

    # sample from scheme
    T, n_sample, n_samples_per_param = 2, 10, 1
    eps_arr = np.array([10000])
    epsilon_percentile = 95
    print('PMCABC Inferring')
    journal = sampler.sample([y_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
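# All of the samplers above return an abcpy.output.Journal; a hedged sketch of how its
# contents can be inspected, mirroring the get_parameters usage in the tests earlier on:
journal = infer_parameters_pmcabc()

# posterior samples are keyed by the parameter names given at definition time
mu0_samples = np.array(journal.get_parameters()['mu0'])
mu1_samples = np.array(journal.get_parameters()['mu1'])
print(np.mean(mu0_samples), np.mean(mu1_samples))
print(journal.number_of_simulations)  # also recorded on the journal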
def setUp(self):
    # setup backend
    self.backend = BackendDummy()

    # define a uniform prior distribution
    mu = Uniform([[-5.0], [5.0]], name='mu')
    sigma = Uniform([[0.0], [10.0]], name='sigma')
    # define a Gaussian model
    self.model = Normal([mu, sigma])

    # define a distance function
    stat_calc = Identity(degree=2, cross=0)
    self.dist_calc = Euclidean(stat_calc)

    # create fake observed data
    # self.observation = self.model.forward_simulate(1, np.random.RandomState(1))[0].tolist()
    self.observation = [np.array(9.8)]
def infer_parameters(model_num, synthetic, T, n_sample, ar_cutoff):
    y_obs = [0]

    # prior = Uniform([[0, 0, 0, 0], [5, 5, 2.5, 2.5]])
    prior1 = Uniform([[0], [5]])
    prior2 = Uniform([[0], [5]])
    prior3 = Uniform([[0], [2.5]])
    prior4 = Uniform([[0], [2.5]])
    model = Airport([prior1, prior2, prior3, prior4], name="Airport",
                    model_num=model_num, synthetic=synthetic)

    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=1, cross=False)

    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(statistics_calculator)

    # define sampling scheme
    from abcpy.inferences import SABC
    sampler = SABC([model], [distance_calculator], backend)
    # alternative sampling scheme:
    # from abcpy.inferences import PMCABC
    # sampler = PMCABC([model], [distance_calculator], backend, kernel, seed=1)

    # sample from scheme
    journal = sampler.sample([y_obs], T, 1000, n_sample, 1, ar_cutoff=ar_cutoff)

    return journal
def setUp(self):
    # setup backend
    self.backend = BackendDummy()

    # define a uniform prior distribution
    lb = np.array([-5, 0])
    ub = np.array([5, 10])
    prior = Uniform(lb, ub, seed=1)

    # define a Gaussian model
    self.model = Gaussian(prior, mu=2.1, sigma=5.0, seed=1)

    # define a distance function
    stat_calc = Identity(degree=2, cross=0)
    self.dist_calc = Euclidean(stat_calc)

    # create fake observed data
    self.observation = self.model.simulate(1)

    # define kernel
    mean = np.array([-13.0, .0, 7.0])
    cov = np.eye(3)
    self.kernel = MultiNormal(mean, cov, seed=1)
def infer_parameters():
    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]])
    sigma = Uniform([[5], [25]])

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=3, cross=True)

    # Learn the optimal summary statistics using Semiautomatic summary selection
    from abcpy.statisticslearning import Semiautomatic
    statistics_learning = Semiautomatic([height], statistics_calculator, backend,
                                        n_samples=1000, n_samples_per_param=1, seed=1)
    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # Learn the optimal summary statistics using SemiautomaticNN summary selection
    from abcpy.statisticslearning import SemiautomaticNN
    statistics_learning = SemiautomaticNN([height], statistics_calculator, backend,
                                          n_samples=1000, n_samples_per_param=1, seed=1)
    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(new_statistics_calculator)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    # sample from scheme
    T, n_sample, n_samples_per_param = 3, 10, 10
    eps_arr = np.array([500])
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
def __init__(self, statistics):
    self.statistics_calc = statistics
    self.distance_calc = Euclidean(self.statistics_calc)
# resultfakeobs2 = EarthWorm.forward_simulate([1, 1, 1, 1, 1, 1, 1, 11, 0.015, 0.5, 0.25, 0.177, 0.182, 0.0002], 2)

# Define backend
from abcpy.backends import BackendMPI as Backend
backend = Backend()

# Define statistics
statistics_calculator = Identity(degree=1, cross=False)
# print('# Check whether the statistics works')
# print(statistics_calculator.statistics(resultfakeobs1))
# print(statistics_calculator.statistics(resultfakeobs2))

# Define distance
from abcpy.distances import Euclidean
distance_calculator = Euclidean(statistics_calculator)
# print('# Check whether the distance works')
# print(distance_calculator.distance(resultfakeobs1, resultfakeobs1))
# print(distance_calculator.distance(resultfakeobs1, resultfakeobs2))

# Define kernel
from abcpy.perturbationkernel import DefaultKernel
kernel = DefaultKernel([B_0, activation_energy, energy_tissue, energy_food, energy_synthesis,
                        half_saturation_coeff, max_ingestion_rate, mass_birth, mass_cocoon,
                        mass_maximum, mass_sexual_maturity, growth_constant, max_reproduction_rate,
                        speed])

## SABC ##
def infer_parameters(backend, scheme='rejection', n_samples=250, n_samples_per_param=10,
                     logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    backend
        The parallelization backend
    scheme : string, optional
        The inference scheme to use; one of 'rejection', 'smc' or 'pmc'.
        The default value is 'rejection'.
    n_samples : integer, optional
        Number of posterior samples to generate. The default value is 250.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 10.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # experimental setup
    T = 50.  # simulation time
    dt = 0.025  # time step
    I_amp = 0.32  # stimulus amplitude
    r_soma = 40  # radius of soma
    threshold = -55  # AP threshold

    # input stimulus
    stimulus_dict = constant_stimulus(I_amp=I_amp, T=T, dt=dt, t_stim_on=10, t_stim_off=40,
                                      r_soma=r_soma)
    I = stimulus_dict["I"]
    # I_stim = stimulus_dict["I_stim"]

    # true parameters
    gbar_K_true = 36
    gbar_Na_true = 120
    gbar_K_std = 5
    gbar_Na_std = 5

    # define priors
    gbar_K = Normal([[gbar_K_true], [gbar_K_std]], name='gbar_K')
    gbar_Na = Normal([[gbar_Na_true], [gbar_Na_std]], name='gbar_Na')

    # define the model
    hh_simulator = HHSimulator([gbar_K, gbar_Na], I, T, dt)

    # observed data
    obs_data = hh_simulator.forward_simulate([gbar_K_true, gbar_Na_true])

    # define statistics
    statistics_calculator = Identity()

    # Learn the optimal summary statistics using Semiautomatic summary selection
    statistics_learning = Semiautomatic([hh_simulator], statistics_calculator, backend,
                                        n_samples=1000, n_samples_per_param=1, seed=42)
    new_statistics_calculator = statistics_learning.get_statistics()

    # define distance
    distance_calculator = Euclidean(new_statistics_calculator)

    # define kernel
    kernel = DefaultKernel([gbar_K, gbar_Na])

    # define sampling scheme
    if scheme == 'rejection':
        sampler = RejectionABC([hh_simulator], [distance_calculator], backend, seed=42)
        # sample from scheme
        epsilon = 2.
        journal = sampler.sample([obs_data], n_samples, n_samples_per_param, epsilon)
    elif scheme == 'smc':
        sampler = SMCABC([hh_simulator], [distance_calculator], backend, kernel, seed=42)
        # sample from scheme
        steps = 3
        journal = sampler.sample([obs_data], steps, n_samples, n_samples_per_param)
    elif scheme == 'pmc':
        sampler = PMCABC([hh_simulator], [distance_calculator], backend, kernel, seed=42)
        # sample from scheme
        steps = 3
        eps_arr = np.array([2.])
        epsilon_percentile = 10
        journal = sampler.sample([obs_data], steps, eps_arr, n_samples, n_samples_per_param,
                                 epsilon_percentile)

    return journal
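# A hedged usage sketch of the function above, assuming a dummy backend as in the other
# examples in this section:
from abcpy.backends import BackendDummy

journal = infer_parameters(BackendDummy(), scheme='rejection', n_samples=10, n_samples_per_param=1)
print(journal.number_of_simulations)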
def infer_parameters(steps=2, n_sample=50, n_samples_per_param=1, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 2.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 50.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # 1) generate simulations from prior
    from abcpy.inferences import DrawFromPrior
    draw_from_prior = DrawFromPrior([height], backend=backend)

    # notice the use of the `.sample_par_sim_pairs` method rather than `.sample` to obtain data
    # suitably formatted for the summary statistics learning routines
    parameters, simulations = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # if you want to use the test loss to do early stopping in the training:
    parameters_val, simulations_val = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # discard the middle dimension (n_samples_per_param), as the StatisticsLearning classes require it to be 1
    simulations = simulations.reshape(simulations.shape[0], simulations.shape[2])
    simulations_val = simulations_val.reshape(simulations_val.shape[0], simulations_val.shape[2])

    # 2) now train the NNs with the different methods with the generated data
    from abcpy.statistics import Identity
    identity = Identity()  # to apply before computing the statistics

    logging.info("semiNN")
    from abcpy.statisticslearning import SemiautomaticNN, TripletDistanceLearning
    semiNN = SemiautomaticNN([height], identity, backend=backend, parameters=parameters,
                             simulations=simulations, parameters_val=parameters_val,
                             simulations_val=simulations_val,
                             early_stopping=True,  # early stopping
                             seed=1, n_epochs=10, scale_samples=False, use_tqdm=False)
    logging.info("triplet")
    triplet = TripletDistanceLearning([height], identity, backend=backend, parameters=parameters,
                                      simulations=simulations, parameters_val=parameters_val,
                                      simulations_val=simulations_val,
                                      early_stopping=True,  # early stopping
                                      seed=1, n_epochs=10, scale_samples=True, use_tqdm=False)

    # 3) save and re-load NNs:
    # get the statistics from the already fit StatisticsLearning object 'semiNN':
    learned_seminn_stat = semiNN.get_statistics()
    learned_triplet_stat = triplet.get_statistics()

    # this has a save_net method:
    learned_seminn_stat.save_net("seminn_net.pth")
    # if you used `scale_samples=True` in learning the NNs, you need to provide a path where pickle
    # stores the scaler too:
    learned_triplet_stat.save_net("triplet_net.pth", path_to_scaler="scaler.pkl")

    # To re-load, use the NeuralEmbedding statistics' fromFile; this needs to know which kind of NN it
    # is using; you therefore need to pass either the input/output size (i.e. the data size and the
    # number of parameters) or the network class, if that was specified explicitly in the
    # StatisticsLearning class. Check the docstring for NeuralEmbedding.fromFile for more details.
    from abcpy.statistics import NeuralEmbedding
    learned_seminn_stat_loaded = NeuralEmbedding.fromFile("seminn_net.pth", input_size=1, output_size=2)
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl")

    # 4) you can optionally rescale the different summary statistics by their standard deviation on a
    # reference dataset of simulations. To do this, it is enough to pass the reference dataset at
    # initialization, and the rescaling will be applied every time the statistics is computed on some
    # simulation or observation.
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl",
                                                           reference_simulations=simulations_val)

    # 5) perform inference
    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(learned_seminn_stat_loaded)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by
    # simulations at the previous iteration from the observation
    epsilon_percentile = 10

    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param,
                             epsilon_percentile)

    return journal
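# A hedged usage sketch of the function above; abcpy journals can also be saved to disk
# (the file name here is a placeholder):
journal = infer_parameters(steps=2, n_sample=10, n_samples_per_param=1)
print(np.mean(np.array(journal.get_parameters()['mu'])))
journal.save("experiments.jnl")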
# 2.00000000e+01, 3.90572997e+03, 1.59328549e+01, 1.38943902e+05, 0.00000000e+00,
# 6.00000000e+01, 3.42727305e+03, 2.80052570e+01, 8.57585366e+04, 0.00000000e+00,
# 1.20000000e+02, 2.33523014e+03, 7.57715388e+01, 4.25231707e+04, 0.00000000e+00,
# 3.00000000e+02, 1.74166329e+02, 2.46413793e+03, 5.15975610e+03, 0.00000000e+00])
# obsdata = [np.array(XObserved).reshape(1, -1)]

# Define kernel and join the defined kernels
from abcpy.perturbationkernel import DefaultKernel
kernel = DefaultKernel([pAd, pAg, pT, pF, aT, v_z_AP, v_z_NAP])

# Define distance functions
from abcpy.distances import Euclidean
from statistic import Multiply
L = np.load('Data/L_all_3_cross.npz')['L']
stat_mult = Multiply(L=L, degree=3, cross=True)
dist_calc_mult = Euclidean(stat_mult)

## SABC - Multiply ##
from abcpy.inferences import SABC
print('Inference using Classifier Loss')
sampler = SABC([PD], [dist_calc_mult], backend, kernel, seed=1)
steps, epsilon, n_samples, n_samples_per_param = 20, 10e20, 511, 1
ar_cutoff, full_output, journal_file = 0.001, 1, None
print('SABC Inferring')
fakedata = PD.forward_simulate([noAP, noNAP, SR_x, 89.0, 76.0, 2.49, 7e-3, 7.7, 6e-3, 8e-4], k=1)
print(fakedata)
journal_sabc = sampler.sample(observations=[fakedata], steps=steps, epsilon=epsilon,
                              n_samples=n_samples, n_samples_per_param=n_samples_per_param,
                              beta=2, delta=0.2, v=0.3, ar_cutoff=0.001, resample=None,
                              n_update=None, full_output=1, journal_file=journal_file)
    perform_ABC = True
else:
    perform_ABC = True

# you can perform ABC inference with both SM, FP statistics and the true ones
if technique in ["SM", "SSM"]:
    if perform_ABC:
        print(f"\nPerform ABC inference with {technique} statistics.")
    if weighted_euclidean_distance:
        # define the distance object
        # keep the last 100 test samples to estimate the initial eps value if not provided
        distance_calculator = WeightedEuclidean(
            NeuralEmbedding(RescaleAndDiscardLastOutputNet(net_data_SM, scaler_data_SM)),
            [samples_matrix_test[i].numpy()
             for i in range(samples_matrix_test.shape[0] - 100 * (ABC_eps is None))])
    else:
        distance_calculator = Euclidean(
            NeuralEmbedding(RescaleAndDiscardLastOutputNet(net_data_SM, scaler_data_SM)))
elif "FP" == technique:
    if perform_ABC:
        print("\nPerform ABC inference with FP statistics.")
    if weighted_euclidean_distance:
        distance_calculator = WeightedEuclidean(
            NeuralEmbedding(RescaleAndNet(net_FP, scaler_data_FP)),
            [samples_matrix_test[i].numpy()
             for i in range(samples_matrix_test.shape[0] - 100 * (ABC_eps is None))])
    else:
        distance_calculator = Euclidean(NeuralEmbedding(RescaleAndNet(net_FP, scaler_data_FP)))
elif "true" == technique:
    if perform_ABC:
        print("\nPerform ABC inference with true statistics.")
def infer_parameters(steps=3, n_sample=250, n_samples_per_param=10, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 3.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 250.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 10.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699,
                  169.59102084, 172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803,
                  181.98288443, 170.18565017, 183.78493886, 166.58387299, 161.9521899, 155.69213073,
                  156.17867343, 144.51580379, 170.29847515, 197.96767899, 153.36646527, 162.22710198,
                  158.70012047, 178.53470703, 170.77697743, 164.31392633, 165.88595994, 177.38083686,
                  146.67058471763457, 179.41946565658628, 238.02751620619537, 206.22458790620766,
                  220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619,
                  224.54340549862235, 197.42448680731226, 165.88273684581381, 166.55094082844519,
                  229.54308602661584, 222.99844054358519, 185.30223966014586, 152.69149367593846,
                  206.94372818527413, 256.35498655339154, 165.43140916577741, 250.19273595481803,
                  148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852,
                  222.96391329259626, 188.27229523693822, 202.67075179617672, 211.75963110985992,
                  217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # define statistics
    from abcpy.statistics import Identity
    statistics_calculator = Identity(degree=3, cross=True)

    # Learn the optimal summary statistics using Semiautomatic summary selection
    from abcpy.statisticslearning import Semiautomatic
    statistics_learning = Semiautomatic([height], statistics_calculator, backend,
                                        n_samples=1000, n_samples_per_param=1, seed=1)
    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # Learn the optimal summary statistics using SemiautomaticNN summary selection;
    # we use 200 samples as a validation set for early stopping:
    from abcpy.statisticslearning import SemiautomaticNN
    statistics_learning = SemiautomaticNN([height], statistics_calculator, backend,
                                          n_samples=1000, n_samples_val=200,
                                          n_samples_per_param=1, seed=1, early_stopping=True)
    # Redefine the statistics function
    new_statistics_calculator = statistics_learning.get_statistics()

    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(new_statistics_calculator)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)
    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by
    # simulations at the previous iteration from the observation
    epsilon_percentile = 10

    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param,
                             epsilon_percentile)

    return journal
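# A hedged usage sketch of the function above; posterior samples are keyed by the
# parameter names "mu" and "sigma" defined in it:
journal = infer_parameters()
mu_post = np.array(journal.get_parameters()['mu'])
sigma_post = np.array(journal.get_parameters()['sigma'])
print(np.mean(mu_post), np.mean(sigma_post))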
def infer_parameters():
    # The data corresponding to model_1 defined below
    grades_obs = [3.872486707973337, 4.6735380808674405, 3.9703538990858376, 4.11021272048805,
                  4.211048655421368, 4.154817956586653, 4.0046893064392695, 4.01891381384729,
                  4.123804757702919, 4.014941267301294, 3.888174595940634, 4.185275142948246,
                  4.55148774469135, 3.8954427675259016, 4.229264035335705, 3.839949451328312,
                  4.039402553532825, 4.128077814241238, 4.361488645531874, 4.086279074446419,
                  4.370801602256129, 3.7431697332475466, 4.459454162392378, 3.8873973643008255,
                  4.302566721487124, 4.05556051626865, 4.128817316703757, 3.8673704442215984,
                  4.2174459453805015, 4.202280254493361, 4.072851400451234, 3.795173229398952,
                  4.310702877332585, 4.376886328810306, 4.183704734748868, 4.332192463368128,
                  3.9071312388426587, 4.311681374107893, 3.55187913252144, 3.318878360783221,
                  4.187850500877817, 4.207923106081567, 4.190462065625179, 4.2341474252986036,
                  4.110228694304768, 4.1589891480847765, 4.0345604687633045, 4.090635481715123,
                  3.1384654393449294, 4.20375641386518, 4.150452690356067, 4.015304457401275,
                  3.9635442007388195, 4.075915739179875, 3.5702080541929284, 4.722333310410388,
                  3.9087618197155227, 4.3990088006390735, 3.968501165774181, 4.047603645360087,
                  4.109184340976979, 4.132424805281853, 4.444358334346812, 4.097211737683927,
                  4.288553086265748, 3.8668863066511303, 3.8837108501541007]

    # The prior information changing the class size and the teacher-student ratio,
    # depending on the yearly budget of the school
    from abcpy.continuousmodels import Uniform, Normal
    school_budget = Uniform([[1], [10]], name='school_budget')

    # The average class size of a certain school
    class_size = Normal([[800 * school_budget], [1]], name='class_size')
    # The number of teachers in the school
    no_teacher = Normal([[20 * school_budget], [1]], name='no_teacher')

    # The grade a student would receive without any bias
    grade_without_additional_effects = Normal([[4.5], [0.25]], name='grade_without_additional_effects')

    # The grade a student of a certain school receives
    final_grade = grade_without_additional_effects - .001 * class_size + .02 * no_teacher

    # The data corresponding to model_2 defined below
    scholarship_obs = [2.7179657436207805, 2.124647285937229, 3.07193407853297, 2.335024761813643,
                       2.871893855192, 3.4332002458233837, 3.649996835818173, 3.50292335102711,
                       2.815638168018455, 2.3581613289315992, 2.2794821846395568, 2.8725835459926503,
                       3.5588573782815685, 2.26053126526137, 1.8998143530749971, 2.101110815311782,
                       2.3482974964831573, 2.2707679029919206, 2.4624550491079225, 2.867017757972507,
                       3.204249152084959, 2.4489542437714213, 1.875415915801106, 2.5604889644872433,
                       3.891985093269989, 2.7233633223405205, 2.2861070389383533, 2.9758813233490082,
                       3.1183403287267755, 2.911814060853062, 2.60896794303205, 3.5717098647480316,
                       3.3355752461779824, 1.99172284546858, 2.339937680892163, 2.9835630207301636,
                       2.1684912355975774, 3.014847335983034, 2.7844122961916202, 2.752119871525148,
                       2.1567428931391635, 2.5803629307680644, 2.7326646074552103, 2.559237193255186,
                       3.13478196958166, 2.388760269933492, 3.2822443541491815, 2.0114405441787437,
                       3.0380056368041073, 2.4889680313769724, 2.821660164621084, 3.343985964873723,
                       3.1866861970287808, 4.4535037154856045, 3.0026333138006027, 2.0675706089352612,
                       2.3835301730913185, 2.584208398359566, 3.288077633446465, 2.6955853384148183,
                       2.918315169739928, 3.2464814419322985, 2.1601516779909433, 3.231003347780546,
                       1.0893224045062178, 0.8032302688764734, 2.868438615047827]

    # A quantity that determines whether a student will receive a scholarship
    scholarship_without_additional_effects = Normal([[2], [0.5]], name='schol_without_additional_effects')

    # A quantity determining whether a student receives a scholarship, including the effect
    # of the number of teachers in the school
    final_scholarship = scholarship_without_additional_effects + .03 * no_teacher

    # Define summary statistics for final grade and final scholarship
    from abcpy.statistics import Identity
    statistics_calculator_final_grade = Identity(degree=2, cross=False)
    statistics_calculator_final_scholarship = Identity(degree=3, cross=False)

    # Define a distance measure for final grade and final scholarship
    from abcpy.distances import Euclidean
    distance_calculator_final_grade = Euclidean(statistics_calculator_final_grade)
    distance_calculator_final_scholarship = Euclidean(statistics_calculator_final_scholarship)

    # Define a backend
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # Define kernels
    from abcpy.perturbationkernel import MultivariateNormalKernel, MultivariateStudentTKernel
    kernel_1 = MultivariateNormalKernel([school_budget, scholarship_without_additional_effects,
                                         grade_without_additional_effects])
    kernel_2 = MultivariateStudentTKernel([class_size, no_teacher], df=3)

    # Join the defined kernels
    from abcpy.perturbationkernel import JointPerturbationKernel
    kernel = JointPerturbationKernel([kernel_1, kernel_2])

    # Define sampling parameters
    T, n_sample, n_samples_per_param = 3, 250, 10
    eps_arr = np.array([.75])
    epsilon_percentile = 10

    # Define sampler; one distance calculator per root model
    from abcpy.inferences import PMCABC
    sampler = PMCABC([final_grade, final_scholarship],
                     [distance_calculator_final_grade, distance_calculator_final_scholarship],
                     backend, kernel)

    # Sample
    journal = sampler.sample([grades_obs, scholarship_obs], T, eps_arr, n_sample,
                             n_samples_per_param, epsilon_percentile)

    return journal
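# A hedged usage sketch; posterior samples in the journal are keyed by the names given at
# model definition time:
journal = infer_parameters()
params = journal.get_parameters()
print(np.mean(np.array(params['school_budget'])))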
# Define backend
from abcpy.backends import BackendDummy as Backend
backend = Backend()

# Define statistics
statistics_calculator = HakkarainenLorenzStatistics(degree=1, cross=False)
print('# Check whether the statistics works')
print(statistics_calculator.statistics(resultfakeobs1))

# Define distance
from abcpy.distances import Euclidean
distance_calculator = Euclidean(statistics_calculator)
print('# Check whether the distance works')
print(distance_calculator.distance(resultfakeobs1, resultfakeobs1))
print(distance_calculator.distance(resultfakeobs1, resultfakeobs2))

# Define kernel
from abcpy.perturbationkernel import MultivariateNormalKernel, JointPerturbationKernel
# Join the defined kernels
kernel1 = MultivariateNormalKernel([theta1, theta2])
kernel = JointPerturbationKernel([kernel1])

## APMCABC ##
from abcpy.inferences import APMCABC
# observed data
rseed = 42
rng = np.random.RandomState(rseed)
n_samples = int(T / dt) + 1
noise = rng.normal(loc=0, scale=0.05, size=(n_samples))
obs_data = hh_simulator.forward_simulate([gbar_K_true, gbar_Na_true])

# define statistics
# statistics_calculator = Identity()
t = stimulus_dict["t"]
stim_duration = stimulus_dict["duration"]
t_stim_on = stimulus_dict["t_stim_on"]
statistics_calculator = Features(t, stim_duration, t_stim_on)

# define distances
distance_calculator = Euclidean(statistics_calculator)
distance_calculator2 = Wasserstein(statistics_calculator)
distance_calculator3 = LogReg(statistics_calculator)

# checking
sim_data = hh_simulator.forward_simulate([gbar_K_true + 3, gbar_Na_true + 4])
# obs_data = [obs_data[0] / np.max(obs_data[0]) + noise]
# sim_data = [sim_data[0] / np.max(sim_data[0])]
s1 = statistics_calculator.statistics(obs_data)
s2 = statistics_calculator.statistics(sim_data)
dist = distance_calculator.distance(obs_data, sim_data)
dist2 = distance_calculator2.distance(obs_data, sim_data)
dist3 = distance_calculator3.distance(obs_data, sim_data)
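# To make the comparison between the three distances visible (a minimal sketch using the
# variables computed above):
print("stats (obs):", s1)
print("stats (sim):", s2)
print("Euclidean:", dist, "Wasserstein:", dist2, "LogReg:", dist3)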
def setUp(self):
    self.stat_calc = Identity(degree=1, cross=0)
    self.distancefunc = Euclidean(self.stat_calc)
true_parameters_fake_1 = [0.05, 5, 7, 5, 5, 6, 0.06, .1, .2, .3, .4, .1, .2, .3, .4, .5, 50, .4, .3, 0.3]
# print(len(true_parameters_fake_1))
observation_france_1 = model.forward_simulate(true_parameters_fake_1, 1)
true_parameters_fake_2 = [0.05, 5, 7, 5, 5, 6, 0.05, .1, .2, .3, .4, .1, .2, .3, .4, .5, 50, .4, .3, 0.3]
observation_france_2 = model.forward_simulate(true_parameters_fake_2, 1)

# we now define the statistics and distances:
rho = 1  # multiplier to decrease the importance of past timesteps
distances_list = []
# this has to be used if the model already returns the correct observations (return_observation_only=True)
# now add the distances on the numbers of deceased and hospitalized people; these need to discard the
# first elements because there is no data on that before then.
for i in range(n):
    distances_list.append(
        Euclidean(ExtractSingleTimeseries2DArray(index=i, rho=rho, start_step=21, end_step=-1)))  # deceased
distances_list.append(Euclidean(ExtractSingleTimeseries2DArray(index=5, rho=rho, start_step=17, end_step=-1)))

# define a weighted distance:
# max values of the daily counts: 1., 9., 73., 354., 462., 17933.
# we could use the inverse of them as weights; I think however the last timeseries have less noise
# as they are sampled from larger numbers, so they should be slightly less important.
weights = [1, 1, 1, 2, 2, .1]
# weights = [1.0 / 1 * 0.75, 1.0 / 9 * 0.75, 1.0 / 68 * 0.85, 1.0 / 338, 1.0 / 445, 1.0 / 4426]
# weights = [1, 0.1, 0.01, 0.005, 0.005, 0.005]
final_distance = WeightedDistance(distances_list, weights=weights)
print("dist", final_distance.distance(observation_france_1, [observation_france]))

# define backend
backend = Backend()
        learned_triplet_stat.save_net("Data/Pilots/triplet_net_fake.pth")
    else:
        learned_seminn_stat.save_net("Data/Pilots/seminn_net_" + str(whichobs) + ".pth")
        learned_triplet_stat.save_net("Data/Pilots/triplet_net_" + str(whichobs) + ".pth")

if sample:
    if fake:
        learned_seminn_stat_loaded = NeuralEmbedding.fromFile("Data/Pilots/seminn_net_fake.pth",
                                                              input_size=len(InformativeIndices),
                                                              output_size=7,
                                                              previous_statistics=identity)
        learned_triplet_stat_loaded = NeuralEmbedding.fromFile("Data/Pilots/triplet_net_fake.pth",
                                                               input_size=len(InformativeIndices),
                                                               output_size=7,
                                                               previous_statistics=identity)
    else:
        learned_seminn_stat_loaded = NeuralEmbedding.fromFile("Data/Pilots/seminn_net_" + str(whichobs) + ".pth",
                                                              input_size=len(InformativeIndices),
                                                              output_size=7,
                                                              previous_statistics=identity)
        learned_triplet_stat_loaded = NeuralEmbedding.fromFile("Data/Pilots/triplet_net_" + str(whichobs) + ".pth",
                                                               input_size=len(InformativeIndices),
                                                               output_size=7,
                                                               previous_statistics=identity)

    # Define distance functions
    from abcpy.distances import Euclidean
    dist_calc_seminn = Euclidean(learned_seminn_stat_loaded)
    dist_calc_triplet = Euclidean(learned_triplet_stat_loaded)

    from statistic import Multiply
    L = np.load('Data/L_all_3_cross.npz')['L']
    stat_mult = Multiply(L=L, degree=3, cross=True)
    dist_calc_mult = Euclidean(stat_mult)
    print(dist_calc_mult.distance(obsdata, obsdata))

    # print('Inference starting')

    ## SABC - SemiNN ##
    from abcpy.inferences import SABC
    print('Inference using Semi NN')
    sampler = SABC([PD], [dist_calc_seminn], backend, kernel, seed=1)
    steps, epsilon, n_samples, n_samples_per_param = 25, 10e20, 511, 1
    ar_cutoff, full_output, journal_file = 0.001, 1, None
    print('SABC Inferring')
    journal_sabc = sampler.sample(observations=[obsdata], steps=steps, epsilon=epsilon, n_samples=n_samples,