def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
    """Set up the inference object.

    Parameters
    ----------
    root_models : list
        Root probabilistic models of the graph.
    backend : abcpy.backends.Backend
        Backend used to parallelize the computation.
    seed : int, optional
        Seed for the random number generator; None gives a nondeterministic seed.
    discard_too_large_values : bool, optional
        If True, simulations overflowing float32 are discarded and redrawn.
    """
    self.backend = backend
    self.model = root_models
    self.discard_too_large_values = discard_too_large_values
    self.rng = np.random.RandomState(seed)
    # Manager for the bds (broadcast data set) objects of the model graph.
    self.accepted_parameters_manager = AcceptedParametersManager(self.model)
def test_DefaultKernel(self):
    """With a fixed rng seed, the default joint kernel perturbs the two
    continuous parameters and the discrete one to the expected values."""
    binom = Binomial([10, 0.2])
    norm1 = Normal([0.1, 0.01])
    norm2 = Normal([0.3, norm1])
    graph = Normal([binom, norm2])
    manager = AcceptedParametersManager([graph])
    backend = Backend()
    kernel = DefaultKernel([norm1, norm2, binom])
    manager.update_broadcast(
        backend,
        [[2, 0.27, 0.097], [3, 0.32, 0.012]],
        np.array([1, 1]),
        accepted_cov_mats=[[[0.01, 0], [0, 0.01]], []],
    )
    kernel_parameters = [
        manager.get_accepted_parameters_bds_values(krnl.models)
        for krnl in kernel.kernels
    ]
    manager.update_kernel_values(backend, kernel_parameters=kernel_parameters)
    perturbed = kernel.update(manager, 1, np.random.RandomState(1))
    self.assertEqual(
        perturbed,
        [(norm1, [0.17443453636632419]),
         (norm2, [0.25882435863499248]),
         (binom, [3])],
    )
def test_return_value_Student_T(self):
    """pdf() of a joint multivariate Student-T kernel returns a plain float."""
    norm1 = Normal([0.1, 0.01])
    norm2 = Normal([0.3, norm1])
    graph = Normal([norm1, norm2])
    manager = AcceptedParametersManager([graph])
    backend = Backend()
    kernel = JointPerturbationKernel(
        [MultivariateStudentTKernel([norm1, norm2], df=2)])
    manager.update_broadcast(
        backend, [[0.4, 0.09], [0.2, 0.008]], np.array([0.5, 0.2]))
    kernel_parameters = [
        manager.get_accepted_parameters_bds_values(krnl.models)
        for krnl in kernel.kernels
    ]
    manager.update_kernel_values(backend, kernel_parameters)
    mapping, mapping_index = manager.get_mapping(manager.model)
    manager.update_broadcast(
        backend, accepted_cov_mats=[[[1, 0], [0, 1]], []])
    density = kernel.pdf(
        mapping, manager,
        manager.accepted_parameters_bds.value()[1], [0.3, 0.1])
    self.assertTrue(isinstance(density, float))
def test_return_value(self):
    """pdf() of the default joint kernel returns a plain float."""
    binom = Binomial([10, 0.2])
    norm1 = Normal([0.1, 0.01])
    norm2 = Normal([0.3, norm1])
    graph = Normal([binom, norm2])
    manager = AcceptedParametersManager([graph])
    backend = Backend()
    kernel = DefaultKernel([norm1, norm2, binom])
    manager.update_broadcast(
        backend, [[2, 0.4, 0.09], [3, 0.2, 0.008]], np.array([0.5, 0.2]))
    kernel_parameters = [
        manager.get_accepted_parameters_bds_values(krnl.models)
        for krnl in kernel.kernels
    ]
    manager.update_kernel_values(backend, kernel_parameters)
    mapping, mapping_index = manager.get_mapping(manager.model)
    manager.update_broadcast(
        backend, accepted_cov_mats=[[[1, 0], [0, 1]], []])
    density = kernel.pdf(mapping, manager, 1, [2, 0.3, 0.1])
    self.assertTrue(isinstance(density, float))
def test_Student_T(self):
    """calculate_cov for a joint kernel holding a single multivariate
    Student-T kernel over two normals returns one 2x2 covariance matrix.

    Fix: removed a leftover debug ``print(covs)`` that polluted test output.
    """
    N1 = Normal([0.1, 0.01])
    N2 = Normal([0.3, N1])
    graph = Normal([N1, N2])
    Manager = AcceptedParametersManager([graph])
    backend = Backend()
    kernel = JointPerturbationKernel(
        [MultivariateStudentTKernel([N1, N2], df=2)])
    Manager.update_broadcast(
        backend, [[0.27, 0.097], [0.32, 0.012]], np.array([1, 1]))
    kernel_parameters = []
    for krnl in kernel.kernels:
        kernel_parameters.append(
            Manager.get_accepted_parameters_bds_values(krnl.models))
    Manager.update_kernel_values(backend, kernel_parameters)
    covs = kernel.calculate_cov(Manager)
    # One sub-kernel -> one covariance matrix, of size 2 (two parameters).
    self.assertTrue(len(covs) == 1)
    self.assertTrue(len(covs[0]) == 2)
def test(self):
    """The default kernel splits into two sub-kernels: a continuous one with
    a 2x2 covariance matrix and a discrete one with an empty covariance."""
    binom = Binomial([10, 0.2])
    norm1 = Normal([0.1, 0.01])
    norm2 = Normal([0.3, norm1])
    graph = Normal([binom, norm2])
    manager = AcceptedParametersManager([graph])
    backend = Backend()
    kernel = DefaultKernel([norm1, norm2, binom])
    manager.update_broadcast(
        backend, [[2, 0.27, 0.097], [3, 0.32, 0.012]], np.array([1, 1]))
    kernel_parameters = [
        manager.get_accepted_parameters_bds_values(krnl.models)
        for krnl in kernel.kernels
    ]
    manager.update_kernel_values(backend, kernel_parameters)
    covs = kernel.calculate_cov(manager)
    self.assertTrue(len(covs) == 2)
    self.assertTrue(len(covs[0]) == 2)
    self.assertTrue(not covs[1])
def test_Student_T(self):
    """With a fixed rng seed, updating via a multivariate Student-T kernel
    perturbs both normal parameters to the expected values.

    Fix: removed a leftover debug ``print(perturbed_values_and_models)``
    that polluted test output.
    """
    N1 = Normal([0.1, 0.01])
    N2 = Normal([0.3, N1])
    graph = Normal([N1, N2])
    Manager = AcceptedParametersManager([graph])
    backend = Backend()
    kernel = JointPerturbationKernel(
        [MultivariateStudentTKernel([N1, N2], df=2)])
    Manager.update_broadcast(
        backend, [[0.27, 0.097], [0.32, 0.012]], np.array([1, 1]),
        accepted_cov_mats=[[[0.01, 0], [0, 0.01]], []])
    kernel_parameters = []
    for krnl in kernel.kernels:
        kernel_parameters.append(
            Manager.get_accepted_parameters_bds_values(krnl.models))
    Manager.update_kernel_values(backend, kernel_parameters=kernel_parameters)
    rng = np.random.RandomState(1)
    perturbed_values_and_models = kernel.update(Manager, 1, rng)
    self.assertEqual(perturbed_values_and_models,
                     [(N1, [0.2107982411716391]),
                      (N2, [-0.049106838502166614])])
class DrawFromPosterior(InferenceMethod):
    """Redraw simulations from parameter values stored in a posterior journal.

    For each accepted posterior parameter set in the journal, one simulation
    is generated from the model, in parallel over the backend.

    Fixes: removed three leftover debug ``print`` statements and a
    commented-out dead code line from ``_sample_parameter``, plus the
    ``get_parameters()`` call whose only consumer was one of those prints.
    """
    model = None
    rng = None
    n_samples = None
    backend = None
    n_samples_per_param = None  # this needs to be there otherwise it does not instantiate correctly

    def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
        self.model = root_models
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.discard_too_large_values = discard_too_large_values
        # An object managing the bds objects
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)
        self.n_samples_per_param = 1

    def sample(self, journal_file):
        """Generate one simulation per accepted parameter set in the journal.

        Parameters
        ----------
        journal_file : str
            Path of the journal file to load the accepted parameters from.

        Returns
        -------
        tuple of np.ndarray
            (parameters, simulations), squeezed along singleton axes.
        """
        journal = Journal.fromFile(journal_file)
        accepted_parameters = journal.get_accepted_parameters(-1)
        accepted_weights = journal.get_weights(-1)
        n_samples = journal.configuration["n_samples"]

        self.accepted_parameters_manager.broadcast(self.backend, 1)
        # Broadcast Accepted parameters and Accepted weights
        self.accepted_parameters_manager.update_broadcast(
            self.backend, accepted_parameters=accepted_parameters,
            accepted_weights=accepted_weights)

        # One rng per sample; seeds must fit in uint32 (< 2**32 - 1).
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples,
                                    dtype=np.uint32)
        rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
        index_arr = np.arange(0, n_samples, 1)
        # Pair each rng with the index of the accepted parameter set it draws.
        data_arr = [[rng_arr[i], index_arr[i]] for i in range(len(rng_arr))]
        data_pds = self.backend.parallelize(data_arr)

        parameters_simulations_pds = self.backend.map(self._sample_parameter, data_pds)
        parameters_simulations = self.backend.collect(parameters_simulations_pds)
        parameters, simulations = [list(t) for t in zip(*parameters_simulations)]

        parameters = np.squeeze(np.array(parameters))
        simulations = np.squeeze(np.array(simulations))

        return parameters, simulations

    def _sample_parameter(self, data, npc=None):
        """Simulate once from the accepted parameter set selected by ``data``.

        ``data`` is a (rng, index) pair; returns (parameter, y_sim).
        """
        if isinstance(data, np.ndarray):
            data = data.tolist()
        rng = data[0]
        index = data[1]
        # Re-seed so worker rngs diverge even if the backend copied them.
        rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))

        parameter = self.accepted_parameters_manager.accepted_parameters_bds.value()[index]
        parameter_list = [x[0] for x in parameter]
        self.set_parameters(parameter_list)
        y_sim = self.simulate(n_samples_per_param=1)

        return parameter, y_sim
class DrawFromPrior(InferenceMethod):
    """Draw parameter values from the prior and simulate data for each draw.

    Fixes: reconstructed the seed-generation comment that had been garbled by
    line mangling, and removed commented-out dead reshape code in ``sample``.
    """
    model = None
    rng = None
    n_samples = None
    backend = None
    n_samples_per_param = None  # this needs to be there otherwise it does not instantiate correctly

    def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
        self.model = root_models
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.discard_too_large_values = discard_too_large_values
        # An object managing the bds objects
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)

    def sample(self, n_samples, n_samples_per_param):
        """Draw ``n_samples`` parameter sets from the prior and simulate
        ``n_samples_per_param`` datasets for each, in parallel.

        Returns
        -------
        tuple of np.ndarray
            (parameters, simulations), squeezed along singleton axes.
        """
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.accepted_parameters_manager.broadcast(self.backend, 1)

        # Generate an array of seeds that need to be different one from the other.
        # Seeds cannot use int64 as they need to be < 2**32 - 1. Note that this
        # is not perfect; there is still a small possibility of some seeds being
        # equal. An idea would be to use rng.choice without replacement, but that
        # is too expensive; this would likely not change the performance much.
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples,
                                    dtype=np.uint32)
        # Check how many equal seeds there are and remove them by bumping one of
        # each equal adjacent pair in the sorted array:
        sorted_seed_arr = np.sort(seed_arr)
        indices = sorted_seed_arr[:-1] == sorted_seed_arr[1:]
        if np.sum(indices) > 0:
            sorted_seed_arr[:-1][indices] = sorted_seed_arr[:-1][indices] + 1
        rng_arr = np.array([np.random.RandomState(seed) for seed in sorted_seed_arr])
        rng_pds = self.backend.parallelize(rng_arr)

        parameters_simulations_pds = self.backend.map(self._sample_parameter, rng_pds)
        parameters_simulations = self.backend.collect(parameters_simulations_pds)
        parameters, simulations = [list(t) for t in zip(*parameters_simulations)]

        parameters = np.squeeze(np.array(parameters))
        simulations = np.squeeze(np.array(simulations))

        return parameters, simulations

    def sample_in_chunks(self, n_samples, n_samples_per_param, max_chunk_size=10 ** 4):
        """This splits the data generation in chunks. It is useful when
        generating large datasets with MPI backend, which gives an overflow
        error due to pickling very large objects."""
        parameters_list = []
        simulations_list = []
        samples_to_sample = n_samples
        while samples_to_sample > 0:
            parameters_part, simulations_part = self.sample(
                min(samples_to_sample, max_chunk_size), n_samples_per_param)
            samples_to_sample -= max_chunk_size
            parameters_list.append(parameters_part)
            simulations_list.append(simulations_part)
        parameters = np.concatenate(parameters_list)
        simulations = np.concatenate(simulations_list)
        return parameters, simulations

    def _sample_parameter(self, rng, npc=None):
        """Draw one parameter set from the prior and simulate from it,
        retrying while the simulation overflows float32 (if enabled)."""
        ok_flag = False
        while not ok_flag:
            self.sample_from_prior(rng=rng)
            theta = self.get_parameters(self.model)
            y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc)

            # Accept only if there are no potential infinities (or if we do not
            # check for those). For instance, the Lorenz model may give too
            # large values sometimes (quite rarely).
            if np.sum(np.isinf(np.array(y_sim).astype("float32"))) > 0 and self.discard_too_large_values:
                print("y_sim contained too large values for float32; simulating again.")
            else:
                ok_flag = True

        return theta, y_sim
class DrawFromParamValues(InferenceMethod):
    """Simulate data from a user-supplied list of parameter values, one
    simulation per parameter set, in parallel over the backend.

    Fixes: reconstructed the seed-generation comment that had been garbled by
    line mangling and removed commented-out dead code in ``_sample_parameter``.
    """
    model = None
    rng = None
    n_samples = None
    backend = None
    n_samples_per_param = None  # this needs to be there otherwise it does not instantiate correctly

    def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
        self.model = root_models
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.discard_too_large_values = discard_too_large_values
        # An object managing the bds objects
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)

    def sample(self, param_values):
        """Simulate one dataset for each entry of ``param_values``.

        Parameters
        ----------
        param_values : list
            List of parameter values to simulate from.

        Returns
        -------
        tuple of np.ndarray
            (parameters, simulations), squeezed along singleton axes.
        """
        self.param_values = param_values  # list of parameter values
        self.n_samples = len(param_values)
        self.accepted_parameters_manager.broadcast(self.backend, 1)

        # Generate an array of seeds that need to be different one from the other.
        # Seeds cannot use int64 as they need to be < 2**32 - 1. Note that this
        # is not perfect; there is still a small possibility of some seeds being
        # equal. An idea would be to use rng.choice without replacement, but that
        # is too expensive; this would likely not change the performance much.
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max,
                                    size=self.n_samples, dtype=np.uint32)
        # Check how many equal seeds there are and remove them by bumping one of
        # each equal adjacent pair in the sorted array:
        sorted_seed_arr = np.sort(seed_arr)
        indices = sorted_seed_arr[:-1] == sorted_seed_arr[1:]
        if np.sum(indices) > 0:
            sorted_seed_arr[:-1][indices] = sorted_seed_arr[:-1][indices] + 1
        rng_arr = np.array([np.random.RandomState(seed) for seed in sorted_seed_arr])

        # Zip each parameter value with its own rng:
        data_arr = list(zip(self.param_values, rng_arr))
        data_pds = self.backend.parallelize(data_arr)

        parameters_simulations_pds = self.backend.map(self._sample_parameter, data_pds)
        parameters_simulations = self.backend.collect(parameters_simulations_pds)
        parameters, simulations = [list(t) for t in zip(*parameters_simulations)]

        parameters = np.array(parameters).squeeze()
        simulations = np.array(simulations).squeeze()

        return parameters, simulations

    def _sample_parameter(self, data, npc=None):
        """Simulate once from the (theta, rng) pair in ``data``, retrying
        while the simulation overflows float32 (if enabled)."""
        theta, rng = data[0], data[1]

        ok_flag = False
        while not ok_flag:
            # We assume that we have one single model here.
            y_sim = self.model[0].forward_simulate(theta, 1, rng=rng)
            # Accept only if there are no potential infinities (or if we do not
            # check for those). For instance, the Lorenz model may give too
            # large values sometimes (quite rarely).
            if np.sum(np.isinf(np.array(y_sim).astype("float32"))) > 0 and self.discard_too_large_values:
                print("y_sim contained too large values for float32; simulating again.")
            else:
                ok_flag = True

        return theta, y_sim