def run_model(self, params, minibatch=50, skip_feedback=False,
              keep_data=True, verbose=False):
    # Run forward model for params (in batches)
    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=len(params))
        desc = 'Run simulations '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    self.start_workers()
    final_params = []
    final_stats = []  # list of summary stats
    minibatches = self.iterate_minibatches(params, minibatch)
    done = False
    with pbar:
        while not done:
            active_list = []
            for w, p in zip(self.workers, self.pipes):
                try:
                    params_batch = next(minibatches)
                except StopIteration:
                    done = True
                    break
                active_list.append((w, p))
                self.log("Dispatching to worker (len = {})".format(
                    len(params_batch)))
                p.send(params_batch)
                self.log("Done")

            n_remaining = len(active_list)
            while n_remaining > 0:
                self.log("Listening to worker")
                msg = self.queue.get()
                if type(msg) == int:
                    self.log("Received int")
                    pbar.update(msg)
                elif type(msg) == tuple:
                    self.log("Received results")
                    stats, params = self.filter_data(
                        *msg, skip_feedback=skip_feedback)
                    final_stats += stats
                    final_params += params
                    n_remaining -= 1
                else:
                    self.log("Warning: Received unknown message of type "
                             "{}".format(type(msg)))

    self.stop_workers()

    # TODO: for n_reps > 1 duplicate params; reshape stats array
    # n_samples x n_reps x dim theta
    params = np.array(final_params)
    # n_samples x n_reps x dim summary stats
    stats = np.array(final_stats)
    stats = stats.squeeze(axis=1)
    return params, stats
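# A minimal sketch of the worker side of the protocol that run_model assumes:
# each worker receives parameter batches over its pipe and reports back on the
# shared queue, where an int means a progress increment and a tuple carries
# the (stats, params) results for a batch. Everything below (worker_loop,
# simulate_batch, the None shutdown sentinel) is hypothetical, inferred from
# the dispatch loop above rather than taken from the original code.
def worker_loop(pipe, queue, simulate_batch):
    while True:
        params_batch = pipe.recv()      # blocks until run_model sends a batch
        if params_batch is None:        # hypothetical shutdown sentinel
            break
        stats, params = simulate_batch(params_batch)
        queue.put(len(params_batch))    # int -> pbar.update(msg) in run_model
        queue.put((stats, params))      # tuple -> collected via filter_data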
def draw_params(self, n_samples, skip_feedback=False, prior_mixin=0,
                verbose=True, leave_pbar=True):
    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=n_samples, leave=leave_pbar)
        desc = 'Draw parameters '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    # collect valid parameter vectors from the prior
    params = []  # list of parameter vectors
    with pbar:
        i = 0
        while i < n_samples:
            # sample parameter, falling back to the prior with
            # probability prior_mixin even when a proposal is set
            if self.proposal is None or \
                    self.rng.random_sample() < prior_mixin:
                proposed_param = self.prior.gen(n_samples=1)
            else:
                proposed_param = self.proposal.gen(n_samples=1)

            # check if parameter vector is valid
            response = self._feedback_proposed_param(proposed_param)
            if response == 'accept' or skip_feedback:
                # add valid param vector to list
                params.append(proposed_param.reshape(-1))
                i += 1
                pbar.update(1)
            elif response == 'resample':
                # continue without incrementing i or updating the bar
                continue
            else:
                raise ValueError('response not supported')

    return params
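# The prior_mixin branch above means that, even when a proposal is set, each
# candidate is drawn from the prior with probability prior_mixin. A
# self-contained toy illustration of that branching; the numpy samplers below
# are hypothetical stand-ins for the prior/proposal distribution objects.
import numpy as np

rng = np.random.RandomState(42)
prior_sample = lambda: rng.uniform(-1.0, 1.0, size=2)    # stands in for prior.gen
proposal_sample = lambda: rng.normal(0.5, 0.1, size=2)   # stands in for proposal.gen
prior_mixin = 0.2

n_from_prior = 0
for _ in range(1000):
    if rng.random_sample() < prior_mixin:
        theta = prior_sample()
        n_from_prior += 1
    else:
        theta = proposal_sample()
print(n_from_prior)  # roughly 200: about a prior_mixin fraction of draws use the prior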
def gen(self, params_list, n_reps=1, verbose=True):
    """Forward model for simulator for list of parameters

    Parameters
    ----------
    params_list : list of lists or 1-d np.arrays
        List of parameter vectors, each of which will be simulated
    n_reps : int
        If greater than 1, generate multiple samples given param
    verbose : bool or str
        If False, will not display progress bars. If a string is passed,
        it will be appended to the description of the progress bar.

    Returns
    -------
    data_list : list of lists containing n_reps dicts with data
        Repetitions are runs with the same parameter set. Each dictionary
        must contain a key 'data' that contains the results of the forward
        run. Additional entries can be present.
    """
    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=len(params_list))
        desc = 'Run simulations '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    with pbar:
        data_list = []
        for param in params_list:
            rep_list = []
            for r in range(n_reps):
                rep_list.append(self.gen_single(param))
            data_list.append(rep_list)
            pbar.update(1)

    return data_list
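# gen above only requires a subclass to implement gen_single, which must
# return a dict with at least a 'data' key. A minimal hypothetical simulator
# along those lines (GaussianSim is illustrative, not from the original code):
import numpy as np

class GaussianSim:
    def __init__(self, noise=0.1, seed=None):
        self.noise = noise
        self.rng = np.random.RandomState(seed)

    def gen_single(self, param):
        # forward model: observe the parameter vector plus Gaussian noise
        param = np.asarray(param, dtype=float).reshape(-1)
        return {'data': param + self.noise * self.rng.randn(param.size)}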
def gen(self, n_samples, n_reps=1, skip_feedback=False, prior_mixin=0,
        minibatch=50, keep_data=True, verbose=True, leave_pbar=True):
    """Draw parameters and run forward model

    Parameters
    ----------
    n_samples : int
        Number of samples
    n_reps : int
        Number of repetitions per parameter sample
    skip_feedback : bool
        If True, feedback checks on params, data and sum stats are skipped
    verbose : bool or str
        If False, will not display progress bars. If a string is passed,
        it will be appended to the description of the progress bar.

    Returns
    -------
    params : n_samples x n_reps x n_params
        Parameters
    stats : n_samples x n_reps x n_summary
        Summary statistics of data
    """
    assert n_reps == 1, 'n_reps > 1 is not yet supported'

    params = self.draw_params(n_samples=n_samples,
                              skip_feedback=skip_feedback,
                              prior_mixin=prior_mixin,
                              verbose=verbose,
                              leave_pbar=leave_pbar)

    # Run forward model for params (in batches)
    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=len(params), leave=leave_pbar)
        desc = 'Run simulations '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    final_params = []
    final_stats = []  # list of summary stats
    with pbar:
        for params_batch in self.iterate_minibatches(params, minibatch):
            # run forward model for all params, each n_reps times
            result = self.model.gen(params_batch, n_reps=n_reps, pbar=pbar)

            stats, params = self.process_batch(params_batch, result,
                                               skip_feedback=skip_feedback)
            final_params += params
            final_stats += stats

    # TODO: for n_reps > 1 duplicate params; reshape stats array
    # n_samples x dim theta
    params = np.array(final_params)
    # n_samples x dim summary stats
    stats = np.array(final_stats)
    if len(final_stats) > 0:
        stats = stats.squeeze(axis=1)
    return params, stats
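# gen above relies on iterate_minibatches to chunk the accepted parameters
# before dispatching them to the model. A self-contained sketch of such a
# batching helper, with the behavior inferred from its usage here rather than
# copied from the original implementation:
import numpy as np

def iterate_minibatches_sketch(params, minibatch=50):
    for start in range(0, len(params), minibatch):
        yield params[start:start + minibatch]

batches = list(iterate_minibatches_sketch(np.zeros((120, 3)), minibatch=50))
print([len(b) for b in batches])  # [50, 50, 20]: the last batch may be smaller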
def run(self, n_train=100, n_rounds=2, epochs=100, minibatch=50,
        monitor=None, **kwargs):
    """Run algorithm

    Parameters
    ----------
    n_train : int or list of ints
        Number of data points drawn per round. If a list is passed, the
        nth list element specifies the number of training examples in the
        nth round. If there are fewer list elements than rounds, the last
        list element is used.
    n_rounds : int
        Number of rounds
    epochs : int
        Number of epochs used for neural network training
    minibatch : int
        Size of the minibatches used for neural network training
    monitor : list of str
        Names of variables to record during training along with the value
        of the loss function. The observables attribute contains all
        possible variables that can be monitored
    kwargs : additional keyword arguments
        Additional arguments for the Trainer instance

    Returns
    -------
    logs : list of dicts
        Dictionaries contain information logged while training the
        networks
    trn_datasets : list of (params, stats)
        Training datasets
    posteriors : list of distributions
        Posterior after each round
    """
    logs = []
    trn_datasets = []
    optim_state = []
    posteriors = []

    if not self.verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=n_rounds)
        desc = 'Round '
        pbar.set_description(desc)

    with pbar:
        for r in range(n_rounds):
            self.round += 1

            # if round > 1, set new proposal distribution before sampling
            if self.round > 1:
                # posterior becomes new proposal prior
                proposal = self.predict(self.obs)  # see super

                # convert proposal to Student's t?
                if self.convert_to_T is not None:
                    if type(self.convert_to_T) == int:
                        dofs = self.convert_to_T
                    else:
                        dofs = 10
                    proposal = proposal.convert_to_T(dofs=dofs)

                self.generator.proposal = proposal

            # number of training examples to generate for this round
            if type(n_train) == list:
                try:
                    n_train_round = n_train[self.round - 1]
                except IndexError:
                    n_train_round = n_train[-1]
            else:
                n_train_round = n_train

            # draw training data (z-transformed params and stats)
            verbose = '(round {}) '.format(
                self.round) if self.verbose else False
            trn_data = self.gen(n_train_round, verbose=verbose)

            # precompute importance weights
            iws = np.ones((n_train_round,))
            if self.generator.proposal is not None:
                params = self.params_std * trn_data[0] + self.params_mean
                p_prior = self.generator.prior.eval(params, log=False)
                p_proposal = self.generator.proposal.eval(params, log=False)
                iws *= p_prior / p_proposal

            trn_data = (trn_data[0], trn_data[1], iws)
            trn_datasets.append(trn_data)

            # pool training data across all rounds so far
            params_ = np.array([i for sub in trn_datasets for i in sub[0]])
            stats_ = np.array([i for sub in trn_datasets for i in sub[1]])
            iws_ = np.array([i for sub in trn_datasets for i in sub[2]])

            trn_data_round = (params_, stats_, iws_)

            trn_inputs = [self.network.params, self.network.stats,
                          self.network.iws]

            t = Trainer(self.network,
                        self.loss(N=n_train_round),
                        trn_data=trn_data_round,
                        trn_inputs=trn_inputs,
                        seed=self.gen_newseed(),
                        monitor=self.monitor_dict_from_names(monitor),
                        **kwargs)

            # recover adam state variables
            if self.recover_adam and len(optim_state) != 0:
                for p, value in zip(t.updates.keys(), optim_state):
                    p.set_value(value)

            # train
            logs.append(t.train(epochs=epochs,
                                minibatch=minibatch,
                                verbose=verbose))

            # save state of optimizer
            optim_state = [p.get_value() for p in t.updates.keys()]

            # append posterior to list
            posteriors.append(self.predict(self.obs))
            pbar.update(1)

    return logs, trn_datasets, posteriors
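# The importance weights computed in run above correct for drawing training
# samples from the proposal instead of the prior: w = p_prior / p_proposal,
# evaluated on de-standardized parameters. A self-contained numeric sketch,
# with scipy.stats densities as hypothetical stand-ins for the .eval calls:
import numpy as np
from scipy.stats import norm

theta = np.array([0.2, 0.8, 1.5])
p_prior = norm(0.0, 2.0).pdf(theta)      # stands in for prior.eval(params, log=False)
p_proposal = norm(1.0, 0.5).pdf(theta)   # stands in for proposal.eval(params, log=False)
iws = np.ones_like(theta) * (p_prior / p_proposal)
print(iws)  # samples overrepresented by the proposal receive weights below 1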
def train(self, epochs=250, minibatch=50, patience=20, monitor_every=None,
          stop_on_nan=False, strict_batch_size=False, tol=None,
          verbose=False, print_each_epoch=False):
    """Trains the model

    Parameters
    ----------
    epochs : int
        number of epochs (iterations per sample)
    minibatch : int
        minibatch size
    patience : int
        number of validation checks without improvement to tolerate before
        stopping early (only used if a validation set is present)
    monitor_every : int
        monitoring frequency
    stop_on_nan : bool (default: False)
        if True, will stop if loss becomes NaN
    tol : float
        tolerance criterion for stopping based on training set
    verbose : bool
        if True, print progress during training
    strict_batch_size : bool
        Whether to ignore last batch if it would be smaller than minibatch
    print_each_epoch : bool
        Whether to print a period '.' each epoch, useful to avoid timeouts
        in continuous integration

    Returns
    -------
    dict : containing loss values and possibly additional keys
    """
    # initialize variables
    iter = 0
    patience_left = patience

    if monitor_every is None:
        monitor_every = min(10 ** 5 / float(self.n_trn_data), 1.0)
    logger = sys.stdout

    # minibatch size
    minibatch = self.n_trn_data if minibatch is None else minibatch
    if minibatch > self.n_trn_data:
        minibatch = self.n_trn_data
    maxiter = int(self.n_trn_data / minibatch + 0.5) * epochs

    # placeholders for outputs
    trn_outputs = {}
    for key in self.trn_outputs_names:
        trn_outputs[key] = []
    if self.do_validation:
        trn_outputs['val_loss'], trn_outputs['val_loss_iter'] = [], []

    # cast trn_data
    self.trn_data = [x.astype(dtype) for x in self.trn_data]

    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=maxiter * minibatch)
        desc = 'Training on {0} samples'.format(self.trn_data[0].shape[0])
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    break_flag = False
    with pbar:
        # loop over epochs
        for epoch in range(epochs):
            # set learning rate
            lr_epoch = self.lr * (self.lr_decay ** epoch)
            self.lr_op.set_value(lr_epoch)

            # loop over batches
            for trn_batch in iterate_minibatches(
                    self.trn_data, minibatch, seed=self.gen_newseed(),
                    strict_batch_size=strict_batch_size):

                if self.assemble_extra_inputs is not None:
                    trn_batch = self.assemble_extra_inputs(tuple(trn_batch))
                else:
                    trn_batch = tuple(trn_batch)

                outputs = self.make_update(*trn_batch)

                for name, value in zip(self.trn_outputs_names, outputs):
                    trn_outputs[name].append(value)

                trn_loss = trn_outputs['loss'][-1]
                diff = self.loss - trn_loss
                self.loss = trn_loss

                # check for convergence
                if tol is not None:
                    if abs(diff) < tol:
                        break_flag = True
                        break

                # check for nan
                if stop_on_nan and np.isnan(trn_loss):
                    break_flag = True
                    break

                # validation-data tracking of convergence
                if self.do_validation:
                    epoch_frac = (iter * minibatch) / self.n_trn_data
                    if epoch_frac % monitor_every == 0:
                        # do validation
                        val_loss = self.validate()
                        trn_outputs['val_loss'].append(val_loss)
                        trn_outputs['val_loss_iter'].append(iter)

                        patience_left -= 1
                        if val_loss < self.best_val_loss:
                            self.best_val_loss = val_loss
                            patience_left = patience  # reset patience_left

                        if patience_left <= 0:
                            break_flag = True
                            if verbose:
                                print('Stopping at epoch = {0}, '
                                      'training loss = {1}, '
                                      'validation loss = {2}\n'.format(
                                          epoch, trn_loss, val_loss))
                            break

                pbar.update(minibatch)
                iter += 1

            if print_each_epoch:
                print('.')

            if break_flag:
                break

    # convert lists to arrays
    for name, value in trn_outputs.items():
        trn_outputs[name] = np.asarray(value)

    return trn_outputs
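# The early-stopping rule in train above: patience_left is decremented at
# every validation check and reset to patience whenever the validation loss
# improves; training stops when it hits zero. A self-contained sketch of that
# rule on a synthetic loss sequence (all names illustrative):
patience = 3
patience_left = patience
best_val_loss = float('inf')

for i, val_loss in enumerate([1.0, 0.8, 0.7, 0.72, 0.71, 0.73]):
    patience_left -= 1
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_left = patience  # reset on improvement
    if patience_left <= 0:
        print('stopping at check', i, 'best val loss', best_val_loss)
        break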
def gen(self, n_samples, n_reps=1, skip_feedback=False, verbose=True):
    """Draw parameters and run forward model

    Parameters
    ----------
    n_samples : int
        Number of samples
    n_reps : int
        Number of repetitions per parameter sample
    skip_feedback : bool
        If True, feedback checks on params, data and sum stats are skipped
    verbose : bool or str
        If False, will not display progress bars. If a string is passed,
        it will be appended to the description of the progress bar.

    Returns
    -------
    params : n_samples x n_reps x n_params
        Parameters
    stats : n_samples x n_reps x n_summary
        Summary statistics of data
    """
    assert n_reps == 1, 'n_reps > 1 is not yet supported'

    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=n_samples)
        desc = 'Draw parameters '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    # collect valid parameter vectors from the prior
    params = []  # list of parameter vectors
    with pbar:
        i = 0
        while i < n_samples:
            # sample parameter
            if self.proposal is None:
                proposed_param = self.prior.gen(n_samples=1)
            else:
                proposed_param = self.proposal.gen(n_samples=1)

            # check if parameter vector is valid
            response = self._feedback_proposed_param(proposed_param)
            if response == 'accept' or skip_feedback:
                # add valid param vector to list
                params.append(proposed_param.reshape(-1))
                i += 1
                pbar.update(1)
            elif response == 'resample':
                # continue without incrementing i or updating the bar
                continue
            else:
                raise ValueError('response not supported')

    # run forward model for all params, each n_reps times
    result = self.model.gen(params, n_reps=n_reps, verbose=verbose)

    # for every datum in data, check validity
    params_data_valid = []  # list of params with valid data
    data_valid = []  # list of lists containing n_reps dicts with data

    for param, datum in zip(params, result):
        # check validity
        response = self._feedback_forward_model(datum)
        if response == 'accept' or skip_feedback:
            data_valid.append(datum)
            # if data is accepted, accept the param as well
            params_data_valid.append(param)
        elif response == 'discard':
            continue
        else:
            raise ValueError('response not supported')

    # for every datum, calculate summary statistics
    final_params = []
    final_stats = []  # list of summary stats

    for param, datum in zip(params_data_valid, data_valid):
        # calculate summary statistics
        sum_stats = self.summary.calc(datum)  # n_reps x dim stats

        # check validity
        response = self._feedback_summary_stats(sum_stats)
        if response == 'accept' or skip_feedback:
            final_stats.append(sum_stats)
            # if sum stats are accepted, accept the param as well
            final_params.append(param)
        elif response == 'discard':
            continue
        else:
            raise ValueError('response not supported')

    # TODO: for n_reps > 1 duplicate params; reshape stats array
    # n_samples x n_reps x dim theta
    params = np.array(final_params)
    # n_samples x n_reps x dim summary stats
    stats = np.array(final_stats)
    stats = stats.squeeze(axis=1)
    return params, stats
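# Each filtering stage in gen above drops a sample and its parameter vector
# together, so params stay aligned with the surviving data. A self-contained
# toy version of the data-validity stage; is_valid is a hypothetical stand-in
# for the _feedback_forward_model check:
import math

params = [0.1, 0.5, 0.9]
result = [{'data': 1.0}, {'data': float('nan')}, {'data': 2.0}]
is_valid = lambda datum: not math.isnan(datum['data'])

params_data_valid, data_valid = [], []
for param, datum in zip(params, result):
    if is_valid(datum):
        params_data_valid.append(param)
        data_valid.append(datum)
print(params_data_valid)  # [0.1, 0.9]: the param is dropped with its datum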
def gen(self, n_samples, n_reps=1, skip_feedback=False, prior_mixin=0,
        minibatch=50, keep_data=True, verbose=True):
    """Draw parameters and run forward model

    Parameters
    ----------
    n_samples : int
        Number of samples
    n_reps : int
        Number of repetitions per parameter sample
    skip_feedback : bool
        If True, feedback checks on params, data and sum stats are skipped
    verbose : bool or str
        If False, will not display progress bars. If a string is passed,
        it will be appended to the description of the progress bar.

    Returns
    -------
    params : n_samples x n_reps x n_params
        Parameters
    stats : n_samples x n_reps x n_summary
        Summary statistics of data
    """
    assert n_reps == 1, 'n_reps > 1 is not yet supported'

    params = self.draw_params(n_samples=n_samples,
                              skip_feedback=skip_feedback,
                              prior_mixin=prior_mixin,
                              verbose=verbose)

    # Run forward model for params (in batches)
    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=len(params))
        desc = 'Run simulations '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    final_params = []
    final_stats = []  # list of summary stats
    minibatches = self.iterate_minibatches(params, minibatch)
    done = False
    with pbar:
        while not done:
            active_list = []
            for w, p in zip(self.workers, self.pipes):
                try:
                    params_batch = next(minibatches)
                except StopIteration:
                    done = True
                    break
                active_list.append((w, p))
                self.log("Dispatching to worker (len = {})".format(
                    len(params_batch)))
                p.send(params_batch)
                self.log("Done")

            n_remaining = len(active_list)
            while n_remaining > 0:
                self.log("Listening to worker")
                msg = self.queue.get()
                if type(msg) == int:
                    self.log("Received int")
                    pbar.update(msg)
                elif type(msg) == tuple:
                    self.log("Received results")
                    stats, params = msg
                    final_stats += stats
                    final_params += params
                    n_remaining -= 1
                else:
                    self.log("Warning: Received unknown message of type "
                             "{}".format(type(msg)))

    # TODO: for n_reps > 1 duplicate params; reshape stats array
    # n_samples x n_reps x dim theta
    params = np.array(final_params)
    # n_samples x n_reps x dim summary stats
    stats = np.array(final_stats)
    stats = stats.squeeze(axis=1)
    return params, stats
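# The squeeze(axis=1) above removes the singleton n_reps axis, which is safe
# only while n_reps == 1 (asserted at the top of gen). A shape sketch:
import numpy as np

stats = np.array([[[1.0, 2.0]], [[3.0, 4.0]]])  # n_samples x n_reps x n_summary
print(stats.squeeze(axis=1).shape)              # (2, 2): n_samples x n_summary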
def train(self, epochs=250, minibatch=50, monitor_every=None,
          stop_on_nan=False, tol=None, verbose=False):
    """Trains the model

    Parameters
    ----------
    epochs : int
        number of epochs (iterations per sample)
    minibatch : int
        minibatch size
    monitor_every : int
        monitoring frequency
    stop_on_nan : bool (default: False)
        if True, will stop if loss becomes NaN
    tol : float
        tolerance criterion for stopping based on training set
    verbose : bool
        if True, print progress during training

    Returns
    -------
    dict : containing loss values and possibly additional keys
    """
    # initialize variables
    iter = 0

    # minibatch size
    minibatch = self.n_trn_data if minibatch is None else minibatch
    if minibatch > self.n_trn_data:
        minibatch = self.n_trn_data
    maxiter = int(self.n_trn_data / minibatch + 0.5) * epochs

    # placeholders for outputs
    trn_outputs = {}
    for key in self.trn_outputs_names:
        trn_outputs[key] = []

    # cast trn_data
    self.trn_data = [x.astype(dtype) for x in self.trn_data]

    if not verbose:
        pbar = no_tqdm()
    else:
        pbar = progressbar(total=maxiter * minibatch)
        desc = 'Training '
        if type(verbose) == str:
            desc += verbose
        pbar.set_description(desc)

    with pbar:
        # loop over epochs
        for epoch in range(epochs):
            # set learning rate
            lr_epoch = self.lr * (self.lr_decay ** epoch)
            self.lr_op.set_value(lr_epoch)

            # loop over batches
            for trn_batch in iterate_minibatches(self.trn_data, minibatch,
                                                 seed=self.gen_newseed()):
                trn_batch = tuple(trn_batch)

                outputs = self.make_update(*trn_batch)

                for name, value in zip(self.trn_outputs_names, outputs):
                    trn_outputs[name].append(value)

                trn_loss = trn_outputs['loss'][-1]
                diff = self.loss - trn_loss
                self.loss = trn_loss

                # check for convergence
                if tol is not None:
                    if abs(diff) < tol:
                        break

                # check for nan
                if stop_on_nan and np.isnan(trn_loss):
                    break

                pbar.update(minibatch)

    # convert lists to arrays
    for name, value in trn_outputs.items():
        trn_outputs[name] = np.asarray(value)

    return trn_outputs
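# The learning-rate schedule in train above is a plain exponential decay,
# lr_epoch = lr * lr_decay ** epoch, recomputed at the start of each epoch:
lr, lr_decay = 0.01, 0.95
for epoch in range(3):
    print(epoch, lr * lr_decay ** epoch)  # 0.01, 0.0095, 0.009025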