Example #1
    def run_model(self, params, minibatch=50, skip_feedback=False, keep_data=True, verbose=False):
        # Run forward model for params (in batches)
        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=len(params))
            desc = 'Run simulations '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        self.start_workers()
        final_params = []
        final_stats = []  # list of summary stats
        minibatches = self.iterate_minibatches(params, minibatch)
        done = False
        with pbar:
            while not done:
                active_list = []
                for w, p in zip(self.workers, self.pipes):
                    try:
                        params_batch = next(minibatches)
                    except StopIteration:
                        done = True
                        break

                    active_list.append((w, p))
                    self.log("Dispatching to worker (len = {})".format(len(params_batch)))
                    p.send(params_batch)
                    self.log("Done")

                n_remaining = len(active_list)
                while n_remaining > 0:
                    self.log("Listening to worker")
                    msg = self.queue.get()
                    if type(msg) == int:
                        self.log("Received int")
                        pbar.update(msg)
                    elif type(msg) == tuple:
                        self.log("Received results")
                        stats, params = self.filter_data(*msg, skip_feedback=skip_feedback)
                        final_stats += stats
                        final_params += params
                        n_remaining -= 1
                    else:
                        self.log("Warning: Received unknown message of type {}".format(type(msg)))

        self.stop_workers()

        # TODO: for n_reps > 1 duplicate params; reshape stats array

        # n_samples x dim theta
        params = np.array(final_params)

        # n_samples x n_reps x dim summary stats; drop the singleton
        # n_reps axis, guarding against an empty result
        stats = np.array(final_stats)
        if len(final_stats) > 0:
            stats = stats.squeeze(axis=1)

        return params, stats
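The dispatch/collect loop above is hard to follow in isolation. Below is a minimal, self-contained sketch of the same pattern; the _worker body, the batch size and the sum "statistic" are illustrative assumptions, not part of the library.

import multiprocessing as mp
import numpy as np

def _worker(conn, queue):
    # receive parameter batches over the pipe; push (stats, params) tuples
    while True:
        batch = conn.recv()
        if batch is None:  # shutdown signal
            break
        stats = [p.sum(keepdims=True) for p in batch]  # toy "simulation"
        queue.put((stats, batch))

if __name__ == '__main__':
    queue = mp.Queue()
    pipes, procs = [], []
    for _ in range(2):
        parent, child = mp.Pipe()
        proc = mp.Process(target=_worker, args=(child, queue))
        proc.start()
        pipes.append(parent)
        procs.append(proc)

    params = [np.random.rand(3) for _ in range(10)]
    batches = iter([params[i:i + 4] for i in range(0, len(params), 4)])

    final_stats, final_params, done = [], [], False
    while not done:
        n_active = 0
        for pipe in pipes:
            try:
                batch = next(batches)
            except StopIteration:
                done = True
                break
            pipe.send(batch)
            n_active += 1
        for _ in range(n_active):  # collect one result per dispatched batch
            stats, pars = queue.get()
            final_stats += stats
            final_params += pars

    for pipe in pipes:  # shut the workers down
        pipe.send(None)
    for proc in procs:
        proc.join()

    print(np.array(final_stats).shape)  # (10, 1)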
Example #2
    def draw_params(self,
                    n_samples,
                    skip_feedback=False,
                    prior_mixin=0,
                    verbose=True,
                    leave_pbar=True):
        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=n_samples, leave=leave_pbar)
            desc = 'Draw parameters '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        # collect valid parameter vectors from the prior
        params = []  # list of parameter vectors
        with pbar:
            i = 0
            while i < n_samples:
                # sample parameter
                if (self.proposal is None
                        or self.rng.random_sample() < prior_mixin):
                    proposed_param = self.prior.gen(n_samples=1)  # (1, n_params)
                else:
                    proposed_param = self.proposal.gen(n_samples=1)

                # check if parameter vector is valid
                response = self._feedback_proposed_param(proposed_param)
                if response == 'accept' or skip_feedback:
                    # add valid param vector to list
                    params.append(proposed_param.reshape(-1))
                    i += 1
                    pbar.update(1)
                elif response == 'resample':
                    # continue without incrementing i or updating the bar
                    continue
                else:
                    raise ValueError('response not supported')

            return params
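The prior-mixin logic above occasionally replaces a proposal draw with a prior draw, which keeps some probability mass on the full prior support. A minimal numpy sketch of this rejection loop, with hypothetical stand-ins for the prior, the proposal and the feedback check:

import numpy as np

rng = np.random.RandomState(42)
prior_mixin = 0.2  # fraction of draws taken from the prior

def sample_prior():  # stand-in for self.prior.gen
    return rng.uniform(-5.0, 5.0, size=2)

def sample_proposal():  # stand-in for self.proposal.gen
    return rng.normal(loc=1.0, scale=0.5, size=2)

def is_valid(theta):  # stand-in for _feedback_proposed_param
    return bool(np.all(np.abs(theta) < 5.0))

params, n_samples = [], 100
while len(params) < n_samples:
    if rng.random_sample() < prior_mixin:
        theta = sample_prior()
    else:
        theta = sample_proposal()
    if is_valid(theta):  # otherwise 'resample': loop again
        params.append(theta)

print(len(params))  # 100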
Example #3
    def gen(self, params_list, n_reps=1, verbose=True):
        """Forward model for simulator for list of parameters

        Parameters
        ----------
        params_list : list of lists or 1-d np.arrays
            List of parameter vectors, each of which will be simulated
        n_reps : int
            If greater than 1, generate multiple samples per parameter vector
        verbose : bool or str
            If False, will not display progress bars. If a string is passed,
            it will be appended to the description of the progress bar.

        Returns
        -------
        data_list : list of lists containing n_reps dicts with data
            Repetitions are independent forward runs with the same
            parameter set. Each dictionary must contain a key 'data' that
            holds the results of the forward run. Additional entries can
            be present.
        """
        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=len(params_list))
            desc = 'Run simulations '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        with pbar:
            data_list = []
            for param in params_list:
                rep_list = []
                for r in range(n_reps):
                    rep_list.append(self.gen_single(param))
                data_list.append(rep_list)
                pbar.update(1)

            return data_list
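The nesting convention (outer list over parameter vectors, inner list over repetitions) is easy to get wrong. A small self-contained sketch, with a hypothetical gen_single, showing the resulting structure:

import numpy as np

def gen_single(param):
    # hypothetical forward run: returns a dict with a 'data' key
    return {'data': np.asarray(param) + np.random.randn(len(param))}

params_list = [[0.0, 1.0], [2.0, 3.0]]
n_reps = 3
data_list = [[gen_single(p) for _ in range(n_reps)] for p in params_list]

# data_list[i][r]['data'] is repetition r for parameter vector i
assert len(data_list) == len(params_list)
assert all(len(reps) == n_reps for reps in data_list)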
Example #4
    def gen(self,
            n_samples,
            n_reps=1,
            skip_feedback=False,
            prior_mixin=0,
            minibatch=50,
            keep_data=True,
            verbose=True,
            leave_pbar=True):
        """Draw parameters and run forward model

        Parameters
        ----------
        n_samples : int
            Number of samples
        n_reps : int
            Number of repetitions per parameter sample
        skip_feedback : bool
            If True, feedback checks on params, data and sum stats are skipped
        verbose : bool or str
            If False, will not display progress bars. If a string is passed,
            it will be appended to the description of the progress bar.

        Returns
        -------
        params : n_samples x n_reps x n_params
            Parameters
        stats : n_samples x n_reps x n_summary
            Summary statistics of data
        """
        assert n_reps == 1, 'n_reps > 1 is not yet supported'

        params = self.draw_params(n_samples=n_samples,
                                  skip_feedback=skip_feedback,
                                  prior_mixin=prior_mixin,
                                  verbose=verbose,
                                  leave_pbar=leave_pbar)

        # Run forward model for params (in batches)
        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=len(params), leave=leave_pbar)
            desc = 'Run simulations '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        final_params = []
        final_stats = []  # list of summary stats
        with pbar:
            for params_batch in self.iterate_minibatches(params, minibatch):
                # run forward model for all params, each n_reps times
                result = self.model.gen(params_batch, n_reps=n_reps, pbar=pbar)

                stats, params = self.process_batch(params_batch,
                                                   result,
                                                   skip_feedback=skip_feedback)
                final_params += params
                final_stats += stats

        # TODO: for n_reps > 1 duplicate params; reshape stats array

        # n_samples x dim theta
        params = np.array(final_params)

        # n_samples x dim summary stats
        stats = np.array(final_stats)
        if len(final_stats) > 0:
            stats = stats.squeeze(axis=1)

        return params, stats
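iterate_minibatches is not shown in this excerpt; a plausible minimal equivalent, assuming it simply yields consecutive slices of the parameter list, would look like this:

import numpy as np

def iterate_minibatches(params, minibatch=50):
    # yield consecutive slices of at most `minibatch` parameter vectors
    for start in range(0, len(params), minibatch):
        yield params[start:start + minibatch]

params = [np.random.rand(3) for _ in range(120)]
print([len(b) for b in iterate_minibatches(params, 50)])  # [50, 50, 20]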
Example #5
    def run(self,
            n_train=100,
            n_rounds=2,
            epochs=100,
            minibatch=50,
            monitor=None,
            **kwargs):
        """Run algorithm

        Parameters
        ----------
        n_train : int or list of ints
            Number of data points drawn per round. If a list is passed, the
            nth list element specifies the number of training examples in the
            nth round. If there are fewer list elements than rounds, the last
            list element is used.
        n_rounds : int
            Number of rounds
        epochs : int
            Number of epochs used for neural network training
        minibatch : int
            Size of the minibatches used for neural network training
        monitor : list of str
            Names of variables to record during training along with the value
            of the loss function. The observables attribute contains all
            possible variables that can be monitored
        kwargs : additional keyword arguments
            Additional arguments for the Trainer instance

        Returns
        -------
        logs : list of dicts
            Dictionaries contain information logged while training the networks
        trn_datasets : list of (params, stats)
            Training datasets
        posteriors : list of distributions
            Posterior after each round
        """
        logs = []
        trn_datasets = []
        optim_state = []
        posteriors = []

        if not self.verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=n_rounds)
            desc = 'Round '
            pbar.set_description(desc)

        with pbar:
            for r in range(n_rounds):
                self.round += 1

                # if round > 1, set new proposal distribution before sampling
                if self.round > 1:
                    # posterior becomes new proposal prior
                    proposal = self.predict(self.obs)  # see super

                    # optionally convert the proposal to a Student's-T distribution
                    if self.convert_to_T is not None:
                        if type(self.convert_to_T) == int:
                            dofs = self.convert_to_T
                        else:
                            dofs = 10
                        proposal = proposal.convert_to_T(dofs=dofs)

                    self.generator.proposal = proposal

                # number of training examples to generate for this round
                if type(n_train) == list:
                    try:
                        n_train_round = n_train[self.round - 1]
                    except IndexError:
                        n_train_round = n_train[-1]
                else:
                    n_train_round = n_train

                # draw training data (z-transformed params and stats)
                verbose = '(round {}) '.format(
                    self.round) if self.verbose else False
                trn_data = self.gen(n_train_round, verbose=False)

                # precompute importance weights
                iws = np.ones((n_train_round, ))
                if self.generator.proposal is not None:
                    params = self.params_std * trn_data[0] + self.params_mean
                    p_prior = self.generator.prior.eval(params, log=False)
                    p_proposal = self.generator.proposal.eval(params,
                                                              log=False)
                    iws *= p_prior / p_proposal

                trn_data = (trn_data[0], trn_data[1], iws)
                trn_datasets.append(trn_data)

                params_ = np.array([i for sub in trn_datasets for i in sub[0]])
                stats_ = np.array([i for sub in trn_datasets for i in sub[1]])
                iws_ = np.array([i for sub in trn_datasets for i in sub[2]])

                trn_data_round = (params_, stats_, iws_)

                trn_inputs = [
                    self.network.params, self.network.stats, self.network.iws
                ]

                t = Trainer(self.network,
                            self.loss(N=n_train_round),
                            trn_data=trn_data_round,
                            trn_inputs=trn_inputs,
                            seed=self.gen_newseed(),
                            monitor=self.monitor_dict_from_names(monitor),
                            **kwargs)

                # recover adam state variables
                if self.recover_adam and len(optim_state) != 0:
                    for p, value in zip(t.updates.keys(), optim_state):
                        p.set_value(value)

                # train
                logs.append(
                    t.train(epochs=epochs,
                            minibatch=minibatch,
                            verbose=verbose))

                # save state of optimizer
                optim_state = [p.get_value() for p in t.updates.keys()]

                # append posterior to list
                posteriors.append(self.predict(self.obs))

                pbar.update(1)

            return logs, trn_datasets, posteriors
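The importance weights p_prior / p_proposal reweight samples drawn from the proposal so that training still targets the prior-based posterior. A self-contained 1-d illustration, using scipy.stats Gaussians in place of generator.prior and generator.proposal:

import numpy as np
from scipy.stats import norm

# illustrative 1-d stand-ins for generator.prior and generator.proposal
prior = norm(loc=0.0, scale=2.0)
proposal = norm(loc=1.0, scale=3.0)

params = proposal.rvs(size=1000, random_state=0)  # draws from the proposal
iws = prior.pdf(params) / proposal.pdf(params)    # importance weights

# self-normalized weights recover prior expectations from proposal draws
print(np.sum(iws * params) / np.sum(iws))  # close to the prior mean, 0.0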
Example #6
    def train(self,
              epochs=250,
              minibatch=50,
              patience=20,
              monitor_every=None,
              stop_on_nan=False,
              strict_batch_size=False,
              tol=None,
              verbose=False,
              print_each_epoch=False):
        """Trains the model

        Parameters
        ----------
        epochs : int
            number of epochs (iterations per sample)
        minibatch : int
            minibatch size
        patience : int
            number of validation checks without improvement on the
            validation loss before training is stopped early
        monitor_every : int
            monitoring frequency
        stop_on_nan : bool (default: False)
            if True, will stop if loss becomes NaN
        tol : float
            tolerance criterion for stopping based on training set
        verbose : bool
            if True, print progress during training
        strict_batch_size : bool
            Whether to ignore last batch if it would be smaller than minibatch
        print_each_epoch : bool
            Whether to print a period ('.') after each epoch; useful to
            avoid timeouts in continuous integration

        Returns
        -------
        dict : containing loss values and possibly additional keys
        """

        # initialize variables
        iter = 0
        patience_left = patience
        if monitor_every is None:
            monitor_every = min(10**5 / float(self.n_trn_data), 1.0)
        logger = sys.stdout

        # minibatch size
        minibatch = self.n_trn_data if minibatch is None else minibatch
        if minibatch > self.n_trn_data:
            minibatch = self.n_trn_data

        maxiter = int(self.n_trn_data / minibatch + 0.5) * epochs

        # placeholders for outputs
        trn_outputs = {}
        for key in self.trn_outputs_names:
            trn_outputs[key] = []

        if self.do_validation:
            trn_outputs['val_loss'], trn_outputs['val_loss_iter'] = [], []

        # cast trn_data
        self.trn_data = [x.astype(dtype) for x in self.trn_data]

        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=maxiter * minibatch)
            desc = 'Training on {0} samples'.format(self.trn_data[0].shape[0])
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        break_flag = False
        with pbar:
            # loop over epochs
            for epoch in range(epochs):
                # set learning rate
                lr_epoch = self.lr * (self.lr_decay**epoch)
                self.lr_op.set_value(lr_epoch)

                # loop over batches
                for trn_batch in iterate_minibatches(
                        self.trn_data,
                        minibatch,
                        seed=self.gen_newseed(),
                        strict_batch_size=strict_batch_size):

                    if self.assemble_extra_inputs is not None:
                        trn_batch = self.assemble_extra_inputs(
                            tuple(trn_batch))
                    else:
                        trn_batch = tuple(trn_batch)

                    outputs = self.make_update(*trn_batch)

                    for name, value in zip(self.trn_outputs_names, outputs):
                        trn_outputs[name].append(value)

                    trn_loss = trn_outputs['loss'][-1]
                    diff = self.loss - trn_loss
                    self.loss = trn_loss

                    # check for convergence
                    if tol is not None:
                        if abs(diff) < tol:
                            break_flag = True
                            break

                    # check for nan
                    if stop_on_nan and np.isnan(trn_loss):
                        break_flag = True
                        break

                    # validation-data tracking of convergence
                    if self.do_validation:
                        epoch_frac = (iter * minibatch) / self.n_trn_data
                        if epoch_frac % monitor_every == 0:  # do validation
                            val_loss = self.validate()
                            trn_outputs['val_loss'].append(val_loss)
                            trn_outputs['val_loss_iter'].append(iter)
                            patience_left -= 1

                            if val_loss < self.best_val_loss:
                                self.best_val_loss = val_loss
                                patience_left = patience  # reset patience_left

                            if patience_left <= 0:
                                break_flag = True
                                if verbose:
                                    print('Stopping at epoch = {0}, '
                                          'training loss = {1}, '
                                          'validation loss = {2}\n'.format(
                                              epoch, trn_loss, val_loss))
                                break
                    pbar.update(minibatch)
                    iter += 1
                if print_each_epoch:
                    print('.')
                if break_flag:
                    break

        # convert lists to arrays
        for name, value in trn_outputs.items():
            trn_outputs[name] = np.asarray(value)

        return trn_outputs
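The patience mechanism stops training once the validation loss has failed to improve for `patience` consecutive checks. A minimal sketch of just that logic, with a synthetic validation loss standing in for self.validate():

import numpy as np

rng = np.random.RandomState(0)
patience = 5
patience_left = patience
best_val_loss = np.inf

for step in range(1000):
    # synthetic validation loss: improves early, then plateaus
    val_loss = 1.0 / (1 + step) + 0.01 * rng.rand()
    patience_left -= 1
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_left = patience  # reset on improvement
    if patience_left <= 0:
        print('early stop at step', step)
        break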
Example #7
    def gen(self, n_samples, n_reps=1, skip_feedback=False, verbose=True):
        """Draw parameters and run forward model

        Parameters
        ----------
        n_samples : int
            Number of samples
        n_reps : int
            Number of repetitions per parameter sample
        skip_feedback : bool
            If True, feedback checks on params, data and sum stats are skipped
        verbose : bool or str
            If False, will not display progress bars. If a string is passed,
            it will be appended to the description of the progress bar.

        Returns
        -------
        params : n_samples x n_reps x n_params
            Parameters
        stats : n_samples x n_reps x n_summary
            Summary statistics of data
        """
        assert n_reps == 1, 'n_reps > 1 is not yet supported'

        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=n_samples)
            desc = 'Draw parameters '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        # collect valid parameter vectors from the prior
        params = []  # list of parameter vectors
        with pbar:
            i = 0
            while i < n_samples:
                # sample parameter
                if self.proposal is None:
                    proposed_param = self.prior.gen(n_samples=1)  # (1, n_params)
                else:
                    proposed_param = self.proposal.gen(n_samples=1)

                # check if parameter vector is valid
                response = self._feedback_proposed_param(proposed_param)
                if response == 'accept' or skip_feedback:
                    # add valid param vector to list
                    params.append(proposed_param.reshape(-1))
                    i += 1
                    pbar.update(1)
                elif response == 'resample':
                    # continue without incrementing i or updating the bar
                    continue
                else:
                    raise ValueError('response not supported')

        # run forward model for all params, each n_reps times
        result = self.model.gen(params, n_reps=n_reps, verbose=verbose)

        # for every datum in data, check validity
        params_data_valid = []  # list of params with valid data
        data_valid = []  # list of lists containing n_reps dicts with data
        for param, datum in zip(params, result):
            # check validity
            response = self._feedback_forward_model(datum)
            if response == 'accept' or skip_feedback:
                data_valid.append(datum)
                # if data is accepted, accept the param as well
                params_data_valid.append(param)
            elif response == 'discard':
                continue
            else:
                raise ValueError('response not supported')

        # for every datum, calculate summary stats
        final_params = []
        final_stats = []  # list of summary stats
        for param, datum in zip(params_data_valid, data_valid):
            # calculate summary statistics
            sum_stats = self.summary.calc(datum)  # n_reps x dim stats

            # check validity
            response = self._feedback_summary_stats(sum_stats)
            if response == 'accept' or skip_feedback:
                final_stats.append(sum_stats)
                # if sum stats is accepted, accept the param as well
                final_params.append(param)
            elif response == 'discard':
                continue
            else:
                raise ValueError('response not supported')

        # TODO: for n_reps > 1 duplicate params; reshape stats array

        # n_samples x dim theta
        params = np.array(final_params)

        # n_samples x n_reps x dim summary stats; drop the singleton
        # n_reps axis, guarding against an empty result
        stats = np.array(final_stats)
        if len(final_stats) > 0:
            stats = stats.squeeze(axis=1)

        return params, stats
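Each stage above applies the same accept/discard protocol. A compact self-contained sketch of that filtering pattern, with a hypothetical feedback function in place of the _feedback_* hooks:

import numpy as np

def feedback(x):
    # hypothetical stand-in for the _feedback_* hooks above
    return 'accept' if np.all(np.isfinite(x)) else 'discard'

candidates = [np.array([0.1, 0.2]),
              np.array([np.nan, 1.0]),
              np.array([3.0, 4.0])]

kept = []
for x in candidates:
    response = feedback(x)
    if response == 'accept':
        kept.append(x)
    elif response == 'discard':
        continue
    else:
        raise ValueError('response not supported')

print(len(kept))  # 2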
Example #8
    def gen(self,
            n_samples,
            n_reps=1,
            skip_feedback=False,
            prior_mixin=0,
            minibatch=50,
            keep_data=True,
            verbose=True):
        """Draw parameters and run forward model

        Parameters
        ----------
        n_samples : int
            Number of samples
        n_reps : int
            Number of repetitions per parameter sample
        skip_feedback : bool
            If True, feedback checks on params, data and sum stats are skipped
        verbose : bool or str
            If False, will not display progress bars. If a string is passed,
            it will be appended to the description of the progress bar.

        Returns
        -------
        params : n_samples x n_reps x n_params
            Parameters
        stats : n_samples x n_reps x n_summary
            Summary statistics of data
        """
        assert n_reps == 1, 'n_reps > 1 is not yet supported'

        params = self.draw_params(n_samples=n_samples,
                                  skip_feedback=skip_feedback,
                                  prior_mixin=prior_mixin,
                                  verbose=verbose)

        # Run forward model for params (in batches)
        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=len(params))
            desc = 'Run simulations '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        final_params = []
        final_stats = []  # list of summary stats
        minibatches = self.iterate_minibatches(params, minibatch)
        done = False
        with pbar:
            while not done:
                active_list = []
                for w, p in zip(self.workers, self.pipes):
                    try:
                        params_batch = next(minibatches)
                    except StopIteration:
                        done = True
                        break

                    active_list.append((w, p))
                    self.log("Dispatching to worker (len = {})".format(
                        len(params_batch)))
                    p.send(params_batch)
                    self.log("Done")

                n_remaining = len(active_list)
                while n_remaining > 0:
                    self.log("Listening to worker")
                    msg = self.queue.get()
                    if type(msg) == int:
                        self.log("Received int")
                        pbar.update(msg)
                    elif type(msg) == tuple:
                        self.log("Received results")
                        stats, params = msg
                        final_stats += stats
                        final_params += params
                        n_remaining -= 1
                    else:
                        self.log(
                            "Warning: Received unknown message of type {}".
                            format(type(msg)))

        # TODO: for n_reps > 1 duplicate params; reshape stats array

        # n_samples x dim theta
        params = np.array(final_params)

        # n_samples x n_reps x dim summary stats; drop the singleton
        # n_reps axis, guarding against an empty result
        stats = np.array(final_stats)
        if len(final_stats) > 0:
            stats = stats.squeeze(axis=1)

        return params, stats
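The final reshaping relies on the n_reps axis being a singleton. A short sketch of the shapes involved, and of why the empty-result guard is needed:

import numpy as np

n_samples, n_reps, n_summary = 4, 1, 3
final_stats = [np.zeros((n_reps, n_summary)) for _ in range(n_samples)]

stats = np.array(final_stats)  # (4, 1, 3)
stats = stats.squeeze(axis=1)  # (4, 3): drop the singleton n_reps axis
print(stats.shape)

# with no accepted samples, np.array([]) has shape (0,), and
# squeeze(axis=1) raises, hence the `if len(final_stats) > 0` guard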
Example #9
    def train(self,
              epochs=250,
              minibatch=50,
              monitor_every=None,
              stop_on_nan=False,
              tol=None,
              verbose=False):
        """Trains the model

        Parameters
        ----------
        epochs : int
            number of epochs (iterations per sample)
        minibatch : int
            minibatch size
        monitor_every : int
            monitoring frequency
        stop_on_nan : bool (default: False)
            if True, will stop if loss becomes NaN
        tol : float
            tolerance criterion for stopping based on training set
        verbose : bool
            if True, print progress during training

        Returns
        -------
        dict : containing loss values and possibly additional keys
        """

        # initialize variables
        iter = 0

        # minibatch size
        minibatch = self.n_trn_data if minibatch is None else minibatch
        if minibatch > self.n_trn_data:
            minibatch = self.n_trn_data

        maxiter = int(self.n_trn_data / minibatch + 0.5) * epochs

        # placeholders for outputs
        trn_outputs = {}
        for key in self.trn_outputs_names:
            trn_outputs[key] = []

        # cast trn_data
        self.trn_data = [x.astype(dtype) for x in self.trn_data]

        if not verbose:
            pbar = no_tqdm()
        else:
            pbar = progressbar(total=maxiter * minibatch)
            desc = 'Training '
            if type(verbose) == str:
                desc += verbose
            pbar.set_description(desc)

        with pbar:
            # loop over epochs
            for epoch in range(epochs):
                # set learning rate
                lr_epoch = self.lr * (self.lr_decay**epoch)
                self.lr_op.set_value(lr_epoch)

                # loop over batches
                for trn_batch in iterate_minibatches(self.trn_data,
                                                     minibatch,
                                                     seed=self.gen_newseed()):
                    trn_batch = tuple(trn_batch)

                    outputs = self.make_update(*trn_batch)

                    for name, value in zip(self.trn_outputs_names, outputs):
                        trn_outputs[name].append(value)

                    trn_loss = trn_outputs['loss'][-1]
                    diff = self.loss - trn_loss
                    self.loss = trn_loss

                    # check for convergence
                    if tol is not None:
                        if abs(diff) < tol:
                            break

                    # check for nan
                    if stop_on_nan and np.isnan(trn_loss):
                        break

                    pbar.update(minibatch)

        # convert lists to arrays
        for name, value in trn_outputs.items():
            trn_outputs[name] = np.asarray(value)

        return trn_outputs
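The per-epoch learning rate follows a geometric decay, lr * lr_decay ** epoch. A one-liner makes the schedule explicit:

lr, lr_decay = 0.01, 0.95
schedule = [lr * lr_decay ** epoch for epoch in range(10)]
print(schedule)  # geometric decay: 0.01, 0.0095, 0.009025, ...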