Пример #1
0
 def set_model_params(self, models=None, f_therm=None, pi=None, f=None, label='ground state'):
     r"""Set the parameters of a multi-ensemble model.

     Parameters
     ----------
     models : iterable
         Every element must be an instance of a ``_Model`` subclass;
         the iterable is stored unchanged via ``update_model_params``.
     f_therm : array-like, 1-dimensional, numeric
         Checked with ``_types.assert_array`` and stored as a float array.
         Presumably one free energy per thermodynamic state -- confirm
         against the callers.
     pi, f : optional
         Forwarded unchanged to ``_StationaryModel.set_model_params``.
     label : str, default='ground state'
         Accepted for interface compatibility; this method does not use it.
     """
     # In a multi-ensemble the relative energy levels matter, so the
     # stationary part must be set without normalizing f.
     _StationaryModel.set_model_params(self, pi=pi, f=f, normalize_f=False)

     # Validate the thermodynamic free energies and take a float copy.
     _types.assert_array(f_therm, ndim=1, kind='numeric')
     therm_free_energies = _np.array(f_therm, dtype=float)

     # Every per-state model has to derive from the common model base class.
     for model in models:
         assert issubclass(model.__class__, _Model)

     self.update_model_params(models=models, f_therm=therm_free_energies)
Пример #2
0
    def _estimate(self, trajs):
        """Run the WHAM estimator on combined thermodynamic trajectories.

        Parameters
        ----------
        trajs : ndarray(T, 2) or list of ndarray(T_i, 2)
            Thermodynamic trajectories. Each trajectory is a (T_i, 2)-array
            with T_i time steps. The first column is the thermodynamic state
            index, the second column is the configuration state index.
            Arrays with more than two columns are accepted; only the first
            two columns are used.

        Returns
        -------
        self
            The estimator, with counts, energies and per-state models set.
        """
        # a single array is treated as a one-element list of trajectories
        if isinstance(trajs, _np.ndarray):
            trajs = [trajs]

        # every trajectory must be a numeric 2-d array with at least two columns
        assert _types.is_list(trajs)
        for traj in trajs:
            _types.assert_array(traj, ndim=2, kind='numeric')
            assert _np.shape(traj)[1] >= 2

        # count state visits from the first two columns of each trajectory
        index_trajs = [
            _np.ascontiguousarray(traj[:, :2]).astype(_np.intc) for traj in trajs
        ]
        self.state_counts_full = _util.state_counts(
            index_trajs, nthermo=self.nthermo, nstates=self.nstates_full)

        # the active set consists of all configuration states that were visited
        visited = self.state_counts_full.sum(axis=0) > 0
        self.active_set = _np.where(visited)[0]
        active_counts = self.state_counts_full[:, self.active_set]
        self.state_counts = _np.ascontiguousarray(active_counts.astype(_np.intc))
        active_bias = self.bias_energies_full[:, self.active_set]
        self.bias_energies = _np.ascontiguousarray(active_bias, dtype=_np.float64)

        # run WHAM; previously stored energies are handed in as starting values
        wham_result = _wham.estimate(
            self.state_counts,
            self.bias_energies,
            maxiter=self.maxiter,
            maxerr=self.maxerr,
            therm_energies=self.therm_energies,
            conf_energies=self.conf_energies,
            save_convergence_info=self.save_convergence_info)
        (self.therm_energies, self.conf_energies,
         self.increments, self.loglikelihoods) = wham_result

        # build one stationary model per thermodynamic state
        models = []
        for K in range(self.nthermo):
            stationary = _np.exp(self.therm_energies[K, _np.newaxis] -
                                 self.bias_energies[K, :] - self.conf_energies)
            biased_energies = self.bias_energies[K, :] + self.conf_energies
            models.append(
                _StationaryModel(pi=stationary,
                                 f=biased_energies,
                                 normalize_energy=False,
                                 label="K=%d" % K))

        # store the models and energies on this estimator
        self.set_model_params(
            models=models, f_therm=self.therm_energies, f=self.conf_energies)

        return self
Пример #3
0
    def _estimate(self, trajs):
        """Run the WHAM estimator on separate thermodynamic/discrete trajectories.

        Parameters
        ----------
        trajs : tuple or list of length 2
            ``trajs[0]`` holds the thermodynamic-state trajectories and
            ``trajs[1]`` the discrete-state trajectories. Paired entries must
            be numeric 1-d arrays of equal length.

        Returns
        -------
        self
            The estimator, with counts, energies and per-state models set.
        """
        # the input must be a pair (ttrajs, dtrajs)
        assert isinstance(trajs, (tuple, list))
        assert len(trajs) == 2
        ttrajs, dtrajs = trajs[0], trajs[1]

        # paired trajectories must be 1-d, numeric and equally long
        for therm_traj, disc_traj in zip(ttrajs, dtrajs):
            _types.assert_array(therm_traj, ndim=1, kind='numeric')
            _types.assert_array(disc_traj, ndim=1, kind='numeric')
            assert _np.shape(therm_traj)[0] == _np.shape(disc_traj)[0]

        # count state visits
        self.state_counts_full = _util.state_counts(
            ttrajs, dtrajs, nthermo=self.nthermo, nstates=self.nstates_full)

        # the active set consists of all configuration states that were visited
        visited = self.state_counts_full.sum(axis=0) > 0
        self.active_set = _np.where(visited)[0]
        active_counts = self.state_counts_full[:, self.active_set]
        self.state_counts = _np.ascontiguousarray(active_counts.astype(_np.intc))
        active_bias = self.bias_energies_full[:, self.active_set]
        self.bias_energies = _np.ascontiguousarray(active_bias, dtype=_np.float64)

        # run WHAM inside a progress-reporting context
        reporter = _ProgressReporter()
        stage = 'WHAM'
        with reporter.context(stage=stage):
            wham_result = _wham.estimate(
                self.state_counts,
                self.bias_energies,
                maxiter=self.maxiter,
                maxerr=self.maxerr,
                therm_energies=self.therm_energies,
                conf_energies=self.conf_energies,
                save_convergence_info=self.save_convergence_info,
                callback=_ConvergenceProgressIndicatorCallBack(
                    reporter, stage, self.maxiter, self.maxerr))
            (self.therm_energies, self.conf_energies,
             self.increments, self.loglikelihoods) = wham_result

        # build one stationary model per thermodynamic state
        models = []
        for K in range(self.nthermo):
            stationary = _np.exp(self.therm_energies[K, _np.newaxis] -
                                 self.bias_energies[K, :] - self.conf_energies)
            biased_energies = self.bias_energies[K, :] + self.conf_energies
            models.append(
                _StationaryModel(pi=stationary,
                                 f=biased_energies,
                                 normalize_energy=False,
                                 label="K=%d" % K))

        # store the models and energies on this estimator
        self.set_model_params(
            models=models, f_therm=self.therm_energies, f=self.conf_energies)

        return self
Пример #4
0
 def set_model_params(self, pi=None, f=None, normalize_f=True):
     r"""Set the stationary distribution and free energies of this model.

     Parameters
     ----------
     pi : ndarray(n)
         Stationary distribution. If not already normalized, pi will be
         scaled to fulfill :math:`\sum_i \pi_i = 1`. If f is not given,
         the free energies are computed from the given pi via
         :math:`f_i = - \log(\pi_i)` (before pi is rescaled).
     f : ndarray(n)
         Discrete-state free energies. If normalize_f = True, a constant
         will be added to normalize the stationary distribution. Otherwise
         f is left as given. If both pi and f are given, f takes precedence.
     normalize_f : bool, default=True
         If parametrized by free energy f, normalize them such that
         :math:`\sum_i \pi_i = 1`, which is achieved by :math:`\log \sum_i \exp(-f_i) = 0`.
         The flag is also stored as ``normalize_energy``.

     Raises
     ------
     ValueError
         If both pi and f are None.
     """
     # check input
     if pi is None and f is None:
         # the literals are joined by the parser, so each needs its own
         # separating space
         raise ValueError('Trying to initialize model without parameters: '
                          'Both pi (stationary distribution) '
                          'and f (free energy) are None. '
                          'At least one of them needs to be set.')
     # use f with preference
     if f is not None:
         _types.assert_array(f, ndim=1, kind='numeric')
         f = _np.array(f, dtype=float)
         if normalize_f:
             # normalize on the level of energies to achieve sum_i pi_i = 1
             f += _logsumexp(-f)
         pi = _np.exp(-f)
     else:  # if f is not given, use pi. pi can't be None at this point
         _types.assert_array(pi, ndim=1, kind='numeric')
         pi = _np.array(pi, dtype=float)
         f = -_np.log(pi)
     pi /= pi.sum()  # always normalize pi
     # set parameters
     self.update_model_params(pi=pi, f=f, normalize_energy=normalize_f)
     # set derived quantities
     self._nstates = len(pi)
Пример #5
0
    def set_model_params(self, pi=None, f=None, normalize_f=None, label=None):
        r"""Set all basic parameters of the stationary model.

        Parameters
        ----------
        pi : ndarray(n), optional
            Stationary distribution. Only used when f is None; the free
            energies are then computed via :math:`f_i = - \log(\pi_i)` and
            always shifted such that :math:`\log \sum_i \exp(-f_i) = 0`.
        f : ndarray(n), optional
            Discrete-state free energies. If normalize_f is truthy, a constant
            is added such that :math:`\log \sum_i \exp(-f_i) = 0`; otherwise f
            is left as given. pi is then computed via
            :math:`\pi_i = \exp(-f_i)`. If both pi and f are given, f takes
            precedence over pi.
        normalize_f : bool, optional, default=None
            Whether free energies passed as f are normalized. The value is
            also handed to ``update_model_params`` as ``normalize_energy``.
        label : str, default=None
            Human-readable description for the thermodynamic state of this
            model. May contain a temperature description, such as '300 K' or
            a description of bias energy such as 'unbiased' or 'Umbrella 1'.

        Raises
        ------
        ValueError
            If both pi and f are None.
        """
        # nothing to parametrize the model with
        if f is None and pi is None:
            raise ValueError(
                "Trying to initialize model without parameters: both pi (stationary distribution)"
                " and f (free energy) are None. At least one of them needs to be set.")

        if f is None:
            # parametrized by pi: derive f and always shift it
            _types.assert_array(pi, ndim=1, kind='numeric')
            pi = _np.array(pi, dtype=float)
            f = -_np.log(pi)
            f += _logsumexp(-f)
        else:
            # parametrized by f (preferred); pi is derived from it
            _types.assert_array(f, ndim=1, kind='numeric')
            f = _np.array(f, dtype=float)
            if normalize_f:
                # normalize on the level of energies to achieve sum_i pi_i = 1
                f += _logsumexp(-f)
            pi = _np.exp(-f)

        # hand the parameters over (original note: None does not overwrite)
        self.update_model_params(
            pi=pi, f=f, normalize_energy=normalize_f, label=label)
Пример #6
0
    def _estimate(self, trajs):
        """Run the DTRAM estimator (optionally initialised by WHAM).

        Parameters
        ----------
        trajs : tuple or list of length 2
            ``trajs[0]`` holds the thermodynamic-state trajectories and
            ``trajs[1]`` the discrete-state trajectories. Paired entries must
            be numeric 1-d arrays of equal length.

        Returns
        -------
        self
            The estimator, with counts, energies and one ``_ThermoMSM`` per
            thermodynamic state set.
        """
        # check input
        assert isinstance(trajs, (tuple, list))
        assert len(trajs) == 2
        ttrajs = trajs[0]
        dtrajs = trajs[1]
        # validate input
        for ttraj, dtraj in zip(ttrajs, dtrajs):
            _types.assert_array(ttraj, ndim=1, kind='numeric')
            _types.assert_array(dtraj, ndim=1, kind='numeric')
            assert _np.shape(ttraj)[0] == _np.shape(dtraj)[0]

        # harvest transition counts
        self.count_matrices_full = _util.count_matrices(
            ttrajs,
            dtrajs,
            self.lag,
            sliding=self.count_mode,
            sparse_return=False,
            nstates=self.nstates_full)
        # harvest state counts (for WHAM)
        self.state_counts_full = _util.state_counts(ttrajs,
                                                    dtrajs,
                                                    nthermo=self.nthermo,
                                                    nstates=self.nstates_full)

        # restrict to connected set
        # TODO: use improved cset
        _, cset = _cset.compute_csets_dTRAM(self.connectivity,
                                            self.count_matrices_full)
        self.active_set = cset
        # correct counts
        self.count_matrices = self.count_matrices_full[:, cset[:, _np.newaxis],
                                                       cset]
        self.count_matrices = _np.require(self.count_matrices,
                                          dtype=_np.intc,
                                          requirements=['C', 'A'])
        # correct bias matrix
        self.bias_energies = self.bias_energies_full[:, cset]
        self.bias_energies = _np.require(self.bias_energies,
                                         dtype=_np.float64,
                                         requirements=['C', 'A'])
        # correct state counts
        self.state_counts = self.state_counts_full[:, cset]
        self.state_counts = _np.require(self.state_counts,
                                        dtype=_np.intc,
                                        requirements=['C', 'A'])

        # run initialisation
        pg = _ProgressReporter()

        if self.init == 'wham':
            stage = 'WHAM init.'
            with pg.context(stage=stage):
                # increments and log-likelihoods of the initialisation are discarded
                self.therm_energies, self.conf_energies, _increments, _loglikelihoods = \
                    _wham.estimate(
                        self.state_counts, self.bias_energies,
                        maxiter=self.init_maxiter, maxerr=self.init_maxerr, save_convergence_info=0,
                        therm_energies=self.therm_energies, conf_energies=self.conf_energies,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.init_maxiter, self.init_maxerr))

        # run estimator
        stage = 'DTRAM'
        with pg.context(stage=stage):
            self.therm_energies, self.conf_energies, self.log_lagrangian_mult, \
                self.increments, self.loglikelihoods = _dtram.estimate(
                    self.count_matrices, self.bias_energies,
                    maxiter=self.maxiter, maxerr=self.maxerr,
                    log_lagrangian_mult=self.log_lagrangian_mult,
                    conf_energies=self.conf_energies,
                    save_convergence_info=self.save_convergence_info,
                    callback=_ConvergenceProgressIndicatorCallBack(
                        pg, stage, self.maxiter, self.maxerr))

        # compute models
        fmsms = [
            _dtram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.bias_energies,
                self.conf_energies, self.count_matrices,
                _np.zeros(shape=self.conf_energies.shape,
                          dtype=_np.float64), K) for K in range(self.nthermo)
        ]

        # each per-state matrix is cut down to its largest connected set
        active_sets = [
            _largest_connected_set(msm, directed=False) for msm in fmsms
        ]
        fmsms = [
            _np.ascontiguousarray((msm[lcc, :])[:, lcc])
            for msm, lcc in zip(fmsms, active_sets)
        ]

        models = []
        for i, (msm, acs) in enumerate(zip(fmsms, active_sets)):
            # The transition matrix only covers the states in acs, so the
            # stationary vector must be restricted to the same states and
            # renormalised; otherwise its length does not match the matrix.
            pi_acs = _np.exp(self.therm_energies[i] -
                             self.bias_energies[i, :] - self.conf_energies)[acs]
            pi_acs = pi_acs / pi_acs.sum()
            models.append(
                _ThermoMSM(
                    msm,
                    self.active_set[acs],
                    self.nstates_full,
                    pi=pi_acs,
                    dt_model=self.timestep_traj.get_scaled(self.lag)))

        # set model parameters to self
        self.set_model_params(models=models,
                              f_therm=self.therm_energies,
                              f=self.conf_energies)

        # done
        return self
Пример #7
0
    def _estimate(self, X):
        """Run the MBAR estimator.

        Parameters
        ----------
        X : sequence of three equally long lists
            Unpacked as ``(ttrajs, dtrajs_full, btrajs)``. Entries of the
            first two lists must be 1-d integer arrays, entries of the third
            2-d float arrays whose second dimension equals the number of
            thermodynamic states found in ``ttrajs``.

        Returns
        -------
        self
            The estimator, with counts, energies and per-state stationary
            models set.
        """
        ttrajs, dtrajs_full, btrajs = X

        # shape and type checks, group by group
        assert len(ttrajs) == len(dtrajs_full) == len(btrajs)
        for group, ndim, kind in ((ttrajs, 1, 'i'),
                                  (dtrajs_full, 1, 'i'),
                                  (btrajs, 2, 'f')):
            for arr in group:
                _types.assert_array(arr, ndim=ndim, kind=kind)

        # the dimensions are taken from the largest indices that occur
        self.nstates_full = max(_np.max(dtraj) for dtraj in dtrajs_full) + 1
        self.nthermo = max(_np.max(ttraj) for ttraj in ttrajs) + 1

        # dimensionality checks
        for ttraj, dtraj, btraj in zip(ttrajs, dtrajs_full, btrajs):
            assert ttraj.shape[0] == dtraj.shape[0] == btraj.shape[0]
            assert btraj.shape[1] == self.nthermo

        # cast types and change axis order if needed
        ttrajs = [
            _np.require(ttraj, dtype=_np.intc, requirements='C')
            for ttraj in ttrajs
        ]
        dtrajs_full = [
            _np.require(dtraj, dtype=_np.intc, requirements='C')
            for dtraj in dtrajs_full
        ]
        btrajs = [
            _np.require(btraj, dtype=_np.float64, requirements='C')
            for btraj in btrajs
        ]

        # find state visits
        self.state_counts_full = _util.state_counts(ttrajs, dtrajs_full)
        self.therm_state_counts_full = self.state_counts_full.sum(axis=1)

        # the active set consists of all configuration states that were visited
        visited = self.state_counts_full.sum(axis=0) > 0
        self.active_set = _np.sort(_np.where(visited)[0])
        active_counts = self.state_counts_full[:, self.active_set]
        self.state_counts = _np.ascontiguousarray(active_counts.astype(_np.intc))

        # pick the implementation and run it inside a progress context
        mbar = _mbar_direct if self.direct_space else _mbar
        pg = _ProgressReporter()
        with pg.context():
            mbar_result = mbar.estimate(
                self.state_counts_full.sum(axis=1),
                btrajs,
                dtrajs_full,
                maxiter=self.maxiter,
                maxerr=self.maxerr,
                save_convergence_info=self.save_convergence_info,
                callback=_ConvergenceProgressIndicatorCallBack(
                    pg, 'MBAR', self.maxiter, self.maxerr),
                n_conf_states=self.nstates_full)
            (self.therm_energies, self.unbiased_conf_energies_full,
             self.biased_conf_energies_full, self.increments) = mbar_result

        # log-likelihoods are filled with NaN in the shape of the increments;
        # None is stored when the increments cannot be multiplied
        try:
            self.loglikelihoods = _np.nan * self.increments
        except TypeError:
            self.loglikelihoods = None

        # build one stationary model per thermodynamic state
        models = []
        for K in range(self.nthermo):
            biased_energies = self.biased_conf_energies_full[K, self.active_set]
            models.append(
                _StationaryModel(f=biased_energies,
                                 normalize_energy=False,
                                 label="K=%d" % K))

        # store the models and energies on this estimator
        self.set_model_params(
            models=models,
            f_therm=self.therm_energies,
            f=self.unbiased_conf_energies_full[self.active_set])

        self.btrajs = btrajs

        return self
Пример #8
0
    def _estimate(self, X):
        """Run the TRAM (or, with equilibrium data, TRAMMBAR) estimator.

        Parameters
        ----------
        X : sequence of three equally long lists
            Unpacked as ``(ttrajs, dtrajs_full, btrajs)``. Entries of the
            first two lists must be 1-d integer arrays, entries of the third
            2-d float arrays whose second dimension equals the number of
            thermodynamic states found in ``ttrajs``. Presumably these are
            thermodynamic-state, discrete-state and bias-energy trajectories
            -- confirm against the public ``estimate`` entry point.

        Returns
        -------
        self
            The estimator, with connected sets, restricted counts, energies
            and one ``_ThermoMSM`` per thermodynamic state set.

        Raises
        ------
        RuntimeError
            If ``self.nstates_full`` was preset and is smaller than the
            number of states found in ``dtrajs_full``.
        """
        ttrajs, dtrajs_full, btrajs = X
        # shape and type checks
        assert len(ttrajs) == len(dtrajs_full) == len(btrajs)
        for t in ttrajs:
            _types.assert_array(t, ndim=1, kind='i')
        for d in dtrajs_full:
            _types.assert_array(d, ndim=1, kind='i')
        for b in btrajs:
            _types.assert_array(b, ndim=2, kind='f')
        # find dimensions; a preset nstates_full is kept as long as it is
        # not smaller than what the data requires
        nstates_full = max(_np.max(d) for d in dtrajs_full) + 1
        if self.nstates_full is None:
            self.nstates_full = nstates_full
        elif self.nstates_full < nstates_full:
            raise RuntimeError("Found more states (%d) than specified by nstates_full (%d)" % (
                nstates_full, self.nstates_full))
        self.nthermo = max(_np.max(t) for t in ttrajs) + 1
        # dimensionality checks
        for t, d, b, in zip(ttrajs, dtrajs_full, btrajs):
            assert t.shape[0] == d.shape[0] == b.shape[0]
            assert b.shape[1] == self.nthermo

        # cast types and change axis order if needed
        ttrajs = [_np.require(t, dtype=_np.intc, requirements='C') for t in ttrajs]
        dtrajs_full = [_np.require(d, dtype=_np.intc, requirements='C') for d in dtrajs_full]
        btrajs = [_np.require(b, dtype=_np.float64, requirements='C') for b in btrajs]

        # if equilibrium information is given, separate the trajectories:
        # self.equilibrium is used as a per-trajectory flag list; flagged
        # trajectories go to the equilibrium_* lists, the rest stay in
        # ttrajs / dtrajs_full / self.btrajs
        if self.equilibrium is not None:
            assert len(self.equilibrium) == len(ttrajs)
            _ttrajs, _dtrajs_full, _btrajs = ttrajs, dtrajs_full, btrajs
            ttrajs = [ttraj for eq, ttraj in zip(self.equilibrium, _ttrajs) if not eq]
            dtrajs_full = [dtraj for eq, dtraj in zip(self.equilibrium, _dtrajs_full) if not eq]
            self.btrajs = [btraj for eq, btraj in zip(self.equilibrium, _btrajs) if not eq]
            equilibrium_ttrajs = [ttraj for eq, ttraj in zip(self.equilibrium, _ttrajs) if eq]
            equilibrium_dtrajs_full = [dtraj for eq, dtraj in zip(self.equilibrium, _dtrajs_full) if eq]
            self.equilibrium_btrajs = [btraj for eq, btraj in zip(self.equilibrium, _btrajs) if eq]
        else: # set dummy values
            equilibrium_ttrajs = []
            equilibrium_dtrajs_full = []
            self.equilibrium_btrajs = []
            self.btrajs = btrajs

        # find state visits and transition counts (non-equilibrium data only)
        state_counts_full = _util.state_counts(ttrajs, dtrajs_full, nstates=self.nstates_full, nthermo=self.nthermo)
        count_matrices_full = _util.count_matrices(ttrajs, dtrajs_full,
            self.lag, sliding=self.count_mode, sparse_return=False, nstates=self.nstates_full, nthermo=self.nthermo)
        self.therm_state_counts_full = state_counts_full.sum(axis=1)

        # state visits of the equilibrium data; all zeros if there is none
        if self.equilibrium is not None:
            self.equilibrium_state_counts_full = _util.state_counts(equilibrium_ttrajs, equilibrium_dtrajs_full,
                nstates=self.nstates_full, nthermo=self.nthermo)
        else:
            self.equilibrium_state_counts_full = _np.zeros((self.nthermo, self.nstates_full), dtype=_np.float64)

        # compute the connected sets from both kinds of data; the second
        # return value becomes the active set
        pg = _ProgressReporter()
        stage = 'cset'
        with pg.context(stage=stage):
            self.csets, pcset = _cset.compute_csets_TRAM(
                self.connectivity, state_counts_full, count_matrices_full,
                equilibrium_state_counts=self.equilibrium_state_counts_full,
                ttrajs=ttrajs+equilibrium_ttrajs, dtrajs=dtrajs_full+equilibrium_dtrajs_full, bias_trajs=self.btrajs+self.equilibrium_btrajs,
                nn=self.nn, factor=self.connectivity_factor,
                callback=_IterationProgressIndicatorCallBack(pg, 'finding connected set', stage=stage))
            self.active_set = pcset

        # check for empty states
        for k in range(self.nthermo):
            if len(self.csets[k]) == 0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no samples after reducing to the connected set.', EmptyState)

        # deactivate samples not in the csets, states are *not* relabeled
        self.state_counts, self.count_matrices, self.dtrajs, _  = _cset.restrict_to_csets(
            self.csets,
            state_counts=state_counts_full, count_matrices=count_matrices_full,
            ttrajs=ttrajs, dtrajs=dtrajs_full)

        if self.equilibrium is not None:
            self.equilibrium_state_counts, _, self.equilibrium_dtrajs, _ =  _cset.restrict_to_csets(
                self.csets,
                state_counts=self.equilibrium_state_counts_full, ttrajs=equilibrium_ttrajs, dtrajs=equilibrium_dtrajs_full)
        else:
            self.equilibrium_state_counts = _np.zeros((self.nthermo, self.nstates_full), dtype=_np.intc) # (remember: no relabeling)
            self.equilibrium_dtrajs = []

        # self-consistency tests: the restricted counts must agree with the
        # restricted trajectories; the d>=0 masks skip negative entries,
        # presumably the marker restrict_to_csets uses for deactivated
        # samples -- confirm
        assert _np.all(self.state_counts >= _np.maximum(self.count_matrices.sum(axis=1), \
            self.count_matrices.sum(axis=2)))
        assert _np.all(_np.sum(
            [_np.bincount(d[d>=0], minlength=self.nstates_full) for d in self.dtrajs],
            axis=0) == self.state_counts.sum(axis=0))
        assert _np.all(_np.sum(
            [_np.bincount(t[d>=0], minlength=self.nthermo) for t, d in zip(ttrajs, self.dtrajs)],
            axis=0) == self.state_counts.sum(axis=1))
        if self.equilibrium is not None:
            assert _np.all(_np.sum(
                [_np.bincount(d[d >= 0], minlength=self.nstates_full) for d in self.equilibrium_dtrajs],
                axis=0) == self.equilibrium_state_counts.sum(axis=0))
            assert _np.all(_np.sum(
                [_np.bincount(t[d >= 0], minlength=self.nthermo) for t, d in zip(equilibrium_ttrajs, self.equilibrium_dtrajs)],
                axis=0) ==  self.equilibrium_state_counts.sum(axis=1))

        # check for empty states
        for k in range(self.state_counts.shape[0]):
            if self.count_matrices[k, :, :].sum() == 0 and self.equilibrium_state_counts[k, :].sum()==0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no transitions and no equilibrium data after reducing to the connected set.', EmptyState)

        # optional MBAR initialisation; skipped when biased_conf_energies is
        # already set. It uses the unrestricted counts and trajectories of
        # both data kinds.
        if self.init == 'mbar' and self.biased_conf_energies is None:
            if self.direct_space:
                mbar = _mbar_direct
            else:
                mbar = _mbar
            stage = 'MBAR init.'
            with pg.context(stage=stage):
                self.mbar_therm_energies, self.mbar_unbiased_conf_energies, \
                    self.mbar_biased_conf_energies, _ = mbar.estimate(
                        (state_counts_full.sum(axis=1)+self.equilibrium_state_counts_full.sum(axis=1)).astype(_np.intc),
                        self.btrajs+self.equilibrium_btrajs, dtrajs_full+equilibrium_dtrajs_full,
                        maxiter=self.init_maxiter, maxerr=self.init_maxerr,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.init_maxiter, self.init_maxerr),
                        n_conf_states=self.nstates_full)
            # copy, so that the TRAM result does not share its array with
            # the stored MBAR result
            self.biased_conf_energies = self.mbar_biased_conf_energies.copy()

        # run estimator
        if self.direct_space:
            tram = _tram_direct
            trammbar = _trammbar_direct
        else:
            tram = _tram
            trammbar = _trammbar
        #import warnings
        #with warnings.catch_warnings() as cm:
        # warnings.filterwarnings('ignore', RuntimeWarning)
        stage = 'TRAM'
        with pg.context(stage=stage):
            if self.equilibrium is None:
                self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
                    self.increments, self.loglikelihoods = tram.estimate(
                        self.count_matrices, self.state_counts, self.btrajs, self.dtrajs,
                        maxiter=self.maxiter, maxerr=self.maxerr,
                        biased_conf_energies=self.biased_conf_energies,
                        log_lagrangian_mult=self.log_lagrangian_mult,
                        save_convergence_info=self.save_convergence_info,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.maxiter, self.maxerr, subcallback=self.callback),
                        N_dtram_accelerations=self.N_dtram_accelerations)
            else: # use trammbar
                self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
                    self.increments, self.loglikelihoods = trammbar.estimate(
                        self.count_matrices, self.state_counts, self.btrajs, self.dtrajs,
                        equilibrium_therm_state_counts=self.equilibrium_state_counts.sum(axis=1).astype(_np.intc),
                        equilibrium_bias_energy_sequences=self.equilibrium_btrajs, equilibrium_state_sequences=self.equilibrium_dtrajs,
                        maxiter=self.maxiter, maxerr=self.maxerr,
                        save_convergence_info=self.save_convergence_info,
                        biased_conf_energies=self.biased_conf_energies,
                        log_lagrangian_mult=self.log_lagrangian_mult,
                        callback=_ConvergenceProgressIndicatorCallBack(
                            pg, stage, self.maxiter, self.maxerr, subcallback=self.callback),
                        N_dtram_accelerations=self.N_dtram_accelerations,
                        overcounting_factor=self.overcounting_factor)

        # compute models: one transition matrix per thermodynamic state,
        # restricted to the active set
        # NOTE(review): this always calls _tram.estimate_transition_matrix,
        # even when direct_space selected _tram_direct above -- confirm that
        # this is intended
        fmsms = [_np.ascontiguousarray((
            _tram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.biased_conf_energies, self.count_matrices, None,
                K)[self.active_set, :])[:, self.active_set]) for K in range(self.nthermo)]

        # each per-state matrix is cut down further to its largest connected set
        active_sets = [_largest_connected_set(msm, directed=False) for msm in fmsms]
        fmsms = [_np.ascontiguousarray(
            (msm[lcc, :])[:, lcc]) for msm, lcc in zip(fmsms, active_sets)]

        models = []
        for i, (msm, acs) in enumerate(zip(fmsms, active_sets)):
            # acs indexes into the active set, so active_set[acs] gives the
            # matching indices in the full state space; the stationary vector
            # is taken on exactly those states and renormalised
            pi_acs = _np.exp(self.therm_energies[i] - self.biased_conf_energies[i, :])[self.active_set[acs]]
            pi_acs = pi_acs / pi_acs.sum()
            models.append(_ThermoMSM(
                msm, self.active_set[acs], self.nstates_full, pi=pi_acs,
                dt_model=self.timestep_traj.get_scaled(self.lag)))

        # set model parameters to self
        self.set_model_params(
            models=models, f_therm=self.therm_energies, f=conf_energies[self.active_set].copy())

        return self
Пример #9
0
    def _estimate(self, X):
        ttrajs, dtrajs_full, btrajs = X
        # shape and type checks
        assert len(ttrajs) == len(dtrajs_full) == len(btrajs)
        for t in ttrajs:
            _types.assert_array(t, ndim=1, kind='i')
        for d in dtrajs_full:
            _types.assert_array(d, ndim=1, kind='i')
        for b in btrajs:
            _types.assert_array(b, ndim=2, kind='f')
        # find dimensions
        self.nstates_full = max(_np.max(d) for d in dtrajs_full) + 1
        self.nthermo = max(_np.max(t) for t in ttrajs) + 1
        # dimensionality checks
        for t, d, b, in zip(ttrajs, dtrajs_full, btrajs):
            assert t.shape[0] == d.shape[0] == b.shape[0]
            assert b.shape[1] == self.nthermo

        # cast types and change axis order if needed
        ttrajs = [
            _np.require(t, dtype=_np.intc, requirements='C') for t in ttrajs
        ]
        dtrajs_full = [
            _np.require(d, dtype=_np.intc, requirements='C')
            for d in dtrajs_full
        ]
        btrajs = [
            _np.require(b, dtype=_np.float64, requirements='C') for b in btrajs
        ]

        # find state visits and transition counts
        state_counts_full = _util.state_counts(ttrajs, dtrajs_full)
        count_matrices_full = _util.count_matrices(ttrajs,
                                                   dtrajs_full,
                                                   self.lag,
                                                   sliding=self.count_mode,
                                                   sparse_return=False,
                                                   nstates=self.nstates_full)
        self.therm_state_counts_full = state_counts_full.sum(axis=1)

        self.csets, pcset = _cset.compute_csets_TRAM(
            self.connectivity,
            state_counts_full,
            count_matrices_full,
            ttrajs=ttrajs,
            dtrajs=dtrajs_full,
            bias_trajs=btrajs,
            nn=self.nn,
            factor=self.connectivity_factor,
            callback=_IterationProgressIndicatorCallBack(
                self, 'finding connected set', 'cset'))
        self.active_set = pcset

        # check for empty states
        for k in range(self.nthermo):
            if len(self.csets[k]) == 0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + ' contains no samples after reducing to the connected set.', EmptyState)

        # deactivate samples not in the csets, states are *not* relabeled
        self.state_counts, self.count_matrices, self.dtrajs, _ = _cset.restrict_to_csets(
            self.csets,
            state_counts=state_counts_full,
            count_matrices=count_matrices_full,
            ttrajs=ttrajs,
            dtrajs=dtrajs_full)

        # self-consistency tests
        assert _np.all(self.state_counts >= _np.maximum(self.count_matrices.sum(axis=1), \
            self.count_matrices.sum(axis=2)))
        assert _np.all(
            _np.sum([
                _np.bincount(d[d >= 0], minlength=self.nstates_full)
                for d in self.dtrajs
            ],
                    axis=0) == self.state_counts.sum(axis=0))
        assert _np.all(
            _np.sum([
                _np.bincount(t[d >= 0], minlength=self.nthermo)
                for t, d in zip(ttrajs, self.dtrajs)
            ],
                    axis=0) == self.state_counts.sum(axis=1))

        # check for empty states
        for k in range(self.state_counts.shape[0]):
            if self.count_matrices[k, :, :].sum() == 0:
                _warnings.warn(
                    'Thermodynamic state %d' % k \
                    + 'contains no transitions after reducing to the connected set.', EmptyState)

        if self.init == 'mbar' and self.biased_conf_energies is None:
            if self.direct_space:
                mbar = _mbar_direct
            else:
                mbar = _mbar
            self.mbar_therm_energies, self.mbar_unbiased_conf_energies, \
                self.mbar_biased_conf_energies, _ = mbar.estimate(
                    state_counts_full.sum(axis=1), btrajs, dtrajs_full,
                    maxiter=self.init_maxiter, maxerr=self.init_maxerr,
                    callback=_ConvergenceProgressIndicatorCallBack(
                        self, 'MBAR init.', self.init_maxiter, self.init_maxerr),
                    n_conf_states=self.nstates_full)
            self._progress_force_finish(stage='MBAR init.',
                                        description='MBAR init.')
            self.biased_conf_energies = self.mbar_biased_conf_energies.copy()

        # run estimator
        if self.direct_space:
            tram = _tram_direct
        else:
            tram = _tram
        #import warnings
        #with warnings.catch_warnings() as cm:
        # warnings.filterwarnings('ignore', RuntimeWarning)
        self.biased_conf_energies, conf_energies, self.therm_energies, self.log_lagrangian_mult, \
            self.increments, self.loglikelihoods = tram.estimate(
                self.count_matrices, self.state_counts, btrajs, self.dtrajs,
                maxiter=self.maxiter, maxerr=self.maxerr,
                biased_conf_energies=self.biased_conf_energies,
                log_lagrangian_mult=self.log_lagrangian_mult,
                save_convergence_info=self.save_convergence_info,
                callback=_ConvergenceProgressIndicatorCallBack(
                    self, 'TRAM', self.maxiter, self.maxerr),
                N_dtram_accelerations=self.N_dtram_accelerations)
        self._progress_force_finish(stage='TRAM', description='TRAM')
        self.btrajs = btrajs

        # compute models
        fmsms = [
            _np.ascontiguousarray((_tram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.biased_conf_energies,
                self.count_matrices, None,
                K)[self.active_set, :])[:, self.active_set])
            for K in range(self.nthermo)
        ]

        self.model_active_set = [
            _largest_connected_set(msm, directed=False) for msm in fmsms
        ]
        fmsms = [
            _np.ascontiguousarray((msm[lcc, :])[:, lcc])
            for msm, lcc in zip(fmsms, self.model_active_set)
        ]
        models = [
            _MSM(msm, dt_model=self.timestep_traj.get_scaled(self.lag))
            for msm in fmsms
        ]

        # set model parameters to self
        self.set_model_params(models=models,
                              f_therm=self.therm_energies,
                              f=conf_energies[self.active_set].copy())

        return self
Пример #10
0
    def _estimate(self, trajs):
        """
        Estimate dTRAM energies and per-state Markov models from combined trajectories.

        Parameters
        ----------
        trajs : ndarray(T, 2) or list of ndarray(T_i, 2)
            Thermodynamic trajectories. Each trajectory is a (T_i, 2)-array
            with T_i time steps. The first column is the thermodynamic state
            index, the second column is the configuration state index.
            Arrays with more than two columns pass validation; only the first
            two columns are used.

        Returns
        -------
        self
            The estimator itself. Count data, the active set, the estimated
            energies and the per-thermodynamic-state models are stored on it.

        """
        # format input if needed
        if isinstance(trajs, _np.ndarray):
            trajs = [trajs]
        # validate input
        assert _types.is_list(trajs)
        for ttraj in trajs:
            _types.assert_array(ttraj, ndim=2, kind='numeric')
            assert _np.shape(ttraj)[1] >= 2

        # Keep only the two index columns and cast them to C ints exactly once,
        # so that both counting routines below work on the same sanitized input.
        # (The validation above permits extra columns and any numeric dtype.)
        int_trajs = [_np.ascontiguousarray(t[:, :2]).astype(_np.intc) for t in trajs]

        # harvest transition counts
        self.count_matrices_full = _util.count_matrices(
            int_trajs,
            self.lag,
            sliding=self.count_mode,
            sparse_return=False,
            nstates=self.nstates_full)
        # harvest state counts (for WHAM)
        self.state_counts_full = _util.state_counts(
            int_trajs,
            nthermo=self.nthermo,
            nstates=self.nstates_full)

        # restrict to connected set of the count matrix summed over axis 0
        C_sum = self.count_matrices_full.sum(axis=0)
        # TODO: use improved cset
        cset = _largest_connected_set(C_sum, directed=True)
        self.active_set = cset
        # correct counts: keep rows and columns of the connected set only
        self.count_matrices = self.count_matrices_full[:, cset[:, _np.newaxis], cset]
        self.count_matrices = _np.require(
            self.count_matrices, dtype=_np.intc, requirements=['C', 'A'])
        # correct bias matrix
        self.bias_energies = self.bias_energies_full[:, cset]
        self.bias_energies = _np.require(
            self.bias_energies, dtype=_np.float64, requirements=['C', 'A'])
        # correct state counts
        self.state_counts = self.state_counts_full[:, cset]
        self.state_counts = _np.require(
            self.state_counts, dtype=_np.intc, requirements=['C', 'A'])

        # run initialisation; only 'wham' triggers any work here
        if self.init == 'wham':
            # the increments and log-likelihoods of the initialisation run are discarded
            self.therm_energies, self.conf_energies, _, _ = _wham.estimate(
                self.state_counts, self.bias_energies,
                maxiter=5000, maxerr=1.0E-8, save_convergence_info=0,
                therm_energies=self.therm_energies, conf_energies=self.conf_energies)

        # run estimator, starting from the current energies / Lagrangian multipliers
        self.therm_energies, self.conf_energies, self.log_lagrangian_mult, \
            self.increments, self.loglikelihoods = _dtram.estimate(
                self.count_matrices, self.bias_energies,
                maxiter=self.maxiter, maxerr=self.maxerr,
                log_lagrangian_mult=self.log_lagrangian_mult,
                conf_energies=self.conf_energies,
                save_convergence_info=self.save_convergence_info)

        # compute models: one transition matrix per thermodynamic state
        models = [
            _dtram.estimate_transition_matrix(
                self.log_lagrangian_mult, self.bias_energies,
                self.conf_energies, self.count_matrices,
                _np.zeros(shape=self.conf_energies.shape, dtype=_np.float64), K)
            for K in range(self.nthermo)
        ]
        # each model is restricted to its own largest (undirected) connected set
        self.model_active_set = [
            _largest_connected_set(msm, directed=False) for msm in models
        ]
        models = [
            _np.ascontiguousarray((msm[lcc, :])[:, lcc])
            for msm, lcc in zip(models, self.model_active_set)
        ]

        # set model parameters to self
        self.set_model_params(
            models=[
                _MSM(msm, dt_model=self.timestep_traj.get_scaled(self.lag))
                for msm in models
            ],
            f_therm=self.therm_energies,
            f=self.conf_energies)

        # done
        return self
Пример #11
0
    def _estimate(self, trajs):
        """
        Estimate WHAM energies and stationary models from separate state trajectories.

        Parameters
        ----------
        trajs : tuple or list of (ttrajs, dtrajs)
            Simulation trajectories. ttrajs contains the indices of the
            thermodynamic states and dtrajs contains the indices of the
            configurational states.

            ttrajs : list of numpy.ndarray(X_i, dtype=int)
                Every element is a trajectory (time series). ttrajs[i][t] is
                the index of the thermodynamic state visited in trajectory i
                at time step t.
            dtrajs : list of numpy.ndarray(X_i, dtype=int)
                dtrajs[i][t] is the index of the configurational state (Markov
                state) visited in trajectory i at time step t.

        Returns
        -------
        self
            The estimator itself, with state counts, active set, estimated
            energies and one stationary model per thermodynamic state stored
            on it.
        """
        # the input has to be a pair (ttrajs, dtrajs)
        assert isinstance(trajs, (tuple, list))
        assert len(trajs) == 2
        ttrajs, dtrajs = trajs[0], trajs[1]
        # every trajectory pair must be one-dimensional, numeric and of equal length
        for therm_traj, conf_traj in zip(ttrajs, dtrajs):
            _types.assert_array(therm_traj, ndim=1, kind='numeric')
            _types.assert_array(conf_traj, ndim=1, kind='numeric')
            assert _np.shape(therm_traj)[0] == _np.shape(conf_traj)[0]

        # harvest state counts
        self.state_counts_full = _util.state_counts(
            ttrajs, dtrajs, nthermo=self.nthermo, nstates=self.nstates_full)

        # active set: columns of the state count matrix with a non-zero total
        has_samples = self.state_counts_full.sum(axis=0) > 0
        self.active_set = _np.where(has_samples)[0]
        active_counts = self.state_counts_full[:, self.active_set]
        self.state_counts = _np.ascontiguousarray(active_counts.astype(_np.intc))
        active_bias = self.bias_energies_full[:, self.active_set]
        self.bias_energies = _np.ascontiguousarray(active_bias, dtype=_np.float64)

        # run estimator
        wham_result = _wham.estimate(
            self.state_counts,
            self.bias_energies,
            maxiter=self.maxiter,
            maxerr=self.maxerr,
            therm_energies=self.therm_energies,
            conf_energies=self.conf_energies,
            save_convergence_info=self.save_convergence_info,
            callback=_ConvergenceProgressIndicatorCallBack(
                self, 'WHAM', self.maxiter, self.maxerr))
        (self.therm_energies, self.conf_energies,
         self.increments, self.loglikelihoods) = wham_result
        self._progress_force_finish(stage='WHAM')

        # get stationary models, one per thermodynamic state
        models = []
        for K in range(self.nthermo):
            stationary = _np.exp(
                self.therm_energies[K, _np.newaxis]
                - self.bias_energies[K, :] - self.conf_energies)
            energies = self.bias_energies[K, :] + self.conf_energies
            models.append(
                _StationaryModel(
                    pi=stationary,
                    f=energies,
                    normalize_energy=False,
                    label="K=%d" % K))

        # set model parameters to self
        self.set_model_params(
            models=models, f_therm=self.therm_energies, f=self.conf_energies)

        # done
        return self
Пример #12
0
    def _estimate(self, trajs):
        """
        Estimate dTRAM energies and per-state Markov models from separate state trajectories.

        Parameters
        ----------
        trajs : tuple or list of (ttrajs, dtrajs)
            Pair of trajectory collections. ttrajs and dtrajs are iterated in
            lockstep; every pair must consist of one-dimensional numeric
            arrays of equal length. Presumably ttrajs holds thermodynamic
            state indices and dtrajs holds configurational state indices
            (inferred from the names; confirm against the caller).

        Returns
        -------
        self
            The estimator itself, with count data, active set, estimated
            energies and one model per thermodynamic state stored on it.
        """
        # the input has to be a pair (ttrajs, dtrajs)
        assert isinstance(trajs, (tuple, list))
        assert len(trajs) == 2
        ttrajs, dtrajs = trajs[0], trajs[1]
        # every trajectory pair must be one-dimensional, numeric and of equal length
        for therm_traj, conf_traj in zip(ttrajs, dtrajs):
            _types.assert_array(therm_traj, ndim=1, kind='numeric')
            _types.assert_array(conf_traj, ndim=1, kind='numeric')
            assert _np.shape(therm_traj)[0] == _np.shape(conf_traj)[0]

        # harvest transition counts
        self.count_matrices_full = _util.count_matrices(
            ttrajs,
            dtrajs,
            self.lag,
            sliding=self.count_mode,
            sparse_return=False,
            nstates=self.nstates_full)
        # harvest state counts (for WHAM)
        self.state_counts_full = _util.state_counts(
            ttrajs,
            dtrajs,
            nthermo=self.nthermo,
            nstates=self.nstates_full)

        # restrict to connected set of the count matrix summed over axis 0
        # TODO: use improved cset
        summed_counts = self.count_matrices_full.sum(axis=0)
        connected = _largest_connected_set(summed_counts, directed=True)
        self.active_set = connected
        # correct counts: keep rows and columns of the connected set only
        restricted_counts = self.count_matrices_full[:, connected[:, _np.newaxis], connected]
        self.count_matrices = _np.require(
            restricted_counts, dtype=_np.intc, requirements=['C', 'A'])
        # correct bias matrix
        restricted_bias = self.bias_energies_full[:, connected]
        self.bias_energies = _np.require(
            restricted_bias, dtype=_np.float64, requirements=['C', 'A'])
        # correct state counts
        restricted_states = self.state_counts_full[:, connected]
        self.state_counts = _np.require(
            restricted_states, dtype=_np.intc, requirements=['C', 'A'])

        # run initialisation; only 'wham' triggers any work here
        if self.init == 'wham':
            # the increments and log-likelihoods of the initialisation run are discarded
            wham_result = _wham.estimate(
                self.state_counts,
                self.bias_energies,
                maxiter=self.init_maxiter,
                maxerr=self.init_maxerr,
                save_convergence_info=0,
                therm_energies=self.therm_energies,
                conf_energies=self.conf_energies,
                callback=_ConvergenceProgressIndicatorCallBack(
                    self, 'WHAM init.', self.init_maxiter, self.init_maxerr))
            self.therm_energies, self.conf_energies, _, _ = wham_result
            self._progress_force_finish(stage='WHAM init.', description='WHAM init.')

        # run estimator
        dtram_result = _dtram.estimate(
            self.count_matrices,
            self.bias_energies,
            maxiter=self.maxiter,
            maxerr=self.maxerr,
            log_lagrangian_mult=self.log_lagrangian_mult,
            conf_energies=self.conf_energies,
            save_convergence_info=self.save_convergence_info,
            callback=_ConvergenceProgressIndicatorCallBack(
                self, 'DTRAM', self.maxiter, self.maxerr))
        (self.therm_energies, self.conf_energies, self.log_lagrangian_mult,
         self.increments, self.loglikelihoods) = dtram_result
        self._progress_force_finish(stage='DTRAM', description='DTRAM')

        # compute models: one transition matrix per thermodynamic state
        transition_matrices = []
        for K in range(self.nthermo):
            transition_matrices.append(
                _dtram.estimate_transition_matrix(
                    self.log_lagrangian_mult,
                    self.bias_energies,
                    self.conf_energies,
                    self.count_matrices,
                    _np.zeros(shape=self.conf_energies.shape, dtype=_np.float64),
                    K))
        # each model is restricted to its own largest (undirected) connected set
        self.model_active_set = [
            _largest_connected_set(tmat, directed=False)
            for tmat in transition_matrices
        ]
        restricted_matrices = []
        for tmat, lcc in zip(transition_matrices, self.model_active_set):
            restricted_matrices.append(_np.ascontiguousarray((tmat[lcc, :])[:, lcc]))

        # set model parameters to self
        msms = []
        for tmat in restricted_matrices:
            msms.append(_MSM(tmat, dt_model=self.timestep_traj.get_scaled(self.lag)))
        self.set_model_params(
            models=msms, f_therm=self.therm_energies, f=self.conf_energies)

        # done
        return self