示例#1
0
    def pmf(self, potential, property, bins=10, interval=None, **constants):
        """
        Computes a potential of mean force (PMF) along a given `property` by
        reweighting histogram-bin indicator functions to the state defined by
        a reduced `potential`.

        Parameters
        ----------
            potential : str
                Expression of the reduced potential of the target state. It
                is evaluated via ``self.__compute__`` using `constants`.
            property : str
                Expression of the property along which the PMF is computed.
                It is also used as the title of the first output column.
            bins : int, optional, default=10
                Number of equally sized bins into which the interval is split.
            interval : tuple(float, float), optional, default=None
                The pair (min, max) delimiting the binned range. If it is
                omitted (or falsy), the smallest and largest values found in
                the first row of the computed property arrays are used.
            **constants : keyword arguments
                Name=value pairs forwarded to ``self.__compute__`` when
                evaluating `potential` and `property`.

        Returns
        -------
            pandas.DataFrame
                One row per bin whose reweighted population is positive,
                containing the bin center, the PMF value (minus the logarithm
                of the population), and its propagated uncertainty.

        """
        if mics.verbose:
            info("\n=== Computing PMF with %s ===" %
                 self.engine.__class__.__name__)
            info("Reduced potential:", potential)
        u = self.__compute__(potential, constants)
        z = self.__compute__(property, constants)
        if interval:
            (zmin, zmax) = interval
        else:
            zmin = min(np.amin(x[0, :]) for x in z)
            zmax = max(np.amax(x[0, :]) for x in z)
        delta = (zmax - zmin) / bins
        # Bin index of every configuration; slicing with 0:1 keeps each array
        # two-dimensional, which is the layout handed to the engine below.
        # NOTE(review): a value exactly equal to zmax maps to index `bins`,
        # which no iteration of the loop below selects - confirm whether the
        # right edge is meant to be excluded.
        ibin = [np.floor((x[0:1, :] - zmin) / delta).astype(int) for x in z]

        results = list()
        for i in range(bins):
            zc = zmin + delta * (i + 0.5)
            mics.verbose and info("Bin[%d]:" % (i + 1), "%s = %s" %
                                  (property, str(zc)))
            # Indicator function of bin i. The builtin float replaces the
            # np.float alias, which was removed in NumPy 1.24.
            y = [np.equal(x, i).astype(float) for x in ibin]
            (yu, Theta) = self.engine.__reweight__(self, u, y)
            # Entry 1 is the one read as the bin population (entry 0 is
            # presumably the free-energy term - verify against the engine).
            # Empty bins are skipped because their logarithm is undefined.
            if yu[1] > 0.0:
                dyu = np.sqrt(max(0.0, Theta[1, 1]))
                # Error of -ln(y) by first-order propagation: dy / y.
                results.append([zc, -np.log(yu[1]), dyu / yu[1]])

        return pd.DataFrame(results,
                            columns=[property, "pmf",
                                     errorTitle("pmf")])
示例#2
0
文件: MICS.py 项目: craabreu/mics
    def __initialize__(self, mixture):
        """
        Solves the self-consistent free-energy equations of a mixture and
        stores the resulting covariance and overlap matrices on it.

        Parameters
        ----------
            mixture : object
                The mixture being analyzed. This method reads its attributes
                ``m``, ``neff``, ``samples`` and ``f``, updates ``f`` in
                place, and sets ``Theta`` and ``Overlap``.

        Notes
        -----
            The attributes ``self.P``, ``self.pm`` and ``self.iB0`` are read
            here but not assigned in this method; presumably they are filled
            by ``__newton_raphson_iteration__`` - TODO confirm.

        """
        m = mixture.m
        effective_sizes = mixture.neff

        batch_sizes = [sample.b for sample in mixture.samples]
        self.b = batch_sizes

        # Unnormalized mixture weights: effective sample sizes by default,
        # equal weights for "uniform", or the user-provided composition.
        if self.composition is None:
            weights = effective_sizes
        elif self.composition == "uniform":
            weights = np.ones(m)
        else:
            weights = np.array(self.composition)
        pi = weights / np.sum(weights)
        self.pi = pi
        if mics.verbose:
            info("Mixture composition:", pi)

        if mics.verbose:
            info("Solving self-consistent equations...")
        iteration = 1
        df = self.__newton_raphson_iteration__(mixture)
        if m > 1:
            if mics.verbose:
                info("Maximum deviation at iteration %d:" % iteration,
                     max(abs(df)))
            # Keep correcting f[1:m] until every correction is within tol.
            while any(abs(df) > self.tol):
                iteration += 1
                mixture.f[1:m] += df
                df = self.__newton_raphson_iteration__(mixture)
                if mics.verbose:
                    info("Maximum deviation at iteration %d:" % iteration,
                         max(abs(df)))
        if mics.verbose:
            info("Free energies after convergence:", mixture.f)

        # Weighted sum of per-sample covariances, each computed with the
        # batch size of its own sample.
        self.Sp0 = sum(
            pi[k]**2 * covariance(self.P[k], self.pm[k], batch_sizes[k])
            for k in range(m)
        )
        mixture.Theta = multi_dot([self.iB0, self.Sp0, self.iB0])
        if mics.verbose:
            info("Free-energy covariance matrix:", mixture.Theta)

        mixture.Overlap = np.stack(self.pm)
        if mics.verbose:
            info("Overlap matrix:", mixture.Overlap)
示例#3
0
    def __initialize__(self, mixture):
        """
        Runs an ``mbar.MBAR`` solver on the mixture and stores the resulting
        free energies, covariance matrix and overlap matrix on it.

        Parameters
        ----------
            mixture : object
                The mixture being analyzed. This method reads its attributes
                ``m``, ``n``, ``u`` and ``f``, and sets ``f``, ``u0``,
                ``Theta`` and ``Overlap``.

        """
        m = mixture.m
        sizes = mixture.n

        solver = mbar.MBAR(
            np.hstack(mixture.u),
            sizes,
            relative_tolerance=self.tol,
            initial_f_k=mixture.f,
            verbose=mics.verbose,
        )
        self.MBAR = solver

        mixture.f = solver.f_k
        if mics.verbose:
            info("Free energies after convergence:", mixture.f)

        # Column vector of f + ln(n / total n), to be combined with each
        # sample's reduced-potential array.
        fractions = sizes/sum(sizes)
        flnpi = (mixture.f + np.log(fractions))[:, np.newaxis]
        mixture.u0 = [-logsumexp(flnpi - u) for u in mixture.u]
        self.P = [
            np.exp(flnpi - mixture.u[k] + mixture.u0[k]) for k in range(m)
        ]

        # NOTE(review): this relies on an underscore-prefixed method of the
        # solver object.
        weights = np.exp(solver.Log_W_nk)
        covariances = solver._computeAsymptoticCovarianceMatrix(weights,
                                                                solver.N_k)
        mixture.Theta = np.array(covariances)
        if mics.verbose:
            info("Free-energy covariance matrix:", mixture.Theta)

        mixture.Overlap = solver.N_k*np.matmul(solver.W_nk.T, solver.W_nk)
        if mics.verbose:
            info("Overlap matrix:", mixture.Overlap)
示例#4
0
文件: samples.py 项目: craabreu/mics
    def subsampling(self, integratedACF=True):
        """
        Performs inline subsampling based on the statistical inefficiency ``g``
        of the specified attribute `acfun` of :class:`sample`, aiming at
        obtaining a sample of :term:`IID` configurations. Subsampling is done
        via jumps of varying sizes around ``g``, so that the sample size decays
        by a factor of approximately ``1/g``.

        Parameters
        ----------
            integratedACF : bool, optional, default=True
                If true, the integrated :term:`ACF` method :cite:`Chodera_2007`
                will be used for computing the statistical inefficiency.
                Otherwise, the :term:`OBM` method will be used instead.

        Returns
        -------
            :class:`sample`
                Although the subsampling is done inline, the new sample is
                returned for chaining purposes.

        """
        n = len(self.dataset)
        if mics.verbose:
            info("\n=== Subsampling via %s ===" %
                 ("integrated ACF" if integratedACF else "OBM"))
            info("Original sample size:", n)
        if integratedACF:
            y = multimap([self.acfun.lambdify()], self.dataset)
            g = timeseries.statisticalInefficiency(y[0])
        else:
            # OBM estimate: ratio between actual and effective sample sizes.
            g = n / self.neff
        new = timeseries.subsampleCorrelatedData(self.dataset.index, g)
        # The subsampling routine yields positions within the series, not
        # index labels, so rows are selected by position. Label-based
        # reindexing would produce NaN rows whenever the index is not
        # 0..n-1, e.g. when this method is called a second time.
        self.dataset = self.dataset.iloc[new]
        # After subsampling, the retained rows are regarded as uncorrelated.
        self.neff = len(new)
        if mics.verbose:
            info("Statistical inefficiency:", g)
            info("New sample size:", self.neff)
        return self
示例#5
0
    def __init__(self, samples, engine):
        """
        Sets up a mixture from a list of samples and initializes the given
        analysis engine with it.

        Parameters
        ----------
            samples : list
                The samples that make up the mixture. Each one must provide
                the attributes ``dataset``, ``neff`` and ``potential``.
            engine : object
                The analysis engine, whose ``__initialize__`` method is called
                with this mixture as the last step of the setup.

        Raises
        ------
            InputError
                If `samples` is empty.

        """
        self.samples = samples
        self.engine = engine
        self.m = len(samples)
        m = self.m
        if mics.verbose:
            info("\n=== Setting up mixture ===")
            info("Analysis method: ", self.engine.__class__.__name__)
            info("Number of samples:", m)

        if m < 1:
            raise InputError("list of samples is empty")

        self.n = np.array([len(item.dataset) for item in samples])
        self.neff = np.array([item.neff for item in samples])
        # Property names are taken from the first sample's dataset only.
        self.names = list(samples[0].dataset.columns)
        if mics.verbose:
            info("Sample sizes:", self.n)
            info("Effective sample sizes:", self.neff)
            info("Properties:", ", ".join(self.names))

        # Evaluate the reduced potential of every sampled state at the
        # configurations of every sample.
        potentials = [item.potential.lambdify() for item in samples]
        self.u = [multimap(potentials, item.dataset) for item in samples]
        self.f = bennett(self.u)
        if mics.verbose:
            info("Initial free-energy guess:", self.f)
        self.engine.__initialize__(self)
示例#6
0
    def reweighting(self,
                    potential,
                    properties={},
                    derivatives={},
                    combinations={},
                    conditions={},
                    reference=0,
                    **constants):
        """
        Computes averages of specified properties at target states defined by
        a given reduced `potential` function with distinct passed parameter
        values, as well as the free energies of such states with respect to a
        sampled `reference` state. Also, computes derivatives of these averages
        and free energies with respect to the mentioned parameters. In addition,
        evaluates combinations of free energies, averages, and derivatives. In
        all cases, uncertainty propagation is handled automatically by means of
        the delta method.

        Parameters
        ----------
            potential : str
                A mathematical expression defining the reduced potential of the
                target states. It might depend on the collective variables of
                the mixture samples, as well as on external parameters whose
                values will be passed via `conditions` or `constants`, such as
                explained below.
            properties : dict(str: str), optional, default={}
                A dictionary associating names to mathematical expressions, thus
                defining a set of properties whose averages must be evaluated at
                the target states. If it is omitted, then only the relative free
                energies of the target states will be evaluated. The expressions
                might depend on the same collective variables and parameters
                mentioned above for `potential`.
            derivatives : dict(str: (str, str)), optional, default={}
                A dictionary associating names to (property, parameter) pairs,
                thus specifying derivatives of average properties at the target
                states or relative free energies of these states with respect
                to external parameters. For each pair, property must be either
                "f" (for free energy) or a name defined in `properties`, while
                parameter must be an external parameter such as described above
                for `potential`.
            combinations : dict(str: str), optional, default={}
                A dictionary associating names to mathematical expressions, thus
                defining combinations among average properties at the target
                states, the relative free energies of these states, and their
                derivatives with respect to external parameters. The expressions
                might depend on "f" (for free energy) or on the names defined in
                `properties`, as well as on external parameters such as described
                above for `potential`.
            conditions : pandas.DataFrame or dict, optional, default={}
                A data frame whose column names are external parameters present
                in mathematical expressions specified in arguments `potential`,
                `properties`, and `combinations`. The rows of the data frame
                contain sets of values of these parameters, in such a way that
                the reweighting is carried out for every single set. This is a
                way of defining multiple target states from a single `potential`
                expression. The same information can be passed as a dictionary
                associating names to lists of numerical values, provided that
                all lists are equally sized. If it is empty, then a unique
                target state will be considered and all external parameters in
                `potential`, if any, must be passed as keyword arguments.
            reference : int, optional, default=0
                The index of a sampled state to be considered as a reference for
                computing relative free energies.
            **constants : keyword arguments
                A set of keyword arguments passed as name=value, aimed to define
                external parameter values for the evaluation of mathematical
                expressions. These values will be repeated at all target states
                specified via `potential` and `conditions`.

        Returns
        -------
            pandas.DataFrame
                A data frame containing the computed quantities, along with
                their estimated uncertainties, at all target states specified
                via `potential` and `conditions`.

        Raises
        ------
            InputError
                If the reserved name "f" is used as a key of `properties`.

        """
        if mics.verbose:
            info("\n=== Performing reweighting with %s ===" %
                 self.engine.__class__.__name__)
            info("Reduced potential:", potential)
            constants and info("Provided constants: ", constants)

        freeEnergy = "f"
        if freeEnergy in properties.keys():
            # "%s" is required here: a bare "% i" would be parsed as an
            # integer conversion and fail with TypeError before this
            # InputError could be raised.
            raise InputError("Word %s is reserved for free energies" %
                             freeEnergy)
        condframe = pd.DataFrame(
            data=conditions) if isinstance(conditions, dict) else conditions
        propfuncs = list(properties.values())

        if not derivatives:
            propnames = [freeEnergy] + list(properties.keys())
            combs = combinations.values()

            # First attempt with the constants alone; a truthy result is
            # reused for every condition in the loop below.
            gProps = self.__compute__(propfuncs, constants)
            if combinations:
                gDelta = deltaMethod(combs, propnames, constants)

            results = list()
            for (index, condition) in cases(condframe):
                mics.verbose and condition and info("Condition[%s]" % index,
                                                    condition)
                # Parameters of this target state: condition values plus
                # the constants shared by all states.
                consts = dict(condition, **constants)
                u = self.__compute__(potential, consts)
                y = gProps if gProps else self.__compute__(propfuncs, consts)
                (yu, Theta) = self.engine.__reweight__(self, u, y, reference)
                result = propertyDict(propnames, yu, stdError(Theta))
                if combinations:
                    # Rebuild the delta-method object with the condition
                    # values only if the constants-only one is not valid.
                    delta = gDelta if gDelta.valid else deltaMethod(
                        combs, propnames, consts)
                    (h, dh) = delta.evaluate(yu, Theta)
                    result.update(propertyDict(combinations.keys(), h, dh))
                results.append(result.to_frame(index))

            return condframe.join(pd.concat(results))

        else:
            # Derivatives are recast as extra properties and combinations,
            # which are then evaluated by a derivative-free recursive call.
            symbols = list(condframe.columns) + list(constants.keys())
            parameters = set(x for (y, x) in derivatives.values())
            props = dict()
            for x in parameters:
                # Auxiliary property: derivative of the potential w.r.t. x,
                # stored under the name produced by crypto(x).
                props[crypto(x)] = diff(potential, x, symbols)
            combs = dict()
            for (z, (y, x)) in derivatives.items():
                if y == freeEnergy:
                    # Free-energy derivative: the auxiliary property itself.
                    combs[z] = crypto(x)
                else:
                    # Property derivative, assembled from two pieces:
                    # the average of (dy/dx - du/dx * y), plus the product
                    # of the averages of du/dx and y.
                    dydx = diff(properties[y], x, symbols)
                    props[crypto(z)] = "%s - (%s)*(%s)" % (
                        dydx, props[crypto(x)], properties[y])
                    combs[z] = "%s + (%s)*(%s)" % (crypto(z), crypto(x), y)
            # Columns of the auxiliary properties (values and errors) are
            # dropped from the final result.
            unwanted = sum([[x, errorTitle(x)] for x in props.keys()], [])
            return self.reweighting(potential, dict(properties, **props), {},
                                    dict(combs, **combinations), condframe,
                                    reference, **constants).drop(unwanted,
                                                                 axis=1)
示例#7
0
文件: samples.py 项目: craabreu/mics
    def __init__(self,
                 dataset,
                 potential,
                 acfun=None,
                 batchsize=None,
                 **constants):
        """
        Sets up a sample from a dataset and estimates its effective size from
        the ratio of two variance estimates of an autocorrelation analysis
        function.

        Parameters
        ----------
            dataset : pandas.DataFrame
                The sampled data, whose column names are the property names
                available to the expressions below.
            potential : str
                Expression of the reduced potential of the sampled state.
            acfun : str, optional, default=None
                Expression used for the autocorrelation analysis. If it is
                None, the reduced potential is used instead.
            batchsize : int, optional, default=None
                Batch size for the second variance estimate. If omitted (or
                zero), the integer part of the square root of the sample
                size is used.
            **constants : keyword arguments
                Name=value pairs made available to the expressions.

        Raises
        ------
            FloatingPointError
                If either of the two variance estimates is not finite.

        """
        names = dataset.columns.tolist()
        n = len(dataset)
        if batchsize:
            b = batchsize
        else:
            b = int(np.sqrt(n))
        self.b = b

        if mics.verbose:
            info("\n=== Setting up new sample ===")
            info("Properties:", ", ".join(names))
            info("Constants:", constants)
            info("Reduced potential function:", potential)
            info("Autocorrelation analysis function:",
                 acfun if acfun else potential)
            info("Sample size:", n)
            info("Batch size:", b)

        self.dataset = dataset
        self.potential = func(potential, names, constants)
        if acfun is None:
            self.acfun = self.potential
        else:
            self.acfun = func(acfun, names, constants)

        series = multimap([self.acfun.lambdify()], dataset)
        mean = np.mean(series, axis=1)
        # Same estimator with batch size 1 (autocorrelation disregarded)
        # and with batch size b.
        S1 = covariance(series, mean, 1).item(0)
        Sb = covariance(series, mean, b).item(0)
        if not np.isfinite(S1) or not np.isfinite(Sb):
            raise FloatingPointError(
                "unable to determine effective sample size")
        # NOTE(review): Sb == 0 passes the finiteness check above - confirm
        # whether a zero batch variance can occur here.
        self.neff = n * S1 / Sb

        if mics.verbose:
            info("Variance disregarding autocorrelation:", S1)
            info("Variance via Overlapping Batch Means:", Sb)
            info("Effective sample size:", self.neff)