示例#1
0
def finite_difference_bar(w, delta):
    """Estimate the gradient of the BAR free energy w.r.t. each work value.

    Every element of ``w`` is perturbed in turn by ``+delta/2`` and
    ``-delta/2``, ``pymbar.BAR`` is re-evaluated for both perturbations, and
    the central-difference quotient is stored.

    Parameters
    ----------
    w : np.ndarray, float, (2, N)
        ``w[0]`` is passed to ``pymbar.BAR`` as the forward work and ``w[1]``
        as the reverse work. The array is perturbed in place while the
        function runs, but every element is reset to its exact original value
        before moving on to the next one.
    delta : float
        Total width of the central-difference stencil.

    Returns
    -------
    np.ndarray
        Array shaped like ``w`` holding the finite-difference estimates.
    """
    fd_pymbar = np.zeros_like(w)
    for i in range(2):
        for j in range(len(w[0])):
            # Remember the unperturbed value so it can be restored exactly;
            # undoing the shifts arithmetically may leave rounding residue.
            unperturbed = w[i][j]

            # central difference: evaluate at +delta/2, then at -delta/2
            w[i][j] += 0.5 * delta
            upper = pymbar.BAR(w[0], w[1])[0]
            w[i][j] -= delta
            lower = pymbar.BAR(w[0], w[1])[0]

            fd_pymbar[i][j] = (upper - lower) / delta
            w[i][j] = unperturbed
    return fd_pymbar
示例#2
0
文件: analysis.py 项目: LaYeqa/perses
    def get_free_energies(self, environment, n_bootstrap_iterations=10000000):
        """
        Estimate the free energies between all pairs with bidirectional transitions of chemical states in the
        given environment

        For every state pair, the forward work is ``logP_accepts[0]`` and the reverse work is
        ``-logP_accepts[1]``. Both are resampled with replacement and fed to ``pymbar.BAR`` once per
        bootstrap iteration.

        Parameters
        ----------
        environment : str
            The name of the environment for which free energies are desired
        n_bootstrap_iterations : int, optional, default=10000000
            Number of bootstrap resamplings (one ``pymbar.BAR`` call each) performed per state pair

        Returns
        -------
        free_energies : dict of (str, str): [float, float]
            Dictionary of pairwise free energies and their uncertainty, computed with bootstrapping
            (mean and standard deviation of the bootstrapped BAR estimates)
        """
        logP_without_sams = self.extract_logP_values(environment, "logP_accept", subtract_sams=True)
        free_energies = {}

        for state_pair, logP_accepts in logP_without_sams.items():
            w_F = logP_accepts[0]
            w_R = -logP_accepts[1]
            bootstrapped_bar = np.zeros(n_bootstrap_iterations)
            for i in range(n_bootstrap_iterations):
                resampled_w_F = np.random.choice(w_F, len(w_F), replace=True)
                resampled_w_R = np.random.choice(w_R, len(w_R), replace=True)

                # Only the estimate is kept; the spread of the bootstrap
                # replicates serves as the uncertainty.
                df, _ = pymbar.BAR(resampled_w_F, resampled_w_R)
                bootstrapped_bar[i] = df

            free_energies[state_pair] = [np.mean(bootstrapped_bar), np.std(bootstrapped_bar)]

        return free_energies
示例#3
0
def bootstrap_BAR(w_F, w_R, repeats, sample_proportion):
    """
    Bootstrap the BAR estimate by resampling the work values with replacement.

    :param w_F: ndarray, work values passed to pymbar.BAR as forward work
    :param w_R: ndarray, work values passed to pymbar.BAR as reverse work
    :param repeats: int, number of bootstrap replicates
    :param sample_proportion: float in [0, 1], fraction of each work array
                              drawn (with replacement) per replicate
    :return: (mean, std) of the finite bootstrap estimates, both float
    """
    assert 0 <= sample_proportion <= 1, "sample_proportion out of range"

    size_F = int(len(w_F) * sample_proportion)
    size_R = int(len(w_R) * sample_proportion)

    def _single_replicate():
        # Draw the forward sample first, then the reverse one, so the random
        # stream is consumed in a fixed order.
        forward = np.random.choice(w_F, size=size_F, replace=True)
        reverse = np.random.choice(w_R, size=size_R, replace=True)
        return pymbar.BAR(forward,
                          reverse,
                          compute_uncertainty=False,
                          relative_tolerance=1e-6,
                          verbose=False)

    estimates = np.asarray([_single_replicate() for _ in range(repeats)])

    # Discard replicates for which BAR produced NaN or +/-inf.
    estimates = estimates[np.isfinite(estimates)]

    return estimates.mean(), estimates.std()
示例#4
0
def bennett(w_F, w_R):
    """
    Bennett Acceptance Ratio estimate of a free energy difference.

    References:
    C. Bennett. Efficient Estimation of Free Energy Differences from Monte Carlo Data.
                                    Journal of Computational Physics 22, 245-268 (1976).
    G. Crooks. Path-ensemble averages in systems driven far from equilibrium. Physical Review E 61, 2361-2366 (2000).
    M. Shirts, E. Bair, G. Hooker, and V. Pande.
                    Equilibrium Free Energies from Nonequilibrium Measurements Using Maximum-Likelihood Methods.
                    Physical Review Letters 91, 140601 (2003).

    Requires pymbar (https://github.com/choderalab/pymbar).

    :param w_F:   ndarray with shape (NF,)
            work values passed to pymbar.BAR as forward work (A -> B), in units of kT

    :param w_R:   ndarray with shape (NR,)
            work values passed to pymbar.BAR as reverse work (B -> A), in units of kT

    :return:
            df_AB   :   float
                        free energy difference between states A and B (df_AB = f_B - f_A), in units of kT.
                        The uncertainty computed by pymbar is discarded.
    """
    both_one_dimensional = w_F.ndim  == w_R.ndim == 1
    assert both_one_dimensional, "w_F, w_R must be 1d arrays"

    free_energy, _uncertainty = pymbar.BAR(w_F,
                                           w_R,
                                           compute_uncertainty=True,
                                           relative_tolerance=0.000001,
                                           verbose=False)
    return free_energy
示例#5
0
def _run_mbar(u_kln, N_k):
    """Create a ``pymbar.MBAR`` object seeded with BAR-based initial free energies.

    Parameters
    ----------
    u_kln : np.ndarray, 3d
        Energies indexed as ``u_kln[k, l, n]``; the code reads
        ``u_kln[k, l, :N_k[k]]`` as the samples drawn at state ``k`` and
        evaluated at state ``l``.
    N_k : sequence of int
        Number of samples at each state.

    Returns
    -------
    pymbar.MBAR
        The MBAR object built from ``u_kln`` and ``N_k``.
    """
    K = len(N_k)
    f_k_BAR = np.zeros(K)
    # K states have K - 1 adjacent pairs (k, k + 1). Visit all of them so the
    # last state also receives a BAR-based initial guess.
    for k in range(K - 1):
        if N_k[k] == 0 or N_k[k + 1] == 0:
            # BAR needs work values from both states; keep a zero increment
            # for pairs that involve an unsampled state.
            continue
        w_F = u_kln[k, k + 1, :N_k[k]] - u_kln[k, k, :N_k[k]]
        w_R = u_kln[k + 1, k, :N_k[k + 1]] - u_kln[k + 1, k + 1, :N_k[k + 1]]
        f_k_BAR[k + 1] = pymbar.BAR(w_F,
                                    w_R,
                                    relative_tolerance=0.000001,
                                    verbose=False,
                                    compute_uncertainty=False)
    # Pairwise differences -> free energies relative to state 0.
    f_k_BAR = np.cumsum(f_k_BAR)
    mbar = pymbar.MBAR(u_kln, N_k, verbose=True, initial_f_k=f_k_BAR)
    return mbar
示例#6
0
    def calculate(self, temp=300.):
        """Calculate the free energy difference and return a PMF object.

        Neighbouring entries of ``self.data`` are paired up; for each pair a
        BAR estimate is computed from ``-low_lam[1] * beta`` and
        ``-high_lam[0] * beta``, converted back by dividing by ``beta``, and
        accumulated into a running profile that starts at 0.0.

        Parameters
        ----------
        temp: float, optional
              temperature of calculation
        """
        beta = 1. / (sim.boltz * temp)

        running_total = 0.0
        pmf_values = [running_total]
        for low_lam, high_lam in zip(self.data, self.data[1:]):
            reduced_estimate = pymbar.BAR(-low_lam[1] * beta,
                                          -high_lam[0] * beta)[0]
            running_total = running_total + reduced_estimate / beta
            pmf_values.append(running_total)

        return PMF(self.lambdas, pmf_values)
示例#7
0
def run_mbar(u_kln, N_k):
    """
    Create a pymbar.MBAR object, using pairwise BAR estimates between
    adjacent states as the initial guess for the free energies.

    :param u_kln: 3d numpy array,  reduced potential energy
    :param N_k: 1d numpy array, number of samples at state k
    :return: mbar, an object of pymbar.MBAR
    """
    K = len(N_k)
    f_k_BAR = np.zeros(K)
    # K states have K - 1 adjacent pairs (k, k + 1). Visit all of them so the
    # last state also receives a BAR-based initial guess.
    for k in range(K - 1):
        if N_k[k] == 0 or N_k[k + 1] == 0:
            # BAR needs work values from both states; keep a zero increment
            # for pairs that involve an unsampled state.
            continue
        w_F = u_kln[k, k + 1, :N_k[k]] - u_kln[k, k, :N_k[k]]
        w_R = u_kln[k + 1, k, :N_k[k + 1]] - u_kln[k + 1, k + 1, :N_k[k + 1]]
        f_k_BAR[k + 1] = pymbar.BAR(w_F,
                                    w_R,
                                    relative_tolerance=0.000001,
                                    verbose=False,
                                    compute_uncertainty=False)
    # Pairwise differences -> free energies relative to state 0.
    f_k_BAR = np.cumsum(f_k_BAR)
    mbar = pymbar.MBAR(u_kln, N_k, verbose=True, initial_f_k=f_k_BAR)
    return mbar
示例#8
0
def dG_dw(w):
    """
    Gradient of the BAR free energy difference with respect to the work values.

    The free energy ``dG`` is first obtained from ``pymbar.BAR``. The gradient
    is then formed as ``-(dBARzero/dw) / (dBARzero/ddG)`` evaluated at
    ``(w, dG)``, i.e. implicit differentiation of ``BARzero`` (presumably the
    BAR self-consistency residual -- confirm against its definition).

    Parameters
    ---------
    w : np.ndarray, float, (2, N)
        forward and reverse work for N frames

    Returns
    ------
    np.ndarray, float, (2, N)
        the gradient of free energy difference with respect to work

    """
    free_energy, _ = pymbar.BAR(w[0], w[1])

    # Partial derivatives of BARzero w.r.t. its first (work) and second
    # (free energy) argument; each grad call returns a 1-tuple.
    residual_grad_work = jax.grad(BARzero, argnums=(0,))
    residual_grad_free_energy = jax.grad(BARzero, argnums=(1,))

    numerator = residual_grad_work(w, free_energy)[0]
    denominator = residual_grad_free_energy(w, free_energy)[0]
    return -numerator / denominator
示例#9
0
def _bennett_acceptance_ratio_pymbar(forward_work,
                                     reverse_work,
                                     compute_uncertainty=True,
                                     maximum_iterations=500,
                                     relative_tolerance=1e-12):
    """pymbar reference implementation of BAR, returning torch tensors.

    ``pymbar.BAR`` is run with stdout captured. If the captured text contains
    "poor overlap", or the uncertainty was requested and is NaN, a pair of NaN
    tensors is returned. Otherwise the result is ``(estimate, uncertainty)``
    when ``compute_uncertainty`` is true and ``(estimate, None)`` when it is
    false. Tensors are created on the device and with the dtype of
    ``forward_work``.
    """
    import pymbar

    tensor_kwargs = {"device": forward_work.device, "dtype": forward_work.dtype}

    def _nan_pair():
        return (torch.tensor(np.nan, **tensor_kwargs),
                torch.tensor(np.nan, **tensor_kwargs))

    captured = io.StringIO()
    with redirect_stdout(captured):
        result = pymbar.BAR(w_F=as_numpy(forward_work),
                            w_R=as_numpy(reverse_work),
                            return_dict=False,
                            compute_uncertainty=compute_uncertainty,
                            maximum_iterations=maximum_iterations,
                            relative_tolerance=relative_tolerance)

    poor_overlap = "poor overlap" in captured.getvalue()

    if not compute_uncertainty:
        # Without the uncertainty pymbar hands back the bare estimate.
        if poor_overlap:
            return _nan_pair()
        return torch.tensor(result, **tensor_kwargs), None

    if poor_overlap or np.isnan(result[1]):
        return _nan_pair()
    estimate = torch.tensor(result[0], **tensor_kwargs)
    uncertainty = torch.tensor(result[1], **tensor_kwargs)
    return estimate, uncertainty
def run_alchemical_langevin_integrator(nsteps=0, splitting="O { V R H R V } O"):
    """Check that nonequilibrium switching reproduces the analytical free energy difference for a harmonic oscillator deformation, using BAR.

    The switching is performed with AlchemicalNonequilibriumLangevinIntegrator.
    Up to 6*sigma is tolerated for error.
    The total work (protocol work + shadow work) is used.

    Parameters
    ----------
    nsteps : int, optional, default=0
        Number of nonequilibrium switching steps (passed as ``nsteps_neq``).
        When 0, a single integrator step is still executed.
    splitting : str, optional
        Splitting string handed to the nonequilibrium integrator.

    Raises
    ------
    Exception
        If the BAR estimate deviates from ``dF_analytical`` by more than
        ``NSIGMA_MAX`` estimated standard errors.
    """

    # maximum tolerated deviation from the analytical free energy, in units of the BAR uncertainty
    NSIGMA_MAX = 6
    n_iterations = 100  # number of forward and reverse protocols

    # These are the alchemical functions that will be used to control the system
    temperature = 298.0 * unit.kelvin
    sigma = 1.0 * unit.angstrom # stddev of harmonic oscillator
    kT = kB * temperature # thermal energy
    # NOTE(review): beta is not referenced again in this function
    beta = 1.0 / kT # inverse thermal energy
    K = kT / sigma**2 # spring constant corresponding to sigma
    mass = 39.948 * unit.amu
    period = unit.sqrt(mass/K) # period of harmonic oscillator
    timestep = period / 20.0
    collision_rate = 1.0 / period
    # expected free energy difference (compared against the dimensionless BAR estimate below)
    dF_analytical = 1.0
    # each parameter maps to its (initial, final) value
    parameters = dict()
    parameters['testsystems_HarmonicOscillator_x0'] = (0 * sigma, 2 * sigma)
    parameters['testsystems_HarmonicOscillator_U0'] = (0 * kT, 1 * kT)
    # linear interpolation in lambda: forward goes initial -> final, reverse goes final -> initial
    forward_functions = { name : '(1-lambda)*%f + lambda*%f' % (value[0].value_in_unit_system(unit.md_unit_system), value[1].value_in_unit_system(unit.md_unit_system)) for (name, value) in parameters.items() }
    reverse_functions = { name : '(1-lambda)*%f + lambda*%f' % (value[1].value_in_unit_system(unit.md_unit_system), value[0].value_in_unit_system(unit.md_unit_system)) for (name, value) in parameters.items() }

    # Create harmonic oscillator testsystem
    testsystem = testsystems.HarmonicOscillator(K=K, mass=mass)
    system = testsystem.system
    positions = testsystem.positions

    # Get equilibrium samples from initial and final states
    # NOTE(review): burn_in is not referenced again in this function
    burn_in = 5 * 20 # 5 periods
    thinning = 5 * 20 # 5 periods

    # Collect forward and reverse work values
    w_f = np.zeros([n_iterations], np.float64)
    w_r = np.zeros([n_iterations], np.float64)
    platform = openmm.Platform.getPlatformByName("Reference")
    for direction in ['forward', 'reverse']:
        # restart each direction from the test system's reference positions
        positions = testsystem.positions
        for iteration in range(n_iterations):
            # Generate equilibrium sample
            equilibrium_integrator = GHMCIntegrator(temperature=temperature, collision_rate=collision_rate, timestep=timestep)
            equilibrium_context = openmm.Context(system, equilibrium_integrator, platform)
            # equilibrate at the end state the switching protocol starts from
            for (name, value) in parameters.items():
                if direction == 'forward':
                    equilibrium_context.setParameter(name, value[0].value_in_unit_system(unit.md_unit_system))
                else:
                    equilibrium_context.setParameter(name, value[1].value_in_unit_system(unit.md_unit_system))
            equilibrium_context.setPositions(positions)
            equilibrium_integrator.step(thinning)
            # the equilibrated positions seed both the switching run and the next iteration
            positions = equilibrium_context.getState(getPositions=True).getPositions(asNumpy=True)
            del equilibrium_context, equilibrium_integrator
            # Generate nonequilibrium work sample
            if direction == 'forward':
                alchemical_functions = forward_functions
            else:
                alchemical_functions = reverse_functions
            nonequilibrium_integrator = AlchemicalNonequilibriumLangevinIntegrator(temperature=temperature, collision_rate=collision_rate, timestep=timestep,
                                                                                   alchemical_functions=alchemical_functions, splitting=splitting, nsteps_neq=nsteps,
                                                                                   measure_shadow_work=True)
            nonequilibrium_context = openmm.Context(system, nonequilibrium_integrator, platform)
            nonequilibrium_context.setPositions(positions)
            if nsteps == 0:
                nonequilibrium_integrator.step(1) # need to execute at least one step
            else:
                nonequilibrium_integrator.step(nsteps)
            # record the dimensionless total work of this switching run
            if direction == 'forward':
                w_f[iteration] = nonequilibrium_integrator.get_total_work(dimensionless=True)
            else:
                w_r[iteration] = nonequilibrium_integrator.get_total_work(dimensionless=True)
            del nonequilibrium_context, nonequilibrium_integrator

    # BAR estimate and its uncertainty from the collected work values
    dF, ddF = pymbar.BAR(w_f, w_r)
    # deviation from the analytical value in units of the estimated uncertainty
    nsigma = np.abs(dF - dF_analytical) / ddF
    print("analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}".format(dF_analytical, dF, ddF, nsigma))
    if nsigma > NSIGMA_MAX:
        # NOTE(review): the message says "not zero", but the comparison is against dF_analytical (1.0)
        raise Exception("The free energy difference for the nonequilibrium switching for splitting '%s' and %d steps is not zero within statistical error." % (splitting, nsteps))
示例#11
0
def deltaG_from_results(model, results,
                        sys_params) -> Tuple[float, float, List]:
    """Compute a free energy estimate from per-lambda simulation results.

    Pairwise BAR estimates between neighbouring lambda windows are summed to
    give the returned value; their squared errors are summed and square-rooted
    to give the returned error. An MBAR estimate is also computed, but it is
    only printed, never returned. When ``model.endpoint_correct`` is set, the
    last entry of ``results`` is excluded from the BAR/MBAR analysis and used,
    together with the second-to-last, for an endpoint correction plus
    standard-state corrections that are added to the BAR value.

    Side effects: prints progress lines and saves the sanitized energy matrix
    to ``model.prefix + "_U_kn.npy"``.

    Parameters
    ----------
    model
        Object providing ``unbound_potentials``, ``endpoint_correct``,
        ``lambda_schedule``, ``beta`` and ``prefix``.
    results
        Sequence of per-window results exposing ``lambda_us`` (and ``xs`` for
        the endpoint correction).
    sys_params
        One parameter set per entry of ``model.unbound_potentials``.

    Returns
    -------
    (dG, bar_dG_err, results)
        The BAR-based free energy (with corrections if enabled), its error
        estimate, and the unmodified ``results`` argument.

    Raises
    ------
    AssertionError
        If ``sys_params`` and ``model.unbound_potentials`` differ in length.
    """

    assert len(sys_params) == len(model.unbound_potentials)

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, model.unbound_potentials)
    ]

    # With endpoint correction the final result is handled separately below.
    sim_results = results[:-1] if model.endpoint_correct else results

    N_k = [len(result.lambda_us) for result in sim_results]  # number of frames
    U_knk = np.array([result.lambda_us for result in sim_results])

    delta_Us = extract_delta_Us_from_U_knk(U_knk)

    bar_dG = 0
    bar_dG_var = 0  # accumulated squared BAR errors

    n_windows = len(model.lambda_schedule) - 1
    for lambda_idx in range(n_windows):

        fwd_delta_u = model.beta * delta_Us[lambda_idx][0]
        rev_delta_u = model.beta * delta_Us[lambda_idx][1]

        dG_exact, exact_bar_err = pymbar.BAR(fwd_delta_u, rev_delta_u)
        exact_bar_overlap = endpoint_correction.overlap_from_cdf(
            fwd_delta_u, rev_delta_u)

        bar_dG += dG_exact / model.beta
        # probably off by a factor of two since we re-use samples.
        bar_dG_var += (exact_bar_err / model.beta)**2

        lamb_start = model.lambda_schedule[lambda_idx]
        lamb_end = model.lambda_schedule[lambda_idx + 1]

        print(
            f"{model.prefix}_BAR: lambda {lamb_start:.3f} -> {lamb_end:.3f} dG: {dG_exact/model.beta:.3f} dG_err: {exact_bar_err/model.beta:.3f} overlap: {exact_bar_overlap:.3f}"
        )

    bar_dG_err = np.sqrt(bar_dG_var)

    # for MBAR we need to sanitize the energies
    clean_U_knks = [
        sanitize_energies(full_us, lambda_idx)
        for lambda_idx, full_us in enumerate(U_knk)
    ]  # [K, F, K]

    print(
        model.prefix,
        " MBAR: amin",
        np.amin(clean_U_knks),
        "median",
        np.median(clean_U_knks),
        "max",
        np.amax(clean_U_knks),
    )

    K = len(model.lambda_schedule)
    clean_U_knks = np.array(clean_U_knks)  # [K, F, K]
    U_kn = np.reshape(clean_U_knks, (-1, K)).transpose()  # [K, F*K]
    u_kn = U_kn * model.beta

    np.save(model.prefix + "_U_kn.npy", U_kn)

    mbar = pymbar.MBAR(u_kn, N_k)
    differences, error_estimates = mbar.getFreeEnergyDifferences()
    # Row 0 holds the differences relative to the first state.
    f_k, error_k = differences[0], error_estimates[0]
    mbar_dG = f_k[-1] / model.beta
    mbar_dG_err = error_k[-1] / model.beta

    if not model.endpoint_correct:
        print(
            f"{model.prefix} bar (A) {bar_dG:.3f} bar_err {bar_dG_err:.3f} mbar (A) {mbar_dG:.3f} mbar_err {mbar_dG_err:.3f} "
        )
        # use the exact (BAR) answer
        return bar_dG, bar_dG_err, results

    core_restr = bound_potentials[-1]
    # (ytz): tbd, automatically find optimal k_translation/k_rotation such that
    # standard deviation and/or overlap is maximized
    k_translation = 200.0
    k_rotation = 100.0
    start = time.time()
    lhs_du, rhs_du, rotation_samples, translation_samples = endpoint_correction.estimate_delta_us(
        k_translation=k_translation,
        k_rotation=k_rotation,
        core_idxs=core_restr.get_idxs(),
        core_params=core_restr.params.reshape((-1, 2)),
        beta=model.beta,
        lhs_xs=results[-2].xs,
        rhs_xs=results[-1].xs,
        seed=2021,
    )
    dG_endpoint, endpoint_err = pymbar.BAR(model.beta * lhs_du,
                                           model.beta * np.array(rhs_du))
    dG_endpoint = dG_endpoint / model.beta
    endpoint_err = endpoint_err / model.beta
    # compute standard state corrections for translation and rotation
    dG_ssc_translation, dG_ssc_rotation = standard_state.release_orientational_restraints(
        k_translation, k_rotation, model.beta)
    overlap = endpoint_correction.overlap_from_cdf(lhs_du, rhs_du)
    lhs_mean = np.mean(lhs_du)
    rhs_mean = np.mean(rhs_du)
    print(
        f"{model.prefix} bar (A) {bar_dG:.3f} bar_err {bar_dG_err:.3f} mbar (A) {mbar_dG:.3f} mbar_err {mbar_dG_err:.3f} dG_endpoint (E) {dG_endpoint:.3f} dG_endpoint_err {endpoint_err:.3f} dG_ssc_translation {dG_ssc_translation:.3f} dG_ssc_rotation {dG_ssc_rotation:.3f} overlap {overlap:.3f} lhs_mean {lhs_mean:.3f} rhs_mean {rhs_mean:.3f} lhs_n {len(lhs_du)} rhs_n {len(rhs_du)} | time: {time.time()-start:.3f}s"
    )

    # use the exact (BAR) answer, then add the corrections
    dG = bar_dG + (dG_endpoint + dG_ssc_translation + dG_ssc_rotation)
    # combine the BAR and endpoint errors in quadrature
    bar_dG_err = np.sqrt(bar_dG_err**2 + endpoint_err**2)

    return dG, bar_dG_err, results
示例#12
0
def check_2d(
    traj1,
    traj2,
    param1,
    param2,
    kb,
    pvconvert,
    quantity,
    dtempdpress=False,
    dtempdmu=False,
    cutoff=0.001,
    seed=None,
    bs_error=True,
    bs_repetitions=200,
    verbosity=1,
    screen=False,
    filename=None,
):
    r"""
    Checks whether the energy trajectories of two simulation performed at
    different temperatures have sampled distributions at the analytically
    expected ratio.

    Parameters
    ----------
    traj1 : array-like, 2d
        Trajectory of the first simulation
        If dtempdpress:

            * traj[0,:]: Potential energy U or total energy E = U + K
            * traj[1,:]: Volume V
    traj2 : array-like, 2d
        Trajectory of the second simulation
        If dtempdpress:

            * traj[0,:]: Potential energy U or total energy E = U + K
            * traj[1,:]: Volume V
    param1 : array-like
        If dtempdpress:
            Target temperature and pressure of the first simulation
    param2 : array-like
        If dtempdpress:
            Target temperature and pressure of the second simulation
    kb : float
        Boltzmann constant in same units as the energy trajectories
    pvconvert : float
        Conversion from pressure * volume to energy units
    quantity : List[str]
        Names of quantities analyzed (used for printing only)
    dtempdpress : bool, optional
        Set to True if trajectories were simulated at different
        temperature and pressure
        Default: False.
    dtempdmu : bool, optional
        Set to True if trajectories were simulated at different
        temperature and chemical potential
        Default: False.
    cutoff : float
        Tail cutoff of distributions.
        Default: 0.001 (0.1%)
    seed : int
        If set, bootstrapping will be reproducible.
        Default: None, bootstrapping non-reproducible.
    bs_error : bool
        Calculate the standard error via bootstrap resampling
        Default: True
    bs_repetitions : int
        Number of bootstrap repetitions drawn
        Default: 200
    verbosity : int
        Verbosity level.
        Default: 1 (only most important output)
    screen : bool, optional
        Plot distributions on screen.
        Default: False.
    filename : string, optional
        Plot distributions to `filename`.pdf.
        Default: None.

    Returns
    -------
    array-like
        Absolute deviation of each fitted slope from its analytical value,
        divided by the slope's error estimate. The error estimate is the
        bootstrapped standard deviation if `bs_error` is True, and the
        analytical error of the maximum-likelihood fit otherwise.

    Raises
    ------
    pv_error.InputError
        If not exactly one of `dtempdpress` and `dtempdmu` is set, or if the
        trajectories do not overlap.
    NotImplementedError
        If `dtempdmu` is set, or if plotting is requested via `screen` or
        `filename`.

    """

    if not (dtempdpress or dtempdmu) or (dtempdpress and dtempdmu):
        raise pv_error.InputError(
            ["dtempdpress", "dtempdmu"],
            "Need to specify exactly one of `dtempdpress` and `dtempdmu`.",
        )

    if dtempdmu:
        raise NotImplementedError(
            "check_2d: Testing of `dtempdmu` not implemented.")

    if screen or filename is not None:
        raise NotImplementedError("check_2d: Plotting not implemented.")

    # =============================== #
    # prepare constants, strings etc. #
    # =============================== #
    pstring = ("ln(P_2(" + quantity[0] + ", " + quantity[1] + ")/" + "P_1(" +
               quantity[0] + ", " + quantity[1] + "))")
    trueslope = np.zeros(2)
    if dtempdpress:
        # analytical slopes: beta_1 - beta_2 and pvconvert * (beta_1 P_1 - beta_2 P_2)
        trueslope = np.array([
            1 / (kb * param1[0]) - 1 / (kb * param2[0]),
            pvconvert * (1 / (kb * param1[0]) * param1[1] - 1 /
                         (kb * param2[0]) * param2[1]),
        ])

    if verbosity > 1:
        print("Analytical slope of {:s}: {:.8f}, {:.8f}".format(
            pstring, trueslope[0], trueslope[1]))

    quant = {}

    # ==================== #
    # prepare trajectories #
    # ==================== #
    # Discard burn-in period and time-correlated frames
    traj1 = trajectory.prepare(traj1,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 1")
    traj2 = trajectory.prepare(traj2,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 2")

    # calculate overlap
    traj1_full = traj1
    traj2_full = traj2
    traj1, traj2, min_ene, max_ene = trajectory.overlap(
        traj1=traj1_full,
        traj2=traj2_full,
    )
    if verbosity > 0:
        print("Overlap is {:.1%} of trajectory 1 and {:.1%} of trajectory 2.".
              format(
                  traj1.shape[1] / traj1_full.shape[1],
                  traj2.shape[1] / traj2_full.shape[1],
              ))
    if verbosity > 0 and dtempdpress:
        cov1 = np.cov(traj1_full)
        sig1 = np.sqrt(np.diag(cov1))
        sig1[1] *= pvconvert
        cov2 = np.cov(traj2_full)
        sig2 = np.sqrt(np.diag(cov2))
        sig2[1] *= pvconvert
        dt1 = 2 * kb * param1[0] * param1[0] / sig1[0]
        dt2 = 2 * kb * param2[0] * param2[0] / sig2[0]
        dp1 = 2 * kb * param1[0] / sig1[1]
        dp2 = 2 * kb * param2[0] / sig2[1]
        if verbosity > 1:
            # The last four placeholders read "point 1: dT, dP" and then
            # "point 2: dT, dP", so the arguments are ordered dt1, dp1, dt2, dp2.
            print(
                "A rule of thumb states that a good overlap can be expected when choosing state\n"
                "points separated by about 2 standard deviations.\n"
                "For the current trajectories, dT = {:.1f}, and dP = {:.1f},\n"
                "with standard deviations sig1 = [{:.1f}, {:.1g}], and sig2 = [{:.1f}, {:.1g}].\n"
                "According to the rule of thumb, given point 1, the estimate is dT = {:.1f}, dP = {:.1f}, and\n"
                "                                given point 2, the estimate is dT = {:.1f}, dP = {:.1f}."
                .format(
                    param2[0] - param1[0],
                    param2[1] - param1[1],
                    sig1[0],
                    sig1[1],
                    sig2[0],
                    sig2[1],
                    dt1,
                    dp1,
                    dt2,
                    dp2,
                ))
        print(
            "Rule of thumb estimates that (dT,dP) = ({:.1f},{:.1f}) would be optimal "
            "(currently, (dT,dP) = ({:.1f},{:.1f}))".format(
                0.5 * (dt1 + dt2),
                0.5 * (dp1 + dp2),
                param2[0] - param1[0],
                param2[1] - param1[1],
            ))
    if min_ene is None:
        raise pv_error.InputError(["traj1", "traj2"],
                                  "No overlap between trajectories.")

    # calculate inefficiency
    g1 = np.array([
        pymbar.timeseries.statisticalInefficiency(traj1[0]),
        pymbar.timeseries.statisticalInefficiency(traj1[1]),
    ])
    g2 = np.array([
        pymbar.timeseries.statisticalInefficiency(traj2[0]),
        pymbar.timeseries.statisticalInefficiency(traj2[1]),
    ])

    # work values handed to BAR, built from the analytical slopes
    w_f = -trueslope[0] * traj1[0] - trueslope[1] * traj1[1]
    w_r = trueslope[0] * traj2[0] + trueslope[1] * traj2[1]

    if verbosity > 2:
        print("Computing log of partition functions using pymbar.BAR...")
    df, ddf = pymbar.BAR(w_f, w_r)
    if verbosity > 2:
        print(
            "Using {:.5f} for log of partition functions as computed from BAR."
            .format(df))
        print("Uncertainty in quantity is {:.5f}.".format(ddf))
        print(
            "Assuming this is negligible compared to sampling error at individual points."
        )

    # ================== #
    # max-likelihood fit #
    # ================== #
    if verbosity > 2:
        print("Computing the maximum likelihood parameters")

    fitvals, dfitvals = do_max_likelihood_fit(
        traj1,
        traj2,
        g1,
        g2,
        init_params=[df, trueslope[0], trueslope[1]],
        verbose=(verbosity > 1),
    )

    # the first fit parameter is the free energy offset; the rest are the slopes
    slope = fitvals[1:]
    dslope = dfitvals[1:]
    quant["maxLikelihood"] = np.abs((slope - trueslope) / dslope)
    if verbosity > 0:
        print_stats(
            title="Maximum Likelihood Analysis (analytical error)",
            fitvals=fitvals,
            dfitvals=dfitvals,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            pvconvert=pvconvert,
            dtempdpress=dtempdpress,
            dtempdmu=dtempdmu,
        )

    if not bs_error:
        return quant["maxLikelihood"]

    # =============================== #
    # bootstrapped max-likelihood fit #
    # =============================== #
    if verbosity > 2:
        print("Computing bootstrapped maximum likelihood parameters")

    if seed is not None:
        np.random.seed(seed)

    bs_fitvals = []
    for t1, t2 in zip(
            trajectory.bootstrap(traj1, bs_repetitions),
            trajectory.bootstrap(traj2, bs_repetitions),
    ):
        # use overlap region
        t1, t2, min_ene, max_ene = trajectory.overlap(traj1=t1, traj2=t2)
        # calculate inefficiency
        g1 = np.array([
            pymbar.timeseries.statisticalInefficiency(t1[0]),
            pymbar.timeseries.statisticalInefficiency(t1[1]),
        ])
        g2 = np.array([
            pymbar.timeseries.statisticalInefficiency(t2[0]),
            pymbar.timeseries.statisticalInefficiency(t2[1]),
        ])
        # calculate max_likelihood fit
        fv, _ = do_max_likelihood_fit(
            t1,
            t2,
            g1,
            g2,
            init_params=[df, trueslope[0], trueslope[1]],
            verbose=(verbosity > 2),
        )
        bs_fitvals.append(fv)

    bs_fitvals = np.array(bs_fitvals)
    # The slope estimate stays the one from the full-data fit; only its error
    # is replaced by the spread of the bootstrapped fits.
    dslope = np.std(bs_fitvals[:, 1:], axis=0)
    quant["bootstrap"] = np.abs((slope - trueslope) / dslope)
    if verbosity > 0:
        print_stats(
            title="Maximum Likelihood Analysis (bootstrapped error)",
            fitvals=np.concatenate(([fitvals], bs_fitvals)),
            dfitvals=None,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            pvconvert=pvconvert,
            dtempdpress=dtempdpress,
            dtempdmu=dtempdmu,
        )

    return quant["bootstrap"]
示例#13
0
def test_periodic_langevin_integrator(splitting="H V R O R V H",
                                      ncycles=40,
                                      nsteps_neq=1000,
                                      nsteps_eq=1000,
                                      write_trajectory=False):
    """
    Test PeriodicNonequilibriumIntegrator

    The test first steps through two full cycles one step at a time, checking
    the integrator's "step" and "lambda" global variables after every step.
    It then runs `ncycles` cycles, collects the forward and reverse protocol
    work, and compares the BAR estimate against the analytical free energy.

    Parameters
    ----------
    splitting : str, optional, default="H V R O R V H"
        splitting string passed to the integrator
    ncycles : int, optional, default=40
        number of cycles
    nsteps_neq : int, optional, default=1000
        number of forward/backward annealing steps
    nsteps_eq : int, optional, default=1000
        number of equilibration steps to run at endstates before annealing
    write_trajectory : bool, optional, default=False
        If True, will generate a PDB file that contains the harmonic oscillator trajectory

    Raises
    ------
    AssertionError
        If the integrator's step counter or lambda value deviates from the expected schedule
    Exception
        If the BAR estimate deviates from the analytical value by more than NSIGMA_MAX uncertainties
    """
    # maximum tolerated deviation from the analytical free energy, in units of the BAR uncertainty
    NSIGMA_MAX = 6

    # These are the alchemical functions that will be used to control the system
    temperature = 298.0 * unit.kelvin
    sigma = 1.0 * unit.angstrom  # stddev of harmonic oscillator
    kT = kB * temperature  # thermal energy
    # NOTE(review): beta is not referenced again in this function
    beta = 1.0 / kT  # inverse thermal energy
    K = kT / sigma**2  # spring constant corresponding to sigma
    mass = 39.948 * unit.amu
    period = unit.sqrt(mass / K)  # period of harmonic oscillator
    timestep = period / 20.0
    collision_rate = 1.0 / period
    # expected free energy difference (compared against the dimensionless BAR estimate below)
    dF_analytical = 5.0
    # each parameter maps to its (lambda=0, lambda=1) value
    parameters = dict()
    displacement = 10 * sigma
    parameters['testsystems_HarmonicOscillator_x0'] = (0 * sigma, displacement)
    parameters['testsystems_HarmonicOscillator_U0'] = (0 * kT, 5 * kT)
    integrator_kwargs = {
        'temperature': temperature,
        'collision_rate': collision_rate,
        'timestep': timestep,
        'measure_shadow_work': False,
        'measure_heat': False
    }
    # linear interpolation in lambda between the two end-state values
    alchemical_functions = {
        name: '(1-lambda)*%f + lambda*%f' %
        (value[0].value_in_unit_system(unit.md_unit_system),
         value[1].value_in_unit_system(unit.md_unit_system))
        for (name, value) in parameters.items()
    }
    # Create harmonic oscillator testsystem
    testsystem = testsystems.HarmonicOscillator(K=K, mass=mass)
    system = testsystem.system
    positions = testsystem.positions
    topology = testsystem.topology

    # Create integrator
    from openmmtools.integrators import PeriodicNonequilibriumIntegrator
    integrator = PeriodicNonequilibriumIntegrator(
        alchemical_functions=alchemical_functions,
        splitting=splitting,
        nsteps_eq=nsteps_eq,
        nsteps_neq=nsteps_neq,
        **integrator_kwargs)
    platform = openmm.Platform.getPlatformByName("Reference")
    context = openmm.Context(system, integrator, platform)
    context.setPositions(positions)

    # one cycle = eq(0) + neq(0 -> 1) + eq(1) + neq(1 -> 0)
    nsteps_per_cycle = nsteps_eq + nsteps_neq + nsteps_eq + nsteps_neq
    assert integrator.getGlobalVariableByName(
        "n_steps_per_cycle") == nsteps_per_cycle

    if write_trajectory:
        from simtk.openmm.app import PDBFile
        filename = 'neq-trajectory.pdb'
        print(f'Writing trajectory to {filename}')
        with open(filename, 'wt') as outfile:
            # Write reference
            import copy
            pos1 = copy.deepcopy(positions)
            pos2 = copy.deepcopy(positions)
            # pos2 is the reference shifted by the displacement along the first coordinate
            pos2[0, 0] += displacement
            PDBFile.writeModel(topology, pos1, outfile)
            PDBFile.writeModel(topology, pos2, outfile)

            interval = 10
            PDBFile.writeModel(topology, positions, outfile, modelIndex=0)
            # write a frame every `interval` steps over two full cycles
            for step in range(0, 2 * nsteps_per_cycle, interval):
                integrator.step(interval)
                positions = context.getState(getPositions=True).getPositions(
                    asNumpy=True)
                PDBFile.writeModel(topology,
                                   positions,
                                   outfile,
                                   modelIndex=step)

                PDBFile.writeModel(topology, pos1, outfile)
                PDBFile.writeModel(topology, pos2, outfile)

        # Reset the integrator
        integrator.reset()

    # Step through two cycles one step at a time and verify the integrator's
    # "step" counter and "lambda" value after every step.
    step = 0
    for cycle in range(2):
        # eq (0)
        for i in range(nsteps_eq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(integrator.getGlobalVariableByName("lambda"),
                              0.0)
        # neq (0 -> 1)
        for i in range(nsteps_neq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(
                integrator.getGlobalVariableByName("lambda"),
                (i + 1) / nsteps_neq
            ), f'{step} {integrator.getGlobalVariableByName("lambda")}'
        # eq (1)
        for i in range(nsteps_eq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(integrator.getGlobalVariableByName("lambda"),
                              1.0)
        # neq (1 -> 0)
        for i in range(nsteps_neq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(integrator.getGlobalVariableByName("lambda"),
                              1 - (i + 1) / nsteps_neq)

    # after complete cycles lambda is back at 0
    assert np.isclose(integrator.getGlobalVariableByName("lambda"), 0.0)

    # Reset the integrator
    integrator.reset()

    # Collect one forward and one reverse work value per cycle. The work of
    # each annealing leg is the change in the protocol work across that leg.
    forward_works, reverse_works = list(), list()
    for _ in range(ncycles):
        # Equilibrium (lambda = 0)
        integrator.step(nsteps_eq)
        # Forward (0 -> 1)
        initial_work = integrator.get_protocol_work(dimensionless=True)
        integrator.step(nsteps_neq)
        final_work = integrator.get_protocol_work(dimensionless=True)
        forward_work = final_work - initial_work
        forward_works.append(forward_work)
        # Equilibrium (lambda = 1)
        integrator.step(nsteps_eq)
        # Reverse work (1 -> 0)
        initial_work = integrator.get_protocol_work(dimensionless=True)
        integrator.step(nsteps_neq)
        final_work = integrator.get_protocol_work(dimensionless=True)
        reverse_work = final_work - initial_work
        reverse_works.append(reverse_work)

    # print the spread of the collected work values
    print(np.array(forward_works).std())
    print(np.array(reverse_works).std())

    # BAR estimate and its uncertainty from the collected work values
    dF, ddF = pymbar.BAR(np.array(forward_works), np.array(reverse_works))
    # deviation from the analytical value in units of the estimated uncertainty
    nsigma = np.abs(dF - dF_analytical) / ddF
    assert np.isclose(integrator.getGlobalVariableByName("lambda"), 0.0)
    print(
        "analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}"
        .format(dF_analytical, dF, ddF, nsigma))
    if nsigma > NSIGMA_MAX:
        # NOTE(review): the message says "not zero", but the comparison is against dF_analytical (5.0)
        raise Exception(
            f"The free energy difference for the nonequilibrium switching for splitting {splitting} is not zero within statistical error."
        )

    # Clean up
    del context
    del integrator
示例#14
0
def run_alchemical_langevin_integrator(nsteps=0,
                                       splitting="O { V R H R V } O"):
    """Check that the AlchemicalNonequilibriumLangevinIntegrator reproduces the analytical free energy difference for a harmonic oscillator deformation, using BAR.

    Up to 6*sigma is tolerated for error.
    The total work (protocol work + shadow work) is used.

    Parameters
    ----------
    nsteps : int, optional
        Number of nonequilibrium switching steps (passed to the integrator as
        ``nsteps_neq``). At least one integrator step is always executed,
        even when ``nsteps`` is 0.
    splitting : str, optional
        Splitting string handed to the nonequilibrium integrator.

    Raises
    ------
    AssertionError
        If ``lambda`` or ``step`` do not hold the expected values right after
        a reset or right after a switching protocol.
    Exception
        If the BAR estimate deviates from ``dF_analytical`` by more than
        ``NSIGMA_MAX`` standard errors.
    """

    # Maximum tolerated deviation from the analytical free energy, in units
    # of the BAR standard error
    NSIGMA_MAX = 6
    n_iterations = 200  # number of forward and reverse protocols

    # These are the alchemical functions that will be used to control the system
    temperature = 298.0 * unit.kelvin
    sigma = 1.0 * unit.angstrom  # stddev of harmonic oscillator
    kT = kB * temperature  # thermal energy
    K = kT / sigma**2  # spring constant corresponding to sigma
    mass = 39.948 * unit.amu
    period = unit.sqrt(mass / K)  # period of harmonic oscillator
    timestep = period / 20.0
    collision_rate = 1.0 / period
    # Expected free energy difference; work values are collected
    # dimensionless and the protocol raises U0 from 0 kT to 1 kT.
    dF_analytical = 1.0
    parameters = dict()
    parameters['testsystems_HarmonicOscillator_x0'] = (0 * sigma, 2 * sigma)
    parameters['testsystems_HarmonicOscillator_U0'] = (0 * kT, 1 * kT)
    # 'forward' interpolates each parameter from its first to its second
    # value as lambda goes 0 -> 1; 'reverse' swaps the two end points.
    alchemical_functions = {
        'forward': {
            name: '(1-lambda)*%f + lambda*%f' %
            (value[0].value_in_unit_system(unit.md_unit_system),
             value[1].value_in_unit_system(unit.md_unit_system))
            for (name, value) in parameters.items()
        },
        'reverse': {
            name: '(1-lambda)*%f + lambda*%f' %
            (value[1].value_in_unit_system(unit.md_unit_system),
             value[0].value_in_unit_system(unit.md_unit_system))
            for (name, value) in parameters.items()
        },
    }

    # Create harmonic oscillator testsystem
    testsystem = testsystems.HarmonicOscillator(K=K, mass=mass)
    system = testsystem.system

    # Number of equilibrium steps between consecutive work samples
    thinning = 5 * 20  # 5 periods

    # Collect forward and reverse work values
    directions = ['forward', 'reverse']
    work = {
        direction: np.zeros([n_iterations], np.float64)
        for direction in directions
    }
    platform = openmm.Platform.getPlatformByName("Reference")
    for direction in directions:
        positions = testsystem.positions

        # Create equilibrium and nonequilibrium integrators
        equilibrium_integrator = GHMCIntegrator(temperature=temperature,
                                                collision_rate=collision_rate,
                                                timestep=timestep)
        nonequilibrium_integrator = AlchemicalNonequilibriumLangevinIntegrator(
            temperature=temperature,
            collision_rate=collision_rate,
            timestep=timestep,
            alchemical_functions=alchemical_functions[direction],
            splitting=splitting,
            nsteps_neq=nsteps,
            measure_shadow_work=True)

        # Create compound integrator
        compound_integrator = openmm.CompoundIntegrator()
        compound_integrator.addIntegrator(equilibrium_integrator)
        compound_integrator.addIntegrator(nonequilibrium_integrator)

        # Create Context
        context = openmm.Context(system, compound_integrator, platform)
        context.setPositions(positions)

        # Collect work samples
        for iteration in range(n_iterations):
            #
            # Generate equilibrium sample
            #

            compound_integrator.setCurrentIntegrator(0)
            equilibrium_integrator.reset()
            compound_integrator.step(thinning)

            #
            # Generate nonequilibrium work sample
            #

            compound_integrator.setCurrentIntegrator(1)
            nonequilibrium_integrator.reset()

            # Check initial conditions after reset
            current_lambda = nonequilibrium_integrator.getGlobalVariableByName(
                'lambda')
            assert current_lambda == 0.0, 'initial lambda should be 0.0 (was %f)' % current_lambda
            current_step = nonequilibrium_integrator.getGlobalVariableByName(
                'step')
            assert current_step == 0.0, 'initial step should be 0 (was %f)' % current_step

            compound_integrator.step(max(
                1, nsteps))  # need to execute at least one step
            work[direction][
                iteration] = nonequilibrium_integrator.get_total_work(
                    dimensionless=True)

            # Check final conditions before reset
            current_lambda = nonequilibrium_integrator.getGlobalVariableByName(
                'lambda')
            assert current_lambda == 1.0, 'final lambda should be 1.0 (was %f) for splitting %s' % (
                current_lambda, splitting)
            current_step = nonequilibrium_integrator.getGlobalVariableByName(
                'step')
            assert int(current_step) == max(
                1, nsteps
            ), 'final step should be %d (was %f) for splitting %s' % (max(
                1, nsteps), current_step, splitting)
            nonequilibrium_integrator.reset()

        # Clean up
        del context
        del compound_integrator

    # Compare the BAR estimate with the analytical value in units of the
    # estimated standard error
    dF, ddF = pymbar.BAR(work['forward'], work['reverse'])
    nsigma = np.abs(dF - dF_analytical) / ddF
    print(
        "analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}"
        .format(dF_analytical, dF, ddF, nsigma))
    if nsigma > NSIGMA_MAX:
        # The reference value is dF_analytical (1.0), not zero, so the
        # message reports a mismatch with the analytical value.
        raise Exception(
            "The free energy difference for the nonequilibrium switching for splitting '%s' and %d steps does not match the analytical value within statistical error."
            % (splitting, nsteps))
示例#15
0
 def current_free_energy_estimate(self):
     """Return the BAR result for the stored total work values.

     Passes ``self._forward_total_work`` and ``self._reverse_total_work``
     to ``pymbar.BAR`` and returns its two results as a list ``[df, ddf]``.
     """
     estimate, uncertainty = pymbar.BAR(self._forward_total_work,
                                        self._reverse_total_work)
     return [estimate, uncertainty]
示例#16
0
def check_1d(
    traj1,
    traj2,
    param1,
    param2,
    kb,
    quantity,
    dtemp=False,
    dpress=False,
    dmu=False,
    temp=None,
    pvconvert=None,
    nbins=40,
    cutoff=0.001,
    seed=None,
    bs_error=True,
    bs_repetitions=200,
    verbosity=1,
    screen=False,
    filename=None,
    xlabel="Energy",
    xunit=None,
):
    r"""
    Checks whether the trajectories of two simulations performed at
    different temperatures (or pressures) have sampled distributions at the
    analytically expected ratio.

    Parameters
    ----------
    traj1 : array-like
        Trajectory of the first simulation
        If dtemp:

            * NVT: Potential energy U or total energy E = U + K
            * NPT: Enthalpy H = U + pV or total energy E = H + K

        If dpress:

            * NPT: Volume V

    traj2 : array-like
        Trajectory of the second simulation
        If dtemp:

            * NVT: Potential energy U or total energy E = U + K
            * NPT: Enthalpy H = U + pV or total energy E = H + K

        If dpress:

            * NPT: Volume V

    param1 : float
        Target temperature or pressure of the first simulation
    param2 : float
        Target temperature or pressure of the second simulation
    kb : float
        Boltzmann constant in same units as the energy trajectories
    quantity : str
        Name of quantity analyzed (used for printing only)
    dtemp : bool, optional
        Set to True if trajectories were simulated at different temperature
        Default: False.
    dpress : bool, optional
        Set to True if trajectories were simulated at different pressure
        Default: False.
    dmu : bool, optional
        Set to True if trajectories were simulated at different chemical potential
        (not implemented, raises NotImplementedError)
        Default: False.
    temp : float, optional
        The temperature in equal temperature, differing pressure NPT simulations.
        Required if `dpress` is True.
    pvconvert : float, optional
        Conversion from pressure * volume to energy units.
        Required if `dpress` is True.
    nbins : int, optional
        Number of bins used to assess distributions of the trajectories
        Default: 40
    cutoff : float, optional
        Tail cutoff of distributions.
        Default: 0.001 (0.1%)
    seed : int, optional
        If set, bootstrapping will be reproducible (seeds the global
        numpy random state).
        Default: None, bootstrapping non-reproducible.
    bs_error : bool
        Calculate the standard error via bootstrap resampling
        Default: True
    bs_repetitions : int
        Number of bootstrap repetitions drawn
        Default: 200
    verbosity : int, optional
        Verbosity level.
        Default: 1 (only most important output)
    screen : bool, optional
        Plot distributions on screen.
        Default: False.
    filename : string, optional
        Plot distributions to `filename`.pdf.
        Default: None.
    xlabel : string, optional
        x-axis label used for plotting
        Default: 'Energy'
    xunit : string, optional
        x-axis label unit used for plotting
        Default: None

    Returns
    -------
    list of float
        Single-element list holding ``abs((slope - trueslope) / dslope)``,
        where `slope` is the maximum-likelihood slope and `trueslope` the
        analytical one. `dslope` is the standard deviation of the
        bootstrapped slopes if `bs_error` is True, otherwise the error
        estimate returned by the maximum-likelihood fit.

    Raises
    ------
    pv_error.InputError
        If not exactly one of `dtemp`, `dpress`, `dmu` is set, if `dpress`
        is set without `temp` and `pvconvert`, if the trajectories do not
        overlap, or if fewer than 3 bins are filled in the overlap region.
    NotImplementedError
        If `dmu` is set.
    """

    # Exactly one of the three mode switches must be active
    if sum(map(bool, (dtemp, dpress, dmu))) != 1:
        raise pv_error.InputError(
            ["dtemp", "dpress", "dmu"],
            "Need to specify exactly one of `dtemp`, `dpress` and `dmu`.",
        )

    if dmu:
        raise NotImplementedError(
            "check_1d: Testing of `dmu` not implemented.")

    if dpress and (temp is None or pvconvert is None):
        raise pv_error.InputError(
            ["dpress", "temp", "pvconvert"],
            "`ensemble.check_1d` with `dpress=True` requires `temp` and `pvconvert`.",
        )

    # =============================== #
    # prepare constants, strings etc. #
    # =============================== #
    pstring = "ln(P_2(" + quantity + ")/P_1(" + quantity + "))"
    trueslope = 0
    if dtemp:
        trueslope = 1 / (kb * param1) - 1 / (kb * param2)
    elif dpress:
        trueslope = (param1 - param2) / (kb * temp) * pvconvert

    if verbosity > 1:
        print("Analytical slope of {:s}: {:.8f}".format(pstring, trueslope))

    # Collects the deviation (in units of the respective error estimate)
    # for each analysis method
    quant = {}

    # ==================== #
    # prepare trajectories #
    # ==================== #
    # Discard burn-in period and time-correlated frames
    traj1 = trajectory.prepare(traj1,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 1")
    traj2 = trajectory.prepare(traj2,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 2")

    # calculate overlap
    traj1_full = traj1
    traj2_full = traj2
    traj1, traj2, min_ene, max_ene = trajectory.overlap(traj1=traj1_full,
                                                        traj2=traj2_full)
    if verbosity > 0:
        print("Overlap is {:.1%} of trajectory 1 and {:.1%} of trajectory 2.".
              format(
                  traj1.shape[0] / traj1_full.shape[0],
                  traj2.shape[0] / traj2_full.shape[0],
              ))
    if verbosity > 0 and dtemp:
        sig1 = np.std(traj1_full)
        sig2 = np.std(traj2_full)
        dt1 = 2 * kb * param1 * param1 / sig1
        dt2 = 2 * kb * param2 * param2 / sig2
        if verbosity > 1:
            print(
                "A rule of thumb states that a good overlap is found when dT/T = (2*kB*T)/(sig),\n"
                "where sig is the standard deviation of the energy distribution.\n"
                "For the current trajectories, dT = {:.1f}, sig1 = {:.1f} and sig2 = {:.1f}.\n"
                "According to the rule of thumb, given T1, a good dT is dT = {:.1f}, and\n"
                "                                given T2, a good dT is dT = {:.1f}."
                .format(param2 - param1, sig1, sig2, dt1, dt2))
        print("Rule of thumb estimates that dT = {:.1f} would be optimal "
              "(currently, dT = {:.1f})".format(0.5 * (dt1 + dt2),
                                                param2 - param1))
    if verbosity > 0 and dpress:
        sig1 = np.std(traj1_full) * pvconvert
        sig2 = np.std(traj2_full) * pvconvert
        dp1 = 2 * kb * temp / sig1
        dp2 = 2 * kb * temp / sig2
        if verbosity > 1:
            print(
                "A rule of thumb states that a good overlap is found when dP = (2*kB*T)/(sig),\n"
                "where sig is the standard deviation of the volume distribution.\n"
                "For the current trajectories, dP = {:.1f}, sig1 = {:.1g} and sig2 = {:.1g}.\n"
                "According to the rule of thumb, given P1, a good dP is dP = {:.1f}, and\n"
                "                                given P2, a good dP is dP = {:.1f}."
                .format(param2 - param1, sig1, sig2, dp1, dp2))
        print("Rule of thumb estimates that dP = {:.1f} would be optimal "
              "(currently, dP = {:.1f})".format(0.5 * (dp1 + dp2),
                                                param2 - param1))
    # NOTE(review): this truthiness test also fires when the lower overlap
    # bound is exactly 0.0. Presumably `trajectory.overlap` signals "no
    # overlap" with None - confirm, then compare with `is None` instead.
    if not min_ene:
        raise pv_error.InputError(["traj1", "traj2"],
                                  "No overlap between trajectories.")
    # calculate bins
    bins = np.linspace(min_ene, max_ene, nbins + 1)
    bins = check_bins(traj1, traj2, bins)
    if np.size(bins) < 3:
        raise pv_error.InputError(
            ["traj1", "traj2", "nbins", "cutoff"],
            "Less than 3 bins were filled in the overlap region.\n"
            "Ensure sufficient overlap between the trajectories, and "
            "consider increasing `cutoff` or `nbins` if there is "
            "sufficient overlap but unusually long tails.",
        )

    # calculate inefficiency
    g1 = pymbar.timeseries.statisticalInefficiency(traj1)
    g2 = pymbar.timeseries.statisticalInefficiency(traj2)

    # Work values fed to BAR; its estimate is used as the offset of the fits
    w_f = -trueslope * traj1
    w_r = trueslope * traj2

    if verbosity > 2:
        print("Computing log of partition functions using pymbar.BAR...")
    df, ddf = pymbar.BAR(w_f, w_r)
    if verbosity > 2:
        print(
            "Using {:.5f} for log of partition functions as computed from BAR."
            .format(df))
        print("Uncertainty in quantity is {:.5f}.".format(ddf))
        print(
            "Assuming this is negligible compared to sampling error at individual points."
        )

    # ========== #
    # linear fit #
    # ========== #
    if verbosity > 2:
        print("Computing linear fit parameters (for plotting / comparison)")

    fitvals, dfitvals = do_linear_fit(
        traj1=traj1,
        traj2=traj2,
        g1=g1,
        g2=g2,
        bins=bins,
        screen=screen,
        filename=filename,
        trueslope=trueslope,
        trueoffset=df,
        units=xunit,
        xlabel=xlabel,
        ylabel=r"$\log\frac{P_2(" + quantity + ")}{P_1(" + quantity + ")}$",
    )

    # Index 1 of the fit results is the slope
    slope = fitvals[1]
    dslope = dfitvals[1]
    quant["linear"] = [abs((slope - trueslope) / dslope)]
    if verbosity > 1:
        print_stats(
            title="Linear Fit Analysis (analytical error)",
            fitvals=fitvals,
            dfitvals=dfitvals,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            temp=temp,
            pvconvert=pvconvert,
            dtemp=dtemp,
            dpress=dpress,
            dmu=dmu,
        )

    # ================== #
    # max-likelihood fit #
    # ================== #
    if verbosity > 2:
        print("Computing the maximum likelihood parameters")

    fitvals, dfitvals = do_max_likelihood_fit(traj1,
                                              traj2,
                                              g1,
                                              g2,
                                              init_params=[df, trueslope],
                                              verbose=(verbosity > 1))

    slope = fitvals[1]
    dslope = dfitvals[1]
    quant["maxLikelihood"] = [abs((slope - trueslope) / dslope)]
    if (verbosity > 0 and not bs_error) or verbosity > 1:
        print_stats(
            title="Maximum Likelihood Analysis (analytical error)",
            fitvals=fitvals,
            dfitvals=dfitvals,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            temp=temp,
            pvconvert=pvconvert,
            dtemp=dtemp,
            dpress=dpress,
            dmu=dmu,
        )

    if not bs_error:
        return quant["maxLikelihood"]

    # =============================== #
    # bootstrapped max-likelihood fit #
    # =============================== #
    if verbosity > 0:
        print(
            "Computing bootstrapped maximum likelihood parameters... "
            "[0/{:d}]".format(bs_repetitions),
            end="",
        )

    if seed is not None:
        np.random.seed(seed)

    bs_fitvals = []
    for n, (t1, t2) in enumerate(
            zip(
                trajectory.bootstrap(traj1, bs_repetitions),
                trajectory.bootstrap(traj2, bs_repetitions),
            )):

        # use overlap region
        t1, t2, min_ene, max_ene = trajectory.overlap(traj1=t1, traj2=t2)
        # calculate inefficiency
        g1 = pymbar.timeseries.statisticalInefficiency(t1)
        g2 = pymbar.timeseries.statisticalInefficiency(t2)
        # calculate max_likelihood fit
        fv, _ = do_max_likelihood_fit(t1,
                                      t2,
                                      g1,
                                      g2,
                                      init_params=[df, trueslope],
                                      verbose=(verbosity > 2))
        bs_fitvals.append(fv)
        # print progress
        if verbosity > 0:
            print(
                "\rComputing bootstrapped maximum likelihood parameters... "
                "[{:d}/{:d}]".format(n + 1, bs_repetitions),
                end="",
            )

    # Terminate the progress line; stay silent when no progress was printed
    if verbosity > 0:
        print()
    bs_fitvals = np.array(bs_fitvals)
    # The slope of the fit on the full trajectories is kept; only its error
    # estimate is replaced by the spread of the bootstrapped slopes.
    dslope = np.std(bs_fitvals[:, 1], axis=0)
    quant["bootstrap"] = [abs((slope - trueslope) / dslope)]
    if verbosity > 0:
        print_stats(
            title="Maximum Likelihood Analysis (bootstrapped error)",
            fitvals=np.concatenate(([fitvals], bs_fitvals)),
            dfitvals=None,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            temp=temp,
            pvconvert=pvconvert,
            dtemp=dtemp,
            dpress=dpress,
            dmu=dmu,
        )

    return quant["bootstrap"]
示例#17
0
def bayes_factor_v2(model_ini,
                    sample_ini,
                    model_fin,
                    sample_fin,
                    model_ini_name="2c",
                    model_fin_name="rm",
                    aug_with="GaussMix",
                    sigma_robust=False,
                    n_components=1,
                    covariance_type="full",
                    bootstrap=None,
                    sample_proportion=None):
    """
    Estimate the (log) Bayes factor between an initial and a final model
    with BAR. The variables that exist only in the final model are fitted
    with a distribution of type `aug_with`, and samples drawn from that fit
    augment the initial model's samples.

    :param model_ini: pymc3 model
    :param sample_ini: dict: var_name -> ndarray
    :param model_fin: pymc3 model
    :param sample_fin: dict: var_name -> ndarray
    :param model_ini_name: str
    :param model_fin_name: str; model_ini_name + "_" + model_fin_name must be
                           one of "2c_rm", "2c_em", "rm_em"
    :param aug_with: str, one of "Normal", "Uniform", "GaussMix"
    :param sigma_robust: bool, only used when aug_with="Normal"
    :param n_components: int, only used when aug_with="GaussMix"
    :param covariance_type: str, only used when aug_with="GaussMix"
    :param bootstrap: int, number of repeats passed to bootstrap_BAR
    :param sample_proportion: float, forwarded to bootstrap_BAR; requires
                              bootstrap to be set
    :return: bf if bootstrap is None
             (bf, err) if bootstrap is an int
             bf is the negated pymbar.BAR estimate when sample_proportion is
             None, otherwise the first value returned by bootstrap_BAR
    """
    print("Use estimator version 2")

    if bootstrap is None and sample_proportion is not None:
        raise ValueError(
            "When sample_proportion = %0.5f, bootstrap must not be None" %
            sample_proportion)

    assert aug_with in ("Normal", "Uniform",
                        "GaussMix"), "Unknown aug_with: " + aug_with
    print("aug_with:", aug_with)

    ini_fin_name = model_ini_name + "_" + model_fin_name
    assert ini_fin_name in ("2c_rm", "2c_em",
                            "rm_em"), "Unknown ini_fin_name: " + ini_fin_name

    # Which structural differences exist between the two models
    starts_from_2c = ini_fin_name in ("2c_rm", "2c_em")
    is_rm_em = ini_fin_name == "rm_em"
    gains_rho = ini_fin_name in ("2c_em", "rm_em")

    vars_ini = [name for name in sample_ini.keys() if name != "logp"]
    print("vars_ini:", vars_ini)
    nsamples_ini = len(sample_ini[vars_ini[0]])
    print("nsamples_ini = %d" % nsamples_ini)

    vars_fin = [name for name in sample_fin.keys() if name != "logp"]
    print("vars_fin:", vars_fin)
    nsamples_fin = len(sample_fin[vars_fin[0]])
    print("nsamples_fin = %d" % nsamples_fin)

    def resolve(prefix_pairs):
        # Translate (initial prefix, final prefix) pairs into the actual
        # variable names of the two sample sets
        for prefix_ini, prefix_fin in prefix_pairs:
            yield (var_starts_with(prefix_ini, vars_ini),
                   var_starts_with(prefix_fin, vars_fin))

    # get var names
    if starts_from_2c:
        dg1_var_f = var_starts_with("DeltaG1", vars_fin)
        ddg_var_f = var_starts_with("DeltaDeltaG", vars_fin)
        dh1_var_f = var_starts_with("DeltaH1", vars_fin)
        dh2_var_f = var_starts_with("DeltaH2", vars_fin)

        dg_var_i = var_starts_with("DeltaG", vars_ini)
        dh_var_i = var_starts_with("DeltaH", vars_ini)
    if gains_rho:
        r_var_f = var_starts_with("rho", vars_fin)

    # get redundant parameters from final state
    sample_redun_fin = {}
    if starts_from_2c:
        sample_redun_fin["DeltaDeltaG"] = sample_fin[ddg_var_f]
        sample_redun_fin["DeltaDeltaH"] = (sample_fin[dh2_var_f] -
                                           sample_fin[dh1_var_f])
    if gains_rho:
        sample_redun_fin["rho"] = sample_fin[r_var_f]
    print("Vars of sample_redun_fin:", list(sample_redun_fin))

    # fit models to sample_redun_fin, draw the augmenting samples and
    # remember which augmented potential function belongs to the fit
    if aug_with == "Normal":
        aug_fit = fit_normal_trace(sample_redun_fin,
                                   sigma_robust=sigma_robust)
        sample_aug_ini = draw_normal_samples(aug_fit, nsamples_ini)
        aug_pot_ener = pot_ener_normal_aug

    elif aug_with == "Uniform":
        aug_fit = fit_uniform_trace(sample_redun_fin)
        sample_aug_ini = draw_uniform_samples(aug_fit, nsamples_ini)
        aug_pot_ener = pot_ener_uniform_aug

    elif aug_with == "GaussMix":
        print("n_components:", n_components)
        print("covariance_type:", covariance_type)
        aug_fit = GaussMix(n_components=n_components,
                           covariance_type=covariance_type)
        aug_fit.fit(sample_redun_fin)
        sample_aug_ini = aug_fit.sample(n_samples=nsamples_ini)
        aug_pot_ener = pot_ener_gauss_mix_aug

    print("Vars of sample_aug_ini:", list(sample_aug_ini.keys()))

    # variables shared by both models, and those shared only for rm -> em
    shared_pairs = (("P0", "P0"), ("Ls", "Ls"), ("DeltaH_0", "DeltaH_0"),
                    ("log_sigma", "log_sigma"))
    shared_pairs_rm_em = (("DeltaG1", "DeltaG1"),
                          ("DeltaDeltaG", "DeltaDeltaG"),
                          ("DeltaH1", "DeltaH1"), ("DeltaH2", "DeltaH2"))

    # potential for sample drawn from i estimated at state i
    u_i_i = aug_pot_ener(sample_ini, model_ini, sample_aug_ini, aug_fit)

    # potential for sample drawn from i estimated at state f
    sample_tmp_ini = {}
    for name_ini, name_fin in resolve(shared_pairs):
        sample_tmp_ini[name_fin] = sample_ini[name_ini]

    if starts_from_2c:
        dg_ini = sample_ini[dg_var_i]
        ddg_aug = sample_aug_ini["DeltaDeltaG"]
        sample_tmp_ini[dg1_var_f] = dg_ini - 0.5 * ddg_aug
        sample_tmp_ini[ddg_var_f] = ddg_aug

        dh_ini = sample_ini[dh_var_i]
        half_ddh_aug = 0.5 * sample_aug_ini["DeltaDeltaH"]
        sample_tmp_ini[dh1_var_f] = dh_ini - half_ddh_aug
        sample_tmp_ini[dh2_var_f] = dh_ini + half_ddh_aug

    elif is_rm_em:
        for name_ini, name_fin in resolve(shared_pairs_rm_em):
            sample_tmp_ini[name_fin] = sample_ini[name_ini]

    if gains_rho:
        sample_tmp_ini[r_var_f] = sample_aug_ini["rho"]

    u_i_f = pot_ener(sample_tmp_ini, model_fin)
    del sample_tmp_ini

    # potential for sample drawn from f estimated at state i
    sample_tmp_fin = {}
    for name_ini, name_fin in resolve(shared_pairs):
        sample_tmp_fin[name_ini] = sample_fin[name_fin]

    if starts_from_2c:
        sample_tmp_fin[dg_var_i] = (sample_fin[dg1_var_f] +
                                    0.5 * sample_fin[ddg_var_f])
        sample_tmp_fin[dh_var_i] = 0.5 * (sample_fin[dh1_var_f] +
                                          sample_fin[dh2_var_f])

    elif is_rm_em:
        for name_ini, name_fin in resolve(shared_pairs_rm_em):
            sample_tmp_fin[name_ini] = sample_fin[name_fin]

    u_f_i = aug_pot_ener(sample_tmp_fin, model_ini, sample_redun_fin, aug_fit)
    del sample_tmp_fin

    # potential for sample drawn from f estimated at state f
    u_f_f = pot_ener(sample_fin, model_fin)

    # forward / reverse "work" values for BAR, with non-finite entries
    # dropped
    w_F = u_i_f - u_i_i
    w_R = u_f_i - u_f_f
    w_F = filter_nan_inf(w_F)
    w_R = filter_nan_inf(w_R)

    if len(w_F) == 0 or len(w_R) == 0:
        print("Empty work arrays:", w_F.shape, w_R.shape)
        return 0. if bootstrap is None else (0., 0.)

    # With a sample proportion, both estimate and error come from bootstrap
    if sample_proportion is not None:
        bf, bf_err = bootstrap_BAR(w_F,
                                   w_R,
                                   bootstrap,
                                   sample_proportion=sample_proportion)
        return bf, bf_err

    delta_F = pymbar.BAR(w_F,
                         w_R,
                         compute_uncertainty=False,
                         relative_tolerance=1e-12,
                         verbose=True)
    bf = -delta_F
    log10_e = np.log10(np.e)  # converts natural-log units to log10

    if bootstrap is None:
        print("log10(bf) = %0.5f" % (bf * log10_e))
        return bf

    print("Running %d bootstraps to estimate error." % bootstrap)
    _, bf_err = bootstrap_BAR(w_F, w_R, bootstrap, sample_proportion=1.)
    print("log10(bf) = %0.5f +/- %0.5f" % (bf * log10_e, bf_err * log10_e))
    return bf, bf_err
示例#18
0
def mybar_impl(w):
    """Return only the first of the two values produced by ``pymbar.BAR``.

    ``w[0]`` and ``w[1]`` are passed to BAR as its first and second argument.
    """
    forward, reverse = w[0], w[1]
    estimate, _unused = pymbar.BAR(forward, reverse)
    return estimate
示例#19
0
# Every state contributes the same number of samples (npoints).
N_k = npoints*np.ones([nstates],int)

# Estimate all pairwise free energy differences and their uncertainties.
mbar = pymbar.MBAR(u_kln,N_k,relative_tolerance=1.0e-10,verbose=True)
(Delta_f_ij_estimated, dDelta_f_ij_estimated) = mbar.getFreeEnergyDifferences()
# print() with a single argument behaves the same on Python 2 and 3.
print(Delta_f_ij_estimated)
print(dDelta_f_ij_estimated)

# check these in the case of two.
# try exponential averaging, to see if there is a difference. Seems to work.

if len(dirnames) == 2:
    # Exponential averaging (EXP) on the samples of state 0 ...
    wf = -(u_kln[0,1:,]-u_kln[0,0,:])
    (df_forward,ddf_forward) = pymbar.EXP(wf)
    print('EXP forward %10.4f +/- %7.4f' % (df_forward, ddf_forward))
    # ... and on the samples of state 1.
    wr = -(u_kln[1,1:,]-u_kln[1,0,:])
    (df_rev, ddf_rev) = pymbar.EXP(wr)
    print('EXP reverse %10.4f +/- %7.4f' % (df_rev, ddf_rev))

    # A leftover pdb.set_trace() breakpoint used to sit here; it stopped
    # every run in the interactive debugger and has been removed.

    # BAR on the same data; the estimate is printed with flipped sign.
    (df_bar, ddf_bar) = pymbar.BAR(-wf,wr)
    print('BAR reverse %10.4f +/- %7.4f' % (-df_bar, ddf_bar))
    
    # plots the overlap in energy between the two.  Looks good!
    if plot:
        plt.clf()
        plt.hist(wf.T, facecolor='red')
        plt.hist(wr.T, facecolor='blue')
        plt.show()