Пример #1
0
    def _load_data(self):
        """Compute end-state free energies for every simulated leg found on disk.

        Every file in ``self.directory`` whose name ends in ``.nc`` and does
        not contain ``'checkpoint'`` is analysed with
        ``MultiStateSamplerAnalyzer``. The first of the substrings
        ``'vacuum'``, ``'solvent'`` or ``'complex'`` found in the file name
        decides which attributes are written; files matching none of them
        are ignored.

        For a matching leg with prefix ``<p>`` (``vac``, ``sol`` or ``com``)
        the following attributes are set:

        * ``self._<p>dg``    -- ``f_ij[0, -1] * kT`` converted to kcal/mol
        * ``self._<p>ddg``   -- ``df_ij[0, -1] * kT`` converted to kcal/mol
        * ``self._<p>f_ij``  -- first array returned by ``get_free_energy()``
        * ``self._<p>df_ij`` -- second array returned by ``get_free_energy()``

        Parameters
        ----------

        Returns
        -------
        None

        """
        import os
        from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer
        from simtk import unit

        def _analyze_leg(filename):
            # Shared analysis for one trajectory file; returns the values in
            # the order (dg, ddg, f_ij, df_ij).
            reporter = MultiStateReporter(f'{self.directory}/{filename}')
            analyzer = MultiStateSamplerAnalyzer(reporter)
            f_ij, df_ij = analyzer.get_free_energy()
            # [0, -1] is the first-state -> last-state element; multiplying
            # by the analyzer's kT attaches units before the conversion.
            dg = (f_ij[0, -1] * analyzer.kT).in_units_of(
                unit.kilocalories_per_mole)
            ddg = (df_ij[0, -1] * analyzer.kT).in_units_of(
                unit.kilocalories_per_mole)
            return dg, ddg, f_ij, df_ij

        for out in os.listdir(self.directory):
            # Only trajectory files are analysed, never checkpoint files.
            if not out.endswith('.nc') or 'checkpoint' in out:
                continue

            if 'vacuum' in out:
                (self._vacdg, self._vacddg,
                 self._vacf_ij, self._vacdf_ij) = _analyze_leg(out)
            elif 'solvent' in out:
                (self._soldg, self._solddg,
                 self._solf_ij, self._soldf_ij) = _analyze_leg(out)
            elif 'complex' in out:
                (self._comdg, self._comddg,
                 self._comf_ij, self._comdf_ij) = _analyze_leg(out)
        return
Пример #2
0
def create_hss(reporter_name,
               hybrid_factory,
               selection_string='all',
               checkpoint_interval=1,
               n_states=13):
    """Build a configured ``HybridRepexSampler`` together with its reporter.

    :param reporter_name: storage path handed to ``MultiStateReporter``
    :param hybrid_factory: object exposing ``hybrid_topology.select``; it is
        also passed to the sampler as ``hybrid_factory``
    :param selection_string: selection passed to ``hybrid_topology.select``
        to choose the analysis particle indices (default ``'all'``)
    :param checkpoint_interval: forwarded to ``MultiStateReporter``
    :param n_states: number of states passed to ``setup``
    :returns: tuple ``(hss, reporter)``
    """
    protocol = LambdaProtocol(functions='default')

    # Atoms selected here are the ones registered with the reporter as
    # analysis particles.
    analysis_indices = hybrid_factory.hybrid_topology.select(selection_string)
    reporter = MultiStateReporter(
        reporter_name,
        analysis_particle_indices=analysis_indices,
        checkpoint_interval=checkpoint_interval,
    )

    langevin_move = mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=250,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R R R O R R R V",
        constraint_tolerance=1e-06,
    )
    hss = HybridRepexSampler(
        mcmc_moves=langevin_move,
        hybrid_factory=hybrid_factory,
        online_analysis_interval=10,
    )
    hss.setup(
        n_states=n_states,
        temperature=300 * unit.kelvin,
        storage_file=reporter,
        lambda_protocol=protocol,
        endstates=False,
    )
    return hss, reporter
Пример #3
0
def create_hss(pkl, suffix, selection, checkpoint_interval, n_states):
    """Load a pickled hybrid factory and build a sampler/reporter pair for it.

    :param pkl: path of the pickle file holding the hybrid topology factory
    :param suffix: text inserted into the reporter file name before ``.nc``
    :param selection: selection passed to ``hybrid_topology.select`` to
        choose the analysis particle indices
    :param checkpoint_interval: forwarded to ``MultiStateReporter``
    :param n_states: number of states passed to ``setup``
    :returns: tuple ``(hss, reporter)``
    """
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only pass files from a trusted source.
    with open(pkl, 'rb') as handle:
        htf = pickle.load(handle)

    protocol = LambdaProtocol(functions='default')

    # The last three characters of the pickle path are dropped before the
    # suffix is appended (presumably a trailing "pkl" extension -- confirm
    # against callers).
    stem = pkl[:-3]
    reporter_file = stem + suffix + '.nc'

    analysis_indices = htf.hybrid_topology.select(selection)
    reporter = MultiStateReporter(
        reporter_file,
        analysis_particle_indices=analysis_indices,
        checkpoint_interval=checkpoint_interval,
    )

    langevin_move = mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=250,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R R R O R R R V",
        constraint_tolerance=1e-06,
    )
    hss = HybridRepexSampler(
        mcmc_moves=langevin_move,
        hybrid_factory=htf,
        online_analysis_interval=10,
    )
    hss.setup(
        n_states=n_states,
        temperature=300 * unit.kelvin,
        storage_file=reporter,
        lambda_protocol=protocol,
        endstates=False,
    )
    return hss, reporter
    def load_data(self):
        """Run the free energy analysis on every simulated leg found on disk.

        Every file in ``self.directory`` whose name ends in ``.nc`` and does
        not contain ``'checkpoint'`` is analysed with
        ``MultiStateSamplerAnalyzer``. The first of the substrings
        ``'vacuum'``, ``'solvent'`` or ``'complex'`` found in the file name
        decides which attributes are written; files matching none of them
        are ignored.

        For a matching leg with prefix ``<p>`` (``vac``, ``sol`` or ``com``)
        the following attributes are set:

        * ``self.<p>dg``    -- ``f_ij[1, -2]``
        * ``self.<p>ddg``   -- ``df_ij[1, -2] ** 2`` (the squared value)
        * ``self.<p>f_ij``  -- first array returned by ``get_free_energy()``
        * ``self.<p>df_ij`` -- second array returned by ``get_free_energy()``

        Parameters
        ----------

        Returns
        -------
        None

        """
        import os
        from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer

        def _analyze_leg(filename):
            # Shared analysis for one trajectory file; returns the values in
            # the order (dg, ddg, f_ij, df_ij).
            reporter = MultiStateReporter(f'{self.directory}/{filename}')
            analyzer = MultiStateSamplerAnalyzer(reporter)
            f_ij, df_ij = analyzer.get_free_energy()
            # [1, -2] picks the second-state -> second-to-last-state element.
            return f_ij[1, -2], df_ij[1, -2] ** 2, f_ij, df_ij

        for out in os.listdir(self.directory):
            # Only trajectory files are analysed, never checkpoint files.
            if not out.endswith('.nc') or 'checkpoint' in out:
                continue

            if 'vacuum' in out:
                (self.vacdg, self.vacddg,
                 self.vacf_ij, self.vacdf_ij) = _analyze_leg(out)
            elif 'solvent' in out:
                (self.soldg, self.solddg,
                 self.solf_ij, self.soldf_ij) = _analyze_leg(out)
            elif 'complex' in out:
                (self.comdg, self.comddg,
                 self.comf_ij, self.comdf_ij) = _analyze_leg(out)
        return
    def historic_fes(self, stepsize=100):
        """Record how the free energy estimates evolve with simulation length.

        Every file in ``self.directory`` whose name ends in ``.nc`` and does
        not contain ``'checkpoint'`` is considered. For each of the
        substrings ``'vacuum'``, ``'solvent'`` and ``'complex'`` present in
        the file name (the three tests are independent, so a name containing
        several of them is processed once per match), the analysis is
        repeated with ``max_n_iterations`` set to ``stepsize``,
        ``2 * stepsize``, ... up to but excluding the file's
        ``last_iteration`` value. Each repetition appends to the pre-existing
        lists of the matching leg ``<p>`` (``vac``, ``sol`` or ``com``):

        * ``self.<p>dg_history``     <- ``f_ij[1, -2]``
        * ``self.<p>ddg_history``    <- ``df_ij[1, -2]``
        * ``self.<p>dg_history_es``  <- ``f_ij[0, -1]``
        * ``self.<p>ddg_history_es`` <- ``df_ij[0, -1]``

        Parameters
        ----------
        stepsize : int, default 100
            Increment, in iterations, between successive analyses.

        Returns
        -------
        None

        """
        from perses.analysis import utils
        import os
        from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer

        def _append_history(filename, dg, ddg, dg_es, ddg_es):
            # Analyse one trajectory at increasing lengths, appending the
            # estimates to the four supplied lists.
            path = f'{self.directory}/{filename}'
            reporter = MultiStateReporter(path)

            # The raw netCDF handle is only needed to read the iteration
            # count; close it right away instead of leaking one open handle
            # per analysed file.
            # NOTE(review): assumes utils.open_netcdf returns a
            # netCDF4.Dataset-like object exposing close() -- confirm.
            ncfile = utils.open_netcdf(path)
            try:
                n_iterations = int(ncfile.variables['last_iteration'][0])
            finally:
                ncfile.close()

            for step in range(stepsize, n_iterations, stepsize):
                analyzer = MultiStateSamplerAnalyzer(reporter,
                                                     max_n_iterations=step)
                f_ij, df_ij = analyzer.get_free_energy()
                dg.append(f_ij[1, -2])
                ddg.append(df_ij[1, -2])
                dg_es.append(f_ij[0, -1])
                ddg_es.append(df_ij[0, -1])

        for out in os.listdir(self.directory):
            # Only trajectory files are analysed, never checkpoint files.
            if not out.endswith('.nc') or 'checkpoint' in out:
                continue

            if 'vacuum' in out:
                _append_history(out, self.vacdg_history, self.vacddg_history,
                                self.vacdg_history_es, self.vacddg_history_es)
            if 'solvent' in out:
                _append_history(out, self.soldg_history, self.solddg_history,
                                self.soldg_history_es, self.solddg_history_es)
            if 'complex' in out:
                _append_history(out, self.comdg_history, self.comddg_history,
                                self.comdg_history_es, self.comddg_history_es)
        return
def analyze_logZ():
    """Print the free energy matrices for the trajectory stored in ``traj.nc``.

    Opens ``traj.nc`` (relative to the current working directory) read-only,
    runs ``MultiStateSamplerAnalyzer.get_free_energy()`` on it and prints
    the two returned arrays, estimates first and their uncertainties second.

    :returns: None
    """
    from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer

    reporter = MultiStateReporter('traj.nc', open_mode='r', checkpoint_interval=1)
    analyzer = MultiStateSamplerAnalyzer(reporter)
    Deltaf_ij, dDeltaf_ij = analyzer.get_free_energy()
    print(Deltaf_ij)
    print(dDeltaf_ij)
Пример #7
0
def expectations_free_energy(Q,
                             Q_folded,
                             temperature_list,
                             frame_begin=0,
                             sample_spacing=1,
                             output_data="output/output.nc",
                             bootstrap_energies=None,
                             num_intermediate_states=0):
    """
    Calculate the free energy difference (with uncertainty) between all pairs of conformational states as a function of temperature.

    The energies are reweighted with MBAR to every sampled temperature plus
    ``num_intermediate_states`` evenly spaced temperatures between each pair
    of neighbouring sampled temperatures. At each temperature the expectation
    of belonging to each conformational state is computed, and the free
    energy difference follows from the log-ratio of those expectations.

    :param Q: native contact fraction array of size [n_frames x n_states]
    :type Q: 2D numpy array ( float )

    :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded)
    :type Q_folded: float

    :param temperature_list: List of temperatures for the simulation data (necessary because bootstrap version doesn't read in the file)
    :type temperature_list: List( float * simtk.unit.temperature )

    :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0). Only used when the energies are read from output_data.
    :type frame_begin: int

    :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData. Only used when the energies are read from output_data.
    :type sample_spacing: int

    :param output_data: Path to the simulation .nc file. Ignored when bootstrap_energies is given.
    :type output_data: str

    :param bootstrap_energies: a custom replica_energies array to be used for bootstrapping calculations. Used instead of the energies in the .nc file; in that case Q is used as given, without any frame selection.
    :type bootstrap_energies: 2d numpy array (float)

    :param num_intermediate_states: Number of unsampled thermodynamic states between sampled states to include in the calculation
    :type num_intermediate_states: int

    :returns:
      - full_T_list - A 1D numpy array listing of all temperatures, including sampled and intermediate unsampled
      - deltaF_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing free energy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaF uncertainty - A dictionary containing 1D numpy arrays of uncertainties corresponding to deltaF_values
    """

    if bootstrap_energies is not None:
        # Use a subsampled replica_energy matrix instead of reading from file
        replica_energies = bootstrap_energies

    else:
        # extract reduced energies and the state indices from the .nc
        reporter = MultiStateReporter(output_data, open_mode="r")
        analyzer = ReplicaExchangeAnalyzer(reporter)
        (
            replica_energies_all,
            unsampled_state_energies,
            neighborhoods,
            replica_state_indices,
        ) = analyzer.read_energies()

        # Select production frames to analyze
        replica_energies = replica_energies_all[:, :,
                                                frame_begin::sample_spacing]

        # Check the size of the Q array:
        n_frames = np.shape(replica_energies)[2]
        if n_frames != np.shape(Q)[0]:
            # Mismatch in number of frames.
            if n_frames == np.shape(Q[::sample_spacing, :])[0]:
                # Correct starting frame, need to apply sampling stride:
                Q = Q[::sample_spacing, :]
            elif np.shape(replica_energies_all)[2] == np.shape(Q)[0]:
                # This is the full Q: slice it exactly like the energies so
                # both describe the same production frames.
                Q = Q[frame_begin::sample_spacing, :]
            else:
                print(
                    f'Error: Q array of shape {Q.shape} incompatible with energies array of shape{replica_energies.shape}'
                )
                exit()

    # Classify Q into folded/unfolded states
    array_folded_states = classify_Q_states(Q, Q_folded)

    # Number of configurational states:
    n_conf_states = len(np.unique(array_folded_states))

    # convert the energies from replica/evaluated state/sample form to evaluated state/sample form
    replica_energies = pymbar.utils.kln_to_kn(replica_energies)
    n_samples = len(replica_energies[0, :])

    # Reshape array_folded_states to row vector for pymbar
    # We need to order the data by replica, rather than by frame
    array_folded_states = np.reshape(array_folded_states,
                                     (np.size(array_folded_states)),
                                     order='F')

    # determine the numerical values of beta at each state in units consistent with the temperature
    Tunit = temperature_list[0].unit
    temps = np.array([temp.value_in_unit(Tunit) for temp in temperature_list])
    beta_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole / Tunit) * temps)

    # We need expectations at all of the original temperatures and at each
    # intermediate temperature between neighbouring sampled temperatures.
    n_sampled_T = len(temps)
    n_unsampled_states = (n_sampled_T +
                          (n_sampled_T - 1) * num_intermediate_states)
    unsampled_state_energies = np.zeros([n_unsampled_states, n_samples])
    full_T_list = np.zeros(n_unsampled_states)

    # delta is the spacing between temperatures.
    delta = np.zeros(n_sampled_T - 1)

    # fill in a list of temperatures at all original temperatures and all intermediate states.
    full_T_list[0] = temps[0]
    t = 0
    for i in range(n_sampled_T - 1):
        delta[i] = (temps[i + 1] - temps[i]) / (num_intermediate_states + 1)
        for j in range(num_intermediate_states + 1):
            full_T_list[t] = temps[i] + delta[i] * j
            t += 1
    full_T_list[t] = temps[-1]

    # calculate betas of all of these temperatures
    beta_full_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole / Tunit) *
                       full_T_list)

    ti = 0
    N_k = np.zeros(n_unsampled_states)
    for k in range(n_unsampled_states):
        # Calculate the reduced energies at all temperatures, sampled and
        # unsampled, by rescaling the energies evaluated at the first state.
        unsampled_state_energies[
            k, :] = replica_energies[0, :] * (beta_full_k[k] / beta_k[0])
        if ti < len(temps):
            # store in N_k which states do and don't have samples.
            if full_T_list[k] == temps[ti]:
                ti += 1
                # these are the states that have samples
                N_k[k] = n_samples // len(temps)

    # call MBAR to find weights at all states, sampled and unsampled
    mbarT = pymbar.MBAR(unsampled_state_energies,
                        N_k,
                        verbose=False,
                        relative_tolerance=1e-12)

    # Calculate N expectations that a structure is in configurational state n
    # We need the probabilities of being in each state - first construct vectors of 0
    # (not in current state) and 1 (in current state)

    bool_i = np.zeros((n_conf_states, array_folded_states.shape[0]))

    for i in range(n_conf_states):
        i_vector = np.full_like(array_folded_states, i)
        # Convert True/False to integer 1/0 for each energy data point:
        bool_i[i] = np.multiply((i_vector == array_folded_states), 1)

    # Compute, for each thermodynamic state, the expectations of being in
    # each conformational state. results[i] is indexed below as
    # [0] = expectation values and [2] = covariance matrix.
    results = []
    for i in range(len(full_T_list)):
        U_n = unsampled_state_energies[i, :]
        results.append(
            mbarT.computeMultipleExpectations(bool_i,
                                              U_n,
                                              compute_covariance=True))

    deltaF_values = {}
    deltaF_uncertainty = {}
    F_unit = (-kB * full_T_list[0] * Tunit).unit  # units of free energy

    # Initialize the results dictionaries
    for s1 in range(n_conf_states):
        for s2 in range(s1 + 1, n_conf_states):
            deltaF_values[f"state{s1}_state{s2}"] = np.zeros(len(full_T_list))
            deltaF_uncertainty[f"state{s1}_state{s2}"] = np.zeros(
                len(full_T_list))

    # Compute free energies from probability ratios:
    for i in range(len(full_T_list)):
        for s1 in range(n_conf_states):
            for s2 in range(s1 + 1, n_conf_states):
                # Free energy change for s2 --> s1 at temp i
                deltaF_values[f"state{s1}_state{s2}"][i] = (
                    -kB * full_T_list[i] * Tunit *
                    (np.log(results[i][0][s1]) -
                     np.log(results[i][0][s2]))).value_in_unit(F_unit)

                # Get covariance matrix:
                theta_i = results[i][2]
                # Use Tunit (not a hard-coded kelvin) so the uncertainty is
                # scaled with the same temperature unit as the value above.
                deltaF_uncertainty[f"state{s1}_state{s2}"][i] = (
                    kB * full_T_list[i] * Tunit *
                    np.sqrt(theta_i[s1, s1] + theta_i[s2, s2] -
                            (theta_i[s2, s1] + theta_i[s1, s2]))
                ).value_in_unit(F_unit)

    # Add the units back on:
    for s1 in range(n_conf_states):
        for s2 in range(s1 + 1, n_conf_states):
            deltaF_values[f"state{s1}_state{s2}"] *= F_unit
            deltaF_uncertainty[f"state{s1}_state{s2}"] *= F_unit
    full_T_list *= Tunit

    return full_T_list, deltaF_values, deltaF_uncertainty
Пример #8
0
def run_replica_exchange(
    topology,
    system,
    positions,
    total_simulation_time=1.0 * unit.picosecond,
    simulation_time_step=None,
    temperature_list=None,
    friction=1.0 / unit.picosecond,
    minimize=True,
    exchange_frequency=1000,
    output_data="output/output.nc",
):
    """
    Run an OpenMMTools replica exchange simulation using an OpenMM coarse grained model.

    One thermodynamic state is created per temperature, all replicas start
    from the same positions, and the results are written to ``output_data``.
    An existing file at ``output_data`` is deleted first.

    :param topology: OpenMM Topology (accepted for interface compatibility; not used by this function)
    :type topology: `Topology() <https://simtk.org/api_docs/openmm/api4_1/python/classsimtk_1_1openmm_1_1app_1_1topology_1_1Topology.html>`_

    :param system: OpenMM System()
    :type system: `System() <https://simtk.org/api_docs/openmm/api4_1/python/classsimtk_1_1openmm_1_1openmm_1_1System.html>`_

    :param positions: Positions array for the model we would like to test
    :type positions: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ ( np.array( [cgmodel.num_beads,3] ), simtk.unit )

    :param total_simulation_time: Total run time for individual simulations
    :type total_simulation_time: `SIMTK <https://simtk.org/>`_ `Unit() <http://docs.openmm.org/7.1.0/api-python/generated/simtk.unit.unit.Unit.html>`_

    :param simulation_time_step: Simulation integration time step. Required: the default of None is rejected.
    :type simulation_time_step: `SIMTK <https://simtk.org/>`_ `Unit() <http://docs.openmm.org/7.1.0/api-python/generated/simtk.unit.unit.Unit.html>`_

    :param temperature_list: List of temperatures for which to perform replica exchange simulations. When None, 250 K to 340 K in steps of 10 K is used.
    :type temperature_list: List( float * simtk.unit.temperature )

    :param friction: Langevin thermostat friction coefficient, default = 1 / ps
    :type friction: `SIMTK <https://simtk.org/>`_ `Unit() <http://docs.openmm.org/7.1.0/api-python/generated/simtk.unit.unit.Unit.html>`_

    :param minimize: Whether minimization is done before running the simulation
    :type minimize: bool

    :param exchange_frequency: Number of time steps between replica exchange attempts, default = 1000
    :type exchange_frequency: int

    :param output_data: Path of the NETCDF file where the simulation data is written
    :type output_data: str

    :returns: None. The simulation data is written to ``output_data``.

    :raises TypeError: if ``simulation_time_step`` is None

    :Example:

    >>> from foldamers.cg_model.cgmodel import CGModel
    >>> from cg_openmm.simulation.rep_exch import *
    >>> cgmodel = CGModel()
    >>> run_replica_exchange(cgmodel.topology, cgmodel.system, cgmodel.positions, simulation_time_step=5.0 * unit.femtosecond)

    """

    # Fail early with a clear message: the step count below cannot be
    # computed without a time step.
    if simulation_time_step is None:
        raise TypeError(
            "simulation_time_step is required (a time Quantity), got None")

    simulation_steps = int(
        np.floor(total_simulation_time / simulation_time_step))

    exchange_attempts = int(np.floor(simulation_steps / exchange_frequency))

    if temperature_list is None:
        temperature_list = [((300.0 + i) * unit.kelvin)
                            for i in range(-50, 50, 10)]

    sampler_states = list()
    thermodynamic_states = list()

    # Define thermodynamic states.
    for temperature in temperature_list:
        thermodynamic_state = openmmtools.states.ThermodynamicState(
            system=system, temperature=temperature)
        thermodynamic_states.append(thermodynamic_state)
        sampler_states.append(openmmtools.states.SamplerState(
            positions))  # no box vectors, non-periodic system.

    # Create and configure simulation object. Each MCMC move integrates
    # exchange_frequency steps between exchange attempts.
    move = openmmtools.mcmc.LangevinDynamicsMove(
        timestep=simulation_time_step,
        collision_rate=friction,
        n_steps=exchange_frequency,
        reassign_velocities=False,
    )

    simulation = ReplicaExchangeSampler(
        mcmc_moves=move,
        number_of_iterations=exchange_attempts,
        replica_mixing_scheme='swap-neighbors',
    )

    # Start from a clean storage file.
    if os.path.exists(output_data):
        os.remove(output_data)

    reporter = MultiStateReporter(output_data, checkpoint_interval=1)
    simulation.create(thermodynamic_states, sampler_states, reporter)

    if minimize:
        simulation.minimize()

    print("Running replica exchange simulations with OpenMM...")
    print(f"Using a time step of {simulation_time_step}")
    try:
        simulation.run()
    except Exception as err:
        # Only real errors are handled here; KeyboardInterrupt/SystemExit
        # propagate untouched. The cause is reported instead of discarded.
        print(
            "Replica exchange simulation failed, try verifying your model/simulation settings."
        )
        print(f"Underlying error: {err!r}")
        exit()
Пример #9
0
def process_replica_exchange_data(
    output_data="output/output.nc",
    output_directory="output",
    series_per_page=4,
    write_data_file=True,
    plot_production_only=False,
    print_timing=False,
    equil_nskip=1,
    frame_begin=0,
    frame_end=-1,
):
    """
    Read replica exchange simulation data, detect equilibrium and decorrelation time, and plot replica exchange results.
    
    :param output_data: path to output .nc file from replica exchange simulation, (default='output/output.nc')
    :type output_data: str
    
    :param output_directory: path to which output files will be written (default='output')
    :type output_directory: stry

    :param series_per_page: number of replica data series to plot per pdf page (default=4)
    :type series_per_page: int
    
    :param write_data_file: Option to write a text data file containing the state_energies array (default=True)
    :type write_data_file: Boolean
    
    :param plot_production_only: Option to plot only the production region, as determined from pymbar detectEquilibration (default=False)
    :type plot_production_only: Boolean    

    :param equil_nskip: skip this number of frames to sparsify the energy timeseries for pymbar detectEquilibration (default=1) - this is used only when frame_begin=0 and the trajectory has less than 40000 frames.
    :type equil_nskip: Boolean
    
    :param frame_begin: analyze starting from this frame, discarding all prior as equilibration period (default=0)
    :type frame_begin: int
    
    :param frame_end: analyze up to this frame only, discarding the rest (default=-1).
    :type frame_end: int

    :returns:
        - replica_energies ( `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ ( np.float( [number_replicas,number_simulation_steps] ), simtk.unit ) ) - The potential energies for all replicas at all (printed) time steps
        - replica_state_indices ( np.int64( [number_replicas,number_simulation_steps] ), simtk.unit ) - The thermodynamic state assignments for all replicas at all (printed) time steps
        - production_start ( int - The frame at which the production region begins for all replicas, as determined from pymbar detectEquilibration
        - sample_spacing ( int - The number of frames between uncorrelated state energies, estimated using heuristic algorithm )
        - n_transit ( np.float( [number_replicas] ) ) - Number of half-transitions between state 0 and n for each replica
        - mixing_stats ( tuple ( np.float( [number_replicas x number_replicas] ) , np.float( [ number_replicas ] ) , float( statistical inefficiency ) ) ) - transition matrix, corresponding eigenvalues, and statistical inefficiency
    """

    t1 = time.perf_counter()

    # Read the simulation coordinates for individual temperature replicas
    reporter = MultiStateReporter(output_data, open_mode="r")

    t2 = time.perf_counter()
    if print_timing:
        print(f"open data time: {t2-t1}")

    # figure out what the time between output is.
    # We assume all use the same time step (which i think is required)

    mcmove = reporter.read_mcmc_moves()[0]
    time_interval = mcmove.n_steps * mcmove.timestep

    t3 = time.perf_counter()
    if print_timing:
        print(f"read_mcmc_moves time: {t3-t2}")

    # figure out what the temperature list is
    states = reporter.read_thermodynamic_states()[0]

    t4 = time.perf_counter()
    if print_timing:
        print(f"read_thermodynamics_states time: {t4-t3}")

    temperature_list = []
    for s in states:
        temperature_list.append(s.temperature)

    analyzer = ReplicaExchangeAnalyzer(reporter)

    t5 = time.perf_counter()

    (
        replica_energies,
        unsampled_state_energies,
        neighborhoods,
        replica_state_indices,
    ) = analyzer.read_energies()

    # Truncate output of read_energies() to last frame of interest
    if frame_end > 0:
        # Use frames from frame_begin to frame_end
        replica_energies = replica_energies[:, :, :frame_end]
        unsampled_state_energies = unsampled_state_energies[:, :, :frame_end]
        neighborhoods = neighborhoods[:, :, :frame_end]
        replica_state_indices = replica_state_indices[:, :frame_end]

    t6 = time.perf_counter()
    if print_timing:
        print(f"read_energies time: {t6-t5}")

    n_particles = np.shape(
        reporter.read_sampler_states(iteration=0)[0].positions)[0]
    temps = np.array([temp._value for temp in temperature_list])
    beta_k = 1 / (kB * temps)
    n_replicas = len(temperature_list)
    for k in range(n_replicas):
        replica_energies[:, k, :] *= beta_k[k]**(-1)

    t7 = time.perf_counter()
    if print_timing:
        print(f"reduce replica energies time: {t7-t6}")

    total_steps = len(replica_energies[0][0])
    state_energies = np.zeros([n_replicas, total_steps])

    t8 = time.perf_counter()
    # there must be some better way to do this as list comprehension.
    for step in range(total_steps):
        for state in range(n_replicas):
            state_energies[state, step] = replica_energies[np.where(
                replica_state_indices[:, step] == state)[0], 0, step]

    t9 = time.perf_counter()
    if print_timing:
        print(f"assign state energies time: {t9-t8}")

    # can run physical-valication on these state_energies

    # Use pymbar timeseries module to detect production period

    t10 = time.perf_counter()

    # Start of equilibrated data:
    t0 = np.zeros((n_replicas))
    # Statistical inefficiency:
    g = np.zeros((n_replicas))

    subsample_indices = {}

    # If sufficiently large, discard the first 20000 frames as equilibration period and use
    # subsampleCorrelatedData to get the energy decorrelation time.
    if total_steps >= 40000 or frame_begin > 0:
        if frame_begin > 0:
            # If specified, use frame_begin as the start of the production region
            production_start = frame_begin
        else:
            # Otherwise, use frame 20000
            production_start = 20000

        for state in range(n_replicas):
            subsample_indices[state] = timeseries.subsampleCorrelatedData(
                state_energies[state][production_start:],
                conservative=True,
            )
            g[state] = subsample_indices[state][1] - subsample_indices[state][0]

    else:
        # For small trajectories, use detectEquilibration
        for state in range(n_replicas):
            t0[state], g[state], Neff_max = timeseries.detectEquilibration(
                state_energies[state], nskip=equil_nskip)

            # Choose the latest equil timestep to apply to all states
            production_start = int(np.max(t0))

    # Assume a normal distribution (very rough approximation), and use mean plus
    # the number of standard deviations which leads to (n_replica-1)/n_replica coverage
    # For 12 replicas this should be the mean + 1.7317 standard deviations

    # x standard deviations is the solution to (n_replica-1)/n_replica = erf(x/sqrt(2))
    # This is equivalent to a target of 23/24 CDF value

    print(f"g: {g.astype(int)}")

    def erf_fun(x):
        return np.power((erf(x / np.sqrt(2)) - (n_replicas - 1) / n_replicas),
                        2)

    # x must be larger than zero
    opt_g_results = minimize_scalar(erf_fun, bounds=(0, 10))

    if not opt_g_results.success:
        print("Error solving for correlation time, exiting...")
        print(f"erf opt results: {opt_g_results}")
        exit()

    sample_spacing = int(np.ceil(np.mean(g) + opt_g_results.x * np.std(g)))

    t11 = time.perf_counter()
    if print_timing:
        print(f"detect equil and subsampling time: {t11-t10}")

    print("state    mean energies  variance")
    for state in range(n_replicas):
        state_mean = np.mean(state_energies[state,
                                            production_start::sample_spacing])
        state_std = np.std(state_energies[state,
                                          production_start::sample_spacing])
        print(f"  {state:4d}    {state_mean:10.6f} {state_std:10.6f}")

    t12 = time.perf_counter()

    if write_data_file == True:
        f = open(os.path.join(output_directory, "replica_energies.dat"), "w")
        for step in range(total_steps):
            f.write(f"{step:10d}")
            for replica_index in range(n_replicas):
                f.write(
                    f"{replica_energies[replica_index,replica_index,step]:12.6f}"
                )
            f.write("\n")
        f.close()

    t13 = time.perf_counter()
    if print_timing:
        print(f"Optionally write .dat file: {t13-t12}")

    t14 = time.perf_counter()

    if plot_production_only == True:
        plot_replica_exchange_energies(
            state_energies[:, production_start:],
            temperature_list,
            series_per_page,
            time_interval=time_interval,
            time_shift=production_start * time_interval,
            file_name=f"{output_directory}/rep_ex_ener.pdf",
        )

        plot_replica_exchange_energy_histograms(
            state_energies[:, production_start:],
            temperature_list,
            file_name=f"{output_directory}/rep_ex_ener_hist.pdf",
        )

        plot_replica_exchange_summary(
            replica_state_indices[:, production_start:],
            temperature_list,
            series_per_page,
            time_interval=time_interval,
            time_shift=production_start * time_interval,
            file_name=f"{output_directory}/rep_ex_states.pdf",
        )

        plot_replica_state_matrix(
            replica_state_indices[:, production_start:],
            file_name=f"{output_directory}/state_probability_matrix.pdf",
        )

    else:
        plot_replica_exchange_energies(
            state_energies,
            temperature_list,
            series_per_page,
            time_interval=time_interval,
            file_name=f"{output_directory}/rep_ex_ener.pdf",
        )

        plot_replica_exchange_energy_histograms(
            state_energies,
            temperature_list,
            file_name=f"{output_directory}/rep_ex_ener_hist.pdf",
        )

        plot_replica_exchange_summary(
            replica_state_indices,
            temperature_list,
            series_per_page,
            time_interval=time_interval,
            file_name=f"{output_directory}/rep_ex_states.pdf",
        )

        plot_replica_state_matrix(
            replica_state_indices,
            file_name=f"{output_directory}/state_probability_matrix.pdf",
        )

    t15 = time.perf_counter()

    if print_timing:
        print(f"plotting time: {t15-t14}")

    # Analyze replica exchange state transitions
    # For each replica, how many times does the thermodynamic state go between state 0 and state n
    # For consistency with the other mixing statistics, use only the production region here

    replica_state_indices_prod = replica_state_indices[:, production_start:]

    # Number of one-way transitions from states 0 to n or states n to 0
    n_transit = np.zeros((n_replicas, 1))

    # Replica_state_indices is [n_replicas x n_iterations]
    for rep in range(n_replicas):
        last_bound = None
        for i in range(replica_state_indices_prod.shape[1]):
            if replica_state_indices_prod[
                    rep, i] == 0 or replica_state_indices_prod[rep, i] == (
                        n_replicas - 1):
                if last_bound is None:
                    # This is the first time state 0 or n is visited
                    pass
                else:
                    if last_bound != replica_state_indices_prod[rep, i]:
                        # This is a completed transition from 0 to n or n to 0
                        n_transit[rep] += 1
                last_bound = replica_state_indices_prod[rep, i]

    t16 = time.perf_counter()

    if print_timing:
        print(f"replica transition analysis: {t16-t15}")

    # Compute transition matrix from the analyzer
    mixing_stats = analyzer.generate_mixing_statistics(
        number_equilibrated=production_start)

    t17 = time.perf_counter()

    if print_timing:
        print(f"compute transition matrix: {t17-t16}")
        print(f"total time elapsed: {t17-t1}")

    return (replica_energies, replica_state_indices, production_start,
            sample_spacing, n_transit, mixing_stats)
Пример #10
0
def physical_validation_ensemble(output_data="output.nc",
                                 output_directory="ouput",
                                 plotfile='ensemble_check',
                                 pairs='single',
                                 ref_state_index=0):
    """
    Run the ensemble physical validation check on pairs of thermodynamic states
    taken from a replica exchange simulation.

    :param output_data: Path to the NetCDF-formatted file containing replica exchange simulation data
    :type output_data: str

    :param output_directory: Accepted for interface compatibility; not referenced in the function body
    :type output_directory: str

    :param plotfile: Filename for the ensemble check plot. For pairs='adjacent' and pairs='all' the two state indices are appended as "_<i>_<j>"
    :type plotfile: str

    :param pairs: Which state pairs to check: every combination ('all'), neighbouring states ('adjacent'), or a single pair with optimal spacing ('single'). Matching is case-insensitive; an unrecognized value prints an error and falls back to 'single'
    :type pairs: str

    :param ref_state_index: Index in the temperature list used as one state of the pair when pairs='single'. The partner state is the one whose temperature difference is closest to the spacing estimated from the energy standard deviation at the reference state. Not used for 'adjacent' or 'all'
    :type ref_state_index: int

    :returns: Dictionary keyed by "state<i>_state<j>" holding the first element of the result of pv.ensemble.check for that pair. Pairs for which the check raises InputError are reported and left out
    :rtype: dict
    """

    # Open the stored simulation and collect the temperature of every thermodynamic state
    reporter = MultiStateReporter(output_data, open_mode="r")
    analyzer = ReplicaExchangeAnalyzer(reporter)

    temperature_list = [
        thermo_state.temperature
        for thermo_state in reporter.read_thermodynamic_states()[0]
    ]

    energies_bundle = analyzer.read_energies()
    replica_energies = energies_bundle[0]
    replica_state_indices = energies_bundle[3]

    # Kept from the original flow; the value itself is not used further down
    n_particles = np.shape(
        reporter.read_sampler_states(iteration=0)[0].positions)[0]

    T_unit = temperature_list[0].unit
    temps = np.array([temp.value_in_unit(T_unit) for temp in temperature_list])
    beta_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole / T_unit) * temps)
    n_replicas = len(temperature_list)

    # Rescale the energies evaluated at state k by 1/beta_k (in place)
    for k, beta in enumerate(beta_k):
        replica_energies[:, k, :] *= beta**(-1)

    total_steps = len(replica_energies[0][0])
    state_energies = np.zeros([n_replicas, total_steps])

    # Re-index from "per replica" to "per state": at each step, look up which
    # replica currently occupies each state and take its energy entry
    for step in range(total_steps):
        occupied_states = replica_state_indices[:, step]
        for state in range(n_replicas):
            replica_at_state = np.where(occupied_states == state)[0]
            state_energies[state, step] = replica_energies[replica_at_state, 0,
                                                           step]

    state_energies *= unit.kilojoule_per_mole

    T_array = np.array(
        [temp.value_in_unit(T_unit) for temp in temperature_list],
        dtype=float)

    mode = pairs.lower()
    if mode not in ('single', 'adjacent', 'all'):
        print(
            f"Error: Pair option '{pairs}' not recognized, using default option 'single'"
        )
        mode = 'single'

    # Build the list of (state1, state2, plot filename) jobs for the chosen mode
    if mode == 'single':
        # One pair: the reference state and its best-spaced partner
        state_energies_std = np.std(state_energies, axis=1)

        T_ref = temperature_list[ref_state_index]
        std_ref = state_energies_std[ref_state_index]

        # Target temperature spacing derived from the reference energy spread
        deltaT = 2 * kB * T_ref**2 / std_ref

        # Pick the state whose distance from T_ref is closest to that spacing
        T_diff = np.abs(T_ref.value_in_unit(T_unit) - T_array)
        T_opt_index = np.argmin(np.abs(deltaT.value_in_unit(T_unit) - T_diff))

        pair_jobs = [(ref_state_index, T_opt_index, plotfile)]

    elif mode == 'adjacent':
        # Every pair of neighbouring states
        pair_jobs = [(i, i + 1, f"{plotfile}_{i}_{i + 1}")
                     for i in range(n_replicas - 1)]

    else:
        # Every unordered combination of two distinct states
        pair_jobs = [(i, j, f"{plotfile}_{i}_{j}")
                     for i in range(n_replicas)
                     for j in range(i + 1, n_replicas)]

    quantiles = {}
    for state1_index, state2_index, plot_name in pair_jobs:
        # Set SimulationData for physical validation
        sim_data1, sim_data2 = set_simulation_data(state_energies, T_array,
                                                   state1_index, state2_index)

        # Run physical validation; an InputError skips this pair only
        try:
            quantiles_ij = pv.ensemble.check(sim_data1,
                                             sim_data2,
                                             total_energy=False,
                                             filename=plot_name)
            quantiles[
                f"state{state1_index}_state{state2_index}"] = quantiles_ij[0]
        except InputError:
            print(
                f"Insufficient overlap between trajectories for states {state1_index} and {state2_index}. Skipping..."
            )

    return quantiles
Пример #11
0
def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.

    Parameters
    ----------
    setup_options : dict
        result of loading yaml input file
    serialize_systems : bool, default True
        If True, write the hybrid, old and new systems of each phase to
        ``<trajectory_directory>/xml/``. Only acted on in the non-'neq' path.
    build_samplers : bool, default True
        If True, construct the sampler objects for each phase; otherwise only
        the topology proposals and hybrid factories are prepared.

    Returns
    -------
    setup_dict: dict
        - fe_type == 'neq':
          {'topology_proposals': top_prop, 'ne_fep': ne_fep}
        - fe_type == 'fah' with build_samplers=True (returned while handling
          the first phase, i.e. before any system serialization):
          {'topology_proposals': top_prop, 'hybrid_topology_factories': htf}
        - otherwise:
          {'topology_proposals': top_prop, 'hybrid_topology_factories': htf, 'hybrid_samplers': hss}

    Raises
    ------
    KeyError
        If a required option is missing from ``setup_options``.
    AssertionError
        If an unknown phase is requested, 'use_given_geometries' is not a bool,
        or a receptor option is present but None.
    """

    def _with_default_unit(value, default_unit):
        """Attach default_unit to bare floats; pass any other value through unchanged."""
        return value * default_unit if isinstance(value, float) else value

    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    use_given_geometries = setup_options.get('use_given_geometries', False)
    assert isinstance(use_given_geometries, bool)

    if 'complex' in phases:
        _logger.info("\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms)
        try:
            protein_pdb_filename = setup_options['protein_pdb']
            assert protein_pdb_filename is not None
            receptor_mol2 = None
        except KeyError:
            try:
                receptor_mol2 = setup_options['receptor_mol2']
                assert receptor_mol2 is not None
                protein_pdb_filename = None
            except KeyError as e:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise e
    else:
        protein_pdb_filename = None
        receptor_mol2 = None

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info("\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    if "timestep" in setup_options:
        timestep = _with_default_unit(setup_options['timestep'],
                                      unit.femtoseconds)
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info("\tno timestep detected: setting default as 1.0fs.")

    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")

        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e

    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            "\tno measure_shadow_work specified: defaulting to False.")

    # Bare floats are interpreted in the default unit; other values are used as given
    pressure = _with_default_unit(setup_options['pressure'], unit.atmosphere)
    temperature = _with_default_unit(setup_options['temperature'],
                                     unit.kelvin)
    solvent_padding_angstroms = _with_default_unit(
        setup_options['solvent_padding'], unit.angstrom)
    ionic_strength = _with_default_unit(setup_options['ionic_strength'],
                                        unit.molar)
    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options.get('save_setup_pickle_as')
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")
    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")

    # The atom map is optional. A missing/None entry is the normal "not
    # specified" case; an unreadable or malformed file still falls back to
    # None (best effort), but the cause is reported instead of being hidden.
    atom_map = None
    atom_map_file = setup_options.get('atom_map')
    if atom_map_file is None:
        _logger.info("\tno atom map specified: default to None.")
    else:
        try:
            with open(atom_map_file, 'r') as f:
                index_pairs = [line.split() for line in f]
            atom_map = {
                int(fields[0]): int(fields[1])
                for fields in index_pairs
            }
            _logger.info("\tsucceeded parsing atom map.")
        except Exception as e:
            atom_map = None
            _logger.warning(
                f"\tcould not read atom map from {atom_map_file} ({e}): default to None."
            )

    if setup_options.get('topology_proposal') is None:
        _logger.info(
            "\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )
        # NOTE(review): the 'set_solvent_box_dims_to_complex' option is not
        # forwarded to RelativeFEPSetup by this function.

        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        _logger.info("\twriting pickle output...")
        if setup_pickle_file is not None:
            with open(
                    os.path.join(os.getcwd(), trajectory_directory,
                                 setup_pickle_file), 'wb') as f:
                # Pickling is best effort: a failure is reported but does not abort setup
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info("\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")

            _logger.info(
                "\tsetup is complete.  Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                "\tsetup is complete.  Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        # (key suffix in top_prop, prefix of the matching fe_setup attribute):
        # some attributes are public, the rest carry a leading underscore.
        phase_fields = (
            ('topology_proposal', ''),
            ('geometry_engine', '_'),
            ('old_positions', ''),
            ('new_positions', ''),
            ('added_valence_energy', '_'),
            ('subtracted_valence_energy', '_'),
            ('logp_proposal', '_'),
            ('logp_reverse', '_'),
            ('forward_neglected_angles', '_'),
            ('reverse_neglected_angles', '_'),
        )
        top_prop = dict()
        for phase in phases:
            for suffix, attr_prefix in phase_fields:
                top_prop[f'{phase}_{suffix}'] = getattr(
                    fe_setup, f'{attr_prefix}{phase}_{suffix}')

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map

        if setup_options.get('render_atom_map'):
            atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                            'atom_mapping.png')
            _logger.info("\twriting atom_mapping.png")
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)

    else:
        _logger.info("\tloading topology proposal from yaml setup options...")
        # The stored proposal is a pickled dict wrapped in a 0-d object array.
        # numpy >= 1.16.3 refuses to load such files unless allow_pickle=True.
        # SECURITY: unpickling can execute arbitrary code; only point
        # 'topology_proposal' at files from a trusted source.
        top_prop = np.load(setup_options['topology_proposal'],
                           allow_pickle=True).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")
    trajectory_directory = setup_options['trajectory_directory']

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info("\tno atom selection detected: default to all.")
        atom_selection = 'all'

    def _make_hybrid_factory(phase):
        """Build the HybridTopologyFactory for one phase from the top_prop entries."""
        return HybridTopologyFactory(
            top_prop[f'{phase}_topology_proposal'],
            top_prop[f'{phase}_old_positions'],
            top_prop[f'{phase}_new_positions'],
            neglected_new_angle_terms=top_prop[
                f"{phase}_forward_neglected_angles"],
            neglected_old_angle_terms=top_prop[
                f"{phase}_reverse_neglected_angles"],
            softcore_LJ_v2=setup_options['softcore_v2'],
            interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

    def _make_langevin_move():
        """Build the Langevin move shared by the SAMS and repex samplers."""
        return mcmc.LangevinSplittingDynamicsMove(
            timestep=timestep,
            collision_rate=1.0 / unit.picosecond,
            n_steps=n_steps_per_move_application,
            reassign_velocities=False,
            n_restart_attempts=20,
            constraint_tolerance=1e-06)

    if setup_options['fe_type'] == 'neq':
        _logger.info("\tInstantiating nonequilibrium switching FEP")
        # Both lookups are kept so that a missing key still raises KeyError,
        # although neither value is forwarded to the sampler below.
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']
        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            hybrid_factory = _make_hybrid_factory(phase)

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    else:
        _logger.info("\tno nonequilibrium detected.")
        htf = dict()
        hss = dict()
        _logger.info("\tcataloging HybridTopologyFactories...")

        for phase in phases:
            _logger.info(f"\t\tphase: {phase}:")
            #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
            _logger.info(
                f"\t\twriting HybridTopologyFactory for phase {phase}...")
            htf[phase] = _make_hybrid_factory(phase)

        for phase in phases:
            # Define necessary vars to check energy bookkeeping
            _top_prop = top_prop[f'{phase}_topology_proposal']
            _htf = htf[phase]
            _forward_added_valence_energy = top_prop[
                f'{phase}_added_valence_energy']
            _reverse_subtracted_valence_energy = top_prop[
                f'{phase}_subtracted_valence_energy']

            if not use_given_geometries:
                zero_state_error, one_state_error = validate_endstate_energies(
                    _top_prop,
                    _htf,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")
            else:
                _logger.info(
                    "'use_given_geometries' was passed to setup; skipping endstate validation"
                )

            #TODO expose more of these options in input
            if build_samplers:

                n_states = setup_options['n_states']
                _logger.info(f"\tn_states: {n_states}")
                n_replicas = setup_options.get('n_replicas', n_states)

                checkpoint_interval = setup_options['checkpoint_interval']

                # generating lambda protocol
                lambda_protocol = LambdaProtocol(
                    functions=setup_options['protocol-type'])
                _logger.info(
                    f'Using lambda protocol : {setup_options["protocol-type"]}'
                )

                if atom_selection:
                    selection_indices = htf[phase].hybrid_topology.select(
                        atom_selection)
                else:
                    selection_indices = None

                storage_name = f'{trajectory_directory}/{trajectory_prefix}-{phase}.nc'
                _logger.info(f'\tstorage_name: {storage_name}')
                _logger.info(f'\tselection_indices {selection_indices}')
                _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
                reporter = MultiStateReporter(
                    storage_name,
                    analysis_particle_indices=selection_indices,
                    checkpoint_interval=checkpoint_interval)

                # Every phase except vacuum is set up with endstates enabled
                endstates = phase != 'vacuum'

                if setup_options['fe_type'] == 'fah':
                    # Returns while handling the first phase: no sampler is
                    # built and the serialization step below is not reached.
                    _logger.info('SETUP FOR FAH DONE')
                    return {
                        'topology_proposals': top_prop,
                        'hybrid_topology_factories': htf
                    }

                if setup_options['fe_type'] == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=_make_langevin_move(),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                elif setup_options['fe_type'] == 'repex':
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=_make_langevin_move(),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
            else:
                _logger.info("omitting sampler construction")

            if serialize_systems:
                # save the hybrid, old and new systems of this phase
                from perses.utils import data

                xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
                os.makedirs(xml_directory, exist_ok=True)

                _logger.info('WRITING OUT XML FILES')
                _logger.info('Saving the hybrid, old and new system to disk')
                data.serialize(htf[phase].hybrid_system,
                               f'{xml_directory}{phase}-hybrid-system.gz')
                data.serialize(htf[phase]._old_system,
                               f'{xml_directory}{phase}-old-system.gz')
                data.serialize(htf[phase]._new_system,
                               f'{xml_directory}{phase}-new-system.gz')

        return {
            'topology_proposals': top_prop,
            'hybrid_topology_factories': htf,
            'hybrid_samplers': hss
        }
Пример #12
0
from perses.samplers.multistate import HybridRepexSampler
from openmmtools.multistate import MultiStateReporter

# Total number of sampler iterations the finished run should contain.
total_steps = 10000

# Re-open the existing solvent-phase storage file and rebuild the sampler
# from what is stored in it.
reporter = MultiStateReporter(storage='out-solvent.nc')
simulation = HybridRepexSampler.from_storage(reporter)

# Extend only by the iterations still missing from the target.
run_so_far = simulation.iteration
left_to_do = total_steps - run_so_far
simulation.extend(n_iterations=left_to_do)
Пример #13
0
# Replica-exchange settings used further down.
n_replicas = 3  # Number of temperature replicas.
T_min = 298.0 * unit.kelvin  # Minimum temperature.
T_max = 600.0 * unit.kelvin  # Maximum temperature.

# Write the test system coordinates (converted to plain nanometer values)
# to a PDB file through mdtraj.
top = mdtraj.Topology.from_openmm(testsystem.topology)
positions_nm = testsystem.positions / unit.nanometers
trj = mdtraj.Trajectory([positions_nm], top)
trj.save(stem + '.pdb')

# Save the system as .xml (one trailing newline, as print() would emit).
serialized_system = mm.openmm.XmlSerializer.serialize(testsystem.system)
with open(stem + '.xml', 'w') as fp:
    fp.write(f'{serialized_system}\n')

# The reference state sits at the lowest temperature of the ladder.
reference_state = states.ThermodynamicState(
    system=testsystem.system,
    temperature=T_min,
)

# Sampler with no iteration limit and online analysis switched off.
move = mcmc.GHMCMove(timestep=2.0 * unit.femtoseconds, n_steps=50)
sampler = ParallelTemperingSampler(
    mcmc_moves=move,
    number_of_iterations=float('inf'),
    online_analysis_interval=None,
)

# Storage file for the run, checkpointing every iteration.
storage_path = stem + '.nc'
reporter = MultiStateReporter(storage_path, checkpoint_interval=1)
initial_sampler_state = states.SamplerState(testsystem.positions)
sampler.create(
    reference_state,
    initial_sampler_state,
    reporter,
    min_temperature=T_min,
    max_temperature=T_max,
    n_temperatures=n_replicas,
)

sampler.run(n_iterations=10)
Пример #14
0
def _free_energy_boot_stats(samples_by_key, out_unit, conf_percent):
    """
    Reduce per-trial bootstrap samples to mean values and uncertainty intervals.

    :param samples_by_key: dictionary mapping each transition name to an array of size [n_trial_boot x n_temperatures]
    :type samples_by_key: dict ( 2D numpy array ( float ) )

    :param out_unit: unit multiplied onto every returned array
    :type out_unit: unit

    :param conf_percent: 'sigma' to report +/- one standard deviation, otherwise the confidence level in percent
    :type conf_percent: str or float

    :returns:
      - means - dictionary {transition: mean over trials * out_unit}
      - intervals - dictionary {transition: (lower, upper)}, both relative to the mean
    """
    means = {}
    intervals = {}

    for key, samples in samples_by_key.items():
        center = np.mean(samples, axis=0)
        means[key] = center * out_unit

        if conf_percent == 'sigma':
            # Use analytical standard deviation instead of percentile method:
            spread = np.std(samples, axis=0) * out_unit
            intervals[key] = (-spread, spread)
        else:
            # Percentiles of the deviations from the mean. np.percentile
            # interpolates linearly by default, so the deprecated
            # `interpolation='linear'` keyword is not needed.
            p_lo = (100 - conf_percent) / 2
            p_hi = 100 - p_lo
            offsets = samples - center
            intervals[key] = (
                np.percentile(offsets, p_lo, axis=0) * out_unit,
                np.percentile(offsets, p_hi, axis=0) * out_unit,
            )

    return means, intervals


def bootstrap_free_energy_folding(Q,
                                  Q_folded,
                                  output_data="output/output.nc",
                                  frame_begin=0,
                                  sample_spacing=1,
                                  n_trial_boot=200,
                                  num_intermediate_states=0,
                                  conf_percent='sigma',
                                  plotfile_dir="output"):
    """
    Function for computing uncertainty of free energy, entropy, and enthalpy using bootstrapping with varying starting frames.

    Each trial picks a random starting offset in [0, sample_spacing), subsamples every sample_spacing-th frame,
    resamples those frames with replacement, and recomputes the free energy, entropy and enthalpy differences.
    The mean and spread over all trials are plotted and returned.

    :param Q: native contact fraction array of size [n_frames x n_states] (with equilibration region already trimmed)
    :type Q: 2D numpy array ( float )

    :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded)
    :type Q_folded: float

    :param output_data: Path to the simulation .nc file.
    :type output_data: str

    :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0)
    :type frame_begin: int

    :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData
    :type sample_spacing: int

    :param n_trial_boot: number of trials to run for generating bootstrapping uncertainties (default=200)
    :type n_trial_boot: int

    :param num_intermediate_states: Number of unsampled thermodynamic states between sampled states to include in the calculation
    :type num_intermediate_states: int

    :param conf_percent: 'sigma' to report +/- one standard deviation over the trials, otherwise the confidence level in percent used for a percentile interval (default='sigma')
    :type conf_percent: str or float

    :param plotfile_dir: directory in which free_energy_boot.pdf, entropy_boot.pdf and enthalpy_boot.pdf are written (default="output")
    :type plotfile_dir: str

    :returns:
      - full_T_list - A 1D numpy array listing of all temperatures, including sampled and intermediate unsampled
      - deltaF_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing free energy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaF uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper of uncertainties corresponding to deltaF_values
      - deltaS_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing entropy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaS uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper uncertainties corresponding to deltaS_values
      - deltaU_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing enthalpy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaU uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper of uncertainties corresponding to deltaU_values
    """

    # extract reduced energies and the state indices from the .nc
    reporter = MultiStateReporter(output_data, open_mode="r")
    analyzer = ReplicaExchangeAnalyzer(reporter)
    replica_energies_all, _, _, _ = analyzer.read_energies()

    # Get temperature_list from .nc file:
    states = reporter.read_thermodynamic_states()[0]
    temperature_list = [s.temperature for s in states]

    # Select production frames to analyze
    replica_energies_prod = replica_energies_all[:, :, frame_begin:]

    # The per-trial subsampling below slices Q and the energies with the same
    # frame indices, so both must cover the same number of frames.
    if np.shape(replica_energies_prod)[2] != np.shape(Q)[0]:
        print(
            f'Error: Q array of shape {Q.shape} incompatible with energies array of shape{replica_energies_prod.shape}'
        )
        exit()

    # Get units:
    F_unit = (kB * unit.kelvin).unit  # units of free energy
    T_unit = temperature_list[0].unit
    S_unit = F_unit / T_unit
    U_unit = F_unit

    # Results of each bootstrap trial, keyed by trial index:
    deltaF_values_boot = {}
    deltaF_uncertainty_boot = {}
    deltaS_values_boot = {}
    deltaU_values_boot = {}

    for i_boot in range(n_trial_boot):
        # Shift the reference frame for each bootstrap trial; this is why the
        # subsampling is done here and not once up front.
        ref_shift = np.random.randint(sample_spacing)

        # Replica energies and Q already have equilibration period removed:
        # replica_energies is [n_states x n_states x n_frame], Q is [n_frames x n_states]
        replica_energies = replica_energies_prod[:, :,
                                                 ref_shift::sample_spacing]
        Q_sub = Q[ref_shift::sample_spacing, :]

        # Draw as many frames (with replacement) as the subsampled set holds.
        n_frames = replica_energies.shape[2]
        sample_indices = resample(np.arange(n_frames),
                                  replace=True,
                                  n_samples=n_frames)

        # Select the sampled frames from Q and replica_energies:
        replica_energies_resample = replica_energies[:, :, sample_indices]
        Q_resample = np.asarray(Q_sub[sample_indices, :], dtype=float)

        # Run free energy expectation calculation:
        (full_T_list, deltaF_values_boot[i_boot],
         deltaF_uncertainty_boot[i_boot]) = expectations_free_energy(
             Q_resample,
             Q_folded,
             temperature_list,
             bootstrap_energies=replica_energies_resample,
             num_intermediate_states=num_intermediate_states,
         )

        # Get entropy/enthalpy for fitting current free energy data:
        # The inner dictionary keys will be transition names
        (deltaS_values_boot[i_boot],
         deltaU_values_boot[i_boot]) = get_entropy_enthalpy(
             deltaF_values_boot[i_boot], full_T_list)

    # Collect every trial into one [n_trial_boot x n_T] array per transition.
    transitions = list(deltaF_values_boot[0])
    n_T = len(full_T_list)

    arr_deltaF_values_boot = {
        key: np.zeros((n_trial_boot, n_T)) for key in transitions
    }
    arr_deltaS_values_boot = {
        key: np.zeros((n_trial_boot, n_T)) for key in transitions
    }
    arr_deltaU_values_boot = {
        key: np.zeros((n_trial_boot, n_T)) for key in transitions
    }

    # Entropy and enthalpy must be filled for every transition, exactly like
    # the free energy; filling only one key leaves the others at zero.
    for i_boot in range(n_trial_boot):
        for key in transitions:
            arr_deltaF_values_boot[key][
                i_boot, :] = deltaF_values_boot[i_boot][key].value_in_unit(
                    F_unit)
            arr_deltaS_values_boot[key][
                i_boot, :] = deltaS_values_boot[i_boot][key].value_in_unit(
                    S_unit)
            arr_deltaU_values_boot[key][
                i_boot, :] = deltaU_values_boot[i_boot][key].value_in_unit(
                    U_unit)

    # Compute mean values and confidence intervals:
    deltaF_values, deltaF_uncertainty = _free_energy_boot_stats(
        arr_deltaF_values_boot, F_unit, conf_percent)
    deltaS_values, deltaS_uncertainty = _free_energy_boot_stats(
        arr_deltaS_values_boot, S_unit, conf_percent)
    deltaU_values, deltaU_uncertainty = _free_energy_boot_stats(
        arr_deltaU_values_boot, U_unit, conf_percent)

    # Plot results:

    # Free energy:
    plot_free_energy_results(full_T_list,
                             deltaF_values,
                             deltaF_uncertainty,
                             plotfile=f"{plotfile_dir}/free_energy_boot.pdf")

    # Entropy and enthalpy:
    plot_entropy_enthalpy(
        full_T_list,
        deltaS_values,
        deltaU_values,
        deltaS_uncertainty=deltaS_uncertainty,
        deltaU_uncertainty=deltaU_uncertainty,
        plotfile_entropy=f"{plotfile_dir}/entropy_boot.pdf",
        plotfile_enthalpy=f"{plotfile_dir}/enthalpy_boot.pdf")

    return full_T_list, deltaF_values, deltaF_uncertainty, deltaS_values, deltaS_uncertainty, deltaU_values, deltaU_uncertainty
Пример #15
0
def _cv_half_max_crossing(temps, cv, peak_index, half_max, step):
    """
    Scan away from the heat capacity peak and interpolate the temperature at which the curve falls to half maximum.

    :param temps: temperatures (plain numbers) matching cv
    :type temps: 1D numpy array ( float )

    :param cv: heat capacity values (plain numbers)
    :type cv: 1D numpy array ( float )

    :param peak_index: index of the maximum of cv
    :type peak_index: int

    :param half_max: heat capacity value at half maximum
    :type half_max: float

    :param step: -1 to scan towards lower temperatures, +1 towards higher temperatures
    :type step: int

    :returns:
      - the linearly interpolated crossing temperature, or None if the scanned range does not contain the crossing
    """
    index = peak_index + step
    while 0 <= index < len(cv):
        if cv[index] <= half_max:
            # The crossing lies between this point and its neighbour on the
            # peak side; interpolate linearly between the two.
            inner = index - step
            return temps[index] + (half_max - cv[index]) * (
                temps[inner] - temps[index]) / (cv[inner] - cv[index])
        index += step
    return None


def _cv_boot_interval(samples, out_unit, conf_percent):
    """
    Uncertainty interval, relative to the mean, of bootstrap samples taken along axis 0.

    :param samples: bootstrap samples, one trial per entry along axis 0
    :type samples: numpy array ( float )

    :param out_unit: unit multiplied onto both bounds
    :type out_unit: unit

    :param conf_percent: 'sigma' for +/- one standard deviation, otherwise the confidence level in percent
    :type conf_percent: str or float

    :returns:
      - tuple (lower, upper) of the interval bounds
    """
    if conf_percent == 'sigma':
        # Use analytical standard deviation instead of percentile method:
        spread = np.std(samples, axis=0)
        return (-spread * out_unit, spread * out_unit)

    # Percentiles of the deviations from the mean. np.percentile interpolates
    # linearly by default, so the deprecated `interpolation='linear'` keyword
    # is not needed.
    p_lo = (100 - conf_percent) / 2
    p_hi = 100 - p_lo
    offsets = samples - np.mean(samples, axis=0)
    return (np.percentile(offsets, p_lo, axis=0) * out_unit,
            np.percentile(offsets, p_hi, axis=0) * out_unit)


def bootstrap_heat_capacity(frame_begin=0,
                            sample_spacing=1,
                            frame_end=-1,
                            plot_file='heat_capacity_boot.pdf',
                            output_data="output/output.nc",
                            num_intermediate_states=0,
                            frac_dT=0.05,
                            conf_percent='sigma',
                            n_trial_boot=200):
    """
    Calculate and plot the heat capacity curve, with uncertainty determined using bootstrapping.
    Uncorrelated datasets are selected using a random starting frame, repeated n_trial_boot
    times. Uncertainty in melting point, peak height and full-width half maximum of the C_v curve are also returned.

    :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0)
    :type frame_begin: int

    :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData (default=1)
    :type sample_spacing: int

    :param frame_end: index of last frame to include in heat capacity calculation; values <= 0 mean "use all frames" (default=-1)
    :type frame_end: int

    :param plot_file: path to filename to output plot; None disables plotting (default='heat_capacity_boot.pdf')
    :type plot_file: str

    :param output_data: Path to the output data for a NetCDF-formatted file containing replica exchange simulation data (default = "output/output.nc")
    :type output_data: str

    :param num_intermediate_states: The number of states to insert between existing states in 'temperature_list' (default=0)
    :type num_intermediate_states: int

    :param frac_dT: The fraction difference between temperatures points used to calculate finite difference derivatives (default=0.05)
    :type frac_dT: float

    :param conf_percent: 'sigma' to report +/- one standard deviation over the trials, otherwise the confidence level in percent used for a percentile interval (default='sigma')
    :type conf_percent: str or float

    :param n_trial_boot: number of trials to run for generating bootstrapping uncertainties
    :type n_trial_boot: int

    :returns:
       - T_list ( List( float * unit.simtk.temperature ) ) - The temperature list corresponding to the heat capacity values in 'C_v'
       - C_v_values ( List( float * kJ/mol/K ) ) - The heat capacity values for all (including inserted intermediates) states
       - C_v_uncertainty ( Tuple ( np.array(float) * kJ/mol/K ) ) - confidence interval for all C_v_values computed from bootstrapping
       - Tm_value ( float * unit.simtk.temperature ) - Melting point mean value computed from bootstrapping
       - Tm_uncertainty ( Tuple ( float * unit.simtk.temperature ) ) - confidence interval for melting point computed from bootstrapping
       - Cv_height_value ( float * kJ/mol/K ) - mean C_v peak height, relative to the lowest C_v value, computed from bootstrapping
       - Cv_height_uncertainty ( Tuple ( float * kJ/mol/K ) ) - confidence interval for the C_v peak height computed from bootstrapping
       - FWHM_value ( float * unit.simtk.temperature ) - C_v full width half maximum mean value computed from bootstrapping
       - FWHM_uncertainty ( Tuple ( float * unit.simtk.temperature ) ) - confidence interval for C_v full width half maximum computed from bootstrapping

    """

    # extract reduced energies and the state indices from the .nc
    reporter = MultiStateReporter(output_data, open_mode="r")
    analyzer = ReplicaExchangeAnalyzer(reporter)
    replica_energies_all, _, _, _ = analyzer.read_energies()

    # Same convention as get_heat_capacity: only a positive frame_end
    # truncates the production period.
    frame_stop = frame_end if frame_end > 0 else None

    # Store data for each sampling trial:
    C_v_values_boot = {}
    C_v_uncertainty_boot = {}

    Tm_boot = np.zeros(n_trial_boot)
    Cv_height = np.zeros(n_trial_boot)
    FWHM = np.zeros(n_trial_boot)

    for i_boot in range(n_trial_boot):

        # Select production frames to analyze, shifting the reference frame
        # for each bootstrap trial.
        # replica_energies is [n_states x n_states x n_frame]
        ref_shift = np.random.randint(sample_spacing)
        replica_energies = replica_energies_all[:, :,
                                                (frame_begin + ref_shift):
                                                frame_stop:sample_spacing]

        # Draw as many frames (with replacement) as the subsampled set holds.
        n_frames = replica_energies.shape[2]
        sample_indices = resample(np.arange(n_frames),
                                  replace=True,
                                  n_samples=n_frames)
        replica_energies_resample = replica_energies[:, :, sample_indices]

        # Run heat capacity expectation calculation:
        (C_v_values_boot[i_boot], C_v_uncertainty_boot[i_boot],
         T_list) = get_heat_capacity(
             output_data=output_data,
             num_intermediate_states=num_intermediate_states,
             frac_dT=frac_dT,
             plot_file=None,
             bootstrap_energies=replica_energies_resample,
         )

        if i_boot == 0:
            # Get units:
            C_v_unit = C_v_values_boot[0][0].unit
            T_unit = T_list[0].unit

        # Work on plain numbers for the peak analysis.
        cv = C_v_values_boot[i_boot].value_in_unit(C_v_unit)
        temps = T_list.value_in_unit(T_unit)

        # Compute the melting point:
        max_index = np.argmax(cv)
        Tm_boot[i_boot] = temps[max_index]

        # Compute the peak height, relative to lowest C_v value in the temp range:
        Cv_height[i_boot] = np.max(cv) - np.min(cv)

        # Compute the FWHM:
        # C_v value at half-maximum:
        mid_val = np.min(cv) + Cv_height[i_boot] / 2

        # ***Note: this assumes that there is only a single heat capacity peak, with
        # monotonic behavior on each side of the peak.
        T_half_lo = _cv_half_max_crossing(temps, cv, max_index, mid_val, -1)
        T_half_hi = _cv_half_max_crossing(temps, cv, max_index, mid_val, +1)

        if T_half_lo is not None and T_half_hi is not None:
            FWHM[i_boot] = T_half_hi - T_half_lo
        elif T_half_lo is not None:
            # Only the lower half is inside the temperature range: assume a
            # symmetric peak and double the half width.
            FWHM[i_boot] = 2 * (Tm_boot[i_boot] - T_half_lo)
        elif T_half_hi is not None:
            FWHM[i_boot] = 2 * (T_half_hi - Tm_boot[i_boot])
        # NOTE: if neither half-maximum point is inside the temperature range,
        # FWHM[i_boot] keeps its initial value of 0.

    # Compute uncertainty at all temps in T_list over the n_trial_boot trials performed:

    # Convert dicts to array
    arr_C_v_values_boot = np.zeros((n_trial_boot, len(T_list)))

    for i_boot in range(n_trial_boot):
        arr_C_v_values_boot[i_boot, :] = C_v_values_boot[i_boot].value_in_unit(
            C_v_unit)

    # Compute mean values:
    C_v_values = np.mean(arr_C_v_values_boot, axis=0) * C_v_unit
    Cv_height_value = np.mean(Cv_height) * C_v_unit
    Tm_value = np.mean(Tm_boot) * T_unit
    FWHM_value = np.mean(FWHM) * T_unit

    # Compute confidence intervals:
    C_v_uncertainty = _cv_boot_interval(arr_C_v_values_boot, C_v_unit,
                                        conf_percent)
    Cv_height_uncertainty = _cv_boot_interval(Cv_height, C_v_unit,
                                              conf_percent)
    Tm_uncertainty = _cv_boot_interval(Tm_boot, T_unit, conf_percent)
    FWHM_uncertainty = _cv_boot_interval(FWHM, T_unit, conf_percent)

    # Plot and return the heat capacity (with units)
    if plot_file is not None:
        plot_heat_capacity(C_v_values,
                           C_v_uncertainty,
                           T_list,
                           file_name=plot_file)

    return T_list, C_v_values, C_v_uncertainty, Tm_value, Tm_uncertainty, Cv_height_value, Cv_height_uncertainty, FWHM_value, FWHM_uncertainty
Пример #16
0
def get_heat_capacity(frame_begin=0,
                      sample_spacing=1,
                      frame_end=-1,
                      output_data="output/output.nc",
                      num_intermediate_states=0,
                      frac_dT=0.05,
                      plot_file=None,
                      bootstrap_energies=None):
    """
    Given a .nc output and a number of intermediate states to insert for the temperature list, this function calculates and plots the heat capacity profile.

    The heat capacity at each temperature T is obtained as a finite difference: MBAR gives the expected energy
    difference between two extra unsampled states placed just below and just above T, which is divided by their temperature difference.

    :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0). Ignored when bootstrap_energies is given.
    :type frame_begin: int

    :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData (default=1). Ignored when bootstrap_energies is given.
    :type sample_spacing: int

    :param frame_end: index of last frame to include in heat capacity calculation; values <= 0 mean "use all frames" (default=-1). Ignored when bootstrap_energies is given.
    :type frame_end: int

    :param output_data: Path to the output data for a NetCDF-formatted file containing replica exchange simulation data (default = "output/output.nc")
    :type output_data: str

    :param num_intermediate_states: The number of states to insert between existing states in 'temperature_list' (default=0)
    :type num_intermediate_states: int

    :param frac_dT: The fraction difference between temperatures points used to calculate finite difference derivatives (default=0.05)
    :type frac_dT: float

    :param plot_file: path to filename to output plot; None disables plotting (default=None)
    :type plot_file: str

    :param bootstrap_energies: a custom replica_energies array to be used for bootstrapping calculations. Used instead of the energies in the .nc file; it is passed through pymbar.utils.kln_to_kn, so it must have the same 3D layout as the energies read from the file.
    :type bootstrap_energies: 3d numpy array (float)

    :returns:
          - C_v ( List( float ) ) - The heat capacity values for all (including inserted intermediates) states
          - dC_v ( List( float ) ) - The uncertainty in the heat capacity values for intermediate states
          - new_temp_list ( List( float * unit.simtk.temperature ) ) - The temperature list corresponding to the heat capacity values in 'C_v'

    """

    # The reporter is needed in both modes: even with custom energies the
    # thermodynamic states (temperatures) are read from the .nc file.
    reporter = MultiStateReporter(output_data, open_mode="r")

    if bootstrap_energies is not None:
        # Use a subsampled replica_energy matrix instead of reading from file
        replica_energies = bootstrap_energies
    else:
        # extract reduced energies and the state indices from the .nc
        analyzer = ReplicaExchangeAnalyzer(reporter)
        replica_energies_all, _, _, _ = analyzer.read_energies()

        # Select production frames to analyze; only a positive frame_end
        # truncates the range.
        frame_stop = frame_end if frame_end > 0 else None
        replica_energies = replica_energies_all[:, :, frame_begin:
                                                frame_stop:sample_spacing]

    # Get the temperature list from .nc file:
    states = reporter.read_thermodynamic_states()[0]
    temperature_list = [s.temperature for s in states]

    # determine the numerical values of beta at each state in units consistent with the temperature
    Tunit = temperature_list[0].unit
    temps = np.array([temp.value_in_unit(Tunit) for temp in temperature_list])
    kB_value = kB.value_in_unit(unit.kilojoule_per_mole / Tunit)
    beta_k = 1 / (kB_value * temps)

    # convert the energies from replica/evaluated state/sample form to evaluated state/sample form
    replica_energies = pymbar.utils.kln_to_kn(replica_energies)

    n_samples = len(replica_energies[0, :])

    # calculate the number of states we need expectations at.  We want it at all of the original
    # temperatures, each intermediate temperature, and then temperatures +/- from the original
    # to take finite derivatives.

    # create an array for the temperature and energy for each state, including the
    # finite difference states.
    num_sampled_T = len(temps)
    n_unsampled_states = 3 * (num_sampled_T +
                              (num_sampled_T - 1) * num_intermediate_states)
    full_T_list = np.zeros(n_unsampled_states)

    # delta is the spacing between temperatures.
    delta = np.zeros(num_sampled_T - 1)

    # fill in a list of temperatures at all original temperatures and all intermediate states.
    full_T_list[0] = temps[0]
    t = 0
    for i in range(num_sampled_T - 1):
        delta[i] = (temps[i + 1] - temps[i]) / (num_intermediate_states + 1)
        for j in range(num_intermediate_states + 1):
            full_T_list[t] = temps[i] + delta[i] * j
            t += 1
    full_T_list[t] = temps[-1]
    n_T_vals = t + 1

    # add additional states for finite difference calculation and the requested spacing.
    # Layout of full_T_list: [0, n_T_vals) holds T, [n_T_vals, 2*n_T_vals) holds
    # T - dT and [2*n_T_vals, 3*n_T_vals) holds T + dT.  The very last
    # temperature has no interval of its own, so it reuses the last delta.
    last_interval = num_sampled_T - 2
    for i in range(n_T_vals):
        ii = min(i // (num_intermediate_states + 1), last_interval)
        full_T_list[i + n_T_vals] = full_T_list[i] - delta[ii] * frac_dT
        full_T_list[i + 2 * n_T_vals] = full_T_list[i] + delta[ii] * frac_dT

    # calculate betas of all of these temperatures
    beta_full_k = 1 / (kB_value * full_T_list)

    # Calculate the reduced energies at all temperatures, sampled and unsampled,
    # by rescaling the first row of the energies with the ratio of betas.
    unsampled_state_energies = replica_energies[0, :] * (
        beta_full_k / beta_k[0])[:, np.newaxis]

    # store in N_k which states do and don't have samples: the sampled
    # temperatures are matched in order against the full temperature list.
    ti = 0
    N_k = np.zeros(n_unsampled_states)
    for k in range(n_unsampled_states):
        if ti < len(temps) and full_T_list[k] == temps[ti]:
            ti += 1
            # these are the states that have samples
            N_k[k] = n_samples // len(temps)

    # call MBAR to find weights at all states, sampled and unsampled
    mbarT = pymbar.MBAR(unsampled_state_energies,
                        N_k,
                        verbose=False,
                        relative_tolerance=1e-12)

    # get the 'unreduced' potential -- we can't take differences of reduced potentials
    # because the beta is different; math is much more confusing with derivatives of the reduced potentials.
    unsampled_state_energies /= beta_full_k[:, np.newaxis]

    # expectations for the differences between states, which we need for numerical derivatives
    results = mbarT.computeExpectations(unsampled_state_energies,
                                        output="differences",
                                        state_dependent=True)
    DeltaE_expect = results[0]
    dDeltaE_expect = results[1]

    # Now calculate heat capacity (with uncertainties) using the finite difference approach.
    Cv = np.zeros(n_T_vals)
    dCv = np.zeros(n_T_vals)
    for k in range(n_T_vals):
        im = k + n_T_vals  # +/- delta up and down.
        ip = k + 2 * n_T_vals
        dT = full_T_list[ip] - full_T_list[im]
        Cv[k] = DeltaE_expect[im, ip] / dT
        dCv[k] = dDeltaE_expect[im, ip] / dT

    # add units so the plot has the right units.
    Cv *= unit.kilojoule_per_mole / Tunit  # always kJ/mol, since the OpenMM output is in kJ/mol.
    dCv *= unit.kilojoule_per_mole / Tunit
    full_T_list *= Tunit

    # plot and return the heat capacity (with units)
    if plot_file is not None:
        plot_heat_capacity(Cv,
                           dCv,
                           full_T_list[0:n_T_vals],
                           file_name=plot_file)
    return (Cv, dCv, full_T_list[0:n_T_vals])
# Fragment of a SAMS setup script: `topology`, `compound_states` and
# `sampler_state` are defined in a part of the script that is not shown here.

# MCMC move applied by the sampler: 500 Langevin dynamics steps of 4 fs each,
# without reassigning velocities.
move = mcmc.LangevinDynamicsMove(timestep=4 * unit.femtosecond,
                                 collision_rate=1.0 / unit.picoseconds,
                                 n_steps=500,
                                 reassign_velocities=False)
# The sampler is created with a limit of a single iteration and with online
# analysis disabled.
simulation = SAMSSampler(mcmc_moves=move,
                         minimum_round_trips=1,
                         histogram_flatness=0.3,
                         number_of_iterations=1,
                         online_analysis_interval=None,
                         beta_factor=0.6)

# Indices of atoms with mass > 3.0 that belong to the protein or to residue
# CM7; they are handed to the reporter below.
# NOTE(review): presumably this limits which particle positions are stored in
# the analysis file -- confirm against the MultiStateReporter documentation.
analysis_particle_indices = topology.select(
    '(protein and mass > 3.0) or (resname CM7 and mass > 3.0)')
reporter = MultiStateReporter(
    'alchemical_test.nc',
    checkpoint_interval=50,
    analysis_particle_indices=analysis_particle_indices)
# Bind the thermodynamic states, the single starting sampler state and the
# storage to the sampler, then run one iteration.
simulation.create(thermodynamic_states=compound_states,
                  sampler_states=[sampler_state],
                  storage=reporter)
simulation.extend(n_iterations=1)
for step in range(100000):
    ts = simulation._thermodynamic_states[
        simulation._replica_thermodynamic_states[0]]
    context, _ = openmmtools.cache.global_context_cache.get_context(ts)
    context.setParameter('report', 1.0)
    cv_logger.debug('{}, {}, {}, {}, {}'.format(
        context.getParameter('lambda_restraints'),
        context.getState(getEnergy=True,
                         groups=2**30).getPotentialEnergy()._value,
        context.getState(getEnergy=True,
Пример #18
0
from perses.samplers.multistate import HybridRepexSampler
from openmmtools.multistate import MultiStateReporter

import logging

# Configure the root logger at NOTSET and raise the "utils.openeye" logger to
# DEBUG so both the info and debug messages below are emitted.
logging.basicConfig(level=logging.NOTSET)
_logger = logging.getLogger("utils.openeye")
_logger.setLevel(logging.DEBUG)

# Re-open the existing storage file and rebuild the sampler from it so the
# run can be continued.
reporter = MultiStateReporter(storage='out-complex.nc')
simulation = HybridRepexSampler.from_storage(reporter)

# Work out how many iterations are still missing to reach the target total
# and run exactly that many more.
# NOTE(review): if the stored run is already past `total_steps`, `left_to_do`
# is negative and is passed to `extend` unchanged -- confirm that is intended.
total_steps = 10000
run_so_far = simulation.iteration
left_to_do = total_steps - run_so_far
_logger.info(f'{left_to_do}')
_logger.debug('debugging')
simulation.extend(n_iterations=left_to_do)
Пример #19
0
def make_state_dcd_files(topology,
                         timestep=5 * unit.femtosecond,
                         time_interval=200,
                         output_dir="output",
                         output_data="output.nc",
                         checkpoint_data="output_checkpoint.nc",
                         frame_begin=0,
                         frame_stride=1,
                         center=True):
    """
    Make dcd files by state from replica exchange simulation trajectory data.
    Note: these are discontinuous trajectories with constant temperature state.

    One file named ``state_<k>.dcd`` (k starting at 1) is written into
    ``output_dir`` for every thermodynamic state found in the data file.

    :param topology: OpenMM Topology
    :type topology: `Topology() <https://simtk.org/api_docs/openmm/api4_1/python/classsimtk_1_1openmm_1_1app_1_1topology_1_1Topology.html>`_

    :param timestep: Time step used in the simulation (default=5*unit.femtosecond)
    :type timestep: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>` float * simtk.unit

    :param time_interval: frequency, in number of time steps, at which positions were recorded (default=200)
    :type time_interval: int

    :param output_dir: directory containing ``output_data``; the dcd files are written here too (default='output')
    :type output_dir: str

    :param output_data: name of output .nc data file (default='output.nc')
    :type output_data: str

    :param checkpoint_data: name of checkpoint .nc data file (default='output_checkpoint.nc')
    :type checkpoint_data: str

    :param frame_begin: Frame at which to start writing the dcd trajectory (default=0)
    :type frame_begin: int

    :param frame_stride: advance by this many time intervals when writing dcd trajectories (default=1)
    :type frame_stride: int

    :param center: if true, translate every frame so that its geometric center (unweighted mean of the coordinates) coincides with that of the first frame (default=True)
    :type center: Boolean

    :returns:
        - file_list ( List( str ) ) - A list of names for the files that were written
    """

    file_list = []

    output_data_path = os.path.join(output_dir, output_data)

    # This reporter is only needed for the number of states and the unit of
    # the stored positions, so close it as soon as both have been read.
    reporter = MultiStateReporter(output_data_path, open_mode="r")
    try:
        states = reporter.read_thermodynamic_states()[0]
        sampler_states = reporter.read_sampler_states(iteration=0)
        xunit = sampler_states[0].positions[0].unit
    finally:
        reporter.close()

    for state_index in range(len(states)):
        state_trajectory = extract_trajectory(topology,
                                              state_index=state_index,
                                              output_data=output_data_path,
                                              checkpoint_data=checkpoint_data,
                                              frame_begin=frame_begin,
                                              frame_stride=frame_stride)

        file_name = f"{output_dir}/state_{state_index+1}.dcd"
        # The context manager guarantees the file is closed even if writing
        # a frame raises.
        with open(file_name, "wb") as file:
            dcd_file = DCDFile(file,
                               topology,
                               timestep,
                               firstStep=frame_begin,
                               interval=time_interval)

            # TODO: replace this with MDTraj alignment tool
            # Reference center: per-axis mean of the first frame. Skipped for
            # an empty trajectory, which has no first frame to index.
            if center and len(state_trajectory) > 0:
                reference_center = [
                    np.mean(state_trajectory[0, :, axis]) for axis in range(3)
                ]

            for positions in state_trajectory:
                if center:
                    # Translate the frame (in place) onto the reference center.
                    for axis in range(3):
                        positions[:, axis] += (reference_center[axis] -
                                               np.mean(positions[:, axis]))

                # Attach the position unit read from the stored sampler state
                # before handing the frame to the DCD writer.
                dcd_file.writeModel(positions * xunit)

        file_list.append(file_name)

    return file_list
Пример #20
0
    def historic_fes(self, stepsize=100):
        """ Run MBAR on growing prefixes of each simulation leg and record
        the resulting free energy history. Can be slow if stepsize is small.

        Every non-checkpoint ``.nc`` file in ``self.directory`` is inspected;
        a file whose name contains 'vacuum', 'solvent' or 'complex' feeds the
        corresponding ``_<leg>dg_history`` / ``_<leg>ddg_history`` lists. For
        each ``step`` in ``range(stepsize, last_iteration, stepsize)`` the
        free energy difference between the first and last state (and its
        uncertainty) is appended in kcal/mol.

        Parameters
        ----------
        stepsize : int, optional, default=100
            number of iterations at which to run MBAR

        Returns
        -------
        None

        """
        from perses.analysis import utils
        from simtk import unit
        import os
        from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer

        # Keyword searched for in the file name -> names of the attributes
        # that collect (free energy, uncertainty) for that leg.
        history_attributes = {
            'vacuum': ('_vacdg_history', '_vacddg_history'),
            'solvent': ('_soldg_history', '_solddg_history'),
            'complex': ('_comdg_history', '_comddg_history'),
        }
        kcal_per_mol = unit.kilocalories_per_mole

        # find the output files
        nc_files = []
        for entry in os.listdir(self.directory):
            if entry[-3:] == '.nc' and 'checkpoint' not in entry:
                nc_files.append(entry)

        for out in nc_files:
            # Each keyword is tested independently, so a file name matching
            # several keywords is processed once per match.
            for leg, (dg_name, ddg_name) in history_attributes.items():
                if leg not in out:
                    continue
                leg_reporter = MultiStateReporter(f'{self.directory}/{out}')
                # NOTE(review): this second handle is only used to read
                # 'last_iteration' and is never closed here.
                ncfile = utils.open_netcdf(f'{self.directory}/{out}')
                n_iterations = ncfile.variables['last_iteration'][0]
                for step in range(stepsize, n_iterations, stepsize):
                    leg_analyzer = MultiStateSamplerAnalyzer(
                        leg_reporter, max_n_iterations=step)
                    f_ij, df_ij = leg_analyzer.get_free_energy()
                    # Estimates are scaled by kT before unit conversion.
                    dg = f_ij[0, -1] * leg_analyzer.kT
                    getattr(self, dg_name).append(dg.in_units_of(kcal_per_mol))
                    ddg = df_ij[0, -1] * leg_analyzer.kT
                    getattr(self, ddg_name).append(
                        ddg.in_units_of(kcal_per_mol))
        return
Пример #21
0
def make_state_pdb_files(topology,
                         output_dir="output",
                         output_data="output.nc",
                         checkpoint_data="output_checkpoint.nc",
                         frame_begin=0,
                         frame_stride=1,
                         center=True):
    """
    Make pdb files by state from replica exchange simulation trajectory data.
    Note: these are discontinuous trajectories with constant temperature state.

    One multi-model file named ``state_<k>.pdb`` (k starting at 1) is written
    into ``output_dir`` for every thermodynamic state found in the data file.
    Frames are numbered consecutively as MODEL 1, 2, 3, ...

    :param topology: OpenMM Topology
    :type topology: `Topology() <https://simtk.org/api_docs/openmm/api4_1/python/classsimtk_1_1openmm_1_1app_1_1topology_1_1Topology.html>`_

    :param output_dir: directory containing ``output_data``; the pdb files are written here too (default='output')
    :type output_dir: str

    :param output_data: name of output .nc data file (default='output.nc')
    :type output_data: str

    :param checkpoint_data: name of checkpoint .nc data file (default='output_checkpoint.nc')
    :type checkpoint_data: str

    :param frame_begin: Frame at which to start writing the pdb trajectory (default=0)
    :type frame_begin: int

    :param frame_stride: advance by this many frames when writing pdb trajectories (default=1)
    :type frame_stride: int

    :param center: if true, translate every frame so that its geometric center (unweighted mean of the coordinates) coincides with that of the first frame (default=True)
    :type center: Boolean

    :returns:
        - file_list ( List( str ) ) - A list of names for the files that were written
    """
    file_list = []

    output_data_path = os.path.join(output_dir, output_data)

    # This reporter is only needed for the number of states and the unit of
    # the stored positions, so close it as soon as both have been read.
    reporter = MultiStateReporter(output_data_path, open_mode="r")
    try:
        states = reporter.read_thermodynamic_states()[0]
        sampler_states = reporter.read_sampler_states(iteration=0)
        xunit = sampler_states[0].positions[0].unit
    finally:
        reporter.close()

    for state_index in range(len(states)):
        state_trajectory = extract_trajectory(topology,
                                              state_index=state_index,
                                              output_data=output_data_path,
                                              checkpoint_data=checkpoint_data,
                                              frame_begin=frame_begin,
                                              frame_stride=frame_stride)

        file_name = f"{output_dir}/state_{state_index+1}.pdb"
        # The context manager guarantees the file is closed even if writing
        # a frame raises.
        with open(file_name, "w") as file:
            PDBFile.writeHeader(topology, file=file)

            # TODO: replace this with MDTraj alignment tool
            # Reference center: per-axis mean of the first frame. Skipped for
            # an empty trajectory, which has no first frame to index.
            if center and len(state_trajectory) > 0:
                reference_center = [
                    np.mean(state_trajectory[0, :, axis]) for axis in range(3)
                ]

            # Number the models from 1 so each frame gets its own MODEL
            # record instead of every frame being labelled MODEL 1.
            for modelIndex, positions in enumerate(state_trajectory, start=1):
                if center:
                    # Translate the frame (in place) onto the reference center.
                    for axis in range(3):
                        positions[:, axis] += (reference_center[axis] -
                                               np.mean(positions[:, axis]))

                # Attach the position unit read from the stored sampler state
                # before handing the frame to the PDB writer.
                PDBFile.writeModel(topology,
                                   positions * xunit,
                                   file=file,
                                   modelIndex=modelIndex)

            PDBFile.writeFooter(topology, file=file)

        file_list.append(file_name)

    return file_list
Пример #22
0
from perses.samplers.multistate import HybridRepexSampler
from openmmtools.multistate import MultiStateReporter

# Re-open the existing storage file and rebuild the sampler from it so the
# run can be continued.
reporter = MultiStateReporter(storage='out-vacuum.nc')
simulation = HybridRepexSampler.from_storage(reporter)


# Work out how many iterations are still missing to reach the target total
# and run exactly that many more.
# NOTE(review): if the stored run is already past `total_steps`, `left_to_do`
# is negative and is passed to `extend` unchanged -- confirm that is intended.
total_steps = 4999 
run_so_far = simulation.iteration
left_to_do = total_steps - run_so_far
simulation.extend(n_iterations=left_to_do)
Пример #23
0
def make_replica_dcd_files(topology,
                           timestep=5 * unit.femtosecond,
                           time_interval=200,
                           output_dir="output",
                           output_data="output.nc",
                           checkpoint_data="output_checkpoint.nc",
                           frame_begin=0,
                           frame_stride=1):
    """
    Make dcd files from replica exchange simulation trajectory data.

    One file named ``replica_<k>.dcd`` (k starting at 1) is written into
    ``output_dir`` per replica. The number of replicas is taken to be the
    number of thermodynamic states stored in the data file.

    :param topology: OpenMM Topology
    :type topology: `Topology() <https://simtk.org/api_docs/openmm/api4_1/python/classsimtk_1_1openmm_1_1app_1_1topology_1_1Topology.html>`_

    :param timestep: Time step used in the simulation (default=5*unit.femtosecond)
    :type timestep: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>` float * simtk.unit

    :param time_interval: frequency, in number of time steps, at which positions were recorded (default=200)
    :type time_interval: int

    :param output_dir: directory containing ``output_data``; the dcd files are written here too (default='output')
    :type output_dir: str

    :param output_data: name of output .nc data file (default='output.nc')
    :type output_data: str

    :param checkpoint_data: name of checkpoint .nc data file (default='output_checkpoint.nc')
    :type checkpoint_data: str

    :param frame_begin: Frame at which to start writing the dcd trajectory (default=0)
    :type frame_begin: int

    :param frame_stride: advance by this many time intervals when writing dcd trajectories (default=1)
    :type frame_stride: int

    :returns:
        - file_list ( List( str ) ) - A list of names for the files that were written
    """

    file_list = []

    output_data_path = os.path.join(output_dir, output_data)

    # This reporter is only needed for the replica count and the unit of the
    # stored positions, so close it as soon as both have been read.
    reporter = MultiStateReporter(output_data_path, open_mode="r")
    try:
        states = reporter.read_thermodynamic_states()[0]
        n_replicas = len(states)
        sampler_states = reporter.read_sampler_states(iteration=0)
        xunit = sampler_states[0].positions[0].unit
    finally:
        reporter.close()

    for replica_index in range(n_replicas):
        replica_trajectory = extract_trajectory(
            topology,
            replica_index=replica_index,
            output_data=output_data_path,
            checkpoint_data=checkpoint_data,
            frame_begin=frame_begin,
            frame_stride=frame_stride)

        file_name = f"{output_dir}/replica_{replica_index+1}.dcd"
        # The context manager guarantees the file is closed even if writing
        # a frame raises.
        with open(file_name, "wb") as file:
            dcd_file = DCDFile(file,
                               topology,
                               timestep,
                               firstStep=frame_begin,
                               interval=time_interval)

            for positions in replica_trajectory:
                # Attach the position unit read from the stored sampler state
                # before handing the frame to the DCD writer.
                dcd_file.writeModel(positions * xunit)

        file_list.append(file_name)

    return file_list
Пример #24
0
def extract_trajectory(topology,
                       output_data="output/output.nc",
                       checkpoint_data="output_checkpoint.nc",
                       state_index=None,
                       replica_index=None,
                       frame_begin=0,
                       frame_stride=1,
                       frame_end=-1):
    """
    Internal function to extract a trajectory (replica or state) from .nc file.
    Based on YANK extract_trajectory code.

    :param topology: not used by this function; kept for interface compatibility
    :type topology: OpenMM Topology

    :param output_data: path to the .nc data file (default='output/output.nc')
    :type output_data: str

    :param checkpoint_data: name of the checkpoint .nc file, passed to the reporter as ``checkpoint_storage`` (default='output_checkpoint.nc')
    :type checkpoint_data: str

    :param state_index: extract the frames of whichever replicas occupy this state; takes precedence over ``replica_index`` when both are given (default=None)
    :type state_index: int

    :param replica_index: extract the frames of this replica (default=None)
    :type replica_index: int

    :param frame_begin: first stored frame to extract; negative values count from the end (default=0)
    :type frame_begin: int

    :param frame_stride: step between extracted frames (default=1)
    :type frame_stride: int

    :param frame_end: exclusive end frame; negative values count from the end, so -1 means "through the last frame" (default=-1)
    :type frame_end: int

    :returns:
        - positions ( np.ndarray( float32, n_trajectory_frames x n_atoms x 3 ) ) - stored positions, without units attached

    :raises ValueError: if neither ``state_index`` nor ``replica_index`` is given, or if the frame selection is empty
    """
    # Without either index there is nothing to select on the replica axis.
    if state_index is None and replica_index is None:
        raise ValueError(
            'Either state_index or replica_index must be specified')

    reporter = MultiStateReporter(output_data,
                                  open_mode='r',
                                  checkpoint_storage=checkpoint_data)

    # This function is called once per state/replica by the file writers, so
    # the storage handles must be released on every exit path.
    try:
        # Get dimensions
        trajectory_storage = reporter._storage_checkpoint
        stored_positions = trajectory_storage.variables['positions']
        n_frames = stored_positions.shape[0]
        n_atoms = stored_positions.shape[2]

        # Determine frames to extract.
        # Convert negative indices to last indices.
        if frame_begin < 0:
            frame_begin = n_frames + frame_begin
        if frame_end < 0:
            frame_end = n_frames + frame_end + 1
        frame_indices = range(frame_begin, frame_end, frame_stride)
        if not frame_indices:
            raise ValueError('No frames selected')

        # Determine the number of frames that the trajectory will have.
        if state_index is None:
            n_trajectory_frames = len(frame_indices)
        else:
            # With SAMS, an iteration can have 0 or more replicas in a given state.
            # Deconvolute state indices.
            # NOTE(review): 'states' is indexed with the same frame index used
            # for the checkpoint positions; confirm the two files share an
            # index when the checkpoint interval is not 1.
            replica_states = reporter._storage_analysis.variables['states']
            state_indices = [
                np.where(replica_states[iteration, :] == state_index)[0]
                for iteration in frame_indices
            ]
            n_trajectory_frames = sum(len(x) for x in state_indices)

        # Initialize positions array.
        # MDTraj Cython code expects float32 positions.
        positions = np.zeros((n_trajectory_frames, n_atoms, 3),
                             dtype=np.float32)

        if state_index is not None:
            # Extract state positions: one output frame per replica found in
            # the requested state at each selected frame.
            frame_idx = 0
            for i, iteration in enumerate(frame_indices):
                for replica_in_state in state_indices[i]:
                    positions[frame_idx, :, :] = stored_positions[
                        iteration, replica_in_state, :, :].astype(np.float32)
                    frame_idx += 1
        else:
            # Extract replica positions: exactly one output frame per
            # selected frame.
            for i, iteration in enumerate(frame_indices):
                positions[i, :, :] = stored_positions[
                    iteration, replica_index, :, :].astype(np.float32)
    finally:
        reporter.close()

    return positions
Пример #25
0
    def setup_class(cls):
        """Shared test cases and variables.

        Builds a host-guest vacuum test case (alchemical compound states,
        translated sampler states and unsampled non-alchemical states), runs
        a short simulation with ``cls.SAMPLER`` into a temporary storage file
        and keeps the sampler, the reporter and related sizes on ``cls``.

        Reads ``cls.N_SAMPLERS``, ``cls.N_STATES``, ``cls.SAMPLER`` and
        ``cls.ONLINE_ANALYSIS``, and calls ``cls.call_sampler_create``.
        """
        cls.checkpoint_interval = 2
        # Make sure we collect the same number of samples for all tests to avoid instabilities in MBAR.
        base_steps = 50
        cls.n_steps = int(np.ceil(base_steps / cls.N_SAMPLERS))

        # Test case with host guest in vacuum at 3 different positions and alchemical parameters.
        # -----------------------------------------------------------------------------------------
        hostguest_test = mmtools.testsystems.HostGuestVacuum()
        factory = mmtools.alchemy.AbsoluteAlchemicalFactory()
        # Atoms 126..155 form the alchemical region (the same range is passed
        # as ligand_atoms to the Topography below).
        alchemical_region = mmtools.alchemy.AlchemicalRegion(
            alchemical_atoms=range(126, 156))
        hostguest_alchemical = factory.create_alchemical_system(
            hostguest_test.system, alchemical_region)

        # Add restraint force between host and guest.
        restraint_force = mmtools.forces.HarmonicRestraintBondForce(
            spring_constant=2.0 * unit.kilojoule_per_mole / unit.angstrom**2,
            restrained_atom_index1=10,
            restrained_atom_index2=16,
        )
        # A copy is added so the same force object can be copied again into
        # the non-alchemical system further down.
        hostguest_alchemical.addForce(copy.deepcopy(restraint_force))

        # Translate the sampler states to be different one from each other.
        positions = hostguest_test.positions
        box_vectors = hostguest_test.system.getDefaultPeriodicBoxVectors()
        # Sampler i is shifted by 10 * i nanometers.
        hostguest_sampler_states = [
            mmtools.states.SamplerState(
                positions=positions + 10 * i * unit.nanometers,
                box_vectors=box_vectors) for i in range(cls.N_SAMPLERS)
        ]

        # Create the basic thermodynamic states.
        hostguest_thermodynamic_states = [
            mmtools.states.ThermodynamicState(hostguest_alchemical,
                                              300 * unit.kelvin)
            for _ in range(cls.N_STATES)
        ]

        # Create alchemical states at different parameter values.
        alchemical_states = [
            mmtools.alchemy.AlchemicalState.from_system(hostguest_alchemical)
            for _ in range(cls.N_STATES)
        ]
        # Parameters are spaced evenly from 0.0 to 1.0 across the states.
        # NOTE(review): this divides by (N_STATES - 1), so N_STATES must be > 1.
        for i, alchemical_state in enumerate(alchemical_states):
            alchemical_state.set_alchemical_parameters(
                float(i) / (cls.N_STATES - 1))

        # Create compound states.
        hostguest_compound_states = list()
        for i in range(cls.N_STATES):
            hostguest_compound_states.append(
                mmtools.states.CompoundThermodynamicState(
                    thermodynamic_state=hostguest_thermodynamic_states[i],
                    composable_states=[alchemical_states[i]]))

        # Unsampled states.
        # Two copies of the non-alchemical system with the restraint added
        # and lambda_restraints set to 1.0.
        cls.n_unsampled_states = 2
        nonalchemical_system = copy.deepcopy(hostguest_test.system)
        nonalchemical_system.addForce(copy.deepcopy(restraint_force))
        nonalchemical_state = mmtools.states.ThermodynamicState(
            nonalchemical_system, 300 * unit.kelvin)
        nonalchemical_compound_state = mmtools.states.CompoundThermodynamicState(
            thermodynamic_state=nonalchemical_state,
            composable_states=[RestraintState(lambda_restraints=1.0)])
        hostguest_unsampled_states = [
            copy.deepcopy(nonalchemical_compound_state)
            for _ in range(cls.n_unsampled_states)
        ]

        cls.hostguest_test = (hostguest_compound_states,
                              hostguest_sampler_states,
                              hostguest_unsampled_states)

        # Run a quick simulation
        # Work on deep copies so the states stored in cls.hostguest_test are
        # not the objects handed to the sampler.
        thermodynamic_states, sampler_states, unsampled_states = copy.deepcopy(
            cls.hostguest_test)
        n_states = len(thermodynamic_states)

        # Prepare metadata for analysis.
        reference_state = mmtools.states.ThermodynamicState(
            hostguest_test.system, 300 * unit.kelvin)
        topography = Topography(hostguest_test.topology,
                                ligand_atoms=range(126, 156))
        metadata = {
            'standard_state_correction': 4.0,
            'reference_state': mmtools.utils.serialize(reference_state),
            'topography': mmtools.utils.serialize(topography)
        }
        # The receptor atoms are passed to the reporter as analysis particles.
        analysis_atoms = topography.receptor_atoms

        # Create simulation and storage file.
        # NOTE: the temporary directory is created here and not removed in
        # this method.
        cls.tmp_dir = tempfile.mkdtemp()
        storage_path = os.path.join(cls.tmp_dir, 'test_analyze.nc')
        move = mmtools.mcmc.LangevinDynamicsMove(n_steps=1)
        # With online analysis enabled, the interval is set to n_steps - 1,
        # the same number of iterations that is run below.
        if cls.ONLINE_ANALYSIS:
            online_analysis_interval = cls.n_steps - 1
        else:
            online_analysis_interval = None
        cls.sampler = cls.SAMPLER(
            mcmc_moves=move,
            number_of_iterations=cls.n_steps,
            online_analysis_interval=online_analysis_interval,
            online_analysis_minimum_iterations=0)
        cls.reporter = MultiStateReporter(
            storage_path,
            checkpoint_interval=cls.checkpoint_interval,
            analysis_particle_indices=analysis_atoms)
        cls.call_sampler_create(cls.sampler,
                                cls.reporter,
                                thermodynamic_states,
                                sampler_states,
                                unsampled_states,
                                metadata=metadata)

        # Run some iterations.
        cls.n_replicas = cls.N_SAMPLERS
        cls.n_states = n_states
        cls.analysis_atoms = analysis_atoms
        cls.sampler.run(cls.n_steps - 1)  # Initial config
        cls.repex_name = "RepexAnalyzer"  # kind of an unused test

        # Debugging Messages to sent to Nose with --nocapture enabled
        # Prints a banner framed by '#' rows as wide as the description.
        online_flag = " "
        if cls.ONLINE_ANALYSIS:
            online_flag += "Online "
        output_descr = "Testing{}Analyzer: {}  -- States: {}  -- Samplers: {}".format(
            online_flag, cls.SAMPLER.__name__, cls.N_STATES, cls.N_SAMPLERS)
        len_output = len(output_descr)
        print("#" * len_output)
        print(output_descr)
        print("#" * len_output)