示例#1
0
def test_write_object():
    """Write objects through a storage view and read them back.

    One object is written without an iteration index, then ten objects are
    written with consecutive iteration indices.  Each indexed object is
    retrieved with ``storage.get_object`` and its payload is checked.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')

    # Use names we might encounter in a simulation.
    envname, modname, varname = 'vacuum', 'ExpandedEnsembleSampler', 'energy'
    view = NetCDFStorageView(storage, envname, modname)

    # Object stored without an iteration index.
    view.write_object('singleton', {0: 0})

    for index in range(10):
        view.write_object(varname, {'iteration': index}, iteration=index)

    for index in range(10):
        restored = storage.get_object(envname, modname, varname, iteration=index)
        assert 'iteration' in restored
        assert restored['iteration'] == index
示例#2
0
    def __init__(self, complex_sampler, solvent_sampler, log_state_penalties, storage=None, verbose=False):
        """
        Set up a protonation state sampler with fixed target probabilities for ligand in solvent.

        Parameters
        ----------
        complex_sampler : ExpandedEnsembleSampler
            Sampler for the ligand in complex
        solvent_sampler : SAMSSampler
            Sampler for the ligand in solution
        log_state_penalties : dict
            log_state_penalties[smiles] is the log state free energy (in kT) for ligand state 'smiles'
        storage : NetCDFStorage, optional, default=None
            If given, it is wrapped in a NetCDFStorageView named after this class
            and used to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        """
        # Keep the penalties and both samplers, individually and as a list.
        self.log_state_penalties = log_state_penalties
        self.complex_sampler = complex_sampler
        self.solvent_sampler = solvent_sampler
        self.samplers = [self.complex_sampler, self.solvent_sampler]

        # Only build a storage view when a storage layer was supplied.
        if storage is None:
            self.storage = None
        else:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # The log target probability of each state is its negated penalty.
        self.log_target_probabilities = {
            key: -penalty for key, penalty in log_state_penalties.items()
        }
        self.verbose = verbose
        self.iteration = 0
def test_storage_view():
    """Check name handling when one storage view is built on another.

    A view created from an existing view must report the parent's
    ``envname`` together with its own ``modname``.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    env_view = NetCDFStorageView(storage, envname='envname')
    mod_view = NetCDFStorageView(env_view, modname='modname')
    assert env_view._envname == 'envname'
    assert mod_view._envname == 'envname'
    assert mod_view._modname == 'modname'
示例#4
0
def test_write_quantity():
    """Write scalar quantities through a storage view and verify them.

    One quantity is written without an iteration index, then ten
    per-iteration values are written and compared against the entries of
    the underlying NetCDF variable.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    view = NetCDFStorageView(storage, 'envname', 'modname')

    # Quantity stored without an iteration index.
    view.write_quantity('singleton', 1.0)

    for index in range(10):
        view.write_quantity('varname', float(index), iteration=index)

    for index in range(10):
        stored = storage._ncfile['/envname/modname/varname'][index]
        assert stored == float(index)
示例#5
0
    def __init__(self, complex_sampler, solvent_sampler, log_state_penalties, storage=None, verbose=False):
        """
        Set up a protonation state sampler with fixed target probabilities for ligand in solvent.

        Parameters
        ----------
        complex_sampler : ExpandedEnsembleSampler
            Sampler for the ligand in complex
        solvent_sampler : SAMSSampler
            Sampler for the ligand in solution
        log_state_penalties : dict
            log_state_penalties[smiles] is the log state free energy (in kT) for ligand state 'smiles'
        storage : NetCDFStorage, optional, default=None
            If given, it is wrapped in a NetCDFStorageView named after this class
            and used to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        """
        # Keep the penalties and both samplers, individually and as a list.
        self.log_state_penalties = log_state_penalties
        self.complex_sampler = complex_sampler
        self.solvent_sampler = solvent_sampler
        self.samplers = [self.complex_sampler, self.solvent_sampler]

        # Only build a storage view when a storage layer was supplied.
        if storage is None:
            self.storage = None
        else:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # The log target probability of each state is its negated penalty.
        self.log_target_probabilities = {
            key: -penalty for key, penalty in log_state_penalties.items()
        }
        self.verbose = verbose
        self.iteration = 0
示例#6
0
    def __init__(self, target_samplers, storage=None, verbose=False):
        r"""
        Initialize a multi-objective design sampler with the specified target sampler powers.

        Parameters
        ----------
        target_samplers : dict
            target_samplers[sampler] is the exponent associated with SAMS sampler `sampler` in the multi-objective design.
        storage : NetCDFStorage, optional, default=None
            If specified, it is wrapped in a NetCDFStorageView named after this
            class and used to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        The target sampler weights for N samplers with specified exponents \alpha_n are given by

        \pi_{nk} \propto \prod_{n=1}^N Z_{nk}^{\alpha_n}

        where \pi_{nk} is the target weight for sampler n state k,
        and Z_{nk} is the relative partition function of sampler n among states k.

        Examples
        --------
        Set up a mutation sampler to maximize implicit solvent hydration free energy.
        >>> from perses.tests.testsystems import AlanineDipeptideTestSystem
        >>> testsystem = AlanineDipeptideTestSystem()
        >>> # Set up target samplers.
        >>> target_samplers = { testsystem.sams_samplers['implicit'] : 1.0, testsystem.sams_samplers['vacuum'] : -1.0 }
        >>> # Set up the design sampler.
        >>> designer = MultiTargetDesign(target_samplers)

        """
        # NOTE: the docstring above is a raw string on purpose. It contains
        # LaTeX backslashes; in a normal string "\a" becomes a BEL character
        # and "\p" is an invalid escape sequence.

        # Store target samplers (the dict keys) and their exponents.
        self.sampler_exponents = target_samplers
        self.samplers = list(target_samplers)

        # Only build a storage view when a storage layer was supplied.
        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage,
                                             modname=self.__class__.__name__)

        # Target probabilities start out as an empty dict.
        self.log_target_probabilities = dict()
        self.verbose = verbose
        self.iteration = 0
示例#7
0
def test_write_object():
    """Write objects through a storage view and decode the raw entries.

    One object is written without an iteration index, then ten objects are
    written with consecutive iteration indices.  Each raw entry of the
    underlying NetCDF variable is decoded with ``json.loads`` and checked.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    view = NetCDFStorageView(storage, 'envname', 'modname')

    # Object stored without an iteration index.
    view.write_object('singleton', {0: 0})

    for index in range(10):
        view.write_object('varname', {'iteration': index}, iteration=index)

    for index in range(10):
        raw_entry = storage._ncfile['/envname/modname/varname'][index]
        decoded = json.loads(raw_entry)
        assert 'iteration' in decoded
        assert decoded['iteration'] == index
def test_write_object():
    """Write objects through a storage view and read them back.

    One object is written without an iteration index, then ten objects are
    written with consecutive iteration indices.  Each indexed object is
    retrieved with ``storage.get_object`` and its payload is checked.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')

    # Use names we might encounter in a simulation.
    envname, modname, varname = 'vacuum', 'ExpandedEnsembleSampler', 'energy'
    view = NetCDFStorageView(storage, envname, modname)

    # Object stored without an iteration index.
    view.write_object('singleton', {0: 0})

    for index in range(10):
        view.write_object(varname, {'iteration': index}, iteration=index)

    for index in range(10):
        restored = storage.get_object(envname, modname, varname, iteration=index)
        assert 'iteration' in restored
        assert restored['iteration'] == index
def test_write_array():
    """Write arrays through two storage views and check the stored shapes.

    The two views share a module name but use different environment names.
    One array is written without an iteration index, then the same random
    array is written to both views for each of ten iterations.  Every
    stored entry must have the shape of the array that was written.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    view1 = NetCDFStorageView(storage, 'envname1', 'modname')
    view2 = NetCDFStorageView(storage, 'envname2', 'modname')

    from numpy.random import random
    shape = (10, 3)

    # Array stored without an iteration index.
    view1.write_array('singleton', random(shape))

    for index in range(10):
        frame = random(shape)
        for view in (view1, view2):
            view.write_array('varname', frame, iteration=index)

    for index in range(10):
        for path in ('/envname1/modname/varname', '/envname2/modname/varname'):
            stored = storage._ncfile[path][index]
            assert stored.shape == shape
示例#10
0
    def __init__(self, target_samplers, storage=None, verbose=False):
        r"""
        Initialize a multi-objective design sampler with the specified target sampler powers.

        Parameters
        ----------
        target_samplers : dict
            target_samplers[sampler] is the exponent associated with SAMS sampler `sampler` in the multi-objective design.
        storage : NetCDFStorage, optional, default=None
            If specified, it is wrapped in a NetCDFStorageView named after this
            class and used to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        The target sampler weights for N samplers with specified exponents \alpha_n are given by

        \pi_{nk} \propto \prod_{n=1}^N Z_{nk}^{\alpha_n}

        where \pi_{nk} is the target weight for sampler n state k,
        and Z_{nk} is the relative partition function of sampler n among states k.

        Examples
        --------
        Set up a mutation sampler to maximize implicit solvent hydration free energy.
        >>> from perses.tests.testsystems import AlanineDipeptideTestSystem
        >>> testsystem = AlanineDipeptideTestSystem()
        >>> # Set up target samplers.
        >>> target_samplers = { testsystem.sams_samplers['implicit'] : 1.0, testsystem.sams_samplers['vacuum'] : -1.0 }
        >>> # Set up the design sampler.
        >>> designer = MultiTargetDesign(target_samplers)

        """
        # NOTE: the docstring above is a raw string on purpose. It contains
        # LaTeX backslashes; in a normal string "\a" becomes a BEL character
        # and "\p" is an invalid escape sequence.

        # Store target samplers (the dict keys) and their exponents.
        self.sampler_exponents = target_samplers
        self.samplers = list(target_samplers)

        # Only build a storage view when a storage layer was supplied.
        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Target probabilities start out as an empty dict.
        self.log_target_probabilities = dict()
        self.verbose = verbose
        self.iteration = 0
示例#11
0
def test_write_quantity():
    """Write scalar quantities through a storage view and verify them.

    One quantity is written without an iteration index, then ten
    per-iteration values are written and compared against the entries of
    the underlying NetCDF variable.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    view = NetCDFStorageView(storage, 'envname', 'modname')

    # Quantity stored without an iteration index.
    view.write_quantity('singleton', 1.0)

    for index in range(10):
        view.write_quantity('varname', float(index), iteration=index)

    for index in range(10):
        stored = storage._ncfile['/envname/modname/varname'][index]
        assert stored == float(index)
示例#12
0
def test_write_array():
    """Write arrays through two storage views and check the stored shapes.

    The two views share a module name but use different environment names.
    One array is written without an iteration index, then the same random
    array is written to both views for each of ten iterations.  Every
    stored entry must have the shape of the array that was written.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    view1 = NetCDFStorageView(storage, 'envname1', 'modname')
    view2 = NetCDFStorageView(storage, 'envname2', 'modname')

    from numpy.random import random
    shape = (10, 3)

    # Array stored without an iteration index.
    view1.write_array('singleton', random(shape))

    for index in range(10):
        frame = random(shape)
        for view in (view1, view2):
            view.write_array('varname', frame, iteration=index)

    for index in range(10):
        for path in ('/envname1/modname/varname', '/envname2/modname/varname'):
            stored = storage._ncfile[path][index]
            assert stored.shape == shape
示例#13
0
def test_write_object():
    """Write objects through a storage view and read them back by path.

    One object is written without an iteration index, then ten objects are
    written with consecutive iteration indices.  Each indexed object is
    retrieved with ``storage.get_object`` using the full variable path and
    its payload is checked.
    """
    tmpfile = tempfile.NamedTemporaryFile()
    storage = NetCDFStorage(tmpfile.name, mode='w')
    view = NetCDFStorageView(storage, 'envname', 'modname')

    # Object stored without an iteration index.
    view.write_object('singleton', {0: 0})

    for index in range(10):
        view.write_object('varname', {'iteration': index}, iteration=index)

    for index in range(10):
        restored = storage.get_object('/envname/modname/varname', iteration=index)
        assert 'iteration' in restored
        assert restored['iteration'] == index
示例#14
0
class ExpandedEnsembleSampler(object):
    """
    Method of expanded ensembles sampling engine.

    The acceptance criteria is given in the reference document. Roughly, the proposal scheme is:

    * Draw a proposed chemical state k', and calculate reverse proposal probability
    * Conditioned on k' and the current positions x, generate new positions with the GeometryEngine
    * With new positions, jump to a hybrid system at lambda=0
    * Anneal from lambda=0 to lambda=1, accumulating work
    * Jump from the hybrid system at lambda=1 to the k' system, and compute reverse GeometryEngine proposal
    * Add weight of chemical states k and k' to acceptance probabilities

    Properties
    ----------
    sampler : MCMCSampler
        The MCMC sampler used for updating positions.
    proposal_engine : ProposalEngine
        The ProposalEngine to use for proposing new sampler states and topologies.
    system_generator : SystemGenerator
        The SystemGenerator to use for creating System objects following proposals.
    state : hashable object
        The current sampler state. Can be any hashable object.
    states : set of hashable object
        All known states.
    iteration : int
        Iterations completed.
    naccepted : int
        Number of accepted thermodynamic/chemical state changes.
    nrejected : int
        Number of rejected thermodynamic/chemical state changes.
    number_of_state_visits : dict of state_key
        Cumulative counts of visited states.
    verbose : bool
        If True, verbose output is printed.

    References
    ----------
    [1] Lyubartsev AP, Martsinovski AA, Shevkunov SV, and Vorontsov-Velyaminov PN. New approach to Monte Carlo calculation of the free energy: Method of expanded ensembles. JCP 96:1776, 1992
    http://dx.doi.org/10.1063/1.462133


    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None })
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> from perses.rjmc.geometry import FFAllAngleGeometryEngine
    >>> geometry_engine = FFAllAngleGeometryEngine(metadata={})
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(test.topology, system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine, geometry_engine)
    >>> # Run the sampler
    >>> exen_sampler.run()

    """
    def __init__(self, sampler, topology, state_key, proposal_engine, geometry_engine, log_weights=None, options=None, platform=None, envname=None, storage=None, ncmc_write_interval=1):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        geometry_engine : GeometryEngine
            GeometryEngine to use for dimension matching
        log_weights : dict of object : float, optional, default=None
            Log weights to use for expanded ensemble biases.
            If `None`, an empty dict is used.
        options : dict, optional, default=None
            Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC.
            Recognized keys are 'timestep', 'nsteps', 'functions', 'nsteps_mcmc' and 'splitting';
            a missing key is treated like `None`. The dict passed in is not modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.
        envname : str, optional, default=None
            Accepted for interface compatibility; not used by this constructor.
        storage : NetCDFStorageView, optional, default=None
            If specified, use this storage layer.
        ncmc_write_interval : int, default 1
            How frequently to write out NCMC protocol steps.
        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self._pressure = sampler.thermodynamic_state.pressure
        self._temperature = sampler.thermodynamic_state.temperature
        self.topology = md.Topology.from_openmm(topology)
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = dict() if log_weights is None else log_weights

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize
        self.iteration = 0

        # Read options with .get() so that the caller's dict is never
        # mutated; a missing key behaves exactly like an explicit None.
        if options is None:
            options = dict()

        # Fall back to the default integrator splitting when none is given.
        self._ncmc_splitting = options.get('splitting') or "V R O H R V"

        nsteps = options.get('nsteps')
        if nsteps:
            self._switching_nsteps = nsteps
            self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature,
                                          timestep=options.get('timestep'), nsteps=nsteps,
                                          functions=options.get('functions'), integrator_splitting=self._ncmc_splitting,
                                          platform=platform, storage=self.storage,
                                          write_ncmc_interval=ncmc_write_interval)
        else:
            # No switching steps requested: no NCMC engine is created, and
            # _geometry_ncmc_geometry skips the switching stage entirely.
            self._switching_nsteps = 0

        # Fall back to 100 MCMC iterations per update when none is given.
        self._n_iterations_per_update = options.get('nsteps_mcmc') or 100

        self.geometry_engine = geometry_engine
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs
        self.logPs = list()
        self.sampler.minimize(max_iterations=40)

    @property
    def state_keys(self):
        return self.log_weights.keys()

    def get_log_weight(self, state_key):
        """
        Get the log weight of the specified state.

        Parameters
        ----------
        state_key : hashable object
            The state key (e.g. chemical state key) to look up.

        Returns
        -------
        log_weight : float
            The log weight of the provided state key.

        Note
        ----
        This adds the key to the self.log_weights dict.

        """
        if state_key not in self.log_weights:
            self.log_weights[state_key] = 0.0
        return self.log_weights[state_key]

    def _system_to_thermodynamic_state(self, system):
        """
        Build a ThermodynamicState for `system` using the temperature and
        pressure recorded on this sampler.

        Parameters
        ----------
        system : openmm.System
            The OpenMM system for which to create the thermodynamic state

        Returns
        -------
        new_thermodynamic_state : openmmtools.states.ThermodynamicState
            The thermodynamic state object representing the given system
        """
        state_kwargs = dict(temperature=self._temperature, pressure=self._pressure)
        return ThermodynamicState(system, **state_kwargs)

    def _geometry_forward(self, topology_proposal, old_sampler_state):
        """
        Propose positions for the new system with the geometry engine.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        old_sampler_state : openmmtools.states.SamplerState
            Configurational properties of the old system atoms.

        Returns
        -------
        new_sampler_state : openmmtools.states.SamplerState
            Sampler state built from the proposed positions and the box
            vectors of `old_sampler_state`.
        geometry_logp_propose : float
            The log probability of the forward-only proposal
        """
        if self.verbose:
            print("Geometry engine proposal...")

        # Ask the geometry engine for new positions and the log probability
        # of that proposal, timing the call for the verbose report.
        start_time = time.time()
        beta = self.sampler.thermodynamic_state.beta
        proposal = self.geometry_engine.propose(topology_proposal, old_sampler_state.positions, beta)
        new_positions, geometry_logp_propose = proposal
        if self.verbose:
            elapsed = time.time() - start_time
            print('proposal took %.3f s' % elapsed)

        # Optionally dump the proposed geometry to the configured PDB stream.
        pdb_stream = self.geometry_pdbfile
        if pdb_stream is not None:
            print("Writing proposed geometry...")
            from simtk.openmm.app import PDBFile
            PDBFile.writeFile(topology_proposal.new_topology, new_positions, file=pdb_stream)
            pdb_stream.flush()

        new_sampler_state = SamplerState(new_positions, box_vectors=old_sampler_state.box_vectors)
        return new_sampler_state, geometry_logp_propose

    def _geometry_reverse(self, topology_proposal, new_sampler_state, old_sampler_state):
        """
        Run geometry engine reverse calculation to determine logP
        of proposing the old positions based on the new positions

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        new_sampler_state : openmmtools.states.SamplerState
            Configurational properties of the new atoms.
        old_sampler_state : openmmtools.states.SamplerState
            Configurational properties of the old atoms.

        Returns
        -------
        geometry_logp_reverse : float
            The log probability of the proposal for the given transformation
        """
        if self.verbose: print("Geometry engine logP_reverse calculation...")
        initial_time = time.time()
        geometry_logp_reverse = self.geometry_engine.logp_reverse(topology_proposal, new_sampler_state.positions, old_sampler_state.positions, self.sampler.thermodynamic_state.beta)
        if self.verbose: print('calculation took %.3f s' % (time.time() - initial_time))
        return geometry_logp_reverse

    def _ncmc_hybrid(self, topology_proposal, old_sampler_state, new_sampler_state):
        """
        Run a hybrid NCMC protocol from lambda = 0 to lambda = 1

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        old_sampler_State : openmmtools.states.SamplerState
            SamplerState of old system at the beginning of NCMCSwitching
        new_sampler_state : openmmtools.states.SamplerState
            SamplerState of new system at the beginning of NCMCSwitching

        Returns
        -------
        old_final_sampler_state : openmmtools.states.SamplerState
            SamplerState of old system at the end of switching
        new_final_sampler_state : openmmtools.states.SamplerState
            SamplerState of new system at the end of switching
        logP_work : float
            The NCMC work contribution to the log acceptance probability (Eq. 44)
        logP_energy : float
            The contribution of switching to and from the hybrid system to the acceptance probability (Eq. 45)
        """
        if self.verbose: print("Performing NCMC switching")
        initial_time = time.time()
        [ncmc_old_sampler_state, ncmc_new_sampler_state, logP_work, logP_initial_hybrid, logP_final_hybrid] = self.ncmc_engine.integrate(topology_proposal, old_sampler_state, new_sampler_state, iteration=self.iteration)
        if self.verbose: print('NCMC took %.3f s' % (time.time() - initial_time))
        # Check that positions are not NaN
        if new_sampler_state.has_nan():
            raise Exception("Positions are NaN after NCMC insert with %d steps" % self._switching_nsteps)
        return ncmc_old_sampler_state, ncmc_new_sampler_state, logP_work, logP_initial_hybrid, logP_final_hybrid

    def _geometry_ncmc_geometry(self, topology_proposal, sampler_state, old_log_weight, new_log_weight):
        """
        Use a hybrid NCMC protocol to switch from the old system to new system.

        New positions are proposed for the new system first; both sets of
        positions are then given to the hybrid NCMC integrator (this stage is
        skipped when no switching steps are configured); finally the end
        positions of the old and new systems are used to calculate the reverse
        geometry probability.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        sampler_state : openmmtools.states.SamplerState
            Configurational properties of old atoms at the beginning of the NCMC switching.
        old_log_weight : float
            Chemical state weight from SAMSSampler
        new_log_weight : float
            Chemical state weight from SAMSSampler

        Returns
        -------
        logP_accept : float
            Log of acceptance probability of entire Expanded Ensemble switch (Eq. 25 or 46)
        ncmc_new_sampler_state : openmmtools.states.SamplerState
            Configurational properties of new atoms at the end of the NCMC switching.
        """
        if self.verbose: print("Updating chemical state with geometry-ncmc-geometry scheme...")

        logP_chemical_proposal = topology_proposal.logp_proposal

        old_thermodynamic_state = self.sampler.thermodynamic_state
        new_thermodynamic_state = self._system_to_thermodynamic_state(topology_proposal.new_system)

        # Log probability of the starting configuration in the old system.
        initial_reduced_potential = feptasks.compute_reduced_potential(old_thermodynamic_state, sampler_state)
        logP_initial_nonalchemical = - initial_reduced_potential

        new_geometry_sampler_state, logP_geometry_forward = self._geometry_forward(topology_proposal, sampler_state)

        # If we aren't doing any switching, then skip running the NCMC engine at all.
        if self._switching_nsteps == 0:
            ncmc_old_sampler_state = sampler_state
            ncmc_new_sampler_state = new_geometry_sampler_state
            logP_work = 0.0
            logP_initial_hybrid = 0.0
            logP_final_hybrid = 0.0
        else:
            ncmc_old_sampler_state, ncmc_new_sampler_state, logP_work, logP_initial_hybrid, logP_final_hybrid = self._ncmc_hybrid(topology_proposal, sampler_state, new_geometry_sampler_state)

        if logP_work > -np.inf and logP_initial_hybrid > -np.inf and logP_final_hybrid > -np.inf:
            logP_geometry_reverse = self._geometry_reverse(topology_proposal, ncmc_new_sampler_state, ncmc_old_sampler_state)
            logP_to_hybrid = logP_initial_hybrid - logP_initial_nonalchemical

            # Log probability of the final configuration in the new system.
            final_reduced_potential = feptasks.compute_reduced_potential(new_thermodynamic_state, ncmc_new_sampler_state)
            logP_final_nonalchemical = -final_reduced_potential
            logP_from_hybrid = logP_final_nonalchemical - logP_final_hybrid
        else:
            # At least one switching term is -inf (or NaN, which also fails
            # the comparison): zero the remaining terms and skip the reverse
            # geometry and final-energy evaluations.
            logP_geometry_reverse = 0.0
            logP_to_hybrid = 0.0
            logP_from_hybrid = 0.0
            #TODO: mark failed proposals as unproposable

        logP_sams_weight = new_log_weight - old_log_weight

        # Compute total log acceptance probability according to Eq. 46
        logP_accept = logP_to_hybrid - logP_geometry_forward + logP_work + logP_from_hybrid + logP_geometry_reverse + logP_sams_weight

        if self.verbose:
            print("logP_accept = %+10.4e [logP_to_hybrid = %+10.4e, logP_chemical_proposal = %10.4e, logP_reverse = %+10.4e, -logP_forward = %+10.4e, logP_work = %+10.4e, logP_from_hybrid = %+10.4e, logP_sams_weight = %+10.4e]"
                % (logP_accept, logP_to_hybrid, logP_chemical_proposal, logP_geometry_reverse, -logP_geometry_forward, logP_work, logP_from_hybrid, logP_sams_weight))

        # Write to storage.
        if self.storage:
            quantities = (
                ('logP_accept', logP_accept),
                # Components of the acceptance probability.
                ('logP_ncmc_work', logP_work),
                ('logP_from_hybrid', logP_from_hybrid),
                ('logP_to_hybrid', logP_to_hybrid),
                ('logP_chemical_proposal', logP_chemical_proposal),
                ('logP_reverse', logP_geometry_reverse),
                ('logP_forward', logP_geometry_forward),
                ('logP_sams_weight', logP_sams_weight),
                # Aggregate statistics to make contributions to the
                # acceptance probability easier to analyze.
                ('logP_groups_chemical', logP_chemical_proposal),
                ('logP_groups_geometry', logP_geometry_reverse - logP_geometry_forward),
            )
            for name, value in quantities:
                self.storage.write_quantity(name, value, iteration=self.iteration)

        return logP_accept, ncmc_new_sampler_state

    def update_positions(self, n_iterations=1):
        """
        Sample new positions.
        """
        self.sampler.run(n_iterations=n_iterations)

    def update_state(self):
        """
        Sample the thermodynamic state.

        One proposal / accept-reject step is performed:

        1. ``self.proposal_engine`` proposes a new topology from the current
           system (thermostat removed) and the current topology.
        2. ``self._geometry_ncmc_geometry`` returns the log acceptance
           probability and the candidate sampler state.
        3. The proposal is accepted if ``logp_accept >= 0`` or with probability
           ``exp(logp_accept)`` otherwise. A NaN ``logp_accept`` is always
           rejected. ``self.accept_everything`` forces acceptance, but only
           when ``logp_accept`` is not NaN.
        4. On acceptance the sampler's system, topology, sampler state and
           ``self.state_key`` are switched to the proposed ones and
           ``self.naccepted`` is incremented; otherwise ``self.nrejected`` is.
        5. If ``self.storage`` is set, the resulting state and the acceptance
           bookkeeping are written for ``self.iteration``.
        6. ``self.update_statistics()`` is called.
        """
        # Propose new chemical state.
        if self.verbose: print("Proposing new topology...")
        system = self.sampler.thermodynamic_state.get_system(remove_thermostat=True)
        omm_topology = self.topology.to_openmm() #convert to OpenMM topology for proposal engine
        omm_topology.setPeriodicBoxVectors(self.sampler.sampler_state.box_vectors) #set the box vectors because in OpenMM topology has these...
        topology_proposal = self.proposal_engine.propose(system, omm_topology)
        if self.verbose: print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))

        # Determine state keys
        old_state_key = self.state_key
        new_state_key = topology_proposal.new_chemical_state_key

        # Determine log weight
        old_log_weight = self.get_log_weight(old_state_key)
        new_log_weight = self.get_log_weight(new_state_key)

        logp_accept, ncmc_new_sampler_state = self._geometry_ncmc_geometry(topology_proposal, self.sampler.sampler_state, old_log_weight, new_log_weight)

        # Accept or reject.
        if np.isnan(logp_accept):
            # A NaN acceptance probability can never be accepted.
            accept = False
            print('logp_accept = NaN')
        else:
            # The `>= 0` short-circuit avoids drawing a random number (and
            # exponentiating a large value) when acceptance is certain.
            accept = ((logp_accept>=0.0) or (np.random.uniform() < np.exp(logp_accept)))
            if self.accept_everything:
                print('accept_everything option is turned on; accepting')
                accept = True

        if accept:
            # NOTE: the order of these assignments is preserved from the original;
            # the old sampler state receives the new system before being replaced.
            self.sampler.thermodynamic_state.set_system(topology_proposal.new_system, fix_state=True)
            self.sampler.sampler_state.system = topology_proposal.new_system
            self.topology = md.Topology.from_openmm(topology_proposal.new_topology)
            self.sampler.sampler_state = ncmc_new_sampler_state
            self.sampler.topology = self.topology
            self.state_key = topology_proposal.new_chemical_state_key
            self.naccepted += 1
            if self.verbose: print("    accepted")
        else:
            self.nrejected += 1
            if self.verbose: print("    rejected")

        if self.storage:
            # Written after accept/reject, so positions and state_key reflect the outcome.
            self.storage.write_configuration('positions', self.sampler.sampler_state.positions, self.topology, iteration=self.iteration)
            self.storage.write_object('state_key', self.state_key, iteration=self.iteration)
            self.storage.write_object('proposed_state_key', topology_proposal.new_chemical_state_key, iteration=self.iteration)
            self.storage.write_quantity('naccepted', self.naccepted, iteration=self.iteration)
            self.storage.write_quantity('nrejected', self.nrejected, iteration=self.iteration)
            self.storage.write_quantity('logp_accept', logp_accept, iteration=self.iteration)
            self.storage.write_quantity('logp_topology_proposal', topology_proposal.logp_proposal, iteration=self.iteration)


        # Update statistics.
        self.update_statistics()

    def update(self):
        """
        Advance the sampler by a single iteration.

        Positions are propagated for ``self._n_iterations_per_update`` iterations,
        then the thermodynamic state is resampled and ``self.iteration`` is
        incremented. Afterwards a PDB frame is appended to ``self.pdbfile`` (when
        one is set) and ``self.storage`` is synced (when one is set).
        """
        separator = "-" * 80
        if self.verbose:
            print(separator)
            print("Expanded Ensemble sampler iteration %8d" % self.iteration)

        self.update_positions(n_iterations=self._n_iterations_per_update)
        self.update_state()
        self.iteration += 1

        if self.verbose:
            print(separator)

        # Optional PDB trajectory output; the frame index is the new iteration number.
        if self.pdbfile is not None:
            print("Writing frame...")
            from simtk.openmm.app import PDBFile
            frame_topology = self.topology.to_openmm()
            frame_positions = self.sampler.sampler_state.positions
            PDBFile.writeModel(frame_topology, frame_positions, self.pdbfile, self.iteration)
            self.pdbfile.flush()

        # Flush anything written during this iteration.
        if self.storage:
            self.storage.sync()

    def run(self, niterations=1):
        """
        Call ``self.update()`` repeatedly.

        Parameters
        ----------
        niterations : int, optional, default=1
            How many times ``update()`` is invoked.
        """
        for _step in range(niterations):
            self.update()

    def update_statistics(self):
        """
        Record one more visit to the current state.

        The counter ``self.number_of_state_visits[self.state_key]`` is created
        at zero the first time a state key is seen, then incremented.
        """
        visits = self.number_of_state_visits
        current_key = self.state_key
        visits.setdefault(current_key, 0)
        visits[current_key] += 1
示例#15
0
    def __init__(self, sampler, logZ=None, log_target_probabilities=None, update_method='two-stage', storage=None, second_stage_start=1000):
        """
        Create a SAMS Sampler.

        Parameters
        ----------
        sampler : ExpandedEnsembleSampler
            The expanded ensemble sampler used to sample both configurations and discrete thermodynamic states.
            Its ``log_weights`` attribute is overwritten with the negated initial ``logZ`` values.
        logZ : dict of key : float, optional, default=None
            If specified, initial log partition functions for the given chemical states; these override
            the default initial values. Only used when the proposal engine reports a chemical state list.
        log_target_probabilities : dict of key : float, optional, default=None
            If specified, unnormalized log target probabilities for the given chemical states; states not
            listed keep the uniform default log(1/n_states). The resulting set is then normalized.
            Only used when the proposal engine reports a chemical state list.
        update_method : str, optional, default='two-stage'
            SAMS update algorithm
        storage : optional, default=None
            If specified, the storage object is wrapped in a NetCDFStorageView tagged with this class name.
        second_stage_start : int, optional, default=1000
            At what iteration number to switch to the optimal gain decay. None is stored as 0.

        Raises
        ------
        ValueError
            If ``logZ`` or ``log_target_probabilities`` contains a chemical state that is not in the
            proposal engine's chemical state list.

        """
        try:
            from scipy.special import logsumexp
        except ImportError:
            # Older SciPy releases only ship logsumexp in scipy.misc.
            from scipy.misc import logsumexp

        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.chemical_states = None
        self._reference_state = None
        try:
            self.chemical_states = self.sampler.proposal_engine.chemical_state_list
        except NotImplementedError:
            logger.warning("The proposal engine has not properly implemented the chemical state property; SAMS will add states on the fly.")

        if self.chemical_states:
            # Select a reference state that will always be subtracted (ensure that dict ordering does not change)
            self._reference_state = self.chemical_states[0]

            # Initialize the logZ dictionary with scores based on the number of atoms
            # This is not the negative because the weights are set to the negative of the initial weights
            self.logZ = {chemical_state: self._num_dof_compensation(chemical_state) for chemical_state in self.chemical_states}

            # Initialize log target probabilities with log(1/n_states) = -log(n_states)
            log_uniform_probability = -np.log(len(self.chemical_states))
            self.log_target_probabilities = {chemical_state: log_uniform_probability for chemical_state in self.chemical_states}

            # If initial weights are specified, override any weight with what is provided
            # However, if the chemical state is not in the reachable chemical state list, throw an exception
            if logZ is not None:
                # Iterate over items(): iterating the dict itself yields only the keys.
                for (chemical_state, logZ_value) in logZ.items():
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a logZ initial value for an un-proposable chemical state")
                    self.logZ[chemical_state] = logZ_value

            if log_target_probabilities is not None:
                for (chemical_state, log_target_probability) in log_target_probabilities.items():
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a log target probability for an un-proposable chemical state.")
                    self.log_target_probabilities[chemical_state] = log_target_probability

                # normalize target probabilities
                # this is likely not necessary, but it is copying the algorithm in Ref 1
                log_sum_target_probabilities = logsumexp(list(self.log_target_probabilities.values()))
                self.log_target_probabilities = {
                    chemical_state: log_target_probability - log_sum_target_probabilities
                    for (chemical_state, log_target_probability) in self.log_target_probabilities.items()
                }
        else:
            # No state list available up front: start empty.
            self.logZ = dict()
            self.log_target_probabilities = dict()

        self.update_method = update_method

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize.
        self.iteration = 0
        self.verbose = False
        # The expanded ensemble sampler's weights are the negated log partition function estimates.
        self.sampler.log_weights = {state_key: -log_z for (state_key, log_z) in self.logZ.items()}

        self.second_stage_start = 0 if second_stage_start is None else second_stage_start
示例#16
0
class ProtonationStateSampler(object):
    """
    Protonation state sampler with given fixed target probabilities for ligand in solvent.

    Each iteration updates the complex and solvent samplers and then copies the
    solvent sampler's log weights onto the complex sampler.

    Attributes
    ----------
    complex_sampler : ExpandedEnsembleSampler
        Ligand in complex sampler.
    solvent_sampler : SAMSSampler
        Ligand in solution sampler.
    samplers : list
        ``[complex_sampler, solvent_sampler]``; updated in this order.
    log_state_penalties : dict
        The penalties passed to the constructor.
    log_target_probabilities : dict
        ``-log_state_penalties[key]`` for every key.
    storage : NetCDFStorageView or None
        Storage view, if a storage layer was given.
    verbose : bool
        If True, verbose output is printed.
    iteration : int
        Number of completed iterations.

    """
    def __init__(self, complex_sampler, solvent_sampler, log_state_penalties, storage=None, verbose=False):
        """
        Set up a protonation state sampler with fixed target probabilities for ligand in solvent.

        Parameters
        ----------
        complex_sampler : ExpandedEnsembleSampler
            Ligand in complex sampler
        solvent_sampler : SAMSSampler
            Ligand in solution sampler
        log_state_penalties : dict
            log_state_penalties[smiles] is the log state free energy (in kT) for ligand state 'smiles'
        storage : NetCDFStorage, optional, default=None
            If specified, will use the storage layer to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        """
        # Keep references to both phases, individually and as an ordered list.
        self.complex_sampler = complex_sampler
        self.solvent_sampler = solvent_sampler
        self.samplers = [complex_sampler, solvent_sampler]
        self.log_state_penalties = log_state_penalties

        # A storage view is only created when a storage layer was supplied.
        if storage is None:
            self.storage = None
        else:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Target log probabilities are the negated penalties.
        self.log_target_probabilities = dict((key, -log_state_penalties[key]) for key in log_state_penalties)
        self.verbose = verbose
        self.iteration = 0

    @property
    def state_keys(self):
        """Keys view of ``log_target_probabilities``."""
        target_probabilities = self.log_target_probabilities
        return target_probabilities.keys()

    def update_samplers(self):
        """
        Call ``update()`` on every sampler in ``self.samplers``.
        """
        for member in self.samplers:
            member.update()

    def update_target_probabilities(self):
        """
        Copy the solvent sampler's log weights onto the complex sampler.
        """
        # One entry per solvent state key; other complex-sampler entries are left alone.
        for state_key in self.solvent_sampler.state_keys:
            self.complex_sampler.log_weights[state_key] = self.solvent_sampler.sampler.log_weights[state_key]

        if self.verbose:
            print("log_weights = %s" % str(self.solvent_sampler.sampler.log_weights))

    def update(self):
        """
        Perform a single iteration: update samplers, propagate weights, sync storage.
        """
        banner = "*" * 80
        if self.verbose:
            print(banner)
            print("ProtonationStateSampler iteration %8d" % self.iteration)

        self.update_samplers()
        self.update_target_probabilities()

        # Storage is synced before the iteration counter advances.
        if self.storage:
            self.storage.sync()
        self.iteration += 1

        if self.verbose:
            print(banner)

    def run(self, niterations=1):
        """
        Run the protonation state sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The number of iterations to run the sampler for.

        """
        for _step in range(niterations):
            self.update()
示例#17
0
class MultiTargetDesign(object):
    """
    Multi-objective design using self-adjusted mixture sampling with additional recursion steps
    that update target weights on the fly.

    Attributes
    ----------
    samplers : list of SAMSSampler
        The SAMS samplers whose relative partition functions go into the design objective computation.
    sampler_exponents : dict of SAMSSampler : float
        sampler_exponents[sampler] is the exponent applied to the partition functions of `sampler`.
    log_target_probabilities : dict of hashable object : float
        log_target_probabilities[key] is the computed log objective function (target probability) for chemical state `key`
    storage : NetCDFStorageView or None
        Storage view, if a storage layer was given.
    verbose : bool
        If True, verbose output is printed.
    iteration : int
        Number of completed iterations.

    """
    def __init__(self, target_samplers, storage=None, verbose=False):
        r"""
        Initialize a multi-objective design sampler with the specified target sampler powers.

        Parameters
        ----------
        target_samplers : dict
            target_samplers[sampler] is the exponent associated with SAMS sampler `sampler` in the multi-objective design.
        storage : NetCDFStorage, optional, default=None
            If specified, will use the storage layer to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        The target sampler weights for N samplers with specified exponents \alpha_n are given by

        \pi_{nk} \propto \prod_{n=1}^N Z_{nk}^{\alpha_n}

        where \pi_{nk} is the target weight for sampler n state k,
        and Z_{nk} is the relative partition function of sampler n among states k.

        Examples
        --------
        Set up a mutation sampler to maximize implicit solvent hydration free energy.
        >>> from perses.tests.testsystems import AlanineDipeptideTestSystem
        >>> testsystem = AlanineDipeptideTestSystem()
        >>> # Set up target samplers.
        >>> target_samplers = { testsystem.sams_samplers['implicit'] : 1.0, testsystem.sams_samplers['vacuum'] : -1.0 }
        >>> # Set up the design sampler.
        >>> designer = MultiTargetDesign(target_samplers)

        """
        # Store target samplers.
        self.sampler_exponents = target_samplers
        self.samplers = list(target_samplers.keys())

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize storage for target probabilities.
        self.log_target_probabilities = dict()
        self.verbose = verbose
        self.iteration = 0

    @property
    def state_keys(self):
        """Keys view of the current ``log_target_probabilities``."""
        return self.log_target_probabilities.keys()

    def update_samplers(self):
        """
        Update all samplers.
        """
        for sampler in self.samplers:
            sampler.update()

    def update_target_probabilities(self):
        """
        Update all target probabilities.

        For every state key known to any sampler, the log target probability is
        the exponent-weighted sum of that sampler's ``logZ`` values, normalized
        by subtracting ``log_sum_exp`` of the result. The result replaces
        ``self.log_target_probabilities`` and is written to storage if set.
        """
        # Gather the union of state keys over all samplers.
        state_keys = set()
        for sampler in self.samplers:
            state_keys.update(sampler.state_keys)

        # Compute unnormalized log target probabilities.
        log_target_probabilities = {key: 0.0 for key in state_keys}
        for (sampler, exponent) in self.sampler_exponents.items():
            for key in sampler.state_keys:
                log_target_probabilities[key] += exponent * sampler.logZ[key]

        # Normalize
        # NOTE(review): log_sum_exp is defined elsewhere and receives the dict itself;
        # presumably it reduces over the values -- confirm against the helper.
        log_sum = log_sum_exp(log_target_probabilities)
        for key in log_target_probabilities:
            log_target_probabilities[key] -= log_sum

        # Store.
        self.log_target_probabilities = log_target_probabilities

        if self.verbose:
            print("log_target_probabilities = %s" % str(self.log_target_probabilities))

        if self.storage:
            self.storage.write_object('log_target_probabilities', self.log_target_probabilities, iteration=self.iteration)

    def update(self):
        """
        Run one iteration of the sampler.
        """
        if self.verbose:
            print("*" * 80)
            print("MultiTargetDesign sampler iteration %8d" % self.iteration)
        self.update_samplers()
        self.update_target_probabilities()
        # The iteration counter advances before storage is synced.
        self.iteration += 1
        if self.storage: self.storage.sync()
        if self.verbose:
            print("*" * 80)

    def run(self, niterations=1):
        """
        Run the multi-target design sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The number of iterations to run the sampler for.

        """
        for _ in range(niterations):
            self.update()
示例#18
0
    def __init__(self, temperature=default_temperature, functions=None, nsteps=default_nsteps,
                 steps_per_propagation=default_steps_per_propagation, timestep=default_timestep,
                 constraint_tolerance=None, platform=None, write_ncmc_interval=1, measure_shadow_work=False,
                 integrator_splitting='V R O H R V', storage=None, verbose=False, LRUCapacity=10, pressure=None, bond_softening_constant=1.0, angle_softening_constant=1.0):
        """
        This is the base class for NCMC switching between two different systems.

        Arguments
        ---------
        temperature : simtk.unit.Quantity with units compatible with kelvin, optional, default=default_temperature
            The temperature at which switching is to be run. None selects default_temperature.
        functions : dict of str:str, optional, default=None
            functions[parameter] is the function (parameterized by 't' which switched from 0 to 1) that
            controls how alchemical context parameter 'parameter' is switched.
            None selects python_hybrid_functions. A deep copy is stored.
        nsteps : int, optional, default=default_nsteps
            The number of steps to use for switching. None selects default_nsteps.
        steps_per_propagation : int, optional, default=default_steps_per_propagation
            The number of intermediate propagation steps taken at each switching step
        timestep : simtk.unit.Quantity with units compatible with femtoseconds, optional, default=default_timestep
            The timestep to use for integration of switching velocity Verlet steps. None selects default_timestep.
        constraint_tolerance : float, optional, default=None
            If not None, this relative constraint tolerance is used for position and velocity constraints.
        platform : simtk.openmm.Platform, optional, default=None
            If specified, the platform to use for OpenMM simulations.
        write_ncmc_interval : int, optional, default=1
            If a positive integer is specified, a snapshot frame will be written to storage with the specified interval on NCMC switching.
            'storage' must also be specified. None is stored as an interval of 1.
        measure_shadow_work : bool, optional, default False
            Whether to measure shadow work
        integrator_splitting : str, optional, default='V R O H R V'
            NCMC internal integrator splitting based on OpenMMTools Langevin splittings
        storage : optional, default=None
            If specified, the storage object is wrapped in a NetCDFStorageView tagged with this class name
            and configuration saving is enabled.
        verbose : bool, optional, default=False
            If True, print debug information.
        LRUCapacity : int, default 10
            Capacity of LRU cache for hybrid systems
        pressure : float, default None
            The pressure to use for the simulation. If None, no barostat
        bond_softening_constant : float, optional, default=1.0
            Stored on the instance; its use is not visible in this constructor.
        angle_softening_constant : float, optional, default=1.0
            Stored on the instance; its use is not visible in this constructor.
        """
        # Handle some defaults.
        # Identity checks: `== None` would invoke any overloaded __eq__ on the argument.
        if functions is None:
            functions = python_hybrid_functions
        if nsteps is None:
            nsteps = default_nsteps
        if timestep is None:
            timestep = default_timestep
        if temperature is None:
            temperature = default_temperature

        self._temperature = temperature
        # Deep copy so later changes to the caller's dict do not affect this engine.
        self._functions = copy.deepcopy(functions)
        self._nsteps = nsteps
        self._timestep = timestep
        self._constraint_tolerance = constraint_tolerance
        self._platform = platform
        self._integrator_splitting = integrator_splitting
        self._steps_per_propagation = steps_per_propagation
        self._verbose = verbose
        self._pressure = pressure
        self._bond_softening_constant = bond_softening_constant
        self._angle_softening_constant = angle_softening_constant
        self._disable_barostat = False
        self._hybrid_cache = LRUCache(capacity=LRUCapacity)
        self._measure_shadow_work = measure_shadow_work

        self._nattempted = 0

        # Configurations are only saved when a storage layer is available.
        self._storage = None
        if storage is not None:
            self._storage = NetCDFStorageView(storage, modname=self.__class__.__name__)
            self._save_configuration = True
        else:
            self._save_configuration = False
        if write_ncmc_interval is not None:
            self._write_ncmc_interval = write_ncmc_interval
        else:
            self._write_ncmc_interval = 1
        # NOTE(review): this keeps the raw argument, so it is None when
        # write_ncmc_interval is None, unlike _write_ncmc_interval above -- confirm intended.
        self._work_save_interval = write_ncmc_interval
示例#19
0
class MultiTargetDesign(object):
    """
    Multi-objective design using self-adjusted mixture sampling with additional recursion steps
    that update target weights on the fly.

    Attributes
    ----------
    samplers : list of SAMSSampler
        The SAMS samplers whose relative partition functions go into the design objective computation.
    sampler_exponents : dict of SAMSSampler : float
        sampler_exponents[sampler] is the exponent applied to the partition functions of `sampler`.
    log_target_probabilities : dict of hashable object : float
        log_target_probabilities[key] is the computed log objective function (target probability) for chemical state `key`
    storage : NetCDFStorageView or None
        Storage view, if a storage layer was given.
    verbose : bool
        If True, verbose output is printed.
    iteration : int
        Number of completed iterations.

    """
    def __init__(self, target_samplers, storage=None, verbose=False):
        r"""
        Initialize a multi-objective design sampler with the specified target sampler powers.

        Parameters
        ----------
        target_samplers : dict
            target_samplers[sampler] is the exponent associated with SAMS sampler `sampler` in the multi-objective design.
        storage : NetCDFStorage, optional, default=None
            If specified, will use the storage layer to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        The target sampler weights for N samplers with specified exponents \alpha_n are given by

        \pi_{nk} \propto \prod_{n=1}^N Z_{nk}^{\alpha_n}

        where \pi_{nk} is the target weight for sampler n state k,
        and Z_{nk} is the relative partition function of sampler n among states k.

        Examples
        --------
        Set up a mutation sampler to maximize implicit solvent hydration free energy.
        >>> from perses.tests.testsystems import AlanineDipeptideTestSystem
        >>> testsystem = AlanineDipeptideTestSystem()
        >>> # Set up target samplers.
        >>> target_samplers = { testsystem.sams_samplers['implicit'] : 1.0, testsystem.sams_samplers['vacuum'] : -1.0 }
        >>> # Set up the design sampler.
        >>> designer = MultiTargetDesign(target_samplers)

        """
        # Store target samplers.
        self.sampler_exponents = target_samplers
        self.samplers = list(target_samplers.keys())

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize storage for target probabilities.
        self.log_target_probabilities = dict()
        self.verbose = verbose
        self.iteration = 0

    @property
    def state_keys(self):
        """Keys view of the current ``log_target_probabilities``."""
        return self.log_target_probabilities.keys()

    def update_samplers(self):
        """
        Update all samplers.
        """
        for sampler in self.samplers:
            sampler.update()

    def update_target_probabilities(self):
        """
        Update all target probabilities.

        For every state key known to any sampler, the log target probability is
        the exponent-weighted sum of that sampler's ``logZ`` values, normalized
        by subtracting ``log_sum_exp`` of the result. The result replaces
        ``self.log_target_probabilities`` and is written to storage if set.
        """
        # Gather the union of state keys over all samplers.
        state_keys = set()
        for sampler in self.samplers:
            state_keys.update(sampler.state_keys)

        # Compute unnormalized log target probabilities.
        log_target_probabilities = {key: 0.0 for key in state_keys}
        for (sampler, exponent) in self.sampler_exponents.items():
            for key in sampler.state_keys:
                log_target_probabilities[key] += exponent * sampler.logZ[key]

        # Normalize
        # NOTE(review): log_sum_exp is defined elsewhere and receives the dict itself;
        # presumably it reduces over the values -- confirm against the helper.
        log_sum = log_sum_exp(log_target_probabilities)
        for key in log_target_probabilities:
            log_target_probabilities[key] -= log_sum

        # Store.
        self.log_target_probabilities = log_target_probabilities

        if self.verbose:
            print("log_target_probabilities = %s" % str(self.log_target_probabilities))

        if self.storage:
            self.storage.write_object('log_target_probabilities', self.log_target_probabilities, iteration=self.iteration)

    def update(self):
        """
        Run one iteration of the sampler.
        """
        if self.verbose:
            print("*" * 80)
            print("MultiTargetDesign sampler iteration %8d" % self.iteration)
        self.update_samplers()
        self.update_target_probabilities()
        # The iteration counter advances before storage is synced.
        self.iteration += 1
        if self.storage: self.storage.sync()
        if self.verbose:
            print("*" * 80)

    def run(self, niterations=1):
        """
        Run the multi-target design sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The number of iterations to run the sampler for.

        """
        for _ in range(niterations):
            self.update()
示例#20
0
class SAMSSampler(object):
    """
    Self-adjusted mixture sampling engine.

    Properties
    ----------
    state_keys : view of dict keys
        The names of states sampled by the sampler (the keys of `logZ`).
    logZ : dict() of keys : float
        logZ[key] is the log partition function (up to an additive constant) estimate for chemical state `key`
    update_method : str
        Update method.  One of ['one-stage', 'two-stage']
    iteration : int
        Iterations completed.
    verbose : bool
        If True, verbose debug output is printed.

    References
    ----------
    [1] Tan, Z. (2015) Optimally adjusted mixture sampling and locally weighted histogram analysis, Journal of Computational and Graphical Statistics, to appear. (Supplement)
    http://www.stat.rutgers.edu/home/ztan/Publication/SAMS_redo4.pdf

    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={'implicitSolvent' : None, 'constraints' : None }, nonperiodic_forcefield_kwargs={'nonbondedMethod' : app.NoCutoff})
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> from perses.rjmc.geometry import FFAllAngleGeometryEngine
    >>> geometry_engine = FFAllAngleGeometryEngine(metadata={})
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(test.topology, system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine, geometry_engine)
    >>> # Create a SAMS sampler
    >>> sams_sampler = SAMSSampler(exen_sampler)
    >>> # Run the sampler
    >>> sams_sampler.run() # doctest: +ELLIPSIS
    ...
    """
    def __init__(self, sampler, logZ=None, log_target_probabilities=None, update_method='two-stage', storage=None, second_stage_start=1000):
        """
        Create a SAMS Sampler.

        Parameters
        ----------
        sampler : ExpandedEnsembleSampler
            The expanded ensemble sampler used to sample both configurations and discrete thermodynamic states.
        logZ : dict of key : float, optional, default=None
            If specified, the log partition functions for the given states are initialized to these values.
            Only used when the proposal engine enumerates its chemical states.
        log_target_probabilities : dict of key : float, optional, default=None
            If specified, unnormalized log target probabilities for the given states; the full set is
            normalized afterwards. Default is a uniform distribution over the enumerated states.
            Only used when the proposal engine enumerates its chemical states.
        update_method : str, optional, default='two-stage'
            SAMS update algorithm; one of 'one-stage' or 'two-stage'.
        storage : NetCDFStorageView, optional, default=None
            If specified, logZ estimates and log weights are written through this storage layer.
        second_stage_start : int, optional, default=1000
            At what iteration number to switch to the optimal gain decay. `None` is treated as 0.

        Raises
        ------
        ValueError
            If `logZ` or `log_target_probabilities` contains a chemical state that is not in the
            proposal engine's chemical state list.

        """
        from scipy.special import logsumexp

        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.chemical_states = None
        self._reference_state = None
        try:
            self.chemical_states = self.sampler.proposal_engine.chemical_state_list
        except NotImplementedError:
            _logger.warning("The proposal engine has not properly implemented the chemical state property; SAMS will add states on the fly.")

        if self.chemical_states:
            # Select a reference state that will always be subtracted (ensure that dict ordering does not change)
            self._reference_state = self.chemical_states[0]

            # Initialize the logZ dictionary with scores based on the number of atoms
            # This is not the negative because the weights are set to the negative of the initial weights
            self.logZ = {chemical_state: self._num_dof_compensation(chemical_state) for chemical_state in self.chemical_states}

            # Initialize log target probabilities uniformly: log(1/n_states) = -log(n_states)
            uniform_log_probability = -np.log(len(self.chemical_states))
            self.log_target_probabilities = {chemical_state: uniform_log_probability for chemical_state in self.chemical_states}

            # If initial weights are specified, override any weight with what is provided.
            # However, if the chemical state is not in the reachable chemical state list, throw an exception.
            if logZ is not None:
                for chemical_state, logZ_value in logZ.items():
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a logZ initial value for an un-proposable chemical state")
                    self.logZ[chemical_state] = logZ_value

            if log_target_probabilities is not None:
                for chemical_state, log_target_probability in log_target_probabilities.items():
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a log target probability for an un-proposable chemical state.")
                    self.log_target_probabilities[chemical_state] = log_target_probability

                # Normalize target probabilities.
                # This is likely not necessary, but it is copying the algorithm in Ref 1.
                log_sum_target_probabilities = logsumexp(list(self.log_target_probabilities.values()))
                self.log_target_probabilities = {
                    chemical_state: log_target_probability - log_sum_target_probabilities
                    for chemical_state, log_target_probability in self.log_target_probabilities.items()
                }
        else:
            # States are not enumerated up front; they are added as they are visited.
            self.logZ = dict()
            self.log_target_probabilities = dict()

        self.update_method = update_method

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize.
        self.iteration = 0
        self.verbose = False
        # The expanded ensemble bias for each state is the negative of its logZ estimate.
        self.sampler.log_weights = {state_key: - self.logZ[state_key] for state_key in self.logZ}

        self.second_stage_start = 0
        if second_stage_start is not None:
            self.second_stage_start = second_stage_start

    @property
    def state_keys(self):
        """Keys of all states that currently have a logZ estimate."""
        return self.logZ.keys()

    def _num_dof_compensation(self, smiles):
        """
        Compute an approximate compensating factor for a chemical state based on the number of degrees of freedom that it has.

        The formula is:
        (num_heavy*heavy_factor) + (num_light*light_factor) where
        heavy_factor = 4.5 and
        light_factor = 3.8
        and "light" atoms are those with atomic number 1.

        Parameters
        ----------
        smiles : str
            The SMILES string of the molecule

        Returns
        -------
        correction_factor : float
        """
        # Imported here so the name is in scope for this method; a function-local
        # import inside __init__ would not be visible from here.
        from perses.utils.openeye import smiles_to_oemol

        mol = smiles_to_oemol(smiles)
        num_heavy = 0
        num_light = 0

        heavy_factor = 4.5
        light_factor = 3.8

        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == 1:
                num_light += 1
            else:
                num_heavy += 1

        correction_factor = num_heavy*heavy_factor + num_light*light_factor

        return correction_factor

    def update_sampler(self):
        """
        Update the underlying expanded ensembles sampler.
        """
        self.sampler.update()

    def update_logZ_estimates(self):
        """
        Update the logZ estimates according to self.update_method.

        The estimate for the sampler's current state is incremented by
        gamma / target_probability, the reference state's estimate (if any) is
        subtracted from all estimates, and the sampler's log weights are reset
        to the negated estimates.

        Raises
        ------
        ValueError
            If `self.update_method` is not 'one-stage' or 'two-stage'.
        """
        state_key = self.sampler.state_key

        # Add state key to dictionaries if we haven't visited this state before.
        if state_key not in self.logZ:
            _logger.warning("A new state key is being added to the logZ; note that this makes the resultant algorithm different from SAMS")
            self.logZ[state_key] = 0.0
        if state_key not in self.log_target_probabilities:
            _logger.warning("A new state key is being added to the target probabilities; note that this makes the resultant algorithm different from SAMS")
            self.log_target_probabilities[state_key] = 0.0

        # Choose the gain for this iteration.
        if self.update_method == 'one-stage':
            # Based on Eq. 9 of Ref. [1]
            gamma = 1.0 / float(self.iteration+1)
        elif self.update_method == 'two-stage':
            # Keep gamma large until second stage is activated.
            if self.iteration < self.second_stage_start:
                # First stage.
                gamma = 1.0
                # TODO: Determine when to switch to second stage
            else:
                # Second stage.
                gamma = 1.0 / float(self.iteration - self.second_stage_start + 1)
        else:
            raise ValueError("SAMS update method '%s' unknown." % self.update_method)

        # Get the (t-1/2) update from equation 9 in ref 1.
        self.logZ[state_key] += gamma / np.exp(self.log_target_probabilities[state_key])

        if self._reference_state is not None:
            # The second step of the (t-1/2 update), subtracting the reference state from everything else.
            # We can only do this for cases where all states have been enumerated.
            reference_logZ = self.logZ[self._reference_state]
            self.logZ = {key: logZ_estimate - reference_logZ for key, logZ_estimate in self.logZ.items()}

        # Update log weights for sampler.
        self.sampler.log_weights = {key: - self.logZ[key] for key in self.logZ}

        if self.storage:
            self.storage.write_object('logZ', self.logZ, iteration=self.iteration)
            self.storage.write_object('log_weights', self.sampler.log_weights, iteration=self.iteration)

    def update(self):
        """
        Update the sampler with one step of sampling.

        Runs one update of the underlying sampler, refreshes the logZ
        estimates, syncs storage if attached, and increments the iteration
        counter.
        """
        if self.verbose:
            print("=" * 80)
            print("SAMS sampler iteration %5d" % self.iteration)
        self.update_sampler()
        self.update_logZ_estimates()
        if self.storage: self.storage.sync()
        self.iteration += 1
        if self.verbose:
            print("=" * 80)

    def run(self, niterations=1):
        """
        Run the sampler for the specified number of iterations

        Parameters
        ----------
        niterations : int, optional, default=1
            Number of iterations to run the sampler for.
        """
        for _ in range(niterations):
            self.update()
示例#21
0
    def __init__(self, **kwargs):
        """
        Build the selectivity test system for imatinib bound to Src and Abl.

        For each environment ('explicit-src-imatinib' and 'explicit-abl-imatinib')
        this constructs the solvated topology and positions, a system, a proposal
        engine over a set of kinase-inhibitor SMILES strings, and a chain of
        MCMC -> expanded ensemble -> SAMS samplers.  A MultiTargetDesign sampler
        is then created with exponent +1.0 for the Src sampler and -1.0 for the
        Abl sampler.  Everything is stored on `self` and the system is minimized.

        Parameters
        ----------
        **kwargs
            Forwarded unchanged to the superclass constructor.

        Notes
        -----
        `self.storage` is read but not assigned here; presumably it is set by the
        superclass constructor -- TODO confirm.
        """
        super(Selectivity, self).__init__(**kwargs)

        solvents = ['explicit'] # DEBUG
        components = ['src-imatinib', 'abl-imatinib'] # TODO: Add 'ATP:kinase' complex to enable resistance design
        # NOTE(review): `padding` and `explicit_solvent_model` are not referenced again in this
        # method; the addSolvent call below repeats the same values as literals.
        padding = 9.0*unit.angstrom
        explicit_solvent_model = 'tip3p'
        setup_path = 'data/abl-src'
        thermodynamic_states = dict()
        temperature = 300*unit.kelvin
        pressure = 1.0*unit.atmospheres
        geometry_engine = geometry.FFAllAngleGeometryEngine()


        # Construct list of all environments
        # Environment names have the form '<solvent>-<component>'.
        environments = list()
        for solvent in solvents:
            for component in components:
                environment = solvent + '-' + component
                environments.append(environment)

        # Read SMILES from CSV file of clinical kinase inhibitors.
        # Column 0 is read as the name and column 1 as the SMILES string; only the SMILES is kept.
        from pkg_resources import resource_filename
        smiles_filename = resource_filename('perses', 'data/clinical-kinase-inhibitors.csv')
        import csv
        molecules = list()
        with open(smiles_filename, 'r') as csvfile:
            csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for row in csvreader:
                name = row[0]
                smiles = row[1]
                molecules.append(smiles)
        # Add current molecule
        molecules.append('Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C')
        self.molecules = molecules

        # Expand molecules without explicit stereochemistry and make canonical isomeric SMILES.
        molecules = sanitizeSMILES(self.molecules)

        # Create a system generator for desired forcefields
        from perses.rjmc.topology_proposal import SystemGenerator
        from pkg_resources import resource_filename
        gaff_xml_filename = resource_filename('perses', 'data/gaff.xml')
        system_generators = dict()
        system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
            forcefield_kwargs={ 'nonbondedMethod' : app.CutoffPeriodic, 'nonbondedCutoff' : 9.0 * unit.angstrom, 'implicitSolvent' : None, 'constraints' : None },
            use_antechamber=True)
# NOTE implicit solvent not supported by this SystemGenerator
#        system_generators['implicit'] = SystemGenerator([gaff_xml_filename, 'amber99sbildn.xml', 'amber99_obc.xml'],
#            forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : app.OBC2, 'constraints' : None },
#            use_antechamber=True)
        system_generators['vacuum'] = SystemGenerator([gaff_xml_filename, 'amber99sbildn.xml'],
            forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None },
            use_antechamber=True)
        # Copy system generators for all environments
        # Each environment shares the generator object of its solvent (no new generator is built).
        for solvent in solvents:
            for component in components:
                environment = solvent + '-' + component
                system_generators[environment] = system_generators[solvent]

        # Load topologies and positions for all components
        # One PDB file per component, located at '<setup_path>/<component>.pdb' inside the perses package.
        from simtk.openmm.app import PDBFile, Modeller
        topologies = dict()
        positions = dict()
        for component in components:
            pdb_filename = resource_filename('perses', os.path.join(setup_path, '%s.pdb' % component))
            print(pdb_filename)
            pdbfile = PDBFile(pdb_filename)
            topologies[component] = pdbfile.topology
            positions[component] = pdbfile.positions

        # Construct positions and topologies for all solvent environments
        for solvent in solvents:
            for component in components:
                environment = solvent + '-' + component
                if solvent == 'explicit':
                    # Create MODELLER object.
                    modeller = app.Modeller(topologies[component], positions[component])
                    modeller.addSolvent(system_generators[solvent].getForceField(), model='tip3p', padding=9.0*unit.angstrom)
                    topologies[environment] = modeller.getTopology()
                    positions[environment] = modeller.getPositions()
                else:
                    # Non-explicit environments reuse the component's topology and positions as-is.
                    environment = solvent + '-' + component
                    topologies[environment] = topologies[component]
                    positions[environment] = positions[component]

        # Set up the proposal engines.
        from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
        proposal_metadata = { }
        proposal_engines = dict()
        for environment in environments:
            # Give each environment its own storage view when a storage layer is available.
            storage = None
            if self.storage:
                storage = NetCDFStorageView(self.storage, envname=environment)
            proposal_engines[environment] = SmallMoleculeSetProposalEngine(molecules, system_generators[environment], residue_name='MOL', storage=storage)

        # Generate systems
        systems = dict()
        for environment in environments:
            systems[environment] = system_generators[environment].build_system(topologies[environment])

        # Define thermodynamic state of interest.
        # NOTE(review): every entry created in this loop is assigned again, per environment,
        # at the end of the sampler-construction loop below.
        from perses.samplers.thermodynamics import ThermodynamicState
        thermodynamic_states = dict()
        temperature = 300*unit.kelvin
        pressure = 1.0*unit.atmospheres
        for component in components:
            for solvent in solvents:
                environment = solvent + '-' + component
                if solvent == 'explicit':
                    thermodynamic_states[environment] = ThermodynamicState(system=systems[environment], temperature=temperature, pressure=pressure)
                else:
                    thermodynamic_states[environment] = ThermodynamicState(system=systems[environment], temperature=temperature)

        # Create SAMS samplers
        # For each environment: MCMCSampler -> ExpandedEnsembleSampler -> SAMSSampler, all sharing one storage view.
        from perses.samplers.samplers import SamplerState, MCMCSampler, ExpandedEnsembleSampler, SAMSSampler
        mcmc_samplers = dict()
        exen_samplers = dict()
        sams_samplers = dict()
        for solvent in solvents:
            for component in components:
                environment = solvent + '-' + component
                # State key of the starting topology, as computed by this environment's proposal engine.
                chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment])

                storage = None
                if self.storage:
                    storage = NetCDFStorageView(self.storage, envname=environment)

                if solvent == 'explicit':
                    # Explicit solvent: include pressure and the system's default periodic box vectors.
                    thermodynamic_state = ThermodynamicState(system=systems[environment], temperature=temperature, pressure=pressure)
                    sampler_state = SamplerState(system=systems[environment], positions=positions[environment], box_vectors=systems[environment].getDefaultPeriodicBoxVectors())
                else:
                    thermodynamic_state = ThermodynamicState(system=systems[environment], temperature=temperature)
                    sampler_state = SamplerState(system=systems[environment], positions=positions[environment])

                mcmc_samplers[environment] = MCMCSampler(thermodynamic_state, sampler_state, storage=storage)
                mcmc_samplers[environment].nsteps = 5 # reduce number of steps for testing
                mcmc_samplers[environment].verbose = True
                exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment], chemical_state_key, proposal_engines[environment], options={'nsteps':10}, geometry_engine=geometry_engine, storage=storage)
                exen_samplers[environment].verbose = True
                sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage)
                sams_samplers[environment].verbose = True
                thermodynamic_states[environment] = thermodynamic_state

        # Create test MultiTargetDesign sampler.
        # The Src sampler gets exponent +1.0 and the Abl sampler -1.0.

        from perses.samplers.samplers import MultiTargetDesign
        target_samplers = {sams_samplers['explicit-src-imatinib']: 1.0, sams_samplers['explicit-abl-imatinib']: -1.0}
        designer = MultiTargetDesign(target_samplers, storage=self.storage)
        designer.verbose = True

        # Store things.
        # self.molecules now holds the sanitized SMILES list rather than the raw list assigned earlier.
        self.molecules = molecules
        self.environments = environments
        self.topologies = topologies
        self.positions = positions
        self.system_generators = system_generators
        self.systems = systems
        self.proposal_engines = proposal_engines
        self.thermodynamic_states = thermodynamic_states
        self.mcmc_samplers = mcmc_samplers
        self.exen_samplers = exen_samplers
        self.sams_samplers = sams_samplers
        self.designer = designer

        # This system must currently be minimized.
        minimize(self)
示例#22
0
class ProtonationStateSampler(object):
    """
    Protonation state sampler with given fixed target probabilities for ligand in solvent.

    Attributes
    ----------
    complex_sampler : ExpandedEnsembleSampler
        Sampler for the ligand in complex; its `log_weights` are overwritten on every update.
    solvent_sampler : SAMSSampler
        Sampler for the ligand in solution; the source of the log weights.
    samplers : list
        `[complex_sampler, solvent_sampler]`, in the order they are updated.
    log_state_penalties : dict
        The penalties passed to the constructor, stored unchanged.
    log_target_probabilities : dict of hashable object : float
        log_target_probabilities[key] is the negated penalty for chemical state `key`
    storage : NetCDFStorageView or None
        Storage view created from the `storage` argument, if one was given.
    iteration : int
        Number of completed iterations.
    verbose : bool
        If True, verbose output is printed.

    """
    def __init__(self, complex_sampler, solvent_sampler, log_state_penalties, storage=None, verbose=False):
        """
        Initialize a protonation state sampler with fixed target probabilities for ligand in solvent.

        Parameters
        ----------
        complex_sampler : ExpandedEnsembleSampler
            Ligand in complex sampler
        solvent_sampler : SAMSSampler
            Ligand in solution sampler
        log_state_penalties : dict
            log_state_penalties[smiles] is the log state free energy (in kT) for ligand state 'smiles'
        storage : NetCDFStorage, optional, default=None
            If specified, will use the storage layer to write trajectory data.
        verbose : bool, optional, default=False
            If true, will print verbose output

        """
        # Remember the two samplers, both individually and as an ordered list.
        self.log_state_penalties = log_state_penalties
        self.complex_sampler = complex_sampler
        self.solvent_sampler = solvent_sampler
        self.samplers = [complex_sampler, solvent_sampler]

        # Wrap the storage layer in a view named after this class, if one was supplied.
        if storage is None:
            self.storage = None
        else:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Target log probabilities are the negated state penalties.
        negated_penalties = dict()
        for state in log_state_penalties:
            negated_penalties[state] = - log_state_penalties[state]
        self.log_target_probabilities = negated_penalties

        self.verbose = verbose
        self.iteration = 0

    @property
    def state_keys(self):
        """Keys of the states that have a target probability."""
        return self.log_target_probabilities.keys()

    def update_samplers(self):
        """
        Call `update()` on the complex sampler and then on the solvent sampler.
        """
        for member in self.samplers:
            member.update()

    def update_target_probabilities(self):
        """
        Copy the solvent sampler's log weights onto the complex sampler.

        For every state key of the solvent sampler, the log weight held by the
        solvent sampler's underlying sampler is assigned to the same key of the
        complex sampler's `log_weights`.
        """
        for state in self.solvent_sampler.state_keys:
            solvent_weight = self.solvent_sampler.sampler.log_weights[state]
            self.complex_sampler.log_weights[state] = solvent_weight

        if self.verbose:
            print("log_weights = %s" % str(self.solvent_sampler.sampler.log_weights))

    def update(self):
        """
        Run one iteration of the sampler.

        Updates both samplers, copies the log weights across, syncs storage if
        attached, and then increments the iteration counter.
        """
        banner = "*" * 80
        if self.verbose:
            print(banner)
            print("ProtonationStateSampler iteration %8d" % self.iteration)

        self.update_samplers()
        self.update_target_probabilities()

        if self.storage:
            self.storage.sync()

        self.iteration += 1

        if self.verbose:
            print(banner)

    def run(self, niterations=1):
        """
        Run the protonation state sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The number of iterations to run the sampler for.

        """
        # One full update per requested iteration.
        for _ in range(niterations):
            self.update()
示例#23
0
    def __init__(self, sampler, topology, state_key, proposal_engine, geometry_engine, log_weights=None, options=None, platform=None, envname=None, storage=None, ncmc_write_interval=1):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        geometry_engine : GeometryEngine
            GeometryEngine to use for dimension matching
        log_weights : dict of object : float
            Log weights to use for expanded ensemble biases.
            The dict is stored by reference, not copied.
        options : dict, optional, default=None
            Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC,
            plus 'nsteps_mcmc' and 'splitting'. Missing options are treated as `None`.
            The dict passed in is not modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.
        envname : str, optional, default=None
            Accepted for interface compatibility; not used by this constructor.
        storage : NetCDFStorageView, optional, default=None
            If specified, use this storage layer.
        ncmc_write_interval : int, default 1
            How frequently to write out NCMC protocol steps.
        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self._pressure = sampler.thermodynamic_state.pressure
        self._temperature = sampler.thermodynamic_state.temperature
        self._omm_topology = topology
        self.topology = md.Topology.from_openmm(topology)
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = log_weights
        if self.log_weights is None: self.log_weights = dict()

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize
        self.iteration = 0
        option_names = ['timestep', 'nsteps', 'functions', 'nsteps_mcmc', 'splitting']

        # Work on a private copy so that filling in defaults never alters the caller's dict.
        options = dict() if options is None else dict(options)
        for option_name in option_names:
            options.setdefault(option_name, None)

        # Integrator splitting for NCMC; falls back to a fixed default string.
        if options['splitting']:
            self._ncmc_splitting = options['splitting']
        else:
            self._ncmc_splitting = "V R O H R V"

        # An NCMC engine is only created when a non-zero number of switching steps is requested.
        if options['nsteps']:
            self._switching_nsteps = options['nsteps']
            self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature,
                                          timestep=options['timestep'], nsteps=options['nsteps'],
                                          functions=options['functions'], integrator_splitting=self._ncmc_splitting,
                                          platform=platform, storage=self.storage,
                                          write_ncmc_interval=ncmc_write_interval)
        else:
            self._switching_nsteps = 0

        if options['nsteps_mcmc']:
            self._n_iterations_per_update = options['nsteps_mcmc']
        else:
            self._n_iterations_per_update = 100

        self.geometry_engine = geometry_engine
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs
        self.logPs = list()
        # Briefly minimize the wrapped sampler's configuration before use.
        self.sampler.minimize(max_iterations=40)
示例#24
0
                    modeller.addSolvent(system_generators[solvent].getForceField(), model='tip3p', padding=9.0*unit.angstrom)
                    topologies[environment] = modeller.getTopology()
                    positions[environment] = modeller.getPositions()
                else:
                    environment = solvent + '-' + component
                    topologies[environment] = topologies[component]
                    positions[environment] = positions[component]

        # Set up the proposal engines.
        from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
        proposal_metadata = { }
        proposal_engines = dict()
        for environment in environments:
            storage = None
            if self.storage:
                storage = NetCDFStorageView(self.storage, envname=environment)
            proposal_engines[environment] = SmallMoleculeSetProposalEngine(molecules, system_generators[environment], residue_name='MOL', storage=storage)

        # Generate systems
        systems = dict()
        for environment in environments:
            systems[environment] = system_generators[environment].build_system(topologies[environment])

        # Define thermodynamic state of interest.
        from perses.samplers.thermodynamics import ThermodynamicState
        thermodynamic_states = dict()
        temperature = 300*unit.kelvin
        pressure = 1.0*unit.atmospheres
        for component in components:
            for solvent in solvents:
                environment = solvent + '-' + component
示例#25
0
    def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int], equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None, geometry_options: Dict=None):
        self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules]
        environments = ['explicit', 'vacuum']
        temperature = 298.15 * unit.kelvin
        pressure = 1.0 * unit.atmospheres
        constraints = app.HBonds
        self._storage = NetCDFStorage(output_filename)
        self._ncmc_switching_times = ncmc_switching_times
        self._n_equilibrium_steps = equilibrium_steps
        self._geometry_options = geometry_options

        # Create a system generator for our desired forcefields.
        from perses.rjmc.topology_proposal import SystemGenerator
        system_generators = dict()
        from pkg_resources import resource_filename
        gaff_xml_filename = resource_filename('perses', 'data/gaff.xml')
        barostat = openmm.MonteCarloBarostat(pressure, temperature)
        system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'],
                                                        forcefield_kwargs={'nonbondedCutoff': 9.0 * unit.angstrom,
                                                                           'implicitSolvent': None,
                                                                           'constraints': constraints,
                                                                           'ewaldErrorTolerance': 1e-5,
                                                                           'hydrogenMass': 3.0*unit.amu}, periodic_forcefield_kwargs = {'nonbondedMethod': app.PME}
                                                        barostat=barostat)
        system_generators['vacuum'] = SystemGenerator([gaff_xml_filename],
                                                      forcefield_kwargs={'implicitSolvent': None,
                                                                         'constraints': constraints,
                                                                         'hydrogenMass': 3.0*unit.amu}, nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff})

        #
        # Create topologies and positions
        #
        topologies = dict()
        positions = dict()

        from openmoltools import forcefield_generators
        forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
        forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

        # Create molecule in vacuum.
        from perses.utils.openeye import extractPositionsFromOEMol
        from openmoltools.openeye import smiles_to_oemol, generate_conformers
        if initial_molecule:
            smiles = initial_molecule
        else:
            smiles = np.random.choice(molecules)
        molecule = smiles_to_oemol(smiles)
        molecule = generate_conformers(molecule, max_confs=1)
        topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule)
        positions['vacuum'] = extractPositionsFromOEMol(molecule)

        # Create molecule in solvent.
        modeller = app.Modeller(topologies['vacuum'], positions['vacuum'])
        modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom)
        topologies['explicit'] = modeller.getTopology()
        positions['explicit'] = modeller.getPositions()

        # Set up the proposal engines.
        proposal_metadata = {}
        proposal_engines = dict()

        for environment in environments:
            proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules,
                                                                               system_generators[environment])

        # Generate systems
        systems = dict()
        for environment in environments:
            systems[environment] = system_generators[environment].build_system(topologies[environment])

        # Define thermodynamic state of interest.

        thermodynamic_states = dict()
        thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'],
                                                                     temperature=temperature, pressure=pressure)
        thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature)

        # Create SAMS samplers
        from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler
        mcmc_samplers = dict()
        exen_samplers = dict()
        sams_samplers = dict()
        for environment in environments:
            storage = NetCDFStorageView(self._storage, envname=environment)

            if self._geometry_options:
                n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment]
                use_sterics = self._geometry_options['use_sterics'][environment]

            else:
                n_torsion_divisions = 180
                use_sterics = False

            geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics)
            move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V",
                                                       n_restart_attempts=10)
            chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment])
            if environment == 'explicit':
                sampler_state = states.SamplerState(positions=positions[environment],
                                                    box_vectors=systems[environment].getDefaultPeriodicBoxVectors())
            else:
                sampler_state = states.SamplerState(positions=positions[environment])
            mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move)


            exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment],
                                                                 chemical_state_key, proposal_engines[environment],
                                                                 geometry_engine,
                                                                 options={'nsteps': self._ncmc_switching_times[environment]}, storage=storage, ncmc_write_interval=self._ncmc_switching_times[environment])
            exen_samplers[environment].verbose = True
            sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage)
            sams_samplers[environment].verbose = True

        # Create test MultiTargetDesign sampler.
        from perses.samplers.samplers import MultiTargetDesign
        target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0}
        designer = MultiTargetDesign(target_samplers, storage=self._storage)

        # Store things.
        self.molecules = molecules
        self.environments = environments
        self.topologies = topologies
        self.positions = positions
        self.system_generators = system_generators
        self.proposal_engines = proposal_engines
        self.thermodynamic_states = thermodynamic_states
        self.mcmc_samplers = mcmc_samplers
        self.exen_samplers = exen_samplers
        self.sams_samplers = sams_samplers
        self.designer = designer
示例#26
0
class NCMCEngine(object):
    """
    NCMC switching engine.

    Builds (and caches) a hybrid system for a topology proposal and drives it
    through a nonequilibrium switching protocol via `integrate`.

    Examples
    --------

    NOTE(review): the example below appears to predate the current `integrate`
    signature, which takes an initial and a proposed sampler state (there is no
    `direction` argument) and returns five values -- confirm before relying on
    it as a doctest.

    Create a transformation for an alanine dipeptide test system where the N-methyl group is eliminated.

    >>> from openmmtools import testsystems
    >>> testsystem = testsystems.AlanineDipeptideVacuum()
    >>> from perses.rjmc.topology_proposal import TopologyProposal
    >>> new_to_old_atom_map = { index : index for index in range(testsystem.system.getNumParticles()) if (index > 3) } # all atoms but N-methyl
    >>> topology_proposal = TopologyProposal(old_system=testsystem.system, old_topology=testsystem.topology, old_chemical_state_key='AA', new_chemical_state_key='AA', new_system=testsystem.system, new_topology=testsystem.topology, logp_proposal=0.0, new_to_old_atom_map=new_to_old_atom_map, metadata=dict())
    >>> ncmc_engine = NCMCEngine(temperature=300.0*unit.kelvin, functions=default_functions, nsteps=50, timestep=1.0*unit.femtoseconds)
    >>> positions = testsystem.positions
    >>> [positions, logP_delete, potential_delete] = ncmc_engine.integrate(topology_proposal, positions, direction='delete')
    >>> [positions, logP_insert, potential_insert] = ncmc_engine.integrate(topology_proposal, positions, direction='insert')

    """
    def __init__(self,
                 temperature=default_temperature,
                 functions=None,
                 nsteps=default_nsteps,
                 steps_per_propagation=default_steps_per_propagation,
                 timestep=default_timestep,
                 constraint_tolerance=None,
                 platform=None,
                 write_ncmc_interval=1,
                 measure_shadow_work=False,
                 integrator_splitting='V R O H R V',
                 storage=None,
                 verbose=False,
                 LRUCapacity=10,
                 pressure=None,
                 bond_softening_constant=1.0,
                 angle_softening_constant=1.0):
        """
        This is the base class for NCMC switching between two different systems.

        Parameters
        ----------
        temperature : simtk.unit.Quantity with units compatible with kelvin
            The temperature at which switching is to be run.
            If None, `default_temperature` is used.
        functions : dict of str:str, optional, default=None
            functions[parameter] is the function (parameterized by 't' which switched from 0 to 1) that
            controls how alchemical context parameter 'parameter' is switched.
            If None, `LambdaProtocol.default_functions` is used. The dict is deep-copied.
        nsteps : int, optional, default=default_nsteps
            The number of steps to use for switching. If None, `default_nsteps` is used.
        steps_per_propagation : int, optional, default=default_steps_per_propagation
            The number of intermediate propagation steps taken at each switching step
        timestep : simtk.unit.Quantity with units compatible with femtoseconds, optional, default=default_timestep
            The timestep to use for integration of switching velocity Verlet steps.
            If None, `default_timestep` is used.
        constraint_tolerance : float, optional, default=None
            If not None, this relative constraint tolerance is used for position and velocity constraints.
        platform : simtk.openmm.Platform, optional, default=None
            If specified, the platform to use for OpenMM simulations.
        write_ncmc_interval : int, optional, default=1
            If a positive integer is specified, a snapshot frame will be written to storage with the specified interval on NCMC switching.
            'storage' must also be specified. If None, the frame stride falls back to 1.
        measure_shadow_work : bool, optional, default False
            Whether to measure shadow work
        integrator_splitting : str, optional, default='V R O H R V'
            NCMC internal integrator splitting based on OpenMMTools Langevin splittings
        storage : NetCDFStorageView, optional, default=None
            If specified, write data using this class.
        verbose : bool, optional, default=False
            If True, print debug information.
        LRUCapacity : int, default 10
            Capacity of LRU cache for hybrid systems
        pressure : float, default None
            The pressure to use for the simulation. If None, no barostat
        bond_softening_constant : float, optional, default=1.0
            Stored and forwarded to HybridTopologyFactory when a hybrid system is built.
        angle_softening_constant : float, optional, default=1.0
            Stored and forwarded to HybridTopologyFactory when a hybrid system is built.
        """
        # Handle some defaults. Identity checks are used so that the arguments'
        # own __eq__ implementations (e.g. unit-bearing quantities) are never invoked.
        if functions is None:
            functions = LambdaProtocol.default_functions
        if nsteps is None:
            nsteps = default_nsteps
        if timestep is None:
            timestep = default_timestep
        if temperature is None:
            temperature = default_temperature

        self._temperature = temperature
        # Deep copy so later mutation of the caller's dict cannot alter the protocol.
        self._functions = copy.deepcopy(functions)
        self._nsteps = nsteps
        self._timestep = timestep
        self._constraint_tolerance = constraint_tolerance
        self._platform = platform
        self._integrator_splitting = integrator_splitting
        self._steps_per_propagation = steps_per_propagation
        self._verbose = verbose
        self._pressure = pressure
        self._bond_softening_constant = bond_softening_constant
        self._angle_softening_constant = angle_softening_constant
        self._disable_barostat = False
        self._hybrid_cache = LRUCache(capacity=LRUCapacity)
        self._measure_shadow_work = measure_shadow_work

        self._nattempted = 0

        # Configurations are only saved when a storage backend was supplied.
        self._storage = None
        if storage is not None:
            self._storage = NetCDFStorageView(storage,
                                              modname=self.__class__.__name__)
            self._save_configuration = True
        else:
            self._save_configuration = False

        self._write_ncmc_interval = 1 if write_ncmc_interval is None else write_ncmc_interval
        # NOTE(review): this keeps the raw argument, so it stays None when
        # write_ncmc_interval is None even though the frame stride above falls
        # back to 1 -- confirm that the switching move accepts None here.
        self._work_save_interval = write_ncmc_interval

    @property
    def beta(self):
        """Inverse thermal energy, 1 / (kB * temperature), for the engine's temperature."""
        return 1.0 / (kB * self._temperature)

    def _compute_energy_contribution(self, hybrid_thermodynamic_state,
                                     initial_sampler_state,
                                     final_sampler_state):
        """
        Compute the NCMC energy contribution to the log probability.

        The hybrid state is evaluated at lambda = 0 with the initial sampler
        state and at lambda = 1 with the final sampler state; the result is
        u(final_positions, final_lambda) - u(initial_positions, initial_lambda)
        (Eqs. 62 and 63 (two-stage) and Eq. 45 (hybrid) of the reference document).

        Note that `hybrid_thermodynamic_state` is left with its alchemical
        parameters set to 1.0 on return.

        Parameters
        ----------
        hybrid_thermodynamic_state : openmmtools.states.CompoundThermodynamicState
            The thermodynamic state of the hybrid sampler.
        initial_sampler_state : openmmtools.states.SamplerState
            The sampler state of the nonalchemical system at the start of the NCMC protocol with box vectors
        final_sampler_state : openmmtools.states.SamplerState
            The sampler state of the nonalchemical system at the end of the NCMC protocol

        Returns
        -------
        logP_energy : float
            The NCMC energy contribution to log probability.
        """
        endpoints = ((0.0, initial_sampler_state), (1.0, final_sampler_state))

        reduced_potentials = []
        for lambda_value, sampler_state in endpoints:
            # The lambda value must be set before each evaluation.
            hybrid_thermodynamic_state.set_alchemical_parameters(lambda_value)
            reduced_potentials.append(
                compute_reduced_potential(hybrid_thermodynamic_state,
                                          sampler_state))

        u_initial, u_final = reduced_potentials
        return u_final - u_initial

    def _topology_proposal_to_thermodynamic_states(self, topology_proposal):
        """
        Build thermodynamic states for both end systems of a topology proposal,
        at this engine's temperature and pressure. These are used to compute
        the "logP_energy" quantity.

        Parameters
        ----------
        topology_proposal : perses.rjmc.TopologyProposal
            topology proposal for whose endpoint systems we want ThermodynamicStates

        Returns
        -------
        old_thermodynamic_state : openmmtools.states.ThermodynamicState
            The old system (nonalchemical) thermodynamic state
        new_thermodynamic_state : openmmtools.states.ThermodynamicState
            The new system (nonalchemical) thermodynamic state
        """
        old_system = topology_proposal.old_system
        new_system = topology_proposal.new_system

        old_thermodynamic_state, new_thermodynamic_state = [
            ThermodynamicState(endpoint_system,
                               temperature=self._temperature,
                               pressure=self._pressure)
            for endpoint_system in (old_system, new_system)
        ]
        return old_thermodynamic_state, new_thermodynamic_state

    def make_alchemical_system(self, topology_proposal, current_positions,
                               new_positions):
        """
        Generate an alchemically-modified system at the correct atoms
        based on the topology proposal. This method generates a hybrid system using the new
        HybridTopologyFactory. It memoizes so that calling multiple times (within a recent time period)
        will immediately return a cached object.

        Parameters
        ----------
        topology_proposal : perses.rjmc.TopologyProposal
            Unmodified real system corresponding to appropriate leg of transformation.
        current_positions : np.ndarray of float
            Positions of "old" system
        new_positions : np.ndarray of float
            Positions of "new" system atoms

        Returns
        -------
        hybrid_factory : perses.annihilation.relative.HybridTopologyFactory or None
            A factory object containing the hybrid system, or None when the
            factory could not be constructed (the failure is logged).
        """
        try:
            hybrid_factory = self._hybrid_cache[topology_proposal]

            # Cache hit: the cached factory still holds the positions from its
            # previous use, so refresh them and recompute the hybrid positions.
            hybrid_factory._old_positions = current_positions
            hybrid_factory._new_positions = new_positions
            hybrid_factory._compute_hybrid_positions()
        except KeyError:
            # Cache miss: build a new factory and remember it.
            try:
                hybrid_factory = HybridTopologyFactory(
                    topology_proposal,
                    current_positions,
                    new_positions,
                    bond_softening_constant=self._bond_softening_constant,
                    angle_softening_constant=self._angle_softening_constant)
                self._hybrid_cache[topology_proposal] = hybrid_factory
            except Exception as e:
                # Construction failure is reported to the caller as None, but
                # KeyboardInterrupt/SystemExit are no longer swallowed and the
                # reason is recorded instead of being silently dropped.
                _logger.warning(
                    "Hybrid factory construction failed: {}".format(str(e)))
                hybrid_factory = None

        return hybrid_factory

    def integrate(self,
                  topology_proposal,
                  initial_sampler_state,
                  proposed_sampler_state,
                  iteration=None):
        """
        Perform NCMC switching through the hybrid system built for `topology_proposal`.

        A hybrid system is generated (or retrieved from the cache), switched
        with a nonequilibrium move, and the final hybrid positions are split
        back into old-system and new-system sampler states. When a storage view
        is configured, the switching trajectory, box vectors and protocol work
        are written out.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        initial_sampler_state : openmmtools.states.SamplerState representing the initial (old) system
            Configurational properties of the atoms at the beginning of the NCMC switching.
        proposed_sampler_state : openmmtools.states.SamplerState representing the proposed (post-geometry new) system
            Configurational properties new system atoms at beginning of NCMC switching
        iteration : int, optional, default=None
            Iteration number, for storage purposes.

        Returns
        -------
        final_old_sampler_state : openmmtools.State.SamplerState
            The final configurational properties of the old system after hybrid alchemical switching
        final_sampler_state : openmmtools.states.SamplerState
            The final configurational properties after `nsteps` steps of alchemical switching, and reversion to the nonalchemical system
        logP_work : float
            The NCMC work contribution to the log acceptance probability (Eqs. 62 and 63)
        logP_initial : float
            The initial logP of the hybrid configuration
        logP_final : float
            The final logP of the hybrid configuration

        Notes
        -----
        If the hybrid system cannot be constructed, or the switching move
        raises, the proposal is rejected: the two input sampler states are
        returned unchanged together with ``-np.inf, 0.0, 0.0``.
        """

        assert not initial_sampler_state.has_nan(
        ) and not proposed_sampler_state.has_nan()

        #generate or retrieve the hybrid topology factory:
        hybrid_factory = self.make_alchemical_system(
            topology_proposal, initial_sampler_state.positions,
            proposed_sampler_state.positions)

        if hybrid_factory is None:
            _logger.warning(
                "Unable to construct hybrid system for {} -> {}".format(
                    topology_proposal.old_chemical_state_key,
                    topology_proposal.new_chemical_state_key))
            return initial_sampler_state, proposed_sampler_state, -np.inf, 0.0, 0.0

        topology = hybrid_factory.hybrid_topology

        #generate the corresponding thermodynamic and sampler states so that we can use the switching move:

        #First generate the thermodynamic state:
        hybrid_system = hybrid_factory.hybrid_system
        hybrid_thermodynamic_state = ThermodynamicState(
            hybrid_system,
            temperature=self._temperature,
            pressure=self._pressure)

        #Now create an RelativeAlchemicalState from the hybrid system:
        alchemical_state = RelativeAlchemicalState.from_system(hybrid_system)
        alchemical_state.set_alchemical_parameters(0.0)

        #Now create a compound thermodynamic state that combines the hybrid thermodynamic state with the alchemical state:
        compound_thermodynamic_state = CompoundThermodynamicState(
            hybrid_thermodynamic_state, composable_states=[alchemical_state])

        #construct a sampler state from the hybrid positions and the box vectors of the initial sampler state:
        initial_hybrid_positions = hybrid_factory.hybrid_positions
        initial_hybrid_box_vectors = initial_sampler_state.box_vectors

        initial_hybrid_sampler_state = SamplerState(
            initial_hybrid_positions, box_vectors=initial_hybrid_box_vectors)
        # The move updates its sampler state in place, so work on a copy and
        # keep the initial state for the endpoint energy evaluation below.
        final_hybrid_sampler_state = copy.deepcopy(
            initial_hybrid_sampler_state)

        # NOTE(review): the splitting is hard-coded here; the
        # `integrator_splitting` constructor argument (self._integrator_splitting)
        # is not forwarded -- confirm whether that is intentional.
        ne_move = ExternalNonequilibriumSwitchingMove(
            self._functions,
            nsteps_neq=self._nsteps,
            timestep=self._timestep,
            temperature=self._temperature,
            work_configuration_save_interval=self._work_save_interval,
            splitting="V R O R V")

        #run the NCMC protocol
        try:
            ne_move.apply(compound_thermodynamic_state,
                          final_hybrid_sampler_state)
        except Exception as e:
            # Deliberate best-effort: any failure of the switching move is
            # treated as a rejected proposal rather than a crash.
            _logger.warning(
                "NCMC failed because {}; rejecting.".format(str(e)))
            return [
                initial_sampler_state, proposed_sampler_state, -np.inf, 0.0,
                0.0
            ]

        #get the total work:
        logP_work = -ne_move.cumulative_work[-1]

        # Compute contribution of transforming to and from the hybrid system:
        context, integrator = global_context_cache.get_context(
            hybrid_thermodynamic_state)

        #set all alchemical parameters to zero:
        for parameter in self._functions.keys():
            context.setParameter(parameter, 0.0)

        initial_hybrid_sampler_state.apply_to_context(context,
                                                      ignore_velocities=True)
        initial_reduced_potential = hybrid_thermodynamic_state.reduced_potential(
            context)

        #set all alchemical parameters to one:
        for parameter in self._functions.keys():
            context.setParameter(parameter, 1.0)

        final_hybrid_sampler_state.apply_to_context(context,
                                                    ignore_velocities=True)
        final_reduced_potential = hybrid_thermodynamic_state.reduced_potential(
            context)

        #reset the parameters back to zero just in case
        for parameter in self._functions.keys():
            context.setParameter(parameter, 0.0)

        #compute the output SamplerState, which has the atoms only for the new system post-NCMC:
        new_positions = hybrid_factory.new_positions(
            final_hybrid_sampler_state.positions)
        new_box_vectors = final_hybrid_sampler_state.box_vectors
        final_sampler_state = SamplerState(new_positions,
                                           box_vectors=new_box_vectors)

        #compute the output SamplerState for the atoms only in the old system (required for geometry_logP_reverse)
        old_positions = hybrid_factory.old_positions(
            final_hybrid_sampler_state.positions)
        old_box_vectors = copy.deepcopy(
            new_box_vectors)  #these are the same as the new system
        final_old_sampler_state = SamplerState(old_positions,
                                               box_vectors=old_box_vectors)

        # Everything below is only needed for output, so it is skipped
        # entirely when no storage view is configured.
        if self._storage:
            stride = self._write_ncmc_interval

            # Stride backwards from the last frame so the final configuration
            # is always kept, then flip back to chronological order.
            trajectory = ne_move.trajectory[::-stride, :, :][::-1]
            nframes = np.shape(trajectory)[0]

            box_lengths = ne_move.box_lengths[::-stride, :][::-1]
            box_angles = ne_move.box_angles[::-stride, :][::-1]
            box_lengths_and_angles = np.stack([box_lengths, box_angles])

            #write out the positions of the topology
            for frame in range(nframes):
                self._storage.write_configuration("ncmcpositions",
                                                  trajectory[frame, :, :],
                                                  topology,
                                                  iteration=iteration,
                                                  frame=frame,
                                                  nframes=nframes)

            #write out the periodic box vectors:
            self._storage.write_array("ncmcboxvectors",
                                      box_lengths_and_angles,
                                      iteration=iteration)

            #retrieve the protocol work and write that out too:
            self._storage.write_array("protocolwork",
                                      ne_move.cumulative_work,
                                      iteration=iteration)

        # Return
        return [
            final_old_sampler_state, final_sampler_state, logP_work,
            -initial_reduced_potential, -final_reduced_potential
        ]
示例#27
0
class SAMSSampler(object):
    """
    Self-adjusted mixture sampling engine.

    Properties
    ----------
    state_keys : set of objects
        The names of states sampled by the sampler.
    logZ : dict() of keys : float
        logZ[key] is the log partition function (up to an additive constant) estimate for chemical state `key`
    update_method : str
        Update method.  One of ['one-stage', 'two-stage']
    iteration : int
        Iterations completed.
    verbose : bool
        If True, verbose debug output is printed.

    References
    ----------
    [1] Tan, Z. (2015) Optimally adjusted mixture sampling and locally weighted histogram analysis, Journal of Computational and Graphical Statistics, to appear. (Supplement)
    http://www.stat.rutgers.edu/home/ztan/Publication/SAMS_redo4.pdf

    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None })
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> from perses.rjmc.geometry import FFAllAngleGeometryEngine
    >>> geometry_engine = FFAllAngleGeometryEngine(metadata={})
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(test.topology, system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine, geometry_engine)
    >>> # Create a SAMS sampler
    >>> sams_sampler = SAMSSampler(exen_sampler)
    >>> # Run the sampler
    >>> sams_sampler.run() # doctest: +ELLIPSIS
    ...
    """
    def __init__(self, sampler, logZ=None, log_target_probabilities=None, update_method='two-stage', storage=None, second_stage_start=1000):
        """
        Create a SAMS Sampler.

        Parameters
        ----------
        sampler : ExpandedEnsembleSampler
            The expanded ensemble sampler used to sample both configurations and discrete thermodynamic states.
        logZ : dict of key : float, optional, default=None
            If specified, the log partition functions for each state will be initialized to the specified dictionary.
        log_target_probabilities : dict of key : float, optional, default=None
            If specified, unnormalized target probabilities; default is all 0.
        update_method : str, optional, default='default'
            SAMS update algorithm
        storage : NetCDFStorageView, optional, default=None
        second_state_start : int, optional, default None
            At what iteration number to switch to the optimal gain decay

        """
        from scipy.misc import logsumexp
        from perses.tests.utils import createOEMolFromSMILES
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.chemical_states = None
        self._reference_state = None
        try:
            self.chemical_states = self.sampler.proposal_engine.chemical_state_list
        except NotImplementedError:
            logger.warn("The proposal engine has not properly implemented the chemical state property; SAMS will add states on the fly.")

        if self.chemical_states:
            # Select a reference state that will always be subtracted (ensure that dict ordering does not change)
            self._reference_state = self.chemical_states[0]

            # Initialize the logZ dictionary with scores based on the number of atoms
            # This is not the negative because the weights are set to the negative of the initial weights
            self.logZ = {chemical_state: self._num_dof_compensation(chemical_state) for chemical_state in self.chemical_states}

            #Initialize log target probabilities with log(1/n_states)
            self.log_target_probabilities = {chemical_state : np.log(len(self.chemical_states)) for chemical_state in self.chemical_states}

            #If initial weights are specified, override any weight with what is provided
            #However, if the chemical state is not in the reachable chemical state list,throw an exception
            if logZ is not None:
                for (chemical_state, logZ_value) in logZ:
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a logZ initial value for an un-proposable chemical state")
                    self.logZ[chemical_state] = logZ_value

            if log_target_probabilities is not None:
                for (chemical_state, log_target_probability) in log_target_probabilities:
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a log target probability for an un-proposable chemical state.")
                    self.log_target_probabilities[chemical_state] = log_target_probability

                #normalize target probabilities
                #this is likely not necessary, but it is copying the algorithm in Ref 1
                log_sum_target_probabilities = logsumexp((list(self.log_target_probabilities.values())))
                self.log_target_probabilities = {chemical_state : log_target_probability - log_sum_target_probabilities for chemical_state, log_target_probability in self.log_target_probabilities}
        else:
            self.logZ = dict()
            self.log_target_probabilities = dict()

        self.update_method = update_method

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize.
        self.iteration = 0
        self.verbose = False
        self.sampler.log_weights = {state_key: - self.logZ[state_key] for state_key in self.logZ.keys()}

        self.second_stage_start = 0
        if second_stage_start is not None:
            self.second_stage_start = second_stage_start

    @property
    def state_keys(self):
        return self.logZ.keys()

    def _num_dof_compensation(self, smiles):
        """
        Compute an approximate compensating factor for a chemical state based on the number of degrees of freedom that it has.

        The formula is:
        (num_heavy*heavy_factor) + (num_light*light_factor) where
        heavy_factor = 4.5 and
        light_factor = 3.8

        Atoms with atomic number 1 count as light; every other atom counts as heavy.

        Parameters
        ----------
        smiles : str
            The SMILES string of the molecule

        Returns
        -------
        correction_factor : float
        """
        # Imported locally so the helper is in scope here; the constructor's
        # function-level import of the same name is not visible to this method.
        from perses.tests.utils import createOEMolFromSMILES

        heavy_factor = 4.5
        light_factor = 3.8

        mol = createOEMolFromSMILES(smiles)
        num_heavy = 0
        num_light = 0
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == 1:
                num_light += 1
            else:
                num_heavy += 1

        return num_heavy*heavy_factor + num_light*light_factor

    def update_sampler(self):
        """
        Update the underlying expanded ensembles sampler.
        """
        self.sampler.update()

    def update_logZ_estimates(self):
        """
        Update the logZ estimates according to self.update_method.

        The estimate of the currently occupied state (``self.sampler.state_key``) is
        incremented by ``gamma / exp(log_target_probability)``, where the gain ``gamma`` is

        * ``'one-stage'`` : ``1 / (iteration + 1)``
        * ``'two-stage'`` : ``1`` while ``iteration < second_stage_start``, afterwards
          ``1 / (iteration - second_stage_start + 1)``

        If a reference state is set, its logZ is then subtracted from every estimate.
        Finally the sampler's log weights are reset to ``-logZ`` and, when storage is
        attached, ``logZ`` and ``log_weights`` are written for the current iteration.

        Raises
        ------
        ValueError
            If ``self.update_method`` is neither ``'one-stage'`` nor ``'two-stage'``.
            The check is made before any estimate is modified.
        """
        state_key = self.sampler.state_key

        # Work out the gain first so that an unknown update method leaves all estimates untouched.
        if self.update_method == 'one-stage':
            # Based on Eq. 9 of Ref. [1]
            gamma = 1.0 / float(self.iteration+1)
        elif self.update_method == 'two-stage':
            # Keep gamma large until second stage is activated.
            if self.iteration < self.second_stage_start:
                # First stage.
                gamma = 1.0
                # TODO: Determine when to switch to second stage
            else:
                # Second stage.
                gamma = 1.0 / float(self.iteration - self.second_stage_start + 1)
        else:
            raise ValueError("SAMS update method '%s' unknown." % self.update_method)

        # Add state key to dictionaries if we haven't visited this state before.
        # (Logger.warn is deprecated and removed in Python 3.13, hence Logger.warning.)
        if state_key not in self.logZ:
            logger.warning("A new state key is being added to the logZ; note that this makes the resultant algorithm different from SAMS")
            self.logZ[state_key] = 0.0
        if state_key not in self.log_target_probabilities:
            logger.warning("A new state key is being added to the target probabilities; note that this makes the resultant algorithm different from SAMS")
            self.log_target_probabilities[state_key] = 0.0

        # The (t-1/2) update from equation 9 in ref 1.
        self.logZ[state_key] += gamma / np.exp(self.log_target_probabilities[state_key])

        if self._reference_state:
            # Second step of the (t-1/2) update: subtract the reference state from everything.
            # We can only do this for cases where all states have been enumerated.
            reference_logZ = self.logZ[self._reference_state]
            self.logZ = {key: logZ_estimate - reference_logZ for key, logZ_estimate in self.logZ.items()}

        # Update log weights for sampler.
        self.sampler.log_weights = {key: -logZ_estimate for key, logZ_estimate in self.logZ.items()}

        if self.storage:
            self.storage.write_object('logZ', self.logZ, iteration=self.iteration)
            self.storage.write_object('log_weights', self.sampler.log_weights, iteration=self.iteration)

    def update(self):
        """
        Perform one SAMS iteration.

        Advances the underlying sampler, refreshes the logZ estimates, syncs the
        storage layer when one is attached, and then increments ``self.iteration``.
        """
        separator = "=" * 80
        if self.verbose:
            print(separator)
            print("SAMS sampler iteration %5d" % self.iteration)

        self.update_sampler()
        self.update_logZ_estimates()

        if self.storage:
            self.storage.sync()
        self.iteration += 1

        if self.verbose:
            print(separator)

    def run(self, niterations=1):
        """
        Call ``self.update()`` repeatedly.

        Parameters
        ----------
        niterations : int, optional, default=1
            How many update steps to perform.
        """
        # The loop index itself is not needed; only the number of repetitions matters.
        for _ in range(niterations):
            self.update()
示例#28
0
    def __init__(self, sampler, logZ=None, log_target_probabilities=None, update_method='two-stage', storage=None, second_stage_start=1000):
        """
        Create a SAMS Sampler.

        Parameters
        ----------
        sampler : ExpandedEnsembleSampler
            The expanded ensemble sampler used to sample both configurations and discrete thermodynamic states.
        logZ : dict of key : float, optional, default=None
            If specified, these values override the atom-count based initial log partition
            function estimates. Only consulted when the proposal engine enumerates its chemical states.
        log_target_probabilities : dict of key : float, optional, default=None
            If specified, unnormalized log target probabilities. States that are not listed keep
            the uniform default log(1/n_states); the combined set is then normalized.
            Only consulted when the proposal engine enumerates its chemical states.
        update_method : str, optional, default='two-stage'
            SAMS update algorithm
        storage : optional, default=None
            If specified, it is wrapped in a NetCDFStorageView tagged with this class name.
        second_stage_start : int, optional, default=1000
            At what iteration number to switch to the optimal gain decay. None is treated as 0.

        Raises
        ------
        ValueError
            If ``logZ`` or ``log_target_probabilities`` names a chemical state that is not in
            the proposal engine's chemical state list.
        """
        from scipy.special import logsumexp

        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.chemical_states = None
        self._reference_state = None
        try:
            self.chemical_states = self.sampler.proposal_engine.chemical_state_list
        except NotImplementedError:
            _logger.warning("The proposal engine has not properly implemented the chemical state property; SAMS will add states on the fly.")

        if self.chemical_states:
            # Select a reference state that will always be subtracted (ensure that dict ordering does not change)
            self._reference_state = self.chemical_states[0]

            # Initialize the logZ dictionary with scores based on the number of atoms
            # This is not the negative because the weights are set to the negative of the initial weights
            self.logZ = {chemical_state: self._num_dof_compensation(chemical_state) for chemical_state in self.chemical_states}

            # Initialize log target probabilities with log(1/n_states) == -log(n_states)
            uniform_log_probability = -np.log(len(self.chemical_states))
            self.log_target_probabilities = {chemical_state: uniform_log_probability for chemical_state in self.chemical_states}

            # If initial weights are specified, override any weight with what is provided
            # However, if the chemical state is not in the reachable chemical state list, throw an exception
            if logZ is not None:
                for chemical_state, logZ_value in logZ.items():
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a logZ initial value for an un-proposable chemical state")
                    self.logZ[chemical_state] = logZ_value

            if log_target_probabilities is not None:
                for chemical_state, log_target_probability in log_target_probabilities.items():
                    if chemical_state not in self.chemical_states:
                        raise ValueError("Provided a log target probability for an un-proposable chemical state.")
                    self.log_target_probabilities[chemical_state] = log_target_probability

                # normalize target probabilities
                # this is likely not necessary, but it is copying the algorithm in Ref 1
                log_sum_target_probabilities = logsumexp(list(self.log_target_probabilities.values()))
                self.log_target_probabilities = {
                    chemical_state: log_target_probability - log_sum_target_probabilities
                    for chemical_state, log_target_probability in self.log_target_probabilities.items()
                }
        else:
            # No enumerated states: estimates are created lazily as states are visited.
            self.logZ = dict()
            self.log_target_probabilities = dict()

        self.update_method = update_method

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize.
        self.iteration = 0
        self.verbose = False
        self.sampler.log_weights = {state_key: -logZ_estimate for state_key, logZ_estimate in self.logZ.items()}

        self.second_stage_start = 0
        if second_stage_start is not None:
            self.second_stage_start = second_stage_start
示例#29
0
    def __init__(self, sampler, topology, state_key, proposal_engine, geometry_engine, log_weights=None, options=None, platform=None, envname=None, storage=None, ncmc_write_interval=1):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        geometry_engine : GeometryEngine
            GeometryEngine to use for dimension matching
        log_weights : dict of object : float, optional, default=None
            Log weights to use for expanded ensemble biases. An empty dict is used if None.
        options : dict, optional, default=None
            Options for initializing switching scheme. Recognized keys are 'timestep', 'nsteps'
            and 'functions' (NCMC switching), 'splitting' (NCMC integrator splitting, defaults to
            "V R O H R V") and 'nsteps_mcmc' (MCMC iterations per update, defaults to 100).
            Missing or falsy entries select the defaults. The caller's dict is not modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.
        envname : optional, default=None
            Not used by this constructor.
        storage : optional, default=None
            If specified, it is wrapped in a NetCDFStorageView tagged with this class name.
        ncmc_write_interval : int, default 1
            How frequently to write out NCMC protocol steps.
        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self._pressure = sampler.thermodynamic_state.pressure
        self._temperature = sampler.thermodynamic_state.temperature
        self.topology = md.Topology.from_openmm(topology)
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = dict() if log_weights is None else log_weights

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize
        self.iteration = 0

        # Work on a private copy so the caller's options dict is never mutated;
        # missing keys are read as None through dict.get().
        options = dict() if options is None else dict(options)

        self._ncmc_splitting = options.get('splitting') or "V R O H R V"

        nsteps = options.get('nsteps')
        if nsteps:
            self._switching_nsteps = nsteps
            self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature,
                                          timestep=options.get('timestep'), nsteps=nsteps,
                                          functions=options.get('functions'), integrator_splitting=self._ncmc_splitting,
                                          platform=platform, storage=self.storage,
                                          write_ncmc_interval=ncmc_write_interval)
        else:
            # No NCMC switching requested; no NCMC engine is created.
            self._switching_nsteps = 0

        self._n_iterations_per_update = options.get('nsteps_mcmc') or 100

        self.geometry_engine = geometry_engine
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs
        self.logPs = list()
        self.sampler.minimize(max_iterations=40)
示例#30
0
class ExpandedEnsembleSampler(object):
    """
    Method of expanded ensembles sampling engine.

    The acceptance criteria is given in the reference document. Roughly, the proposal scheme is:

    * Draw a proposed chemical state k', and calculate reverse proposal probability
    * Conditioned on k' and the current positions x, generate new positions with the GeometryEngine
    * With new positions, jump to a hybrid system at lambda=0
    * Anneal from lambda=0 to lambda=1, accumulating work
    * Jump from the hybrid system at lambda=1 to the k' system, and compute reverse GeometryEngine proposal
    * Add weight of chemical states k and k' to acceptance probabilities

    Properties
    ----------
    sampler : MCMCSampler
        The MCMC sampler used for updating positions.
    proposal_engine : ProposalEngine
        The ProposalEngine to use for proposing new sampler states and topologies.
    system_generator : SystemGenerator
        The SystemGenerator to use for creating System objects following proposals.
    state : hashable object
        The current sampler state. Can be any hashable object.
    states : set of hashable object
        All known states.
    iteration : int
        Iterations completed.
    naccepted : int
        Number of accepted thermodynamic/chemical state changes.
    nrejected : int
        Number of rejected thermodynamic/chemical state changes.
    number_of_state_visits : dict of state_key
        Cumulative counts of visited states.
    verbose : bool
        If True, verbose output is printed.

    References
    ----------
    [1] Lyubartsev AP, Martsinovski AA, Shevkunov SV, and Vorontsov-Velyaminov PN. New approach to Monte Carlo calculation of the free energy: Method of expanded ensembles. JCP 96:1776, 1992
    http://dx.doi.org/10.1063/1.462133


    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={'implicitSolvent' : None, 'constraints' : None }, nonperiodic_forcefield_kwargs={'nonbondedMethod' : app.NoCutoff})
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> from perses.rjmc.geometry import FFAllAngleGeometryEngine
    >>> geometry_engine = FFAllAngleGeometryEngine(metadata={})
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(test.topology, system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine, geometry_engine)
    >>> # Run the sampler
    >>> exen_sampler.run()

    """
    def __init__(self, sampler, topology, state_key, proposal_engine, geometry_engine, log_weights=None, options=None, platform=None, envname=None, storage=None, ncmc_write_interval=1):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        geometry_engine : GeometryEngine
            GeometryEngine to use for dimension matching
        log_weights : dict of object : float, optional, default=None
            Log weights to use for expanded ensemble biases. An empty dict is used if None.
        options : dict, optional, default=None
            Options for initializing switching scheme. Recognized keys are 'timestep', 'nsteps'
            and 'functions' (NCMC switching), 'splitting' (NCMC integrator splitting, defaults to
            "V R O H R V") and 'nsteps_mcmc' (MCMC iterations per update, defaults to 100).
            Missing or falsy entries select the defaults. The caller's dict is not modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.
        envname : optional, default=None
            Not used by this constructor.
        storage : optional, default=None
            If specified, it is wrapped in a NetCDFStorageView tagged with this class name.
        ncmc_write_interval : int, default 1
            How frequently to write out NCMC protocol steps.
        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self._pressure = sampler.thermodynamic_state.pressure
        self._temperature = sampler.thermodynamic_state.temperature
        # The OpenMM topology is kept alongside its mdtraj conversion.
        self._omm_topology = topology
        self.topology = md.Topology.from_openmm(topology)
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = dict() if log_weights is None else log_weights

        self.storage = None
        if storage is not None:
            self.storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        # Initialize
        self.iteration = 0

        # Work on a private copy so the caller's options dict is never mutated;
        # missing keys are read as None through dict.get().
        options = dict() if options is None else dict(options)

        self._ncmc_splitting = options.get('splitting') or "V R O H R V"

        nsteps = options.get('nsteps')
        if nsteps:
            self._switching_nsteps = nsteps
            self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature,
                                          timestep=options.get('timestep'), nsteps=nsteps,
                                          functions=options.get('functions'), integrator_splitting=self._ncmc_splitting,
                                          platform=platform, storage=self.storage,
                                          write_ncmc_interval=ncmc_write_interval)
        else:
            # No NCMC switching requested; no NCMC engine is created.
            self._switching_nsteps = 0

        self._n_iterations_per_update = options.get('nsteps_mcmc') or 100

        self.geometry_engine = geometry_engine
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None # if not None, write PDB file
        self.geometry_pdbfile = None # if not None, write PDB file of geometry proposals
        self.accept_everything = False # if True, will accept anything that doesn't lead to NaNs
        self.logPs = list()
        self.sampler.minimize(max_iterations=40)

    @property
    def state_keys(self):
        """Keys of every state that currently has a log weight (a view on ``self.log_weights``)."""
        weights = self.log_weights
        return weights.keys()

    def get_log_weight(self, state_key):
        """
        Look up the log weight of a state, registering the state if it is new.

        Parameters
        ----------
        state_key : hashable object
            The state key (e.g. chemical state key) to look up.

        Returns
        -------
        log_weight : float
            The log weight stored for ``state_key``; 0.0 for a state seen for the first time.

        Notes
        -----
        A previously unknown key is inserted into ``self.log_weights`` with weight 0.0.

        """
        # setdefault performs the "insert 0.0 if missing, then read" in a single step.
        return self.log_weights.setdefault(state_key, 0.0)

    def _system_to_thermodynamic_state(self, system):
        """
        Build a ThermodynamicState for ``system`` using the temperature and pressure
        that were recorded from the sampler when this object was constructed.

        Parameters
        ----------
        system : openmm.System
            The OpenMM system for which to create the thermodynamic state

        Returns
        -------
        new_thermodynamic_state : openmmtools.states.ThermodynamicState
            The thermodynamic state object representing the given system
        """
        conditions = dict(temperature=self._temperature, pressure=self._pressure)
        return ThermodynamicState(system, **conditions)

    def _geometry_forward(self, topology_proposal, old_sampler_state):
        """
        Ask the geometry engine for positions of the proposed system and the
        log probability of that proposal.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        old_sampler_state : openmmtools.states.SamplerState
            Configurational properties of the old system atoms.

        Returns
        -------
        new_sampler_state : openmmtools.states.SamplerState
            Sampler state holding the proposed positions and the box vectors of ``old_sampler_state``.
        geometry_logp_propose : float
            The log probability of the forward-only proposal
        """
        if self.verbose:
            print("Geometry engine proposal...")

        # Generate coordinates for new atoms and compute the log probability of the proposal.
        start_time = time.time()
        proposed_positions, geometry_logp_propose = self.geometry_engine.propose(
            topology_proposal,
            old_sampler_state.positions,
            self.sampler.thermodynamic_state.beta,
        )
        if self.verbose:
            print('proposal took %.3f s' % (time.time() - start_time))

        # Optionally dump the proposed geometry to the configured PDB file handle.
        pdb_handle = self.geometry_pdbfile
        if pdb_handle is not None:
            print("Writing proposed geometry...")
            from simtk.openmm.app import PDBFile
            PDBFile.writeFile(topology_proposal.new_topology, proposed_positions, file=pdb_handle)
            pdb_handle.flush()

        proposed_state = SamplerState(proposed_positions, box_vectors=old_sampler_state.box_vectors)
        return proposed_state, geometry_logp_propose

    def _geometry_reverse(self, topology_proposal, new_sampler_state, old_sampler_state):
        """
        Ask the geometry engine for the log probability of proposing the old
        positions given the new positions.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        new_sampler_state : openmmtools.states.SamplerState
            Configurational properties of the new atoms.
        old_sampler_state : openmmtools.states.SamplerState
            Configurational properties of the old atoms.

        Returns
        -------
        geometry_logp_reverse : float
            The log probability of the proposal for the given transformation
        """
        if self.verbose:
            print("Geometry engine logP_reverse calculation...")

        start_time = time.time()
        logp_reverse = self.geometry_engine.logp_reverse(
            topology_proposal,
            new_sampler_state.positions,
            old_sampler_state.positions,
            self.sampler.thermodynamic_state.beta,
        )
        if self.verbose:
            print('calculation took %.3f s' % (time.time() - start_time))

        return logp_reverse

    def _ncmc_hybrid(self, topology_proposal, old_sampler_state, new_sampler_state):
        """
        Run a hybrid NCMC protocol from lambda = 0 to lambda = 1

        Thin wrapper around ``self.ncmc_engine.integrate`` that adds timing output
        and a NaN check.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        old_sampler_state : openmmtools.states.SamplerState
            SamplerState of old system at the beginning of NCMCSwitching
        new_sampler_state : openmmtools.states.SamplerState
            SamplerState of new system at the beginning of NCMCSwitching

        Returns
        -------
        ncmc_old_sampler_state : openmmtools.states.SamplerState
            SamplerState of old system at the end of switching
        ncmc_new_sampler_state : openmmtools.states.SamplerState
            SamplerState of new system at the end of switching
        logP_work : float
            The NCMC work contribution to the log acceptance probability (Eq. 44)
        logP_initial_hybrid : float
            Fourth value reported by the NCMC engine (presumably the log probability
            in the hybrid system at the start of switching -- confirm against NCMCEngine)
        logP_final_hybrid : float
            Fifth value reported by the NCMC engine (presumably the log probability
            in the hybrid system at the end of switching -- confirm against NCMCEngine)

        Raises
        ------
        Exception
            If ``new_sampler_state.has_nan()`` is true after the integration.
        """
        if self.verbose:
            print("Performing NCMC switching")

        start_time = time.time()
        switching_result = self.ncmc_engine.integrate(topology_proposal, old_sampler_state, new_sampler_state, iteration=self.iteration)
        ncmc_old_sampler_state, ncmc_new_sampler_state, logP_work, logP_initial_hybrid, logP_final_hybrid = switching_result
        if self.verbose:
            print('NCMC took %.3f s' % (time.time() - start_time))

        # Check that positions are not NaN
        # NOTE(review): this inspects the *input* new_sampler_state, not the returned
        # ncmc_new_sampler_state; that only catches NaNs if integrate() updates its
        # argument in place -- TODO confirm which state was meant.
        if new_sampler_state.has_nan():
            raise Exception("Positions are NaN after NCMC insert with %d steps" % self._switching_nsteps)

        return ncmc_old_sampler_state, ncmc_new_sampler_state, logP_work, logP_initial_hybrid, logP_final_hybrid

    def _geometry_ncmc_geometry(self, topology_proposal, sampler_state, old_log_weight, new_log_weight):
        """
        Use a hybrid NCMC protocol to switch from the old system to new system
        Will calculate new positions for the new system first, then give both
        sets of positions to the hybrid NCMC integrator, and finally use the
        final positions of the old and new systems to calculate the reverse
        geometry probability

        When ``self._switching_nsteps`` is 0 the NCMC engine is skipped entirely and
        the work and hybrid terms are taken to be zero.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        sampler_state : openmmtools.states.SamplerState
            Configurational properties of old atoms at the beginning of the NCMC switching.
        old_log_weight : float
            Chemical state weight from SAMSSampler
        new_log_weight : float
            Chemical state weight from SAMSSampler

        Returns
        -------
        logP_accept : float
            Log of acceptance probability of entire Expanded Ensemble switch (Eq. 25 or 46)
        ncmc_new_sampler_state : openmmtools.states.SamplerState
            Configurational properties of new atoms at the end of the NCMC switching.
        """
        if self.verbose: print("Updating chemical state with geometry-ncmc-geometry scheme...")

        logP_chemical_proposal = topology_proposal.logp_proposal

        old_thermodynamic_state = self.sampler.thermodynamic_state
        new_thermodynamic_state = self._system_to_thermodynamic_state(topology_proposal.new_system)

        initial_reduced_potential = feptasks.compute_reduced_potential(old_thermodynamic_state, sampler_state)
        logP_initial_nonalchemical = - initial_reduced_potential

        new_geometry_sampler_state, logP_geometry_forward = self._geometry_forward(topology_proposal, sampler_state)

        #if we aren't doing any switching, then skip running the NCMC engine at all.
        if self._switching_nsteps == 0:
            ncmc_old_sampler_state = sampler_state
            ncmc_new_sampler_state = new_geometry_sampler_state
            logP_work = 0.0
            logP_initial_hybrid = 0.0
            logP_final_hybrid = 0.0
        else:
            ncmc_old_sampler_state, ncmc_new_sampler_state, logP_work, logP_initial_hybrid, logP_final_hybrid = self._ncmc_hybrid(topology_proposal, sampler_state, new_geometry_sampler_state)

        if logP_work > -np.inf and logP_initial_hybrid > -np.inf and logP_final_hybrid > -np.inf:
            logP_geometry_reverse = self._geometry_reverse(topology_proposal, ncmc_new_sampler_state, ncmc_old_sampler_state)
            logP_to_hybrid = logP_initial_hybrid - logP_initial_nonalchemical

            final_reduced_potential = feptasks.compute_reduced_potential(new_thermodynamic_state, ncmc_new_sampler_state)
            logP_final_nonalchemical = -final_reduced_potential
            logP_from_hybrid = logP_final_nonalchemical - logP_final_hybrid
        else:
            # Switching reported -inf (or NaN) for one of its terms: skip the reverse
            # geometry and final energy evaluations and zero those contributions.
            # NOTE(review): if only a hybrid term is -inf while logP_work is finite,
            # logP_accept below stays finite -- confirm this is intended.
            logP_geometry_reverse = 0.0
            logP_to_hybrid = 0.0
            logP_from_hybrid = 0.0
            #TODO: mark failed proposals as unproposable

        logP_sams_weight = new_log_weight - old_log_weight

        # Compute total log acceptance probability according to Eq. 46
        logP_accept = logP_to_hybrid - logP_geometry_forward + logP_work + logP_from_hybrid + logP_geometry_reverse + logP_sams_weight

        if self.verbose:
            print("logP_accept = %+10.4e [logP_to_hybrid = %+10.4e, logP_chemical_proposal = %10.4e, logP_reverse = %+10.4e, -logP_forward = %+10.4e, logP_work = %+10.4e, logP_from_hybrid = %+10.4e, logP_sams_weight = %+10.4e]"
                % (logP_accept, logP_to_hybrid, logP_chemical_proposal, logP_geometry_reverse, -logP_geometry_forward, logP_work, logP_from_hybrid, logP_sams_weight))
        # Write to storage.
        if self.storage:
            self.storage.write_quantity('logP_accept', logP_accept, iteration=self.iteration)
            # Write components to storage
            self.storage.write_quantity('logP_ncmc_work', logP_work, iteration=self.iteration)
            self.storage.write_quantity('logP_from_hybrid', logP_from_hybrid, iteration=self.iteration)
            self.storage.write_quantity('logP_to_hybrid', logP_to_hybrid, iteration=self.iteration)
            self.storage.write_quantity('logP_chemical_proposal', logP_chemical_proposal, iteration=self.iteration)
            self.storage.write_quantity('logP_reverse', logP_geometry_reverse, iteration=self.iteration)
            self.storage.write_quantity('logP_forward', logP_geometry_forward, iteration=self.iteration)
            self.storage.write_quantity('logP_sams_weight', logP_sams_weight, iteration=self.iteration)
            # Write some aggregate statistics to storage to make contributions to acceptance probability easier to analyze
            self.storage.write_quantity('logP_groups_chemical', logP_chemical_proposal, iteration=self.iteration)
            self.storage.write_quantity('logP_groups_geometry', logP_geometry_reverse - logP_geometry_forward, iteration=self.iteration)

        return logP_accept, ncmc_new_sampler_state

    def update_positions(self, n_iterations=1):
        """
        Sample new positions by running the wrapped sampler.

        Parameters
        ----------
        n_iterations : int, optional, default=1
            Passed through as ``n_iterations`` to ``self.sampler.run``.
        """
        position_sampler = self.sampler
        position_sampler.run(n_iterations=n_iterations)

    def update_state(self):
        """
        Sample the thermodynamic state.

        Asks the proposal engine for a new chemical state, computes the log
        acceptance probability with ``_geometry_ncmc_geometry`` and applies a
        Metropolis-style accept/reject test. On acceptance the wrapped sampler's
        thermodynamic state, sampler state and topology, and this object's
        ``topology`` and ``state_key``, are switched to the proposed system.
        The outcome is written to storage (when attached) and
        ``update_statistics`` is called at the end.
        """

        # NOTE(review): initial_time is never read again in this method.
        initial_time = time.time()

        # Propose new chemical state.
        if self.verbose: print("Proposing new topology...")
        # NOTE(review): positions is unpacked here but not used below.
        [system, positions] = [self.sampler.thermodynamic_state.get_system(remove_thermostat=True), self.sampler.sampler_state.positions]

        #omm_topology = topology.to_openmm() #convert to OpenMM topology for proposal engine
        self._omm_topology.setPeriodicBoxVectors(self.sampler.sampler_state.box_vectors) #set the box vectors because in OpenMM topology has these...
        topology_proposal = self.proposal_engine.propose(system, self._omm_topology)
        if self.verbose: print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))

        # Determine state keys
        old_state_key = self.state_key
        new_state_key = topology_proposal.new_chemical_state_key

        # Determine log weight (get_log_weight registers unseen keys with weight 0.0)
        old_log_weight = self.get_log_weight(old_state_key)
        new_log_weight = self.get_log_weight(new_state_key)

        logp_accept, ncmc_new_sampler_state = self._geometry_ncmc_geometry(topology_proposal, self.sampler.sampler_state, old_log_weight, new_log_weight)

        # Accept or reject.
        if np.isnan(logp_accept):
            # A NaN acceptance probability is always rejected, even with accept_everything set.
            accept = False
            print('logp_accept = NaN')
        else:
            # Accept outright when logp_accept >= 0; otherwise accept with probability exp(logp_accept).
            # A random number is only drawn in the second case.
            accept = ((logp_accept>=0.0) or (np.random.uniform() < np.exp(logp_accept)))
            if self.accept_everything:
                print('accept_everything option is turned on; accepting')
                accept = True

        if accept:
            self.sampler.thermodynamic_state.set_system(topology_proposal.new_system, fix_state=True)
            # This sets .system on the current sampler_state object, which is replaced two lines below.
            self.sampler.sampler_state.system = topology_proposal.new_system
            self.topology = md.Topology.from_openmm(topology_proposal.new_topology)
            self.sampler.sampler_state = ncmc_new_sampler_state
            self.sampler.topology = self.topology
            self.state_key = topology_proposal.new_chemical_state_key
            # NOTE(review): self._omm_topology is not updated here, so the next proposal is
            # made from the topology stored previously -- confirm this is intended.
            self.naccepted += 1
            if self.verbose: print("    accepted")
        else:
            self.nrejected += 1
            if self.verbose: print("    rejected")

        if self.storage:
            # Record the (possibly updated) configuration and the bookkeeping for this iteration.
            self.storage.write_configuration('positions', self.sampler.sampler_state.positions, self.topology, iteration=self.iteration)
            self.storage.write_object('state_key', self.state_key, iteration=self.iteration)
            self.storage.write_object('proposed_state_key', topology_proposal.new_chemical_state_key, iteration=self.iteration)
            self.storage.write_quantity('naccepted', self.naccepted, iteration=self.iteration)
            self.storage.write_quantity('nrejected', self.nrejected, iteration=self.iteration)
            self.storage.write_quantity('logp_accept', logp_accept, iteration=self.iteration)
            self.storage.write_quantity('logp_topology_proposal', topology_proposal.logp_proposal, iteration=self.iteration)


        # Update statistics.
        self.update_statistics()

    def update(self):
        """
        Advance the expanded-ensemble sampler by one iteration.

        One iteration propagates positions via ``update_positions``, attempts a
        state change via ``update_state``, and then increments ``self.iteration``.
        Afterwards a PDB model is appended if ``self.pdbfile`` is set, and the
        storage layer is synced if ``self.storage`` is truthy.
        """
        separator = "-" * 80

        if self.verbose:
            print(separator)
            print("Expanded Ensemble sampler iteration %8d" % self.iteration)

        # Order matters: propagate, attempt the state change, and only then
        # advance the counter, so both steps see the pre-increment iteration.
        self.update_positions(n_iterations=self._n_iterations_per_update)
        self.update_state()
        self.iteration += 1

        if self.verbose:
            print(separator)

        if self.pdbfile is not None:
            print("Writing frame...")
            # Imported lazily: the OpenMM app layer is only needed when PDB output is enabled.
            from simtk.openmm.app import PDBFile
            openmm_topology = self.topology.to_openmm()
            current_positions = self.sampler.sampler_state.positions
            # The model index is the already-incremented iteration counter.
            PDBFile.writeModel(openmm_topology, current_positions, self.pdbfile, self.iteration)
            self.pdbfile.flush()

        if self.storage:
            self.storage.sync()

    def run(self, niterations=1):
        """
        Drive the sampler by calling ``update()`` repeatedly.

        Parameters
        ----------
        niterations : int, optional, default=1
            How many times ``update()`` is invoked.
        """
        # The loop index is never needed; each update() call advances the sampler's own counter.
        for _ in range(niterations):
            self.update()

    def update_statistics(self):
        """
        Record one more visit to the current state.

        Increments ``self.number_of_state_visits[self.state_key]``, starting
        the count from zero the first time a state key is seen.
        """
        visits = self.number_of_state_visits
        current_key = self.state_key
        previous_count = visits[current_key] if current_key in visits else 0
        visits[current_key] = previous_count + 1
示例#31
0
    def __init__(self,
                 temperature=default_temperature,
                 functions=None,
                 nsteps=default_nsteps,
                 steps_per_propagation=default_steps_per_propagation,
                 timestep=default_timestep,
                 constraint_tolerance=None,
                 platform=None,
                 write_ncmc_interval=1,
                 measure_shadow_work=False,
                 integrator_splitting='V R O H R V',
                 storage=None,
                 verbose=False,
                 LRUCapacity=10,
                 pressure=None,
                 bond_softening_constant=1.0,
                 angle_softening_constant=1.0):
        """
        This is the base class for NCMC switching between two different systems.

        Parameters
        ----------
        temperature : simtk.unit.Quantity with units compatible with kelvin, optional, default=default_temperature
            The temperature at which switching is to be run.
            Passing None selects ``default_temperature``.
        functions : dict of str:str, optional, default=None
            functions[parameter] is the function (parameterized by 't' which switched from 0 to 1) that
            controls how alchemical context parameter 'parameter' is switched.
            If None, ``LambdaProtocol.default_functions`` is used. The dict is deep-copied.
        nsteps : int, optional, default=default_nsteps
            The number of steps to use for switching. Passing None selects ``default_nsteps``.
        steps_per_propagation : int, optional, default=default_steps_per_propagation
            The number of intermediate propagation steps taken at each switching step
        timestep : simtk.unit.Quantity with units compatible with femtoseconds, optional, default=default_timestep
            The timestep to use for integration of switching velocity Verlet steps.
            Passing None selects ``default_timestep``.
        constraint_tolerance : float, optional, default=None
            If not None, this relative constraint tolerance is used for position and velocity constraints.
        platform : simtk.openmm.Platform, optional, default=None
            If specified, the platform to use for OpenMM simulations.
        write_ncmc_interval : int, optional, default=1
            If a positive integer is specified, a snapshot frame will be written to storage with the specified interval on NCMC switching.
            'storage' must also be specified. If None, the write interval falls back to 1.
        measure_shadow_work : bool, optional, default False
            Whether to measure shadow work
        integrator_splitting : str, optional, default='V R O H R V'
            NCMC internal integrator splitting based on OpenMMTools Langevin splittings
        storage : storage object, optional, default=None
            If specified, it is wrapped in a ``NetCDFStorageView`` tagged with this class's name
            and configuration saving is enabled.
        verbose : bool, optional, default=False
            If True, print debug information.
        LRUCapacity : int, default 10
            Capacity of LRU cache for hybrid systems
        pressure : float, default None
            The pressure to use for the simulation. If None, no barostat
        bond_softening_constant : float, optional, default=1.0
            Stored as ``self._bond_softening_constant``.
            NOTE(review): presumably forwarded to the hybrid topology factory -- confirm.
        angle_softening_constant : float, optional, default=1.0
            Stored as ``self._angle_softening_constant``.
            NOTE(review): presumably forwarded to the hybrid topology factory -- confirm.
        """
        # Handle explicit None arguments. Identity checks are used so that
        # arguments overloading ``==`` (e.g. unit-bearing quantities or
        # array-likes) are never compared against None with their own operator.
        if functions is None:
            functions = LambdaProtocol.default_functions
        if nsteps is None:
            nsteps = default_nsteps
        if timestep is None:
            timestep = default_timestep
        if temperature is None:
            temperature = default_temperature

        self._temperature = temperature
        # Deep copy so later changes to the caller's dict do not affect this engine.
        self._functions = copy.deepcopy(functions)
        self._nsteps = nsteps
        self._timestep = timestep
        self._constraint_tolerance = constraint_tolerance
        self._platform = platform
        self._integrator_splitting = integrator_splitting
        self._steps_per_propagation = steps_per_propagation
        self._verbose = verbose
        self._pressure = pressure
        self._bond_softening_constant = bond_softening_constant
        self._angle_softening_constant = angle_softening_constant
        self._disable_barostat = False
        self._hybrid_cache = LRUCache(capacity=LRUCapacity)
        self._measure_shadow_work = measure_shadow_work

        self._nattempted = 0

        # Configurations are only saved when a storage backend was supplied.
        self._storage = None
        if storage is not None:
            self._storage = NetCDFStorageView(storage,
                                              modname=self.__class__.__name__)
        self._save_configuration = storage is not None

        self._write_ncmc_interval = 1 if write_ncmc_interval is None else write_ncmc_interval
        # NOTE(review): unlike _write_ncmc_interval, this keeps a raw None when
        # write_ncmc_interval is None -- confirm that is intended.
        self._work_save_interval = write_ncmc_interval
示例#32
0
class NCMCEngine(object):
    """
    NCMC switching engine

    Examples
    --------

    Create a transformation for an alanine dipeptide test system where the N-methyl group is eliminated.

    >>> from openmmtools import testsystems
    >>> testsystem = testsystems.AlanineDipeptideVacuum()
    >>> from perses.rjmc.topology_proposal import TopologyProposal
    >>> new_to_old_atom_map = { index : index for index in range(testsystem.system.getNumParticles()) if (index > 3) } # all atoms but N-methyl
    >>> topology_proposal = TopologyProposal(old_system=testsystem.system, old_topology=testsystem.topology, old_chemical_state_key='AA', new_chemical_state_key='AA', new_system=testsystem.system, new_topology=testsystem.topology, logp_proposal=0.0, new_to_old_atom_map=new_to_old_atom_map, metadata=dict())
    >>> ncmc_engine = NCMCEngine(temperature=300.0*unit.kelvin, functions=default_functions, nsteps=50, timestep=1.0*unit.femtoseconds)
    >>> positions = testsystem.positions
    >>> [positions, logP_delete, potential_delete] = ncmc_engine.integrate(topology_proposal, positions, direction='delete')
    >>> [positions, logP_insert, potential_insert] = ncmc_engine.integrate(topology_proposal, positions, direction='insert')

    """

    def __init__(self, temperature=default_temperature, functions=None, nsteps=default_nsteps,
                 steps_per_propagation=default_steps_per_propagation, timestep=default_timestep,
                 constraint_tolerance=None, platform=None, write_ncmc_interval=1, measure_shadow_work=False,
                 integrator_splitting='V R O H R V', storage=None, verbose=False, LRUCapacity=10, pressure=None, bond_softening_constant=1.0, angle_softening_constant=1.0):
        """
        This is the base class for NCMC switching between two different systems.

        Arguments
        ---------
        temperature : simtk.unit.Quantity with units compatible with kelvin, optional, default=default_temperature
            The temperature at which switching is to be run.
            Passing None selects ``default_temperature``.
        functions : dict of str:str, optional, default=None
            functions[parameter] is the function (parameterized by 't' which switched from 0 to 1) that
            controls how alchemical context parameter 'parameter' is switched.
            If None, ``python_hybrid_functions`` is used. The dict is deep-copied.
        nsteps : int, optional, default=default_nsteps
            The number of steps to use for switching. Passing None selects ``default_nsteps``.
        steps_per_propagation : int, optional, default=default_steps_per_propagation
            The number of intermediate propagation steps taken at each switching step
        timestep : simtk.unit.Quantity with units compatible with femtoseconds, optional, default=default_timestep
            The timestep to use for integration of switching velocity Verlet steps.
            Passing None selects ``default_timestep``.
        constraint_tolerance : float, optional, default=None
            If not None, this relative constraint tolerance is used for position and velocity constraints.
        platform : simtk.openmm.Platform, optional, default=None
            If specified, the platform to use for OpenMM simulations.
        write_ncmc_interval : int, optional, default=1
            If a positive integer is specified, a snapshot frame will be written to storage with the specified interval on NCMC switching.
            'storage' must also be specified. If None, the write interval falls back to 1.
        measure_shadow_work : bool, optional, default False
            Whether to measure shadow work
        integrator_splitting : str, optional, default='V R O H R V'
            NCMC internal integrator splitting based on OpenMMTools Langevin splittings
        storage : storage object, optional, default=None
            If specified, it is wrapped in a ``NetCDFStorageView`` tagged with this class's name
            and configuration saving is enabled.
        verbose : bool, optional, default=False
            If True, print debug information.
        LRUCapacity : int, default 10
            Capacity of LRU cache for hybrid systems
        pressure : float, default None
            The pressure to use for the simulation. If None, no barostat
        bond_softening_constant : float, optional, default=1.0
            Passed to ``HybridTopologyFactory`` when a hybrid system is built.
        angle_softening_constant : float, optional, default=1.0
            Passed to ``HybridTopologyFactory`` when a hybrid system is built.
        """
        # Handle explicit None arguments. Identity checks are used so that
        # arguments overloading ``==`` (e.g. unit-bearing quantities or
        # array-likes) are never compared against None with their own operator.
        if functions is None:
            functions = python_hybrid_functions
        if nsteps is None:
            nsteps = default_nsteps
        if timestep is None:
            timestep = default_timestep
        if temperature is None:
            temperature = default_temperature

        self._temperature = temperature
        # Deep copy so later changes to the caller's dict do not affect this engine.
        self._functions = copy.deepcopy(functions)
        self._nsteps = nsteps
        self._timestep = timestep
        self._constraint_tolerance = constraint_tolerance
        self._platform = platform
        self._integrator_splitting = integrator_splitting
        self._steps_per_propagation = steps_per_propagation
        self._verbose = verbose
        self._pressure = pressure
        self._bond_softening_constant = bond_softening_constant
        self._angle_softening_constant = angle_softening_constant
        self._disable_barostat = False
        self._hybrid_cache = LRUCache(capacity=LRUCapacity)
        self._measure_shadow_work = measure_shadow_work

        self._nattempted = 0

        # Configurations are only saved when a storage backend was supplied.
        self._storage = None
        if storage is not None:
            self._storage = NetCDFStorageView(storage, modname=self.__class__.__name__)
        self._save_configuration = storage is not None

        self._write_ncmc_interval = 1 if write_ncmc_interval is None else write_ncmc_interval
        # NOTE(review): unlike _write_ncmc_interval, this keeps a raw None when
        # write_ncmc_interval is None -- confirm that is intended.
        self._work_save_interval = write_ncmc_interval

    @property
    def beta(self):
        """Inverse thermal energy, ``1.0 / (kB * temperature)``, for this engine's temperature.

        ``kB`` is the module-level constant (presumably Boltzmann's constant -- confirm).
        """
        return 1.0 / (kB * self._temperature)

    def _compute_energy_contribution(self, hybrid_thermodynamic_state, initial_sampler_state, final_sampler_state):
        """
        Compute NCMC energy contribution to log probability.

        See Eqs. 62 and 63 (two-stage) and Eq. 45 (hybrid) of reference document.
        The result is u(final_positions, lambda=1) - u(initial_positions, lambda=0).

        Note that `hybrid_thermodynamic_state` is modified in place: its alchemical
        parameters are left at 1.0 when this method returns.

        Parameters
        ----------
        hybrid_thermodynamic_state : openmmtools.states.CompoundThermodynamicState
            The thermodynamic state of the hybrid sampler.
        initial_sampler_state : openmmtools.states.SamplerState
            The sampler state of the nonalchemical system at the start of the NCMC protocol with box vectors
        final_sampler_state : openmmtools.states.SamplerState
            The sampler state of the nonalchemical system at the end of the NCMC protocol

        Returns
        -------
        logP_energy : float
            The NCMC energy contribution to log probability.
        """
        def reduced_potential_at(lambda_value, sampler_state):
            # Move the hybrid state to the requested endpoint, then evaluate there.
            hybrid_thermodynamic_state.set_alchemical_parameters(lambda_value)
            return compute_reduced_potential(hybrid_thermodynamic_state, sampler_state)

        # The initial endpoint is evaluated first, so the state finishes at lambda = 1.
        u_initial = reduced_potential_at(0.0, initial_sampler_state)
        u_final = reduced_potential_at(1.0, final_sampler_state)
        return u_final - u_initial

    def _topology_proposal_to_thermodynamic_states(self, topology_proposal):
        """
        Build nonalchemical thermodynamic states for both endpoints of a topology proposal.

        Each state is created at this engine's temperature and pressure. These states
        are intended for computing the "logP_energy" quantity.

        Arguments
        ---------
        topology_proposal : perses.rjmc.TopologyProposal
            topology proposal for whose endpoint systems we want ThermodynamicStates

        Returns
        -------
        old_thermodynamic_state : openmmtools.states.ThermodynamicState
            The old system (nonalchemical) thermodynamic state
        new_thermodynamic_state : openmmtools.states.ThermodynamicState
            The new system (nonalchemical) thermodynamic state
        """
        endpoint_systems = (topology_proposal.old_system, topology_proposal.new_system)
        # Old endpoint first, then new; the unpacking below relies on that order.
        old_thermodynamic_state, new_thermodynamic_state = [
            ThermodynamicState(endpoint, temperature=self._temperature, pressure=self._pressure)
            for endpoint in endpoint_systems
        ]
        return old_thermodynamic_state, new_thermodynamic_state

    def make_alchemical_system(self, topology_proposal, current_positions, new_positions):
        """
        Generate an alchemically-modified system at the correct atoms
        based on the topology proposal. This method generates a hybrid system using the new
        HybridTopologyFactory. It memoizes so that calling multiple times (within a recent time period)
        will immediately return a cached object.

        On a cache hit the cached factory is updated in place with the supplied positions
        and its hybrid positions are recomputed. On a cache miss a new factory is built
        with this engine's bond/angle softening constants and stored in the cache.

        Arguments
        ---------
        topology_proposal : perses.rjmc.TopologyProposal
            Unmodified real system corresponding to appropriate leg of transformation.
        current_positions : np.ndarray of float
            Positions of "old" system
        new_positions : np.ndarray of float
            Positions of "new" system atoms

        Returns
        -------
        hybrid_factory : perses.annihilation.new_relative.HybridTopologyFactory or None
            A factory object containing the hybrid system, or None if the factory
            could not be constructed (the reason is logged as a warning).
        """
        try:
            hybrid_factory = self._hybrid_cache[topology_proposal]

            #If we've retrieved the factory from the cache, update it to include the relevant positions
            hybrid_factory._old_positions = current_positions
            hybrid_factory._new_positions = new_positions
            hybrid_factory._compute_hybrid_positions()
        except KeyError:
            try:
                hybrid_factory = HybridTopologyFactory(topology_proposal, current_positions, new_positions, bond_softening_constant=self._bond_softening_constant, angle_softening_constant=self._angle_softening_constant)
                self._hybrid_cache[topology_proposal] = hybrid_factory
            except Exception as e:
                # Best effort: callers treat None as "cannot build hybrid system".
                # Only Exception is caught so KeyboardInterrupt/SystemExit still
                # propagate, and the cause is logged rather than silently dropped.
                _logger.warning("Unable to construct HybridTopologyFactory: %s", e)
                hybrid_factory = None

        return hybrid_factory

    def integrate(self, topology_proposal, initial_sampler_state, proposed_sampler_state, iteration=None):
        """
        Perform NCMC switching from the old system to the new system through a hybrid system.

        A hybrid topology factory is built (or fetched from the cache) for `topology_proposal`,
        an ``ExternalNonequilibriumSwitchingMove`` is applied to the hybrid sampler state, and the
        final hybrid positions are split back into old-system and new-system sampler states.
        If the hybrid system cannot be constructed, or the switching move raises, the proposal
        is rejected: the input sampler states are returned with ``logP_work = -inf`` and both
        logP endpoints set to 0.0.

        When a storage view is configured, the (strided) NCMC trajectory, the box lengths and
        angles, and the cumulative protocol work are written under `iteration`.

        Parameters
        ----------
        topology_proposal : TopologyProposal
            Contains old/new Topology and System objects and atom mappings.
        initial_sampler_state : openmmtools.states.SamplerState representing the initial (old) system
            Configurational properties of the atoms at the beginning of the NCMC switching.
        proposed_sampler_state : openmmtools.states.SamplerState representing the proposed (post-geometry new) system
            Configurational properties new system atoms at beginning of NCMC switching
        iteration : int, optional, default=None
            Iteration number, for storage purposes.

        Returns
        -------
        final_old_sampler_state : openmmtools.State.SamplerState
            The final configurational properties of the old system after hybrid alchemical switching
        final_sampler_state : openmmtools.states.SamplerState
            The final configurational properties after `nsteps` steps of alchemical switching, and reversion to the nonalchemical system
        logP_work : float
            The NCMC work contribution to the log acceptance probability (Eqs. 62 and 63)
        logP_initial : float
            The initial logP of the hybrid configuration (negative reduced potential with all
            alchemical parameters at 0)
        logP_final : float
            The final logP of the hybrid configuration (negative reduced potential with all
            alchemical parameters at 1)
        """

        assert not initial_sampler_state.has_nan() and not proposed_sampler_state.has_nan()

        #generate or retrieve the hybrid topology factory:
        hybrid_factory = self.make_alchemical_system(topology_proposal, initial_sampler_state.positions, proposed_sampler_state.positions)

        if hybrid_factory is None:
            _logger.warning("Unable to construct hybrid system for {} -> {}".format(topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))
            return initial_sampler_state, proposed_sampler_state, -np.inf, 0.0, 0.0

        #generate the corresponding thermodynamic and sampler states so that we can use the switching move:

        #First generate the thermodynamic state:
        hybrid_system = hybrid_factory.hybrid_system
        hybrid_thermodynamic_state = ThermodynamicState(hybrid_system, temperature=self._temperature, pressure=self._pressure)

        #Now create an RelativeAlchemicalState from the hybrid system:
        alchemical_state = RelativeAlchemicalState.from_system(hybrid_system)
        alchemical_state.set_alchemical_parameters(0.0)

        #Now create a compound thermodynamic state that combines the hybrid thermodynamic state with the alchemical state:
        compound_thermodynamic_state = CompoundThermodynamicState(hybrid_thermodynamic_state, composable_states=[alchemical_state])

        #construct a sampler state from the hybrid positions and the box vectors of the initial sampler state:
        initial_hybrid_positions = hybrid_factory.hybrid_positions
        initial_hybrid_box_vectors = initial_sampler_state.box_vectors

        initial_hybrid_sampler_state = SamplerState(initial_hybrid_positions, box_vectors=initial_hybrid_box_vectors)
        # The move updates the sampler state it is given, so work on a copy and keep
        # the initial hybrid state for the endpoint energy evaluation below.
        final_hybrid_sampler_state = copy.deepcopy(initial_hybrid_sampler_state)

        #create the nonequilibrium move:
        # NOTE(review): the splitting is hard-coded to "V R O R V" here;
        # self._integrator_splitting is not used -- confirm that is intended.
        ne_move = ExternalNonequilibriumSwitchingMove(self._functions, nsteps_neq=self._nsteps,
                                                      timestep=self._timestep, temperature=self._temperature,
                                                      work_configuration_save_interval=self._work_save_interval,
                                                      splitting="V R O R V")

        #run the NCMC protocol; any failure is treated as a rejected proposal
        try:
            ne_move.apply(compound_thermodynamic_state, final_hybrid_sampler_state)
        except Exception as e:
            _logger.warning("NCMC failed because {}; rejecting.".format(str(e)))
            return [initial_sampler_state, proposed_sampler_state, -np.inf, 0.0, 0.0]

        #get the total work:
        logP_work = - ne_move.cumulative_work[-1]

        # Compute contribution of transforming to and from the hybrid system:
        context, _ = global_context_cache.get_context(hybrid_thermodynamic_state)

        #set all alchemical parameters to zero:
        for parameter in self._functions:
            context.setParameter(parameter, 0.0)

        initial_hybrid_sampler_state.apply_to_context(context, ignore_velocities=True)
        initial_reduced_potential = hybrid_thermodynamic_state.reduced_potential(context)

        #set all alchemical parameters to one:
        for parameter in self._functions:
            context.setParameter(parameter, 1.0)

        final_hybrid_sampler_state.apply_to_context(context, ignore_velocities=True)
        final_reduced_potential = hybrid_thermodynamic_state.reduced_potential(context)

        #reset the parameters back to zero just in case
        for parameter in self._functions:
            context.setParameter(parameter, 0.0)

        #compute the output SamplerState, which has the atoms only for the new system post-NCMC:
        new_positions = hybrid_factory.new_positions(final_hybrid_sampler_state.positions)
        new_box_vectors = final_hybrid_sampler_state.box_vectors
        final_sampler_state = SamplerState(new_positions, box_vectors=new_box_vectors)

        #compute the output SamplerState for the atoms only in the old system (required for geometry_logP_reverse)
        old_positions = hybrid_factory.old_positions(final_hybrid_sampler_state.positions)
        old_box_vectors = copy.deepcopy(new_box_vectors) #these are the same as the new system
        final_old_sampler_state = SamplerState(old_positions, box_vectors=old_box_vectors)

        #extract the trajectory and box vectors from the move.
        #Striding backwards from the last frame and then reversing keeps the final frame
        #in the subsample regardless of the write interval.
        trajectory = ne_move.trajectory[::-self._write_ncmc_interval, :, :][::-1]
        topology = hybrid_factory.hybrid_topology
        position_varname = "ncmcpositions"
        nframes = np.shape(trajectory)[0]

        #extract box vectors (same striding as the trajectory):
        box_vec_varname = "ncmcboxvectors"
        box_lengths = ne_move.box_lengths[::-self._write_ncmc_interval, :][::-1]
        box_angles = ne_move.box_angles[::-self._write_ncmc_interval, :][::-1]
        box_lengths_and_angles = np.stack([box_lengths, box_angles])

        #write out the positions of the topology
        if self._storage:
            for frame in range(nframes):
                self._storage.write_configuration(position_varname, trajectory[frame, :, :], topology, iteration=iteration, frame=frame, nframes=nframes)

        #write out the periodic box vectors:
        if self._storage:
            self._storage.write_array(box_vec_varname, box_lengths_and_angles, iteration=iteration)

        #retrieve the protocol work and write that out too:
        protocol_work = ne_move.cumulative_work
        if self._storage:
            self._storage.write_array("protocolwork", protocol_work, iteration=iteration)

        # Return
        return [final_old_sampler_state, final_sampler_state, logP_work, -initial_reduced_potential, -final_reduced_potential]