def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
) -> SimulationSchema:
    """Create the default schema used to estimate this class of property
    from direct simulations.

    The workflow averages the density observable reported by the production
    simulation.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance the property should be estimated to within.
    relative_tolerance: float
        The tolerance the property should be estimated to within, given as a
        fraction of the uncertainty reported for the property.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    converge_to_target = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Generate the simulation protocols around a protocol which averages
    # an observable.
    simulation, final_value, stored_output = generate_simulation_protocols(
        analysis.AverageObservable("average_density"),
        converge_to_target,
        n_molecules=n_molecules,
    )

    # The observable to average is the density from the production run.
    simulation.analysis_protocol.observable = ProtocolPath(
        f"observables[{ObservableType.Density.value}]",
        simulation.production_simulation.id,
    )

    # Collect the protocol schemas in the order they appear in the workflow.
    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        getattr(simulation, stage).schema
        for stage in (
            "build_coordinates",
            "assign_parameters",
            "energy_minimisation",
            "equilibration_simulation",
            "converge_uncertainty",
            "decorrelate_trajectory",
            "decorrelate_observables",
        )
    ]
    workflow.outputs_to_store = {"full_system": stored_output}
    workflow.final_value_source = final_value

    # Wrap the workflow together with the requested tolerances.
    simulation_schema = SimulationSchema()
    simulation_schema.absolute_tolerance = absolute_tolerance
    simulation_schema.relative_tolerance = relative_tolerance
    simulation_schema.workflow_schema = workflow

    return simulation_schema
def default_paprika_schema(
    cls,
    existing_schema: SimulationSchema = None,
    n_solvent_molecules: int = 2500,
    n_thermalization_steps: int = 50000,
    n_equilibration_steps: int = 200000,
    n_production_steps: int = 2500000,
    dt_thermalization: unit.Quantity = 1.0 * unit.femtosecond,
    dt_equilibration: unit.Quantity = 2.0 * unit.femtosecond,
    dt_production: unit.Quantity = 2.0 * unit.femtosecond,
    debug: bool = False,
):
    """Create the default schema for estimating a host-guest binding
    affinity with an attach-pull-release (APR) calculation using the
    ``paprika`` package.

    Notes
    -----
    * The workflow reads several entries from the ``global`` metadata
      (e.g. ``guest_orientations``, ``guest_restraints``, ``wall_restraints``,
      ``symmetry_restraints`` and ``n_guest_microstates``) which must be
      available for each property being estimated. This metadata is
      automatically generated for properties loaded from the ``taproom``
      package using the ``TaproomDataSet`` object.

    Parameters
    ----------
    existing_schema: SimulationSchema, optional
        An existing schema whose settings should be re-used. A deep copy is
        taken and its `workflow_schema` is overwritten by this method.
    n_solvent_molecules
        The number of solvent molecules to add to the box.
    n_thermalization_steps
        The number of thermalization simulation steps to perform. Samples
        generated during this stage are discarded.
    n_equilibration_steps
        The number of equilibration simulation steps to perform. Samples
        generated during this stage are discarded.
    n_production_steps
        The number of production simulation steps to perform. Samples
        generated during this stage are used in the final free energy
        calculation.
    dt_thermalization
        The integration timestep during thermalization.
    dt_equilibration
        The integration timestep during equilibration.
    dt_production
        The integration timestep during production.
    debug
        Whether to return a debug schema. The template protocols are built as
        normal and then overridden so that only 10 solvent molecules are used
        at a mass density of 0.01 g / mL, and every simulation runs in the
        NVT ensemble with 500 steps per iteration and an output frequency
        of 50.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    calculation_schema = SimulationSchema()

    if existing_schema is not None:
        assert isinstance(existing_schema, SimulationSchema)
        calculation_schema = copy.deepcopy(existing_schema)

    # Create the template protocols which the attach / pull / release
    # builders below take as inputs.
    solvation = cls._paprika_default_solvation_protocol(
        n_solvent_molecules=n_solvent_molecules
    )
    minimization, *simulations = cls._paprika_default_simulation_protocols(
        n_thermalization_steps=n_thermalization_steps,
        n_equilibration_steps=n_equilibration_steps,
        n_production_steps=n_production_steps,
        dt_thermalization=dt_thermalization,
        dt_equilibration=dt_equilibration,
        dt_production=dt_production,
    )

    if debug:
        # Shrink the system and shorten every simulation stage.
        solvation.max_molecules = 10
        solvation.mass_density = 0.01 * unit.grams / unit.milliliters

        for template in simulations:
            template.ensemble = Ensemble.NVT
            template.steps_per_iteration = 500
            template.output_frequency = 50

    # The attach-pull calculation is replicated for each guest orientation.
    replicator = ProtocolReplicator("orientation_replicator")
    replicator.template_values = ProtocolPath("guest_orientations", "global")

    placeholder = replicator.placeholder_id
    orientation = f"guest_orientations[{placeholder}]"

    restraints = {
        "static": ProtocolPath(f"{orientation}.static_restraints", "global"),
        "conformational": ProtocolPath(
            f"{orientation}.conformational_restraints", "global"
        ),
        "guest": ProtocolPath("guest_restraints", "global"),
        "wall": ProtocolPath("wall_restraints", "global"),
        "symmetry": ProtocolPath("symmetry_restraints", "global"),
    }

    # Both builders receive exactly the same positional inputs.
    builder_inputs = (replicator, restraints, solvation, minimization, *simulations)

    # Protocols which compute the attach and pull free energies.
    (
        attach_pull_protocols,
        attach_pull_replicators,
        attach_value,
        pull_value,
        reference_work,
    ) = cls._paprika_build_attach_pull_protocols(*builder_inputs)

    # Protocols which compute the release free energies.
    (
        release_protocols,
        release_replicator,
        release_value,
    ) = cls._paprika_build_release_protocols(*builder_inputs)

    # The symmetry correction term.
    symmetry = ComputeSymmetryCorrection("symmetry_correction")
    symmetry.n_microstates = ProtocolPath("n_guest_microstates", "global")
    symmetry.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")

    # Sum the contributions for an individual orientation...
    per_orientation = miscellaneous.AddValues(
        f"orientation_free_energy_{placeholder}"
    )
    per_orientation.values = [
        attach_value,
        pull_value,
        reference_work,
        release_value,
        ProtocolPath("result", symmetry.id),
    ]

    # ...and then combine the values of all of the orientations.
    total = analysis.AverageFreeEnergies("total_free_energy")
    total.values = ProtocolPath("result", per_orientation.id)
    total.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")

    calculation_schema.workflow_schema = WorkflowSchema()
    workflow = calculation_schema.workflow_schema

    workflow.protocol_schemas = [
        protocol.schema
        for protocol in (
            *attach_pull_protocols,
            *release_protocols,
            symmetry,
            per_orientation,
            total,
        )
    ]
    workflow.protocol_replicators = [
        replicator,
        *attach_pull_replicators,
        release_replicator,
    ]

    # The final value is the combined free energy.
    workflow.final_value_source = ProtocolPath("result", total.id)

    return calculation_schema
def default_yank_schema(existing_schema=None):
    """Create the default schema for estimating this class of property
    from direct simulations using YANK.

    The workflow docks the ligand into the receptor, solvates both the docked
    complex and the ligand, assigns force field parameters to each system and
    then runs a ligand-receptor YANK protocol.

    Parameters
    ----------
    existing_schema: SimulationSchema, optional
        An existing schema whose settings should be re-used. A deep copy is
        taken and its `workflow_schema` is overwritten by this method.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """

    def _from_global(name):
        # Each call returns a new path into the global metadata.
        return ProtocolPath(name, "global")

    calculation_schema = SimulationSchema()

    if existing_schema is not None:
        assert isinstance(existing_schema, SimulationSchema)
        calculation_schema = copy.deepcopy(existing_schema)

    workflow = WorkflowSchema(property_type=HostGuestBindingAffinity.__name__)
    workflow.id = f"{HostGuestBindingAffinity.__name__}Schema"

    # Pick out the guest ligand. Only a single ligand is supported.
    ligand_filter = miscellaneous.FilterSubstanceByRole("filter_ligand")
    ligand_filter.input_substance = _from_global("substance")
    ligand_filter.component_roles = [Component.Role.Ligand]
    ligand_filter.expected_components = 1

    # Pick out the host receptor. Only a single receptor is supported.
    receptor_filter = miscellaneous.FilterSubstanceByRole("filter_receptor")
    receptor_filter.input_substance = _from_global("substance")
    receptor_filter.component_roles = [Component.Role.Receptor]
    receptor_filter.expected_components = 1

    # Dock the guest into the host.
    docking = coordinates.BuildDockedCoordinates("perform_docking")
    docking.ligand_substance = ProtocolPath("filtered_substance", ligand_filter.id)
    docking.receptor_coordinate_file = _from_global("receptor_mol2")

    # Pick out the solvent which both systems are solvated with.
    solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
    solvent_filter.input_substance = _from_global("substance")
    solvent_filter.component_roles = [Component.Role.Solvent]

    # Solvate the docked complex.
    complex_solvation = coordinates.SolvateExistingStructure("solvate_complex")
    complex_solvation.max_molecules = 1000
    complex_solvation.substance = ProtocolPath(
        "filtered_substance", solvent_filter.id
    )
    complex_solvation.solute_coordinate_file = ProtocolPath(
        "docked_complex_coordinate_path", docking.id
    )

    # Parameterize the solvated complex.
    complex_system = forcefield.BaseBuildSystem("build_solvated_complex_system")
    complex_system.force_field_path = _from_global("force_field_path")
    complex_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", complex_solvation.id
    )
    complex_system.substance = _from_global("substance")
    complex_system.charged_molecule_paths = [_from_global("receptor_mol2")]

    # Solvate the ligand on its own.
    ligand_solvation = coordinates.SolvateExistingStructure("solvate_ligand")
    ligand_solvation.max_molecules = 1000
    ligand_solvation.substance = ProtocolPath(
        "filtered_substance", solvent_filter.id
    )
    ligand_solvation.solute_coordinate_file = ProtocolPath(
        "docked_ligand_coordinate_path", docking.id
    )

    # Parameterize the solvated ligand.
    ligand_system = forcefield.BaseBuildSystem("build_solvated_ligand_system")
    ligand_system.force_field_path = _from_global("force_field_path")
    ligand_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", ligand_solvation.id
    )
    ligand_system.substance = _from_global("substance")

    # Estimate the binding free energy with YANK.
    yank_run = yank.LigandReceptorYankProtocol("yank_protocol")
    yank_run.thermodynamic_state = _from_global("thermodynamic_state")
    yank_run.number_of_iterations = 2000
    yank_run.steps_per_iteration = 500
    yank_run.checkpoint_interval = 10
    yank_run.verbose = True
    yank_run.force_field_path = _from_global("force_field_path")
    yank_run.ligand_residue_name = ProtocolPath("ligand_residue_name", docking.id)
    yank_run.receptor_residue_name = ProtocolPath(
        "receptor_residue_name", docking.id
    )
    yank_run.solvated_ligand_coordinates = ProtocolPath(
        "coordinate_file_path", ligand_solvation.id
    )
    yank_run.solvated_ligand_system = ProtocolPath(
        "parameterized_system", ligand_system.id
    )
    yank_run.solvated_complex_coordinates = ProtocolPath(
        "coordinate_file_path", complex_solvation.id
    )
    yank_run.solvated_complex_system = ProtocolPath(
        "parameterized_system", complex_system.id
    )

    # Register every protocol, keeping the order in which they were defined.
    for protocol in (
        ligand_filter,
        receptor_filter,
        docking,
        solvent_filter,
        complex_solvation,
        complex_system,
        ligand_solvation,
        ligand_system,
        yank_run,
    ):
        workflow.protocols[protocol.id] = protocol.schema

    # The final value is the free energy difference reported by YANK.
    workflow.final_value_source = ProtocolPath("free_energy_difference", yank_run.id)

    calculation_schema.workflow_schema = workflow
    return calculation_schema
def default_simulation_schema(absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=2000):
    """Create the default schema used to estimate this class of property
    from direct simulations.

    The workflow builds and equilibrates the fully solvated system, builds a
    single solute molecule for the vacuum phase, and then runs a YANK
    solvation protocol inside a conditional group.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance the property should be estimated to within.
    relative_tolerance: float
        The tolerance the property should be estimated to within, given as a
        fraction of the uncertainty reported for the property.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """

    def _from_global(name):
        # Each call returns a new path into the global metadata.
        return ProtocolPath(name, "global")

    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    calculation_schema = SimulationSchema()
    calculation_schema.absolute_tolerance = absolute_tolerance
    calculation_schema.relative_tolerance = relative_tolerance

    converge_to_target = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Coordinates and parameters for the fully solvated system.
    solvated_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_solvated_coordinates"
    )
    solvated_coordinates.substance = _from_global("substance")
    solvated_coordinates.max_molecules = n_molecules

    solvated_parameters = forcefield.BaseBuildSystem("assign_solvated_parameters")
    solvated_parameters.force_field_path = _from_global("force_field_path")
    solvated_parameters.substance = _from_global("substance")
    solvated_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", solvated_coordinates.id
    )

    # Minimise the full system first so that YANK gets a better starting
    # point for its own minimisation.
    minimisation = openmm.OpenMMEnergyMinimisation("energy_minimisation")
    minimisation.system_path = ProtocolPath("system_path", solvated_parameters.id)
    minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", solvated_coordinates.id
    )

    # Equilibrate the minimised system in the NPT ensemble.
    equilibration = openmm.OpenMMSimulation("equilibration_simulation")
    equilibration.ensemble = Ensemble.NPT
    equilibration.steps_per_iteration = 100000
    equilibration.output_frequency = 10000
    equilibration.timestep = 2.0 * unit.femtosecond
    equilibration.thermodynamic_state = _from_global("thermodynamic_state")
    equilibration.system_path = ProtocolPath("system_path", solvated_parameters.id)
    equilibration.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", minimisation.id
    )

    # Split the substance into its solvent and solute parts. The solute-only
    # substance is what gets built for the vacuum phase.
    solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
    solvent_filter.input_substance = _from_global("substance")
    solvent_filter.component_roles = [Component.Role.Solvent]

    solute_filter = miscellaneous.FilterSubstanceByRole("filter_solute")
    solute_filter.input_substance = _from_global("substance")
    solute_filter.component_roles = [Component.Role.Solute]

    # Coordinates and parameters for the single solute molecule in vacuum.
    vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_vacuum_coordinates"
    )
    vacuum_coordinates.substance = ProtocolPath(
        "filtered_substance", solute_filter.id
    )
    vacuum_coordinates.max_molecules = 1

    vacuum_parameters = forcefield.BaseBuildSystem("assign_parameters")
    vacuum_parameters.force_field_path = _from_global("force_field_path")
    vacuum_parameters.substance = ProtocolPath(
        "filtered_substance", solute_filter.id
    )
    vacuum_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", vacuum_coordinates.id
    )

    # The YANK protocol: phase one is the solvated system, phase two uses an
    # empty substance as its solvent.
    yank_run = yank.SolvationYankProtocol("run_solvation_yank")
    yank_run.solute = ProtocolPath("filtered_substance", solute_filter.id)
    yank_run.solvent_1 = ProtocolPath("filtered_substance", solvent_filter.id)
    yank_run.solvent_2 = Substance()
    yank_run.thermodynamic_state = _from_global("thermodynamic_state")
    yank_run.steps_per_iteration = 500
    yank_run.checkpoint_interval = 50
    yank_run.solvent_1_coordinates = ProtocolPath(
        "output_coordinate_file", equilibration.id
    )
    yank_run.solvent_1_system = ProtocolPath("system_path", solvated_parameters.id)
    yank_run.solvent_2_coordinates = ProtocolPath(
        "coordinate_file_path", vacuum_coordinates.id
    )
    yank_run.solvent_2_system = ProtocolPath("system_path", vacuum_parameters.id)

    # The group which re-runs YANK until the free energy has been determined
    # to within the target uncertainty (when one was requested).
    convergence_group = groups.ConditionalGroup("conditional_group")
    convergence_group.max_iterations = 20

    if converge_to_target:
        uncertainty_check = groups.ConditionalGroup.Condition()
        uncertainty_check.type = groups.ConditionalGroup.Condition.Type.LessThan
        uncertainty_check.right_hand_value = _from_global("target_uncertainty")
        uncertainty_check.left_hand_value = ProtocolPath(
            "estimated_free_energy.error", convergence_group.id, yank_run.id
        )

        convergence_group.add_condition(uncertainty_check)

    # The total number of YANK iterations scales with the current iteration
    # of the group...
    iteration_count = miscellaneous.MultiplyValue("total_iterations")
    iteration_count.value = 2000
    iteration_count.multiplier = ProtocolPath(
        "current_iteration", convergence_group.id
    )

    # ...so that the simulation gets extended after each iteration.
    yank_run.number_of_iterations = ProtocolPath("result", iteration_count.id)

    convergence_group.add_protocols(iteration_count, yank_run)

    # Assemble the full workflow schema.
    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        protocol.schema
        for protocol in (
            solvated_coordinates,
            solvated_parameters,
            minimisation,
            equilibration,
            solvent_filter,
            solute_filter,
            vacuum_coordinates,
            vacuum_parameters,
            convergence_group,
        )
    ]
    workflow.final_value_source = ProtocolPath(
        "estimated_free_energy", convergence_group.id, yank_run.id
    )

    calculation_schema.workflow_schema = workflow
    return calculation_schema
def default_simulation_schema(
    cls,
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_molecules=1000,
) -> SimulationSchema:
    """Create the default schema used to estimate this class of property
    from direct simulations.

    The workflow estimates the observable for the full mixture and, through a
    replicator, for each component. The component values are weighted by mole
    fraction, summed, and combined with the mixture value by a subtraction
    protocol.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance the property should be estimated to within.
    relative_tolerance: float
        The tolerance the property should be estimated to within, given as a
        fraction of the uncertainty reported for the property.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """

    def _observable_of(protocols):
        # Path to this class' observable in the production simulation output.
        return ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            protocols.production_simulation.id,
        )

    def _attach_divisor(protocols, id_suffix):
        # Set the divisor of the analysis protocol and hand back the extra
        # protocol returned by ``_n_molecules_divisor``, which may be None.
        divisor, n_molar_molecules = cls._n_molecules_divisor(
            ProtocolPath("output_number_of_molecules", protocols.build_coordinates.id),
            id_suffix,
        )
        protocols.analysis_protocol.divisor = divisor
        return n_molar_molecules

    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    calculation_schema = SimulationSchema()
    calculation_schema.absolute_tolerance = absolute_tolerance
    calculation_schema.relative_tolerance = relative_tolerance

    converge_to_target = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Protocols for the fully mixed system.
    mixture, mixture_value, mixture_stored = generate_simulation_protocols(
        analysis.AverageObservable("extract_observable_mixture"),
        converge_to_target,
        id_suffix="_mixture",
        n_molecules=n_molecules,
    )
    mixture.analysis_protocol.observable = _observable_of(mixture)
    mixture_n_molar = _attach_divisor(mixture, "_mixture")

    # Protocols for a single component. The replicator copies these for each
    # component in the mixture substance.
    replicator = ProtocolReplicator("component_replicator")
    replicator.template_values = ProtocolPath("components", "global")

    placeholder = replicator.placeholder_id
    component_suffix = f"_component_{placeholder}"
    component_substance = ReplicatorValue(replicator.id)

    component, _, component_stored = generate_simulation_protocols(
        analysis.AverageObservable(f"extract_observable_component_{placeholder}"),
        converge_to_target,
        id_suffix=component_suffix,
        n_molecules=n_molecules,
    )

    # Point the component protocols at the replicated substance.
    component.build_coordinates.substance = component_substance

    component.analysis_protocol.observable = _observable_of(component)
    component_n_molar = _attach_divisor(component, component_suffix)

    # Weight the component value by its mole fraction in the full substance.
    mole_fraction_weight = miscellaneous.WeightByMoleFraction(
        f"weight_by_mole_fraction_{placeholder}"
    )
    mole_fraction_weight.value = ProtocolPath(
        "value", component.analysis_protocol.id
    )
    mole_fraction_weight.full_substance = ProtocolPath("substance", "global")
    mole_fraction_weight.component = component_substance

    component.converge_uncertainty.add_protocols(mole_fraction_weight)

    # Converge each component to the per component uncertainty target.
    if converge_to_target:
        component.converge_uncertainty.conditions[0].right_hand_value = ProtocolPath(
            "per_component_uncertainty", "global"
        )

    # Add the weighted component observables together, then combine the sum
    # with the mixture value.
    component_sum = miscellaneous.AddValues("add_component_observables")
    component_sum.values = ProtocolPath(
        "weighted_value",
        component.converge_uncertainty.id,
        mole_fraction_weight.id,
    )

    excess_observable = miscellaneous.SubtractValues("calculate_excess_observable")
    excess_observable.value_b = mixture_value
    excess_observable.value_a = ProtocolPath("result", component_sum.id)

    # Assemble the final workflow schema: component protocols first, then
    # the mixture protocols, then the combining protocols.
    stages = (
        "build_coordinates",
        "assign_parameters",
        "energy_minimisation",
        "equilibration_simulation",
        "converge_uncertainty",
        "decorrelate_trajectory",
        "decorrelate_observables",
    )

    schema = WorkflowSchema()
    schema.protocol_schemas = [
        *(
            getattr(system, stage).schema
            for system in (component, mixture)
            for stage in stages
        ),
        component_sum.schema,
        excess_observable.schema,
    ]

    # The divisor helper only sometimes produces an extra protocol.
    if component_n_molar is not None:
        schema.protocol_schemas.append(component_n_molar.schema)
    if mixture_n_molar is not None:
        schema.protocol_schemas.append(mixture_n_molar.schema)

    schema.protocol_replicators = [replicator]
    schema.final_value_source = ProtocolPath("result", excess_observable.id)
    schema.outputs_to_store = {
        "full_system": mixture_stored,
        f"component_{placeholder}": component_stored,
    }

    calculation_schema.workflow_schema = schema
    return calculation_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Create the default schema used to estimate this class of property
    from direct simulations.

    The workflow averages the potential energy of a liquid phase simulation
    (divided by ``n_molecules``) and of a single molecule gas phase
    simulation. It subtracts one from the other and adds a term formed from
    the molar gas constant multiplied by the temperature.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance the property should be estimated to within.
    relative_tolerance: float
        The tolerance the property should be estimated to within, given as a
        fraction of the uncertainty reported for the property.
    n_molecules: int
        The number of molecules to use in the (liquid) simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    calculation_schema = SimulationSchema()
    calculation_schema.absolute_tolerance = absolute_tolerance
    calculation_schema.relative_tolerance = relative_tolerance

    converge_to_target = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # A single, custom conditional group is shared by both phases so that the
    # liquid and gas enthalpies are converged together.
    convergence_group = groups.ConditionalGroup("conditional_group")
    convergence_group.max_iterations = 100

    # Liquid phase protocols. The average is reported per molecule.
    liquid_average = analysis.AverageObservable("average_liquid_potential")
    liquid_average.divisor = n_molecules

    liquid, liquid_value_source, liquid_stored = generate_simulation_protocols(
        liquid_average,
        converge_to_target,
        "_liquid",
        convergence_group,
        n_molecules=n_molecules,
    )
    liquid_stored.property_phase = PropertyPhase.Liquid

    liquid.analysis_protocol.observable = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        liquid.production_simulation.id,
    )

    # Gas phase protocols.
    gas_average = analysis.AverageObservable("average_gas_potential")

    gas, gas_value_source, gas_stored = generate_simulation_protocols(
        gas_average,
        converge_to_target,
        "_gas",
        convergence_group,
        n_molecules=1,
    )
    gas_stored.property_phase = PropertyPhase.Gas

    gas.analysis_protocol.observable = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        gas.production_simulation.id,
    )

    # Only a single molecule in vacuum should be created for the gas phase.
    gas.build_coordinates.max_molecules = 1
    gas.build_coordinates.mass_density = 0.01 * unit.gram / unit.milliliter

    # The gas phase runs in the NVT ensemble without PBC.
    gas.energy_minimisation.enable_pbc = False

    for gas_simulation in (gas.equilibration_simulation, gas.production_simulation):
        gas_simulation.ensemble = Ensemble.NVT
        gas_simulation.enable_pbc = False

    gas.production_simulation.steps_per_iteration = 15000000
    gas.production_simulation.output_frequency = 5000
    gas.production_simulation.checkpoint_frequency = 100

    # Due to a bizarre issue where the OMM Reference platform is the fastest
    # at computing properties of a single molecule in vacuum, these inputs
    # are set so that the gas calculations run on the Reference platform.
    for gas_simulation in (gas.equilibration_simulation, gas.production_simulation):
        gas_simulation.high_precision = True
        gas_simulation.allow_gpu_platforms = False

    # Combine the two averages into the energy of vaporization...
    energy_difference = miscellaneous.SubtractValues("energy_of_vaporization")
    energy_difference.value_b = ProtocolPath("value", gas_average.id)
    energy_difference.value_a = ProtocolPath("value", liquid_average.id)

    # ...compute the gas constant times temperature term...
    gas_constant_term = miscellaneous.MultiplyValue("ideal_volume")
    gas_constant_term.value = 1.0 * unit.molar_gas_constant
    gas_constant_term.multiplier = ProtocolPath(
        "thermodynamic_state.temperature", "global"
    )

    # ...and add the two to give the enthalpy of vaporization.
    enthalpy = miscellaneous.AddValues("enthalpy_of_vaporization")
    enthalpy.values = [
        ProtocolPath("result", energy_difference.id),
        ProtocolPath("result", gas_constant_term.id),
    ]

    # The extra protocols live inside the custom conditional group.
    convergence_group.add_protocols(energy_difference, gas_constant_term, enthalpy)

    if converge_to_target:
        uncertainty_check = groups.ConditionalGroup.Condition()
        uncertainty_check.type = groups.ConditionalGroup.Condition.Type.LessThan
        uncertainty_check.left_hand_value = ProtocolPath(
            "result.error", convergence_group.id, enthalpy.id
        )
        uncertainty_check.right_hand_value = ProtocolPath(
            "target_uncertainty", "global"
        )

        # Tie the length of both production runs to the group's iteration.
        gas.production_simulation.total_number_of_iterations = ProtocolPath(
            "current_iteration", convergence_group.id
        )
        liquid.production_simulation.total_number_of_iterations = ProtocolPath(
            "current_iteration", convergence_group.id
        )

        convergence_group.add_condition(uncertainty_check)

    # Assemble the workflow schema: liquid protocols, gas protocols, then the
    # shared conditional group.
    stages = (
        "build_coordinates",
        "assign_parameters",
        "energy_minimisation",
        "equilibration_simulation",
        "decorrelate_trajectory",
        "decorrelate_observables",
    )

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        *(
            getattr(phase, stage).schema
            for phase in (liquid, gas)
            for stage in stages
        ),
        convergence_group.schema,
    ]
    workflow.outputs_to_store = {
        "liquid_data": liquid_stored,
        "gas_data": gas_stored,
    }
    workflow.final_value_source = ProtocolPath(
        "result", convergence_group.id, enthalpy.id
    )

    calculation_schema.workflow_schema = workflow
    return calculation_schema
def test_workflow_layer():
    """Exercise the `WorkflowLayer` calculation layer.

    The `SimulationLayer` is the simplest implementation of the abstract
    layer, so it is the one which gets tested here."""

    def check_request(returned_request):
        # Both dummy properties should be estimated without any errors.
        assert len(returned_request.estimated_properties) == 2
        assert len(returned_request.exceptions) == 0

    # A trivial workflow whose single protocol just returns a placeholder
    # value.
    placeholder_value = Observable((1 * unit.kelvin).plus_minus(0.1 * unit.kelvin))

    dummy_protocol = DummyProtocol("protocol_a")
    dummy_protocol.input_value = placeholder_value

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [dummy_protocol.schema]
    workflow.final_value_source = ProtocolPath("output_value", dummy_protocol.id)

    layer_schema = SimulationSchema()
    layer_schema.workflow_schema = workflow

    options = RequestOptions()
    options.add_schema("SimulationLayer", "Density", layer_schema)

    # Queue two dummy density properties for estimation.
    batch = server.Batch()
    batch.queued_properties = [create_dummy_property(Density) for _ in range(2)]
    batch.options = options

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):
        # The layer needs a working directory of its own.
        layer_directory = "simulation_layer"
        os.makedirs(layer_directory)

        # A simple storage backend holding the force field to use.
        storage_backend = LocalFileStorage()
        batch.force_field_id = storage_backend.store_force_field(
            SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")
        )

        # A simple calculation backend to schedule the layer on.
        with DaskLocalCluster() as calculation_backend:
            SimulationLayer().schedule_calculation(
                calculation_backend,
                storage_backend,
                layer_directory,
                batch,
                check_request,
                True,
            )
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this property by direct simulation.

    The workflow estimates the molar volume of the full (mixed) system and of
    each component (via a component replicator), sums the component volumes
    and subtracts that sum from the full system value. Gradients are combined
    in the same way through a gradient replicator.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be specified.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Options shared by the full system and the component workflows.
    shared_options = {
        "use_target_uncertainty": use_target_uncertainty,
        "n_molecules": n_molecules,
    }

    # The id of the replicator which clones the gradient protocols for each
    # gradient key to be estimated.
    gradient_replicator_id = "gradient_replicator"

    # Workflow which yields the molar volume of the full, mixed system.
    (
        full_system_protocols,
        full_system_molar_molecules,
        full_system_volume,
        full_output,
        full_system_gradient_group,
        _unused_full_gradient_replicator,
        full_system_gradient,
    ) = ExcessMolarVolume._get_simulation_protocols(
        "_full", gradient_replicator_id, **shared_options
    )

    # General workflow which yields the molar volume of a single component.
    component_replicator_id = "component_replicator"
    component_substance = ReplicatorValue(component_replicator_id)

    # Weight by the mole fractions of the actual full system, as these may be
    # slightly different to those of the measured property due to rounding.
    full_substance = ProtocolPath(
        "output_substance", full_system_protocols.build_coordinates.id
    )

    (
        component_protocols,
        component_molar_molecules,
        component_volumes,
        component_output,
        component_gradient_group,
        _unused_component_gradient_replicator,
        component_gradient,
    ) = ExcessMolarVolume._get_simulation_protocols(
        "_component",
        gradient_replicator_id,
        replicator_id=component_replicator_id,
        weight_by_mole_fraction=True,
        component_substance_reference=component_substance,
        full_substance_reference=full_substance,
        **shared_options,
    )

    # Sum the component molar volumes, then subtract that sum from the molar
    # volume of the mixed system.
    add_component_molar_volumes = miscellaneous.AddValues(
        "add_component_molar_volumes"
    )
    add_component_molar_volumes.values = component_volumes

    calculate_excess_volume = miscellaneous.SubtractValues("calculate_excess_volume")
    calculate_excess_volume.value_b = full_system_volume
    calculate_excess_volume.value_a = ProtocolPath(
        "result", add_component_molar_volumes.id
    )

    # The replicator which defines how the pure component protocols are
    # replicated for each component.
    component_replicator = ProtocolReplicator(replicator_id=component_replicator_id)
    component_replicator.template_values = ProtocolPath("components", "global")

    # Combine the gradients in the same manner as the volumes.
    add_component_gradients = miscellaneous.AddValues(
        f"add_component_gradients_$({gradient_replicator_id})"
    )
    add_component_gradients.values = component_gradient

    combine_gradients = miscellaneous.SubtractValues(
        f"combine_gradients_$({gradient_replicator_id})"
    )
    combine_gradients.value_b = full_system_gradient
    combine_gradients.value_a = ProtocolPath("result", add_component_gradients.id)

    # A single replicator drives both sets of gradient protocols.
    gradient_replicator = ProtocolReplicator(replicator_id=gradient_replicator_id)
    gradient_replicator.template_values = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Assemble the final workflow schema.
    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        component_protocols.build_coordinates.schema,
        component_protocols.assign_parameters.schema,
        component_protocols.energy_minimisation.schema,
        component_protocols.equilibration_simulation.schema,
        component_protocols.converge_uncertainty.schema,
        component_molar_molecules.schema,
        full_system_protocols.build_coordinates.schema,
        full_system_protocols.assign_parameters.schema,
        full_system_protocols.energy_minimisation.schema,
        full_system_protocols.equilibration_simulation.schema,
        full_system_protocols.converge_uncertainty.schema,
        full_system_molar_molecules.schema,
        component_protocols.extract_uncorrelated_trajectory.schema,
        component_protocols.extract_uncorrelated_statistics.schema,
        full_system_protocols.extract_uncorrelated_trajectory.schema,
        full_system_protocols.extract_uncorrelated_statistics.schema,
        add_component_molar_volumes.schema,
        calculate_excess_volume.schema,
        component_gradient_group.schema,
        full_system_gradient_group.schema,
        add_component_gradients.schema,
        combine_gradients.schema,
    ]
    workflow_schema.protocol_replicators = [gradient_replicator, component_replicator]

    # Tell the schema where to find its final value, gradients and outputs.
    workflow_schema.gradients_sources = [ProtocolPath("result", combine_gradients.id)]
    workflow_schema.final_value_source = ProtocolPath(
        "result", calculate_excess_volume.id
    )
    workflow_schema.outputs_to_store = {
        "full_system": full_output,
        f"component_$({component_replicator_id})": component_output,
    }

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this property by direct simulation.

    The workflow runs the base simulation protocols with an analysis protocol
    which extracts the average density, and adds a group of gradient
    protocols based on reweighting the density statistics.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be specified.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # The analysis protocol: pull the average density out of the simulation
    # statistics.
    extract_density = analysis.ExtractAverageStatistic("extract_density")
    extract_density.statistics_type = ObservableType.Density

    # The protocols which run the simulation itself.
    protocols, value_source, output_to_store = generate_base_simulation_protocols(
        extract_density,
        use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # Inputs of the gradient calculations. The production simulation is
    # addressed through the uncertainty convergence protocol.
    coordinate_source = ProtocolPath(
        "output_coordinate_file", protocols.equilibration_simulation.id
    )

    production_path = (
        protocols.converge_uncertainty.id,
        protocols.production_simulation.id,
    )
    trajectory_source = ProtocolPath("trajectory_file_path", *production_path)
    statistics_source = ProtocolPath("statistics_file_path", *production_path)

    # Template of the reweighting protocol handed to the gradient group.
    reweight_density_template = reweighting.ReweightStatistics("")
    reweight_density_template.statistics_type = ObservableType.Density
    reweight_density_template.statistics_paths = statistics_source
    reweight_density_template.reference_reduced_potentials = statistics_source

    force_field_source = ProtocolPath("force_field_path", "global")

    gradient_group, gradient_replicator, gradient_source = (
        generate_gradient_protocol_group(
            reweight_density_template,
            force_field_source,
            coordinate_source,
            trajectory_source,
            statistics_source,
        )
    )

    # Assemble the workflow schema.
    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        protocols.build_coordinates.schema,
        protocols.assign_parameters.schema,
        protocols.energy_minimisation.schema,
        protocols.equilibration_simulation.schema,
        protocols.converge_uncertainty.schema,
        protocols.extract_uncorrelated_trajectory.schema,
        protocols.extract_uncorrelated_statistics.schema,
        gradient_group.schema,
    ]
    workflow_schema.protocol_replicators = [gradient_replicator]

    workflow_schema.outputs_to_store = {"full_system": output_to_store}

    workflow_schema.gradients_sources = [gradient_source]
    workflow_schema.final_value_source = value_source

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this property by direct simulation.

    The workflow runs the standard simulation protocols with an
    `AverageDielectricConstant` analysis protocol, and adds a
    `ComputeDipoleMoments` protocol to the uncertainty convergence group so
    that dipole moments are available to the analysis.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be specified.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # The protocols which run the simulation itself.
    protocols, value_source, output_to_store = generate_simulation_protocols(
        AverageDielectricConstant("average_dielectric"),
        use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # A protocol which computes the dipole moments from the production
    # trajectory. Its output is fed to the analysis protocol below.
    compute_dipoles = ComputeDipoleMoments("compute_dipoles")
    compute_dipoles.parameterized_system = ProtocolPath(
        "parameterized_system", protocols.assign_parameters.id
    )
    compute_dipoles.trajectory_path = ProtocolPath(
        "trajectory_file_path", protocols.production_simulation.id
    )
    compute_dipoles.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )
    protocols.converge_uncertainty.add_protocols(compute_dipoles)

    # Hook the volumes and dipole moments up to the analysis protocol.
    analysis_protocol = protocols.analysis_protocol
    analysis_protocol.volumes = ProtocolPath(
        f"observables[{ObservableType.Volume.value}]",
        protocols.production_simulation.id,
    )
    analysis_protocol.dipole_moments = ProtocolPath(
        "dipole_moments",
        compute_dipoles.id,
    )

    # Assemble the workflow schema.
    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        protocols.build_coordinates.schema,
        protocols.assign_parameters.schema,
        protocols.energy_minimisation.schema,
        protocols.equilibration_simulation.schema,
        protocols.converge_uncertainty.schema,
        protocols.decorrelate_trajectory.schema,
        protocols.decorrelate_observables.schema,
    ]

    workflow_schema.outputs_to_store = {"full_system": output_to_store}
    workflow_schema.final_value_source = value_source

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this property by direct simulation.

    The workflow runs the base simulation protocols with an
    `ExtractAverageDielectric` analysis protocol, raises the maximum number
    of convergence iterations to 400, and adds a group of gradient protocols
    built around a `ReweightDielectricConstant` protocol.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be specified.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    global_state = ProtocolPath("thermodynamic_state", "global")

    # The analysis protocol: extract the average dielectric constant from
    # the results of a simulation.
    extract_dielectric = ExtractAverageDielectric("extract_dielectric")
    extract_dielectric.thermodynamic_state = global_state

    # The protocols which run the simulation itself.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    protocols, value_source, output_to_store = generate_base_simulation_protocols(
        extract_dielectric,
        use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # Make sure the input of the analysis protocol is properly hooked up.
    extract_dielectric.system_path = ProtocolPath(
        "system_path", protocols.assign_parameters.id
    )

    # Dielectric constants typically take longer to converge, so allow more
    # convergence iterations.
    protocols.converge_uncertainty.max_iterations = 400

    # Inputs of the gradient calculations. Protocols inside the convergence
    # group are addressed through the group's id.
    convergence_id = protocols.converge_uncertainty.id
    production_path = (convergence_id, protocols.production_simulation.id)
    analysis_path = (convergence_id, extract_dielectric.id)

    coordinate_source = ProtocolPath(
        "output_coordinate_file", protocols.equilibration_simulation.id
    )
    trajectory_source = ProtocolPath("trajectory_file_path", *production_path)
    statistics_source = ProtocolPath("statistics_file_path", *production_path)

    # Dielectric constants need a slightly specialised reweighting protocol.
    gradient_mbar_protocol = ReweightDielectricConstant("gradient_mbar")
    gradient_mbar_protocol.reference_dipole_moments = [
        ProtocolPath("dipole_moments", *analysis_path)
    ]
    gradient_mbar_protocol.reference_volumes = [
        ProtocolPath("volumes", *analysis_path)
    ]
    gradient_mbar_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    gradient_mbar_protocol.reference_reduced_potentials = statistics_source

    force_field_source = ProtocolPath("force_field_path", "global")

    gradient_group, gradient_replicator, gradient_source = (
        generate_gradient_protocol_group(
            gradient_mbar_protocol,
            force_field_source,
            coordinate_source,
            trajectory_source,
            statistics_source,
        )
    )

    # Assemble the workflow schema.
    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        protocols.build_coordinates.schema,
        protocols.assign_parameters.schema,
        protocols.energy_minimisation.schema,
        protocols.equilibration_simulation.schema,
        protocols.converge_uncertainty.schema,
        protocols.extract_uncorrelated_trajectory.schema,
        protocols.extract_uncorrelated_statistics.schema,
        gradient_group.schema,
    ]
    workflow_schema.protocol_replicators = [gradient_replicator]

    workflow_schema.outputs_to_store = {"full_system": output_to_store}

    workflow_schema.gradients_sources = [gradient_source]
    workflow_schema.final_value_source = value_source

    layer_schema.workflow_schema = workflow_schema
    return layer_schema