def _execute(self, directory, available_resources):
    """Load a stored simulation data object from disk and unpack its
    contents onto this protocol's output attributes.

    Parameters
    ----------
    directory: str
        The directory to execute in (unused by this protocol).
    available_resources
        The compute resources available (unused by this protocol).

    Raises
    ------
    ValueError
        If ``simulation_data_path`` is not a three-tuple, if any of its
        components point to a missing file / directory, or if the loaded
        object is not a `StoredSimulationData`.
    """
    if len(self.simulation_data_path) != 3:
        raise ValueError(
            "The simulation data path should be a tuple of a path to the data "
            "object, directory, and a path to the force field used to generate it."
        )

    # Unpack rather than index — the length was validated above.
    data_object_path, data_directory, force_field_path = self.simulation_data_path

    # Validate every component of the tuple up front so a bad input fails
    # with a clear message rather than an opaque I/O error further down.
    if not path.isfile(data_object_path):
        raise ValueError(
            f"The path to the data object is invalid: {data_object_path}"
        )

    if not path.isdir(data_directory):
        raise ValueError(
            f"The path to the data directory is invalid: {data_directory}"
        )

    if not path.isfile(force_field_path):
        raise ValueError(
            f"The path to the force field is invalid: {force_field_path}"
        )

    data_object = StoredSimulationData.from_json(data_object_path)

    if not isinstance(data_object, StoredSimulationData):
        raise ValueError(
            f"The data path must point to a `StoredSimulationData` "
            f"object, and not a {data_object.__class__.__name__}",
        )

    # Expose the stored data through this protocol's outputs. File names
    # inside the data object are relative to the data directory.
    self.substance = data_object.substance
    self.total_number_of_molecules = data_object.number_of_molecules
    self.thermodynamic_state = data_object.thermodynamic_state
    self.observables = data_object.observables

    self.coordinate_file_path = path.join(
        data_directory, data_object.coordinate_file_name
    )
    self.trajectory_file_path = path.join(
        data_directory, data_object.trajectory_file_name
    )

    self.force_field_path = force_field_path
def return_bad_result(physical_property, layer_directory, **_):
    """Return a result which leads to an unhandled exception."""
    # A deliberately empty stored-data object: serializing it is fine, but
    # anything that later tries to consume it as real data will fail.
    data_directory = path.join(layer_directory, "bad_dummy_data")
    object_path = path.join(layer_directory, "bad_dummy_data.json")

    makedirs(data_directory, exist_ok=True)

    with open(object_path, "w") as json_file:
        json.dump(StoredSimulationData(), json_file, cls=TypedJSONEncoder)

    result = CalculationLayerResult()
    result.physical_property = physical_property
    result.data_to_store = [(object_path, data_directory)]

    return result
def create_dummy_simulation_data(
    directory_path,
    substance,
    force_field_id="dummy_ff_id",
    coordinate_file_name="output.pdb",
    trajectory_file_name="trajectory.dcd",
    statistics_file_name="statistics.csv",
    statistical_inefficiency=1.0,
    phase=PropertyPhase.Liquid,
    number_of_molecules=1,
    calculation_id=None,
):
    """Creates a dummy `StoredSimulationData` object and the corresponding
    data directory.

    Parameters
    ----------
    directory_path: str
        The path to the dummy data directory to create.
    substance: Substance
        The substance the dummy data was nominally generated for.
    force_field_id: str
        The id of the force field nominally used to generate the data.
    coordinate_file_name: str
        The name of the (empty) coordinate file to create.
    trajectory_file_name: str
        The name of the (empty) trajectory file to create.
    statistics_file_name: str
        The name of the (empty) statistics file to create.
    statistical_inefficiency: float
        The statistical inefficiency to record on the data object.
    phase: PropertyPhase
        The phase to record on the data object.
    number_of_molecules: int
        The number of molecules to record on the data object.
    calculation_id: str, optional
        The id of the calculation which nominally produced the data. A
        random UUID is generated when this is `None`.

    Returns
    -------
    StoredSimulationData
        The dummy stored data object.
    """
    os.makedirs(directory_path, exist_ok=True)

    data = StoredSimulationData()
    data.substance = substance
    data.force_field_id = force_field_id
    # NOTE(review): a placeholder state — presumably only the object shape
    # matters for consumers of this dummy data, not the physical values.
    data.thermodynamic_state = ThermodynamicState(1.0 * unit.kelvin)
    data.property_phase = phase

    data.coordinate_file_name = coordinate_file_name
    data.trajectory_file_name = trajectory_file_name
    data.statistics_file_name = statistics_file_name

    # Touch empty placeholder files so the directory has the expected layout.
    for file_name in (
        coordinate_file_name,
        trajectory_file_name,
        statistics_file_name,
    ):
        with open(os.path.join(directory_path, file_name), "w") as file:
            file.write("")

    data.statistical_inefficiency = statistical_inefficiency
    data.number_of_molecules = number_of_molecules

    data.source_calculation_id = (
        str(uuid.uuid4()) if calculation_id is None else calculation_id
    )

    return data
def generate_simulation_protocols(
    analysis_protocol: S,
    use_target_uncertainty: bool,
    id_suffix: str = "",
    conditional_group: Optional[ConditionalGroup] = None,
    n_molecules: int = 1000,
) -> Tuple[SimulationProtocols[S], ProtocolPath, StoredSimulationData]:
    """Constructs a set of protocols which, when combined in a workflow schema,
    may be executed to run a single simulation to estimate the average value of
    an observable.

    The protocols returned will:

        1) Build a set of liquid coordinates for the property substance using
           packmol.

        2) Assign a set of smirnoff force field parameters to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps using a
           timestep of 2fs.

        5) Within a conditional group (up to a maximum of 100 times):

            5a) Run a longer NPT production simulation for 1000000 steps using
                a timestep of 2fs

            5b) Extract the average value of an observable and it's
                uncertainty.

            5c) If a convergence mode is set by the options, check if the
                target uncertainty has been met. If not, repeat steps 5a),
                5b) and 5c).

        6) Extract uncorrelated configurations from a generated production
           simulation.

        7) Extract uncorrelated statistics from a generated production
           simulation.

    Parameters
    ----------
    analysis_protocol
        The protocol which will extract the observable of interest from the
        generated simulation data.
    use_target_uncertainty
        Whether to run the simulation until the observable is estimated to
        within the target uncertainty.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ProtocolGroup, optional
        A custom group to wrap the main simulation / extraction protocols
        within. It is up to the caller of this method to manually add the
        convergence conditions to this group. If `None`, a default group
        with uncertainty convergence conditions is automatically constructed.
    n_molecules: int
        The number of molecules to use in the workflow.

    Returns
    -------
    The protocols to add to the workflow, a reference to the average value of
    the estimated observable (an ``Observable`` object), and an object which
    describes the default data from a simulation to store, such as the
    uncorrelated statistics and configurations.
    """
    # 1) Build the liquid box with packmol; substance comes from the global
    # workflow inputs.
    build_coordinates = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}"
    )
    build_coordinates.substance = ProtocolPath("substance", "global")
    build_coordinates.max_molecules = n_molecules

    # 2) Parameterize the built system with the globally supplied force field.
    assign_parameters = forcefield.BaseBuildSystem(f"assign_parameters{id_suffix}")
    assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    assign_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    assign_parameters.substance = ProtocolPath("output_substance", build_coordinates.id)

    # Equilibration
    energy_minimisation = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}"
    )
    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    energy_minimisation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    equilibration_simulation = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}"
    )
    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 5000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id
    )
    equilibration_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    # Production
    production_simulation = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production_simulation.ensemble = Ensemble.NPT
    production_simulation.steps_per_iteration = 1000000
    production_simulation.output_frequency = 2000
    production_simulation.timestep = 2.0 * unit.femtosecond
    production_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    production_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id
    )
    production_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )
    production_simulation.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Set up a conditional group to ensure convergence of uncertainty
    if conditional_group is None:
        conditional_group = groups.ConditionalGroup(f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

        if use_target_uncertainty:
            # Loop until the estimated uncertainty drops below the globally
            # supplied target uncertainty.
            condition = groups.ConditionalGroup.Condition()
            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.left_hand_value = ProtocolPath(
                "value.error", conditional_group.id, analysis_protocol.id
            )

            conditional_group.add_condition(condition)

            # Make sure the simulation gets extended after each iteration.
            production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", conditional_group.id
            )

    conditional_group.add_protocols(production_simulation, analysis_protocol)

    # Point the analyse protocol to the correct data sources
    if not isinstance(analysis_protocol, analysis.BaseAverageObservable):
        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryObservable or BaseAverageObservable "
            "protocols."
        )

    analysis_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    analysis_protocol.potential_energies = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        production_simulation.id,
    )

    # Finally, extract uncorrelated data. The protocols inside the conditional
    # group are addressed through the group's id.
    time_series_statistics = ProtocolPath(
        "time_series_statistics", conditional_group.id, analysis_protocol.id
    )
    coordinate_file = ProtocolPath(
        "output_coordinate_file", conditional_group.id, production_simulation.id
    )
    trajectory_path = ProtocolPath(
        "trajectory_file_path", conditional_group.id, production_simulation.id
    )
    observables = ProtocolPath(
        "observables", conditional_group.id, production_simulation.id
    )

    decorrelate_trajectory = analysis.DecorrelateTrajectory(
        f"decorrelate_trajectory{id_suffix}"
    )
    decorrelate_trajectory.time_series_statistics = time_series_statistics
    decorrelate_trajectory.input_coordinate_file = coordinate_file
    decorrelate_trajectory.input_trajectory_path = trajectory_path

    decorrelate_observables = analysis.DecorrelateObservables(
        f"decorrelate_observables{id_suffix}"
    )
    decorrelate_observables.time_series_statistics = time_series_statistics
    decorrelate_observables.input_observables = observables

    # Build the object which defines which pieces of simulation data to store.
    output_to_store = StoredSimulationData()

    output_to_store.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    output_to_store.property_phase = PropertyPhase.Liquid

    # The force field id and calculation id are filled in by the caller /
    # calculation layer, hence the placeholders.
    output_to_store.force_field_id = PlaceholderValue()

    output_to_store.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", build_coordinates.id
    )
    output_to_store.substance = ProtocolPath("output_substance", build_coordinates.id)
    output_to_store.statistical_inefficiency = ProtocolPath(
        "time_series_statistics.statistical_inefficiency",
        conditional_group.id,
        analysis_protocol.id,
    )
    output_to_store.observables = ProtocolPath(
        "output_observables", decorrelate_observables.id
    )
    output_to_store.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )
    output_to_store.coordinate_file_name = coordinate_file

    output_to_store.source_calculation_id = PlaceholderValue()

    # Define where the final values come from.
    final_value_source = ProtocolPath(
        "value", conditional_group.id, analysis_protocol.id
    )

    base_protocols = SimulationProtocols(
        build_coordinates,
        assign_parameters,
        energy_minimisation,
        equilibration_simulation,
        production_simulation,
        analysis_protocol,
        conditional_group,
        decorrelate_trajectory,
        decorrelate_observables,
    )

    return base_protocols, final_value_source, output_to_store
def process_successful_property(physical_property, layer_directory, **_):
    """Return a result as if the property had been successfully estimated."""
    data_directory = path.join(layer_directory, "good_dummy_data")
    makedirs(data_directory, exist_ok=True)

    # Populate a minimal stored-data object from the property itself.
    stored_object = StoredSimulationData()
    stored_object.substance = physical_property.substance
    stored_object.thermodynamic_state = physical_property.thermodynamic_state
    stored_object.property_phase = physical_property.phase
    stored_object.statistical_inefficiency = 1.0
    stored_object.number_of_molecules = 10

    # The remaining string fields are irrelevant to these tests — blank them.
    for attribute_name in (
        "force_field_id",
        "coordinate_file_name",
        "trajectory_file_name",
        "statistics_file_name",
        "source_calculation_id",
    ):
        setattr(stored_object, attribute_name, "")

    stored_object_path = path.join(layer_directory, "good_dummy_data.json")

    with open(stored_object_path, "w") as json_file:
        json.dump(stored_object, json_file, cls=TypedJSONEncoder)

    result = CalculationLayerResult()
    result.physical_property = physical_property
    result.data_to_store = [(stored_object_path, data_directory)]

    return result