def test_submission():
    """Checks that an (empty) data set can be submitted to, and processed
    by, a locally running evaluator server."""

    with tempfile.TemporaryDirectory() as working_directory:

        with temporarily_change_directory(working_directory), DaskLocalCluster() as backend:

            # Spin up a server instance.
            server = EvaluatorServer(
                calculation_backend=backend,
                working_directory=working_directory,
            )

            with server:

                # Connect a client.
                client = EvaluatorClient()

                # Submit an empty data set for estimation with a SMIRNOFF
                # force field.
                force_field_source = SmirnoffForceFieldSource.from_path(
                    "smirnoff99Frosst-1.1.0.offxml"
                )

                request, error = client.request_estimate(
                    PhysicalPropertyDataSet(), force_field_source
                )

                assert error is None
                assert isinstance(request, Request)

                # Poll until the server has finished with the submission.
                result, error = request.results(polling_interval=0.01)

                assert error is None
                assert isinstance(result, RequestResult)
def _run_yank(directory, available_resources, setup_only):
    """Runs YANK within the specified directory which contains a `yank.yaml`
    input file.

    Parameters
    ----------
    directory: str
        The directory within which to run yank.
    available_resources: ComputeResources
        The compute resources available to yank.
    setup_only: bool
        If true, YANK will only create and validate the setup files,
        but not actually run any simulations. This argument is mainly
        only to be used for testing purposes.

    Returns
    -------
    simtk.unit.Quantity
        The free energy returned by yank.
    simtk.unit.Quantity
        The uncertainty in the free energy returned by yank.
    """

    from yank.experiment import ExperimentBuilder
    from yank.analyze import ExperimentAnalyzer

    from simtk import unit as simtk_unit

    with temporarily_change_directory(directory):

        # Set the default properties on the desired platform
        # before calling into yank.
        setup_platform_with_resources(available_resources)

        exp_builder = ExperimentBuilder("yank.yaml")

        if setup_only:
            # Building the `ExperimentBuilder` has already validated the
            # input file, which is all that is required in setup mode.
            return (
                0.0 * simtk_unit.kilojoule_per_mole,
                0.0 * simtk_unit.kilojoule_per_mole,
            )

        exp_builder.run_experiments()

        analyzer = ExperimentAnalyzer("experiments")
        output = analyzer.auto_analyze()

        free_energy = output["free_energy"]["free_energy_diff_unit"]
        free_energy_uncertainty = output["free_energy"][
            "free_energy_diff_error_unit"
        ]

    return free_energy, free_energy_uncertainty
def test_server_spin_up():
    """Checks that an evaluator server can be started and then shut down
    cleanly."""

    with tempfile.TemporaryDirectory() as working_directory:

        with temporarily_change_directory(working_directory), DaskLocalCluster() as backend:

            server = EvaluatorServer(
                calculation_backend=backend,
                working_directory=working_directory,
            )

            # Give the server a moment to spin up before it is torn down.
            with server:
                sleep(0.5)
def test_base_layer():
    """Checks that the base `CalculationLayer` correctly schedules a batch
    of properties via a dummy layer implementation."""

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    dummy_options = RequestOptions()

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = dummy_options
    batch.force_field_id = ""

    batch.options.calculation_schemas = {
        "Density": {"DummyCalculationLayer": CalculationLayerSchema()}
    }

    with tempfile.TemporaryDirectory() as temporary_directory:

        with temporarily_change_directory(temporary_directory):

            # Create a simple calculation backend to test with. Using the
            # backend as a context manager (rather than calling `start()`
            # manually) ensures it is also shut down again.
            with DaskLocalCluster() as test_backend:

                # Create a simple storage backend to test with.
                test_storage = LocalFileStorage()

                layer_directory = "dummy_layer"
                makedirs(layer_directory)

                def dummy_callback(returned_request):
                    assert len(returned_request.estimated_properties) == 1
                    assert len(returned_request.exceptions) == 2

                dummy_layer = DummyCalculationLayer()

                dummy_layer.schedule_calculation(
                    test_backend,
                    test_storage,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
def test_launch_batch():
    """Checks that launching a batch through the server estimates /
    fails the expected number of properties."""

    # Set up a dummy data set containing two duplicate properties.
    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(
        create_dummy_property(Density), create_dummy_property(Density)
    )

    # Build the batch to launch.
    batch = Batch()
    batch.force_field_id = ""

    batch.options = RequestOptions()
    batch.options.calculation_layers = ["QuickCalculationLayer"]
    batch.options.calculation_schemas = {
        "Density": {"QuickCalculationLayer": CalculationLayerSchema()}
    }

    batch.parameter_gradient_keys = []
    batch.queued_properties = list(data_set)
    batch.validate()

    with tempfile.TemporaryDirectory() as working_directory:

        with temporarily_change_directory(working_directory):

            with DaskLocalCluster() as backend:

                evaluator_server = EvaluatorServer(
                    calculation_backend=backend,
                    working_directory=working_directory,
                )

                evaluator_server._queued_batches[batch.id] = batch
                evaluator_server._launch_batch(batch)

                # Wait for the server to drain the queued properties.
                while batch.queued_properties:
                    sleep(0.01)

                assert len(batch.estimated_properties) == 1
                assert len(batch.unsuccessful_properties) == 1
def _run_tleap(molecule, force_field_source, directory):
    """Uses tleap to apply parameters to a particular molecule,
    generating a `.prmtop` and a `.rst7` file with the applied parameters.

    Parameters
    ----------
    molecule: openforcefield.topology.Molecule
        The molecule to parameterize.
    force_field_source: TLeapForceFieldSource
        The tleap source which describes which parameters to apply.
    directory: str
        The directory to store and temporary files / the final
        parameters in.

    Returns
    -------
    str
        The file path to the `prmtop` file.
    str
        The file path to the `rst7` file.

    Raises
    ------
    ValueError
        If the force field source is not `leaprc.gaff` / `leaprc.gaff2`.
    RuntimeError
        If any of the antechamber / parmchk2 / tleap invocations fail.
    """
    from simtk import unit as simtk_unit

    # Change into the working directory.
    with temporarily_change_directory(directory):

        initial_file_path = "initial.sdf"
        molecule.to_file(initial_file_path, file_format="SDF")

        # Save the molecule charges to a file so antechamber can read
        # them back in (the `-c rc` / `-cf` options below).
        charges = [
            x.value_in_unit(simtk_unit.elementary_charge)
            for x in molecule.partial_charges
        ]

        with open("charges.txt", "w") as file:
            file.write(" ".join(map(str, charges)))

        if force_field_source.leap_source == "leaprc.gaff2":
            amber_type = "gaff2"
        elif force_field_source.leap_source == "leaprc.gaff":
            amber_type = "gaff"
        else:
            raise ValueError(
                f"The {force_field_source.leap_source} source is currently "
                f"unsupported. Only the 'leaprc.gaff2' and 'leaprc.gaff' "
                f"sources are supported."
            )

        # Run antechamber to find the correct atom types.
        processed_mol2_path = "antechamber.mol2"

        antechamber_process = subprocess.Popen(
            [
                "antechamber",
                "-i", initial_file_path, "-fi", "sdf",
                "-o", processed_mol2_path, "-fo", "mol2",
                "-at", amber_type,
                "-rn", "MOL",
                "-an", "no",
                "-pf", "yes",
                "-c", "rc",
                "-cf", "charges.txt",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        antechamber_output, antechamber_error = antechamber_process.communicate()
        antechamber_exit_code = antechamber_process.returncode

        with open("antechamber_output.log", "w") as file:
            file.write(f"error code: {antechamber_exit_code}\nstdout:\n\n")
            file.write(antechamber_output.decode())
            file.write("\nstderr:\n\n")
            file.write(antechamber_error.decode())

        if not os.path.isfile(processed_mol2_path):
            raise RuntimeError(
                f"antechamber failed to assign atom types to the input mol2 file "
                f"({initial_file_path})"
            )

        frcmod_path = None

        if amber_type == "gaff" or amber_type == "gaff2":

            # Optionally run parmchk to find any missing parameters.
            frcmod_path = "parmchk2.frcmod"

            prmchk2_process = subprocess.Popen(
                [
                    "parmchk2",
                    "-i", processed_mol2_path, "-f", "mol2",
                    "-o", frcmod_path,
                    "-s", amber_type,
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            prmchk2_output, prmchk2_error = prmchk2_process.communicate()
            prmchk2_exit_code = prmchk2_process.returncode

            with open("prmchk2_output.log", "w") as file:
                file.write(f"error code: {prmchk2_exit_code}\nstdout:\n\n")
                file.write(prmchk2_output.decode())
                file.write("\nstderr:\n\n")
                file.write(prmchk2_error.decode())

            if not os.path.isfile(frcmod_path):
                raise RuntimeError(
                    f"parmchk2 failed to assign missing {amber_type} parameters "
                    f"to the antechamber created mol2 file ({processed_mol2_path})",
                )

        # Build the tleap input file.
        template_lines = [f"source {force_field_source.leap_source}"]

        if frcmod_path is not None:
            template_lines.append(f"loadamberparams {frcmod_path}")

        prmtop_file_name = "structure.prmtop"
        rst7_file_name = "structure.rst7"

        template_lines.extend(
            [
                f"MOL = loadmol2 {processed_mol2_path}",
                'setBox MOL "centers"',
                "check MOL",
                f"saveamberparm MOL {prmtop_file_name} {rst7_file_name}",
            ]
        )

        input_file_path = "tleap.in"

        with open(input_file_path, "w") as file:
            file.write("\n".join(template_lines))

        # Run tleap. N.B. the flags must not contain trailing whitespace
        # as each argv entry is passed to tleap verbatim.
        tleap_process = subprocess.Popen(
            ["tleap", "-s", "-f", input_file_path], stdout=subprocess.PIPE
        )

        tleap_output, _ = tleap_process.communicate()
        tleap_exit_code = tleap_process.returncode

        with open("tleap_output.log", "w") as file:
            file.write(f"error code: {tleap_exit_code}\nstdout:\n\n")
            file.write(tleap_output.decode())

        if not os.path.isfile(prmtop_file_name) or not os.path.isfile(rst7_file_name):
            raise RuntimeError("tleap failed to execute.")

        # tleap may exit with code 0 even when something went wrong, so
        # additionally scan its log file for known failure markers.
        with open("leap.log", "r") as file:
            if re.search(
                "ERROR|WARNING|Warning|duplicate|FATAL|Could|Fatal|Error",
                file.read(),
            ):
                raise RuntimeError("tleap failed to execute.")

    return (
        os.path.join(directory, prmtop_file_name),
        os.path.join(directory, rst7_file_name),
    )
def _execute(self, directory, available_resources):
    """Parameterizes each unique component of the substance, stitches the
    per-component templates into a single OpenMM system and serializes it
    to `system.xml` within `directory`."""

    from openforcefield.topology import Molecule, Topology

    force_field_source = ForceFieldSource.from_json(self.force_field_path)
    cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

    # Load in the systems topology.
    openmm_pdb_file = app.PDBFile(self.coordinate_file_path)

    # Create an OFF topology for better insight into the layout of the
    # system topology: one representative molecule per unique component,
    # keyed by its canonical SMILES.
    molecules_by_smiles = {}

    for component in self.substance:
        off_molecule = Molecule.from_smiles(component.smiles)
        molecules_by_smiles[off_molecule.to_smiles()] = off_molecule

    # Parameterize each component in the system, each in its own
    # sub-directory.
    templates_by_smiles = {}

    for component_index, (smiles, off_molecule) in enumerate(
        molecules_by_smiles.items()
    ):

        if smiles in ["O", "[H]O[H]", "[H][O][H]"]:
            # Water is special cased and built as a TIP3P system.
            component_system = self._build_tip3p_system(
                cutoff,
                openmm_pdb_file.topology.getUnitCellDimensions(),
            )
        else:
            component_directory = os.path.join(directory, str(component_index))
            os.makedirs(component_directory, exist_ok=True)

            with temporarily_change_directory(component_directory):
                component_system = self._parameterize_molecule(
                    off_molecule, force_field_source, cutoff
                )

        templates_by_smiles[smiles] = component_system

    # Apply the parameters to the topology.
    topology = Topology.from_openmm(
        openmm_pdb_file.topology, molecules_by_smiles.values()
    )

    # Create the full system object from the component templates.
    system = self._create_empty_system(cutoff)

    for topology_molecule in topology.topology_molecules:

        molecule_smiles = topology_molecule.reference_molecule.to_smiles()
        system_template = templates_by_smiles[molecule_smiles]

        # Map each atom's particle index within the full topology onto
        # its index within the component template.
        index_map = {
            topology_atom.atom.molecule_particle_index: atom_index
            for atom_index, topology_atom in enumerate(topology_molecule.atoms)
        }

        # Append the component template to the full system.
        self._append_system(system, system_template, index_map)

    box_vectors = openmm_pdb_file.topology.getPeriodicBoxVectors()

    if box_vectors is not None:
        system.setDefaultPeriodicBoxVectors(*box_vectors)

    # Serialize the system object.
    self.system_path = os.path.join(directory, "system.xml")

    with open(self.system_path, "w") as file:
        file.write(openmm.XmlSerializer.serialize(system))
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer. As the `SimulationLayer`
    is the simplest implementation of the abstract layer, we settle for
    testing this."""

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    # Create a very simple workflow which just returns some placeholder
    # value.
    placeholder_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    dummy_protocol = DummyInputOutputProtocol("protocol_a")
    dummy_protocol.input_value = placeholder_value

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [dummy_protocol.schema]
    workflow_schema.final_value_source = ProtocolPath(
        "output_value", dummy_protocol.id
    )

    simulation_schema = SimulationSchema()
    simulation_schema.workflow_schema = workflow_schema

    options = RequestOptions()
    options.add_schema("SimulationLayer", "Density", simulation_schema)

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = options

    with tempfile.TemporaryDirectory() as working_directory:

        with temporarily_change_directory(working_directory):

            # Create a directory for the layer.
            layer_directory = "simulation_layer"
            os.makedirs(layer_directory)

            # Set-up a simple storage backend and add a force field to it.
            force_field = SmirnoffForceFieldSource.from_path(
                "smirnoff99Frosst-1.1.0.offxml"
            )

            storage_backend = LocalFileStorage()
            batch.force_field_id = storage_backend.store_force_field(force_field)

            # Create a simple calculation backend to test with.
            with DaskLocalCluster() as calculation_backend:

                def dummy_callback(returned_request):
                    assert len(returned_request.estimated_properties) == 2
                    assert len(returned_request.exceptions) == 0

                simulation_layer = SimulationLayer()

                simulation_layer.schedule_calculation(
                    calculation_backend,
                    storage_backend,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
def pack_box(
    molecules,
    number_of_copies,
    structure_to_solvate=None,
    center_solute=True,
    tolerance=2.0 * unit.angstrom,
    box_size=None,
    mass_density=None,
    box_aspect_ratio=None,
    verbose=False,
    working_directory=None,
    retain_working_files=False,
):
    """Run packmol to generate a box containing a mixture of molecules.

    Parameters
    ----------
    molecules : list of openforcefield.topology.Molecule
        The molecules in the system.
    number_of_copies : list of int
        A list of the number of copies of each molecule type, of length
        equal to the length of `molecules`.
    structure_to_solvate: str, optional
        A file path to the PDB coordinates of the structure to be solvated.
    center_solute: bool
        If `True`, the structure to solvate will be centered in the
        simulation box. This option is only applied when
        `structure_to_solvate` is set.
    tolerance : pint.Quantity
        The minimum spacing between molecules during packing in units
        compatible with angstroms.
    box_size : pint.Quantity, optional
        The size of the box to generate in units compatible with angstroms.
        If `None`, `mass_density` must be provided.
    mass_density : pint.Quantity, optional
        Target mass density for final system with units compatible with
        g / mL. If `None`, `box_size` must be provided.
    box_aspect_ratio: list of float, optional
        The aspect ratio of the simulation box, used in conjunction with
        the `mass_density` parameter. If none, an isotropic ratio (i.e.
        [1.0, 1.0, 1.0]) is used.
    verbose : bool
        If True, verbose output is written.
    working_directory: str, optional
        The directory in which to generate the temporary working files.
        If `None`, a temporary one will be created.
    retain_working_files: bool
        If True all of the working files, such as individual molecule
        coordinate files, will be retained.

    Returns
    -------
    mdtraj.Trajectory
        The packed box encoded in an mdtraj trajectory.
    list of str
        The residue names which were assigned to each of the molecules
        in the `molecules` list.

    Raises
    ------
    PackmolRuntimeException
        When packmol fails to execute / converge.
    """

    if mass_density is not None and box_aspect_ratio is None:
        box_aspect_ratio = [1.0, 1.0, 1.0]

    # Make sure packmol can be found.
    packmol_path = _find_packmol()

    if packmol_path is None:
        raise IOError("Packmol not found, cannot run pack_box()")

    # Validate the inputs.
    _validate_inputs(
        molecules,
        number_of_copies,
        structure_to_solvate,
        box_aspect_ratio,
        box_size,
        mass_density,
    )

    # Estimate the box_size from mass density if one is not provided.
    if box_size is None:
        box_size = _approximate_box_size_by_density(
            molecules, number_of_copies, mass_density, box_aspect_ratio
        )

    # Set up the directory to create the working files in.
    temporary_directory = False

    if working_directory is None:
        working_directory = tempfile.mkdtemp()
        temporary_directory = True

    if len(working_directory) > 0:
        os.makedirs(working_directory, exist_ok=True)

    # Copy the structure to solvate into the working directory so that
    # packmol can reference it by a relative path.
    if structure_to_solvate is not None:

        shutil.copyfile(
            structure_to_solvate,
            os.path.join(working_directory, "solvate.pdb"),
        )
        structure_to_solvate = "solvate.pdb"

    assigned_residue_names = []

    with temporarily_change_directory(working_directory):

        # Create PDB files for all of the molecules.
        pdb_file_names = []
        mdtraj_topologies = []

        for index, molecule in enumerate(molecules):

            mdtraj_trajectory = _create_trajectory(molecule)

            pdb_file_name = f"{index}.pdb"
            pdb_file_names.append(pdb_file_name)

            mdtraj_trajectory.save_pdb(pdb_file_name)
            mdtraj_topologies.append(mdtraj_trajectory.topology)

            residue_name = mdtraj_trajectory.topology.residue(0).name
            assigned_residue_names.append(residue_name)

        # Generate the input file.
        output_file_name = "packmol_output.pdb"

        input_file_path = _build_input_file(
            pdb_file_names,
            number_of_copies,
            structure_to_solvate,
            center_solute,
            box_size,
            tolerance,
            output_file_name,
        )

        with open(input_file_path) as file_handle:

            result = subprocess.check_output(
                packmol_path, stdin=file_handle, stderr=subprocess.STDOUT
            ).decode("utf-8")

            if verbose:
                logger.info(result)

        packmol_succeeded = result.find("Success!") > 0

        if not retain_working_files:

            os.unlink(input_file_path)

            for file_path in pdb_file_names:
                os.unlink(file_path)

        if not packmol_succeeded:

            if verbose:
                logger.info("Packmol failed to converge")

            if os.path.isfile(output_file_name):
                os.unlink(output_file_name)

            if temporary_directory and not retain_working_files:
                shutil.rmtree(working_directory)

            raise PackmolRuntimeException(result)

        # Add a 2 angstrom buffer to help alleviate PBC issues.
        box_size = [
            (x + 2.0 * unit.angstrom).to(unit.nanometer).magnitude for x in box_size
        ]

        # Append missing connect statements to the end of the
        # output file.
        trajectory = _correct_packmol_output(
            output_file_name,
            mdtraj_topologies,
            number_of_copies,
            structure_to_solvate,
        )

        trajectory.unitcell_lengths = box_size
        trajectory.unitcell_angles = [90.0] * 3

        if not retain_working_files:
            os.unlink(output_file_name)

    # Remove the temporary directory only after changing back out of it -
    # deleting the current working directory is not portable.
    if temporary_directory and not retain_working_files:
        shutil.rmtree(working_directory)

    return trajectory, assigned_residue_names