def convert_pre_opt(cls, method: str) -> QCOptions:
    """Build the QCOptions describing how a pre-optimisation method should be run.

    Recognised method names are mapped to the program (and basis) paired with
    them below; any other name falls back to openmm with a smirnoff basis.
    """
    # (program, basis, method names handled by that program), checked in order
    known_engines = (
        ("rdkit", None, ("mmff94", "mmff94s", "uff")),
        ("xtb", None, ("gfn1xtb", "gfn2xtb", "gfn0xtb")),
        ("torchani", None, ("ani1x", "ani1ccx", "ani2x")),
        ("openmm", "antechamber", ("gaff-2.11",)),
    )
    for program, basis, supported in known_engines:
        if method in supported:
            return QCOptions(program=program, basis=basis, method=method)
    # anything unrecognised is passed to openmm as a smirnoff method
    return QCOptions(program="openmm", basis="smirnoff", method=method)
def test_full_tdrive(tmpdir, workers, capsys):
    """
    Run a complete torsiondrive on ethane with the cheap rdkit uff method and
    check the captured output reports that a new drive was started.
    """
    with tmpdir.as_cwd():
        mol = Ligand.from_file(get_data("ethane.sdf"))
        # describe the scan around the first rotatable bond
        rotatable = mol.find_rotatable_bonds()[0]
        torsion = mol.dihedrals[rotatable.indices][0]
        scan = TorsionScan(torsion=torsion, scan_range=(-165, 180))
        spec = QCOptions(program="rdkit", basis=None, method="uff")
        resources = LocalResource(cores=workers, memory=2)
        driver = TorsionDriver(n_workers=workers, grid_spacing=60)
        _ = driver.run_torsiondrive(
            molecule=mol,
            dihedral_data=scan,
            qc_spec=spec,
            local_options=resources,
        )
        output = capsys.readouterr()
        # a fresh torsiondrive should have been announced
        assert "Starting new torsiondrive" in output.out
def test_get_initial_state(tmpdir, starting_conformations):
    """
    Check the initial torsiondrive state built for ethane holds the expected
    dihedral, elements, scan range and starting coordinates.
    """
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("ethane.sdf"))
        central_bond = ligand.find_rotatable_bonds()[0]
        torsion = ligand.dihedrals[central_bond.indices][0]
        driver = TorsionDriver(starting_conformations=starting_conformations)
        # describe the scan to be performed
        scan = TorsionScan(torsion=torsion, scan_range=(-165, 180))
        state = driver._create_initial_state(
            molecule=ligand, dihedral_data=scan, qc_spec=QCOptions()
        )
        assert state["dihedrals"] == [torsion]
        expected_elements = [atom.atomic_symbol for atom in ligand.atoms]
        assert state["elements"] == expected_elements
        assert state["dihedral_ranges"] == [(-165, 180)]
        # the first stored geometry is the input geometry scaled by ANGS_TO_BOHR
        scaled_input = ligand.coordinates * constants.ANGS_TO_BOHR
        assert np.allclose(scaled_input, state["init_coords"][0])
        # never more geometries than the requested number of conformations
        assert len(state["init_coords"]) <= driver.starting_conformations
def test_optimise_grid_point_and_update(tmpdir, ethane_state):
    """
    Optimise one grid point of the ethane scan, feed the result back into the
    state and check the neighbouring grid points are queued next.
    """
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("ethane.sdf"))
        driver = TorsionDriver(n_workers=1)
        spec = QCOptions(program="rdkit", basis=None, method="uff")
        resources = LocalResource(cores=1, memory=1)
        optimiser = driver._build_geometry_optimiser()
        # ask the driver for the pending jobs and take the first -60 geometry
        pending = driver._get_new_jobs(td_state=ethane_state)
        start_coords = pending["-60"][0]
        grid_result = optimise_grid_point(
            geometry_optimiser=optimiser,
            qc_spec=spec,
            local_options=resources,
            molecule=ligand,
            coordinates=start_coords,
            dihedral=ethane_state["dihedrals"][0],
            dihedral_angle=-60,
            job_id=0,
        )
        updated_state = driver._update_state(
            td_state=ethane_state, result_data=[grid_result]
        )
        follow_up = driver._get_new_jobs(td_state=updated_state)
        for angle in ("-75", "-45"):
            assert angle in follow_up
def test_program_not_installed():
    """
    Requesting a program called "test" should raise a SpecificationError when
    the program is set and validated.
    """
    options = QCOptions()
    # both the assignment and the validation stay inside the raises block
    with pytest.raises(SpecificationError):
        options.program = "test"
        options.validate_program()
def test_td_method_conversion_gaussian():
    """
    With gaussian as the program the method name should be left untouched in
    the qc model, even when TD settings are supplied.
    """
    td_options = TDSettings(use_tda=True, n_states=3)
    gaussian_spec = QCOptions(
        program="gaussian",
        method="cam-b3lyp",
        basis="6-31G",
        td_settings=td_options,
    )
    model = gaussian_spec.qc_model
    assert model.method == "cam-b3lyp"
def test_td_method_conversion_psi4(functional, result):
    """
    With psi4 and TD settings the qc model method should equal the expected
    converted name supplied as ``result``.
    """
    td_options = TDSettings(use_tda=True, n_states=3)
    psi4_spec = QCOptions(
        program="psi4",
        method=functional,
        basis="6-31G",
        td_settings=td_options,
    )
    model = psi4_spec.qc_model
    assert model.method == result
def _get_qc_options(self) -> Optional[QCOptions]:
    """
    Build a QCOptions model from these settings, or return None when either
    the basis or the method has not been set.
    """
    if self.basis is None or self.method is None:
        return None
    return QCOptions(
        program=self.program,
        method=self.method,
        basis=self.basis,
        td_settings=self.td_settings,
    )
def test_td_psi4_validate():
    """
    A psi4 specification carrying TD settings should be rejected by
    validate_specification with a SpecificationError.
    """
    td_options = TDSettings(n_states=3, use_tda=False)
    psi4_spec = QCOptions(
        program="psi4",
        method="td-cam-b3lyp",
        basis="6-31+G*",
        td_settings=td_options,
    )
    with pytest.raises(SpecificationError):
        psi4_spec.validate_specification()
def test_optking_fail():
    """
    Using the optking optimiser together with an rdkit specification should
    end in a SpecificationError.
    """
    # every step stays inside the raises block, exactly as before
    with pytest.raises(SpecificationError):
        water = Ligand.from_file(file_name=get_data("water.pdb"))
        optimiser = GeometryOptimiser(optimiser="optking")
        rdkit_spec = QCOptions(program="rdkit", basis=None, method="uff")
        resources = LocalResource(cores=1, memory=1)
        optimiser.optimise(
            molecule=water,
            qc_spec=rdkit_spec,
            local_options=resources,
        )
def test_ani_hessian(acetone, tmpdir):
    """
    Compute a hessian for acetone with the ani2x model; the hessian should be
    stored on the molecule while the wbo stays unset.
    """
    with tmpdir.as_cwd():
        # guard against a hessian already being present on the fixture
        assert acetone.hessian is None
        ani_spec = QCOptions(program="torchani", basis=None, method="ani2x")
        resources = LocalResource(cores=1, memory=1)
        stage = Hessian()
        stage.run(molecule=acetone, qc_spec=ani_spec, local_options=resources)
        assert acetone.wbo is None
        assert acetone.hessian is not None
        assert acetone.hessian.shape == (30, 30)
def ethane_state(tmpdir) -> Dict[str, Any]:
    """
    Create the initial torsiondrive state for an ethane scan using a grid
    spacing of 15 and the rdkit uff method.
    """
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("ethane.sdf"))
        central_bond = ligand.find_rotatable_bonds()[0]
        torsion = ligand.dihedrals[central_bond.indices][0]
        driver = TorsionDriver(grid_spacing=15)
        # describe the scan to be performed
        scan = TorsionScan(torsion=torsion, scan_range=(-165, 180))
        spec = QCOptions(program="rdkit", basis=None, method="uff")
        return driver._create_initial_state(
            molecule=ligand, dihedral_data=scan, qc_spec=spec
        )
def test_optmiser_fail_no_output(tmpdir):
    """
    A failed psi4 optimisation run with allow_fail=False should surface as a
    RuntimeError; the test is skipped when psi4 is not available.
    """
    available = qcengine.list_available_programs()
    if "psi4" not in available:
        pytest.skip("Psi4 missing skipping test.")
    with tmpdir.as_cwd():
        water = Ligand.from_file(file_name=get_data("water.pdb"))
        psi4_spec = QCOptions(program="psi4", method="wb97x-dbj", basis="dzvp")
        optimiser = GeometryOptimiser(maxiter=10)
        with pytest.raises(RuntimeError):
            optimiser.optimise(
                molecule=water,
                allow_fail=False,
                qc_spec=psi4_spec,
                local_options=LocalResource(cores=1, memory=1),
            )
def test_tdrive_restarts(capsys, ethane_state, tmpdir):
    """
    After one grid point has been optimised and stored in the state, a new
    run should report that it is restarting from the existing state.
    """
    with tmpdir.as_cwd():
        ethane_state["grid_spacing"] = [60]
        ligand = Ligand.from_file(get_data("ethane.sdf"))
        driver = TorsionDriver(n_workers=1, grid_spacing=60)
        spec = QCOptions(program="rdkit", basis=None, method="uff")
        resources = LocalResource(cores=1, memory=1)
        optimiser = driver._build_geometry_optimiser()
        # fetch the pending jobs and pick the first -60 geometry
        pending = driver._get_new_jobs(td_state=ethane_state)
        start_coords = pending["-60"][0]
        grid_result = optimise_grid_point(
            geometry_optimiser=optimiser,
            qc_spec=spec,
            local_options=resources,
            molecule=ligand,
            coordinates=start_coords,
            dihedral=ethane_state["dihedrals"][0],
            dihedral_angle=-60,
            job_id=0,
        )
        _ = driver._update_state(td_state=ethane_state, result_data=[grid_result])
        # launch a run which should pick up the state updated above
        scan = TorsionScan(torsion=ethane_state["dihedrals"][0], scan_range=(-165, 180))
        _ = driver.run_torsiondrive(
            molecule=ligand,
            dihedral_data=scan,
            qc_spec=spec,
            local_options=resources,
        )
        output = capsys.readouterr()
        expected = "Compatible TorsionDrive state found restarting torsiondrive!"
        assert expected in output.out
def test_optimise_fail_output(tmpdir):
    """
    When an optimisation fails and a RuntimeError is raised, the geometry,
    trajectory and result files should still be present in the working folder.
    """
    with tmpdir.as_cwd():
        water = Ligand.from_file(file_name=get_data("water.pdb"))
        optimiser = GeometryOptimiser(maxiter=5)
        ani_spec = QCOptions(program="torchani", basis=None, method="ani1ccx")
        with pytest.raises(RuntimeError):
            optimiser.optimise(
                molecule=water,
                allow_fail=False,
                qc_spec=ani_spec,
                local_options=LocalResource(cores=1, memory=1),
            )
        # inspect the working directory once the error has been caught
        written = os.listdir()
        assert "opt.xyz" in written
        assert "opt_trajectory.xyz" in written
        assert "result.json" in written
def test_gaussian_td_solvent_template(tmpdir, water):
    """
    Running the DDEC charge stage with a gaussian td-scf specification should
    switch the implicit solvent solver from IPCM to PCM.
    """
    with tmpdir.as_cwd():
        # start from an IPCM solver so a change can be detected
        ddec = DDECCharges()
        ddec.solvent_settings.solver_type = "IPCM"
        td_spec = QCOptions(
            method="cam-b3lyp",
            basis="6-31G",
            program="gaussian",
            td_settings=TDSettings(),
        )
        resources = LocalResource(cores=1, memory=1)
        with pytest.raises(SpecificationError):
            ddec.run(water, qc_spec=td_spec, local_options=resources)
        # the run raises, so only the solver switch is checked
        assert ddec.solvent_settings.solver_type == "PCM"
def test_initial_state_coords_passed(tmpdir):
    """
    Seed coordinates handed to the initial state builder should be stored as
    the starting geometries, scaled by ANGS_TO_BOHR.
    """
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("ethane.sdf"))
        central_bond = ligand.find_rotatable_bonds()[0]
        torsion = ligand.dihedrals[central_bond.indices][0]
        driver = TorsionDriver()
        # describe the scan to be performed
        scan = TorsionScan(torsion=torsion, scan_range=(-165, 180))
        # four sets of random mock coordinates
        seeds = [np.random.random(size=(ligand.n_atoms, 3)) for _ in range(4)]
        state = driver._create_initial_state(
            molecule=ligand,
            dihedral_data=scan,
            qc_spec=QCOptions(),
            seed_coordinates=seeds,
        )
        assert len(state["init_coords"]) == 4
        # each stored geometry must match the seed at the same position
        for index, seed in enumerate(seeds):
            assert np.allclose(seed * constants.ANGS_TO_BOHR, state["init_coords"][index])
def test_rdkit_available():
    """
    Validating an rdkit uff specification should pass without raising.
    """
    rdkit_spec = QCOptions(program="rdkit", method="uff", basis=None)
    rdkit_spec.validate_program()
_ = tdriver.run_torsiondrive( molecule=ethane, dihedral_data=dihedral_data, qc_spec=qc_spec, local_options=local_ops, ) captured = capsys.readouterr() # make sure a fresh torsiondrive is run assert "Starting new torsiondrive" in captured.out @pytest.mark.parametrize( "qc_options, scan_range, compatible", [ pytest.param( QCOptions( program="rdkit", method="uff", basis=None, td_settings=None), (-165, 180), True, id="Compatible", ), pytest.param( QCOptions( program="xtb", method="gfn2xtb", basis=None, td_settings=None), (-165, 180), False, id="Wrong program", ), pytest.param( QCOptions( program="rdkit", method="uff",
assert 1 in keywords.values() assert "GAU" in keywords.values() def test_missing_optimiser(): """ Make sure an error is raised when we try and set a missing optimiser. """ with pytest.raises(SpecificationError): _ = GeometryOptimiser(optimiser="bad_optimiser") @pytest.mark.parametrize( "qc_spec", [ pytest.param(QCOptions(program="rdkit", basis=None, method="UFF"), id="rdkit uff"), pytest.param(QCOptions(program="torchani", basis=None, method="ani2x"), id="ani2x"), pytest.param(QCOptions(program="psi4", basis="6-311G", method="b3lyp"), id="psi4 b3lyp"), pytest.param( QCOptions(program="openmm", basis="smirnoff", method="openff_unconstrained-1.3.0"), id="openff", ), pytest.param( QCOptions( program="openmm", basis="antechamber", method="gaff-2.11"), id="gaff",
import pytest import qcengine from qubekit.engines import call_qcengine from qubekit.utils.datastructures import LocalResource, QCOptions, TDSettings from qubekit.utils.exceptions import SpecificationError @pytest.mark.parametrize( "qc_options", [ pytest.param(QCOptions(program="rdkit", basis=None, method="mmff94"), id="rdkit mmff"), pytest.param( QCOptions(program="openmm", basis="smirnoff", method="openff-1.0.0.offxml"), id="parsley", ), pytest.param( QCOptions( program="openmm", basis="antechamber", method="gaff-2.11"), id="gaff-2.11", ), pytest.param(QCOptions(program="psi4", basis="3-21g", method="hf"), id="psi4 hf"), pytest.param(QCOptions(program="gaussian", basis="3-21g", method="hf"), id="gaussian hf"), ], ) def test_single_point_energy(qc_options: QCOptions, tmpdir, water):
from qubekit.charges import DDECCharges, MBISCharges, SolventGaussian from qubekit.nonbonded.protocols import MODELS, get_protocol from qubekit.utils.datastructures import QCOptions from qubekit.workflow.workflow import WorkFlow # create a list of protocol definitions which can be build by a helper function wb97xd = QCOptions(method="wB97X-D", basis="6-311++G(d,p)", program="gaussian") b3lyp = QCOptions(method="B3LYP-D3BJ", basis="DGDZVP", program="gaussian") ddec6 = DDECCharges(ddec_version=6, ) model_0 = { "qc_options": wb97xd, "charges": ddec6, "virtual_sites": None, "non_bonded": get_protocol(protocol_name="0"), } model_1a = { "qc_options": b3lyp, "charges": ddec6, "virtual_sites": None, "non_bonded": get_protocol(protocol_name="1a"), } model_1b = { "qc_options": QCOptions(method="HF", basis="6-31G(d)", program="gaussian"), "charges": DDECCharges(ddec_version=6, solvent_settings=None), "virtual_sites": None, "non_bonded": get_protocol(protocol_name="1b"),
class WorkFlow(SchemaBase):
    # Schema describing a full parametrisation workflow: one settings object
    # per stage plus the shared QC and local resource options, together with
    # the methods used to validate, order, run and restart the stages.
    # (Documented with comments rather than a class docstring so the model
    # definition itself is left untouched.)

    type: Literal["WorkFlow"] = "WorkFlow"
    qc_options: QCOptions = Field(
        QCOptions(),
        description="The QC options to be used for all QC calculations apart from implicit solvent.",
    )
    local_resources: LocalResource = Field(
        LocalResource(),
        description="The local resource options for the workflow like total memory and cores available.",
    )
    parametrisation: Union[OpenFF, XML, AnteChamber] = Field(
        OpenFF(),
        description="The parametrisation engine which should be used to assign initial parameters.",
    )
    optimisation: Optimiser = Field(
        Optimiser(),
        description="The main geometry optimiser settings including pre_optimisation settings.",
    )
    charges: Union[DDECCharges, MBISCharges] = Field(
        DDECCharges(),
        description="The method that should be used to calculate the AIM reference data the charge should be extracted from. Note that the non-bonded parameters are also calculated from this data.",
    )
    virtual_sites: Optional[VirtualSites] = Field(
        VirtualSites(),
        description="The method that should be used to fit virtual sites if they are requested.",
    )
    non_bonded: LennardJones612 = Field(
        get_protocol(protocol_name="0"),
        description="The method that should be used to calculate the non-bonded non-charge parameters and their functional form.",
    )
    bonded_parameters: Union[ModSeminario, QForceHessianFitting] = Field(
        ModSeminario(),
        description="The method that should be used to optimise the bonded parameters.",
    )
    torsion_scanner: TorsionScan1D = Field(
        TorsionScan1D(),
        description="The method that should be used to drive the torsions",
    )
    torsion_optimisation: ForceBalanceFitting = Field(
        ForceBalanceFitting(),
        description="The method that should be used to optimise the scanned soft dihedrals.",
    )

    # the hessian stage is a class level attribute rather than a model field
    hessian: ClassVar[Hessian] = Hessian()

    # name of the file the results object is written to during a run
    _results_fname: str = PrivateAttr("workflow_result.json")

    @classmethod
    def from_results(cls, results: WorkFlowResult):
        """Build a workflow from the provenance info in the results object.

        Args:
            results: The results object whose qc spec, local resources and
                per-stage settings are used to construct the workflow.

        Returns:
            A new workflow instance built from the stored settings.
        """
        model_data = {
            "qc_options": results.qc_spec.dict(),
            "local_resources": results.local_resources.dict(),
        }
        # now loop over the stages and update the options
        for stage_name, result in results.results.items():
            # NOTE(review): get_running_order uses the lower-case name
            # "hessian", so this comparison may never match - confirm.
            if stage_name != "Hessian":
                # this stage has no settings
                model_data[stage_name] = result.stage_settings
        return cls(**model_data)

    def validate_workflow(self, workflow: List[str], molecule: Optional[Ligand] = None) -> None:
        """
        Make sure that the workflow can be run ahead of time by looking for errors in the QCspecification
        and missing dependencies.

        Args:
            workflow: The list of stages to be run which should be checked.
            molecule: Optional molecule; when given and the non_bonded stage is in the workflow
                the element coverage of the non-bonded stage is checked against it.
        """
        # first check the general qc spec
        self.qc_options.validate_specification()
        # then check each component for missing dependencies
        for field in workflow:
            stage = getattr(self, field)
            # some stages are optional and should be skipped
            if stage is not None:
                stage.is_available()

        # check special stages
        # check that the pre_opt method is available
        if "optimisation" in workflow:
            stage = self.optimisation
            if stage.pre_optimisation_method is not None:
                pre_spec = stage.convert_pre_opt(method=stage.pre_optimisation_method)
                pre_spec.validate_specification()

        # if we are doing nonbonded check the element coverage
        if "non_bonded" in workflow and molecule is not None:
            self.non_bonded.check_element_coverage(molecule=molecule)

    def to_file(self, filename: str) -> None:
        """
        Write the workflow to file supported file types are json or yaml.

        Args:
            filename: The name of the output file. A ``yaml`` or ``yml`` extension selects
                yaml output, any other extension is written as json.
        """
        f_type = filename.split(".")[-1]
        with open(filename, "w") as output:
            # compare against both extensions; `"yaml" or "yml"` evaluates
            # to just "yaml" and would send .yml files down the json branch
            if f_type in ("yaml", "yml"):
                import yaml

                output.write(yaml.dump(self.dict()))
            else:
                output.write(self.json(indent=2))

    @classmethod
    def get_running_order(
        cls,
        start: Optional[str] = None,
        skip_stages: Optional[List[str]] = None,
        end: Optional[str] = None,
    ) -> List[str]:
        """Work out the running order based on any skip stages and the end point.

        Args:
            start: The starting stage for the workflow.
            skip_stages: A list of stages to remove from the workflow.
            end: The final stage which should be executed.

        Returns:
            A list of stage names in the order they will be ran in.

        Raises:
            ValueError: If ``start`` or ``end`` is not in the workflow once the skipped
                stages have been removed.
        """
        normal_workflow = [
            "parametrisation",
            "optimisation",
            "hessian",
            "charges",
            "virtual_sites",
            "non_bonded",
            "bonded_parameters",
            "torsion_scanner",
            "torsion_optimisation",
        ]
        if skip_stages is not None:
            for stage in skip_stages:
                try:
                    normal_workflow.remove(stage)
                except ValueError:
                    # unknown or already removed stages are ignored
                    continue
        if start is not None:
            start_id = normal_workflow.index(start)
        else:
            start_id = None
        if end is not None:
            # +1 so the end stage itself is included in the slice
            end_id = normal_workflow.index(end) + 1
        else:
            end_id = None

        return normal_workflow[start_id:end_id]

    def _build_initial_results(self, molecule: Ligand) -> WorkFlowResult:
        """Build the initial results schema using the workflow.

        Every stage in the full running order is given a result entry with a
        waiting status.
        """
        workflow = self.get_running_order()
        result = WorkFlowResult(
            version=qubekit.__version__,
            input_molecule=molecule.copy(deep=True),
            qc_spec=self.qc_options,
            current_molecule=molecule,
            local_resources=self.local_resources,
        )
        # for each stage set if they are to be ran
        for stage_name in workflow:
            stage: Optional[StageBase] = getattr(self, stage_name)
            if stage is not None:
                stage_result = StageResult(
                    stage=stage.type,
                    stage_settings=stage.dict(),
                    status=Status.Waiting,
                )
            else:
                stage_result = StageResult(stage=None, stage_settings=None, status=Status.Waiting)
            result.results[stage_name] = stage_result
        return result

    def _get_optional_stage_skip(self, skip_stages: Optional[List[str]]) -> Optional[List[str]]:
        """
        Add any optional stages which are skipped when not supplied, to the skip stages list.

        Returns:
            The skip list with "virtual_sites" added when no virtual site stage is set,
            otherwise the input unchanged.
        """
        if self.virtual_sites is None and skip_stages is not None:
            # we get a tuple from click so we can not append
            return [*skip_stages, "virtual_sites"]
        elif self.virtual_sites is None and skip_stages is None:
            return ["virtual_sites"]

        return skip_stages

    def restart_workflow(
        self,
        start: str,
        result: WorkFlowResult,
        skip_stages: Optional[List[str]] = None,
        end: Optional[str] = None,
    ) -> WorkFlowResult:
        """
        Restart the workflow from the given stage and continue the run.

        Args:
            start: The name of the stage we want to restart the workflow from.
            result: The past run results object which will be updated and that starting molecule will be extracted from.
            skip_stages: The list of stage names which should be skipped.
            end: The name of the last stage to be computed before finishing.

        Returns:
            The updated results object.
        """
        molecule = result.current_molecule
        skip_stages = self._get_optional_stage_skip(skip_stages=skip_stages)
        run_order = self.get_running_order(start=start, skip_stages=skip_stages, end=end)
        # update local and qc options
        result.qc_spec = self.qc_options
        result.local_resources = self.local_resources

        return self._run_workflow(molecule=molecule, results=result, workflow=run_order)

    def new_workflow(
        self,
        molecule: Ligand,
        skip_stages: Optional[List[str]] = None,
        end: Optional[str] = None,
    ) -> WorkFlowResult:
        """
        The main worker method to be used for starting new workflows.

        Args:
            molecule: The molecule to be re-parametrised using QUBEKit.
            skip_stages: A list of stages which should be skipped in the workflow.
            end: The last stage to be computed before finishing, useful to finish early.

        Returns:
            The results object produced by the run.
        """
        # get the running order
        skip_stages = self._get_optional_stage_skip(skip_stages=skip_stages)
        run_order = self.get_running_order(skip_stages=skip_stages, end=end)
        results = self._build_initial_results(molecule=molecule)
        # if we have any skips assign them
        for stage_name, stage_result in results.results.items():
            if stage_name not in run_order:
                stage_result.status = Status.Skip

        return self._run_workflow(molecule=molecule, workflow=run_order, results=results)

    def _run_workflow(
        self,
        molecule: "Ligand",
        workflow: List[str],
        results: WorkFlowResult,
    ) -> WorkFlowResult:
        """
        The main run method of the workflow which will execute each stage inorder on the given input molecule.

        Args:
            molecule: The molecule to be re-parametrised using QUBEKit.
            workflow: The list of prefiltered stage names which should be ran in order.
            results: The results object that we should update throughout the workflow.

        Returns:
            The results object, updated after each stage of the workflow.
        """
        # try and find missing dependencies
        self.validate_workflow(workflow=workflow, molecule=molecule)
        # start message
        # TODO Move to outside workflow so this doesn't get printed for every run in bulk.
        print(
            "If QUBEKit ever breaks or you would like to view timings and loads of other info, "
            "view the log file.\nOur documentation (README.md) "
            "also contains help on handling the various commands for QUBEKit.\n"
        )

        # write out the results object to track the status at the start
        results.to_file(filename=self._results_fname)

        # loop over stages and run
        for field in workflow:
            stage: StageBase = getattr(self, field)
            # some stages print based on what spec they are using
            print(stage.start_message(qc_spec=self.qc_options))
            molecule = self._run_stage(stage_name=field, stage=stage, molecule=molecule, results=results)
            print(stage.finish_message())

        # now the workflow has finished
        # write final results
        results.to_file(filename=self._results_fname)
        # write out final parameters
        with folder_setup("final_parameters"):
            # if we have U-B terms we need to write a non-standard pdb file
            if molecule.has_ub_terms():
                molecule._to_ub_pdb()
            else:
                molecule.to_file(file_name=f"{molecule.name}.pdb")
            molecule.write_parameters(file_name=f"{molecule.name}.xml")

        return results

    def _run_stage(
        self,
        stage_name: str,
        stage: StageBase,
        molecule: Ligand,
        results: WorkFlowResult,
    ) -> Ligand:
        """
        A stage wrapper to run the stage and update the results workflow in place.

        Args:
            stage_name: The name of the stage, also used as the name of the folder it is run in.
            stage: The stage object to be run.
            molecule: The molecule passed to the stage.
            results: The results object which is updated and written to file.

        Returns:
            The current molecule stored on the results object after the stage.

        Raises:
            WorkFlowExecutionError: If the stage raises anything other than MissingReferenceData.
        """
        # remember where we started so we can always move back
        home = os.getcwd()
        # update settings and set to running and save
        stage_result = StageResult(stage=stage.type, stage_settings=stage.dict(), status=Status.Running)
        results.results[stage_name] = stage_result
        results.modified_date = datetime.now().strftime("%Y_%m_%d")
        results.to_file(filename=self._results_fname)
        make_and_change_into(name=stage_name)
        try:
            # run the stage and save the result
            result_mol = stage.run(
                molecule=molecule,
                qc_spec=self.qc_options,
                local_options=self.local_resources,
            )
            stage_result.status = Status.Done
            results.current_molecule = result_mol
        except MissingReferenceData:
            # this means there are no torsions to scan so simulate it working
            stage_result.status = Status.Done
            results.current_molecule = molecule
        except Exception as e:
            import traceback

            # save the error do not update the current molecule
            stage_result.status = Status.Error
            stage_result.error = traceback.extract_tb(e.__traceback__).format()
            # move back before writing so the files land in the starting folder
            os.chdir(home)
            results.results[stage_name] = stage_result
            results.to_file(self._results_fname)
            # write the exception to file
            with open("QUBEKit.err", "w") as output:
                traceback.print_exc(file=output)
            raise WorkFlowExecutionError(
                f"The workflow stopped unexpectedly due to the following error at stage: {stage_name}"
            ) from e

        # move back
        os.chdir(home)
        # update the results
        results.results[stage_name] = stage_result
        results.to_file(self._results_fname)
        return results.current_molecule