def _build_system(
    self, molecule: Ligand, input_files: Optional[List[str]] = None
) -> System:
    """
    Create an OpenMM system for the molecule from an XML force field file.

    Args:
        molecule: The molecule whose OpenMM topology and coordinates are used to build the system.
        input_files: Optional list of file names; the first one ending in ".xml" is used as the
            force field. When none is found the name "<molecule.name>.xml" is tried instead.

    Returns:
        The system created by the force field with no cutoff and no constraints.
    """
    modeller = app.Modeller(
        molecule.to_openmm_topology(), molecule.openmm_coordinates()
    )

    # take the first xml file we were given, otherwise guess the name from the molecule
    xml = next(
        (name for name in input_files or () if name.endswith(".xml")),
        None,
    )
    if xml is None:
        xml = f"{molecule.name}.xml"

    forcefield = app.ForceField(xml)
    system_options = {"nonbondedMethod": app.NoCutoff, "constraints": None}

    try:
        return forcefield.createSystem(modeller.topology, **system_options)
    except ValueError:
        # a ValueError here is treated as the force field defining virtual sites,
        # so add the extra particles to the topology and try again
        print("Virtual sites were found in the xml file")
        modeller.addExtraParticles(forcefield)
        return forcefield.createSystem(modeller.topology, **system_options)
def test_parameter_round_trip(method, tmpdir, xml, openff, antechamber):
    """
    Check we can parametrise a molecule then write out the same parameters.

    The molecule is parametrised with the engine selected by ``method``, the parameters are
    written to "test.xml" and then read back onto a second copy of the molecule with the xml
    engine. Bond, angle, non-bonded and torsion parameters of both copies must agree.
    """
    if method == "openff":
        param_method = openff
    else:
        param_method = antechamber

    with tmpdir.as_cwd():
        mol = Ligand.from_file(get_data("acetone.sdf"))
        param_mol = param_method.run(mol)
        # keep a copy of the serialised system produced by the first engine
        with open("serialised.xml") as old:
            with open("orig.xml", "w") as new:
                new.write(old.read())

        # write out params
        mol.write_parameters(file_name="test.xml")

        # make a second mol
        mol2 = Ligand.from_file(get_data("acetone.sdf"))
        param_mol2 = xml.run(molecule=mol2, input_files=["test.xml"])

        for bond in mol.bonds:
            bond_tuple = (bond.atom1_index, bond.atom2_index)
            bond1 = param_mol.BondForce[bond_tuple]
            bond2 = param_mol2.BondForce[bond_tuple]
            assert bond1.k == pytest.approx(bond2.k)
            assert bond1.length == pytest.approx(bond2.length)

        for angle in mol.angles:
            angle1 = param_mol.AngleForce[angle]
            angle2 = param_mol2.AngleForce[angle]
            assert angle1.k == pytest.approx(angle2.k)
            # compare the two molecules (the original compared angle2 with itself)
            assert angle1.angle == pytest.approx(angle2.angle)

        for atom in range(mol.n_atoms):
            atom1 = param_mol.NonbondedForce[(atom,)]
            atom2 = param_mol2.NonbondedForce[(atom,)]
            assert atom1.charge == pytest.approx(atom2.charge)
            assert atom1.sigma == pytest.approx(atom2.sigma)
            assert atom1.epsilon == pytest.approx(atom2.epsilon)

        # loop over the round trip mol as we lose some parameters which are 0
        for dihedral in param_mol2.TorsionForce.parameters:
            other_dih = param_mol.TorsionForce[dihedral.atoms]
            for key in dihedral.__fields__:
                # openmm will not load any torsions which have k=0, this causes differences between
                # antechamber and qubekit when the phase is not as expected, this does not change
                # the energy however as k=0
                if (
                    key not in ["atoms", "attributes", "parameter_eval"]
                    and "phase" not in key
                ):
                    assert getattr(dihedral, key) == pytest.approx(
                        getattr(other_dih, key)
                    )
def test_single_point_energy(program, basis, method, tmpdir):
    """
    Run a single point energy through our qcengine wrapper and check the result
    records the requested program, basis and method.

    The test is skipped when qcengine does not list ``program`` as available.
    """
    available_programs = qcengine.list_available_programs()
    if program not in available_programs:
        pytest.skip(f"{program} missing skipping test.")

    with tmpdir.as_cwd():
        water = Ligand.from_file(file_name=get_data("water.pdb"))
        engine_settings = {
            "program": program,
            "basis": basis,
            "method": method,
            "memory": 1,
            "cores": 1,
            "driver": "energy",
        }
        result = QCEngine(**engine_settings).call_qcengine(molecule=water)

        assert result.driver == "energy"
        model = result.model
        assert model.basis == basis
        assert model.method == method
        assert result.provenance.creator.lower() == program

        # psi4 runs should carry the ultrafine grid keywords
        if program == "psi4":
            assert result.keywords["dft_spherical_points"] == 590
            assert result.keywords["dft_radial_points"] == 99
def test_no_impropers():
    """
    Water has no improper torsions, so the property should be None and the count zero.
    """
    water = Ligand.from_file(file_name=get_data("water.pdb"))
    impropers = water.improper_torsions
    assert impropers is None
    assert water.n_improper_torsions == 0
def test_no_dihedrals():
    """
    Water has no dihedrals, so the property should be falsy and the count zero.
    """
    water = Ligand.from_file(file_name=get_data("water.pdb"))
    dihedrals = water.dihedrals
    assert not dihedrals
    assert water.n_dihedrals == 0
def test_hessian_unit_regression(tmpdir):
    """
    Regression test for the modified Seminario angle scaling.

    The method once had a bug in the scaling code which scaled every angle by the same
    amount. Methanol has a mix of scaled and non scaled angles, so we check the derived
    bond and angle parameters against stored reference values.
    """
    with tmpdir.as_cwd():
        # the json file holds the molecule at the qm geometry
        methanol = Ligand.parse_file(get_data("methanol.json"))
        ModSeminario().run(molecule=methanol)

        # C-O bond
        c_o_bond = methanol.BondForce[(0, 1)]
        assert round(c_o_bond.length, ndigits=4) == 0.1413
        assert round(c_o_bond.k, 3) == 246439.036

        # an O-H bond
        o_h_bond = methanol.BondForce[(1, 5)]
        assert round(o_h_bond.length, 4) == 0.0957
        assert round(o_h_bond.k, 2) == 513819.18

        # C-O-H angle
        c_o_h_angle = methanol.AngleForce[(0, 1, 5)]
        assert round(c_o_h_angle.angle, 3) == 1.899
        assert round(c_o_h_angle.k, 3) == 578.503

        # a scaled H-C-H angle
        h_c_h_angle = methanol.AngleForce[(2, 0, 3)]
        assert round(h_c_h_angle.angle, 3) == 1.894
        assert round(h_c_h_angle.k, 3) == 357.05
def test_combine_molecules_sites_deepdiff(openff, xml, acetone, rfree_data, tmpdir):
    """
    Test combining molecules with virtual sites and ensure they are correctly applied and the
    energy break down matches.

    Each molecule is parametrised on its own and the resulting "serialised.xml" is kept as the
    reference. A combined force field is then written, loaded with openmm and the systems it
    creates for each molecule are compared with the references using DeepDiff.
    """
    with tmpdir.as_cwd():
        openff.run(acetone)
        # read the reference through a context manager so the handle is closed
        with open("serialised.xml") as serialised:
            acetone_ref_system = xmltodict.parse(serialised.read())

        pyridine = Ligand.from_file(file_name=get_data("pyridine.sdf"))
        xml.run(molecule=pyridine, input_files=[get_data("pyridine.xml")])
        with open("serialised.xml") as serialised:
            pyridine_ref_system = xmltodict.parse(serialised.read())

        combined_xml = _combine_molecules(
            molecules=[acetone, pyridine], parameters=elements, rfree_data=rfree_data
        ).getroot()
        messy = ET.tostring(combined_xml, "utf-8")
        pretty_xml = parseString(messy).toprettyxml(indent="")
        print(pretty_xml)
        with open("combined.xml", "w") as xml_doc:
            xml_doc.write(pretty_xml)

        # the combined file should record the version which wrote it
        root = combined_xml.find("QUBEKit")
        assert qubekit.__version__ == root.get("Version")

        # load up new systems and compare
        combined_ff = app.ForceField("combined.xml")
        acetone_combine_system = xmltodict.parse(
            XmlSerializer.serialize(
                combined_ff.createSystem(
                    acetone.to_openmm_topology(),
                    nonbondedCutoff=0.9,
                    removeCMMotion=False,
                )
            )
        )
        # slight differences in masses we need to ignore
        acetone_diff = DeepDiff(
            acetone_ref_system,
            acetone_combine_system,
            ignore_order=True,
            significant_digits=6,
            exclude_regex_paths="mass",
        )
        assert len(acetone_diff) == 0

        # add v-site
        pyridine_mod = app.Modeller(
            pyridine.to_openmm_topology(), pyridine.openmm_coordinates()
        )
        pyridine_mod.addExtraParticles(combined_ff)
        pyridine_combine_system = xmltodict.parse(
            XmlSerializer.serialize(combined_ff.createSystem(pyridine_mod.topology))
        )
        pyridine_diff = DeepDiff(
            pyridine_ref_system,
            pyridine_combine_system,
            ignore_order=True,
            significant_digits=6,
            exclude_regex_paths="mass",
        )
        assert len(pyridine_diff) == 0
def test_lennard_jones612(tmpdir):
    """
    Make sure that we can reproduce some reference values using the LJ612 class.

    The reference sigma/epsilon values are compared with a tight relative tolerance rather
    than exact float equality, so the test does not depend on the last bits of the floating
    point arithmetic of the platform it runs on.
    """
    with tmpdir.as_cwd():
        mol = Ligand.from_file(get_data("chloromethane.pdb"))
        # get some initial Nonbonded values
        OpenFF().run(molecule=mol)
        # get some aim reference data
        ExtractChargeData.extract_charge_data_chargemol(
            molecule=mol, dir_path=get_data(""), ddec_version=6
        )
        # apply symmetry to the reference data
        DDECCharges.apply_symmetrisation(molecule=mol)
        # calculate the new LJ terms
        LennardJones612(
            lj_on_polar_h=False,
            # qubekit 1 legacy parameters
            free_parameters={
                "H": h_base(r_free=1.64),
                "C": c_base(r_free=2.08),
                "Cl": cl_base(r_free=1.88),
            },
        ).run(molecule=mol)

        # atom index -> (sigma, epsilon) reference values
        expected = {
            0: (0.3552211069814666, 0.25918723101839924),
            1: (0.33888067968663566, 0.9650542683335082),
            2: (0.22192905304751342, 0.15047278650152818),
        }
        # make sure we get out expected reference values
        for atom_index, (sigma, epsilon) in expected.items():
            parameter = mol.NonbondedForce[(atom_index,)]
            assert parameter.sigma == pytest.approx(sigma, rel=1e-9)
            assert parameter.epsilon == pytest.approx(epsilon, rel=1e-9)
def test_rb_energy_round_trip(tmpdir):
    """
    Make sure that no parameters are lost when reading in RBterms.

    The energy of the system serialised while loading the xml is decomposed per force group
    and compared with the energy of a system rebuilt from the parameters stored on the
    molecule.
    """
    with tmpdir.as_cwd():
        # load the molecule and parameterise
        mol = Ligand.from_file(file_name=get_data("cyclohexane.sdf"))
        XML().run(molecule=mol, input_files=[get_data("cyclohexane.xml")])

        # load the serialised system we extract the parameters from as our reference,
        # closing the file once it has been read
        with open("serialised.xml") as serialised:
            ref_system = XmlSerializer.deserializeSystem(serialised.read())
        parm_top = load_topology(
            mol.to_openmm_topology(), system=ref_system, xyz=mol.openmm_coordinates()
        )
        ref_energy = energy_decomposition_system(
            parm_top, ref_system, platform="Reference"
        )

        # now we need to build the system from our stored parameters
        mol.write_parameters(file_name="test.xml")
        ff = app.ForceField("test.xml")
        qube_system = ff.createSystem(mol.to_openmm_topology())
        with open("qube.xml", "w") as xml_out:
            xml_out.write(XmlSerializer.serialize(qube_system))
        qube_struc = load_topology(
            mol.to_openmm_topology(), system=qube_system, xyz=mol.openmm_coordinates()
        )
        qube_energy = energy_decomposition_system(
            qube_struc, qube_system, platform="Reference"
        )

        # compare the decomposed energies of the groups
        for force_group, energy in ref_energy:
            for qube_force, qube_e in qube_energy:
                if force_group == qube_force:
                    assert energy == pytest.approx(qube_e, abs=2e-3)
def test_parameter_tags(tmpdir, force_group, ff_group, key, terms):
    """
    Tag a selection of parameters and check that only those terms carry the tag in the
    force field built from the molecule.
    """
    with tmpdir.as_cwd():
        biphenyl = Ligand.from_file(file_name=get_data("biphenyl.sdf"))
        OpenFF().run(molecule=biphenyl)

        # tag each requested term in the chosen force group
        for term in terms:
            getattr(biphenyl, force_group)[term].attributes = {"test tag"}

        # make the force field
        ff = biphenyl._build_forcefield()

        # the atom classes we expect to find on the tagged terms
        classes = []
        for indices in terms:
            atoms = [biphenyl.atoms[index] for index in indices]
            classes.append(
                [f"{atom.atomic_symbol}{atom.atom_index}" for atom in atoms]
            )
        term_length = len(terms[0])

        # now search through and make sure the force groups were tagged
        for group in ff.iter(tag=ff_group):
            for ff_term in group.iter(tag=key):
                ff_class = [
                    ff_term.get(f"class{position}")
                    for position in range(1, 1 + term_length)
                ]
                tag = ff_term.get("parametrize")
                if ff_class in classes:
                    assert tag == "test tag"
                else:
                    assert tag is None
def test_parameter_engines(tmpdir, parameter_engine, openff, antechamber):
    """
    Make sure the selected parameter engine (openff or antechamber) assigns parameters to
    every force group of a molecule which starts with none.
    """
    engine = openff if parameter_engine == "openff" else antechamber
    force_groups = (
        "BondForce",
        "AngleForce",
        "TorsionForce",
        "ImproperTorsionForce",
        "NonbondedForce",
    )

    with tmpdir.as_cwd():
        acetone = Ligand.from_file(get_data("acetone.sdf"))

        # nothing should be parametrised before the engine is run
        for group_name in force_groups:
            assert getattr(acetone, group_name).n_parameters == 0

        engine.run(acetone)

        # every force group should now hold parameters
        for group_name in force_groups:
            assert getattr(acetone, group_name).n_parameters != 0
def test_gaussian_solvent_template():
    """
    Render a gaussian input with the implicit solvent keywords of the charge engine and
    compare it with the stored reference file.
    """
    water = Ligand.from_file(get_data("water.pdb"))

    # the solvent keywords come from the default charge method
    solvent_keywords = DDECCharges().solvent_settings.format_keywords()

    # build the atomic input the harness will render
    model = {"method": "b3lyp-d3bj", "basis": "6-311G"}
    task = AtomicInput(
        molecule=water.to_qcschema(),
        driver="energy",
        model=model,
        keywords=solvent_keywords,
    )

    # the harness renders the template
    harness = GaussianHarness()
    config = get_config(local_options={"ncores": 1, "memory": 1})
    rendered = harness.build_input(task, config)["infiles"]["gaussian.com"]

    # the rendered job file must match the reference exactly
    with open(get_data("gaussian_solvent_example.com")) as reference:
        assert reference.read() == rendered
def test_chargemol_template(tmpdir, version):
    """
    Make sure we can correctly render a chargemol template job.

    CHARGEMOL_DIR is faked for the duration of the test and put back afterwards so the
    change does not leak into other tests running in the same process.
    """
    with tmpdir.as_cwd():
        mol = Ligand.from_file(get_data("water.pdb"))
        OpenFF().parametrise_molecule(molecule=mol)
        charge_method = DDECCharges(
            apply_symmetry=True,
            basis="sto-3g",
            method="hf",
            cores=1,
            memory=1,
            ddec_version=version,
        )

        # fake the chargemol dir, remembering what was set before
        previous_dir = os.environ.get("CHARGEMOL_DIR")
        os.environ["CHARGEMOL_DIR"] = "test"
        try:
            # now render the template
            charge_method._build_chargemol_input(
                density_file_name="test.wfx", molecule=mol
            )
        finally:
            # restore the environment whether or not rendering worked
            if previous_dir is None:
                os.environ.pop("CHARGEMOL_DIR", None)
            else:
                os.environ["CHARGEMOL_DIR"] = previous_dir

        with open("job_control.txt") as job_file:
            job_data = job_file.readlines()

        assert f"DDEC{version}\n" in job_data
        assert "test.wfx\n" in job_data
        assert "test/atomic_densities/\n" in job_data
        assert f"{mol.charge}\n" in job_data
def test_optimise_grid_point_and_update(tmpdir, ethane_state):
    """
    Optimise a single grid point, feed the result back into the torsiondrive state and
    check the neighbouring grid points are queued next.
    """
    with tmpdir.as_cwd():
        ethane = Ligand.from_file(get_data("ethane.sdf"))
        tdriver = TorsionDriver(n_workers=1)
        qc_spec = QCOptions(program="rdkit", basis=None, method="uff")
        local_ops = LocalResource(cores=1, memory=1)
        geo_opt = tdriver._build_geometry_optimiser()

        # pull the starting coordinates of the -60 job from the initial state
        initial_jobs = tdriver._get_new_jobs(td_state=ethane_state)
        start_coords = initial_jobs["-60"][0]

        grid_point_inputs = {
            "geometry_optimiser": geo_opt,
            "qc_spec": qc_spec,
            "local_options": local_ops,
            "molecule": ethane,
            "coordinates": start_coords,
            "dihedral": ethane_state["dihedrals"][0],
            "dihedral_angle": -60,
            "job_id": 0,
        }
        result = optimise_grid_point(**grid_point_inputs)

        updated_state = tdriver._update_state(
            td_state=ethane_state, result_data=[result]
        )
        following_jobs = tdriver._get_new_jobs(td_state=updated_state)
        assert "-75" in following_jobs
        assert "-45" in following_jobs
def test_parse_output(driver):
    """
    Parse the stored gaussian log and fchk files with the harness and check the returned
    result has the value or shape expected for the driver.
    """
    # output name expected by the harness -> stored data file
    data_files = {"gaussian.log": "gaussian.log", "lig.fchk": "gaussian.fchk"}
    outfiles = {}
    for out_name, data_name in data_files.items():
        with open(get_data(data_name)) as handle:
            outfiles[out_name] = handle.read()

    # build the input molecule and the task for this driver
    acetone = Ligand.from_file(file_name=get_data("acetone.pdb"))
    qc_spec = qcel.models.common_models.Model(method="pbe", basis="6-31G")
    qc_task = qcel.models.AtomicInput(
        molecule=acetone.to_qcschema(), driver=driver, model=qc_spec
    )

    result = GaussianHarness().parse_output(outfiles=outfiles, input_model=qc_task)

    if driver == "energy":
        assert result.return_result == -1.931393770857046e02
    else:
        expected_shapes = {"gradient": (10, 3), "hessian": (30, 30)}
        if driver in expected_shapes:
            assert result.return_result.shape == expected_shapes[driver]
def test_get_initial_state(tmpdir, starting_conformations):
    """
    Build the initial torsiondrive state for ethane and check the dihedral, elements,
    scan range and starting coordinates stored in it.
    """
    with tmpdir.as_cwd():
        ethane = Ligand.from_file(get_data("ethane.sdf"))
        rotatable_bond = ethane.find_rotatable_bonds()[0]
        dihedral = ethane.dihedrals[rotatable_bond.indices][0]
        tdriver = TorsionDriver(starting_conformations=starting_conformations)

        # make the scan data
        scan_range = (-165, 180)
        dihedral_data = TorsionScan(torsion=dihedral, scan_range=scan_range)
        td_state = tdriver._create_initial_state(
            molecule=ethane, dihedral_data=dihedral_data, qc_spec=QCOptions()
        )

        assert td_state["dihedrals"] == [dihedral]
        symbols = [atom.atomic_symbol for atom in ethane.atoms]
        assert td_state["elements"] == symbols
        assert td_state["dihedral_ranges"] == [(-165, 180)]

        # the first starting geometry is the input geometry converted with ANGS_TO_BOHR
        input_coords = ethane.coordinates * constants.ANGS_TO_BOHR
        assert np.allclose(input_coords, td_state["init_coords"][0])

        # make sure we have tried to generate conformers
        n_starting = len(td_state["init_coords"])
        assert n_starting <= tdriver.starting_conformations
def test_find_rotatable_bonds_n_rotatables(molecule, n_rotatables):
    """
    The number of rotatable bonds found with the methyl and amine patterns passed in
    should match the expected count.
    """
    smirks_patterns = ["[*:1]-[CH3:2]", "[*:1]-[NH2:2]"]
    ligand = Ligand.from_file(get_data(molecule))
    rotatable_bonds = ligand.find_rotatable_bonds(smirks_patterns)
    assert len(rotatable_bonds) == n_rotatables
def test_full_tdrive(tmpdir, workers, capsys):
    """
    Run a complete torsiondrive on ethane with the cheap rdkit uff method and check the
    output reports that a new torsiondrive was started.
    """
    with tmpdir.as_cwd():
        ethane = Ligand.from_file(get_data("ethane.sdf"))

        # scan the first dihedral around the first rotatable bond
        rotatable_bond = ethane.find_rotatable_bonds()[0]
        dihedral = ethane.dihedrals[rotatable_bond.indices][0]
        dihedral_data = TorsionScan(torsion=dihedral, scan_range=(-165, 180))

        qc_spec = QCOptions(program="rdkit", basis=None, method="uff")
        local_ops = LocalResource(cores=workers, memory=2)
        tdriver = TorsionDriver(n_workers=workers, grid_spacing=60)

        # the returned value is not needed, only the printed output is checked
        tdriver.run_torsiondrive(
            molecule=ethane,
            dihedral_data=dihedral_data,
            qc_spec=qc_spec,
            local_options=local_ops,
        )

        # make sure a fresh torsiondrive is run
        printed = capsys.readouterr().out
        assert "Starting new torsiondrive" in printed
def test_find_rotatable_bonds_no_rotatables(molecule):
    """
    Rigid molecules, or molecules without any rotatable bonds, should give None from the
    rotatable bond search.
    """
    smirks_patterns = ["[*:1]-[CH3:2]", "[*:1]-[NH2:2]"]
    ligand = Ligand.from_file(get_data(molecule))
    rotatable_bonds = ligand.find_rotatable_bonds(smirks_patterns)
    assert rotatable_bonds is None
def test_to_rdkit(molecule):
    """
    Convert to rdkit and check aromaticity, bond order and stereochemistry survive.

    We test on bace which has a chiral center and 12-dichloroethene which has a stereo bond.
    """
    from rdkit import Chem

    ligand = Ligand.from_file(file_name=get_data(molecule))
    rd_mol = ligand.to_rdkit()

    # atoms: aromatic flags and chiral tags must match
    for rd_atom in rd_mol.GetAtoms():
        qb_atom = ligand.atoms[rd_atom.GetIdx()]
        assert rd_atom.GetIsAromatic() is qb_atom.aromatic
        if qb_atom.stereochemistry is None:
            continue
        expected_tag = (
            Chem.CHI_TETRAHEDRAL_CCW
            if qb_atom.stereochemistry == "S"
            else Chem.CHI_TETRAHEDRAL_CW
        )
        assert rd_atom.GetChiralTag() == expected_tag

    # bonds: aromatic flags, bond order and E/Z stereo must match
    for rd_bond in rd_mol.GetBonds():
        qb_bond = ligand.bonds[rd_bond.GetIdx()]
        assert qb_bond.aromatic is rd_bond.GetIsAromatic()
        assert qb_bond.bond_order == rd_bond.GetBondTypeAsDouble()
        if qb_bond.stereochemistry is None:
            continue
        expected_stereo = (
            Chem.BondStereo.STEREOE
            if qb_bond.stereochemistry == "E"
            else Chem.BondStereo.STEREOZ
        )
        assert rd_bond.GetStereo() == expected_stereo
def prep_for_fitting(self, molecule: Ligand) -> List[str]:
    """
    For the given ligand prep the input files ready for torsion profile fitting.

    Args:
        molecule: The molecule object that we need to prep for fitting, this should have qm
            reference data stored in molecule.qm_scans.

    Note:
        We assume we are already in the targets folder. The working directory is always
        restored to where we started, even if writing one of the targets fails.

    Returns:
        A list of target folder names made by this target.

    Raises:
        MissingReferenceData: If the molecule does not have any torsion drive reference data
            saved in molecule.qm_scans.
    """
    # make sure we have data
    if not molecule.qm_scans:
        raise MissingReferenceData(
            f"Can not prepare a forcebalance fitting target for {molecule.name} as the reference data is missing!"
        )

    # write out the qdata and other input files for each scan
    target_folders = []
    # keep track of where we start
    base_folder = os.getcwd()

    # loop over each scanned bond and make a target folder
    for scan in molecule.qm_scans:
        task_name = (
            f"{self.target_name}_{scan.central_bond[0]}_{scan.central_bond[1]}"
        )
        target_folders.append(task_name)
        try:
            make_and_change_into(name=task_name)
            # make the pdb topology file
            if molecule.has_ub_terms():
                molecule._to_ub_pdb(file_name="molecule")
            else:
                molecule.to_file(file_name="molecule.pdb")
            # write the qdata file
            export_torsiondrive_data(molecule=molecule, tdrive_data=scan)
            # make the metadata
            self.make_metadata(torsiondrive_data=scan)
        finally:
            # now move back to the base, also when one of the steps above raised
            os.chdir(base_folder)

    return target_folders
def test_make_unique_names():
    """
    Loading a molecule whose atom names are not unique should leave it with a unique set
    of names.
    """
    # the input file has missing atom names
    ligand = Ligand.from_file(get_data("missing_names.pdb"))

    # the names should have been fixed on load
    unique_names = ligand.has_unique_atom_names
    assert unique_names is True
def test_find_rotatable_bonds_indices_of_bonds():
    """
    Every rotatable bond found in bace should be one of the expected bonds, in either
    atom order.
    """
    smirks_patterns = ["[*:1]-[CH3:2]", "[*:1]-[NH2:2]"]
    expected_bonds = [(12, 13), (5, 13)]

    bace = Ligand.from_file(get_data("bace0.pdb"))
    rotatables = bace.find_rotatable_bonds(smirks_patterns)
    found = [(rotatable.atom1_index, rotatable.atom2_index) for rotatable in rotatables]

    for forward in found:
        backward = tuple(reversed(forward))
        assert forward in expected_bonds or backward in expected_bonds
def test_to_topology(molecule):
    """
    The topology graph made by qubekit should be isomorphic to the networkx graph made by
    openff for the same file.
    """
    file_path = get_data(molecule)
    qube_mol = Ligand.from_file(file_name=file_path)
    off_mol = OFFMolecule.from_file(file_path=file_path)

    same_graph = nx.algorithms.isomorphism.is_isomorphic(
        qube_mol.to_topology(), off_mol.to_networkx()
    )
    assert same_graph is True
def test_ligand_from_file(file_name):
    """
    Load a molecule from the given file type and check it has atoms, bonds and a name.
    """
    ligand = Ligand.from_file(file_name=get_data(file_name))

    assert ligand.n_atoms > 1
    assert ligand.n_bonds > 1
    assert ligand.name is not None
def test_from_smiles(smiles):
    """
    A molecule built from smiles should end up with four hydrogens, added when needed.
    """
    ligand = Ligand.from_smiles(smiles_string=smiles, name="methane")

    # count the number of hydrogens
    symbols = [atom.atomic_symbol for atom in ligand.atoms]
    assert symbols.count("H") == 4
def test_add_conformers(file_name):
    """
    Clear the coordinates of the bace pdb molecule, add a conformer from another file type
    and check the coordinates have one row of three values per atom.
    """
    bace = Ligand.from_file(file_name=get_data("bace0.pdb"))
    # drop the coordinates so they can only come from the new conformer
    bace.coordinates = None

    conformer_file = get_data(file_name)
    bace.add_conformer(file_name=conformer_file)

    expected_shape = (bace.n_atoms, 3)
    assert bace.coordinates.shape == expected_shape
def run(
    input_file: Optional[str] = None,
    smiles: Optional[str] = None,
    name: Optional[str] = None,
    multiplicity: int = 1,
    end: Optional[str] = None,
    skip_stages: Optional[List[str]] = None,
    config: Optional[str] = None,
    protocol: Optional[str] = None,
    cores: Optional[int] = None,
    memory: Optional[int] = None,
):
    """
    Run the QUBEKit parametrisation workflow on an input molecule.

    Args:
        input_file: The name of the file the molecule should be loaded from.
        smiles: The smiles string the molecule should be built from, requires ``name``.
        name: The name given to a molecule built from smiles.
        multiplicity: The multiplicity passed on when the molecule is loaded.
        end: Passed to the workflow as ``end``.
        skip_stages: Passed to the workflow as ``skip_stages``.
        config: The config file passed to ``prep_config``.
        protocol: The protocol passed to ``prep_config``.
        cores: The number of cores passed to ``prep_config``.
        memory: The memory passed to ``prep_config``.

    Raises:
        RuntimeError: If both or neither of ``input_file`` and ``smiles`` are supplied, or if
            ``smiles`` is supplied without a ``name``.
    """
    # make sure we have an input or smiles not both
    if input_file is not None and smiles is not None:
        raise RuntimeError(
            "Please supply either the name of the input file or a smiles string not both."
        )
    # and fail early with a clear message when we have nothing to build the molecule from
    if input_file is None and smiles is None:
        raise RuntimeError(
            "Please supply either the name of the input file or a smiles string."
        )

    # load the molecule
    if input_file is not None:
        molecule = Ligand.from_file(file_name=input_file, multiplicity=multiplicity)
    else:
        if name is None:
            raise RuntimeError(
                "Please also pass a name for the molecule when starting from smiles."
            )
        molecule = Ligand.from_smiles(
            smiles_string=smiles, name=name, multiplicity=multiplicity
        )

    # load workflow
    workflow = prep_config(
        config_file=config, memory=memory, cores=cores, protocol=protocol
    )

    # move into the working folder and run
    with folder_setup(
        f"QUBEKit_{molecule.name}_{datetime.now().strftime('%Y_%m_%d')}"
    ):
        # write the starting molecule
        molecule.to_file(file_name=f"{molecule.name}.pdb")
        workflow.new_workflow(molecule=molecule, skip_stages=skip_stages, end=end)
def test_to_rdkit_complicated_stereo():
    """
    A complicated molecule with multiple stereo centres should convert to rdkit without
    raising.
    """
    complicated_smiles = "[H][C@]1([C@@]([C@](O[C@@]1([H])C([H])([H])OP(=O)(O[H])O[H])([H])N2C(=C([N+](C2([H])[H])([H])[H])C(=O)N([H])[H])O[H])([H])O[H])O[H]"
    ligand = Ligand.from_smiles(complicated_smiles, name="complicated")
    ligand.to_rdkit()
def test_torsion_finder_multiple():
    """
    Find a heavy atom torsion for each rotatable bond of ethanol and check each one is a
    proper torsion.
    """
    ethanol = Ligand.from_smiles("CCO", "ethanol")
    for rotatable_bond in ethanol.find_rotatable_bonds():
        heavy_torsion = find_heavy_torsion(molecule=ethanol, bond=rotatable_bond)
        check_proper_torsion(torsion=heavy_torsion, molecule=ethanol)