def test_forcebalance_collect_results():
    """
    Collect the results of a completed ForceBalance run and check that the
    force constants on the final smirks differ from the starting value.
    """
    starting_k = "1.048715180139 * mole**-1 * kilocalorie"
    workflow = biphenyl_workflow(target=AbInitio_SMIRNOFF)

    # before collecting anything every target term holds the starting value
    initial_terms = (
        term
        for smirk in workflow.target_smirks
        for term in smirk.terms.values()
    )
    for term in initial_terms:
        assert term.k == starting_k

    with temp_directory():
        # lay out the files of a finished run: the log plus the result force field
        shutil.copy(get_data("complete.out"), "optimize.out")
        results_folder = os.path.join("result", "optimize")
        os.makedirs(results_folder, exist_ok=True)
        shutil.copy(
            get_data("bespoke.offxml"),
            os.path.join(results_folder, "bespoke.offxml"),
        )

        optimizer = ForceBalanceOptimizer()
        collected = optimizer.collect_results(schema=workflow)

        # the collected smirks must no longer carry the starting value
        final_terms = (
            term
            for smirk in collected.final_smirks
            for term in smirk.terms.values()
        )
        for term in final_terms:
            assert term.k != starting_k
def test_optimizer_explicit():
    """
    Call the executor's optimizer directly in the main thread and check that
    the final smirks of the finished task no longer hold a value of 1e-5.
    """
    mol = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=mol)
    # attach the saved torsiondrive result to the schema
    fitting_schema.update_with_results(
        results=TorsionDriveCollectionResult.parse_file(
            get_data("biphenyl.json.xz")
        )
    )

    # no server is needed here: queue a single task by hand
    executor = Executor()
    executor.total_tasks = 1
    executor.opt_queue.put(fitting_schema.tasks[0])

    with temp_directory():
        executor.optimizer()
        # pull the processed task back out of the finished queue
        finished = executor.finished_tasks.get()
        updated_schema = executor.update_fitting_schema(
            task=finished, fitting_schema=fitting_schema
        )
        for smirk in updated_schema.tasks[0].final_smirks:
            for term in smirk.terms.values():
                magnitude = term.k.split()[0]
                assert float(magnitude) != 1e-5
def test_parent_fragment_mapping(molecules):
    """
    Check the fragment-to-parent mapping produced by the fragment engine.

    ``molecules`` unpacks to (parent file name, fragment file name, expected
    mapping); presumably supplied by a parametrised fixture defined elsewhere.
    """
    parent_file, fragment_file, expected_map = molecules
    parent = Molecule.from_file(get_data(parent_file), "sdf")
    fragment = Molecule.from_file(get_data(fragment_file), "sdf")
    found_map = FragmentEngine._get_fragment_parent_mapping(
        fragment=fragment, parent=parent
    )
    assert found_map == expected_map
def test_abinitio_fitting_prep_no_gradient():
    """
    Prepare the ab initio target for fitting with ``fit_gradient`` switched
    off and validate every file written to the target folder.
    """
    target = AbInitio_SMIRNOFF()
    target.fit_gradient = False
    schema = biphenyl_target(target=target)
    reference_mol = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )

    # attach a saved scan result so the schema becomes ready for fitting
    scan_result = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    schema.update_with_results(results=scan_result)
    assert schema.ready_for_fitting is True

    with temp_directory():
        target.prep_for_fitting(fitting_target=schema)

        # only one torsiondrive was added so only one folder is expected
        created = os.listdir(".")
        assert len(created) == 1
        target_dir = created[0]

        written = os.listdir(target_dir)
        for expected_file in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
        ):
            assert expected_file in written

        # the pdb must keep the atom ordering of the input molecule
        pdb_mol = Molecule.from_file(
            os.path.join(target_dir, "molecule.pdb"), file_format="pdb"
        )
        isomorphic, atom_map = Molecule.are_isomorphic(
            reference_mol, pdb_mol, return_atom_map=True
        )
        assert isomorphic is True
        assert atom_map == {
            index: index for index in range(reference_mol.n_atoms)
        }

        # the mol2 file has to carry partial charges
        mol2_mol = Molecule.from_file(
            os.path.join(target_dir, "molecule.mol2"), "mol2"
        )
        assert mol2_mol.partial_charges is not None

        # compare the qdata contents against the reference scan data
        coords, energies, gradients = read_qdata(
            qdata_file=os.path.join(target_dir, "qdata.txt")
        )
        # gradient fitting is off so none should have been written
        assert not gradients
        reference_data = schema.tasks[0].reference_data()
        for index, (coord, energy) in enumerate(zip(coords, energies)):
            expected = reference_data[index]
            assert expected.energy == energy
            assert coord == expected.molecule.geometry.flatten().tolist()
def test_collecting_results():
    """
    Collect the results for a task from a QCArchive instance and check the
    task is ready for fitting afterwards.
    """
    # a client built with default arguments (the public database)
    qc_client = FractalClient()
    mol = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=mol)

    runner = Executor()
    # point the executor at the dataset which holds the wanted record
    runner._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"

    # hand-built collection dict: a single torsiondrive entry, nothing else
    torsion_entry = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    collection = {
        "torsion1d": {"default": [torsion_entry]},
        "optimization": {},
        "hessian": {},
    }

    runner.collect_task_results(
        task=fitting_schema.tasks[0],
        collection_dict=collection,
        client=qc_client,
    )
    assert fitting_schema.tasks[0].ready_for_fitting is True
def test_torsionprofile_metadata():
    """
    Make sure the torsion profile target writes a metadata.json file whose
    contents agree with the task it was generated from.
    """
    from openff.qcsubmit.serializers import deserialize

    target = TorsionProfile_SMIRNOFF()
    schema = biphenyl_target(target=target)

    # attach a saved scan result so the schema becomes ready for fitting
    scan_result = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    schema.update_with_results(results=scan_result)
    assert schema.ready_for_fitting is True

    with temp_directory():
        target.prep_for_fitting(fitting_target=schema)

        # only one torsiondrive was added so only one folder is expected
        created = os.listdir(".")
        assert len(created) == 1
        target_dir = created[0]

        written = os.listdir(target_dir)
        for expected_file in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
            "metadata.json",
        ):
            assert expected_file in written

        metadata = deserialize(
            file_name=os.path.join(target_dir, "metadata.json")
        )

        # the stored dihedral and grid ids must match the task entry
        entry = schema.tasks[0]
        assert entry.dihedrals[0] == tuple(metadata["dihedrals"][0])
        grid_ids = metadata["torsion_grid_ids"]
        for point in entry.reference_data():
            assert point.extras["dihedral_angle"] in grid_ids
def test_forcebalance_collect_result_error():
    """
    Collect the results of a run whose output file reports an error and check
    that the workflow status is set to a convergence error.
    """
    workflow = biphenyl_workflow(target=AbInitio_SMIRNOFF)

    with temp_directory():
        # lay out the files of a failed run: the error log plus a force field
        shutil.copy(get_data("error.out"), "optimize.out")
        results_folder = os.path.join("result", "optimize")
        os.makedirs(results_folder, exist_ok=True)
        shutil.copy(
            get_data("bespoke.offxml"),
            os.path.join(results_folder, "bespoke.offxml"),
        )

        optimizer = ForceBalanceOptimizer()
        collected = optimizer.collect_results(schema=workflow)
        assert collected.status == Status.ConvergenceError
def test_pre_run_check_no_opt():
    """
    A factory with no optimizer set must raise an OptimizerError when asked
    to build a fitting schema.
    """
    factory = WorkflowFactory()
    mol = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    with pytest.raises(OptimizerError):
        factory.fitting_schema_from_molecules(molecules=mol)
def test_label_molecule():
    """
    Label ethane with the force field editor and check that every expected
    parameter type is present in the returned labels.
    """
    editor = ForceFieldEditor(forcefield_name="openff-1.0.0.offxml")
    mol = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    labels = editor.label_molecule(molecule=mol)

    expected_types = (
        "Bonds",
        "Angles",
        "ProperTorsions",
        "ImproperTorsions",
        "vdW",
    )
    for handler in expected_types:
        assert handler in labels
def test_normal_fragmentation():
    """
    Fragment the bace parent molecule and check the number of fragments, the
    identity of the known fragments and the uniqueness of the parent torsions.
    """
    fragmenter = WBOFragmenter()
    fragmenter.keep_non_rotor_ring_substituents = False
    parent = Molecule.from_file(
        file_path=get_data("bace_parent.sdf"), file_format="sdf"
    )

    # three fragment records are expected for bace
    records = fragmenter.fragment(molecule=parent)
    assert len(records) == 3

    # both reference fragments must appear among the generated molecules
    generated = [record.fragment_molecule for record in records]
    for file_name in ("bace_frag1.sdf", "bace_frag2.sdf"):
        reference = Molecule.from_file(
            file_path=get_data(file_name), file_format="sdf"
        )
        assert reference in generated

    # every fragment must be built around a different parent torsion
    unique_torsions = {record.parent_torsion for record in records}
    assert len(unique_torsions) == 3
def test_pre_run_check_no_target():
    """
    An optimizer with no optimization targets must make the factory raise an
    OptimizerError when asked to build a fitting schema.
    """
    factory = WorkflowFactory()
    mol = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    # the optimizer is registered without any target being added to it
    optimizer = ForceBalanceOptimizer()
    factory.set_optimizer(optimizer=optimizer)
    with pytest.raises(OptimizerError):
        factory.fitting_schema_from_molecules(molecules=mol)
def test_missing_task_type():
    """
    Generating a fitting task for a collection workflow of "test" must raise
    NotImplementedError.
    """
    dummy = DummyTarget()
    dummy.collection_workflow = "test"
    ethanol = Molecule.from_file(get_data("ethanol.sdf"))
    with pytest.raises(NotImplementedError):
        dummy.generate_fitting_task(
            molecule=ethanol,
            fragment=False,
            attributes=get_molecule_cmiles(ethanol),
            dihedrals=[(8, 2, 1, 0)],
        )
def test_pre_run_check_no_smirks():
    """
    A factory whose target smirks list is empty must raise a
    TargetNotSetError when asked to build a fitting schema.
    """
    factory = WorkflowFactory()
    mol = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # everything is configured except the target smirks
    factory.target_smirks = []
    with pytest.raises(TargetNotSetError):
        factory.fitting_schema_from_molecules(molecules=mol)
def test_pre_run_check_no_params():
    """
    A factory whose target parameters list is empty must raise a
    TargetNotSetError when asked to build a fitting schema.
    """
    factory = WorkflowFactory()
    mol = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # everything is configured except the parameters to optimise
    factory.target_parameters = []
    with pytest.raises(TargetNotSetError):
        factory.fitting_schema_from_molecules(molecules=mol)
def test_pre_run_check_no_frag():
    """
    A factory whose fragmentation engine is None must raise a FragmenterError
    when asked to build a fitting schema.
    """
    factory = WorkflowFactory()
    mol = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # everything is configured except the fragmentation engine
    factory.fragmentation_engine = None
    with pytest.raises(FragmenterError):
        factory.fitting_schema_from_molecules(molecules=mol)
def test_generate_fitting_task(collection_workflow):
    """
    Check that the task type of the generated fitting task matches the
    collection workflow set on the target.
    """
    dummy = DummyTarget()
    dummy.collection_workflow = collection_workflow
    ethanol = Molecule.from_file(get_data("ethanol.sdf"))
    generated = dummy.generate_fitting_task(
        molecule=ethanol,
        fragment=False,
        attributes=get_molecule_cmiles(ethanol),
        dihedrals=[(8, 2, 1, 0)],
    )
    assert generated.task_type == collection_workflow
def biphenyl_workflow(target) -> OptimizationSchema:
    """
    Build a fitting schema for biphenyl and return its first task.

    ``target`` is called with no arguments to create the optimization target
    which is set on a ForceBalance optimizer.
    """
    biphenyl = Molecule.from_file(get_data("biphenyl.sdf"), "sdf")

    factory = WorkflowFactory()
    # bespoke terms and torsion expansion are switched off to keep fitting fast
    factory.generate_bespoke_terms = False
    factory.expand_torsion_terms = False

    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=target())
    factory.set_optimizer(optimizer=optimizer)

    fitting_schema = factory.fitting_schema_from_molecules(molecules=biphenyl)
    return fitting_schema.tasks[0]
def test_bespoke_target_torsion_smirks():
    """
    Generate bespoke torsion smirks for a single central bond and check that
    each pattern matches the atoms recorded on it.
    """
    generator = SmirksGenerator()
    occo = Molecule.from_file(get_data("OCCO.sdf"))
    bespoke = generator._get_bespoke_torsion_smirks(
        molecule=occo, central_bonds=[(1, 2)]
    )

    # three unique patterns are expected: H-C-C-H, H-C-C-O and O-C-C-O
    assert len(bespoke) == 3
    for pattern in bespoke:
        matched = condense_matches(
            occo.chemical_environment_matches(pattern.smirks)
        )
        assert compare_matches(matched, pattern.atoms) is True
def test_submit_new_tasks(fractal_compute_server):
    """
    Submit the tasks of a new fitting task to the archive instance and check
    the submission response.
    """
    # the calculations are never executed as psi4 is not installed
    qc_client = FractalClient(fractal_compute_server)
    mol = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=mol)

    runner = Executor()
    submission = runner.submit_new_tasks(
        task=fitting_schema.tasks[0], client=qc_client
    )
    # one new entry should be reported under the default spec
    assert submission == {'OpenFF Bespoke-fit': {'default': 1}}
def test_forcebalance_readoutput(output):
    """
    Read the output of a ForceBalance run and check the reported status and
    force field path.

    ``output`` unpacks to (output file name, expected status).
    """
    file_name, expected_status = output
    with temp_directory():
        shutil.copy(get_data(file_name), "optimize.out")

        # a placeholder force field stands in for the real optimisation result
        results_folder = os.path.join("result", "optimize")
        os.makedirs(results_folder, exist_ok=True)
        dummy_ff = os.path.join(results_folder, "bespoke.offxml")
        with open(dummy_ff, "w") as xml:
            xml.write("test")

        optimizer = ForceBalanceOptimizer()
        parsed = optimizer.read_output()
        assert parsed["status"] == expected_status
        assert "bespoke.offxml" in parsed["forcefield"]
def biphenyl_target(
    target: Union[AbInitio_SMIRNOFF, TorsionProfile_SMIRNOFF]
) -> TargetSchema:
    """
    Build a target schema from the given target instance holding a single
    biphenyl fitting task for the dihedral (5, 9, 10, 6).
    """
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    schema = target.generate_target_schema()
    # one task for the single dihedral of interest
    fitting_task = target.generate_fitting_task(
        molecule=biphenyl,
        fragment=False,
        attributes=get_molecule_cmiles(molecule=biphenyl),
        dihedrals=[(5, 9, 10, 6)],
    )
    schema.add_fitting_task(task=fitting_task)
    return schema
def test_task_from_molecule():
    """
    Build an optimization schema for bace through the workflow factory and
    check the fields copied from the factory and optimizer.
    """
    mol = Molecule.from_file(
        file_path=get_data("bace.sdf"), file_format="sdf"
    )
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    task = factory._task_from_molecule(molecule=mol, index=1)

    # settings inherited from the factory and the optimizer
    assert task.initial_forcefield == factory.initial_forcefield
    assert task.optimizer_name == optimizer.optimizer_name
    assert task.job_id == "bespoke_task_1"
    assert bool(task.target_smirks) is True
    assert task.target_parameters == factory.target_parameters
    # the molecule and the task/target counts generated for it
    assert task.target_molecule.molecule == mol
    assert task.n_tasks == 3
    assert task.n_targets == 1
def test_forcebalance_optimize(optimization_target):
    """
    Run the full ForceBalance optimization for biphenyl with the given target
    using saved torsiondrive results, and check the run completes and the
    force constants no longer hold the 1e-05 value.
    """
    from openff.qcsubmit.results import TorsionDriveCollectionResult

    workflow = biphenyl_workflow(target=optimization_target)
    with temp_directory():
        # feed the saved torsiondrive results into the workflow
        workflow.update_with_results(
            results=TorsionDriveCollectionResult.parse_file(
                get_data("biphenyl.json.xz")
            )
        )

        optimizer = ForceBalanceOptimizer()
        outcome = optimizer.optimize(schema=workflow)
        assert outcome.status == Status.Complete

        optimised_terms = (
            term
            for smirk in outcome.target_smirks
            for term in smirk.terms.values()
        )
        for term in optimised_terms:
            assert term.k != "1e-05 * mole**-1 * kilocalorie"
def test_bespoke_torsion_smirks():
    """
    Generate bespoke smirks for all torsions in the molecule, check each
    pattern matches its recorded atoms and that every proper torsion is
    covered by at least one pattern.
    """
    generator = SmirksGenerator()
    occo = Molecule.from_file(get_data("OCCO.sdf"))
    bespoke = generator._get_bespoke_torsion_smirks(molecule=occo)

    # five unique torsion patterns are expected
    assert len(bespoke) == 5

    covered = []
    for pattern in bespoke:
        matched = condense_matches(
            occo.chemical_environment_matches(pattern.smirks)
        )
        covered.extend(matched)
        assert compare_matches(matched, pattern.atoms) is True

    # a torsion counts as covered in either atom order
    for proper in occo.propers:
        forward = tuple(atom.molecule_atom_index for atom in proper)
        assert forward in covered or forward[::-1] in covered
def generate_optimize_in(
    self, priors: Dict[str, float], fitting_targets: Dict[str, List[str]]
) -> None:
    """
    Render the jinja template into an optimize.in control file for
    forcebalance, written to the current working directory.

    Parameters
    ----------
    priors: Dict[str, float]
        A dictionary containing the prior names and values.
    fitting_targets: Dict[str, List[str]]
        A dictionary containing the fitting target names sorted by
        forcebalance target.

    Raises
    ------
    TargetNotSetError
        If a key of `fitting_targets` does not match (case-insensitively) the
        name of one of the optimization targets set on this optimizer.

    Notes
    -----
    This function can be used to generate many optimize in files so many
    force balance jobs can be ran simultaneously.
    """
    # every requested target has to be registered on the optimizer
    registered = {target.name.lower() for target in self.optimization_targets}
    for requested in fitting_targets:
        if requested.lower() in registered:
            continue
        raise TargetNotSetError(
            f"The target {requested} is not setup for this optimizer and is required, please add it with runtime options using `set_optimization_target`."
        )

    # load the control file template shipped with the package data
    template_path = get_data(os.path.join("templates", "optimize.txt"))
    with open(template_path) as handle:
        template = Template(handle.read())

    # the optimizer settings plus the priors and targets form the context
    context = {
        **self.dict(),
        "priors": priors,
        "fitting_targets": fitting_targets,
    }
    # render before opening the output so a failed render leaves no file behind
    contents = template.render(**context)
    with open("optimize.in", "w") as control_file:
        control_file.write(contents)
def test_error_cycle_complete():
    """
    Error cycle a task that is already complete in QCArchive; the result
    should be collected and the task placed in the optimization queue.
    """
    qc_client = FractalClient()
    mol = Molecule.from_file(get_data("biphenyl.sdf"))
    fitting_schema = get_fitting_schema(mol)

    runner = Executor()
    # point the executor at the dataset which holds the completed record
    runner._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"

    task = fitting_schema.tasks[0]
    task_hashes = list(task.get_task_map().keys())
    # hand-built task map linking the first task hash to the dataset entry
    archive_entry = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    runner.task_map = {task_hashes[0]: archive_entry}

    runner._error_cycle_task(task=task, client=qc_client)

    # the collected task should now be waiting in the optimization queue
    queued = runner.opt_queue.get(timeout=5)
    assert queued.ready_for_fitting is True
def test_make_fitting_schema_from_molecules():
    """
    Build a fitting schema for bace with the default settings and check the
    molecule/task counts and the generated qcsubmit dataset.
    """
    mol = Molecule.from_file(
        file_path=get_data("bace.sdf"), file_format="sdf"
    )
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    fitting_schema = factory.fitting_schema_from_molecules(molecules=mol)

    # one molecule giving three tasks; bace is listed as a molecule but
    # not as an entry molecule
    assert fitting_schema.n_molecules == 1
    assert fitting_schema.n_tasks == 3
    assert mol in fitting_schema.molecules
    assert mol not in fitting_schema.entry_molecules

    # a single torsiondrive dataset with three records is expected
    datasets = fitting_schema.generate_qcsubmit_datasets()
    assert len(datasets) == 1
    torsion_dataset = datasets[0]
    assert torsion_dataset.dataset_type == "TorsiondriveDataset"
    assert torsion_dataset.n_records == 3