def test_fragmentation_settings():
    """The WBOFragmenter must reject a bad functional group file and accept a valid one."""
    component = workflow_components.WBOFragmenter()
    # a malformed functional-group yaml must raise on assignment
    with pytest.raises(ValueError):
        component.functional_groups = get_data("functional_groups_error.yaml")
    # a well-formed file is accepted and stored on the component
    component.functional_groups = get_data("functional_groups.yaml")
    assert component.functional_groups is not None
def test_torsiondrive_torsion_string():
    """The torsion string built for a rotatable bond must match a real proper torsion."""
    factory = TorsiondriveDatasetFactory()
    methanol = Molecule.from_file(get_data("methanol.sdf"), "sdf")
    rotatable_bonds = methanol.find_rotatable_bonds()
    # methanol has exactly one rotatable (C-O) bond
    assert len(rotatable_bonds) == 1
    torsion = factory._get_torsion_string(bond=rotatable_bonds[0])
    # collect every proper torsion on the molecule as a tuple of atom indices
    reference_torsions = [
        tuple(atom.molecule_atom_index for atom in proper)
        for proper in methanol.propers
    ]
    # the generated torsion (in either direction) must appear in the propers list
    assert (torsion in reference_torsions
            or tuple(reversed(torsion)) in reference_torsions)
def get_stereoisomers():
    """
    Get a set of molecules that all have some undefined stereochemistry.
    """
    return Molecule.from_file(get_data("stereoisomers.smi"),
                              allow_undefined_stereo=True)
def get_tautomers():
    """
    Get a set of molecules that all have tautomers.
    """
    return Molecule.from_file(get_data("tautomers_small.smi"),
                              allow_undefined_stereo=True)
def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.

    Bug fix: the final assertions used ``pytest.approx(expected, actual)`` —
    the second positional argument of ``approx`` is the *relative tolerance*,
    and a bare ``approx`` object is always truthy, so those asserts could
    never fail. They now use the correct ``actual == pytest.approx(expected)``
    comparison.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets")
    # add just mm spec
    dataset.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="mm default spec",
                        overwrite=True)
    # build some constraints: set the H-C-C-H dihedral to 60 degrees and
    # freeze the C-C bond length
    constraints = Constraints()
    constraints.add_set_constraint(constraint_type="dihedral",
                                   indices=[2, 0, 1, 5],
                                   value=60,
                                   bonded=True)
    constraints.add_freeze_constraint(constraint_type="distance",
                                      indices=[0, 1],
                                      bonded=True)
    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(index=index,
                         molecule=ethane,
                         attributes=attributes,
                         constraints=constraints)
    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"
    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()
    # make sure of the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    final_molecule = record.get_final_molecule()
    # the set-constraint drove the dihedral to 60 degrees
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60)
    # the freeze-constraint kept the C-C distance unchanged
    assert final_molecule.measure((0, 1)) == pytest.approx(
        record.get_initial_molecule().measure((0, 1)))
def test_torsiondrive_linear_torsion():
    """The torsiondrive factory must detect linear bonds which should not be driven."""
    factory = TorsiondriveDatasetFactory()
    linear_mols = Molecule.from_file(get_data("linear_molecules.sdf"),
                                     "sdf",
                                     allow_undefined_stereo=True)
    for mol in linear_mols:
        # every molecule in this file contains at least one linear torsion
        assert bool(factory._detect_linear_torsions(mol)) is True
def test_serializer_round_trips(serializer):
    """Data serialized to file with no compression must deserialize unchanged."""
    # reference payload as a plain dict
    reference = deserialize(get_data("settings_with_workflow.json"))
    file_name = "settings_with_workflow" + serializer
    # write out and read back inside a scratch directory
    with temp_directory():
        serialize(serializable=reference, file_name=file_name, compression=None)
        assert reference == deserialize(file_name=file_name)
def test_compression_serialization_round_trip_file_name(
        serialization, compression):
    """
    Round-trip every serialization/compression combination where the
    compression is encoded in the file name rather than passed explicitly.
    """
    # reference payload as a plain dict
    reference = deserialize(get_data("settings_with_workflow.json"))
    # e.g. "settings_with_workflow.json.xz" — compression inferred from suffix
    file_name = "settings_with_workflow" + "." + serialization + "." + compression
    with temp_directory():
        serialize(serializable=reference, file_name=file_name, compression=None)
        assert reference == deserialize(file_name=file_name)
def test_fragmentation_apply():
    """
    Make sure that fragmentation is working.

    Fix: the second case loads ``methanol.sdf`` but the old local was named
    ``ethanol`` (and the comment said "now try ethanol") — the name now
    matches the data actually loaded.
    """
    fragmenter = workflow_components.WBOFragmenter()
    assert fragmenter.is_available()

    # a molecule with no rotatable bonds yields nothing when the parent
    # is not requested back
    benzene = Molecule.from_file(get_data("benzene.sdf"), "sdf")
    result = fragmenter.apply([benzene, ], processors=1)
    assert result.n_molecules == 0

    # now try methanol: with include_parent the parent molecule is returned
    methanol = Molecule.from_file(get_data("methanol.sdf"), "sdf")
    fragmenter.include_parent = True
    result = fragmenter.apply([methanol, ], processors=1)
    assert result.n_molecules == 1

    # now try a molecule which should give fragments
    diphenhydramine = Molecule.from_smiles("O(CCN(C)C)C(c1ccccc1)c2ccccc2")
    fragmenter.include_parent = False
    result = fragmenter.apply([diphenhydramine, ], processors=1)
    assert result.n_molecules == 4
    for molecule in result.molecules:
        # each fragment records the torsion(s) that produced it
        assert "dihedrals" in molecule.properties
def test_torsiondrive_index():
    """A torsiondrive index must tag the mapped torsion atoms in the SMILES."""
    mol = Molecule.from_file(get_data("methanol.sdf"))
    # map the four torsion atoms so they are tagged in the generated index
    mol.properties["atom_map"] = {4: 0, 0: 1, 1: 2, 5: 3}
    factory = TorsiondriveDatasetFactory()
    index = factory.create_index(mol)
    for tag in ("[C:2]", "[H:1]", "[O:3]", "[H:4]"):
        assert tag in index
def test_import_workflow_only(file_type, factory_type):
    """Importing only a workflow must change the workflow and nothing else."""
    loaded = factory_type()
    default = factory_type()
    loaded.import_workflow(get_data("settings_with_workflow." + file_type))
    # every non-workflow setting stays at its default value
    assert loaded.dict(exclude={"workflow"}) == default.dict(exclude={"workflow"})
    # the workflow was replaced with the single loaded component
    assert len(loaded.workflow) == 1
    assert loaded.workflow != default.workflow
def test_importing_settings_no_workflow(file_type, factory_type):
    """Settings files with no workflow components must still update the factory."""
    factory = factory_type()
    factory.import_settings(get_data("settings." + file_type))
    # values the settings file is expected to override
    expected = {
        "maxiter": 400,
        "priority": "super_high",
        "compute_tag": "loaded tag",
    }
    for name, expected_value in expected.items():
        assert getattr(factory, name) == expected_value
def test_create_dataset(factory_dataset_type):
    """Each factory must build its matching dataset type and apply its workflow."""
    dataset_factory = factory_dataset_type[0]()

    # workflow: keep only H/C/N/O molecules, then make one conformer each
    element_filter = workflow_components.ElementFilter()
    element_filter.allowed_elements = [1, 6, 8, 7]
    dataset_factory.add_workflow_component(element_filter)
    conformer_generator = workflow_components.StandardConformerGenerator(
        max_conformers=1)
    dataset_factory.add_workflow_component(conformer_generator)

    mols = Molecule.from_file(get_data("tautomers_small.smi"),
                              "smi",
                              allow_undefined_stereo=True)

    # push some non-default settings onto the factory
    changed_attrs = {
        "compute_tag": "test tag",
        "dataset_tags": ["openff", "test"],
        "maxiter": 400
    }
    for attr, value in changed_attrs.items():
        setattr(dataset_factory, attr, value)

    dataset = dataset_factory.create_dataset(dataset_name="test name",
                                             molecules=mols,
                                             description="Force field test",
                                             tagline="A test dataset")

    # the changed factory settings must be forwarded to the dataset
    for attr, value in changed_attrs.items():
        assert getattr(dataset, attr) == value
    assert dataset.dataset_name == "test name"
    assert isinstance(dataset, factory_dataset_type[1]) is True

    # some molecules pass the workflow, some are filtered by the element filter
    assert dataset.dataset != {}
    assert dataset.filtered != {}
    assert element_filter.component_name in dataset.filtered_molecules
def test_importing_settings_workflow(file_type, factory_type):
    """Importing a settings file with a workflow must restore both settings and workflow."""
    factory = factory_type()
    factory.import_settings(get_data("settings_with_workflow." + file_type))

    # values the settings file is expected to override
    expected = {
        "maxiter": 400,
        "priority": "super_high",
        "compute_tag": "loaded tag",
    }
    for name, expected_value in expected.items():
        assert getattr(factory, name) == expected_value

    # exactly one workflow component was loaded: a conformer generator
    assert len(factory.workflow) == 1
    assert "StandardConformerGenerator" in factory.workflow
    component = factory.get_workflow_component("StandardConformerGenerator")
    assert component.component_description == "loaded component"
    assert isinstance(component,
                      workflow_components.StandardConformerGenerator) is True
def test_basic_submissions_wavefunction(fractal_compute_server):
    """
    Test submitting a basic dataset with a wavefunction protocol and make sure it is executed.
    """
    # only a psi4 test
    if not has_program("psi4"):
        pytest.skip(f"Program psi4 not found.")

    client = FractalClient(fractal_compute_server)

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    # single spec requesting the wavefunction be stored with each result
    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    factory.add_qc_spec(method="hf",
                        basis="sto-6g",
                        program="psi4",
                        spec_name="default",
                        spec_description="wavefunction spec",
                        store_wavefunction="orbitals_and_eigenvalues")

    dataset = factory.create_dataset(
        dataset_name=f"Test single points with wavefunction",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets with wavefunction",
    )

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset
    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last ran spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    # every record must be complete and carry the stored wavefunction data
    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
            # the stored basis name must match the submitted spec
            basis = result.get_wavefunction("basis")
            assert basis.name.lower() == "sto-6g"
            # alpha orbitals were requested via store_wavefunction
            orbitals = result.get_wavefunction("orbitals_a")
            assert orbitals.shape is not None
def test_basic_submissions_multiple_spec(fractal_compute_server):
    """Test submitting a basic dataset to a snowflake server with multiple qcspecs."""

    client = FractalClient(fractal_compute_server)

    # two force-field specs run through openmm
    qc_specs = [{
        "method": "openff-1.0.0",
        "basis": "smirnoff",
        "program": "openmm",
        "spec_name": "openff"
    }, {
        "method": "gaff-2.11",
        "basis": "antechamber",
        "program": "openmm",
        "spec_name": "gaff"
    }]

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    for spec in qc_specs:
        factory.add_qc_spec(**spec,
                            spec_description="testing the single points")

    dataset = factory.create_dataset(
        dataset_name=f"Test single points multiple specs",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    # submitting without a long_description_url must be rejected
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last ran spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    # every record for every spec must have completed successfully
    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
def test_basic_submissions_single_pcm_spec(fractal_compute_server):
    """Test submitting a basic dataset to a snowflake server with pcm water in the specification."""

    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    # NOTE: medium_Solvent's capitalisation follows the PCMSettings field name
    factory.add_qc_spec(method="hf",
                        basis="sto-3g",
                        program=program,
                        spec_name="default",
                        spec_description="testing the single points with pcm",
                        implicit_solvent=PCMSettings(units="au",
                                                     medium_Solvent="water"),
                        overwrite=True)

    # only use one molecule due to the time it takes to run with pcm
    dataset = factory.create_dataset(
        dataset_name=f"Test single points with pcm water",
        molecules=molecules[0],
        description="Test basics dataset with pcm water",
        tagline="Testing single point datasets with pcm water",
    )

    # submitting without a long_description_url must be rejected
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last ran spec; for/else raises if the history is empty
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
            # make sure the PCM result was captured
            assert result.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0
def test_basic_submissions_single_spec(fractal_compute_server, specification):
    """Test submitting a basic dataset to a snowflake server."""

    client = FractalClient(fractal_compute_server)

    # the parametrized fixture supplies (qc spec kwargs, driver)
    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="testing the single points",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test single points info {program}, {driver}",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    # submitting without a long_description_url must be rejected
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last ran spec; for/else raises if the history is empty
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    # every record must have completed successfully
    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
def test_optimization_submissions(fractal_compute_server, specification):
    """Test submitting an Optimization dataset to a snowflake server."""

    client = FractalClient(fractal_compute_server)

    # the parametrized fixture supplies (qc spec kwargs, driver)
    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = OptimizationDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="test",
                        overwrite=True)

    # only two molecules to keep the optimization runtime down
    dataset = factory.create_dataset(
        dataset_name=f"Test optimizations info {program}, {driver}",
        molecules=molecules[:2],
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # submitting without a long_description_url must be rejected
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for qc_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[qc_spec.spec_name]
        assert spec.description == qc_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == qc_spec.method
        assert spec.qc_spec.basis == qc_spec.basis
        assert spec.qc_spec.program == qc_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(qc_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            # an optimization must have more than one step in its trajectory
            assert len(record.trajectory) > 1

            # if we used psi4 make sure the properties were captured
            if program == "psi4":
                result = record.get_trajectory()[0]
                assert "CURRENT DIPOLE X" in result.extras["qcvars"].keys()
                assert "SCF QUADRUPOLE XX" in result.extras["qcvars"].keys()