Example #1
def test_fragmentation_settings():
    """
    Make sure the settings are correctly handled.
    """

    fragmenter = workflow_components.WBOFragmenter()
    with pytest.raises(ValueError):
        fragmenter.functional_groups = get_data("functional_groups_error.yaml")

    fragmenter.functional_groups = get_data("functional_groups.yaml")

    assert fragmenter.functional_groups is not None
Example #2
def test_torsiondrive_torsion_string():
    """
    Test the torsiondrive factory's ability to create a torsion string for a given bond.
    """

    factory = TorsiondriveDatasetFactory()

    methanol = Molecule.from_file(get_data("methanol.sdf"), "sdf")

    rotatable = methanol.find_rotatable_bonds()
    assert len(rotatable) == 1

    bond = rotatable[0]
    torsion = factory._get_torsion_string(bond=bond)

    # now make sure this torsion is in the propers list
    reference_torsions = []
    for proper in methanol.propers:
        dihedral = []
        for atom in proper:
            dihedral.append(atom.molecule_atom_index)
        reference_torsions.append(tuple(dihedral))

    assert torsion in reference_torsions or tuple(
        reversed(torsion)) in reference_torsions
Example #3
def get_stereoisomers():
    """
    Get a set of molecules that all have some undefined stereochemistry.
    """
    mols = Molecule.from_file(get_data("stereoisomers.smi"), allow_undefined_stereo=True)

    return mols
Example #4
def get_tautomers():
    """
    Get a set of molecules that all have tautomers
    """

    mols = Molecule.from_file(get_data("tautomers_small.smi"), allow_undefined_stereo=True)

    return mols
Example #5
def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets")
    # add just mm spec
    dataset.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="mm default spec",
                        overwrite=True)
    # build some constraints
    constraints = Constraints()
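    # the "set" constraint should drive the 2-0-1-5 dihedral to 60 degrees during the optimization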
    constraints.add_set_constraint(constraint_type="dihedral",
                                   indices=[2, 0, 1, 5],
                                   value=60,
                                   bonded=True)
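    # the "freeze" constraint should keep the 0-1 distance fixed at its starting value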
    constraints.add_freeze_constraint(constraint_type="distance",
                                      indices=[0, 1],
                                      bonded=True)
    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(index=index,
                         molecule=ethane,
                         attributes=attributes,
                         constraints=constraints)
    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    final_molecule = record.get_final_molecule()
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60)
    assert final_molecule.measure((0, 1)) == pytest.approx(
        record.get_initial_molecule().measure((0, 1)))
Example #6
def test_torsiondrive_linear_torsion():
    """
    Test the torsiondrive factory's ability to find linear bonds which should not be driven.
    """

    factory = TorsiondriveDatasetFactory()
    molecules = Molecule.from_file(get_data("linear_molecules.sdf"),
                                   "sdf",
                                   allow_undefined_stereo=True)

    for molecule in molecules:
        assert bool(factory._detect_linear_torsions(molecule)) is True
Example #7
def test_serializer_round_trips(serializer):
    """
    Test serializing data to and from file with no compression.
    """
    # get data in a dict format
    data = deserialize(get_data("settings_with_workflow.json"))
    file_name = "settings_with_workflow" + serializer
    # now export to file and back
    with temp_directory():
        serialize(serializable=data, file_name=file_name, compression=None)
        deserialized_data = deserialize(file_name=file_name)
        assert data == deserialized_data
Example #8
def test_compression_serialization_round_trip_file_name(
        serialization, compression):
    """
    Test all of the different serialization and compression combinations.
    Here the compression is in the file name.
    """
    # get data in a dict format
    data = deserialize(get_data("settings_with_workflow.json"))
    file_name = "".join(
        ["settings_with_workflow", ".", serialization, ".", compression])
    # now export the file and read back
    with temp_directory():
        serialize(serializable=data, file_name=file_name, compression=None)
        deserialized_data = deserialize(file_name=file_name)
        assert data == deserialized_data
Example #9
def test_fragmentation_apply():
    """
    Make sure that fragmentation is working.
    """
    fragmenter = workflow_components.WBOFragmenter()
    assert fragmenter.is_available()
    # check that a molecule with no rotatable bonds fails if we don't want the parent back
    benzene = Molecule.from_file(get_data("benzene.sdf"), "sdf")
    result = fragmenter.apply([benzene, ], processors=1)
    assert result.n_molecules == 0

    # now try methanol with the parent included
    methanol = Molecule.from_file(get_data("methanol.sdf"), "sdf")
    fragmenter.include_parent = True
    result = fragmenter.apply([methanol, ], processors=1)
    assert result.n_molecules == 1

    # now try a molecule which should give fragments
    diphenhydramine = Molecule.from_smiles("O(CCN(C)C)C(c1ccccc1)c2ccccc2")
    fragmenter.include_parent = False
    result = fragmenter.apply([diphenhydramine, ], processors=1)
    assert result.n_molecules == 4
    for molecule in result.molecules:
        assert "dihedrals" in molecule.properties
Example #10
def test_torsiondrive_index():
    """
    Test generating an index using torsiondrive; this should tag the atoms in the torsion.
    """

    mol = Molecule.from_file(get_data("methanol.sdf"))

    mol.properties["atom_map"] = {4: 0, 0: 1, 1: 2, 5: 3}
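    # the zero-based map positions above should show up as one-based atom tags in the index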

    factory = TorsiondriveDatasetFactory()

    index = factory.create_index(mol)

    tags = ["[C:2]", "[H:1]", "[O:3]", "[H:4]"]
    for tag in tags:
        assert tag in index
Example #11
def test_import_workflow_only(file_type, factory_type):
    """
    Test importing a workflow only from a workflow file.
    """

    factory = factory_type()

    factory2 = factory_type()

    file_name = "settings_with_workflow." + file_type

    factory.import_workflow(get_data(file_name))
    # make sure the settings have not changed from default
    assert factory.dict(exclude={"workflow"}) == factory2.dict(
        exclude={"workflow"})
    assert len(factory.workflow) == 1
    assert factory.workflow != factory2.workflow
Example #12
def test_importing_settings_no_workflow(file_type, factory_type):
    """
    Test importing the settings with no workflow components from the supported file types.
    """

    factory = factory_type()

    file_name = "settings." + file_type
    factory.import_settings(get_data(file_name))

    changed_attrs = {
        "maxiter": 400,
        "priority": "super_high",
        "compute_tag": "loaded tag",
    }
    for attr, value in changed_attrs.items():
        assert getattr(factory, attr) == value
Example #13
def test_create_dataset(factory_dataset_type):
    """
    Test making the correct corresponding dataset type from a given factory type.
    """

    factory = factory_dataset_type[0]()
    element_filter = workflow_components.ElementFilter()
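    # restrict molecules to H, C, O and N (atomic numbers 1, 6, 8, 7)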
    element_filter.allowed_elements = [1, 6, 8, 7]
    factory.add_workflow_component(element_filter)
    conformer_generator = workflow_components.StandardConformerGenerator(
        max_conformers=1)
    factory.add_workflow_component(conformer_generator)

    mols = Molecule.from_file(get_data("tautomers_small.smi"),
                              "smi",
                              allow_undefined_stereo=True)

    # set some settings
    changed_attrs = {
        "compute_tag": "test tag",
        "dataset_tags": ["openff", "test"],
        "maxiter": 400
    }
    for attr, value in changed_attrs.items():
        setattr(factory, attr, value)

    dataset = factory.create_dataset(dataset_name="test name",
                                     molecules=mols,
                                     description="Force field test",
                                     tagline="A test dataset")

    # check the attributes were changed
    for attr, value in changed_attrs.items():
        assert getattr(dataset, attr) == value

    assert dataset.dataset_name == "test name"

    assert isinstance(dataset, factory_dataset_type[1]) is True

    # make sure molecules were filtered and passed
    assert dataset.dataset != {}
    assert dataset.filtered != {}
    assert element_filter.component_name in dataset.filtered_molecules
Example #14
def test_importing_settings_workflow(file_type, factory_type):
    """
    Test importing the settings and a workflow from the supported file types.
    """

    factory = factory_type()

    file_name = "settings_with_workflow." + file_type
    factory.import_settings(get_data(file_name))

    changed_attrs = {
        "maxiter": 400,
        "priority": "super_high",
        "compute_tag": "loaded tag",
    }
    for attr, value in changed_attrs.items():
        assert getattr(factory, attr) == value

    assert len(factory.workflow) == 1
    assert "StandardConformerGenerator" in factory.workflow
    component = factory.get_workflow_component("StandardConformerGenerator")
    assert component.component_description == "loaded component"
    assert isinstance(component,
                      workflow_components.StandardConformerGenerator) is True
Example #15
def test_basic_submissions_wavefunction(fractal_compute_server):
    """
    Test submitting a basic dataset with a wavefunction protocol and make sure it is executed.
    """
    # only a psi4 test
    if not has_program("psi4"):
        pytest.skip("Program psi4 not found.")

    client = FractalClient(fractal_compute_server)
    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    factory.add_qc_spec(method="hf",
                        basis="sto-6g",
                        program="psi4",
                        spec_name="default",
                        spec_description="wavefunction spec",
                        store_wavefunction="orbitals_and_eigenvalues")

    dataset = factory.create_dataset(
        dataset_name="Test single points with wavefunction",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets with wavefunction",
    )
    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
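            # the stored wavefunction should give back the basis set and the alpha orbitals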
            basis = result.get_wavefunction("basis")
            assert basis.name.lower() == "sto-6g"
            orbitals = result.get_wavefunction("orbitals_a")
            assert orbitals.shape is not None
Example #16
def test_basic_submissions_multiple_spec(fractal_compute_server):
    """Test submitting a basic dataset to a snowflake server with multiple qcspecs."""

    client = FractalClient(fractal_compute_server)

    qc_specs = [{
        "method": "openff-1.0.0",
        "basis": "smirnoff",
        "program": "openmm",
        "spec_name": "openff"
    }, {
        "method": "gaff-2.11",
        "basis": "antechamber",
        "program": "openmm",
        "spec_name": "gaff"
    }]

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    for spec in qc_specs:
        factory.add_qc_spec(**spec,
                            spec_description="testing the single points")

    dataset = factory.create_dataset(
        dataset_name="Test single points multiple specs",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
Example #17
def test_basic_submissions_single_pcm_spec(fractal_compute_server):
    """Test submitting a basic dataset to a snowflake server with pcm water in the specification."""

    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.add_qc_spec(method="hf",
                        basis="sto-3g",
                        program=program,
                        spec_name="default",
                        spec_description="testing the single points with pcm",
                        implicit_solvent=PCMSettings(units="au",
                                                     medium_Solvent="water"),
                        overwrite=True)

    # only use one molecule due to the time it takes to run with pcm
    dataset = factory.create_dataset(
        dataset_name="Test single points with pcm water",
        molecules=molecules[0],
        description="Test basics dataset with pcm water",
        tagline="Testing single point datasets with pcm water",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
            # make sure the PCM result was captured
            assert result.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0
Example #18
def test_basic_submissions_single_spec(fractal_compute_server, specification):
    """Test submitting a basic dataset to a snowflake server."""

    client = FractalClient(fractal_compute_server)

    qc_spec, driver = specification

    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="testing the single points",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test single points info {program}, {driver}",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
Example #19
def test_optimization_submissions(fractal_compute_server, specification):
    """Test submitting an Optimization dataset to a snowflake server."""

    client = FractalClient(fractal_compute_server)

    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = OptimizationDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="test",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test optimizations info {program}, {driver}",
        molecules=molecules[:2],
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for qc_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[qc_spec.spec_name]

        assert spec.description == qc_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == qc_spec.method
        assert spec.qc_spec.basis == qc_spec.basis
        assert spec.qc_spec.program == qc_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(qc_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1
            # if we used psi4 make sure the properties were captured
            if program == "psi4":
                result = record.get_trajectory()[0]
                assert "CURRENT DIPOLE X" in result.extras["qcvars"].keys()
                assert "SCF QUADRUPOLE XX" in result.extras["qcvars"].keys()