def test_truncate_n_molecules():
    """Check `get_molecules_per_component` with and without truncation.

    A substance is built from two ions at a mole fraction of 0.00267 each,
    with the remainder assigned to "O". Requesting 1000 molecules with
    ``truncate_n_molecules=False`` is expected to raise a ``ValueError``,
    while ``truncate_n_molecules=True`` is expected to return 3 / 3 / 994
    molecules for the three components.
    """
    ion_fraction = 0.00267

    solution = Substance()

    for ion_smiles in ("[Na+]", "[Cl-]"):
        solution.add_component(
            component=Component(smiles=ion_smiles),
            amount=MoleFraction(ion_fraction),
        )

    solution.add_component(
        component=Component(smiles="O"),
        amount=MoleFraction(1.0 - 2.0 * ion_fraction),
    )

    # Without truncation the request is expected to be rejected.
    with pytest.raises(ValueError):
        solution.get_molecules_per_component(1000, truncate_n_molecules=False)

    # With truncation the counts are keyed by component identifier.
    expected_counts = {"[Na+]{solv}": 3, "[Cl-]{solv}": 3, "O{solv}": 994}
    actual_counts = solution.get_molecules_per_component(
        1000, truncate_n_molecules=True
    )

    assert actual_counts == expected_counts
Пример #2
0
def test_solvate_existing_structure_protocol():
    """Tests solvating a single methanol molecule in water.

    The solute coordinates are first built with `BuildCoordinatesPackmol`,
    then passed to `SolvateExistingStructure`. The resulting PDB file is
    expected to contain ten residues, the first of which carries the residue
    name that was assigned to the solute.
    """

    import mdtraj

    methanol = Component("CO")

    solute = Substance()
    solute.add_component(methanol, ExactAmount(1))

    solvent = Substance()
    solvent.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as work_dir:

        # Step 1: build coordinates for the solute on its own.
        build_solute = BuildCoordinatesPackmol("build_methanol")
        build_solute.max_molecules = 1
        build_solute.substance = solute
        build_solute.execute(work_dir, ComputeResources())

        expected_residue_name = build_solute.assigned_residue_names[
            methanol.identifier
        ]

        # Step 2: add the solvent around the existing solute structure.
        solvate = SolvateExistingStructure("solvate_methanol")
        solvate.max_molecules = 9
        solvate.substance = solvent
        solvate.solute_coordinate_file = build_solute.coordinate_file_path
        solvate.execute(work_dir, ComputeResources())

        solvated = mdtraj.load_pdb(solvate.coordinate_file_path)

        # 1 solute molecule + 9 solvent molecules were requested.
        assert solvated.n_residues == 10
        assert solvated.top.residue(0).name == expected_residue_name
def test_multiple_amounts():
    """Check that one component can carry two different kinds of amount.

    Each ion is added once with a `MoleFraction` and once with an
    `ExactAmount`. The substance is expected to report two components with
    two amounts each, and to split six molecules as four sodium and two
    chloride.
    """
    substance = Substance()

    sodium = Component("[Na+]")
    chloride = Component("[Cl-]")

    for ion, fraction in ((sodium, 0.75), (chloride, 0.25)):
        substance.add_component(ion, MoleFraction(fraction))
        substance.add_component(ion, ExactAmount(1))

    assert substance.number_of_components == 2

    amounts_per_ion = [substance.get_amounts(ion) for ion in (sodium, chloride)]

    for ion_amounts in amounts_per_ion:
        assert len(ion_amounts) == 2

    counts = substance.get_molecules_per_component(6)

    assert len(counts) == 2

    assert counts[sodium.identifier] == 4
    assert counts[chloride.identifier] == 2
Пример #4
0
def dummy_complex() -> Substance:
    """Build a substance made of one ligand and one receptor molecule.

    Returns
    -------
    Substance
        A substance containing exactly one "C" component with the ``Ligand``
        role and exactly one "CO" component with the ``Receptor`` role.
    """
    complex_substance = Substance()

    members = (
        ("C", Component.Role.Ligand),
        ("CO", Component.Role.Receptor),
    )

    for smiles, role in members:
        complex_substance.add_component(
            Component(smiles=smiles, role=role), ExactAmount(1)
        )

    return complex_substance
Пример #5
0
def create_dummy_substance(number_of_components, elements=None):
    """Creates a substance with a given number of components,
    each containing the specified elements.

    The component at (zero-based) position ``i`` has a SMILES pattern made
    of the ``elements`` repeated ``i + 1`` times, so every component is
    distinct. All components share the same mole fraction of
    ``1 / number_of_components``.

    Parameters
    ----------
    number_of_components : int
        The number of components to add to the substance. Must be at
        least one.
    elements : list of str, optional
        The elements that each component should contain. Defaults to
        ``["C"]``.

    Returns
    -------
    Substance
        The created substance.

    Raises
    ------
    ValueError
        If `number_of_components` is less than one.
    """
    # Fail with a clear message instead of a bare ZeroDivisionError (for 0)
    # or a silently empty substance (for negative counts).
    if number_of_components < 1:
        raise ValueError(
            f"number_of_components must be at least 1, got {number_of_components}"
        )

    if elements is None:
        elements = ["C"]

    substance = Substance()

    mole_fraction = 1.0 / number_of_components

    for repeat_count in range(1, number_of_components + 1):

        smiles_pattern = "".join(elements * repeat_count)

        substance.add_component(
            Component(smiles_pattern), MoleFraction(mole_fraction)
        )

    return substance
Пример #6
0
    def from_components(cls, *components):
        """Build a new substance in which all components share one mole fraction.

        Each component is added with a mole fraction of
        ``1 / len(components)``.

        Parameters
        ----------
        components: Component or str
            The components to add to the substance. Strings are treated as
            SMILES patterns and wrapped in a `Component`; any other value is
            added as given.

        Returns
        -------
        Substance
            The substance containing the requested components in equal amounts.

        Raises
        ------
        ValueError
            If no components are given.
        """

        if not components:
            raise ValueError("At least one component must be specified")

        equal_fraction = 1.0 / len(components)

        substance = cls()

        for entry in components:

            resolved = Component(smiles=entry) if isinstance(entry, str) else entry
            substance.add_component(resolved, MoleFraction(equal_fraction))

        return substance
Пример #7
0
def test_build_docked_coordinates_protocol():
    """Tests docking a methanol molecule into alpha-Cyclodextrin.

    The test is skipped when `has_openeye()` reports false. Otherwise the
    docked complex written by `BuildDockedCoordinates` is expected to contain
    two residues.
    """

    if not has_openeye():
        pytest.skip("The `BuildDockedCoordinates` protocol requires OpenEye.")

    methanol_ligand = Component("CO", role=Component.Role.Ligand)

    ligand_substance = Substance()
    ligand_substance.add_component(methanol_ligand, ExactAmount(1))

    # TODO: This test could likely be made substantially faster
    #       by storing the binary prepared receptor. Would this
    #       be in breach of any oe license terms?
    with tempfile.TemporaryDirectory() as work_dir:

        docking = BuildDockedCoordinates("build_methanol")
        docking.ligand_substance = ligand_substance
        docking.number_of_ligand_conformers = 5

        receptor_path = get_data_filename("test/molecules/acd.mol2")
        docking.receptor_coordinate_file = receptor_path

        docking.execute(work_dir, ComputeResources())

        docked_structure = PDBFile(docking.docked_complex_coordinate_path)
        n_docked_residues = docked_structure.topology.getNumResidues()

        assert n_docked_residues == 2
def test_add_mole_fractions():
    """Check that adding the same component twice combines its mole fractions.

    The "C" component is added twice with a mole fraction of 0.5. The
    substance is expected to hold a single component with a single
    `MoleFraction` amount whose value is close to 1.0.
    """
    substance = Substance()

    for _ in range(2):
        substance.add_component(Component("C"), MoleFraction(0.5))

    assert substance.number_of_components == 1

    only_component = substance.components[0]
    amounts = substance.get_amounts(only_component)

    assert len(amounts) == 1

    combined = next(iter(amounts))

    assert isinstance(combined, MoleFraction)
    assert np.isclose(combined.value, 1.0)
Пример #9
0
def test_build_coordinates_packmol_exact(count_exact_amount):
    """Tests that the build coordinate protocol behaves correctly for substances
    with exact amounts.

    Parameters
    ----------
    count_exact_amount
        The value assigned to the protocol's ``count_exact_amount`` input.
        It also selects the molecule limit: 11 when truthy, 10 otherwise.
        The built system is expected to hold 11 residues in both cases.
    """

    import mdtraj

    substance = Substance()
    substance.add_component(Component("O"), MoleFraction(1.0))
    substance.add_component(Component("C"), ExactAmount(1))

    if count_exact_amount:
        molecule_limit = 11
    else:
        molecule_limit = 10

    protocol = BuildCoordinatesPackmol("build_coordinates")
    protocol.max_molecules = molecule_limit
    protocol.count_exact_amount = count_exact_amount
    protocol.substance = substance

    with tempfile.TemporaryDirectory() as work_dir:
        protocol.execute(work_dir)
        # Load while the temporary directory still exists.
        built_system = mdtraj.load_pdb(protocol.coordinate_file_path)

    assert built_system.n_residues == 11
Пример #10
0
def data_frame() -> pandas.DataFrame:
    """Build a data frame of placeholder liquid-phase measurements.

    Every combination of two temperatures, two pressures, two property types
    (`Density` and `EnthalpyOfMixing`) and four mole-fraction tuples is
    expanded over the SMILES tuples that have a matching number of
    components. Each entry has a value and an uncertainty of 1.0 in the
    default unit of its property type.

    Returns
    -------
    pandas.DataFrame
        The output of ``PhysicalPropertyDataSet.to_pandas()`` for the
        generated entries.
    """
    temperatures = [298.15, 318.15]
    pressures = [101.325, 101.0]

    property_types = [Density, EnthalpyOfMixing]

    fraction_tuples = [(1.0,), (1.0,), (0.25, 0.75), (0.75, 0.25)]
    # SMILES tuples keyed by the number of components they describe.
    smiles_by_size = {
        1: [("C(F)(Cl)(Br)",), ("C",)],
        2: [("CO", "C"), ("C", "CO")],
    }

    def build_substance(smiles_tuple, fractions):
        # Pair each SMILES pattern with the mole fraction at the same position.
        substance = Substance()

        for pattern, fraction in zip(smiles_tuple, fractions):
            substance.add_component(Component(pattern), MoleFraction(fraction))

        return substance

    combinations = (
        (temperature, pressure, property_type, fractions)
        for temperature in temperatures
        for pressure in pressures
        for property_type in property_types
        for fractions in fraction_tuples
    )

    entries = []

    for temperature, pressure, property_type, fractions in combinations:

        for smiles_tuple in smiles_by_size[len(fractions)]:

            substance = build_substance(smiles_tuple, fractions)

            state = ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=pressure * unit.kilopascal,
            )

            entry = property_type(
                thermodynamic_state=state,
                phase=PropertyPhase.Liquid,
                value=1.0 * property_type.default_unit(),
                uncertainty=1.0 * property_type.default_unit(),
                source=MeasurementSource(doi=" "),
                substance=substance,
            )
            entries.append(entry)

    collected = PhysicalPropertyDataSet()
    collected.add_properties(*entries)

    return collected.to_pandas()
    def create_substance():
        """Build a substance containing one component for each of four roles.

        The solute ("C"), ligand ("CC") and receptor ("CCC") are each added
        with an exact amount of one, and the solvent ("O") is added with a
        mole fraction of 1.0.
        """
        roles = Component.Role

        recipe = (
            ("C", roles.Solute, ExactAmount(1)),
            ("CC", roles.Ligand, ExactAmount(1)),
            ("CCC", roles.Receptor, ExactAmount(1)),
            ("O", roles.Solvent, MoleFraction(1.0)),
        )

        test_substance = Substance()

        for smiles, role, amount in recipe:
            test_substance.add_component(Component(smiles, role=role), amount)

        return test_substance
Пример #12
0
def _build_input_output_substances():
    """Builds pairs of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def two_component(fraction_o, fraction_c):
        # A substance of "O" and "C" with the given mole fractions.
        substance = Substance()
        substance.add_component(Component("O"), MoleFraction(fraction_o))
        substance.add_component(Component("C"), MoleFraction(fraction_c))
        return substance

    # Start with some easy cases, where the expected substance is built
    # exactly like the input.
    easy_cases = (("O",), ("O", "C"), ("O", "C", "CO"))

    pairs = [
        (Substance.from_components(*smiles), Substance.from_components(*smiles))
        for smiles in easy_cases
    ]

    # Handle some cases where rounding will need to occur. Each row holds the
    # input fractions followed by the expected fractions.
    rounding_cases = (
        ((0.41, 0.59), (0.4, 0.6)),
        ((0.59, 0.41), (0.6, 0.4)),
    )

    for input_fractions, expected_fractions in rounding_cases:
        pairs.append(
            (two_component(*input_fractions), two_component(*expected_fractions))
        )

    return pairs
Пример #13
0
    def _build_substance(
        guest_smiles: Optional[str],
        host_smiles: str,
        ionic_strength: Optional[unit.Quantity],
        negative_buffer_ion: str = "[Cl-]",
        positive_buffer_ion: str = "[Na+]",
    ):
        """Builds a substance containing a ligand and receptor solvated in an aqueous
        solution with a given ionic strength

        The guest (if any) and the host are each added with an exact amount
        of one. Water takes whatever mole fraction is left after the buffer
        ions are added. If the summed total charge of the host and guest is
        at or beyond -0.9999 e / +0.9999 e, counter ions are added as an exact
        amount: the positive buffer ion for a negative net charge and the
        negative buffer ion for a positive one.

        Parameters
        ----------
        guest_smiles
            The SMILES descriptor of the guest. If ``None`` no guest
            is added.
        host_smiles
            The SMILES descriptor of the host.
        ionic_strength
            The ionic strength of the aqueous solvent. If ``None`` no buffer
            ions are added by mole fraction.
        negative_buffer_ion
            The SMILES descriptor of the negative buffer / counter ion.
        positive_buffer_ion
            The SMILES descriptor of the positive buffer / counter ion.

        Returns
        -------
            The built substance.
        """
        from openff.toolkit.topology import Molecule
        from simtk import unit as simtk_unit

        substance = Substance()

        if guest_smiles is not None:

            guest = Component(smiles=guest_smiles, role=Component.Role.Ligand)
            substance.add_component(component=guest, amount=ExactAmount(1))

        host = Component(smiles=host_smiles, role=Component.Role.Receptor)
        substance.add_component(component=host, amount=ExactAmount(1))

        water = Component(smiles="O", role=Component.Role.Solvent)
        positive_ion = Component(
            smiles=positive_buffer_ion, role=Component.Role.Solvent
        )
        negative_ion = Component(
            smiles=negative_buffer_ion, role=Component.Role.Solvent
        )

        water_mole_fraction = 1.0

        if ionic_strength is not None:

            salt_mole_fraction = Substance.calculate_aqueous_ionic_mole_fraction(
                ionic_strength
            )

            if isinstance(salt_mole_fraction, unit.Quantity):
                # noinspection PyUnresolvedReferences
                salt_mole_fraction = salt_mole_fraction.magnitude

            # Each ion takes `salt_mole_fraction`; water takes the rest.
            water_mole_fraction = 1.0 - salt_mole_fraction * 2

            substance.add_component(
                component=positive_ion,
                amount=MoleFraction(salt_mole_fraction),
            )
            substance.add_component(
                component=negative_ion,
                amount=MoleFraction(salt_mole_fraction),
            )

        substance.add_component(
            component=water, amount=MoleFraction(water_mole_fraction)
        )

        host_molecule_charge = Molecule.from_smiles(host_smiles).total_charge
        guest_molecule_charge = (
            0.0 * simtk_unit.elementary_charge
            if guest_smiles is None
            else Molecule.from_smiles(guest_smiles).total_charge
        )

        net_charge = (host_molecule_charge + guest_molecule_charge).value_in_unit(
            simtk_unit.elementary_charge
        )

        # Round to the nearest integer rather than truncating: `int()` cuts
        # toward zero, so a charge such as -1.9999999 (or -0.9999, which passes
        # the check below) would otherwise yield one counter ion too few.
        n_counter_ions = abs(int(round(net_charge)))

        if net_charge <= -0.9999:
            substance.add_component(positive_ion, ExactAmount(n_counter_ions))
        elif net_charge >= 0.9999:
            substance.add_component(negative_ion, ExactAmount(n_counter_ions))

        return substance
Пример #14
0
    def _apply(
        cls,
        data_frame: pandas.DataFrame,
        schema: ImportFreeSolvSchema,
        n_processes,
    ) -> pandas.DataFrame:
        """Append the FreeSolv solvation free energies to a data frame.

        The table returned by ``cls._download_free_solv()`` is converted row
        by row into `SolvationFreeEnergy` entries (one solute molecule in "O",
        at 298.15 K and 101.325 kPa), collected into a
        `PhysicalPropertyDataSet`, converted to a data frame and concatenated
        onto `data_frame`.

        Parameters
        ----------
        data_frame
            The data frame to append the imported entries to.
        schema
            Not read by this implementation.
        n_processes
            Not read by this implementation.

        Returns
        -------
        pandas.DataFrame
            `data_frame` followed by the imported entries, re-indexed.
        """

        from openff.evaluator import properties, substances, unit

        value_column = "experimental value (kcal/mol)"
        uncertainty_column = "experimental uncertainty (kcal/mol)"
        reference_column = (
            "experimental reference (original or paper this value was taken from)"
        )

        free_solv_table = cls._download_free_solv()

        imported_entries = []

        for _, row in free_solv_table.iterrows():

            # Trim surrounding whitespace, then standardize the solute SMILES
            # by passing it through `Component`.
            solute_smiles = substances.Component(row["SMILES"].strip()).smiles

            # One solute molecule in a pure "O" solvent.
            substance = Substance()
            substance.add_component(Component(smiles="O"), MoleFraction(1.0))
            substance.add_component(
                Component(smiles=solute_smiles, role=Component.Role.Solute),
                ExactAmount(1),
            )

            # The table stores both numbers in kcal / mol.
            value = float(row[value_column]) * unit.kilocalorie / unit.mole
            std_error = float(row[uncertainty_column]) * unit.kilocalorie / unit.mole

            # Attempt to extract a DOI from the reference column.
            doi = cls._validate_doi(row[reference_column])

            state = ThermodynamicState(
                temperature=298.15 * unit.kelvin,
                pressure=101.325 * unit.kilopascal,
            )
            target_unit = properties.SolvationFreeEnergy.default_unit()

            imported_entries.append(
                SolvationFreeEnergy(
                    thermodynamic_state=state,
                    phase=PropertyPhase.Liquid,
                    substance=substance,
                    value=value.to(target_unit),
                    uncertainty=std_error.to(target_unit),
                    source=MeasurementSource(doi=doi),
                )
            )

        imported_set = PhysicalPropertyDataSet()
        imported_set.add_properties(*imported_entries)

        return pandas.concat(
            [data_frame, imported_set.to_pandas()],
            ignore_index=True,
            sort=False,
        )
def main():
    """Convert the legacy ``propertyestimator`` data sets into the current format.

    For each named data set, ``raw_data/<name>.json`` is loaded and every
    stored property is rebuilt as an object of the matching class from
    `properties`. The resulting `PhysicalPropertyDataSet` is written to
    ``raw_data_v2/<name>.json`` and ``raw_data_v2/<name>.csv``.

    Raises
    ------
    NotImplementedError
        If an amount or a source has an unrecognised ``@type``.
    """

    def as_quantity(raw_quantity):
        # Rebuild a quantity from its stored ``value`` / ``unit`` pair.
        return raw_quantity["value"] * unit.Unit(raw_quantity["unit"])

    # Legacy ``@type`` tags of the supported amounts.
    amount_classes = {
        "propertyestimator.substances.Substance->MoleFraction": MoleFraction,
        "propertyestimator.substances.Substance->ExactAmount": ExactAmount,
    }

    data_set_names = [
        "curated_data_set",
        "gaff 1.81",
        "gaff 2.11",
        "parsley 1.0.0",
        "smirnoff99frosst 1.1.0",
    ]

    os.makedirs("raw_data_v2", exist_ok=True)

    for data_set_name in data_set_names:

        with open(os.path.join("raw_data", f"{data_set_name}.json")) as file:
            raw_data_set = json.load(file)

        assert (
            raw_data_set["@type"]
            == "propertyestimator.datasets.datasets.PhysicalPropertyDataSet"
        )

        converted = []

        for raw_entries in raw_data_set["properties"].values():

            for raw_entry in raw_entries:

                # Extract the substance this entry was measured for.
                raw_substance = raw_entry["substance"]
                substance = Substance()

                for raw_component in raw_substance["components"]:

                    component = Component(
                        smiles=raw_component["smiles"],
                        role=Component.Role[raw_component["role"]["value"]],
                    )

                    # Amounts are stored separately, keyed by the raw SMILES.
                    raw_amounts = raw_substance["amounts"][raw_component["smiles"]]

                    for raw_amount in raw_amounts["value"]:

                        amount_class = amount_classes.get(raw_amount["@type"])

                        if amount_class is None:
                            raise NotImplementedError()

                        substance.add_component(
                            component, amount_class(raw_amount["value"])
                        )

                # Extract the source of the property.
                raw_source = raw_entry["source"]
                source_type = raw_source["@type"]

                if (
                    source_type
                    == "propertyestimator.properties.properties.CalculationSource"
                ):
                    source = CalculationSource(fidelity=raw_source["fidelity"])
                elif (
                    source_type
                    == "propertyestimator.properties.properties.MeasurementSource"
                ):
                    source = MeasurementSource(
                        doi=correct_doi(raw_source["reference"])
                    )
                else:
                    raise NotImplementedError()

                # The new class has the same name as the last segment of the
                # legacy type tag.
                property_class = getattr(
                    properties, raw_entry["@type"].split(".")[-1]
                )

                raw_state = raw_entry["thermodynamic_state"]

                physical_property = property_class(
                    thermodynamic_state=ThermodynamicState(
                        temperature=as_quantity(raw_state["temperature"]),
                        pressure=as_quantity(raw_state["pressure"]),
                    ),
                    phase=PropertyPhase(raw_entry["phase"]),
                    substance=substance,
                    value=as_quantity(raw_entry["value"]),
                    # Measured entries are stored without an uncertainty.
                    uncertainty=(
                        None
                        if isinstance(source, MeasurementSource)
                        else as_quantity(raw_entry["uncertainty"])
                    ),
                    source=source,
                )
                physical_property.id = raw_entry["id"]

                converted.append(physical_property)

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*converted)

        data_set.json(
            os.path.join("raw_data_v2", f"{data_set_name}.json"), format=True
        )
        data_set.to_pandas().to_csv(
            os.path.join("raw_data_v2", f"{data_set_name}.csv")
        )
Пример #16
0
def test_solvation_yank_protocol(solvent_smiles):
    """Check that `SolvationYankProtocol` executes in set-up-only mode.

    A "CO" solute is combined with the given solvent for the first phase and
    left on its own, with an empty second solvent, for the second phase.

    Parameters
    ----------
    solvent_smiles
        The SMILES pattern of the solvent component.
    """

    def new_solute():
        return Component(smiles="CO", role=Component.Role.Solute)

    def new_solvent():
        return Component(smiles=solvent_smiles, role=Component.Role.Solvent)

    full_substance = Substance()
    full_substance.add_component(new_solute(), ExactAmount(1))
    full_substance.add_component(new_solvent(), MoleFraction(1.0))

    solvent_substance = Substance()
    solvent_substance.add_component(new_solvent(), MoleFraction(1.0))

    solute_substance = Substance()
    solute_substance.add_component(new_solute(), ExactAmount(1))

    thermodynamic_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):

        force_field_path = "ff.json"

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        solvated_coordinate_path, solvated_system = _setup_dummy_system(
            "full", full_substance, 2, force_field_path
        )
        vacuum_coordinate_path, vacuum_system = _setup_dummy_system(
            "vacuum", solute_substance, 1, force_field_path
        )

        protocol = SolvationYankProtocol("yank")

        # What is being simulated.
        protocol.solute = solute_substance
        protocol.solvent_1 = solvent_substance
        protocol.solvent_2 = Substance()
        protocol.thermodynamic_state = thermodynamic_state

        # Keep the run minimal and only set it up.
        protocol.number_of_iterations = 1
        protocol.steps_per_iteration = 1
        protocol.checkpoint_interval = 1
        protocol.verbose = True
        protocol.setup_only = True

        # Pre-built inputs for the two phases.
        protocol.solution_1_coordinates = solvated_coordinate_path
        protocol.solution_1_system = solvated_system
        protocol.solution_2_coordinates = vacuum_coordinate_path
        protocol.solution_2_system = vacuum_system

        protocol.electrostatic_lambdas_1 = [1.00]
        protocol.steric_lambdas_1 = [1.00]
        protocol.electrostatic_lambdas_2 = [1.00]
        protocol.steric_lambdas_2 = [1.00]

        protocol.execute("", ComputeResources())
Пример #17
0
def test_ligand_receptor_yank_protocol():
    """Check that `LigandReceptorYankProtocol` executes in set-up-only mode.

    The complex holds one receptor ("c1ccccc1"), one ligand ("C") and an "O"
    solvent. The solvated ligand system holds the same ligand and solvent
    without the receptor.
    """
    roles = Component.Role

    full_substance = Substance()
    full_substance.add_component(
        Component(smiles="c1ccccc1", role=roles.Receptor), ExactAmount(1)
    )
    full_substance.add_component(
        Component(smiles="C", role=roles.Ligand), ExactAmount(1)
    )
    full_substance.add_component(
        Component(smiles="O", role=roles.Solvent), MoleFraction(1.0)
    )

    solute_substance = Substance()
    solute_substance.add_component(
        Component(smiles="C", role=roles.Ligand), ExactAmount(1)
    )
    solute_substance.add_component(
        Component(smiles="O", role=roles.Solvent), MoleFraction(1.0)
    )

    thermodynamic_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):

        force_field_path = "ff.json"

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        complex_coordinate_path, complex_system = _setup_dummy_system(
            "full", full_substance, 3, force_field_path
        )
        ligand_coordinate_path, ligand_system = _setup_dummy_system(
            "ligand", solute_substance, 2, force_field_path
        )

        protocol = LigandReceptorYankProtocol("yank")
        protocol.substance = full_substance
        protocol.thermodynamic_state = thermodynamic_state

        # Keep the run minimal and only set it up.
        protocol.number_of_iterations = 1
        protocol.steps_per_iteration = 1
        protocol.checkpoint_interval = 1
        protocol.verbose = True
        protocol.setup_only = True

        protocol.ligand_residue_name = "TMP"
        protocol.receptor_residue_name = "TMP"

        # Pre-built inputs for the ligand and complex phases.
        protocol.solvated_ligand_coordinates = ligand_coordinate_path
        protocol.solvated_ligand_system = ligand_system
        protocol.solvated_complex_coordinates = complex_coordinate_path
        protocol.solvated_complex_system = complex_system

        protocol.force_field_path = force_field_path

        protocol.execute("", ComputeResources())
Пример #18
0
def _generate_residue_name(residue, smiles):
    """Assign a residue name (and, where relevant, atom names) to a residue
    which corresponds to a particular smiles pattern.

    The name is chosen by the first rule that matches:

    1. The standardized SMILES is a known amino acid: its three letter code.
    2. The standardized SMILES is "O": "HOH", with the atoms renamed to
       "O1", "H1", "H2", ...
    3. The molecule has a single atom: the name returned by
       `_ion_residue_name`, which is also given to the atom.
    4. Otherwise: three random upper-case letters which do not collide with
       a reserved residue name, with the atoms renamed to the element symbol
       followed by a per-element counter.

    The residue is modified in place and nothing is returned.

    Parameters
    ----------
    residue: mdtraj.core.topology.Residue
        The residue to assign the name to.
    smiles: str
        The SMILES pattern to generate a residue name for.
    """
    from mdtraj.core import residue_names
    from openff.toolkit.topology import Molecule

    amino_residue_mappings = {
        "C[C@H](N)C(=O)O": "ALA",
        "N=C(N)NCCC[C@H](N)C(=O)O": "ARG",
        "NC(=O)C[C@H](N)C(=O)O": "ASN",
        "N[C@@H](CC(=O)O)C(=O)O": "ASP",
        "N[C@@H](CS)C(=O)O": "CYS",
        "N[C@@H](CCC(=O)O)C(=O)O": "GLU",
        "NC(=O)CC[C@H](N)C(=O)O": "GLN",
        "NCC(=O)O": "GLY",
        "N[C@@H](Cc1c[nH]cn1)C(=O)O": "HIS",
        "CC[C@H](C)[C@H](N)C(=O)O": "ILE",
        "CC(C)C[C@H](N)C(=O)O": "LEU",
        "NCCCC[C@H](N)C(=O)O": "LYS",
        "CSCC[C@H](N)C(=O)O": "MET",
        "N[C@@H](Cc1ccccc1)C(=O)O": "PHE",
        "O=C(O)[C@@H]1CCCN1": "PRO",
        "N[C@@H](CO)C(=O)O": "SER",
        "C[C@@H](O)[C@H](N)C(=O)O": "THR",
        "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O": "TRP",
        "N[C@@H](Cc1ccc(O)cc1)C(=O)O": "TYR",
        "CC(C)[C@H](N)C(=O)O": "VAL",
    }

    standardized_smiles = Component(smiles=smiles).smiles

    # Rule 1: amino acids.
    amino_acid_code = amino_residue_mappings.get(standardized_smiles)

    if amino_acid_code is not None:
        residue.name = amino_acid_code
        return

    # Rule 2: water.
    if standardized_smiles == "O":

        residue.name = "HOH"

        # Re-assign the water atom names. These need to be set to get
        # correct CONECT statements.
        hydrogens_seen = 0

        for atom in residue.atoms:

            if atom.element.symbol == "O":
                atom.name = "O1"
                continue

            hydrogens_seen += 1
            atom.name = f"H{hydrogens_seen}"

        return

    # Rule 3: single atom molecules are treated as ions.
    openff_molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)

    if openff_molecule.n_atoms == 1:
        residue.name = _ion_residue_name(openff_molecule)
        residue.atom(0).name = residue.name

        return

    # Rule 4: a random name. Names with a reserved meaning are rejected.
    # noinspection PyProtectedMember
    reserved_names = {
        *residue_names._AMINO_ACID_CODES,
        *residue_names._SOLVENT_TYPES,
        *residue_names._WATER_RESIDUES,
        "ADE",
        "CYT",
        "CYX",
        "DAD",
        "DGU",
        "FOR",
        "GUA",
        "HID",
        "HIE",
        "HIH",
        "HSD",
        "HSH",
        "HSP",
        "NMA",
        "THY",
        "URA",
    }

    while True:
        candidate = "".join(
            random.choice(string.ascii_uppercase) for _ in range(3)
        )

        if candidate not in reserved_names:
            break

    residue.name = candidate

    # Assign unique atom names by numbering the atoms of each element from 1.
    atoms_per_element = defaultdict(int)

    for atom in residue.atoms:
        symbol = atom.element.symbol
        atoms_per_element[symbol] += 1
        atom.name = f"{symbol}{atoms_per_element[symbol]}"
Пример #19
0
def complete_evaluator_data_set():
    """Create a more comprehensive `PhysicalPropertyDataSet` which contains one
    measurement for each of:

        * pure density
        * binary density
        * pure enthalpy of vaporization
        * binary enthalpy of mixing
        * binary excess molar volume
        * hydration free energy

    All entries share one thermodynamic state (298.15 K, 1 atm) and one
    measurement source, have a value of 1.0 and an uncertainty of 0.1, and
    are given the ids "1" to "6" in the order listed above.

    Returns
    -------
    PhysicalPropertyDataSet
    """
    state = ThermodynamicState(
        298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )
    source = MeasurementSource(doi="10.1000/xyz123")

    def measurement(property_class, substance, base_unit, phase=PropertyPhase.Liquid):
        # Every entry differs only in its type, substance, unit and phase.
        return property_class(
            thermodynamic_state=state,
            phase=phase,
            substance=substance,
            value=1.0 * base_unit,
            uncertainty=0.1 * base_unit,
            source=source,
        )

    solvation_substance = Substance()
    solvation_substance.add_component(Component("O"), MoleFraction(1.0))
    solvation_substance.add_component(Component("CCCO"), ExactAmount(1))

    density_unit = unit.kilogram / unit.meter**3

    evaluator_properties = [
        measurement(Density, Substance.from_components("O"), density_unit),
        measurement(Density, Substance.from_components("O", "CC=O"), density_unit),
        measurement(
            EnthalpyOfVaporization,
            Substance.from_components("CCO"),
            EnthalpyOfVaporization.default_unit(),
            phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
        ),
        measurement(
            EnthalpyOfMixing,
            Substance.from_components("CCCCO", "CC(C=O)C"),
            EnthalpyOfMixing.default_unit(),
        ),
        measurement(
            ExcessMolarVolume,
            Substance.from_components("C(=O)CCCO", "CCCCCC"),
            ExcessMolarVolume.default_unit(),
        ),
        measurement(
            SolvationFreeEnergy,
            solvation_substance,
            SolvationFreeEnergy.default_unit(),
        ),
    ]

    for position, evaluator_property in enumerate(evaluator_properties, start=1):
        evaluator_property.id = str(position)

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(*evaluator_properties)

    return evaluator_data_set
Пример #20
0
def define_data_set(reweighting: bool) -> PhysicalPropertyDataSet:
    """Assemble the data set of properties to estimate.

    For each of three states (296.15 K, 298.15 K and 300.15 K, all at
    1 atm) the set receives an excess molar volume and an enthalpy of mixing
    entry for the ``CC=O`` / ``CCO`` mixture, followed by density, enthalpy
    of vaporization and dielectric constant entries for ``CCO``. Every entry
    created here carries a placeholder value of zero in the default unit of
    its property type.

    Unless ``reweighting`` is set, two solvation free energy entries at
    298.15 K (``CC=O`` as a solute in ``CCO`` and vice versa) and the output
    of a curation workflow (``ImportFreeSolvSchema`` followed by a
    ``FilterBySubstancesSchema`` restricted to ``("O", "CO")``) are added
    before the per-state entries.

    Parameters
    ----------
    reweighting
        When ``True`` the solvation free energy entries are left out.

    Returns
    -------
        The populated data set.
    """

    # The states to compute estimates at; they differ only in temperature.
    states = [
        ThermodynamicState(
            temperature=temperature * unit.kelvin,
            pressure=1.0 * unit.atmosphere,
        )
        for temperature in (296.15, 298.15, 300.15)
    ]

    data_set = PhysicalPropertyDataSet()

    # Solvation free energies.
    if not reweighting:

        # One substance per (solvent, solute) pair: the solvent is added
        # through ``from_components`` and a single solute molecule on top.
        solute_in_solvent = []

        for solvent_smiles, solute_smiles in (("CCO", "CC=O"),
                                              ("CC=O", "CCO")):

            solvated_substance = Substance.from_components(solvent_smiles)
            solvated_substance.add_component(
                Component(solute_smiles, Component.Role.Solute),
                ExactAmount(1))

            solute_in_solvent.append(solvated_substance)

        solvation_properties = [
            SolvationFreeEnergy(
                thermodynamic_state=states[1],
                phase=PropertyPhase.Liquid,
                substance=solvated_substance,
                value=0.0 * SolvationFreeEnergy.default_unit(),
            ) for solvated_substance in solute_in_solvent
        ]

        curation_schema = CurationWorkflowSchema(component_schemas=[
            ImportFreeSolvSchema(),
            FilterBySubstancesSchema(substances_to_include=[("O", "CO")]),
        ])
        curated_properties = CurationWorkflow.apply(
            PhysicalPropertyDataSet(), curation_schema)

        data_set.add_properties(*solvation_properties, *curated_properties)

    for state in states:

        # Excess properties.
        mixture_properties = [
            property_type(
                thermodynamic_state=state,
                phase=PropertyPhase.Liquid,
                substance=Substance.from_components("CC=O", "CCO"),
                value=0.0 * property_type.default_unit(),
            ) for property_type in (ExcessMolarVolume, EnthalpyOfMixing)
        ]
        data_set.add_properties(*mixture_properties)

        # Pure properties, each paired with the phase it is defined for.
        liquid_and_gas = PropertyPhase(PropertyPhase.Liquid
                                       | PropertyPhase.Gas)
        pure_properties = [
            property_type(
                thermodynamic_state=state,
                phase=property_phase,
                substance=Substance.from_components("CCO"),
                value=0.0 * property_type.default_unit(),
            ) for property_type, property_phase in (
                (Density, PropertyPhase.Liquid),
                (EnthalpyOfVaporization, liquid_and_gas),
                (DielectricConstant, PropertyPhase.Liquid),
            )
        ]
        data_set.add_properties(*pure_properties)

    return data_set
Пример #21
0
    def from_pandas(cls,
                    data_frame: pandas.DataFrame) -> "PhysicalPropertyDataSet":
        """Constructs a data set object from a pandas ``DataFrame`` object.

        Notes
        -----
        * All physical properties are assumed to be sourced from experimental
          measurements.
        * Currently this method only supports data frames containing
          properties which are built-in to the framework (e.g. Density).
        * This method assumes the data frame has a structure identical to that
          produced by the ``PhysicalPropertyDataSet.to_pandas`` function.
        * Properties are created row by row and, within a row, in the order
          in which their value columns appear in the data frame.

        Parameters
        ----------
        data_frame
            The data frame to construct the data set from.

        Returns
        -------
            The constructed data set.

        Raises
        ------
        AssertionError
            If a column containing ``" Value "`` does not have the form
            ``"<PropertyType> Value (<unit>)"``, if ``<PropertyType>`` is not
            an attribute of ``openff.evaluator.properties``, or if the
            dimensionality of ``<unit>`` differs from that of the default
            unit of the property type.
        """

        from openff.evaluator import properties

        # The unit group accepts letters, digits, whitespace and the
        # characters ``+ , - . / * ^``. ``*`` and ``^`` are required for
        # units containing powers (e.g. ``cm ** 3 / mol``); the remaining
        # characters are spelled out explicitly rather than relying on a
        # ``+-/`` character range.
        header_pattern = re.compile(
            r"^([a-zA-Z]+) Value \(([a-zA-Z0-9+,\-./*^\s]*)\)$")

        # Patterns used to decide whether a source string is a DOI. They are
        # compiled once here rather than for every property of every row.
        doi_patterns = [
            re.compile(pattern, re.I) for pattern in (
                r"^10.\d{4,9}/[-._;()/:A-Z0-9]+$",
                r"^10.1002/[^\s]+$",
                r"^10.\d{4}/\d+-\d+X?(\d+)\d+<[\d\w]+:[\d\w]*>\d+.\d+.\w+;\d$",
                r"^10.1021/\w\w\d+$",
                r"^10.1207/[\w\d]+\&\d+_\d+$",
            )
        ]

        # Validate that the headers have the correct format, specify a
        # built-in property type, and specify correctly the properties
        # units. The headers are visited in column order so that the order
        # of the created properties is deterministic.
        property_headers = {}

        for header in data_frame:

            if header.find(" Value ") < 0:
                continue

            match = header_pattern.match(header)
            assert match, f"the '{header}' header is not correctly formatted"

            property_type_string, property_unit_string = match.groups()

            assert hasattr(properties, property_type_string), (
                f"{property_type_string} is not a built-in property type")
            property_type = getattr(properties, property_type_string)

            property_unit = unit.Unit(property_unit_string)
            assert property_unit is not None

            assert (property_unit.dimensionality ==
                    property_type.default_unit().dimensionality), (
                        f"the units of the '{header}' header are not "
                        f"compatible with {property_type_string}")

            property_headers[match.group(0)] = (property_type, property_unit)

        # Convert the data rows to property objects.
        physical_properties = []

        for _, data_row in data_frame.iterrows():

            # Drop the empty cells so that ``in`` / ``get`` can be used to
            # test whether a row provides a given column.
            data_row = data_row.dropna()

            # Extract the state at which the measurement was made.
            thermodynamic_state = ThermodynamicState(
                temperature=data_row["Temperature (K)"] * unit.kelvin,
                pressure=data_row["Pressure (kPa)"] * unit.kilopascal,
            )
            property_phase = PropertyPhase.from_string(data_row["Phase"])

            # Extract the substance the measurement was made for.
            substance = Substance()

            # The count is coerced to ``int`` as the column may have been
            # up-cast to a floating point type by pandas.
            for i in range(int(data_row["N Components"])):

                component = Component(
                    smiles=data_row[f"Component {i + 1}"],
                    role=Component.Role[data_row.get(f"Role {i + 1}",
                                                     "Solvent")],
                )

                mole_fraction = data_row.get(f"Mole Fraction {i + 1}", 0.0)
                exact_amount = data_row.get(f"Exact Amount {i + 1}", 0)

                # A component may carry a mole fraction, an exact amount, or
                # both; amounts which are (close to) zero are skipped.
                if not numpy.isclose(mole_fraction, 0.0):
                    substance.add_component(component,
                                            MoleFraction(mole_fraction))
                if not numpy.isclose(exact_amount, 0.0):
                    substance.add_component(component,
                                            ExactAmount(exact_amount))

            for (
                    property_header,
                (property_type, property_unit),
            ) in property_headers.items():

                # Check to see whether the row contains a value for this
                # type of property.
                if property_header not in data_row:
                    continue

                uncertainty_header = property_header.replace(
                    "Value", "Uncertainty")

                source_string = data_row["Source"]

                # A source is only treated as a DOI if every one of its
                # " + " separated parts matches at least one DOI pattern.
                is_doi = all(
                    any(
                        doi_pattern.match(split_string)
                        for doi_pattern in doi_patterns)
                    for split_string in source_string.split(" + "))

                physical_property = property_type(
                    thermodynamic_state=thermodynamic_state,
                    phase=property_phase,
                    value=data_row[property_header] * property_unit,
                    uncertainty=None if uncertainty_header not in data_row else
                    data_row[uncertainty_header] * property_unit,
                    substance=substance,
                    source=MeasurementSource(
                        doi="" if not is_doi else source_string,
                        reference=source_string if not is_doi else "",
                    ),
                )

                # Only override the id of the new property when the row
                # provides a non-empty one.
                identifier = data_row.get("Id", None)

                if identifier:
                    physical_property.id = identifier

                physical_properties.append(physical_property)

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*physical_properties)

        return data_set
def test_component_standardization(smiles, expected):
    """Checks that a ``Component`` built from ``smiles`` exposes ``expected``
    through its ``smiles`` attribute."""

    standardized_smiles = Component(smiles=smiles).smiles
    assert standardized_smiles == expected