def test_get_ring_and_fgroup_ortho(input_smiles, bond_smarts,
                                   expected_pattern):
    """Check that rings and functional groups attached to ortho groups are
    found when growing out from the torsion quartet.

    The expected values were generated using fragmenter=0.0.7
    """

    prepared = Fragmenter._prepare_molecule(
        smiles_to_molecule(input_smiles, True),
        default_functional_groups(),
        False,
    )
    molecule, _, functional_groups, ring_systems = prepared

    # Convert the first SMARTS match from atom indices into map indices.
    central_match = molecule.chemical_environment_matches(bond_smarts)[0]
    bond = tuple(get_map_index(molecule, index) for index in central_match)

    # noinspection PyTypeChecker
    quartet_atoms, quartet_bonds = Fragmenter._get_torsion_quartet(
        molecule, bond)
    atoms, bonds = Fragmenter._get_ring_and_fgroups(
        molecule, functional_groups, ring_systems, quartet_atoms,
        quartet_bonds)

    # Hydrogens are dropped from the found atoms before comparing.
    actual_atoms = set()

    for map_index in atoms:

        atom = molecule.atoms[get_atom_index(molecule, map_index)]

        if atom.atomic_number != 1:
            actual_atoms.add(map_index)

    expected_atoms = set()

    for match in molecule.chemical_environment_matches(expected_pattern):
        expected_atoms.update(
            get_map_index(molecule, atom_index) for atom_index in match)

    assert actual_atoms == expected_atoms
def test_cap_open_valance():
    """Check that capping adds back a carbon bonded to nitrogen after it has
    been removed from the set of fragment atoms."""

    molecule, _, functional_groups, ring_systems = Fragmenter._prepare_molecule(
        smiles_to_molecule("CNCCc1ccccc1", True),
        default_functional_groups(),
        False,
    )

    methyl_matches = molecule.chemical_environment_matches("[#7]-[#6H3:1]")
    expected_atom = get_map_index(molecule, methyl_matches[0][0])

    bond_match = molecule.chemical_environment_matches("[#6a:1]-[#6H2:2]")[0]
    target_bond = tuple(get_map_index(molecule, index) for index in bond_match)

    # noinspection PyTypeChecker
    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, target_bond)
    atoms, bonds = Fragmenter._get_ring_and_fgroups(
        molecule, functional_groups, ring_systems, atoms, bonds)

    # Take the cap atom out of the current set so that the assertion below can
    # only pass if the capping step puts it back.
    atoms -= {expected_atom}

    atoms, _ = Fragmenter._cap_open_valence(
        molecule, functional_groups, atoms, bonds)

    # The carbon bonded to N must have been added.
    assert expected_atom in atoms
def test_add_substituent():
    """Check that adding the next substituent to the five carbon fragment
    around bond (3, 5) of hexane gives a six carbon fragment."""

    target_bond = (3, 5)

    result = WBOFragmenter().fragment(Molecule.from_smiles("CCCCCC"))
    fragment = result.fragments_by_bond[target_bond].molecule

    initial_smiles = fragment.to_smiles(mapped=False, explicit_hydrogens=False)
    assert initial_smiles == "CCCCC"

    # Collect the map indices of the heavy atoms currently in the fragment.
    atoms = {
        get_map_index(fragment, index)
        for index in range(fragment.n_atoms)
        if fragment.atoms[index].atomic_number != 1
    }

    # Collect the heavy atom bonds of the fragment as tuples of map indices.
    bonds = set()

    for bond in fragment.bonds:

        if bond.atom1.atomic_number == 1 or bond.atom2.atomic_number == 1:
            continue

        bonds.add((
            get_map_index(fragment, bond.atom1_index),
            get_map_index(fragment, bond.atom2_index),
        ))

    fragment, _ = WBOFragmenter._add_next_substituent(
        result.parent_molecule, {}, {}, {}, atoms, bonds,
        target_bond=target_bond)

    grown_smiles = fragment.to_smiles(mapped=False, explicit_hydrogens=False)
    assert grown_smiles == "CCCCCC"
Пример #4
0
    def _find_stereo(cls, molecule: Molecule) -> Stereochemistries:
        """Record the stereochemistry of each atom and bond that defines one.

        Notes
        -----
            * This is needed to check if a fragment has flipped chirality.
              Currently this can happen and it is a bug.

        Parameters
        ----------
        molecule
            The molecule to search for stereochemistry.

        Returns
        -------
            A single dictionary holding the stereochemistry of atoms, keyed by
            their map index, and of bonds, keyed by a tuple of the map indices
            of the two bonded atoms.
        """

        stereochemistries = {}

        # Atoms go in first, so they come before the bonds in the dictionary.
        for atom in molecule.atoms:

            if atom.stereochemistry is None:
                continue

            map_index = get_map_index(molecule, atom.molecule_atom_index)
            stereochemistries[map_index] = atom.stereochemistry

        for bond in molecule.bonds:

            if bond.stereochemistry is None:
                continue

            map_tuple = (
                get_map_index(molecule, bond.atom1_index),
                get_map_index(molecule, bond.atom2_index),
            )
            stereochemistries[map_tuple] = bond.stereochemistry

        return stereochemistries
Пример #5
0
    def _find_non_rotor_ring_substituents(
        cls, molecule: Molecule, ring_system_atoms: Set[int]
    ) -> AtomAndBondSet:
        """Collect the substituents joined to a ring system by a non-rotor bond.

        A bond is treated as a rotor when it is returned by
        ``molecule.find_rotatable_bonds()`` and both of its atoms have at least
        two heavy (non-hydrogen) neighbours.

        Parameters
        ----------
        molecule
            The molecule to search for non-rotor ring substituents.
        ring_system_atoms
            The map indices of the atoms in the ring system of interest.

        Returns
        -------
            The map indices of the atoms and bonds of every non-rotor, heavy
            atom bond that has exactly one atom inside the ring system.
        """

        def count_heavy_neighbours(atom: Atom) -> int:
            return len(
                [
                    neighbour
                    for neighbour in atom.bonded_atoms
                    if neighbour.atomic_number != 1
                ]
            )

        rotor_bonds = []

        for candidate in molecule.find_rotatable_bonds():

            if count_heavy_neighbours(candidate.atom1) < 2:
                continue
            if count_heavy_neighbours(candidate.atom2) < 2:
                continue

            rotor_bonds.append(candidate)

        substituent_atoms = set()
        substituent_bonds = set()

        for bond in molecule.bonds:

            # Rotors are never treated as part of the ring substituents.
            if bond in rotor_bonds:
                continue

            # Bonds to hydrogen are ignored.
            if 1 in (bond.atom1.atomic_number, bond.atom2.atomic_number):
                continue

            map_index_1 = get_map_index(molecule, bond.atom1_index)
            map_index_2 = get_map_index(molecule, bond.atom2_index)

            in_system_1 = map_index_1 in ring_system_atoms
            in_system_2 = map_index_2 in ring_system_atoms

            # Only keep bonds that cross the boundary of the ring system, i.e.
            # exactly one of the two atoms is part of it.
            if in_system_1 == in_system_2:
                continue

            substituent_atoms.add(map_index_1)
            substituent_atoms.add(map_index_2)
            substituent_bonds.add((map_index_1, map_index_2))

        return substituent_atoms, substituent_bonds
Пример #6
0
    def _get_torsion_quartet(
        cls, molecule: Molecule, bond: BondTuple
    ) -> AtomAndBondSet:
        """Gather the atoms and bonds within two bonds of the atoms of a bond.

        Parameters
        ----------
        molecule
            The molecule containing the rotatable bond.
        bond
            The map indices of the two atoms in the bond.

        Returns
        -------
            The map indices of the atoms in the quartet and the bonds in the
            quartet. Each bond is stored as a tuple in the direction it was
            traversed and so is not sorted.
        """

        def to_map_index(atom) -> int:
            return get_map_index(molecule, atom.molecule_atom_index)

        quartet_atoms = set(bond)
        quartet_bonds = {bond}

        # Look up the atoms whose map index appears in the central bond.
        central_atoms = []

        for atom_index, map_index in molecule.properties["atom_map"].items():

            if map_index in bond:
                central_atoms.append(molecule.atoms[atom_index])

        for central_atom in central_atoms:

            central_map_index = to_map_index(central_atom)

            # First shell: everything bonded to an atom of the central bond.
            for first_shell_atom in central_atom.bonded_atoms:

                first_shell_map_index = to_map_index(first_shell_atom)

                quartet_atoms.add(first_shell_map_index)
                quartet_bonds.add((central_map_index, first_shell_map_index))

                # Second shell: everything bonded to a first shell atom.
                for second_shell_atom in first_shell_atom.bonded_atoms:

                    second_shell_map_index = to_map_index(second_shell_atom)

                    quartet_atoms.add(second_shell_map_index)
                    quartet_bonds.add(
                        (first_shell_map_index, second_shell_map_index)
                    )

        return quartet_atoms, quartet_bonds
def test_get_map_index_error(raise_error, expected_raises):
    """Check how ``get_map_index`` behaves for an atom that has no map index.

    Parameters
    ----------
    raise_error
        The value passed to ``get_map_index`` as ``error_on_missing``.
    expected_raises
        The context manager that the look-up is performed inside of. It is
        presumably a ``pytest.raises(...)`` when an error is expected and a
        no-op context otherwise - confirm against the parametrization.
    """

    molecule = Molecule.from_smiles("C")

    with expected_raises:
        # Store the returned index itself. The result used to be compared with
        # 5 before being stored, so the assertion below only ever checked that
        # ``False == 0`` and could not fail on a wrong index.
        map_index = get_map_index(molecule, 0, error_on_missing=raise_error)
        assert map_index == 0
def test_get_torsion_quartet(input_smiles, expected_n_atoms, expected_n_bonds):
    """Check how many atoms and bonds are gathered around a central bond."""

    molecule = smiles_to_molecule(input_smiles, True)

    central_match = molecule.chemical_environment_matches("C[C:1][C:2]CCCC")[0]
    central_bond = (
        get_map_index(molecule, central_match[0]),
        get_map_index(molecule, central_match[1]),
    )

    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, central_bond)

    # The counts also include explicit hydrogens.
    n_atoms = len(atoms)
    n_bonds = len(bonds)

    assert n_atoms == expected_n_atoms
    assert n_bonds == expected_n_bonds
def test_atom_bond_set_to_mol(abemaciclib):
    """Check that every heavy atom bond of a fragment built from a set of atoms
    and bonds is one of the bonds that was requested."""

    molecule = smiles_to_molecule(abemaciclib.to_smiles(mapped=False), True)

    # The map indices of all atoms matched by the substructure pattern.
    atoms = set()

    for match in molecule.chemical_environment_matches(
            "[C:1][C:2][N:3]1[C:4][C:5][N:6][C:7][C:8]1"):
        atoms.update(
            get_map_index(molecule, atom_index) for atom_index in match)

    # Every bond of the parent whose two atoms are both in the matched set.
    bonds = set()

    for bond in molecule.bonds:

        if get_map_index(molecule, bond.atom1_index) not in atoms:
            continue
        if get_map_index(molecule, bond.atom2_index) not in atoms:
            continue

        bonds.add((
            get_map_index(molecule, bond.atom1_index),
            get_map_index(molecule, bond.atom2_index),
        ))

    fragment, _ = Fragmenter._atom_bond_set_to_mol(
        molecule, {}, atoms=atoms, bonds=bonds)

    for bond in fragment.bonds:

        # Bonds to hydrogen are not part of the requested set.
        if 1 in (bond.atom1.atomic_number, bond.atom2.atomic_number):
            continue

        fragment_bond = tuple(sorted((
            get_map_index(fragment, bond.atom1_index),
            get_map_index(fragment, bond.atom2_index),
        )))

        assert fragment_bond in bonds
Пример #10
0
    def _find_functional_groups(
        cls, molecule: Molecule, functional_groups: Dict[str, str]
    ) -> FunctionalGroups:
        """Locate each occurrence of the functional groups described by
        ``functional_groups`` and record the atoms and bonds it contains.

        Parameters
        ----------
        molecule
            The molecule to search for functional groups.
        functional_groups
            A dictionary of SMARTS of functional groups that should not be
            fragmented indexed by a friendly string label, e.g.
            'alcohol: [#6:1]-[#8H1X2:2]'

        Returns
        -------
            A dictionary with one entry per unique match, keyed by
            ``"{label}_{i}"`` where ``i`` counts the unique matches of that
            label. Each value is a tuple of the map indices of the matched
            atoms and of the bonds between them.
        """

        found_groups = {}

        for label, smarts in functional_groups.items():

            # Sorting each match collapses matches that contain the same atoms
            # in a different order.
            unique_matches = set(
                tuple(sorted(match))
                for match in molecule.chemical_environment_matches(smarts)
            )

            for counter, match in enumerate(unique_matches):

                group_atoms = {get_map_index(molecule, index) for index in match}
                group_bonds = set()

                for bond in molecule.bonds:

                    if bond.atom1_index not in match:
                        continue
                    if bond.atom2_index not in match:
                        continue

                    group_bonds.add(
                        (
                            get_map_index(molecule, bond.atom1_index),
                            get_map_index(molecule, bond.atom2_index),
                        )
                    )

                found_groups[f"{label}_{counter}"] = (group_atoms, group_bonds)

        return found_groups
Пример #11
0
    def _select_neighbour_by_wbo(
        cls, molecule: Molecule, atoms: Set[int]
    ) -> Optional[Tuple[int, BondTuple]]:
        """Pick the heavy atom neighbouring the fragment which is attached to it
        by the bond with the largest WBO.

        Parameters
        ----------
        molecule
            The original molecule being fragmented.
        atoms
            The map indices of the atoms currently in the fragment.

        Returns
        -------
            The map index of the atom to add next together with the map indices
            of the bond that joins it to the fragment, or ``None`` when the
            fragment has no heavy atom neighbours.
        """

        # Index the bond order of every heavy atom bond by its map indices.
        heavy_bond_orders = {}

        for bond in molecule.bonds:

            if bond.atom1.atomic_number == 1 or bond.atom2.atomic_number == 1:
                continue

            map_tuple = (
                get_map_index(molecule, bond.atom1_index),
                get_map_index(molecule, bond.atom2_index),
            )
            heavy_bond_orders[map_tuple] = bond.fractional_bond_order

        # Keep the bonds with exactly one atom in the fragment, trying both
        # orientations of each bond.
        candidates = set()

        for inside in (0, 1):

            outside = 1 - inside

            for map_tuple, bond_order in heavy_bond_orders.items():

                if map_tuple[inside] not in atoms or map_tuple[outside] in atoms:
                    continue

                candidates.add((bond_order, (map_tuple[outside], map_tuple)))

        ranked = sorted(
            candidates, key=lambda candidate: candidate[0], reverse=True
        )

        if len(ranked) == 0:
            return None

        _, atom_to_add = ranked[0]
        return atom_to_add
def test_get_rotor_wbo():
    """Check that butane yields a single rotor whose WBO is the value assigned
    to the bonds of the molecule."""

    molecule = smiles_to_molecule("CCCC", True)

    for bond in molecule.bonds:
        bond.fractional_bond_order = 0.986

    # Any carbon-carbon single bond is an acceptable key for the rotor.
    expected_bonds = set()

    for match in molecule.chemical_environment_matches("[#6:1]-[#6:2]"):
        expected_bonds.add((
            get_map_index(molecule, match[0]),
            get_map_index(molecule, match[1]),
        ))

    rotatable_bonds = WBOFragmenter.find_rotatable_bonds(molecule, None)
    rotors_wbo = WBOFragmenter._get_rotor_wbo(molecule, rotatable_bonds)

    assert len(rotors_wbo) == 1

    rotor_index = next(iter(rotors_wbo))
    rotor_wbo = rotors_wbo[rotor_index]

    assert rotor_index in expected_bonds
    assert numpy.isclose(rotor_wbo, 0.986, atol=0.001)
Пример #13
0
    def _find_ortho_substituents(
        cls, parent: Molecule, bonds: Set[BondTuple]
    ) -> AtomAndBondSet:
        """Locate the ring substituents sitting ortho to any of the specified
        rotatable bonds.

        Parameters
        ----------
        parent
            The parent molecule being fragmented.
        bonds
            The map indices of the rotatable bonds.

        Returns
        -------
            The set of map indices of atoms in ortho group and of bond tuples in
            ortho group. Each bond tuple is sorted.
        """

        ortho_atoms = set()
        ortho_bonds = set()

        pattern_matches = parent.chemical_environment_matches(
            "[!#1:1]~&!@[*:2]@[*:3]~&!@[!#1*:4]"
        )

        for match in pattern_matches:

            map_tuple = tuple(get_map_index(parent, i) for i in match)
            rotor = map_tuple[:2]

            # Only keep matches whose first two atoms form a requested bond, in
            # either direction.
            if rotor not in bonds and rotor[::-1] not in bonds:
                continue

            # The first and last atoms of the match are the two substituents.
            ortho_atoms.add(map_tuple[0])
            ortho_atoms.add(map_tuple[3])

            # Bonds are stored sorted so that the set holds no duplicates.
            ortho_bonds.add(tuple(sorted((map_tuple[0], map_tuple[1]))))
            ortho_bonds.add(tuple(sorted((map_tuple[2], map_tuple[3]))))

        return ortho_atoms, ortho_bonds
Пример #14
0
def _oe_render_parent(
    parent: Molecule,
    rotor_bonds: Optional[Collection[BondTuple]] = None,
    image_width: int = 572,
    image_height: int = 198,
) -> str:
    """Render a parent molecule to an SVG string using the OpenEye toolkit.

    Parameters
    ----------
    parent
        The molecule to draw.
    rotor_bonds
        The bonds handed to ``_oe_wbo_label_display`` to build the bond property
        functor. An empty list is used when ``None``. Presumably these are the
        map indices of the bonds to label with a WBO - confirm against that
        helper.
    image_width
        The width passed to both the image and the display options.
    image_height
        The height passed to both the image and the display options.

    Returns
    -------
        The decoded contents of the SVG image.
    """

    # OpenEye is imported lazily, inside the function.
    from openeye import oedepict

    rotor_bonds = [] if rotor_bonds is None else rotor_bonds

    # Map the OpenFF molecules into OE ones, making sure to explicitly set the atom
    # map on the OE object as this is not handled by the OpenFF toolkit.
    oe_parent = parent.to_openeye()

    # NOTE(review): the trailing ``False`` looks like ``error_on_missing`` so that
    # unmapped atoms do not raise - confirm against ``get_map_index``.
    for atom in oe_parent.GetAtoms():
        atom.SetMapIdx(get_map_index(parent, atom.GetIdx(), False))

    oedepict.OEPrepareDepiction(oe_parent)

    # Set-up common display options.
    image = oedepict.OEImage(image_width, image_height)

    display_options = oedepict.OE2DMolDisplayOptions(
        image_width, image_height, oedepict.OEScale_AutoScale)
    display_options.SetTitleLocation(oedepict.OETitleLocation_Hidden)
    display_options.SetAtomColorStyle(
        oedepict.OEAtomColorStyle_WhiteMonochrome)
    display_options.SetAtomLabelFontScale(1.2)
    display_options.SetBondPropertyFunctor(_oe_wbo_label_display(rotor_bonds))

    display = oedepict.OE2DMolDisplay(oe_parent, display_options)

    oedepict.OERenderMolecule(image, display)

    # The image is written out as bytes and decoded before being returned.
    svg_contents = oedepict.OEWriteImageToString("svg", image)
    return svg_contents.decode()
def test_get_ring_and_fgroup(input_smiles, bond_smarts, expected):
    """Check whether including rings and functional groups changes the number
    of atoms and bonds gathered around a bond.

    ``expected`` states whether both counts should be left unchanged.
    """

    molecule, _, functional_groups, ring_systems = Fragmenter._prepare_molecule(
        smiles_to_molecule(input_smiles, True),
        default_functional_groups(),
        False,
    )

    first_match = molecule.chemical_environment_matches(bond_smarts)[0]
    central_bond = tuple(get_map_index(molecule, index) for index in first_match)

    # noinspection PyTypeChecker
    atoms, bonds = Fragmenter._get_torsion_quartet(molecule, central_bond)

    # Sort each bond so that the two directions of a bond count only once.
    bonds = {tuple(sorted(pair)) for pair in bonds}

    # Record the sizes up front in case the sets are modified in place below.
    n_atoms_before = len(atoms)
    n_bonds_before = len(bonds)

    grown_atoms, grown_bonds = Fragmenter._get_ring_and_fgroups(
        molecule, functional_groups, ring_systems, atoms, bonds)

    atoms_unchanged = n_atoms_before == len(grown_atoms)
    bonds_unchanged = n_bonds_before == len(grown_bonds)

    assert atoms_unchanged == expected
    assert bonds_unchanged == expected
Пример #16
0
    def _check_stereo(
        cls, fragment: Molecule, parent_stereo: Stereochemistries
    ) -> bool:
        """Verify that the stereocenters of a fragment agree with those that
        were recorded for the parent.

        A warning is logged, and the check stops, at the first stereocenter
        which is either missing from the parent or has a different
        stereochemistry to the parent.

        Parameters
        ----------
        fragment
            The fragment whose stereo should be compared to the parent.
        parent_stereo
            The stereochemistry of the parent molecule.

        Returns
        -------
            Whether the fragment has the same stereochemistry as the parent.
        """

        atom_stereocenters, bond_stereocenters = find_stereocenters(fragment)

        # Look for new / flipped chiral centers.
        for atom_index in atom_stereocenters:

            map_index = get_map_index(fragment, atom_index)

            if map_index not in parent_stereo:

                logger.warning(f"A new stereocenter formed at atom {map_index}")
                return False

            fragment_stereo = fragment.atoms[atom_index].stereochemistry
            expected_stereo = parent_stereo[map_index]

            if fragment_stereo != expected_stereo:

                logger.warning(
                    f"Stereochemistry for atom {map_index} flipped from "
                    f"{expected_stereo} to {fragment_stereo}"
                )
                return False

        # Look for new / flipped stereo bonds.
        for index_tuple in bond_stereocenters:

            map_tuple = tuple(get_map_index(fragment, i) for i in index_tuple)

            # The parent may have stored the bond in the opposite direction.
            if map_tuple not in parent_stereo:
                map_tuple = map_tuple[::-1]

            if map_tuple not in parent_stereo:

                logger.warning(f"A new chiral bond formed at bond {map_tuple}")
                return False

            fragment_stereo = fragment.get_bond_between(*index_tuple).stereochemistry
            expected_stereo = parent_stereo[map_tuple]

            if fragment_stereo != expected_stereo:

                logger.warning(
                    f"Stereochemistry for bond {map_tuple} flipped from "
                    f"{expected_stereo} to {fragment_stereo}"
                )
                return False

        return True
Пример #17
0
    def find_rotatable_bonds(
        cls, molecule: Molecule, target_bond_smarts: Optional[List[str]]
    ) -> List[BondTuple]:
        """Locate the rotatable bonds of a molecule, *including* rotatable
        double bonds.

        Parameters
        ----------
        molecule
            The molecule to search for rotatable bonds.
        target_bond_smarts
            An optional list of SMARTS patterns that should be used to identify
            the bonds within the parent molecule to grow fragments around. Each
            SMARTS pattern should include **two** indexed atoms that correspond
            to the two atoms involved in the central bond.

            If no pattern is provided fragments will be constructed around all
            'rotatable bonds'. A 'rotatable bond' here means any bond matched by
            a `[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]` SMARTS pattern with the
            added constraint that the **heavy** degree (i.e. the degree
            excluding hydrogen) of both atoms in the bond must be >= 2.

        Returns
        -------
            A list of the **map** indices of the atoms that form the rotatable
            bonds, ``[(m1, m2),...]``.

        Raises
        ------
        ValueError
            If any match of a user provided pattern does not contain exactly
            two atoms.
        """

        use_default_pattern = target_bond_smarts is None

        if use_default_pattern:

            matches = molecule.chemical_environment_matches(
                "[!$(*#*)&!D1:1]-,=;!@[!$(*#*)&!D1:2]"
            )

        else:

            matches = []

            for smarts in target_bond_smarts:
                matches.extend(molecule.chemical_environment_matches(smarts))

            if any(len(match) != 2 for match in matches):

                raise ValueError(
                    f"The `target_bond_smarts` pattern ({target_bond_smarts}) "
                    f"must define exactly two indexed atoms to match."
                )

        # Sort each match so that a bond found in both directions is kept once.
        unique_matches = {tuple(sorted(match)) for match in matches}

        if use_default_pattern:

            # Terminal bonds are avoided by requiring every atom of a bond to
            # have a heavy degree of at least 2.
            def count_heavy_neighbours(atom_index: int) -> int:
                neighbours = molecule.atoms[atom_index].bonded_atoms
                return len(
                    [
                        neighbour
                        for neighbour in neighbours
                        if neighbour.atomic_number != 1
                    ]
                )

            unique_matches = {
                match
                for match in unique_matches
                if all(count_heavy_neighbours(index) > 1 for index in match)
            }

        rotatable_bonds = []

        for match in unique_matches:

            rotatable_bonds.append(
                (
                    get_map_index(molecule, match[0]),
                    get_map_index(molecule, match[1]),
                )
            )

        return rotatable_bonds
Пример #18
0
def _rd_render_fragment(
    parent: Molecule,
    fragment: Molecule,
    bond_indices: BondTuple,
    image_width: int = 283,
    image_height: int = 169,
) -> str:
    """Draw a parent molecule as an SVG using RDKit, highlighting the atoms and
    bonds that belong to a fragment as well as the bond the fragment was built
    around.

    Parameters
    ----------
    parent
        The parent molecule, which is the molecule that gets drawn.
    fragment
        The fragment whose atoms should be highlighted in the parent. Atoms
        are matched between the two molecules by map index.
    bond_indices
        The map indices of the two atoms of the bond to highlight in a
        different colour.
    image_width
        The width of the drawing.
    image_height
        The height of the drawing.

    Returns
    -------
        The text of the drawing as returned by the RDKit drawer.
    """

    from rdkit import Chem
    from rdkit.Chem import Draw
    from rdkit.Chem.rdDepictor import Compute2DCoords

    fragment_colour = (52.0 / 255.0, 143.0 / 255.0, 235.0 / 255.0)
    rotor_colour = (239.0 / 255.0, 134.0 / 255.0, 131.0 / 255.0)

    rd_parent: Chem.Mol = parent.to_rdkit()

    # Store the map indices on the RDKit atoms so that atoms can still be
    # identified once the hydrogens have been removed.
    for rd_atom in rd_parent.GetAtoms():
        rd_atom.SetAtomMapNum(get_map_index(parent, rd_atom.GetIdx(), False))

    rd_parent = Chem.RemoveHs(rd_parent)
    Compute2DCoords(rd_parent)

    # A map index of zero is never treated as part of the fragment.
    map_indices = set(fragment.properties["atom_map"].values())
    map_indices.discard(0)

    fragment_atom_indices = []

    for rd_atom in rd_parent.GetAtoms():

        if rd_atom.GetAtomMapNum() in map_indices:
            fragment_atom_indices.append(rd_atom.GetIdx())

    fragment_bond_indices = []
    rotatable_bond_index = []

    for rd_bond in rd_parent.GetBonds():

        begin_map_index = rd_bond.GetBeginAtom().GetAtomMapNum()
        end_map_index = rd_bond.GetEndAtom().GetAtomMapNum()

        if begin_map_index in map_indices and end_map_index in map_indices:
            fragment_bond_indices.append(rd_bond.GetIdx())

        if begin_map_index in bond_indices and end_map_index in bond_indices:
            rotatable_bond_index.append(rd_bond.GetIdx())

    # Clear the map numbers again now that the indices have been collected.
    for rd_atom in rd_parent.GetAtoms():
        rd_atom.SetAtomMapNum(0)

    atom_colours = dict.fromkeys(fragment_atom_indices, fragment_colour)

    # The colour of the rotatable bond takes priority over the fragment colour.
    bond_colours = dict.fromkeys(fragment_bond_indices, fragment_colour)
    bond_colours.update(dict.fromkeys(rotatable_bond_index, rotor_colour))

    drawer = Draw.MolDraw2DSVG(image_width, image_height)
    drawer.drawOptions().useBWAtomPalette()

    drawer.DrawMolecule(
        rd_parent,
        highlightAtoms=fragment_atom_indices,
        highlightAtomColors=atom_colours,
        highlightBonds=fragment_bond_indices + rotatable_bond_index,
        highlightBondColors=bond_colours,
    )
    drawer.FinishDrawing()

    return drawer.GetDrawingText()
Пример #19
0
def _oe_render_fragment(
    parent: Molecule,
    fragment: Molecule,
    bond_indices: BondTuple,
    image_width: int = 283,
    image_height: int = 169,
) -> str:
    """Render a parent molecule to an SVG string using the OpenEye toolkit,
    highlighting the atoms and bonds that are not part of a fragment and the
    bond that the fragment was built around.

    Parameters
    ----------
    parent
        The parent molecule, which is the molecule that gets drawn.
    fragment
        The fragment of the parent. The map indices of its atoms are used to
        build the fragment predicates.
    bond_indices
        The map indices of the two atoms of the bond to highlight.
    image_width
        The width passed to both the image and the display options.
    image_height
        The height passed to both the image and the display options.

    Returns
    -------
        The decoded contents of the SVG image.
    """

    # OpenEye is imported lazily, inside the function.
    from openeye import oechem, oedepict

    # Map the OpenFF molecules into OE ones, making sure to explicitly set the atom
    # map on the OE object as this is not handled by the OpenFF toolkit.
    oe_parent = parent.to_openeye()

    # NOTE(review): the trailing ``False`` looks like ``error_on_missing`` so that
    # unmapped atoms do not raise - confirm against ``get_map_index``.
    for atom in oe_parent.GetAtoms():
        atom.SetMapIdx(get_map_index(parent, atom.GetIdx(), False))

    oedepict.OEPrepareDepiction(oe_parent)

    oe_fragment = fragment.to_openeye()

    for atom in oe_fragment.GetAtoms():
        atom.SetMapIdx(get_map_index(fragment, atom.GetIdx(), False))

    # Find the bond in the parent between the two atoms that carry the requested
    # map indices.
    oe_parent_bond = oe_parent.GetBond(
        oe_parent.GetAtom(oechem.OEHasMapIdx(bond_indices[0])),
        oe_parent.GetAtom(oechem.OEHasMapIdx(bond_indices[1])),
    )

    # Set-up common display options.
    image = oedepict.OEImage(image_width, image_height)

    display_options = oedepict.OE2DMolDisplayOptions(
        image_width, image_height, oedepict.OEScale_AutoScale)

    display_options.SetTitleLocation(oedepict.OETitleLocation_Hidden)
    display_options.SetAtomColorStyle(
        oedepict.OEAtomColorStyle_WhiteMonochrome)
    display_options.SetAtomLabelFontScale(1.2)

    # NOTE(review): bond labelling is currently disabled here.
    # display_options.SetBondPropertyFunctor(_oe_wbo_label_display({bond_indices}))

    display = oedepict.OE2DMolDisplay(oe_parent, display_options)

    # Build the predicates from the map indices of the fragment atoms.
    fragment_atom_predicate, fragment_bond_predicate = _oe_fragment_predicates(
        {atom.GetMapIdx()
         for atom in oe_fragment.GetAtoms()})

    # Negate the predicates so that the highlight below is applied to whatever
    # the fragment predicates do not select.
    not_fragment_atoms = oechem.OENotAtom(fragment_atom_predicate)
    not_fragment_bonds = oechem.OENotBond(fragment_bond_predicate)

    oedepict.OEAddHighlighting(
        display,
        oedepict.OEHighlightByColor(oechem.OEGrey, 0.75),
        not_fragment_atoms,
        not_fragment_bonds,
    )

    # Highlight the target bond and its two atoms in a second style.
    rotatable_bond = oechem.OEAtomBondSet()

    rotatable_bond.AddBond(oe_parent_bond)
    rotatable_bond.AddAtom(oe_parent_bond.GetBgn())
    rotatable_bond.AddAtom(oe_parent_bond.GetEnd())

    oedepict.OEAddHighlighting(
        display,
        oechem.OEColor(oechem.OELimeGreen),
        oedepict.OEHighlightStyle_BallAndStick,
        rotatable_bond,
    )

    oedepict.OERenderMolecule(image, display)

    # The image is written out as bytes and decoded before being returned.
    svg_contents = oedepict.OEWriteImageToString("svg", image)
    return svg_contents.decode()
Пример #20
0
    def _select_neighbour_by_path_length(
        cls, molecule: Molecule, atoms: Set[int], target_bond: BondTuple
    ) -> Optional[Tuple[int, BondTuple]]:
        """Select the heavy atom neighbouring the fragment that should be added
        next based on its distance from the target bond.

        The shortest path length from each neighbour to each of the two atoms of
        the target bond is computed. If one target atom has a strictly closer
        nearest neighbour than the other, the neighbours are ranked by their
        distance to that target atom. Otherwise only the neighbours that lie at
        the shared minimum distance from either target atom are kept, and they
        are ranked by the fractional bond order of the bond that joins them to
        the fragment, from largest to smallest.

        Parameters
        ----------
        molecule
            The original molecule being fragmented.
        atoms
            The map indices of the atoms currently in the fragment.
        target_bond
            The map indices of the two atoms of the bond that the fragment is
            being built around.

        Returns
        -------
            The map index of the atom to add next together with the map indices
            of the bond that joins it to the fragment, or ``None`` when the
            fragment has no heavy atom neighbours outside of it.
        """

        atom_indices = {get_atom_index(molecule, atom) for atom in atoms}

        atoms_to_add = [
            (atom_index, neighbour.molecule_atom_index)
            for atom_index in atom_indices
            for neighbour in molecule.atoms[atom_index].bonded_atoms
            if neighbour.atomic_number != 1
            and neighbour.molecule_atom_index not in atom_indices
        ]

        # Unpacking ``zip(*...)`` of an empty sequence into two names raises a
        # ``ValueError``, so stop here when there is nothing left to add.
        if not atoms_to_add:
            return None

        map_atoms_to_add = [
            (
                get_map_index(molecule, j),
                (get_map_index(molecule, i), get_map_index(molecule, j)),
            )
            for i, j in atoms_to_add
        ]

        # Compute the distance from each neighbouring atom to each of the atoms in the
        # target bond.
        nx_molecule = molecule.to_networkx()

        target_indices = [get_atom_index(molecule, atom) for atom in target_bond]

        path_lengths_1, path_lengths_2 = zip(
            *(
                (
                    networkx.shortest_path_length(
                        nx_molecule, target_index, neighbour_index
                    )
                    for target_index in target_indices
                )
                for _, neighbour_index in atoms_to_add
            )
        )

        reverse = False

        min_path_length_1 = min(path_lengths_1)
        min_path_length_2 = min(path_lengths_2)

        if min_path_length_1 < min_path_length_2:
            sort_by = path_lengths_1
        elif min_path_length_2 < min_path_length_1:
            sort_by = path_lengths_2

        else:

            # If there are multiple neighbouring atoms the same path length away
            # from the target bond fall back to sorting by the WBO.
            map_atoms_to_add = [
                map_tuple
                for map_tuple, *path_length_tuple in zip(
                    map_atoms_to_add, path_lengths_1, path_lengths_2
                )
                if min_path_length_1 in path_length_tuple
            ]

            sort_by = [
                molecule.get_bond_between(
                    get_atom_index(molecule, neighbour_bond[0]),
                    get_atom_index(molecule, neighbour_bond[1]),
                ).fractional_bond_order
                for _, neighbour_bond in map_atoms_to_add
            ]

            # The largest WBO should come first.
            reverse = True

        sorted_atoms = [
            a for _, a in sorted(zip(sort_by, map_atoms_to_add), reverse=reverse)
        ]

        # There is always at least one candidate at this point: the list of
        # neighbours is not empty and the tie-break filter keeps every
        # neighbour that sits at the minimum path length.
        return sorted_atoms[0]
Пример #21
0
def test_get_map_index():
    """Check that the map index written in a mapped SMILES is returned for the
    atom at index 0."""

    mapped_smiles = "[C:5]([H:1])([H:2])([H:3])([H:4])"
    molecule = Molecule.from_smiles(mapped_smiles)

    carbon_map_index = get_map_index(molecule, 0)

    assert carbon_map_index == 5
Пример #22
0
    def _cap_open_valence(
        cls,
        parent: Molecule,
        parent_groups: FunctionalGroups,
        atoms: Set[int],
        bonds: Set[BondTuple],
    ) -> AtomAndBondSet:
        """Cap the open valences of a fragment by pulling in neighbouring
        carbons.

        Each fragment atom which is N, O or S, or which is part of one of the
        parent's functional groups, is inspected. If it has a heavy neighbour
        which is not yet in the fragment, all of its carbon neighbours are
        added to the fragment.

        Parameters
        ----------
        parent
            The molecule being fragmented.
        parent_groups
            A dictionary of the functional groups on the molecule which should
            not be fragmented.
        atoms
            The map indices of the atoms in the fragment being constructed.
        bonds
            The map indices of the bonds in the fragment being constructed.

        Returns
        -------
            The ``atoms`` and ``bonds`` sets that were passed in, which are
            updated in place with the capping atoms and bonds.
        """

        # Build a look-up of which functional group each map index belongs to.
        group_by_map_index = {}

        for group_label, group in parent_groups.items():

            for group_map_index in group[0]:
                group_by_map_index[group_map_index] = group_label

        def is_missing_heavy_neighbour(neighbour) -> bool:
            neighbour_map_index = get_map_index(
                parent, neighbour.molecule_atom_index
            )
            return neighbour.atomic_number != 1 and neighbour_map_index not in atoms

        cap_atoms = set()
        cap_bonds = set()

        for map_index in atoms:

            atom = parent.atoms[get_atom_index(parent, map_index)]

            is_hetero_atom = atom.atomic_number in (7, 8, 16)

            if not (is_hetero_atom or map_index in group_by_map_index):
                continue

            # A cap is only needed when a heavy neighbour has been left out of
            # the fragment.
            if not any(
                is_missing_heavy_neighbour(neighbour)
                for neighbour in atom.bonded_atoms
            ):
                continue

            for neighbour in atom.bonded_atoms:

                if neighbour.atomic_number == 6:

                    carbon_map_index = get_map_index(
                        parent, neighbour.molecule_atom_index
                    )

                    cap_atoms.add(carbon_map_index)
                    cap_bonds.add((map_index, carbon_map_index))

        # The caps are only merged in once the loop over ``atoms`` has finished.
        atoms |= cap_atoms
        bonds |= cap_bonds

        return atoms, bonds
Пример #23
0
def _extract_rd_fragment(
    molecule: Molecule, atom_indices: Set[int], bond_indices: Set[Tuple[int, int]]
) -> Molecule:
    """Cut a fragment out of a molecule using RDKit.

    The selected atoms and bonds are written out as a SMILES pattern which is then
    loaded back in as a new molecule. Hydrogens attached to the selected atoms are
    carried across, and extra hydrogens are added wherever dropping a bond would
    otherwise lower the valence of a retained atom.

    Parameters
    ----------
    molecule
        The parent molecule to extract the fragment from.
    atom_indices
        The map indices of the atoms to retain in the fragment.
    bond_indices
        The map indices of the pairs of atoms whose bond should be retained in the
        fragment.

    Returns
    -------
        The fragment, created from the SMILES representation of the selection.
    """

    from rdkit import Chem

    rd_parent = Chem.RWMol(molecule.to_rdkit())

    # ``to_rdkit`` does not carry the map indices across, so re-apply them while
    # building a lookup from map index to RDKit atom.
    map_to_rd_atom: Dict[int, Chem.Atom] = {}

    for rd_atom in rd_parent.GetAtoms():
        rd_atom.SetAtomMapNum(get_map_index(molecule, rd_atom.GetIdx()))
        map_to_rd_atom[rd_atom.GetAtomMapNum()] = rd_atom

    # Translate the requested map indices into RDKit atom and bond indices.
    kept_atoms = []

    for map_index in atom_indices:
        kept_atoms.append(get_atom_index(molecule, map_index))

    kept_bonds = []

    for pair in bond_indices:
        kept_bonds.append(
            rd_parent.GetBondBetweenAtoms(
                get_atom_index(molecule, pair[0]), get_atom_index(molecule, pair[1])
            ).GetIdx()
        )

    # Hydrogens bonded to a retained atom must come along as well, otherwise the
    # fragment would contain radicals.
    for map_index in atom_indices:

        included_atom = map_to_rd_atom[map_index]

        for neighbour in included_atom.GetNeighbors():

            neighbour_map = neighbour.GetAtomMapNum()

            is_missing_hydrogen = (
                neighbour.GetAtomicNum() == 1
                and neighbour_map >= 1
                and neighbour_map not in atom_indices
            )

            if not is_missing_hydrogen:
                continue

            kept_atoms.append(neighbour.GetIdx())
            kept_bonds.append(
                rd_parent.GetBondBetweenAtoms(
                    included_atom.GetIdx(), neighbour.GetIdx()
                ).GetIdx()
            )

    # Compensate every retained atom that loses a bond with new hydrogens so that
    # its total valence is the same in the fragment as in the parent.
    rd_atom_lookup = {rd_atom.GetIdx(): rd_atom for rd_atom in rd_parent.GetAtoms()}

    # Iterate over a snapshot as ``kept_atoms`` grows whenever a hydrogen is added.
    for atom_index in list(kept_atoms):

        rd_atom = rd_atom_lookup[atom_index]

        old_valence = rd_atom.GetTotalValence()
        new_valence = old_valence

        for bond in rd_atom.GetBonds():

            stays_in_fragment = (
                bond.GetBeginAtomIdx() in kept_atoms
                and bond.GetEndAtomIdx() in kept_atoms
            )

            if not stays_in_fragment:
                new_valence -= bond.GetValenceContrib(rd_atom)

        if numpy.isclose(old_valence, new_valence):
            # Nothing was cut from this atom, so there is nothing to compensate.
            continue

        if rd_atom.GetAtomicNum() == 6 and rd_atom.GetIsAromatic():

            atom_map = rd_atom.GetAtomMapNum()
            n_retained_bonds = len([pair for pair in bond_indices if atom_map in pair])

            if n_retained_bonds == 1:
                # An aromatic carbon with a single retained bond is most likely a
                # cap atom left over from a ring. Its aromatic flag has to be
                # cleared or ``MolFragmentToSmiles`` will raise an exception.
                rd_atom.SetIsAromatic(False)

        # Attach one hydrogen per unit of valence that would otherwise be lost.
        n_missing_hydrogens = int(numpy.rint(old_valence - new_valence))

        for _ in range(n_missing_hydrogens):

            hydrogen_index = rd_parent.AddAtom(Chem.Atom(1))
            rd_parent.AddBond(atom_index, hydrogen_index)

            hydrogen_bond = rd_parent.GetBondBetweenAtoms(atom_index, hydrogen_index)
            hydrogen_bond.SetBondType(Chem.BondType.SINGLE)
            hydrogen_bond.SetIsAromatic(False)

            kept_atoms.append(hydrogen_index)
            kept_bonds.append(hydrogen_bond.GetIdx())

    return Molecule.from_smiles(
        Chem.MolFragmentToSmiles(rd_parent, kept_atoms, kept_bonds),
        allow_undefined_stereo=True,
    )
Пример #24
0
    def _find_ring_systems(
        cls,
        molecule: Molecule,
        functional_groups: FunctionalGroups,
        keep_non_rotor_ring_substituents: bool = False,
    ) -> RingSystems:
        """Locate every ring system in a molecule.

        Each ring system is extended to include the full set of atoms and bonds of
        any functional group which shares at least one atom with it and, optionally,
        any non-rotatable substituents attached to it.

        Parameters
        ----------
        molecule
            The molecule to search for ring systems.
        functional_groups
            A dictionary of the functional groups on the molecule which should not
            be fragmented.
        keep_non_rotor_ring_substituents
            If True, the non rotatable substituents of each ring system are also
            included in that ring system. According to the benchmark, it is not
            necessary.

        Returns
        -------
            A dictionary which maps the index of each ring system to a tuple of the
            map indices of its atoms and the map indices of its bonds.
        """

        ring_index_by_atom = find_ring_systems(molecule)

        # Group the map indices of the ring atoms by the ring system they belong to.
        atoms_by_ring = {}

        for ring_index in {*ring_index_by_atom.values()}:

            atoms_by_ring[ring_index] = {
                get_map_index(molecule, atom_index)
                for atom_index, owning_ring in ring_index_by_atom.items()
                if owning_ring == ring_index
            }

        # A bond belongs to a ring system only when both of its atoms do. The two
        # fallback values differ so that a pair of non-ring atoms never compares equal.
        bonds_by_ring = defaultdict(set)

        for bond in molecule.bonds:

            first_ring = ring_index_by_atom.get(bond.atom1_index, -1)
            second_ring = ring_index_by_atom.get(bond.atom2_index, -2)

            if first_ring == second_ring:

                bonds_by_ring[first_ring].add(
                    (
                        get_map_index(molecule, bond.atom1_index),
                        get_map_index(molecule, bond.atom2_index),
                    )
                )

        # Grow each ring system with any overlapping functional groups and, when
        # requested, with its non-rotor substituents.
        for ring_index, ring_atoms in atoms_by_ring.items():

            ring_bonds = bonds_by_ring[ring_index]

            # Work out which groups overlap before the atom set is modified.
            overlapping_groups = {
                group_name
                for group_name, group in functional_groups.items()
                if any(map_index in group[0] for map_index in ring_atoms)
            }

            for group_name in overlapping_groups:

                ring_atoms.update(functional_groups[group_name][0])
                ring_bonds.update(functional_groups[group_name][1])

            if keep_non_rotor_ring_substituents:

                extra_atoms, extra_bonds = cls._find_non_rotor_ring_substituents(
                    molecule, ring_atoms
                )

                ring_atoms.update(extra_atoms)
                ring_bonds.update(extra_bonds)

        return {
            ring_index: (ring_atoms, bonds_by_ring[ring_index])
            for ring_index, ring_atoms in atoms_by_ring.items()
        }