示例#1
0
class Conformer():
    """
    A class for generating and editing 3D conformers of molecules
    """

    def __init__(self, smiles=None, rmg_molecule=None, index=0):

        self.energy = None
        self.index = index

        if (smiles or rmg_molecule):
            if smiles and rmg_molecule:
                assert rmg_molecule.isIsomorphic(RMGMolecule(
                    SMILES=smiles)), "SMILES string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule

            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.toSMILES()

            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(SMILES=smiles)

            self.rmg_molecule.updateMultiplicity()
            self.get_molecules()
            self.get_geometries()
            self._symmetry_number = None

        else:
            self.smiles = None
            self.rmg_molecule = None
            self.rdkit_molecule = None
            self.ase_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []
            self._symmetry_number = None

    def __repr__(self):
        return '<Conformer "{}">'.format(self.smiles)

    def copy(self):
        copy_conf = Conformer()
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf.rdkit_molecule = self.rdkit_molecule.__copy__()
        copy_conf.ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        copy_conf.energy = self.energy
        return copy_conf

    @property
    def symmetry_number(self):
        if not self._symmetry_number:
            self._symmetry_number = self.calculate_symmetry_number()
        return self._symmetry_number

    def get_rdkit_mol(self):
        """
        A method for creating an rdkit geometry from an rmg mol
        """

        assert self.rmg_molecule, "Cannot create an RDKit geometry without an RMG molecule object"

        RDMol = self.rmg_molecule.toRDKitMol(removeHs=False)
        rdkit.Chem.AllChem.EmbedMolecule(RDMol)
        self.rdkit_molecule = RDMol

        mol_list = AllChem.MolToMolBlock(self.rdkit_molecule).split('\n')
        for i, atom in enumerate(self.rmg_molecule.atoms):
            j = i + 4
            coords = mol_list[j].split()[:3]
            for k, coord in enumerate(coords):
                coords[k] = float(coord)
            atom.coords = np.array(coords)

        return self.rdkit_molecule

    def get_ase_mol(self):
        """
        A method for creating an ase atoms object from an rdkit mol
        """

        if not self.rdkit_molecule:
            self.get_rdkit_mol()

        mol_list = AllChem.MolToMolBlock(self.rdkit_molecule).split('\n')
        ase_atoms = []
        for i, line in enumerate(mol_list):
            if i > 3:
                try:
                    atom0, atom1, bond, rest = line
                    atom0 = int(atom0)
                    atom0 = int(atom1)
                    bond = float(bond)
                except ValueError:
                    try:
                        x, y, z, symbol = line.split()[0:4]
                        x = float(x)
                        y = float(y)
                        z = float(z)
                        ase_atoms.append(
                            Atom(symbol=symbol, position=(x, y, z)))
                    except BaseException:
                        continue

        self.ase_molecule = Atoms(ase_atoms)

        return self.ase_molecule

    def get_molecules(self):
        if not self.rmg_molecule:
            self.rmg_molecule = RMGMolecule(SMILES=self.smiles)
        self.rdkit_molecule = self.get_rdkit_mol()
        self.ase_molecule = self.get_ase_mol()
        self.get_geometries()

        return self.rdkit_molecule, self.ase_molecule

    def view(self):
        """
        A method designed to create a 3D figure of the AutoTST_Molecule with py3Dmol from the rdkit_molecule
        """
        mb = Chem.MolToMolBlock(self.rdkit_molecule)
        p = py3Dmol.view(width=600, height=600)
        p.addModel(mb, "sdf")
        p.setStyle({'stick': {}})
        p.setBackgroundColor('0xeeeeee')
        p.zoomTo()
        return p.show()

    def get_bonds(self):
        """
        A method for identifying all of the bonds in a conformer
        """
        bond_list = []
        for bond in self.rdkit_molecule.GetBonds():
            bond_list.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))

        bonds = []
        for index, indices in enumerate(bond_list):
            i, j = indices

            length = self.ase_molecule.get_distance(i, j)
            center = False
            if ((self.rmg_molecule.atoms[i].label) and (
                    self.rmg_molecule.atoms[j].label)):
                center = True

            bond = Bond(index=index,
                        atom_indices=indices,
                        length=length,
                        reaction_center=center)
            mask = self.get_mask(bond)
            bond.mask = mask

            bonds.append(bond)

        self.bonds = bonds

        return self.bonds

    def get_angles(self):
        """
        A method for identifying all of the angles in a conformer
        """

        angle_list = []
        for atom1 in self.rdkit_molecule.GetAtoms():
            for atom2 in atom1.GetNeighbors():
                for atom3 in atom2.GetNeighbors():
                    if atom1.GetIdx() == atom3.GetIdx():
                        continue

                    to_add = (atom1.GetIdx(), atom2.GetIdx(), atom3.GetIdx())
                    if (to_add in angle_list) or (
                            tuple(reversed(to_add)) in angle_list):
                        continue
                    angle_list.append(to_add)

        angles = []
        for index, indices in enumerate(angle_list):
            i, j, k = indices

            degree = self.ase_molecule.get_angle(i, j, k)
            ang = Angle(index=index,
                        atom_indices=indices,
                        degree=degree,
                        mask=[])
            mask = self.get_mask(ang)
            reaction_center = False

            angles.append(Angle(index=index,
                                atom_indices=indices,
                                degree=degree,
                                mask=mask,
                                reaction_center=reaction_center))
        self.angles = angles
        return self.angles

    def get_torsions(self):
        """
        A method for identifying all of the torsions in a conformer
        """
        torsion_list = []
        for bond1 in self.rdkit_molecule.GetBonds():
            atom1 = bond1.GetBeginAtom()
            atom2 = bond1.GetEndAtom()
            if atom1.IsInRing() or atom2.IsInRing():
                # Making sure that bond1 we're looking at are not in a ring
                continue

            bond_list1 = list(atom1.GetBonds())
            bond_list2 = list(atom2.GetBonds())

            if not len(bond_list1) > 1 and not len(bond_list2) > 1:
                # Making sure that there are more than one bond attached to
                # the atoms we're looking at
                continue

            # Getting the 0th and 3rd atom and insuring that atoms
            # attached to the 1st and 2nd atom are not terminal hydrogens
            # We also make sure that all of the atoms are properly bound
            # together

            # If the above are satisfied, we append a tuple of the torsion our
            # torsion_list
            got_atom0 = False
            got_atom3 = False

            for bond0 in bond_list1:
                atomX = bond0.GetOtherAtom(atom1)
                # if atomX.GetAtomicNum() == 1 and len(atomX.GetBonds()) == 1:
                # This means that we have a terminal hydrogen, skip this
                # NOTE: for H_abstraction TSs, a non teminal H should exist
                #    continue
                if atomX.GetIdx() != atom2.GetIdx():
                    got_atom0 = True
                    atom0 = atomX

            for bond2 in bond_list2:
                atomY = bond2.GetOtherAtom(atom2)
                # if atomY.GetAtomicNum() == 1 and len(atomY.GetBonds()) == 1:
                # This means that we have a terminal hydrogen, skip this
                #    continue
                if atomY.GetIdx() != atom1.GetIdx():
                    got_atom3 = True
                    atom3 = atomY

            if not (got_atom0 and got_atom3):
                # Making sure atom0 and atom3 were not found
                continue

            # Looking to make sure that all of the atoms are properly bonded to
            # eached
            if (
                "SINGLE" in str(
                    self.rdkit_molecule.GetBondBetweenAtoms(
                        atom1.GetIdx(),
                        atom2.GetIdx()).GetBondType()) and self.rdkit_molecule.GetBondBetweenAtoms(
                    atom0.GetIdx(),
                    atom1.GetIdx()) and self.rdkit_molecule.GetBondBetweenAtoms(
                    atom1.GetIdx(),
                    atom2.GetIdx()) and self.rdkit_molecule.GetBondBetweenAtoms(
                        atom2.GetIdx(),
                    atom3.GetIdx())):

                torsion_tup = (atom0.GetIdx(), atom1.GetIdx(),
                               atom2.GetIdx(), atom3.GetIdx())

                already_in_list = False
                for torsion_entry in torsion_list:
                    a, b, c, d = torsion_entry
                    e, f, g, h = torsion_tup

                    if (b, c) == (f, g) or (b, c) == (g, f):
                        already_in_list = True

                if not already_in_list:
                    torsion_list.append(torsion_tup)

        torsions = []
        for index, indices in enumerate(torsion_list):
            i, j, k, l = indices

            dihedral = self.ase_molecule.get_dihedral(i, j, k, l)
            tor = Torsion(index=index,
                          atom_indices=indices,
                          dihedral=dihedral,
                          mask=[])
            mask = self.get_mask(tor)
            reaction_center = False

            torsions.append(Torsion(index=index,
                                    atom_indices=indices,
                                    dihedral=dihedral,
                                    mask=mask,
                                    reaction_center=reaction_center))

        self.torsions = torsions
        return self.torsions

    def get_cistrans(self):
        """
        A method for identifying all possible cistrans bonds in a molecule
        """
        torsion_list = []
        cistrans_list = []
        for bond1 in self.rdkit_molecule.GetBonds():
            atom1 = bond1.GetBeginAtom()
            atom2 = bond1.GetEndAtom()
            if atom1.IsInRing() or atom2.IsInRing():
                # Making sure that bond1 we're looking at are not in a ring
                continue

            bond_list1 = list(atom1.GetBonds())
            bond_list2 = list(atom2.GetBonds())

            if not len(bond_list1) > 1 and not len(bond_list2) > 1:
                # Making sure that there are more than one bond attached to
                # the atoms we're looking at
                continue

            # Getting the 0th and 3rd atom and insuring that atoms
            # attached to the 1st and 2nd atom are not terminal hydrogens
            # We also make sure that all of the atoms are properly bound
            # together

            # If the above are satisfied, we append a tuple of the torsion our
            # torsion_list
            got_atom0 = False
            got_atom3 = False

            for bond0 in bond_list1:
                atomX = bond0.GetOtherAtom(atom1)
                # if atomX.GetAtomicNum() == 1 and len(atomX.GetBonds()) == 1:
                # This means that we have a terminal hydrogen, skip this
                # NOTE: for H_abstraction TSs, a non teminal H should exist
                #    continue
                if atomX.GetIdx() != atom2.GetIdx():
                    got_atom0 = True
                    atom0 = atomX

            for bond2 in bond_list2:
                atomY = bond2.GetOtherAtom(atom2)
                # if atomY.GetAtomicNum() == 1 and len(atomY.GetBonds()) == 1:
                # This means that we have a terminal hydrogen, skip this
                #    continue
                if atomY.GetIdx() != atom1.GetIdx():
                    got_atom3 = True
                    atom3 = atomY

            if not (got_atom0 and got_atom3):
                # Making sure atom0 and atom3 were not found
                continue

            # Looking to make sure that all of the atoms are properly bonded to
            # eached
            if (
                "DOUBLE" in str(
                    self.rdkit_molecule.GetBondBetweenAtoms(
                        atom1.GetIdx(),
                        atom2.GetIdx()).GetBondType()) and self.rdkit_molecule.GetBondBetweenAtoms(
                    atom0.GetIdx(),
                    atom1.GetIdx()) and self.rdkit_molecule.GetBondBetweenAtoms(
                    atom1.GetIdx(),
                    atom2.GetIdx()) and self.rdkit_molecule.GetBondBetweenAtoms(
                        atom2.GetIdx(),
                    atom3.GetIdx())):

                torsion_tup = (atom0.GetIdx(), atom1.GetIdx(),
                               atom2.GetIdx(), atom3.GetIdx())

                already_in_list = False
                for torsion_entry in torsion_list:
                    a, b, c, d = torsion_entry
                    e, f, g, h = torsion_tup

                    if (b, c) == (f, g) or (b, c) == (g, f):
                        already_in_list = True

                if not already_in_list:
                    cistrans_list.append(torsion_tup)

        cistrans = []

        for ct_index, indices in enumerate(cistrans_list):
            i, j, k, l = indices

            b0 = self.rdkit_molecule.GetBondBetweenAtoms(i, j)
            b1 = self.rdkit_molecule.GetBondBetweenAtoms(j, k)
            b2 = self.rdkit_molecule.GetBondBetweenAtoms(k, l)

            b0.SetBondDir(Chem.BondDir.ENDUPRIGHT)
            b2.SetBondDir(Chem.BondDir.ENDDOWNRIGHT)

            Chem.AssignStereochemistry(self.rdkit_molecule, force=True)

            if "STEREOZ" in str(b1.GetStereo()):
                if round(self.ase_molecule.get_dihedral(i, j, k, l), -1) == 0:
                    atom = self.rdkit_molecule.GetAtomWithIdx(k)
                    bonds = atom.GetBonds()
                    for bond in bonds:
                        indexes = [
                            bond.GetBeginAtomIdx(),
                            bond.GetEndAtomIdx()]
                        if not ((sorted([j, k]) == sorted(indexes)) or (
                                sorted([k, l]) == sorted(indexes))):
                            break

                    for index in indexes:
                        if not (index in indices):
                            l = index
                            break

                indices = [i, j, k, l]
                stero = "Z"

            else:
                if round(
                    self.ase_molecule.get_dihedral(
                        i, j, k, l), -1) == 180:
                    atom = self.rdkit_molecule.GetAtomWithIdx(k)
                    bonds = atom.GetBonds()
                    for bond in bonds:
                        indexes = [
                            bond.GetBeginAtomIdx(),
                            bond.GetEndAtomIdx()]
                        if not ((sorted([j, k]) == sorted(indexes)) or (
                                sorted([k, l]) == sorted(indexes))):
                            break

                    for index in indexes:
                        if not (index in indices):
                            l = index
                            break

                indices = [i, j, k, l]
                stero = "E"

            dihedral = self.ase_molecule.get_dihedral(i, j, k, l)
            tor = CisTrans(index=ct_index,
                           atom_indices=indices,
                           dihedral=dihedral,
                           mask=[],
                           stero=stero)
            mask = self.get_mask(tor)
            reaction_center = False

            cistrans.append(CisTrans(index=ct_index,
                                     atom_indices=indices,
                                     dihedral=dihedral,
                                     mask=mask,
                                     stero=stero
                                     )
                            )

        self.cistrans = cistrans
        return self.cistrans

    def get_mask(self, geometry):
        """
        Getting the right hand mask for a geometry object:

        - self: an AutoTST Conformer object
        - geometry: a Bond, Angle, Dihedral, or Torsion object 


        """

        rdkit_atoms = self.rdkit_molecule.GetAtoms()
        if (isinstance(geometry, autotst.geometry.Torsion) or
                isinstance(geometry, autotst.geometry.CisTrans)):

            L1, L0, R0, R1 = geometry.atom_indices

            # trying to get the left hand side of this torsion
            LHS_atoms_index = [L0, L1]
            RHS_atoms_index = [R0, R1]

        elif isinstance(geometry, autotst.geometry.Angle):
            a1, a2, a3 = geometry.atom_indices
            LHS_atoms_index = [a2, a1]
            RHS_atoms_index = [a2, a3]

        elif isinstance(geometry, autotst.geometry.Bond):
            a1, a2 = geometry.atom_indices
            LHS_atoms_index = [a1]
            RHS_atoms_index = [a2]

        complete_RHS = False
        i = 0
        atom_index = RHS_atoms_index[0]
        while complete_RHS is False:
            try:
                RHS_atom = rdkit_atoms[atom_index]
                for neighbor in RHS_atom.GetNeighbors():
                    if (neighbor.GetIdx() in RHS_atoms_index) or (
                            neighbor.GetIdx() in LHS_atoms_index):
                        continue
                    else:
                        RHS_atoms_index.append(neighbor.GetIdx())
                i += 1
                atom_index = RHS_atoms_index[i]

            except IndexError:
                complete_RHS = True

        mask = [index in RHS_atoms_index for index in range(
            len(self.ase_molecule))]

        return mask

    def get_chiral_centers(self):
        """
        A method to identify
        """

        centers = rdkit.Chem.FindMolChiralCenters(
            self.rdkit_molecule, includeUnassigned=True)
        chiral_centers = []

        for index, center in enumerate(centers):
            atom_index, chirality = center

            chiral_centers.append(
                ChiralCenter(
                    index=index,
                    atom_index=atom_index,
                    chirality=chirality))

        self.chiral_centers = chiral_centers
        return self.chiral_centers

    def get_geometries(self):
        """
        A helper method to obtain all geometry things
        """

        self.bonds = self.get_bonds()
        self.angles = self.get_angles()
        self.torsions = self.get_torsions()
        self.cistrans = self.get_cistrans()
        self.chiral_centers = self.get_chiral_centers()

        return (
            self.bonds,
            self.angles,
            self.torsions,
            self.cistrans,
            self.chiral_centers)

    def update_coords(self):
        """
        A function that creates distance matricies for the RMG, ASE, and RDKit molecules and finds which
        (if any) are different. If one is different, this will update the coordinates of the other two
        with the different one. If all three are different, nothing will happen. If all are the same,
        nothing will happen.
        """
        rdkit_dm = rdkit.Chem.rdmolops.Get3DDistanceMatrix(self.rdkit_molecule)
        ase_dm = self.ase_molecule.get_all_distances()
        l = len(self.rmg_molecule.atoms)
        rmg_dm = np.zeros((l, l))

        for i, atom_i in enumerate(self.rmg_molecule.atoms):
            for j, atom_j in enumerate(self.rmg_molecule.atoms):
                rmg_dm[i][j] = np.linalg.norm(atom_i.coords - atom_j.coords)

        d1 = round(abs(rdkit_dm - ase_dm).max(), 3)
        d2 = round(abs(rdkit_dm - rmg_dm).max(), 3)
        d3 = round(abs(ase_dm - rmg_dm).max(), 3)

        if np.all(np.array([d1, d2, d3]) > 0):
            return False, None

        if np.any(np.array([d1, d2, d3]) > 0):
            if d1 == 0:
                diff = "rmg"
                self.update_coords_from("rmg")
            elif d2 == 0:
                diff = "ase"
                self.update_coords_from("ase")
            else:
                diff = "rdkit"
                self.update_coords_from("rdkit")

            return True, diff
        else:
            return True, None

    def update_coords_from(self, mol_type="ase"):
        """
        A method to update the coordinates of the RMG, RDKit, and ASE objects with a chosen object.
        """

        possible_mol_types = ["ase", "rmg", "rdkit"]

        assert (mol_type.lower() in possible_mol_types), "Please specifiy a valid mol type. Valid types are {}".format(
            possible_mol_types)

        if mol_type.lower() == "rmg":
            conf = self.rdkit_molecule.GetConformers()[0]
            ase_atoms = []
            for i, atom in enumerate(self.rmg_molecule.atoms):
                x, y, z = atom.coords
                symbol = atom.symbol

                conf.SetAtomPosition(i, [x, y, z])

                ase_atoms.append(Atom(symbol=symbol, position=(x, y, z)))

            self.ase_molecule = Atoms(ase_atoms)
            # self.calculate_symmetry_number()

        elif mol_type.lower() == "ase":
            conf = self.rdkit_molecule.GetConformers()[0]
            for i, position in enumerate(self.ase_molecule.get_positions()):
                self.rmg_molecule.atoms[i].coords = position
                conf.SetAtomPosition(i, position)

            # self.calculate_symmetry_number()

        elif mol_type.lower() == "rdkit":

            mol_list = AllChem.MolToMolBlock(self.rdkit_molecule).split('\n')
            for i, atom in enumerate(self.rmg_molecule.atoms):
                j = i + 4
                coords = mol_list[j].split()[:3]
                for k, coord in enumerate(coords):
                    coords[k] = float(coord)
                atom.coords = np.array(coords)

            self.get_ase_mol()
            # self.calculate_symmetry_number()

    def set_bond_length(self, bond_index, length):
        """
        This is a method to set bond lengths
        Variabels:
        - bond_index (int): the index of the bond you want to edit
        - length (float, int): the distance you want to set the bond (in angstroms)
        """

        assert isinstance(length, (float, int))

        matched = False
        for bond in self.bonds:
            if bond.index == bond_index:
                matched = True
                break

        if not matched:
            logging.info("Angle index provided is out of range. Nothing was changed.")
            return self

        i, j = bond.atom_indices
        self.ase_molecule.set_distance(
            a0=i,
            a1=j,
            distance=length,
            mask=bond.mask,
            fix=0
        )

        bond.length = length

        self.update_coords_from(mol_type="ase")
        return self

    def set_angle(self, angle_index, angle):
        """
        A method that will set the angle of an Angle object accordingly
        """

        assert isinstance(
            angle, (int, float)), "Plese provide a float or an int for the angle"

        matched = False
        for a in self.angles:
            if a.index == angle_index:
                matched = True
                break

        if not matched:
            logging.info("Angle index provided is out of range. Nothing was changed.")
            return self

        i, j, k = a.atom_indices
        self.ase_molecule.set_angle(
            a1=i,
            a2=j,
            a3=k,
            angle=angle,
            mask=a.mask
        )

        a.degree = angle

        self.update_coords_from(mol_type="ase")

        return self

    def set_torsion(self, torsion_index, dihedral):
        """
        A method that will set the diehdral angle of a Torsion object accordingly.
        """

        assert isinstance(
            dihedral, (int, float)), "Plese provide a float or an int for the diehdral angle"

        matched = False
        for torsion in self.torsions:
            if torsion.index == torsion_index:
                matched = True
                break

        if not matched:
            logging.info("Torsion index provided is out of range. Nothing was changed.")
            return self

        i, j, k, l = torsion.atom_indices
        self.ase_molecule.set_dihedral(
            a1=i,
            a2=j,
            a3=k,
            a4=l,
            angle=dihedral,
            mask=torsion.mask
        )
        torsion.dihedral = dihedral

        self.update_coords_from(mol_type="ase")

        return self

    def set_cistrans(self, cistrans_index, stero="E"):
        """
        A module that will set a corresponding cistrans bond to the proper E/Z config
        """

        assert stero.upper() in [
            "E", "Z"], "Please specify a valid stero direction."

        matched = False
        for cistrans in self.cistrans:
            if cistrans.index == cistrans_index:
                matched = True
                break

        if not matched:
            logging.info("CisTrans index provided is out of range. Nothing was changed.")
            return self

        if cistrans.stero == stero.upper():
            self.update_coords_from("ase")
            return self

        else:
            cistrans.stero = stero.upper()
            i, j, k, l = cistrans.atom_indices
            self.ase_molecule.rotate_dihedral(
                a1=i,
                a2=j,
                a3=k,
                a4=l,
                angle=float(180),
                mask=cistrans.mask
            )
            cistrans.stero = stero.upper()

            self.update_coords_from(mol_type="ase")
            return self

    def set_chirality(self, chiral_center_index, stero="R"):
        """
        A module that can set the orientation of a chiral center.
        """
        assert stero.upper() in ["R", "S"], "Specify a valid stero orientation"

        centers_dict = {
            'R': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
            'S': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW
        }

        assert isinstance(chiral_center_index,
                          int), "Please provide an integer for the index"

        rdmol = self.rdkit_molecule.__copy__()

        match = False
        for chiral_center in self.chiral_centers:
            if chiral_center.index == chiral_center_index:
                match = True
                break

        if not match:
            logging.info("ChiralCenter index provided is out of range. Nothing was changed")
            return self

        rdmol.GetAtomWithIdx(chiral_center.atom_index).SetChiralTag(
            centers_dict[stero.upper()])

        rdkit.Chem.rdDistGeom.EmbedMolecule(rdmol)

        old_torsions = self.torsions[:] + self.cistrans[:]

        self.rdkit_molecule = rdmol
        self.update_coords_from(mol_type="rdkit")

        # Now resetting dihedral angles in case if they changed.

        for torsion in old_torsions:
            i, j, k, l = torsion.atom_indices

            self.ase_molecule.set_dihedral(
                a1=i,
                a2=j,
                a3=k,
                a4=l,
                mask=torsion.mask,
                angle=torsion.dihedral,
            )

        self.update_coords_from(mol_type="ase")

        return self

    def calculate_symmetry_number(self):
        from rmgpy.qm.symmetry import PointGroupCalculator
        from rmgpy.qm.qmdata import QMData

        atom_numbers = self.ase_molecule.get_atomic_numbers()
        coordinates = self.ase_molecule.get_positions()

        qmdata = QMData(
            groundStateDegeneracy=1,  # Only needed to check if valid QMData
            numberOfAtoms=len(atom_numbers),
            atomicNumbers=atom_numbers,
            atomCoords=(coordinates, str('angstrom')),
            energy=(0.0, str('kcal/mol'))  # Only needed to avoid error
        )
        settings = type(str(''), (), dict(symmetryPath=str(
            'symmetry'), scratchDirectory="."))()  # Creates anonymous class
        pgc = PointGroupCalculator(settings, self.smiles, qmdata)
        pg = pgc.calculate()
        #os.remove("{}.symm".format(self.smiles))

        if pg is not None:
            symmetry_number = pg.symmetryNumber
        else:
            symmetry_number = 1

        return symmetry_number
示例#2
0
class TS(Conformer):
    """
    A class that defines the 3D geometry of a transition state (TS)
    """
    def __init__(self,
                 smiles=None,
                 reaction_label=None,
                 rmg_molecule=None,
                 reaction_family="H_Abstraction",
                 distance_data=None,
                 index=0):
        self.energy = None
        #####################################################
        #####################################################
        assert reaction_label, "A reaction label needs to be provided in addition to a smiles or rmg_molecule"
        self.reaction_label = reaction_label
        self._rdkit_molecule = None
        self._ase_molecule = None
        self.reaction_family = reaction_family
        self.distance_data = distance_data
        self.index = index

        if (smiles or rmg_molecule):
            if smiles and rmg_molecule:
                assert rmg_molecule.isIsomorphic(
                    RMGMolecule(SMILES=smiles)
                ), "SMILES string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule

            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.toSMILES()

            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(SMILES=smiles)

            self.rmg_molecule.updateMultiplicity()
            self.get_mols()
            self.get_geometries()

        else:
            self.smiles = None
            self.rmg_molecule = None
            self.rdkit_molecule = None
            self._pseudo_geometry = None
            self.ase_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []

    def __repr__(self):
        return '<TS "{}">'.format(self.smiles)

    def copy(self):
        copy_conf = TS(reaction_label=self.reaction_label,
                       reaction_family=self.reaction_family)
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf.rdkit_molecule = self.rdkit_molecule.__copy__()
        copy_conf._pseudo_geometry = self._pseudo_geometry.__copy__()
        copy_conf.ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        return copy_conf

    @property
    def rdkit_molecule(self):
        if (self._rdkit_molecule is None) and self.distance_data:
            self._rdkit_molecule = self.get_rdkit_mol(self.rmg_molecule,
                                                      self.reaction_family,
                                                      self.distance_data)[0]
        return self._rdkit_molecule

    @property
    def ase_molecule(self):
        if (self._ase_molecule is None):
            self._ase_molecule = self.get_ase_mol()
        return self._ase_molecule

    def get_rdkit_mol(self,
                      rmg_molecule=None,
                      reaction_family="H_Abstraction",
                      distance_data=None):
        """
        A method to create an rdkit geometry... slightly different than that of the conformer method
        returns both the rdkit_molecule and the bm
        """
        if not rmg_molecule:
            rmg_molecule = self.rmg_molecule
        rdkit_molecule = Chem.RWMol(
            Conformer().get_rdkit_mol(rmg_molecule=rmg_molecule))

        labels, atom_match = self.get_labels(rmg_molecule, reaction_family)
        for i, atom in enumerate(rmg_molecule.atoms):
            assert atom.number == rdkit_molecule.GetAtoms()[i].GetAtomicNum()

        if len(labels) == 3:

            rd_copy = rdkit_molecule.__copy__()

            lbl1, lbl2, lbl3 = labels

            if not rd_copy.GetBondBetweenAtoms(lbl1, lbl2):
                rd_copy.AddBond(lbl1,
                                lbl2,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            else:
                rd_copy.AddBond(lbl2,
                                lbl3,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)

            self._pseudo_geometry = rd_copy

        logging.info("Initially embedded molecule")

        bm = None

        if distance_data:
            logging.info("Getting bounds matrix")
            bm = self.get_bounds_matrix(rdkit_molecule=rdkit_molecule)

            if len(labels) > 0:

                logging.info("Editing bounds matrix")
                bm = self.edit_matrix(rmg_molecule, bm, labels, distance_data)

            logging.info("Performing triangle smoothing on bounds matrix.")
            DistanceGeometry.DoTriangleSmoothing(bm)

            logging.info("Now attempting to embed using edited bounds matrix.")

            rdkit_molecule = self.rd_embed(rdkit_molecule,
                                           10000,
                                           bm=bm,
                                           match=atom_match)[0]

        return rdkit_molecule, bm

    def get_bounds_matrix(self, rmg_molecule=None, rdkit_molecule=None):
        """
        A method to obtain the bounds matrix
        """

        if not rmg_molecule:
            try:
                rmg_molecule = self.rmg_molecule
            except BaseException:
                return None

        if not rdkit_molecule:
            try:
                rdkit_molecule = self.get_rdkit_mol(rmg_molecule=rmg_molecule)
            except BaseException:
                return None

        logging.info("before")

        bm = rdDistGeom.GetMoleculeBoundsMatrix(rdkit_molecule)
        logging.info("Got bounds matrix")

        return bm

    def set_limits(self, bm, lbl1, lbl2, value, uncertainty):
        """
        A method to set the limits of a particular distance between two atoms

        :param bm: an array of arrays corresponding to the bounds matrix
        :param lbl1: the label of one atom
        :param lbl2: the label of another atom
        :param value: the distance from a distance data object (float)
        :param uncertainty: the uncertainty of the `value` distance (float)
        :return bm: an array of arrays corresponding to the edited bounds matrix
        """
        logging.info(
            "For atoms {0} and {1} we have a distance of: \t {2}".format(
                lbl1, lbl2, value))
        if lbl1 > lbl2:
            bm[lbl2][lbl1] = value + uncertainty / 2
            bm[lbl1][lbl2] = max(0, value - uncertainty / 2)
        else:
            bm[lbl2][lbl1] = max(0, value - uncertainty / 2)
            bm[lbl1][lbl2] = value + uncertainty / 2

        return bm

    def bm_pre_edit(self, bm, sect):
        """
        Clean up some of the atom distance limits before attempting triangle smoothing.
        This ensures any edits made do not lead to unsolvable scenarios for the molecular
        embedding algorithm.

        sect is the list of atom indices belonging to one species.
        """
        others = range(len(bm))
        for idx in sect:
            others.remove(idx)

        for i in range(len(bm)):  # sect:
            for j in range(i):  # others:
                if i < j:
                    continue
                for k in range(len(bm)):
                    if k == i or k == j or i == j:
                        continue
                    Uik = bm[i, k] if k > i else bm[k, i]
                    Ukj = bm[j, k] if k > j else bm[k, j]

                    maxLij = Uik + Ukj - 0.1
                    if bm[i, j] > maxLij:
                        logging.info("Changing lower limit {0} to {1}".format(
                            bm[i, j], maxLij))
                        bm[i, j] = maxLij

        return bm

    def get_labels(self, rmg_molecule, reaction_family):
        """
        A method to get the labeled atoms from a reaction

        :param reactants: a combined rmg_molecule object
        :return labels: the atom labels corresponding to the reaction center
        :return atomMatch: a tuple of tuples the atoms labels corresponding to the reaction center
        """

        if len(rmg_molecule.getLabeledAtoms()) == 0:
            labeles = []
            atomMatch = ()

        if reaction_family.lower() in [
                'h_abstraction', 'r_addition_multiplebond', 'intra_h_migration'
        ]:
            # for i, atom in enumerate(reactants.atoms):
            lbl1 = rmg_molecule.getLabeledAtoms()["*1"].sortingLabel
            lbl2 = rmg_molecule.getLabeledAtoms()["*2"].sortingLabel
            lbl3 = rmg_molecule.getLabeledAtoms()["*3"].sortingLabel
            labels = [lbl1, lbl2, lbl3]
            atomMatch = ((lbl1, ), (lbl2, ), (lbl3, ))
        elif reaction_family.lower() in ['disproportionation']:
            lbl1 = rmg_molecule.getLabeledAtoms()["*2"].sortingLabel
            lbl2 = rmg_molecule.getLabeledAtoms()["*4"].sortingLabel
            lbl3 = rmg_molecule.getLabeledAtoms()["*1"].sortingLabel

            labels = [lbl1, lbl2, lbl3]
            atomMatch = ((lbl1, ), (lbl2, ), (lbl3, ))

        #logging.info("The labled atoms are {}.".format(labels))

        return labels, atomMatch

    def edit_matrix(self, rmg_molecule, bm, labels, distance_data):
        """
        A method to edit the bounds matrix using labels and distance data
        """

        lbl1, lbl2, lbl3 = labels

        sect = []

        for atom in rmg_molecule.split()[0].atoms:
            sect.append(atom.sortingLabel)

        uncertainties = {'d12': 0.02, 'd13': 0.02, 'd23': 0.02}
        bm = self.set_limits(bm, lbl1, lbl2, distance_data.distances['d12'],
                             uncertainties['d12'])
        bm = self.set_limits(bm, lbl2, lbl3, distance_data.distances['d23'],
                             uncertainties['d23'])
        bm = self.set_limits(bm, lbl1, lbl3, distance_data.distances['d13'],
                             uncertainties['d13'])

        bm = self.bm_pre_edit(bm, sect)

        return bm

    def optimize(self, rdmol, boundsMatrix=None, atomMatch=None):
        """

        Optimizes the rdmol object using UFF.
        Determines the energy level for each of the conformers identified in rdmol.GetConformer.


        :param rdmol:
        :param boundsMatrix:
        :param atomMatch:
        :return rdmol, minEid (index of the lowest energy conformer)
        """

        energy = 0.0
        minEid = 0
        lowestE = 9.999999e99  # start with a very high number, which would never be reached

        for conf in rdmol.GetConformers():
            if (boundsMatrix is None) or (atomMatch is None):
                AllChem.UFFOptimizeMolecule(rdmol, confId=conf.GetId())
                energy = AllChem.UFFGetMoleculeForceField(
                    rdmol, confId=conf.GetId()).CalcEnergy()
            else:
                _, energy = EmbedLib.OptimizeMol(rdmol,
                                                 boundsMatrix,
                                                 atomMatches=atomMatch,
                                                 forceConstant=100000.0)

            if energy < lowestE:
                minEid = conf.GetId()
                lowestE = energy

        return rdmol, minEid

    def rd_embed(self, rdmol, numConfAttempts, bm=None, match=None):
        """
        This portion of the script is literally taken from rmgpy but hacked to work without defining a geometry object

        Embed the RDKit molecule and create the crude molecule file.
        """
        if (bm is None) or (match is None):
            AllChem.EmbedMultipleConfs(rdmol, numConfAttempts, randomSeed=1)

            rdmol, minEid = self.optimize(rdmol)
        else:
            """
            Embed the molecule according to the bounds matrix. Built to handle possible failures
            of some of the embedding attempts.
            """
            rdmol.RemoveAllConformers()
            for i in range(0, numConfAttempts):
                try:
                    EmbedLib.EmbedMol(rdmol, bm, atomMatch=match)
                    break
                except ValueError:
                    logging.info(
                        "RDKit failed to embed on attempt {0} of {1}".format(
                            i + 1, numConfAttempts))
                except RuntimeError:
                    logging.info("RDKit failed to embed.")
            else:
                logging.error("RDKit failed all attempts to embed")
                return None, None
            """
            RDKit currently embeds the conformers and sets the id as 0, so even though multiple
            conformers have been generated, only 1 can be called. Below the id's are resolved.
            """
            for i in range(len(rdmol.GetConformers())):
                rdmol.GetConformers()[i].SetId(i)

            rdmol, minEid = self.optimize(rdmol,
                                          boundsMatrix=bm,
                                          atomMatch=match)

        return rdmol, minEid

    def get_bonds(self):
        return Conformer().get_bonds(self._pseudo_geometry, self.ase_molecule,
                                     self.rmg_molecule)

    def get_torsions(self):
        return Conformer().get_torsions(self._pseudo_geometry,
                                        self.ase_molecule)

    def get_angles(self):
        return Conformer().get_angles(self._pseudo_geometry, self.ase_molecule)