def get_rmg_complexes(self):
    """
    Assemble the forward and reverse complexes used to seed transition
    state geometries.

    If `self.rmg_reaction` is None, `self.get_labeled_reaction()` is called
    first. Each side of the reaction is then merged into one molecule:
    RMGMolecule entries are merged directly, while for RMGSpecies entries
    every structure in `molecule` that has at least one labeled atom is
    merged. Entries of any other type are ignored.

    A warning is logged when either complex ends up without labeled atoms.

    Returns:
    - complexes (dict): the merged reactant complex under "forward" and the
      merged product complex under "reverse"; also stored on
      `self.complexes`
    """

    def _combine(members):
        # Fold one side of the reaction into a single molecule.
        merged = RMGMolecule()
        for member in members:
            if isinstance(member, RMGMolecule):
                merged = merged.merge(member)
                continue
            if not isinstance(member, RMGSpecies):
                continue
            for structure in member.molecule:
                if len(structure.get_all_labeled_atoms()) > 0:
                    merged = merged.merge(structure)
        return merged

    if self.rmg_reaction is None:
        self.get_labeled_reaction()

    forward_complex = _combine(self.rmg_reaction.reactants)
    reverse_complex = _combine(self.rmg_reaction.products)

    for merged_complex in (forward_complex, reverse_complex):
        merged_complex.update_multiplicity()

    labels_missing = any(
        len(merged_complex.get_all_labeled_atoms()) == 0
        for merged_complex in (forward_complex, reverse_complex)
    )
    if labels_missing:
        logging.warning(
            "REACTING ATOMS LABELES NOT PROVIDED. Please call `Reaction.get_labeled_reaction` to generate labeled complexes"
        )

    self.complexes = {
        "forward": forward_complex,
        "reverse": reverse_complex,
    }
    return self.complexes
def test_intra_r_add_exo_scission(self):
    """
    Check the labels on the product generated by Intra_R_Add_Exo_scission.

    The family is its own reverse, so the product must carry the labels in
    the positions given by the expected structure.
    """
    rxn_family = self.database.families['Intra_R_Add_Exo_scission']

    starting_structure = Molecule().from_adjacency_list("""
multiplicity 2
1 *3 C u0 p0 c0 {2,S} {8,S} {11,S} {12,S}
2 *2 C u0 p0 c0 {1,S} {3,B} {4,B}
3 C u0 p0 c0 {2,B} {5,B} {13,S}
4 C u0 p0 c0 {2,B} {7,B} {17,S}
5 C u0 p0 c0 {3,B} {6,B} {14,S}
6 C u0 p0 c0 {5,B} {7,B} {15,S}
7 C u0 p0 c0 {4,B} {6,B} {16,S}
8 *1 C u1 p0 c0 {1,S} {9,S} {18,S}
9 C u0 p0 c0 {8,S} {10,T}
10 C u0 p0 c0 {9,T} {19,S}
11 H u0 p0 c0 {1,S}
12 H u0 p0 c0 {1,S}
13 H u0 p0 c0 {3,S}
14 H u0 p0 c0 {5,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {7,S}
17 H u0 p0 c0 {4,S}
18 H u0 p0 c0 {8,S}
19 H u0 p0 c0 {10,S}
""")

    target = Molecule().from_adjacency_list("""
multiplicity 2
1 *3 C u0 p0 c0 {2,S} {8,S} {9,S} {11,S}
2 *2 C u0 p0 c0 {1,S} {3,B} {4,B}
3 C u0 p0 c0 {2,B} {5,B} {12,S}
4 C u0 p0 c0 {2,B} {7,B} {16,S}
5 C u0 p0 c0 {3,B} {6,B} {13,S}
6 C u0 p0 c0 {5,B} {7,B} {14,S}
7 C u0 p0 c0 {4,B} {6,B} {15,S}
8 *1 C u1 p0 c0 {1,S} {17,S} {18,S}
9 C u0 p0 c0 {1,S} {10,T}
10 C u0 p0 c0 {9,T} {19,S}
11 H u0 p0 c0 {1,S}
12 H u0 p0 c0 {3,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {6,S}
15 H u0 p0 c0 {7,S}
16 H u0 p0 c0 {4,S}
17 H u0 p0 c0 {8,S}
18 H u0 p0 c0 {8,S}
19 H u0 p0 c0 {10,S}
""")

    generated = rxn_family.apply_recipe([starting_structure])

    self.assertEqual(len(generated), 1)

    # Pair each labeled atom of the expected structure with the atom that
    # carries the same label in the generated product.
    label_map = {
        atom: generated[0].get_labeled_atoms(label)[0]
        for label, atom in target.get_all_labeled_atoms().items()
    }

    self.assertTrue(target.is_isomorphic(generated[0], label_map))
def test_12_shift_c(self):
    """
    Check the labels on the product generated by the 1,2_shiftC family.

    The family is its own reverse, so the product must carry the labels in
    the positions given by the expected structure.
    """
    rxn_family = self.database.families['1,2_shiftC']

    starting_structure = Molecule().from_adjacency_list("""
multiplicity 2
1 *2 C u0 p0 c0 {2,S} {3,S} {8,S} {9,S}
2 *1 C u0 p0 c0 {1,S} {10,S} {11,S} {12,S}
3 *3 C u1 p0 c0 {1,S} {4,S} {5,S}
4 C u0 p0 c0 {3,S} {6,D} {13,S}
5 C u0 p0 c0 {3,S} {7,D} {14,S}
6 C u0 p0 c0 {4,D} {7,S} {15,S}
7 C u0 p0 c0 {5,D} {6,S} {16,S}
8 H u0 p0 c0 {1,S}
9 H u0 p0 c0 {1,S}
10 H u0 p0 c0 {2,S}
11 H u0 p0 c0 {2,S}
12 H u0 p0 c0 {2,S}
13 H u0 p0 c0 {4,S}
14 H u0 p0 c0 {5,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {7,S}
""")

    target = Molecule().from_adjacency_list("""
multiplicity 2
1 *2 C u0 p0 c0 {2,S} {3,S} {4,S} {7,S}
2 *1 C u0 p0 c0 {1,S} {8,S} {9,S} {10,S}
3 C u0 p0 c0 {1,S} {5,D} {11,S}
4 C u0 p0 c0 {1,S} {6,D} {12,S}
5 C u0 p0 c0 {3,D} {6,S} {13,S}
6 C u0 p0 c0 {4,D} {5,S} {14,S}
7 *3 C u1 p0 c0 {1,S} {15,S} {16,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {2,S}
10 H u0 p0 c0 {2,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {4,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {6,S}
15 H u0 p0 c0 {7,S}
16 H u0 p0 c0 {7,S}
""")

    generated = rxn_family.apply_recipe([starting_structure])

    self.assertEqual(len(generated), 1)

    # Pair each labeled atom of the expected structure with the atom that
    # carries the same label in the generated product.
    label_map = {
        atom: generated[0].get_labeled_atoms(label)[0]
        for label, atom in target.get_all_labeled_atoms().items()
    }

    self.assertTrue(target.is_isomorphic(generated[0], label_map))
def test_intra_ene_reaction(self):
    """
    Check the labels on the product generated by Intra_ene_reaction.

    The family is its own reverse, so the product must carry the labels in
    the positions given by the expected structure.
    """
    rxn_family = self.database.families['Intra_ene_reaction']

    starting_structure = Molecule().from_adjacency_list("""
1 *1 C u0 p0 c0 {2,S} {3,S} {4,S} {10,S}
2 *5 C u0 p0 c0 {1,S} {5,D} {6,S}
3 *2 C u0 p0 c0 {1,S} {7,D} {11,S}
4 C u0 p0 c0 {1,S} {8,D} {12,S}
5 *4 C u0 p0 c0 {2,D} {7,S} {13,S}
6 C u0 p0 c0 {2,S} {9,D} {15,S}
7 *3 C u0 p0 c0 {3,D} {5,S} {14,S}
8 C u0 p0 c0 {4,D} {9,S} {17,S}
9 C u0 p0 c0 {6,D} {8,S} {16,S}
10 *6 H u0 p0 c0 {1,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {4,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {7,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {9,S}
17 H u0 p0 c0 {8,S}
""")

    target = Molecule().from_adjacency_list("""
1 *2 C u0 p0 c0 {2,D} {3,S} {4,S}
2 *3 C u0 p0 c0 {1,D} {5,S} {6,S}
3 *1 C u0 p0 c0 {1,S} {7,S} {11,S} {10,S}
4 C u0 p0 c0 {1,S} {8,D} {12,S}
5 *4 C u0 p0 c0 {2,S} {7,D} {13,S}
6 C u0 p0 c0 {2,S} {9,D} {15,S}
7 *5 C u0 p0 c0 {3,S} {5,D} {14,S}
8 C u0 p0 c0 {4,D} {9,S} {17,S}
9 C u0 p0 c0 {6,D} {8,S} {16,S}
10 *6 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {4,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {7,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {9,S}
17 H u0 p0 c0 {8,S}
""")

    generated = rxn_family.apply_recipe([starting_structure])

    self.assertEqual(len(generated), 1)

    # Pair each labeled atom of the expected structure with the atom that
    # carries the same label in the generated product.
    label_map = {
        atom: generated[0].get_labeled_atoms(label)[0]
        for label, atom in target.get_all_labeled_atoms().items()
    }

    self.assertTrue(target.is_isomorphic(generated[0], label_map))
def test_intra_substitution_s_isomerization(self):
    """
    Check the labels on the product generated by the
    intra_substitutionS_isomerization family.

    The family is its own reverse, so the product must carry the labels in
    the positions given by the expected structure.
    """
    rxn_family = self.database.families['intra_substitutionS_isomerization']

    starting_structure = Molecule().from_adjacency_list("""
multiplicity 2
1 *2 C u0 p0 c0 {3,S} {4,S} {5,S} {6,S}
2 C u0 p0 c0 {3,S} {7,S} {8,S} {9,S}
3 *3 C u1 p0 c0 {1,S} {2,S} {10,S}
4 *1 S u0 p2 c0 {1,S} {11,S}
5 H u0 p0 c0 {1,S}
6 H u0 p0 c0 {1,S}
7 H u0 p0 c0 {2,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {2,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {4,S}
""")

    target = Molecule().from_adjacency_list("""
multiplicity 2
1 *2 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
2 C u0 p0 c0 {1,S} {6,S} {7,S} {8,S}
3 *3 C u1 p0 c0 {1,S} {9,S} {10,S}
4 *1 S u0 p2 c0 {1,S} {11,S}
5 H u0 p0 c0 {1,S}
6 H u0 p0 c0 {2,S}
7 H u0 p0 c0 {2,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {4,S}
""")

    generated = rxn_family.apply_recipe([starting_structure])

    self.assertEqual(len(generated), 1)

    # Pair each labeled atom of the expected structure with the atom that
    # carries the same label in the generated product.
    label_map = {
        atom: generated[0].get_labeled_atoms(label)[0]
        for label, atom in target.get_all_labeled_atoms().items()
    }

    self.assertTrue(target.is_isomorphic(generated[0], label_map))
def test_6_membered_central_cc_shift(self):
    """
    Check the labels on the product generated by the
    6_membered_central_C-C_shift family.

    The family is its own reverse, so the product must carry the labels in
    the positions given by the expected structure.
    """
    rxn_family = self.database.families['6_membered_central_C-C_shift']

    starting_structure = Molecule().from_adjacency_list("""
1 *3 C u0 p0 c0 {2,S} {3,S} {7,S} {8,S}
2 *4 C u0 p0 c0 {1,S} {4,S} {9,S} {10,S}
3 *2 C u0 p0 c0 {1,S} {5,T}
4 *5 C u0 p0 c0 {2,S} {6,T}
5 *1 C u0 p0 c0 {3,T} {11,S}
6 *6 C u0 p0 c0 {4,T} {12,S}
7 H u0 p0 c0 {1,S}
8 H u0 p0 c0 {1,S}
9 H u0 p0 c0 {2,S}
10 H u0 p0 c0 {2,S}
11 H u0 p0 c0 {5,S}
12 H u0 p0 c0 {6,S}
""")

    target = Molecule().from_adjacency_list("""
1 *3 C u0 p0 c0 {2,S} {5,D} {7,S}
2 *4 C u0 p0 c0 {1,S} {6,D} {8,S}
3 *1 C u0 p0 c0 {5,D} {9,S} {10,S}
4 *6 C u0 p0 c0 {6,D} {11,S} {12,S}
5 *2 C u0 p0 c0 {1,D} {3,D}
6 *5 C u0 p0 c0 {2,D} {4,D}
7 H u0 p0 c0 {1,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {4,S}
12 H u0 p0 c0 {4,S}
""")

    generated = rxn_family.apply_recipe([starting_structure])

    self.assertEqual(len(generated), 1)

    # Pair each labeled atom of the expected structure with the atom that
    # carries the same label in the generated product.
    label_map = {
        atom: generated[0].get_labeled_atoms(label)[0]
        for label, atom in target.get_all_labeled_atoms().items()
    }

    self.assertTrue(target.is_isomorphic(generated[0], label_map))
class TS(Conformer):
    """
    A class that defines the 3D geometry of a transition state (TS).

    The RDKit geometry is built lazily: it starts from the molecule produced
    by `Conformer.get_rdkit_mol` and, when `distance_data` is available, is
    re-embedded under a bounds matrix whose reaction-center distances are
    pinned to the values in `distance_data.distances`.

    Methods called on `self` but not defined here (`get_ase_mol`,
    `get_geometries`, `calculate_symmetry_number`) are expected to come from
    `Conformer`.
    """

    def __init__(self,
                 smiles=None,
                 reaction_label=None,
                 direction='forward',
                 rmg_molecule=None,
                 reaction_family="H_Abstraction",
                 distance_data=None,
                 index=0):
        """
        :param smiles: SMILES string of the TS complex (optional)
        :param reaction_label: label of the reaction this TS belongs to
        :param direction: "forward" or "reverse" (case-insensitive)
        :param rmg_molecule: RMG molecule of the TS complex (optional)
        :param reaction_family: name of the reaction family; selects which
            atom labels make up the reaction center in `get_labels`
        :param distance_data: object exposing a `distances` mapping with the
            keys 'd12', 'd13' and 'd23' (see `edit_matrix`)
        :param index: index stored on the instance
        """
        self.energy = None
        self.reaction_label = reaction_label
        self.direction = direction.lower()
        self.reaction_family = reaction_family
        self.distance_data = distance_data
        self.index = index
        self.bm = None

        # Validate the normalised value: the attribute is stored lowercased,
        # so mixed-case input such as "Forward" is accepted as well.
        assert self.direction in ["forward", "reverse"
                                  ], "Please provide a valid direction"

        self._rdkit_molecule = None
        self._ase_molecule = None

        if (smiles or rmg_molecule):
            if smiles and rmg_molecule:
                assert rmg_molecule.is_isomorphic(
                    RMGMolecule(smiles=smiles)
                ), "smiles string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule
            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.to_smiles()
            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(smiles=smiles)

            self.rmg_molecule.update_multiplicity()
            self._symmetry_number = None
        else:
            self.smiles = None
            self.rmg_molecule = None
            self._rdkit_molecule = None
            self._pseudo_geometry = None
            self._ase_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []
            self._symmetry_number = None

    def __repr__(self):
        return '<TS "{}">'.format(self.smiles)

    def copy(self):
        """
        Return a new TS carrying copies of this TS's molecule objects.

        NOTE(review): `direction`, `distance_data` and `index` are not passed
        to the new instance, so the copy gets the constructor defaults for
        them - confirm this is intended.
        """
        copy_conf = TS(reaction_label=self.reaction_label,
                       reaction_family=self.reaction_family)
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf._rdkit_molecule = self.rdkit_molecule.__copy__()
        copy_conf._pseudo_geometry = self._pseudo_geometry.__copy__()
        copy_conf._ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        copy_conf.energy = self.energy
        copy_conf._symmetry_number = self._symmetry_number
        return copy_conf

    @property
    def symmetry_number(self):
        """Symmetry number, computed on first access and then cached."""
        if not self._symmetry_number:
            self._symmetry_number = self.calculate_symmetry_number()
        return self._symmetry_number

    @property
    def rdkit_molecule(self):
        """
        RDKit molecule of the TS.

        Generated on first access, but only when `distance_data` is set;
        otherwise the cached value (possibly None) is returned.
        """
        if (self._rdkit_molecule is None) and self.distance_data:
            self._rdkit_molecule = self.get_rdkit_mol()
        return self._rdkit_molecule

    @property
    def ase_molecule(self):
        """ASE molecule of the TS, generated on first access."""
        if (self._ase_molecule is None):
            self._ase_molecule = self.get_ase_mol()
        return self._ase_molecule

    def get_rdkit_mol(self):
        """
        A method to create an rdkit geometry... slightly different than that
        of the conformer method.

        Steps:
        1. take the RDKit molecule built by `Conformer.get_rdkit_mol`;
        2. look up the reaction-center labels (`get_labels`);
        3. when three labels were found, store a copy with one extra single
           bond (1-2 if missing, otherwise 2-3 if missing) in
           `self._pseudo_geometry`;
        4. when `distance_data` is set, build, edit and smooth the bounds
           matrix and re-embed the molecule with it.

        :return: the RDKit molecule (the bounds matrix is kept on `self.bm`)
        """
        self._rdkit_molecule = Conformer(
            rmg_molecule=self.rmg_molecule).get_rdkit_mol()

        self.get_labels()

        # The RMG and RDKit atom orderings must agree, because sorting labels
        # are used directly as RDKit atom indices below.
        for i, atom in enumerate(self.rmg_molecule.atoms):
            assert atom.number == self.rdkit_molecule.GetAtoms(
            )[i].GetAtomicNum()

        if len(self.labels) == 3:
            rd_copy = Chem.RWMol(self.rdkit_molecule.__copy__())

            lbl1, lbl2, lbl3 = self.labels

            if not rd_copy.GetBondBetweenAtoms(lbl1, lbl2):
                rd_copy.AddBond(lbl1,
                                lbl2,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            elif not rd_copy.GetBondBetweenAtoms(lbl2, lbl3):
                rd_copy.AddBond(lbl2,
                                lbl3,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)

            self._pseudo_geometry = rd_copy

        logging.info("Initially embedded molecule")

        self.bm = None

        if self.distance_data:
            logging.info("Getting bounds matrix")
            self.bm = self.get_bounds_matrix()

            if len(self.labels) > 0:
                logging.info("Editing bounds matrix")
                self.bm = self.edit_matrix()

            logging.info("Performing triangle smoothing on bounds matrix.")
            DistanceGeometry.DoTriangleSmoothing(self.bm)

            logging.info("Now attempting to embed using edited bounds matrix.")

            self.rd_embed()
        return self.rdkit_molecule

    def get_bounds_matrix(self):
        """
        A method to obtain the bounds matrix of the current RDKit molecule.

        :return bm: the bounds matrix, also stored on `self.bm`
        """
        self.bm = rdDistGeom.GetMoleculeBoundsMatrix(self.rdkit_molecule)
        return self.bm

    def set_limits(self, lbl1, lbl2, value, uncertainty):
        """
        A method to set the limits of a particular distance between two atoms
        in `self.bm`.

        The entry whose row index is smaller than its column index receives
        `value + uncertainty / 2`; the mirrored entry receives
        `value - uncertainty / 2`, clamped at zero.

        :param lbl1: the label (index) of one atom
        :param lbl2: the label (index) of another atom
        :param value: the distance from a distance data object (float)
        :param uncertainty: the uncertainty of the `value` distance (float)
        :return bm: the edited bounds matrix
        """
        logging.info(
            "For atoms {0} and {1} we have a distance of: \t {2}".format(
                lbl1, lbl2, value))
        if lbl1 > lbl2:
            self.bm[lbl2][lbl1] = value + uncertainty / 2
            self.bm[lbl1][lbl2] = max(0, value - uncertainty / 2)
        else:
            self.bm[lbl2][lbl1] = max(0, value - uncertainty / 2)
            self.bm[lbl1][lbl2] = value + uncertainty / 2

        return self.bm

    def bm_pre_edit(self, sect):
        """
        Clean up some of the atom distance limits before attempting triangle
        smoothing. This ensures any edits made do not lead to unsolvable
        scenarios for the molecular embedding algorithm.

        For every pair i > j and every third atom k, the entry `bm[i, j]` is
        capped at `u_ik + u_kj - 0.1`, where `u_ik` / `u_kj` are read from
        the entries whose row index is smaller than their column index.

        :param sect: the list of atom indices belonging to one species
        :return bm: the edited bounds matrix
        """
        # `others` is not used by the loops below. It is kept because
        # list.remove raises ValueError for an index that is out of range or
        # repeated in `sect`, which callers may currently observe.
        others = list(range(len(self.bm)))
        for idx in sect:
            others.remove(idx)

        size = len(self.bm)
        for i in range(size):
            for j in range(i):
                # j < i always holds here, so only k needs to be filtered.
                for k in range(size):
                    if k == i or k == j:
                        continue
                    u_ik = self.bm[i, k] if k > i else self.bm[k, i]
                    u_kj = self.bm[j, k] if k > j else self.bm[k, j]

                    max_lij = u_ik + u_kj - 0.1
                    if self.bm[i, j] > max_lij:
                        logging.info("Changing lower limit {0} to {1}".format(
                            self.bm[i, j], max_lij))
                        self.bm[i, j] = max_lij

        return self.bm

    def get_labels(self):
        """
        A method to get the labeled atoms of the reaction center.

        The labels are the `sorting_label` values of the atoms tagged
        "*1", "*2", "*3" (families h_abstraction, r_addition_multiplebond,
        intra_h_migration) or "*2", "*4", "*1" (disproportionation), in that
        order.

        When the molecule has no labeled atoms, or the reaction family is not
        one of those listed above, empty results are returned instead of
        failing on an unbound local or a missing label key.

        :return labels: the atom labels corresponding to the reaction center
        :return atom_match: a tuple of 1-tuples, one per label
        """
        labels = []
        atom_match = ()

        family = self.reaction_family.lower()
        if family in [
                'h_abstraction', 'r_addition_multiplebond',
                'intra_h_migration'
        ]:
            wanted = ("*1", "*2", "*3")
        elif family in ['disproportionation']:
            wanted = ("*2", "*4", "*1")
        else:
            wanted = ()

        labeled_atoms = self.rmg_molecule.get_all_labeled_atoms()
        if wanted and len(labeled_atoms) > 0:
            labels = [labeled_atoms[star].sorting_label for star in wanted]
            atom_match = tuple((lbl, ) for lbl in labels)

        self.labels = labels
        self.atom_match = atom_match

        return self.labels, self.atom_match

    def edit_matrix(self):
        """
        A method to edit the bounds matrix using labels and distance data.

        Requires exactly three labels in `self.labels` and the keys 'd12',
        'd23' and 'd13' in `self.distance_data.distances`.

        :return bm: the edited bounds matrix
        """
        lbl1, lbl2, lbl3 = self.labels

        # Atom indices of the first fragment returned by split().
        sect = [
            atom.sorting_label for atom in self.rmg_molecule.split()[0].atoms
        ]

        uncertainties = {'d12': 0.02, 'd13': 0.02, 'd23': 0.02}
        distances = self.distance_data.distances
        self.bm = self.set_limits(lbl1, lbl2, distances['d12'],
                                  uncertainties['d12'])
        self.bm = self.set_limits(lbl2, lbl3, distances['d23'],
                                  uncertainties['d23'])
        self.bm = self.set_limits(lbl1, lbl3, distances['d13'],
                                  uncertainties['d13'])

        self.bm = self.bm_pre_edit(sect)

        return self.bm

    def optimize_rdkit_molecule(self):
        """
        Optimizes the RDKit molecule and determines the lowest-energy
        conformer.

        Without a bounds matrix or atom match each conformer is optimized
        with UFF; otherwise `EmbedLib.OptimizeMol` is used with the bounds
        matrix and atom matches.

        :return: the RDKit molecule and min_eid (the id of the lowest energy
            conformer)
        """
        energy = 0.0
        min_eid = 0
        # start with a very high number, which would never be reached
        lowest_e = 9.999999e99

        for conf in self.rdkit_molecule.GetConformers():
            if (self.bm is None) or (self.atom_match is None):
                AllChem.UFFOptimizeMolecule(self._rdkit_molecule,
                                            confId=conf.GetId())
                energy = AllChem.UFFGetMoleculeForceField(
                    self._rdkit_molecule, confId=conf.GetId()).CalcEnergy()
            else:
                _, energy = EmbedLib.OptimizeMol(self._rdkit_molecule,
                                                 self.bm,
                                                 atomMatches=self.atom_match,
                                                 forceConstant=100000.0)

            if energy < lowest_e:
                min_eid = conf.GetId()
                lowest_e = energy

        return self.rdkit_molecule, min_eid

    def rd_embed(self):
        """
        This portion of the script is literally taken from rmgpy but hacked
        to work without defining a geometry object.

        Embed the RDKit molecule and optimize the result.

        :return: the RDKit molecule and the id of its lowest-energy
            conformer, or (None, None) when every bounds-matrix embedding
            attempt failed
        """
        num_conf_attempts = 10000

        if (self.bm is None) or (self.atom_match is None):
            AllChem.EmbedMultipleConfs(self._rdkit_molecule,
                                       num_conf_attempts,
                                       randomSeed=1)

            # Both branches bind `min_eid`, the name returned below; this
            # branch used to bind a differently spelled local.
            self._rdkit_molecule, min_eid = self.optimize_rdkit_molecule()
        else:
            # Embed the molecule according to the bounds matrix. Built to
            # handle possible failures of some of the embedding attempts.
            self._rdkit_molecule.RemoveAllConformers()
            for i in range(0, num_conf_attempts):
                try:
                    EmbedLib.EmbedMol(self._rdkit_molecule,
                                      self.bm,
                                      atomMatch=self.atom_match)
                    break
                except ValueError:
                    logging.info(
                        "RDKit failed to embed on attempt {0} of {1}".format(
                            i + 1, num_conf_attempts))
                except RuntimeError:
                    logging.info("RDKit failed to embed.")
            else:
                logging.error("RDKit failed all attempts to embed")
                return None, None

            # RDKit currently embeds the conformers and sets the id as 0, so
            # even though multiple conformers have been generated, only 1
            # can be called. Below the id's are resolved.
            for i in range(len(self.rdkit_molecule.GetConformers())):
                self.rdkit_molecule.GetConformers()[i].SetId(i)

            self._rdkit_molecule, min_eid = self.optimize_rdkit_molecule()

        return self._rdkit_molecule, min_eid

    def _pseudo_conformer(self):
        """
        Build a Conformer that wraps this TS's pseudo geometry.

        `_pseudo_geometry` does not exist until `get_rdkit_mol` has stored
        it, so a missing attribute triggers that call once.
        """
        test_conf = Conformer()
        test_conf.rmg_molecule = self.rmg_molecule
        try:
            test_conf._rdkit_molecule = self._pseudo_geometry
        except AttributeError:
            self.get_rdkit_mol()
            test_conf._rdkit_molecule = self._pseudo_geometry
        test_conf._ase_molecule = self.ase_molecule
        return test_conf

    def get_bonds(self):
        """Return the bonds computed by `Conformer.get_bonds` on the pseudo geometry."""
        return self._pseudo_conformer().get_bonds()

    def get_torsions(self):
        """Return the torsions computed by `Conformer.get_torsions` on the pseudo geometry."""
        return self._pseudo_conformer().get_torsions()

    def get_angles(self):
        """Return the angles computed by `Conformer.get_angles` on the pseudo geometry."""
        return self._pseudo_conformer().get_angles()