def validate_irc(self, calc=None):
    """
    A method to verify an IRC calc

    The log file ``<calc.scratch>/<calc.label>.log`` must exist and report
    "Normal termination" within its last five lines. The geometry at the end
    of the first IRC path and the last geometry in the file are converted to
    RMG molecules and compared, as a reaction, against the reaction encoded
    in the calculator label (``<reactants>_<products>_irc...`` with species
    SMILES joined by ``+``). Every combination of resonance structures of the
    expected species is tried.

    :param calc: an IRC calculator exposing ``label`` and ``scratch``
    :return: True if the IRC end points are isomorphic to the expected
        reaction, False otherwise (missing log, failed run, no IRC steps,
        or mismatching species)
    :raises ValueError: if no calculator is provided
    :raises AssertionError: if ``calc.label`` does not contain "irc"
    """
    if calc is None:
        raise ValueError("An IRC calculator must be provided")
    assert "irc" in calc.label, "The calculator provided is not an IRC calculator"

    reaction_label = calc.label.split("_irc")[0]

    logging.info("Validating IRC file...")
    irc_path = os.path.join(calc.scratch, calc.label + ".log")
    if not os.path.exists(irc_path):
        logging.info("It seems that the IRC claculation has not been run.")
        return False

    # Read the log once inside a context manager so the handle is always
    # closed; the same lines serve the termination check and the path parsing.
    with open(irc_path, "r") as irc_file:
        log_lines = irc_file.readlines()

    if not any("Normal termination" in tail for tail in log_lines[-5:]):
        logging.info("IRC failed... could not be validated...")
        return False
    logging.info("IRC successfully ran")

    pth1 = []   # point numbers (> 0) reported for path number 1
    steps = []  # every value reported on a "# OF STEPS =" line
    for line in log_lines:
        line = line.strip()
        if line.startswith('Point Number:'):
            fields = line.split()
            if int(fields[2]) > 0 and int(fields[-1]) == 1:
                pth1.append(int(fields[2]))
        elif line.startswith('# OF STEPS ='):
            steps.append(int(line.split()[-1]))

    if not steps:
        logging.error('No steps taken in the IRC calculation!')
        return False
    if not pth1:
        # Without a path-1 point there is nothing to index the geometries with
        # (indexing pth1[-1] would raise IndexError).
        logging.error('No points were found on the first IRC path!')
        return False

    # This indexes the coordinate to be used from the parsing
    pth1End = sum(steps[:pth1[-1]])

    # Compare the reactants and products
    # cf. http://cclib.sourceforge.net/wiki/index.php/Using_cclib#Additional_information
    ircParse = ccread(irc_path)
    atomcoords = ircParse.atomcoords
    atomnos = ircParse.atomnos

    # Convert the IRC geometries into RMG molecules
    # We don't know which is reactant or product, so take the two at the end of the
    # paths and compare to the reactants and products
    mol1 = RMGMolecule()
    mol1.fromXYZ(atomnos, atomcoords[pth1End])
    mol2 = RMGMolecule()
    mol2.fromXYZ(atomnos, atomcoords[-1])

    testReaction = RMGReaction(
        reactants=mol1.split(),
        products=mol2.split(),
    )

    r, p = reaction_label.split("_")
    reactants = [RMGMolecule(SMILES=smiles) for smiles in r.split("+")]
    products = [RMGMolecule(SMILES=smiles) for smiles in p.split("+")]

    # One resonance structure per species, in every possible combination
    possible_reactants = list(itertools.product(
        *[reactant.generate_resonance_structures() for reactant in reactants]))
    possible_products = list(itertools.product(
        *[product.generate_resonance_structures() for product in products]))

    for possible_reactant in possible_reactants:
        reactant_list = [react.toSingleBonds() for react in possible_reactant]
        for possible_product in possible_products:
            product_list = [prod.toSingleBonds() for prod in possible_product]
            targetReaction = RMGReaction(reactants=list(reactant_list),
                                         products=list(product_list))
            if targetReaction.isIsomorphic(testReaction):
                logging.info("IRC calculation was successful!")
                return True

    logging.info("IRC calculation failed for {} :(".format(calc.label))
    return False
class TS(Conformer):
    """
    A class that defines the 3D geometry of a transition state (TS)
    """

    def __init__(self, smiles=None, reaction_label=None, direction='forward',
                 rmg_molecule=None, reaction_family="H_Abstraction",
                 distance_data=None, index=0):
        """
        :param smiles: SMILES string of the TS complex (optional)
        :param reaction_label: label of the reaction this TS belongs to
        :param direction: "forward" or "reverse" (case-insensitive)
        :param rmg_molecule: RMG molecule of the TS complex (optional)
        :param reaction_family: name of the reaction family
        :param distance_data: object exposing a ``distances`` mapping with the
            keys 'd12', 'd13' and 'd23' (used by `edit_matrix`)
        :param index: index of this TS
        :raises AssertionError: on an invalid direction, or when both `smiles`
            and `rmg_molecule` are given but are not isomorphic
        """
        self.energy = None
        self.reaction_label = reaction_label
        self.direction = direction.lower()
        self.reaction_family = reaction_family
        self.distance_data = distance_data
        self.index = index
        self.bm = None

        # Check the normalised value: the stored direction is lower-cased, so
        # the assertion has to look at the same string.
        assert self.direction in ["forward", "reverse"], "Please provide a valid direction"

        # Backing fields of the lazy properties; always initialised so every
        # instance can be queried regardless of how it was constructed.
        self._rdkit_molecule = None
        self._ase_molecule = None
        self._pseudo_geometry = None
        self._symmetry_number = None

        if (smiles or rmg_molecule):
            if smiles and rmg_molecule:
                assert rmg_molecule.isIsomorphic(
                    RMGMolecule(SMILES=smiles)
                ), "SMILES string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule
            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.toSMILES()
            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(SMILES=smiles)

            # NOTE(review): assumed to apply to all three cases above -- confirm
            self.rmg_molecule.updateMultiplicity()
        else:
            self.smiles = None
            self.rmg_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []

    def __repr__(self):
        return '<TS "{}">'.format(self.smiles)

    def copy(self):
        """
        Return a new TS carrying copies of this TS's molecules and geometry.

        The reaction label, family, direction, distance data and index are
        passed to the new instance; the RMG, RDKit, pseudo-geometry and ASE
        objects are copied, then `get_geometries` is called on the copy.
        """
        copy_conf = TS(reaction_label=self.reaction_label,
                       direction=self.direction,
                       reaction_family=self.reaction_family,
                       distance_data=self.distance_data,
                       index=self.index)
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf.rdkit_molecule = self.rdkit_molecule.__copy__()
        copy_conf._pseudo_geometry = self._pseudo_geometry.__copy__()
        copy_conf.ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        copy_conf.energy = self.energy
        copy_conf._symmetry_number = self._symmetry_number
        return copy_conf

    @property
    def symmetry_number(self):
        """The symmetry number, calculated on first access and then cached."""
        if not self._symmetry_number:
            self._symmetry_number = self.calculate_symmetry_number()
        return self._symmetry_number

    @property
    def rdkit_molecule(self):
        """The RDKit molecule; built lazily, but only when distance data exist."""
        if (self._rdkit_molecule is None) and self.distance_data:
            self._rdkit_molecule = self.get_rdkit_mol()
        return self._rdkit_molecule

    @rdkit_molecule.setter
    def rdkit_molecule(self, value):
        # The class itself assigns to this attribute (copy, get_rdkit_mol,
        # rd_embed), which a getter-only property would reject.
        self._rdkit_molecule = value

    @property
    def ase_molecule(self):
        """The ASE molecule; built lazily on first access."""
        if (self._ase_molecule is None):
            self._ase_molecule = self.get_ase_mol()
        return self._ase_molecule

    @ase_molecule.setter
    def ase_molecule(self, value):
        self._ase_molecule = value

    def get_rdkit_mol(self):
        """
        A method to create an rdkit geometry... slightly different than that of the conformer method

        Besides the RDKit molecule, this sets `labels`/`atom_match` (via
        `get_labels`), `_pseudo_geometry` (when three labels are found) and
        `bm` (when distance data are available).

        :return: the rdkit molecule
        """
        self.rdkit_molecule = Conformer(
            rmg_molecule=self.rmg_molecule).get_rdkit_mol()

        self.get_labels()
        # The RMG and RDKit atoms must come in the same order
        for i, atom in enumerate(self.rmg_molecule.atoms):
            assert atom.number == self.rdkit_molecule.GetAtoms(
            )[i].GetAtomicNum()

        if len(self.labels) == 3:
            # Copy of the molecule with one extra single bond added between
            # labelled atoms; only the first missing bond is added.
            rd_copy = Chem.RWMol(self.rdkit_molecule.__copy__())
            lbl1, lbl2, lbl3 = self.labels
            if not rd_copy.GetBondBetweenAtoms(lbl1, lbl2):
                rd_copy.AddBond(lbl1, lbl2,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            elif not rd_copy.GetBondBetweenAtoms(lbl2, lbl3):
                rd_copy.AddBond(lbl2, lbl3,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            self._pseudo_geometry = rd_copy

        logging.info("Initially embedded molecule")

        self.bm = None
        if self.distance_data:
            logging.info("Getting bounds matrix")
            self.bm = self.get_bounds_matrix()

            if len(self.labels) > 0:
                logging.info("Editing bounds matrix")
                self.bm = self.edit_matrix()

            logging.info("Performing triangle smoothing on bounds matrix.")
            DistanceGeometry.DoTriangleSmoothing(self.bm)

            logging.info("Now attempting to embed using edited bounds matrix.")
            # NOTE(review): embedding is assumed to happen only when distance
            # data are available -- confirm
            self.rd_embed()
        return self.rdkit_molecule

    def get_bounds_matrix(self):
        """
        A method to obtain the bounds matrix
        """
        self.bm = rdDistGeom.GetMoleculeBoundsMatrix(self.rdkit_molecule)
        return self.bm

    def set_limits(self, lbl1, lbl2, value, uncertainty):
        """
        A method to set the limits of a particular distance between two atoms

        The cell whose row index is the smaller label receives the upper limit
        (``value + uncertainty / 2``); the mirrored cell receives the lower
        limit (``value - uncertainty / 2``, never below 0).

        :param lbl1: the label of one atom
        :param lbl2: the label of another atom
        :param value: the distance from a distance data object (float)
        :param uncertainty: the uncertainty of the `value` distance (float)
        :return bm: an array of arrays corresponding to the edited bounds matrix
        """
        logging.info(
            "For atoms {0} and {1} we have a distance of: \t {2}".format(
                lbl1, lbl2, value))

        lower = max(0, value - uncertainty / 2)
        upper = value + uncertainty / 2
        small, large = (lbl2, lbl1) if lbl1 > lbl2 else (lbl1, lbl2)
        # Lower limit first: if both labels are equal the upper limit wins.
        self.bm[large][small] = lower
        self.bm[small][large] = upper
        return self.bm

    def bm_pre_edit(self, sect):
        """
        Clean up some of the atom distance limits before attempting triangle smoothing.
        This ensures any edits made do not lead to unsolvable scenarios for the molecular
        embedding algorithm.

        For every atom pair (i, j) with j < i, the lower limit ``bm[i, j]`` is
        capped at ``U(i, k) + U(k, j) - 0.1`` for every third atom k, where U
        is the upper limit read from the cell whose row index is the smaller one.

        :param sect: the list of atom indices belonging to one species;
            accepted for compatibility with callers, not used by the clean-up
        :return bm: the edited bounds matrix
        """
        size = len(self.bm)
        for i in range(size):
            for j in range(i):
                for k in range(size):
                    if k == i or k == j:
                        continue
                    Uik = self.bm[i, k] if k > i else self.bm[k, i]
                    Ukj = self.bm[j, k] if k > j else self.bm[k, j]

                    maxLij = Uik + Ukj - 0.1
                    if self.bm[i, j] > maxLij:
                        logging.info("Changing lower limit {0} to {1}".format(
                            self.bm[i, j], maxLij))
                        self.bm[i, j] = maxLij
        return self.bm

    def get_labels(self):
        """
        A method to get the labeled atoms from a reaction

        The labels are the ``sortingLabel`` values of the labelled atoms of
        `rmg_molecule`, in an order that depends on the reaction family. When
        the molecule has no labelled atoms, or the family is not one of the
        supported ones, the labels are empty.

        :return labels: the atom labels corresponding to the reaction center
        :return atomMatch: a tuple of tuples the atoms labels corresponding to the reaction center
        """
        labels = []
        atomMatch = ()

        labeled_atoms = self.rmg_molecule.getLabeledAtoms()
        family = self.reaction_family.lower()

        if family in ['h_abstraction', 'r_addition_multiplebond',
                      'intra_h_migration']:
            keys = ("*1", "*2", "*3")
        elif family in ['disproportionation']:
            keys = ("*2", "*4", "*1")
        else:
            keys = ()

        # Only look the labels up when there is something to look up;
        # otherwise keep the empty defaults.
        if len(labeled_atoms) > 0 and keys:
            labels = [labeled_atoms[key].sortingLabel for key in keys]
            atomMatch = tuple((label, ) for label in labels)

        self.labels = labels
        self.atom_match = atomMatch
        return self.labels, self.atom_match

    def edit_matrix(self):
        """
        A method to edit the bounds matrix using labels and distance data
        """
        lbl1, lbl2, lbl3 = self.labels

        # Atom indices of the first fragment of the TS complex
        sect = [atom.sortingLabel
                for atom in self.rmg_molecule.split()[0].atoms]

        uncertainties = {'d12': 0.02, 'd13': 0.02, 'd23': 0.02}
        distances = self.distance_data.distances
        self.bm = self.set_limits(lbl1, lbl2, distances['d12'],
                                  uncertainties['d12'])
        self.bm = self.set_limits(lbl2, lbl3, distances['d23'],
                                  uncertainties['d23'])
        self.bm = self.set_limits(lbl1, lbl3, distances['d13'],
                                  uncertainties['d13'])

        self.bm = self.bm_pre_edit(sect)
        return self.bm

    def optimize_rdkit_molecule(self):
        """
        Optimizes the rdmol object using UFF.
        Determines the energy level for each of the conformers identified in rdmol.GetConformer.

        Without a bounds matrix or atom match each conformer is optimised with
        UFF; otherwise `EmbedLib.OptimizeMol` is used with the bounds matrix.

        :return rdmol, minEid (index of the lowest energy conformer)
        """
        energy = 0.0
        minEid = 0
        lowestE = 9.999999e99  # start with a very high number, which would never be reached

        for conf in self.rdkit_molecule.GetConformers():
            if (self.bm is None) or (self.atom_match is None):
                AllChem.UFFOptimizeMolecule(self.rdkit_molecule,
                                            confId=conf.GetId())
                energy = AllChem.UFFGetMoleculeForceField(
                    self.rdkit_molecule, confId=conf.GetId()).CalcEnergy()
            else:
                _, energy = EmbedLib.OptimizeMol(self.rdkit_molecule,
                                                 self.bm,
                                                 atomMatches=self.atom_match,
                                                 forceConstant=100000.0)

            if energy < lowestE:
                minEid = conf.GetId()
                lowestE = energy

        return self.rdkit_molecule, minEid

    def rd_embed(self):
        """
        This portion of the script is literally taken from rmgpy but hacked to work without defining a geometry object

        Embed the RDKit molecule and create the crude molecule file.

        :return: ``(rdkit_molecule, minEid)``, or ``(None, None)`` when every
            bounds-matrix embedding attempt fails
        """
        numConfAttempts = 10000

        if (self.bm is None) or (self.atom_match is None):
            AllChem.EmbedMultipleConfs(self.rdkit_molecule, numConfAttempts,
                                       randomSeed=1)
            self.rdkit_molecule, minEid = self.optimize_rdkit_molecule()
        else:
            # Embed the molecule according to the bounds matrix. Built to handle
            # possible failures of some of the embedding attempts.
            self.rdkit_molecule.RemoveAllConformers()
            for i in range(0, numConfAttempts):
                try:
                    EmbedLib.EmbedMol(self.rdkit_molecule, self.bm,
                                      atomMatch=self.atom_match)
                    break
                except ValueError:
                    logging.info(
                        "RDKit failed to embed on attempt {0} of {1}".format(
                            i + 1, numConfAttempts))
                except RuntimeError:
                    logging.info("RDKit failed to embed.")
            else:
                # The loop ran out of attempts without a successful embed
                logging.error("RDKit failed all attempts to embed")
                return None, None

            # RDKit currently embeds the conformers and sets the id as 0, so even though
            # multiple conformers have been generated, only 1 can be called. Below the id's
            # are resolved.
            for i, conf in enumerate(self.rdkit_molecule.GetConformers()):
                conf.SetId(i)

            self.rdkit_molecule, minEid = self.optimize_rdkit_molecule()

        return self.rdkit_molecule, minEid

    def _pseudo_conformer(self):
        """
        Build a plain Conformer carrying this TS's RMG molecule, pseudo
        geometry (as its RDKit molecule) and ASE molecule.
        """
        test_conf = Conformer()
        test_conf.rmg_molecule = self.rmg_molecule
        test_conf.rdkit_molecule = self._pseudo_geometry
        test_conf.ase_molecule = self.ase_molecule
        return test_conf

    def get_bonds(self):
        """Return the bonds found by `Conformer.get_bonds` on the pseudo geometry."""
        return self._pseudo_conformer().get_bonds()

    def get_torsions(self):
        """Return the torsions found by `Conformer.get_torsions` on the pseudo geometry."""
        return self._pseudo_conformer().get_torsions()

    def get_angles(self):
        """Return the angles found by `Conformer.get_angles` on the pseudo geometry."""
        return self._pseudo_conformer().get_angles()
def validate_irc(self):
    """
    Verify that the IRC calculation connects the expected reactants and products.

    The log file ``<irc_calc.scratch>/<irc_calc.label>.log`` is looked up; if
    it is missing, the same path with "left"/"right" replaced by "("/")" is
    tried. The log must report " Normal termination" within its last five
    lines. The geometry at the end of the first IRC path and the last geometry
    in the file are converted to RMG molecules and compared, as a reaction,
    with ``self.reaction.rmg_reaction`` (all bonds converted to single bonds).

    :return: True if the IRC end points are isomorphic to the expected
        reaction, False otherwise
    """
    # TODO: need to add more verification here
    logging.info("Validating IRC file...")
    irc_path = os.path.join(self.irc_calc.scratch,
                            self.irc_calc.label + ".log")
    if not os.path.exists(irc_path):
        logging.info(
            "It seems that the file was `fixed`, reading in the `fixed` version.")
        irc_path = irc_path.replace("left", "(").replace("right", ")")
        if not os.path.exists(irc_path):
            logging.info(
                "It seems that the IRC claculation has not been run.")
            return False

    # Read the log once inside a context manager so the handle is always
    # closed; the same lines serve the termination check and the path parsing.
    with open(irc_path, "r") as irc_file:
        log_lines = irc_file.readlines()

    if not any(" Normal termination" in tail for tail in log_lines[-5:]):
        logging.info("IRC failed... could not be validated...")
        return False
    logging.info("IRC successfully ran")

    pth1 = []   # point numbers (> 0) reported for path number 1
    steps = []  # every value reported on a "# OF STEPS =" line
    for line in log_lines:
        line = line.strip()
        if line.startswith('Point Number:'):
            fields = line.split()
            if int(fields[2]) > 0 and int(fields[-1]) == 1:
                pth1.append(int(fields[2]))
        elif line.startswith('# OF STEPS ='):
            steps.append(int(line.split()[-1]))

    if not steps:
        logging.error('No steps taken in the IRC calculation!')
        return False
    if not pth1:
        # Without a path-1 point there is nothing to index the geometries with
        # (indexing pth1[-1] would raise IndexError).
        logging.error('No points were found on the first IRC path!')
        return False

    # This indexes the coordinate to be used from the parsing
    pth1End = sum(steps[:pth1[-1]])

    # Compare the reactants and products
    # cf. http://cclib.sourceforge.net/wiki/index.php/Using_cclib#Additional_information
    ircParse = ccread(irc_path)
    atomcoords = ircParse.atomcoords
    atomnos = ircParse.atomnos

    # Convert the IRC geometries into RMG molecules
    # We don't know which is reactant or product, so take the two at the end of the
    # paths and compare to the reactants and products
    mol1 = Molecule()
    mol1.fromXYZ(atomnos, atomcoords[pth1End])
    mol2 = Molecule()
    mol2.fromXYZ(atomnos, atomcoords[-1])

    testReaction = Reaction(
        reactants=mol1.split(),
        products=mol2.split(),
    )

    rmg_reaction = self.reaction.rmg_reaction
    first_reactant = rmg_reaction.reactants[0]
    if isinstance(first_reactant, rmgpy.molecule.Molecule):
        targetReaction = Reaction(
            reactants=[reactant.toSingleBonds()
                       for reactant in rmg_reaction.reactants],
            products=[product.toSingleBonds()
                      for product in rmg_reaction.products],
        )
    elif isinstance(first_reactant, rmgpy.species.Species):
        targetReaction = Reaction(
            reactants=[reactant.molecule[0].toSingleBonds()
                       for reactant in rmg_reaction.reactants],
            products=[product.molecule[0].toSingleBonds()
                      for product in rmg_reaction.products],
        )
    else:
        # Neither a Molecule nor a Species: no target reaction can be built,
        # so the IRC cannot be validated.
        logging.error(
            "Unsupported reactant type {}; IRC could not be validated".format(
                type(first_reactant)))
        return False

    if targetReaction.isIsomorphic(testReaction):
        return True
    return False
def validate_irc(self):
    """
    A method to verify an IRC calc

    The log file is expected at
    ``<directory>/ts/<reaction_label>/irc/<reaction_label>_irc_<direction>_<index>.log``
    and is first checked with `verify_output_file`, which returns a
    ``(complete, converged)`` pair. The geometry at the end of the first IRC
    path and the last geometry in the file are converted to RMG molecules and
    compared, as a reaction, against the reaction encoded in the conformer's
    reaction label (``<reactants>_<products>`` with species SMILES joined by
    ``+``). Every combination of resonance structures of the expected species
    is tried.

    :return: True if the IRC end points are isomorphic to the expected
        reaction, False otherwise
    """
    logging.info("Validating IRC file...")
    reaction_label = self.conformer.reaction_label
    irc_label = (reaction_label + "_irc_" + self.conformer.direction +
                 "_" + str(self.conformer.index))
    irc_path = os.path.join(self.directory, "ts", reaction_label, "irc",
                            irc_label + ".log")

    complete, converged = self.verify_output_file(irc_path)
    if not complete:
        logging.info("It seems that the IRC claculation did not complete")
        return False
    if not converged:
        logging.info("The IRC calculation did not converge...")
        return False

    pth1 = []   # point numbers (> 0) reported for path number 1
    steps = []  # every value reported on a "# OF STEPS =" line
    with open(irc_path) as outputFile:
        for line in outputFile:
            line = line.strip()
            if line.startswith('Point Number:'):
                fields = line.split()
                if int(fields[2]) > 0 and int(fields[-1]) == 1:
                    pth1.append(int(fields[2]))
            elif line.startswith('# OF STEPS ='):
                steps.append(int(line.split()[-1]))

    if not steps:
        logging.error('No steps taken in the IRC calculation!')
        return False
    if not pth1:
        # Without a path-1 point there is nothing to index the geometries with
        # (indexing pth1[-1] would raise IndexError).
        logging.error('No points were found on the first IRC path!')
        return False

    # This indexes the coordinate to be used from the parsing
    pth1End = sum(steps[:pth1[-1]])

    # Compare the reactants and products
    ircParse = ccread(irc_path)
    atomcoords = ircParse.atomcoords
    atomnos = ircParse.atomnos

    mol1 = RMGMolecule()
    mol1.fromXYZ(atomnos, atomcoords[pth1End])
    mol2 = RMGMolecule()
    mol2.fromXYZ(atomnos, atomcoords[-1])

    testReaction = RMGReaction(
        reactants=mol1.split(),
        products=mol2.split(),
    )

    r, p = reaction_label.split("_")
    reactants = [RMGMolecule(SMILES=smiles) for smiles in r.split("+")]
    products = [RMGMolecule(SMILES=smiles) for smiles in p.split("+")]

    # One resonance structure per species, in every possible combination
    possible_reactants = list(itertools.product(
        *[reactant.generate_resonance_structures() for reactant in reactants]))
    possible_products = list(itertools.product(
        *[product.generate_resonance_structures() for product in products]))

    for possible_reactant in possible_reactants:
        reactant_list = [react.toSingleBonds() for react in possible_reactant]
        for possible_product in possible_products:
            product_list = [prod.toSingleBonds() for prod in possible_product]
            targetReaction = RMGReaction(reactants=list(reactant_list),
                                         products=list(product_list))
            if targetReaction.isIsomorphic(testReaction):
                logging.info("IRC calculation was successful!")
                return True

    # Report the label of this IRC job; no calculator object is in scope here.
    logging.info("IRC calculation failed for {} :(".format(irc_label))
    return False