def _get_isomorphisms(self, test_atoms):
    """Find every mapping of the reference molecule onto ``test_atoms``.

    Side effects: all bond orders of both ``self.ref`` and ``test_atoms``
    are set to 1 and their atom types are reassigned in place before the
    search is run.

    Parameters
    ----------
    test_atoms
        OpenBabel molecule to be matched against ``self.ref``.

    Returns
    -------
    ob.vvpairUIntUInt
        The container filled in by ``OBIsomorphismMapper.MapAll``.
    """
    ref = self.ref
    mol = test_atoms

    # Make all bond orders 1 and reassign atom types.
    # This was described by the original author as "a magic recipe";
    # presumably it makes the matching independent of perceived bond
    # orders -- TODO confirm.
    for bond in ob.OBMolBondIter(ref):
        bond.SetBondOrder(1)
    for bond in ob.OBMolBondIter(mol):
        bond.SetBondOrder(1)
    ob.OBAtomTyper().AssignTypes(ref)
    ob.OBAtomTyper().AssignTypes(mol)

    # Compile the reference into a query and collect all isomorphisms.
    query = ob.CompileMoleculeQuery(ref)
    mapper = ob.OBIsomorphismMapper.GetInstance(query)
    isomorphs = ob.vvpairUIntUInt()
    mapper.MapAll(mol, isomorphs)
    return isomorphs
def adjacency_matrix(mol) -> np.ndarray:
    """
    Build the adjacency matrix of a molecule from its OpenBabel bonds.

    Parameters
    ----------
    mol: Molecule
        Object exposing ``atoms`` (sized) and ``OBMol`` (OpenBabel molecule).

    Returns
    -------
    np.ndarray
        Square integer matrix with a 1 wherever two atoms share a bond.
    """
    size = len(mol.atoms)
    matrix = np.zeros((size, size), dtype=int)

    for ob_bond in ob.OBMolBondIter(mol.OBMol):
        # OpenBabel atom indices start at 1; shift to 0-based positions.
        begin: int = ob_bond.GetBeginAtomIdx() - 1
        end: int = ob_bond.GetEndAtomIdx() - 1

        # Fill both triangles: the molecular graph is undirected.
        matrix[begin, end] = 1
        matrix[end, begin] = 1

    return matrix
def remove_bad_geometry(self, ob_mol):
    '''
    Delete bonds that are overly stretched or that form too small
    an angle, visiting the bonds in the order produced by
    sort_bonds_by_stretch and only deleting a bond when its two
    atoms satisfy reachable() (the check used to avoid fragmenting
    the molecule).
    '''
    ranked_bonds = sort_bonds_by_stretch(ob.OBMolBondIter(ob_mol))

    for stretch, _length, candidate in ranked_bonds:
        begin = candidate.GetBeginAtom()
        end = candidate.GetEndAtom()

        # a bond is a removal candidate when it is stretched beyond
        # the allowed maximum (0.45 in ConnectTheDots) or when it
        # forms a tight angle at either end, as done in openbabel
        is_strained = (
            stretch > self.max_bond_stretch
            or forms_small_angle(begin, end, self.min_bond_angle)
            or forms_small_angle(end, begin, self.min_bond_angle)
        )
        if not is_strained:
            continue

        # only remove the bond if that does not fragment the molecule
        if reachable(begin, end):
            ob_mol.DeleteBond(candidate)

    # deleting bonds resets this flag, so set it again
    ob_mol.SetHybridizationPerceived(True)
def set_aromaticity(self, ob_mol, atoms, struct):
    '''
    Assign aromaticity to atoms and ring bonds.

    If the typer of struct carries Atom.aromatic, atom aromaticity
    is copied from struct.atom_types; otherwise openbabel is asked
    to perceive it. Afterwards every ring bond is flagged aromatic
    exactly when both of its atoms are aromatic (non-ring bonds are
    left untouched).
    '''
    if Atom.aromatic in struct.typer:
        # take aromaticity from the atom types
        for ob_atom, atom_type in zip(atoms, struct.atom_types):
            ob_atom.SetAromatic(bool(atom_type.aromatic))

        # mark aromaticity as perceived so openbabel keeps our flags
        ob_mol.SetAromaticPerceived(True)
    else:
        # clear the flag so that openbabel perceives aromaticity
        ob_mol.SetAromaticPerceived(False)

        # querying the atoms triggers the perception
        for ob_atom in atoms:
            ob_atom.IsAromatic()

    # ring bonds are aromatic iff both end atoms are aromatic
    for ring_bond in ob.OBMolBondIter(ob_mol):
        begin = ring_bond.GetBeginAtom()
        end = ring_bond.GetEndAtom()
        if not ring_bond.IsInRing():
            continue
        ring_bond.SetAromatic(begin.IsAromatic() and end.IsAromatic())
def get_bond_info(self):
    """Append one formatted bond-table line per bond of ``self.mol``.

    Each line holds the 0-based begin and end atom indices followed by
    the value of ``self.return_bondtype(bond)``, and is handed to
    ``self.update`` through the ``bond_table`` keyword.
    """
    for ob_bond in ob.OBMolBondIter(self.mol):
        begin = ob_bond.GetBeginAtomIdx()
        end = ob_bond.GetEndAtomIdx()
        bond_label = self.return_bondtype(ob_bond)
        # OpenBabel indices are shifted by one to make them 0-based.
        entry = "%4i%4i%4s\n" % (begin - 1, end - 1, bond_label)
        self.update(bond_table=entry)
def openbabel_to_rdkit_mol(mol: openbabel.OBMol) -> Chem.Mol:
    """Build a sanitized RDKit molecule from an OpenBabel molecule.

    Atomic number, isotope, formal charge, partial charge and radical
    electron count are copied per atom; bonds are copied with their
    order translated to an RDKit bond type.

    :param mol: OpenBabel molecule
    :raises KeyError: if a bond order has no entry in the lookup table
    """
    editable = Chem.rdchem.EditableMol(Chem.rdchem.Mol())

    for ob_atom in openbabel.OBMolAtomIter(mol):
        rd_atom = Chem.Atom(ob_atom.GetAtomicNum())
        rd_atom.SetIsotope(ob_atom.GetIsotope())
        rd_atom.SetFormalCharge(ob_atom.GetFormalCharge())
        rd_atom.SetDoubleProp('_PartialCharge', ob_atom.GetPartialCharge())

        # A spin multiplicity of 0 means "no radical electrons";
        # otherwise the count is multiplicity - 1.
        multiplicity = ob_atom.GetSpinMultiplicity()
        if multiplicity != 0:
            radical_electrons = multiplicity - 1
        else:
            radical_electrons = 0
        rd_atom.SetNumRadicalElectrons(radical_electrons)

        editable.AddAtom(rd_atom)

    # OpenBabel bond order -> RDKit bond type
    bond_types = {
        1: Chem.rdchem.BondType.SINGLE,
        2: Chem.rdchem.BondType.DOUBLE,
        3: Chem.rdchem.BondType.TRIPLE,
        4: Chem.rdchem.BondType.QUADRUPLE,
        5: Chem.rdchem.BondType.QUINTUPLE,
        1.5: Chem.rdchem.BondType.AROMATIC,
    }

    for ob_bond in openbabel.OBMolBondIter(mol):
        # OpenBabel atom indices are 1-based, RDKit's are 0-based.
        begin = ob_bond.GetBeginAtomIdx() - 1
        end = ob_bond.GetEndAtomIdx() - 1
        editable.AddBond(begin, end, bond_types[ob_bond.GetBondOrder()])

    result = editable.GetMol()
    Chem.SanitizeMol(result)
    return result
def get_bonds(self):
    """
    Yield a ``Bond`` (begin atom id, end atom id, bond order) for each
    bond of the wrapped OpenBabel molecule.
    """
    # self._obmol.ConnectTheDots()  # what happens if called more than once?
    for ob_bond in openbabel.OBMolBondIter(self._obmol):
        begin_id = ob_bond.GetBeginAtom().GetId()
        end_id = ob_bond.GetEndAtom().GetId()
        yield Bond(begin_id, end_id, order=ob_bond.GetBondOrder())
def bonds(mol) -> List[Tuple[int, int]]:
    """
    Collect the bonds of a molecule as index pairs.

    Parameters
    ----------
    mol: Molecule
        Object exposing the OpenBabel molecule as ``OBMol``.

    Returns
    -------
    List[Tuple[int, int]]
        One ``(begin, end)`` tuple per bond, in OpenBabel iteration order.

    Notes
    -----
    Indices are 0-based: OpenBabel's 1-based atom indices are shifted by one.
    """
    return [
        (ob_bond.GetBeginAtomIdx() - 1, ob_bond.GetEndAtomIdx() - 1)
        for ob_bond in ob.OBMolBondIter(mol.OBMol)
    ]
def testIterators(self):
    """Basic check that at least two iterators are working"""
    mol = pybel.readstring("smi", "c1ccccc1C(=O)Cl")
    obmol = mol.OBMol

    # Atom iterator: nine atoms with the expected atomic numbers.
    atom_list = [atom for atom in ob.OBMolAtomIter(obmol)]
    self.assertEqual(len(atom_list), 9)
    atomic_numbers = []
    for atom in atom_list:
        atomic_numbers.append(atom.GetAtomicNum())
    self.assertEqual(atomic_numbers, [6, 6, 6, 6, 6, 6, 6, 8, 17])

    # Bond iterator: nine bonds.
    bond_list = [bond for bond in ob.OBMolBondIter(obmol)]
    self.assertEqual(len(bond_list), 9)
def remove_bad_valences(self, ob_mol, atoms, struct):
    '''
    Remove hypervalent bonds without fragmenting the
    molecule, and prioritize stretched bonds. Also
    remove bonds between halogens/hydrogens.

    ob_mol is modified in place. max valences come from
    get_max_valences(atoms); struct is not used here.
    '''
    # get max valence of the atoms
    max_vals = get_max_valences(atoms)

    # remove any bonds between halogens or hydrogens
    #   iterate over a snapshot: deleting a bond while the
    #   OBMolBondIter is live shifts the remaining bonds and
    #   causes the bond following each deletion to be skipped
    for bond in list(ob.OBMolBondIter(ob_mol)):
        atom_a = bond.GetBeginAtom()
        atom_b = bond.GetEndAtom()
        if (
            max_vals.get(atom_a.GetIdx(), 1) == 1 and
            max_vals.get(atom_b.GetIdx(), 1) == 1
        ):
            ob_mol.DeleteBond(bond)

    # remove bonds causing larger-than-permitted valences
    #   prioritize atoms with lowest max valence, since they
    #   place the hardest constraint on reachability (e.g O)
    atom_info = sort_atoms_by_valence(atoms, max_vals)
    for max_val, rem_val, atom in atom_info:

        if atom.GetExplicitValence() <= max_val:
            continue
        # else, the atom could have an invalid valence
        #   so check whether we can modify a bond

        bond_info = sort_bonds_by_stretch(ob.OBAtomBondIter(atom))
        for bond_stretch, bond_len, bond in bond_info:

            # do the atoms involved in this bond have bad valences?
            #   since we are modifying the valences in the loop, this
            #   could have changed since calling sort_atoms_by_valence
            a1, a2 = bond.GetBeginAtom(), bond.GetEndAtom()
            max_val_diff = max(  # by how much are the valences over?
                a1.GetExplicitValence() - max_vals.get(a1.GetIdx(), 1),
                a2.GetExplicitValence() - max_vals.get(a2.GetIdx(), 1)
            )
            if max_val_diff > 0:

                bond_order = bond.GetBondOrder()
                if bond_order > max_val_diff:  # decrease bond order
                    bond.SetBondOrder(bond_order - max_val_diff)

                elif reachable(a1, a2):  # don't fragment the molecule
                    ob_mol.DeleteBond(bond)

                # if the current atom now has a permitted valence,
                # break and let other atoms choose next bonds to remove
                if atom.GetExplicitValence() <= max_vals[atom.GetIdx()]:
                    break

    # deleting bonds resets this flag
    ob_mol.SetHybridizationPerceived(True)
def getMoleculeFeatures(self):
    """Collect details, descriptors, atoms and bonds of the constructed molecule.

    Descriptors (InChI, InChIKey, canonical and isomeric SMILES) are produced
    with the pybel ``write`` method. Atoms are named from the internal atom
    index dictionary when an entry exists for their 1-based position, else
    from their OpenBabel type plus a per-type running count.

    Returns:
        dict: with keys "details" (ComponentDetails), "descriptors"
            (ComponentDescriptors), "atoms" (atom name -> ComponentAtom) and
            "bonds" ((atom name, atom name) -> ComponentBond)
    """
    pbMol = self.__pybelMol
    title = pbMol.title
    molWeight = pbMol.molwt
    formula = pbMol.formula
    ccId = title
    ifCharge = pbMol.charge
    logger.info("%s formula %s charge %d mw %f", title, formula, ifCharge, molWeight)
    #
    inchi = pbMol.write("inchi").strip()
    inchiKey = pbMol.write("inchikey").strip()
    smiles = pbMol.write("can", opt={"n": None}).strip()
    isoSmiles = pbMol.write("can", opt={"i": None, "n": None}).strip()
    details = ComponentDetails(ccId=ccId, formula=formula, ifCharge=ifCharge)
    descriptors = ComponentDescriptors(smiles=smiles, isoSmiles=isoSmiles, inchi=inchi, inchiKey=inchiKey)
    #
    # ---- atoms ----
    typeCounts = defaultdict(int)
    atomsByName = {}
    nameByIdx = {}
    for position, pybelAtom in enumerate(pbMol.atoms, 1):
        obAtom = pybelAtom.OBAtom
        obIdx = obAtom.GetIdx()
        atomType = obAtom.GetType()
        typeCounts[atomType] += 1
        if position in self.__atomIdxD:
            atomName = self.__atomIdxD[position]
        else:
            atomName = atomType + str(typeCounts[atomType])
        #
        aromaticFlag = obAtom.IsAromatic()
        chiralFlag = obAtom.IsChiral()
        formalCharge = obAtom.GetFormalCharge()
        cipStereo = None
        atomsByName[atomName] = ComponentAtom(
            name=atomName, aType=atomType, isAromatic=aromaticFlag, isChiral=chiralFlag, CIP=cipStereo, fCharge=formalCharge
        )
        nameByIdx[obIdx] = atomName
        logger.debug("%s Atom %s %s %r %r %s", ccId, atomName, atomType, aromaticFlag, chiralFlag, cipStereo)
    #
    # ---- bonds ----
    bondsByNames = {}
    for obBond in openbabel.OBMolBondIter(pbMol.OBMol):
        # Bond ends are looked up by the same OpenBabel index recorded above.
        beginName = nameByIdx[obBond.GetBeginAtomIdx()]
        endName = nameByIdx[obBond.GetEndAtomIdx()]
        aromaticFlag = obBond.IsAromatic()
        bondOrder = obBond.GetBondOrder()
        cipStereo = None
        logger.debug("Bond %s %s iType %r cipStereo %r aromatic %r", beginName, endName, bondOrder, cipStereo, aromaticFlag)
        #
        bondsByNames[(beginName, endName)] = ComponentBond(iType=bondOrder, isAromatic=aromaticFlag, CIP=cipStereo)
    #
    return {"details": details, "descriptors": descriptors, "atoms": atomsByName, "bonds": bondsByNames}
def bonds(self) -> Dict[int, list]:
    """Return the bonded neighbours of every atom.

    The result maps each 0-based atom index in ``range(self.num_atoms)``
    to a list of the 0-based indices of the atoms bonded to it; atoms
    without bonds map to an empty list. The molecule is obtained from
    ``self.to_obmol()``.
    """
    obmol = self.to_obmol()
    bonds = {k: [] for k in range(self.num_atoms)}
    for bond in ob.OBMolBondIter(obmol):
        # OpenBabel atom indices are 1-based; shift to 0-based.
        a, b = bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1
        # Record the bond from both ends.
        bonds[a].append(b)
        bonds[b].append(a)
    return bonds
def _make_bond_connectivity_from_openbabel(self, obmol):
    """Extract bonding information from an Open Babel molecule.

    Returns a list with one tuple per bond: the ``GetIndex()`` value of
    the begin atom, the ``GetIndex()`` value of the end atom, and the
    bond order.
    """
    return [
        (
            bond.GetBeginAtom().GetIndex(),
            bond.GetEndAtom().GetIndex(),
            bond.GetBondOrder(),
        )
        for bond in ob.OBMolBondIter(obmol)
    ]
def remove_bond(self, idx1, idx2):
    """
    Remove a bond from an openbabel molecule

    Args:
        idx1: The atom index of one of the atoms participating the in bond
        idx2: The atom index of the other atom participating in the bond

    The indices are compared against ``GetBeginAtomIdx()`` /
    ``GetEndAtomIdx()`` in either order. Nothing happens if no bond
    joins the two atoms.
    """
    # Iterate over a snapshot of the bonds: deleting from the molecule
    # while its OBMolBondIter is still live modifies the container
    # being traversed.
    for obbond in list(ob.OBMolBondIter(self._obmol)):
        begin = obbond.GetBeginAtomIdx()
        end = obbond.GetEndAtomIdx()
        if (begin == idx1 and end == idx2) or (begin == idx2 and end == idx1):
            self._obmol.DeleteBond(obbond)
def ob_mol_to_rd_mol(ob_mol):
    '''
    Build an RWMol from an OBMol. Per atom, the element,
    formal charge, aromatic flag, implicit H count (stored
    as explicit Hs), hybridization and coordinates are
    copied; per bond, the order and aromatic flag.

    Raises an Exception for bond orders other than 1, 2, 3.
    '''
    rd_mol = Chem.RWMol()
    conformer = Chem.Conformer(ob_mol.NumAtoms())

    for ob_atom in ob.OBMolAtomIter(ob_mol):
        new_atom = Chem.Atom(ob_atom.GetAtomicNum())
        new_atom.SetFormalCharge(ob_atom.GetFormalCharge())
        new_atom.SetIsAromatic(ob_atom.IsAromatic())
        new_atom.SetNumExplicitHs(ob_atom.GetImplicitHCount())
        new_atom.SetNoImplicit(True)  # don't use rdkit valence model
        new_atom.SetHybridization(ob_hyb_to_rd_hyb(ob_atom))

        atom_idx = rd_mol.AddAtom(new_atom)
        position = Geometry.Point3D(
            ob_atom.GetX(), ob_atom.GetY(), ob_atom.GetZ()
        )
        conformer.SetAtomPosition(atom_idx, position)

    rd_mol.AddConformer(conformer)

    # supported openbabel bond orders and their rdkit bond types
    order_to_type = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }

    for ob_bond in ob.OBMolBondIter(ob_mol):

        # OB uses 1-indexing, rdkit uses 0
        begin = ob_bond.GetBeginAtomIdx() - 1
        end = ob_bond.GetEndAtomIdx() - 1

        bond_order = ob_bond.GetBondOrder()
        bond_type = order_to_type.get(bond_order)
        if bond_type is None:
            raise Exception('unknown bond order {}'.format(bond_order))

        rd_mol.AddBond(begin, end, bond_type)
        new_bond = rd_mol.GetBondBetweenAtoms(begin, end)
        new_bond.SetIsAromatic(ob_bond.IsAromatic())

    Chem.GetSSSR(rd_mol)  # initialize ring info
    rd_mol.UpdatePropertyCache(strict=False)  # compute valence

    return rd_mol
def get_rotlist(traj, ligidx):
    """Return the rotatable bonds of a ligand as pairs of original indices.

    The ligand indices are sorted, the matching structure is written as
    XYZ text by ``write_xyz_from_md`` and parsed with pybel. Every bond
    OpenBabel reports as a rotor is returned as a tuple of the two
    entries of the sorted ``ligidx`` that its atoms correspond to.
    """
    sorted_idx = sorted(ligidx)
    xyz_text = write_xyz_from_md(traj, sorted_idx)
    ligand = pybel.readstring('xyz', xyz_text)

    # OpenBabel atom indices are 1-based positions into sorted_idx.
    return [
        (sorted_idx[ob_bond.GetBeginAtomIdx() - 1],
         sorted_idx[ob_bond.GetEndAtomIdx() - 1])
        for ob_bond in openbabel.OBMolBondIter(ligand.OBMol)
        if ob_bond.IsRotor()
    ]
def _readpdb(self):
    """Read ``self.xyzfilename`` with OpenBabel and group atoms into molecules.

    A neighbour list is built from the bonds OpenBabel reports (indexed
    by atom id, sized by ``self._natom``) and passed to
    ``connectmolecule``; the result is stored in ``self._mols``.
    """
    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats('xyz', 'pdb')
    obmol = openbabel.OBMol()
    converter.ReadFile(obmol, self.xyzfilename)

    neighbours = [[] for _ in range(self._natom)]
    for ob_bond in openbabel.OBMolBondIter(obmol):
        begin = ob_bond.GetBeginAtom().GetId()
        end = ob_bond.GetEndAtom().GetId()
        # store the bond from both ends
        neighbours[begin].append(end)
        neighbours[end].append(begin)

    # connect molecules
    self._mols = connectmolecule(neighbours)
def test_to_OBMol(configuration):
    """Test creating an OBMol object from a structure.

    The atomic numbers and bond orders read back from the OBMol must
    equal those stored on the configuration.
    """
    mol = configuration.to_OBMol()

    bondorder_list = [
        bond.GetBondOrder() for bond in openbabel.OBMolBondIter(mol)
    ]

    # The atomic number is a property of each atom, not of the molecule.
    atno_list = [
        atom.GetAtomicNum() for atom in openbabel.OBMolAtomIter(mol)
    ]

    assert configuration.atoms.atomic_numbers == atno_list
    assert configuration.bonds.bondorders == bondorder_list
def smi_to_2D(smiles):
    """Return the connectivity matrix of a SMILES string.

    The SMILES is parsed with pybel, hydrogens are added and 2D
    coordinates are generated. The returned square numpy array has a 1
    at ``[i, j]`` and ``[j, i]`` for every bond between atoms ``i`` and
    ``j`` (0-based), and 0 elsewhere.
    """
    molecule = pybel.readstring('smi', smiles)
    molecule.OBMol.AddHydrogens()
    molecule.make2D()

    size = molecule.OBMol.NumAtoms()
    connectivity = np.zeros((size, size))

    for ob_bond in openbabel.OBMolBondIter(molecule.OBMol):
        # shift OpenBabel's 1-based atom indices to 0-based
        begin = ob_bond.GetBeginAtomIdx() - 1
        end = ob_bond.GetEndAtomIdx() - 1
        connectivity[begin, end] = connectivity[end, begin] = 1

    return connectivity
def test_mol_iteration(self):
    """Check atom and bond iteration on a parsed SMILES.

    Expects four oxygens and two non-aromatic bonds of order 2.
    """
    mol = parse_smiles("c12c(O[CH](C1=O)C(C)C)cc1c(c2)ccc(=O)o1")

    # Count atoms per atomic number.
    atoms_per_element = {}
    for atom in ob.OBMolAtomIter(mol):
        atomic_num = atom.GetAtomicNum()
        if atomic_num not in atoms_per_element:
            atoms_per_element[atomic_num] = 0
        atoms_per_element[atomic_num] += 1
    self.assertEqual(atoms_per_element[8], 4)

    # Count non-aromatic bonds per bond order.
    bonds_per_order = {}
    for bond in ob.OBMolBondIter(mol):
        order = bond.GetBondOrder()
        if bond.IsAromatic():
            continue
        if order not in bonds_per_order:
            bonds_per_order[order] = 0
        bonds_per_order[order] += 1
    self.assertEqual(bonds_per_order[2], 2)
def rotatable_bonds(mol):
    """
    Count the bonds of a molecule for which ``is_rotor`` is true.

    According to the original description, rotors are any non-terminal
    bond between heavy atoms, excluding amides (the criterion itself
    lives in ``is_rotor``).

    :param mol: pybel molecule object
    :type mol: pybel.Molecule

    :return rb: number of rotatable bonds
    :rtype int
    """
    return sum(1 for ob_bond in ob.OBMolBondIter(mol.OBMol) if is_rotor(ob_bond))
def _crd2bond(cls, step_atoms, readlevel):
    """Derive per-atom bond information from coordinates with OpenBabel.

    Returns, for each of the original atoms, either the list of bonded
    atom indices (``readlevel`` falsy) or the list of bond orders
    (``readlevel`` truthy). With periodic boundaries, ghost atoms are
    appended to ``step_atoms`` in place.
    """
    # copy from reacnetgenerator on 2019/4/13
    # updated on 2019/10/11
    n_real = len(step_atoms)

    if step_atoms.pbc.any():
        # Periodic boundary conditions: add ghost atoms, i.e. periodic
        # images lying within 5 (distance units of the positions) of a
        # real atom.
        images = step_atoms.repeat(2)[n_real:]
        kdtree = cKDTree(step_atoms.get_positions())
        nearest_dist = kdtree.query(images.get_positions(), k=1)[0]
        is_near = nearest_dist < 5
        # index of the real atom each ghost atom is an image of
        realnumber = np.where(is_near)[0] % n_real
        step_atoms += images[is_near]

    # Use openbabel to connect atoms
    mol = openbabel.OBMol()
    mol.BeginModify()
    numbers_and_positions = zip(
        step_atoms.get_atomic_numbers(), step_atoms.positions)
    for atom_id, (number, xyz) in enumerate(numbers_and_positions):
        ob_atom = mol.NewAtom(atom_id)
        ob_atom.SetAtomicNum(int(number))
        ob_atom.SetVector(*xyz)
    mol.ConnectTheDots()
    if readlevel:
        mol.PerceiveBondOrders()
    mol.EndModify()

    per_atom = [[] for _ in range(n_real)]
    for ob_bond in openbabel.OBMolBondIter(mol):
        begin = ob_bond.GetBeginAtom().GetId()
        end = ob_bond.GetEndAtom().GetId()
        begin_is_ghost = begin >= n_real
        end_is_ghost = end >= n_real

        if begin_is_ghost and end_is_ghost:
            # duplicated
            continue
        # map a ghost end back onto the real atom it mirrors
        if begin_is_ghost:
            begin = realnumber[begin - n_real]
        elif end_is_ghost:
            end = realnumber[end - n_real]

        if readlevel:
            order = ob_bond.GetBondOrder()
            per_atom[begin].append(order)
            per_atom[end].append(order)
        else:
            per_atom[begin].append(end)
            per_atom[end].append(begin)

    return per_atom
def _getbondfromcrd(self, step_atoms, cell):
    """Derive bonds and bond orders from coordinates with OpenBabel.

    Returns ``(bond, bondlevel)``: for each original atom, the list of
    bonded atom indices and the parallel list of bond orders (order 5
    is reported as 12). When ``self.pbc`` is set, ``step_atoms`` gets
    periodic boundaries, the given ``cell`` and appended ghost atoms,
    all in place.
    """
    n_real = len(step_atoms)

    if self.pbc:
        # Apply periodic boundary conditions
        step_atoms.set_pbc(True)
        step_atoms.set_cell(cell)
        # add ghost atoms: periodic images within 5 (distance units of
        # the positions) of a real atom
        images = step_atoms.repeat(2)[n_real:]
        kdtree = cKDTree(step_atoms.get_positions())
        nearest_dist = kdtree.query(images.get_positions(), k=1)[0]
        is_near = nearest_dist < 5
        # index of the real atom each ghost atom is an image of
        realnumber = np.where(is_near)[0] % n_real
        step_atoms += images[is_near]

    # Use openbabel to connect atoms
    mol = openbabel.OBMol()
    mol.BeginModify()
    numbers_and_positions = zip(
        step_atoms.get_atomic_numbers(), step_atoms.positions)
    for atom_id, (number, xyz) in enumerate(numbers_and_positions):
        ob_atom = mol.NewAtom(atom_id)
        ob_atom.SetAtomicNum(int(number))
        ob_atom.SetVector(*xyz)
    mol.ConnectTheDots()
    mol.PerceiveBondOrders()
    mol.EndModify()

    neighbours = [[] for _ in range(n_real)]
    orders = [[] for _ in range(n_real)]
    for ob_bond in openbabel.OBMolBondIter(mol):
        begin = ob_bond.GetBeginAtom().GetId()
        end = ob_bond.GetEndAtom().GetId()
        begin_is_ghost = begin >= n_real
        end_is_ghost = end >= n_real

        if begin_is_ghost and end_is_ghost:
            # duplicated
            continue
        # map a ghost end back onto the real atom it mirrors
        if begin_is_ghost:
            begin = realnumber[begin - n_real]
        elif end_is_ghost:
            end = realnumber[end - n_real]

        raw_order = ob_bond.GetBondOrder()
        # aromatic, 5 in openbabel but 12 in rdkit
        order = 12 if raw_order == 5 else raw_order

        neighbours[begin].append(end)
        neighbours[end].append(begin)
        orders[begin].append(order)
        orders[end].append(order)

    return neighbours, orders
def from_OBMol(self, ob_mol):
    """Transform an Open Babel molecule into the current object.

    The current contents are cleared, then atoms (atomic number and
    coordinates) and bonds (end atoms and bond order) are copied from
    ``ob_mol``. For a ``_Configuration`` the total charge and spin
    multiplicity are copied as well. Returns ``self``.
    """
    atnos = []
    Xs = []
    Ys = []
    Zs = []
    for ob_atom in openbabel.OBMolAtomIter(ob_mol):
        atno = ob_atom.GetAtomicNum()
        atnos.append(atno)
        Xs.append(ob_atom.x())
        Ys.append(ob_atom.y())
        Zs.append(ob_atom.z())
        # Log x, y and z (the y coordinate used to be replaced by a
        # second copy of z in this message).
        logger.debug(
            f"atom {atno} {ob_atom.x()} {ob_atom.y()} {ob_atom.z()}")

    Is = []
    Js = []
    BondOrders = []
    for ob_bond in openbabel.OBMolBondIter(ob_mol):
        ob_i = ob_bond.GetBeginAtom()
        ob_j = ob_bond.GetEndAtom()
        i = ob_i.GetIdx()
        j = ob_j.GetIdx()
        bondorder = ob_bond.GetBondOrder()
        Is.append(i)
        Js.append(j)
        BondOrders.append(bondorder)
        logger.debug(f"bond {i} - {j} {bondorder}")

    self.clear()

    if self.__class__.__name__ == "_Configuration":
        self.charge = ob_mol.GetTotalCharge()
        self.spin_multiplicity = ob_mol.GetTotalSpinMultiplicity()

    ids = self.atoms.append(x=Xs, y=Ys, z=Zs, atno=atnos)

    # Open Babel atom indices are 1-based; translate them to the ids
    # returned for the newly appended atoms.
    i = [ids[x - 1] for x in Is]
    j = [ids[x - 1] for x in Js]
    self.bonds.append(i=i, j=j, bondorder=BondOrders)

    return self
def ob_mol_to_rd_mol(ob_mol):
    '''
    Build an RWMol from an OBMol, copying elements, aromatic flags,
    coordinates and bonds. Aromatic bonds become AROMATIC; other bonds
    must have order 1, 2 or 3, else an Exception is raised.
    '''
    rd_mol = Chem.RWMol()
    conformer = Chem.Conformer(ob_mol.NumAtoms())

    for ob_atom in ob.OBMolAtomIter(ob_mol):
        new_atom = Chem.Atom(ob_atom.GetAtomicNum())
        new_atom.SetIsAromatic(ob_atom.IsAromatic())
        #TODO copy format charge
        atom_idx = rd_mol.AddAtom(new_atom)

        vec = ob_atom.GetVector()
        position = Geometry.Point3D(vec.GetX(), vec.GetY(), vec.GetZ())
        conformer.SetAtomPosition(atom_idx, position)

    rd_mol.AddConformer(conformer)

    # bond types for non-aromatic bonds, keyed by openbabel bond order
    order_to_type = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }

    for ob_bond in ob.OBMolBondIter(ob_mol):
        # openbabel atom indices are 1-based, rdkit's are 0-based
        begin = ob_bond.GetBeginAtomIdx() - 1
        end = ob_bond.GetEndAtomIdx() - 1

        bond_order = ob_bond.GetBondOrder()
        if ob_bond.IsAromatic():
            bond_type = Chem.BondType.AROMATIC
        else:
            bond_type = order_to_type.get(bond_order)
            if bond_type is None:
                raise Exception('unknown bond order {}'.format(bond_order))

        rd_mol.AddBond(begin, end, bond_type)

    return rd_mol
def make_obmol(struct, verbose=False):
    '''Create an OBMol from AtomStruct that attempts to maintain
    correct atom typing.

    Returns a tuple (pybel.Molecule, mismatches, visited_mols) where
    mismatches counts atoms whose channel name says Donor, Acceptor or
    Aromatic but whose OBAtom disagrees, and visited_mols holds a copy
    of the molecule taken after each construction stage.
    With verbose=True every mismatch is printed.
    '''
    mol = ob.OBMol()
    mol.BeginModify()
    visited_mols = []

    # one OBAtom per (coordinate, channel) pair of the struct
    atoms = []
    for xyz, t in zip(struct.xyz, struct.c):
        x, y, z = map(float, xyz)
        ch = struct.channels[t]
        atom = mol.NewAtom()
        atom.SetAtomicNum(ch.atomic_num)
        atom.SetVector(x, y, z)
        atoms.append(atom)

    # fixup() is re-applied after every stage below; presumably it
    # re-imposes the struct's typing on the atoms -- TODO confirm
    fixup(atoms, mol, struct)
    visited_mols.append(ob.OBMol(mol))

    connect_the_dots(mol, atoms, struct)
    fixup(atoms, mol, struct)
    visited_mols.append(ob.OBMol(mol))

    mol.EndModify()

    mol.AddPolarHydrogens()  #make implicits explicit
    visited_mols.append(ob.OBMol(mol))

    mol.PerceiveBondOrders()
    fixup(atoms, mol, struct)
    visited_mols.append(ob.OBMol(mol))

    for a in atoms:
        ob.OBAtomAssignTypicalImplicitHydrogens(a)
    fixup(atoms, mol, struct)
    visited_mols.append(ob.OBMol(mol))

    mol.AddHydrogens()
    fixup(atoms, mol, struct)
    visited_mols.append(ob.OBMol(mol))

    #make rings all aromatic if majority of carbons are aromatic
    for ring in ob.OBMolRingIter(mol):
        if 5 <= ring.Size() <= 6:
            carbon_cnt = 0
            aromatic_ccnt = 0
            for ai in ring._path:
                a = mol.GetAtom(ai)
                if a.GetAtomicNum() == 6:
                    carbon_cnt += 1
                    if a.IsAromatic():
                        aromatic_ccnt += 1
            # a ring without carbons has no carbon majority; checking
            # carbon_cnt first also avoids dividing by zero
            if (carbon_cnt > 0
                    and aromatic_ccnt / carbon_cnt >= .5
                    and aromatic_ccnt != ring.Size()):
                #set all ring atoms to be aromatic
                for ai in ring._path:
                    a = mol.GetAtom(ai)
                    a.SetAromatic(True)

    #bonds must be marked aromatic for smiles to match
    for bond in ob.OBMolBondIter(mol):
        a1 = bond.GetBeginAtom()
        a2 = bond.GetEndAtom()
        if a1.IsAromatic() and a2.IsAromatic():
            bond.SetAromatic(True)

    visited_mols.append(ob.OBMol(mol))

    # count atoms whose OpenBabel properties contradict their channel
    mismatches = 0
    for (a, t) in zip(atoms, struct.c):
        ch = struct.channels[t]
        if 'Donor' in ch.name and not a.IsHbondDonor():
            mismatches += 1
            if verbose:
                print("Not Donor", ch.name, a.GetX(), a.GetY(), a.GetZ())
        if (ch.name != 'NitrogenXSDonorAcceptor'
                and 'Acceptor' in ch.name
                and a.GetExplicitDegree() != a.GetTotalDegree()):
            #there are issues with nitrogens and openbabel protonation..
            mismatches += 1
            if verbose:
                print("Not Acceptor", ch.name, a.GetX(), a.GetY(), a.GetZ())
        if 'Aromatic' in ch.name and not a.IsAromatic():
            mismatches += 1
            if verbose:
                print("Not Aromatic", ch.name, a.GetX(), a.GetY(), a.GetZ())

    return pybel.Molecule(mol), mismatches, visited_mols
def create_rdkit_mol_from_mol_graph(mol_graph,
                                    name=None,
                                    force_sanitize=False,
                                    metals=None):
    """
    Create a rdkit molecule from molecule graph, with bond type perceived by babel.
    Done in the below steps:

    1. create a babel mol without metal atoms.
    2. perceive bond order (conducted by BabelMolAdaptor)
    3. adjust formal charge of metal atoms so as not to violate valence rule
    4. create rdkit mol based on species, coords, bonds, and formal charge

    Args:
        mol_graph (pymatgen MoleculeGraph): molecule graph
        name (str): name of the molecule
        force_sanitize (bool): whether to force sanitization of the rdkit mol
        metals (dict): with metal atom (str) as key and the number of valence
            electrons as value. Defaults to ``{"Li": 1, "Mg": 2}`` when None.

    Returns:
        m: rdkit Chem.Mol
        bond_types (dict): bond types assigned to the created rdkit mol

    Raises:
        RuntimeError: if babel reports a bond order other than 0, 1, 2, 3 or 5,
            or if a bond joins two metal atoms.
    """
    # Build the default per call instead of sharing one mutable dict
    # object between all calls through the function signature.
    if metals is None:
        metals = {"Li": 1, "Mg": 2}

    pymatgen_mol = mol_graph.molecule
    species = [str(s) for s in pymatgen_mol.species]
    coords = pymatgen_mol.cart_coords
    bonds = [
        tuple(sorted([i, j])) for i, j, attr in mol_graph.graph.edges.data()
    ]

    # create babel mol without metals
    pmg_mol_no_metals = remove_metals(pymatgen_mol)
    adaptor = BabelMolAdaptor(pmg_mol_no_metals)
    ob_mol = adaptor.openbabel_mol

    # get babel bond order of mol without metals
    ob_bond_order = {}
    for bd in ob.OBMolBondIter(ob_mol):
        k = tuple(sorted([bd.GetBeginAtomIdx(), bd.GetEndAtomIdx()]))
        v = bd.GetBondOrder()
        ob_bond_order[k] = v

    # create bond type
    atom_idx_mapping = pymatgen_2_babel_atom_idx_map(pymatgen_mol, ob_mol)
    bond_types = {}

    for bd in bonds:
        try:
            ob_bond = [atom_idx_mapping[a] for a in bd]

            # atom not in ob mol
            if None in ob_bond:
                # handled by the except branch below, together with
                # bonds that babel did not find
                raise KeyError
            # atom in ob mol
            else:
                ob_bond = tuple(sorted(ob_bond))
                v = ob_bond_order[ob_bond]
                if v == 0:
                    tp = BondType.UNSPECIFIED
                elif v == 1:
                    tp = BondType.SINGLE
                elif v == 2:
                    tp = BondType.DOUBLE
                elif v == 3:
                    tp = BondType.TRIPLE
                elif v == 5:
                    tp = BondType.AROMATIC
                else:
                    raise RuntimeError(f"Got unexpected babel bond order: {v}")

        except KeyError:
            atom1_spec, atom2_spec = [species[a] for a in bd]

            if atom1_spec in metals and atom2_spec in metals:
                raise RuntimeError("Got a bond between two metal atoms")

            # bond involves one and only one metal atom (atom not in ob mol case above)
            elif atom1_spec in metals or atom2_spec in metals:
                tp = Chem.rdchem.BondType.DATIVE

                # Dative bonds have the special characteristic that they do not affect
                # the valence on the start atom, but do affect the end atom.
                # Here we adjust the atom ordering in the bond for dative bond to make
                # metal the end atom.
                if atom1_spec in metals:
                    bd = tuple(reversed(bd))

            # bond not found by babel (atom in ob mol)
            else:
                tp = Chem.rdchem.BondType.UNSPECIFIED

        bond_types[bd] = tp

    # a metal atom can form multiple dative bond (e.g. bidentate LiEC), for such cases
    # we need to adjust the their formal charge so as not to violate valence rule
    formal_charge = adjust_formal_charge(species, bonds, metals)

    m = create_rdkit_mol(species, coords, bond_types, formal_charge, name,
                         force_sanitize)

    return m, bond_types
def connect_the_dots(mol, atoms, struct, maxbond=4):
    '''Custom implementation of ConnectTheDots.  This is similar to
    OpenBabel's version, but is more willing to make long bonds
    (up to maxbond long) to keep the molecule connected.  It also
    attempts to respect atom type information from struct.
    atoms and struct need to correspond in their order

    Assumes no hydrogens or existing bonds.

    mol is modified in place; nothing is returned. The steps are:
    bond every atom pair closer than maxbond, drop bonds between two
    atoms that allow only one bond, drop bonds that cause hypervalency,
    then drop geometrically poor bonds - the last two only when
    reachable() says the bond's atoms stay connected.
    '''
    pt = Chem.GetPeriodicTable()

    if len(atoms) == 0:
        return

    mol.BeginModify()

    #just going to to do n^2 comparisons, can worry about efficiency later
    coords = np.array([(a.GetX(), a.GetY(), a.GetZ()) for a in atoms])
    dists = squareform(pdist(coords))
    types = [struct.channels[t].name for t in struct.c]

    for (i, a) in enumerate(atoms):
        for (j, b) in enumerate(atoms):
            if a == b:
                break  #only visit each pair once (j before i)
            if dists[i, j] < 0.01:  #reduce from 0.4
                continue  #don't bond too close atoms
            if dists[i, j] < maxbond:
                flag = 0
                if 'Aromatic' in types[i] and 'Aromatic' in types[j]:
                    flag = ob.OB_AROMATIC_BOND
                mol.AddBond(a.GetIdx(), b.GetIdx(), 1, flag)

    atom_maxb = {}
    for (i, a) in enumerate(atoms):
        #set max valance to the smallest max allowed by openbabel or rdkit
        #since we want the molecule to be valid for both (rdkit is usually lower)
        maxb = ob.GetMaxBonds(a.GetAtomicNum())
        maxb = min(maxb, pt.GetDefaultValence(a.GetAtomicNum()))

        if 'Donor' in types[i]:
            maxb -= 1  #leave room for hydrogen
        atom_maxb[a.GetIdx()] = maxb

    #remove any impossible bonds between halogens
    #iterate over a snapshot: deleting a bond while OBMolBondIter is
    #live shifts the remaining bonds, so the bond following each
    #deletion would be skipped
    for bond in list(ob.OBMolBondIter(mol)):
        a1 = bond.GetBeginAtom()
        a2 = bond.GetEndAtom()
        if atom_maxb[a1.GetIdx()] == 1 and atom_maxb[a2.GetIdx()] == 1:
            mol.DeleteBond(bond)

    def get_bond_info(biter):
        '''Return bonds sorted by their distortion'''
        bonds = [b for b in biter]
        binfo = []
        for bond in bonds:
            bdist = bond.GetLength()
            #compute how far away from optimal we are
            a1 = bond.GetBeginAtom()
            a2 = bond.GetEndAtom()
            ideal = ob.GetCovalentRad(a1.GetAtomicNum()) + ob.GetCovalentRad(
                a2.GetAtomicNum())
            stretch = bdist - ideal
            binfo.append((stretch, bdist, bond))
        binfo.sort(reverse=True,
                   key=lambda t: t[:2])  #most stretched bonds first
        return binfo

    #prioritize removing hypervalency causing bonds, do more valent
    #constrained atoms first since their bonds introduce the most problems
    #with reachability (e.g. oxygen)
    hypers = sorted([(atom_maxb[a.GetIdx()],
                      a.GetExplicitValence() - atom_maxb[a.GetIdx()], a)
                     for a in atoms],
                    key=lambda aa: (aa[0], -aa[1]))
    for mb, diff, a in hypers:
        if a.GetExplicitValence() <= atom_maxb[a.GetIdx()]:
            continue
        binfo = get_bond_info(ob.OBAtomBondIter(a))
        for stretch, bdist, bond in binfo:
            #can we remove this bond without disconnecting the molecule?
            a1 = bond.GetBeginAtom()
            a2 = bond.GetEndAtom()

            #get right valence
            if a1.GetExplicitValence() > atom_maxb[a1.GetIdx()] or \
                    a2.GetExplicitValence() > atom_maxb[a2.GetIdx()]:
                #don't fragment the molecule
                if not reachable(a1, a2):
                    continue
                mol.DeleteBond(bond)
                if a.GetExplicitValence() <= atom_maxb[a.GetIdx()]:
                    break  #let nbr atoms choose what bonds to throw out

    binfo = get_bond_info(ob.OBMolBondIter(mol))
    #now eliminate geometrically poor bonds
    for stretch, bdist, bond in binfo:
        #can we remove this bond without disconnecting the molecule?
        a1 = bond.GetBeginAtom()
        a2 = bond.GetEndAtom()

        #as long as we aren't disconnecting, let's remove things
        #that are excessively far away (0.45 from ConnectTheDots)
        #get bonds to be less than max allowed
        #also remove tight angles, because that is what ConnectTheDots does
        if stretch > 0.45 or forms_small_angle(a1, a2) or forms_small_angle(
                a2, a1):
            #don't fragment the molecule
            if not reachable(a1, a2):
                continue
            mol.DeleteBond(bond)

    mol.EndModify()
def convert_ob_mol_to_rd_mol(ob_mol, struct=None):
    '''Convert OBMol to RDKit mol, fixing up issues.

    ob_mol is modified in place (its hydrogens are deleted). Elements,
    coordinates, bonds (orders 1-3 only) and aromatic flags are copied;
    double/triple bonds on hypervalent atoms are demoted, nitrogens with
    four neighbors get a +1 charge and hydrogens are re-added by RDKit.

    If sanitization fails, the molecule and struct are dumped to
    bad<N>.sdf / bad<N>.pkl in the working directory and conversion
    continues.

    Raises Exception for a bond order other than 1, 2 or 3.
    '''
    ob_mol.DeleteHydrogens()
    n_atoms = ob_mol.NumAtoms()
    rd_mol = Chem.RWMol()
    rd_conf = Chem.Conformer(n_atoms)

    for ob_atom in ob.OBMolAtomIter(ob_mol):
        rd_atom = Chem.Atom(ob_atom.GetAtomicNum())
        #TODO copy format charge
        if ob_atom.IsAromatic() and ob_atom.IsInRing(
        ) and ob_atom.MemberOfRingSize() <= 6:
            #don't commit to being aromatic unless rdkit will be okay with the ring status
            #(this can happen if the atoms aren't fit well enough)
            rd_atom.SetIsAromatic(True)
        i = rd_mol.AddAtom(rd_atom)
        ob_coords = ob_atom.GetVector()
        x = ob_coords.GetX()
        y = ob_coords.GetY()
        z = ob_coords.GetZ()
        rd_coords = Geometry.Point3D(x, y, z)
        rd_conf.SetAtomPosition(i, rd_coords)

    rd_mol.AddConformer(rd_conf)

    for ob_bond in ob.OBMolBondIter(ob_mol):
        #openbabel atom indices are 1-based, rdkit's are 0-based
        i = ob_bond.GetBeginAtomIdx() - 1
        j = ob_bond.GetEndAtomIdx() - 1
        bond_order = ob_bond.GetBondOrder()
        if bond_order == 1:
            rd_mol.AddBond(i, j, Chem.BondType.SINGLE)
        elif bond_order == 2:
            rd_mol.AddBond(i, j, Chem.BondType.DOUBLE)
        elif bond_order == 3:
            rd_mol.AddBond(i, j, Chem.BondType.TRIPLE)
        else:
            raise Exception('unknown bond order {}'.format(bond_order))

        if ob_bond.IsAromatic():
            bond = rd_mol.GetBondBetweenAtoms(i, j)
            bond.SetIsAromatic(True)

    rd_mol = Chem.RemoveHs(rd_mol, sanitize=False)

    pt = Chem.GetPeriodicTable()
    #if double/triple bonds are connected to hypervalent atoms, decrement the order

    positions = rd_mol.GetConformer().GetPositions()
    nonsingles = []
    for bond in rd_mol.GetBonds():
        if bond.GetBondType() == Chem.BondType.DOUBLE or bond.GetBondType(
        ) == Chem.BondType.TRIPLE:
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            dist = np.linalg.norm(positions[i] - positions[j])
            nonsingles.append((dist, bond))
    #longest multiple bonds are demoted first
    nonsingles.sort(reverse=True, key=lambda t: t[0])

    for (d, bond) in nonsingles:
        a1 = bond.GetBeginAtom()
        a2 = bond.GetEndAtom()

        if calc_valence(a1) > pt.GetDefaultValence(a1.GetAtomicNum()) or \
                calc_valence(a2) > pt.GetDefaultValence(a2.GetAtomicNum()):
            btype = Chem.BondType.SINGLE
            if bond.GetBondType() == Chem.BondType.TRIPLE:
                btype = Chem.BondType.DOUBLE
            bond.SetBondType(btype)

    for atom in rd_mol.GetAtoms():
        #set nitrogens with 4 neighbors to have a charge
        if atom.GetAtomicNum() == 7 and atom.GetDegree() == 4:
            atom.SetFormalCharge(1)

    rd_mol = Chem.AddHs(rd_mol, addCoords=True)

    positions = rd_mol.GetConformer().GetPositions()
    center = np.mean(positions[np.all(np.isfinite(positions), axis=1)],
                     axis=0)
    for atom in rd_mol.GetAtoms():
        i = atom.GetIdx()
        pos = positions[i]
        if not np.all(np.isfinite(pos)):
            #hydrogens on C fragment get set to nan (shouldn't, but they do)
            rd_mol.GetConformer().SetAtomPosition(i, center)

    try:
        Chem.SanitizeMol(rd_mol, Chem.SANITIZE_ALL ^ Chem.SANITIZE_KEKULIZE)
    except Exception:  # mtr22 - don't assume mols will pass this
        # Catch Exception rather than everything, so KeyboardInterrupt
        # and SystemExit are not swallowed here.
        # dkoes - but we want to make failures as rare as possible and should debug them
        m = pybel.Molecule(ob_mol)
        i = np.random.randint(1000000)
        outname = 'bad%d.sdf' % i
        print("WRITING", outname)
        m.write('sdf', outname, overwrite=True)
        # close the pickle file deterministically
        with open('bad%d.pkl' % i, 'wb') as pkl_file:
            pickle.dump(struct, pkl_file)

    #but at some point stop trying to enforce our aromaticity -
    #openbabel and rdkit have different aromaticity models so they
    #won't always agree.  Remove any aromatic bonds to non-aromatic atoms
    for bond in rd_mol.GetBonds():
        a1 = bond.GetBeginAtom()
        a2 = bond.GetEndAtom()
        if bond.GetIsAromatic():
            if not a1.GetIsAromatic() or not a2.GetIsAromatic():
                bond.SetIsAromatic(False)
        elif a1.GetIsAromatic() and a2.GetIsAromatic():
            bond.SetIsAromatic(True)

    return rd_mol
def from_ob_mol(mol, obmol, raise_atomtype_exception=True):
    """
    Fill the molecular structure `mol` from the OpenBabel Mol object `obmol`
    and return it. Uses `OpenBabel <http://openbabel.org/>`_ to perform the
    conversion.

    Hydrogens are added to `obmol` in place. Radical electrons are estimated
    per atom from its undervalence, and the overall spin multiplicity is
    assumed to be the radical count + 1.

    Raises DependencyError if OpenBabel could not be imported.
    """
    # Below are the declared variables for cythonizing the module
    cython.declare(
        number=cython.int,
        isotope=cython.int,
        element=elements.Element,
        charge=cython.int,
        valence=cython.int,
        radical_electrons=cython.int,
        atom=mm.Atom,
    )

    if openbabel is None:
        raise DependencyError('OpenBabel is not installed. Please install or use RDKit.')

    # Start from an empty vertex list
    mol.vertices = []

    # Complete the molecule with hydrogen atoms if needed
    obmol.AddHydrogens()
    # TODO Chem.rdmolops.Kekulize(obmol, clearAromaticFlags=True)

    # ---- atoms ----
    for obatom in openbabel.OBMolAtomIter(obmol):
        # The element is looked up by atomic number and isotope
        # (-1 when the isotope is reported as 0)
        number = obatom.GetAtomicNum()
        isotope = obatom.GetIsotope()
        element = elements.get_element(number, isotope or -1)

        charge = obatom.GetFormalCharge()

        # Radical electrons come from undervalence only; whatever
        # obatom.GetSpinMultiplicity() reports is ignored
        valence = obatom.GetTotalValence()
        typical_valence = openbabel.GetTypicalValence(number, valence, charge)
        radical_electrons = typical_valence - valence

        atom = mm.Atom(element, radical_electrons, charge, '', 0)
        mol.vertices.append(atom)

    # ---- bonds ----
    for obbond in openbabel.OBMolBondIter(obmol):
        oborder = obbond.GetBondOrder()
        is_plain_order = oborder in [1, 2, 3, 4]
        if not is_plain_order and obbond.IsAromatic():
            oborder = 1.5

        # OpenBabel indices start at 1, python array indices start at 0
        begin_vertex = mol.vertices[obbond.GetBeginAtomIdx() - 1]
        end_vertex = mol.vertices[obbond.GetEndAtomIdx() - 1]
        mol.add_bond(mm.Bond(begin_vertex, end_vertex, oborder))

    # Set atom types and connectivity values
    mol.update_connectivity_values()
    mol.update_atomtypes(log_species=True, raise_exception=raise_atomtype_exception)
    mol.update_multiplicity()
    mol.identify_ring_membership()

    # Assume this is always true
    # There are cases where 2 radical_electrons is a singlet, but
    # the triplet is often more stable,
    mol.multiplicity = mol.get_radical_count() + 1

    return mol