def _parseMolData(data):
    """Read molecules from molfile/SD text, redrawing those without coordinates.

    Each molecule read from ``data`` is written back out as a mol block and
    the lines containing the text ``0.0000`` are inspected.  When every one
    of those lines contains that text at least three times, the molecule is
    taken to carry no real coordinates and RDKit computes a fresh 2D layout
    for it.  Otherwise the molecule is kept as drawn.

    Molecules without the ``_drawingBondsWedged`` property are sanitized
    first.  Returns the list of molecules the supplier could read.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data), sanitize=False)
    molecules = [mol for mol in supplier if mol]

    for mol in molecules:
        if not mol.HasProp("_drawingBondsWedged"):
            SanitizeMol(mol)

        zero_rows = [
            row for row in MolToMolBlock(mol).split("\n") if "0.0000" in row
        ]
        # Splitting on "0.0000" gives more than three pieces exactly when
        # the text occurs three or more times on the row.
        all_zero_rows = sum(
            1 for row in zero_rows if len(row.split("0.0000")) > 3
        )
        if all_zero_rows == len(zero_rows):
            # No usable coordinates found: let RDKit lay the molecule out.
            SanitizeMol(mol)
            Compute2DCoords(mol)

    return molecules
def sequence_tunable(
        mol,
        OP_REMOVE_ISOTOPE=True, OP_NEUTRALISE_CHARGE=True,
        OP_REMOVE_STEREO=False, OP_COMMUTE_INCHI=False,
        OP_KEEP_BIGGEST=True, OP_ADD_HYDROGEN=True,
        OP_KEKULIZE=True, OP_NEUTRALISE_CHARGE_LATE=True
):
    """Tunable sequence of filters for standardization.

    Operations are applied in the following order:
        1  RDKit Cleanup               -- always
        2  RDKit SanitizeMol           -- always
        3  RDKit AssignStereochemistry -- always
        4  Remove isotope              -- optional (default: True)
        5  Neutralise charges          -- optional (default: True)
        6  RDKit SanitizeMol           -- if 4 or 5 was requested
        7  Remove stereo               -- optional (default: False)
        8  Commute InChI               -- if 7 was requested, or optional
                                          (default: False)
        9  Keep biggest                -- optional (default: True)
        10 RDKit SanitizeMol           -- if any of 7, 8, 9 was requested
        11 Neutralise charges (late),
           followed by SanitizeMol     -- optional (default: True)
        12 Add hydrogens               -- optional (default: True)
        13 Kekulize                    -- optional (default: True)

    :param mol: molecule to standardize; the always-on steps are applied to
        it in place, the filters return the molecule used for later steps
    :returns: the molecule produced by the last executed step
    """
    F = Filters()

    def sanitize_all(target):
        # Full sanitization; errors propagate to the caller.
        SanitizeMol(target, sanitizeOps=SanitizeFlags.SANITIZE_ALL, catchErrors=False)

    # Always perform the basics..
    Cleanup(mol)
    sanitize_all(mol)
    AssignStereochemistry(mol, cleanIt=True, force=True, flagPossibleStereoCenters=True)  # Fix bug TD201904.01

    if OP_REMOVE_ISOTOPE:
        mol = F.remove_isotope(mol)
    if OP_NEUTRALISE_CHARGE:
        mol = F.neutralise_charge(mol)
    # Re-sanitize when either of the two steps above ran.  The original test
    # checked OP_REMOVE_ISOTOPE twice and therefore ignored
    # OP_NEUTRALISE_CHARGE.
    if OP_REMOVE_ISOTOPE or OP_NEUTRALISE_CHARGE:
        sanitize_all(mol)

    if OP_REMOVE_STEREO:
        mol = F.remove_stereo(mol)
        # Removing stereo forces the InChI round trip below.
        OP_COMMUTE_INCHI = True
    if OP_COMMUTE_INCHI:
        mol = F.commute_inchi(mol)
    if OP_KEEP_BIGGEST:
        mol = F.keep_biggest(mol)
    if OP_REMOVE_STEREO or OP_COMMUTE_INCHI or OP_KEEP_BIGGEST:
        sanitize_all(mol)

    if OP_NEUTRALISE_CHARGE_LATE:
        mol = F.neutralise_charge(mol)
        sanitize_all(mol)

    if OP_ADD_HYDROGEN:
        mol = F.add_hydrogen(mol, addCoords=True)
    if OP_KEKULIZE:
        mol = F.kekulize(mol)

    return mol
def sequence_rr_legacy(mol):
    """Apply the filter sequence used for the first version of RetroRules.

    Steps, in order: Cleanup, full SanitizeMol, AssignStereochemistry,
    remove isotope, neutralise charge, full SanitizeMol, keep biggest,
    add hydrogens (with coordinates) and kekulize.

    :param mol: molecule to standardize
    :returns: the molecule returned by the final kekulize filter
    """
    filters = Filters()
    sanitize_options = {
        'sanitizeOps': SanitizeFlags.SANITIZE_ALL,
        'catchErrors': False,
    }

    Cleanup(mol)
    SanitizeMol(mol, **sanitize_options)
    AssignStereochemistry(
        mol, cleanIt=True, force=True,
        flagPossibleStereoCenters=True)  # Fix bug TD201904.01

    without_isotopes = filters.remove_isotope(mol)
    neutral = filters.neutralise_charge(without_isotopes)
    SanitizeMol(neutral, **sanitize_options)

    biggest = filters.keep_biggest(neutral)
    hydrogenated = filters.add_hydrogen(biggest, addCoords=True)
    return filters.kekulize(hydrogenated)
def to_rdkit_molecule(data):
    """
    MoleculeContainer to RDKit molecule object converter

    Atoms are copied with their map number, charge, isotope (only when it
    differs from the common isotope) and radical electron count.  A single
    conformer is built from the atoms' x/y/z values and marked as 2D when
    no atom has a non-zero z.  The resulting molecule is sanitized before
    it is returned.
    """
    rd_mol = RWMol()
    conformer = Conformer()
    index_of = {}  # container atom number -> RDKit atom index
    has_depth = False

    for number, atom in data.atoms():
        rd_atom = Atom(atom.number)
        rd_atom.SetAtomMapNum(number)
        if atom.charge:
            rd_atom.SetFormalCharge(atom.charge)
        if atom.isotope != atom.common_isotope:
            rd_atom.SetIsotope(atom.isotope)
        if atom.radical:
            rd_atom.SetNumRadicalElectrons(atom.radical)

        rd_index = rd_mol.AddAtom(rd_atom)
        index_of[number] = rd_index
        conformer.SetAtomPosition(rd_index, (atom.x, atom.y, atom.z))
        if atom.z:
            has_depth = True

    if not has_depth:
        conformer.Set3D(False)

    for begin, end, bond in data.bonds():
        rd_mol.AddBond(index_of[begin], index_of[end], _bond_map[bond.order])

    rd_mol.AddConformer(conformer)
    SanitizeMol(rd_mol)
    return rd_mol
def _mols2imageString(mols, size, legend, format, recalc=False, highlightMatch=None): """Take an input stream for the molecule image and return as a string""" if not mols: return '' # if recalc: # _apply(mols, _computeCoords) imageData = StringIO.StringIO() for mol in mols: try: SanitizeMol(mol, sanitizeOps=SanitizeFlags.SANITIZE_ALL ^ SanitizeFlags.SANITIZE_CLEANUPCHIRALITY ^ Chem.SanitizeFlags.SANITIZE_SETCONJUGATION ^ Chem.SanitizeFlags.SANITIZE_SETAROMATICITY) except ValueError: return imageData.getvalue() AllChem.AssignAtomChiralTagsFromStructure(mol, replaceExistingTags=False) _mols2imageStream(mols, imageData, format, size, legend, highlightMatch=highlightMatch) return imageData.getvalue()
def protonate_molecule(mol_in: Mol, ph=7.4) -> Mol:
    """Add hydrogens to a molecule through an OpenBabel round trip.

    The molecule is exported as a mol block, read with pybel, given
    hydrogens via ``OBMol.AddHydrogens(False, True, ph)`` and read back into
    RDKit (hydrogens kept) before being sanitized.  If that sanitization
    raises ``ValueError`` the round trip is repeated with
    ``AddHydrogens(False, False)`` (no pH correction) and sanitized again;
    a failure of the second attempt propagates.

    :param mol_in: input RDKit molecule
    :param ph: pH passed to OpenBabel for the first attempt
    :returns: the sanitized molecule with hydrogens
    """
    source_block = MolToMolBlock(mol_in)

    def round_trip(*add_hydrogens_args):
        # Parse the original mol block afresh so attempts do not interfere.
        ob_molecule = pybel.readstring('mol', source_block)
        ob_molecule.OBMol.AddHydrogens(*add_hydrogens_args)
        return MolFromMolBlock(ob_molecule.write('mol'), removeHs=False, sanitize=False)

    result = round_trip(False, True, ph)
    try:
        SanitizeMol(result)
    except ValueError:
        # Try again, but without ph correction
        result = round_trip(False, False)
        SanitizeMol(result)
    return result
def to_rdkit_molecule(data):
    """
    MoleculeContainer to RDKit molecule object converter

    Atoms carry their map number, charge, isotope and a single radical
    electron when flagged as radical.  The first conformer is the 2D layout
    taken from the atoms' x/y values (z fixed to 0); every entry of
    ``data._conformers`` is added as a further conformer.  The molecule is
    sanitized before it is returned.
    """
    rd_mol = RWMol()
    index_of = {}  # container atom number -> RDKit atom index

    for number, atom in data.atoms():
        rd_atom = Atom(atom.atomic_number)
        rd_atom.SetAtomMapNum(number)
        if atom.charge:
            rd_atom.SetFormalCharge(atom.charge)
        if atom.isotope:
            rd_atom.SetIsotope(atom.isotope)
        if atom.is_radical:
            rd_atom.SetNumRadicalElectrons(1)
        index_of[number] = rd_mol.AddAtom(rd_atom)

    for begin, end, bond in data.bonds():
        rd_mol.AddBond(index_of[begin], index_of[end], _bond_map[bond.order])

    # Flat layout from the container's own coordinates.
    layout = Conformer()
    for number, atom in data.atoms():
        layout.SetAtomPosition(index_of[number], (atom.x, atom.y, 0))
    layout.Set3D(False)
    rd_mol.AddConformer(layout)

    # Additional stored conformers, one RDKit conformer each.
    for coordinates in data._conformers:
        extra = Conformer()
        for number, xyz in coordinates.items():
            extra.SetAtomPosition(index_of[number], xyz)
        rd_mol.AddConformer(extra)

    SanitizeMol(rd_mol)
    return rd_mol
def _apply_reaction(self, mol, reaction): products = reaction.RunReactants((mol, )) if products: mol = products[0][0] SanitizeMol(mol) return mol else: return mol
def _neutralise_sulphoxide(mol):
    """Rewrite charge-separated S+/O- pairs as S=O, fragment by fragment.

    The molecule is split into fragments, the SMIRKS transformation is
    applied to each fragment through ``_apply_rxn``, and the results are
    recombined.  A recombined (multi-fragment) molecule is sanitized; a
    single fragment is returned as produced by ``_apply_rxn``.

    :param mol: molecule to process
    :returns: the processed molecule, or ``mol`` itself when it has no
        fragments (previously this case raised ``IndexError``)
    """
    smirks = '[S+1:1][O-1:2]>>[S+0:1]=[O-0:2]'
    rxn = rdChemReactions.ReactionFromSmarts(smirks)
    frags = rdmolops.GetMolFrags(mol, asMols=True)
    n_frags = [
        n_frag
        for n_frag in (_apply_rxn(frag, rxn) for frag in frags)
        if n_frag is not None
    ]

    if not n_frags:
        # Nothing to transform or combine (e.g. a molecule without atoms).
        return mol

    n_mol = n_frags[0]
    if len(n_frags) > 1:
        # Fold the remaining fragments back into one molecule.
        for n_frag in n_frags[1:]:
            n_mol = CombineMols(n_mol, n_frag)
        SanitizeMol(n_mol)
    return n_mol
def mol_to_nx(mol) -> nx.Graph:
    """Convert an RDKit molecule into an undirected networkx graph.

    The molecule is sanitized (all operations except the properties check)
    and Gasteiger charges are computed on it.  Each atom becomes a node
    keyed by its atom index and annotated with its conformer position and a
    set of per-atom descriptors; each bond becomes an edge annotated with
    its bond type and conjugation flag.
    """
    graph = nx.Graph()
    conformer = mol.GetConformer()
    SanitizeMol(mol, SanitizeFlags.SANITIZE_ALL ^ SanitizeFlags.SANITIZE_PROPERTIES)
    ComputeGasteigerCharges(mol)
    rings = mol.GetRingInfo()
    crippen = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa = rdMolDescriptors._CalcTPSAContribs(mol)

    for atom in mol.GetAtoms():
        atom_index = atom.GetIdx()
        node_attributes = {
            'pos': conformer.GetAtomPosition(atom_index),
            'formal_charge': atom.GetFormalCharge(),
            'chiral_tag': atom.GetChiralTag(),
            'hybridization': atom.GetHybridization(),
            'is_aromatic': atom.GetIsAromatic(),
            'num_atom_rings': rings.NumAtomRings(atom_index),
            'is_in_ring_size3': atom.IsInRingSize(3),
            'is_in_ring_size4': atom.IsInRingSize(4),
            'is_in_ring_size5': atom.IsInRingSize(5),
            'is_in_ring_size6': atom.IsInRingSize(6),
            'symbol': atom.GetSymbol(),
            'total_valence': atom.GetTotalValence(),
            # Property set on the atoms by ComputeGasteigerCharges above.
            'gasteiger_charge': atom.GetProp('_GasteigerCharge'),
            'num_implicit_hs': atom.GetNumImplicitHs(),
            'total_degree': atom.GetTotalDegree(),
            'crippen_logp': crippen[atom_index][0],
            'crippen_mr': crippen[atom_index][1],
            'tpsa': tpsa[atom_index],
        }
        graph.add_node(atom_index, **node_attributes)

    for bond in mol.GetBonds():
        begin_index = bond.GetBeginAtomIdx()
        end_index = bond.GetEndAtomIdx()
        graph.add_edge(
            begin_index,
            end_index,
            bond_type=bond.GetBondType(),
            is_conjugated=bond.GetIsConjugated(),
        )

    return graph
def sequence_minimal(self, mol):
    """Minimal standardization.

    Runs a full SanitizeMol (errors are raised, not caught) followed by a
    forced stereochemistry assignment, both in place, and returns ``mol``.
    """
    SanitizeMol(mol, catchErrors=False, sanitizeOps=SanitizeFlags.SANITIZE_ALL)
    stereo_options = {
        'cleanIt': True,
        'force': True,
        'flagPossibleStereoCenters': True,
    }
    AssignStereochemistry(mol, **stereo_options)  # Fix bug TD201904.01
    return mol
def sanitize_without_hypervalencies(m: Mol):
    """Sanitize a molecule in place without checking for hypervalencies.

    Only the operations listed below are requested.  ``catchErrors=True``
    is passed to SanitizeMol and its return value is discarded.
    """
    requested_ops = (
        SanitizeFlags.SANITIZE_FINDRADICALS,
        SanitizeFlags.SANITIZE_KEKULIZE,
        SanitizeFlags.SANITIZE_SETAROMATICITY,
        SanitizeFlags.SANITIZE_SETCONJUGATION,
        SanitizeFlags.SANITIZE_SETHYBRIDIZATION,
        SanitizeFlags.SANITIZE_SYMMRINGS,
    )
    # OR the flags together, left to right.
    combined_ops = requested_ops[0]
    for op in requested_ops[1:]:
        combined_ops = combined_ops | op
    SanitizeMol(m, combined_ops, catchErrors=True)
def test0InchiWritePubChem(self):
    """Compare InChIs written for each dataset molecule with reference InChIs.

    For every (key, molecules) pair in ``self.dataset`` the reference InChIs
    are looked up in ``self.dataset_inchi`` by the molecule's
    ``PUBCHEM_COMPOUND_CID`` property.  Each molecule is counted as ``same``
    (identical InChI), ``reasonable`` (mismatch explained by one of the
    checks below) or ``diff`` (unexplained mismatch, which is printed).
    The three counters are asserted against fixed expected values for each
    dataset entry.
    """
    for fp, f in self.dataset.items():
        inchi_db = self.dataset_inchi[fp]
        same, diff, reasonable = 0, 0, 0
        for m in f:
            if m is None:  # pragma: nocover
                continue
            ref_inchi = inchi_db[m.GetProp('PUBCHEM_COMPOUND_CID')]
            x, y = MolToInchi(m), ref_inchi
            if x != y:
                # print("---------------")
                # print(m.GetProp('PUBCHEM_COMPOUND_CID'))
                # print(MolToSmiles(m))
                # print(y)
                # print(x)
                # Mismatches whose generated InChI matches this ClO4 pattern
                # are accepted.
                # NOTE(review): the leading '.' is an unescaped regex
                # wildcard -- presumably meant as a literal dot; verify.
                if re.search(r'.[1-9]?ClO4', x) is not None:
                    reasonable += 1
                    continue
                SanitizeMol(m)
                # Intended check: accept molecules with a ring of 8+ atoms.
                # NOTE(review): on Python 3 ``filter`` returns an iterator
                # object, which is always truthy, so this branch is taken
                # for every molecule that reaches it.  The expected counts
                # asserted below may depend on that -- confirm before
                # changing.
                if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]):
                    reasonable += 1
                    continue
                # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                # if it is because RDKit does not think the bond is stereo
                z = MolToInchi(MolFromMolBlock(MolToMolBlock(m)))
                if y != z and inchiDiffPrefix(y, z) == 'b':
                    reasonable += 1
                    continue
                # some warning
                try:
                    MolToInchi(m, treatWarningAsError=True)
                except InchiReadWriteError as inst:
                    # The second element of the exception args is the
                    # message text that is searched for 'Metal'.
                    _, error = inst.args
                    if 'Metal' in error:
                        reasonable += 1
                        continue
                # None of the accepted explanations applied: report it.
                diff += 1
                print('InChI mismatch for PubChem Compound ' + m.GetProp('PUBCHEM_COMPOUND_CID'))
                print(MolToSmiles(m, True))
                print(inchiDiff(x, y))
                print()
            else:
                same += 1
        fmt = "\n{0}InChI write Summary: {1} identical, {2} suffix variance, {3} reasonable{4}"
        print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
        self.assertEqual(same, 1162)
        self.assertEqual(diff, 0)
        self.assertEqual(reasonable, 19)
def generate_png(mol, pngpath, logfile=devnull, size=300):
    """Write a PNG depiction of ``mol`` to ``pngpath``.

    Drawing happens inside two ``stdout_redirected`` contexts (sys.stderr
    towards sys.stdout, then sys.stdout towards ``logfile``).  Hydrogens are
    removed without sanitizing, the result is sanitized with errors caught,
    and the prepared molecule is drawn as a ``size`` x ``size`` image with
    the atom label font size set to ``size / 25``.
    """
    with stdout_redirected(to=sys.stdout, stdout=sys.stderr), \
            stdout_redirected(to=logfile, stdout=sys.stdout):
        stripped = RemoveHs(mol, implicitOnly=False, updateExplicitCount=True, sanitize=False)
        SanitizeMol(stripped, catchErrors=True)

        drawing_options = DrawingOptions()
        drawing_options.atomLabelFontSize = size / 25

        prepared = PrepareMolForDrawing(stripped, forceCoords=True, addChiralHs=True)
        MolToFile(
            prepared,
            pngpath,
            fitImage=True,
            size=(size, size),
            options=drawing_options,
        )
def partial_sanitization(mol):
    """Partially sanitize a molecule.

    All sanitization operations are requested except cleanup, chirality
    cleanup and radical detection.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to sanitize.
    """
    excluded_ops = (SANITIZE_CLEANUP, SANITIZE_CLEANUPCHIRALITY, SANITIZE_FINDRADICALS)
    ops = SANITIZE_ALL
    for excluded in excluded_ops:
        # XOR removes the flag from the full set.
        ops = ops ^ excluded
    SanitizeMol(mol, sanitizeOps=ops)
def to_rdkit_molecule(data: MoleculeContainer):
    """
    MoleculeContainer to RDKit molecule object converter

    Copies atoms (map number, charge, isotope, radical flag) and bonds, then
    transfers tetrahedral and cis/trans stereo labels, adds the 2D layout
    plus every stored conformer, sanitizes the molecule and finally runs a
    forced stereochemistry assignment.
    """
    rd_mol = RWMol()
    index_of = {}  # container atom number -> RDKit atom index
    adjacency = data._bonds

    for number, atom in data.atoms():
        rd_atom = Atom(atom.atomic_number)
        rd_atom.SetAtomMapNum(number)
        if atom.charge:
            rd_atom.SetFormalCharge(atom.charge)
        if atom.isotope:
            rd_atom.SetIsotope(atom.isotope)
        if atom.is_radical:
            rd_atom.SetNumRadicalElectrons(1)
        index_of[number] = rd_mol.AddAtom(rd_atom)

    for begin, end, bond in data.bonds():
        rd_mol.AddBond(index_of[begin], index_of[end], _bond_map[bond.order])

    # Tetrahedral centres: neighbours are passed in atom insertion order.
    for center in data._atoms_stereo:
        rd_atom = rd_mol.GetAtomWithIdx(index_of[center])
        neighbours = adjacency[center]
        ordered_neighbours = [x for x in index_of if x in neighbours]
        sign = data._translate_tetrahedron_sign(center, ordered_neighbours)
        rd_atom.SetChiralTag(_chiral_ccw if sign else _chiral_cw)

    # Double-bond stereo, only for pairs that are directly bonded.
    for pair, sign in data._cis_trans_stereo.items():
        begin, end = pair
        if end not in adjacency[begin]:
            continue  # cumulenes unsupported
        ref_begin, ref_end, *_ = data._stereo_cis_trans[pair]
        rd_bond = rd_mol.GetBondBetweenAtoms(index_of[begin], index_of[end])
        rd_bond.SetStereoAtoms(index_of[ref_begin], index_of[ref_end])
        rd_bond.SetStereo(_cis if sign else _trans)

    layout = Conformer()
    for number, atom in data.atoms():
        layout.SetAtomPosition(index_of[number], (atom.x, atom.y, 0))
    layout.Set3D(False)
    rd_mol.AddConformer(layout, assignId=True)

    for coordinates in data._conformers:
        extra = Conformer()
        for number, xyz in coordinates.items():
            extra.SetAtomPosition(index_of[number], xyz)
        rd_mol.AddConformer(extra, assignId=True)

    SanitizeMol(rd_mol)
    AssignStereochemistry(rd_mol, flagPossibleStereoCenters=True, force=True)
    return rd_mol
def get_BRICS_builds(BRICS_func, rule_test, block_size=1000):
    """Yield the molecules that pass the rule test, in lists of ``block_size``.

    Molecules are taken from the iterable ``BRICS_func``; those for which
    ``get_lipinksi_test(mol, rule_test)`` equals ``True`` are sanitized and
    collected.  Results are produced in blocks to avoid running out of
    memory; a final, shorter block is yielded if anything is left over.
    """
    batch = []
    for candidate in BRICS_func:
        accepted = get_lipinksi_test(candidate, rule_test) == True
        if accepted:
            SanitizeMol(candidate)
            batch.append(candidate)
        if len(batch) == block_size:
            yield batch
            batch = []
    # Hand out whatever did not fill a complete block.
    if batch:
        yield batch
def react(self, reactant):
    """Apply ``self.reactions`` repeatedly, for at most ``self.steps`` rounds.

    Every round runs each reaction on each molecule produced by the
    previous round (starting with ``reactant``).  The first product of each
    outcome is sanitized; if its SMILES has not been seen before it gets
    ``embed_r_groups`` applied, is queued for the next round and added to
    the result.  Iteration stops early when a round produces nothing new.

    Returns the set of collected product molecules.
    """
    products = set()
    seen_smiles = set()
    rounds_left = self.steps
    frontier = [reactant]

    while rounds_left > 0 and frontier:
        current, frontier = frontier, []
        for mol in current:
            SanitizeMol(mol)
            for reaction in self.reactions:
                for outcome in reaction.RunReactants((mol, )):
                    candidate = outcome[0]
                    SanitizeMol(candidate)
                    smiles = MolToSmiles(candidate)
                    if smiles in seen_smiles:
                        continue
                    embed_r_groups(candidate, mol)
                    frontier.append(candidate)
                    seen_smiles.add(smiles)
                    products.add(candidate)
        rounds_left -= 1

    return products
def _apply_rxn(mol, rxn):
    """Apply ``rxn`` repeatedly to ``mol``, for at most ``MAX_PASSES`` passes.

    In each pass the reaction is run on every current molecule; the first
    product of each outcome is sanitized and kept under its isomeric SMILES
    (first occurrence wins).  Products raising ``ValueError`` are skipped.
    The passes stop as soon as one produces no products.

    Returns the first molecule of the last productive pass, or ``mol``
    itself when no pass produced anything.
    """
    current = [mol]
    any_change = False

    for _ in range(MAX_PASSES):
        by_smiles = {}
        for reactant in current:
            first_products = [outcome[0] for outcome in rxn.RunReactants((reactant, ))]
            for candidate in first_products:
                try:
                    SanitizeMol(candidate)
                    key = MolToSmiles(candidate, isomericSmiles=True)
                except ValueError:
                    # assuming an unphysical molecule has been generated
                    continue
                # keep only new structures
                by_smiles.setdefault(key, candidate)

        if not by_smiles:
            break
        any_change = True
        # update list of mols
        current = list(by_smiles.values())

    if any_change:
        return current[0]
    return mol
def partial_sanitization(mol):
    """Partially sanitize a molecule (used during fragmentation)"""
    # Start from every operation and switch off the three that are skipped.
    without_cleanup = SANITIZE_ALL ^ SANITIZE_CLEANUP
    without_chirality_cleanup = without_cleanup ^ SANITIZE_CLEANUPCHIRALITY
    selected_ops = without_chirality_cleanup ^ SANITIZE_FINDRADICALS
    SanitizeMol(mol, sanitizeOps=selected_ops)
def test1InchiReadPubChem(self):
    """Round-trip each dataset molecule through InChI and compare results.

    For every molecule ``x`` is the InChI written from it and ``y`` is the
    InChI obtained after reading ``x`` back and passing the result through
    isomeric SMILES.  Each molecule is counted as ``same`` (``x == y``),
    ``reasonable`` (failure or mismatch explained by one of the checks
    below) or ``diff`` (unexplained, which is printed).  The three counters
    are asserted against fixed expected values for each dataset entry.
    """
    for f in self.dataset.values():
        same, diff, reasonable = 0, 0, 0
        for m in f:
            if m is None:  # pragma: nocover
                continue
            x = MolToInchi(m)
            y = None
            # Silence RDKit error logging while reading the InChI back.
            RDLogger.DisableLog('rdApp.error')
            mol = MolFromInchi(x)
            RDLogger.EnableLog('rdApp.error')
            if mol is not None:
                y = MolToInchi(
                    MolFromSmiles(MolToSmiles(mol, isomericSmiles=True)))
            if y is None:
                # metal involved?
                try:
                    MolToInchi(m, treatWarningAsError=True)
                except InchiReadWriteError as inst:
                    _, error = inst.args
                    if 'Metal' in error or \
                            'Charges were rearranged' in error:
                        reasonable += 1
                        continue
                # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                # RDKit does not like the SMILES? use MolBlock instead
                inchiMol = MolFromInchi(x)
                if inchiMol:
                    rdDepictor.Compute2DCoords(inchiMol)
                    z = MolToInchi(MolFromMolBlock(
                        MolToMolBlock(inchiMol)))
                    if x == z:
                        reasonable += 1
                        continue
                # InChI messed up the radical?
                unsanitizedInchiMol = MolFromInchi(x, sanitize=False)
                if sum([
                        a.GetNumRadicalElectrons() * a.GetAtomicNum()
                        for a in m.GetAtoms()
                        if a.GetNumRadicalElectrons() != 0
                ]) != sum([
                        a.GetNumRadicalElectrons() * a.GetAtomicNum()
                        for a in unsanitizedInchiMol.GetAtoms()
                        if a.GetNumRadicalElectrons() != 0
                ]):
                    reasonable += 1
                    continue
                diff += 1
                cid = m.GetProp('PUBCHEM_COMPOUND_CID')
                print(COLOR_GREEN + 'Empty mol for PubChem Compound ' +
                      cid + '\n' + COLOR_RESET)
                continue
            if x != y:
                # if there was warning in the first place, then this is
                # tolerable
                try:
                    MolToInchi(m, treatWarningAsError=True)
                    MolFromInchi(x, treatWarningAsError=True)
                except InchiReadWriteError:
                    reasonable += 1
                    continue
                # or if there are big rings
                SanitizeMol(m)
                # NOTE(review): on Python 3 ``filter`` returns an iterator
                # object, which is always truthy, so this branch is taken
                # for every molecule that reaches it.  It is left unchanged
                # because the expected counts asserted below may depend on
                # it -- confirm before changing.
                if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]):
                    reasonable += 1
                    continue
                # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                # or if RDKit loses bond stereo
                s = MolToSmiles(m, True)
                if MolToSmiles(MolFromSmiles(s), True) != s:
                    reasonable += 1
                    continue
                # or if it is RDKit SMILES writer unhappy about the mol
                inchiMol = MolFromInchi(x)
                rdDepictor.Compute2DCoords(inchiMol)
                z = MolToInchi(MolFromMolBlock(MolToMolBlock(inchiMol)))
                if x == z:
                    reasonable += 1
                    continue
                diff += 1
                # Look the identifier up here: it was previously only
                # assigned in the ``y is None`` branch above, so this
                # message used an unbound or stale ``cid``.
                cid = m.GetProp('PUBCHEM_COMPOUND_CID')
                print(COLOR_GREEN + 'Molecule mismatch for PubChem Compound ' +
                      cid + COLOR_RESET)
                print(inchiDiff(x, y))
                print()
            else:
                same += 1
        fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}"
        print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
        self.assertEqual(same, 621)
        self.assertEqual(diff, 0)
        self.assertEqual(reasonable, 560)
def main(args, output=sys.stdout, log=logging):
    """Command-line entry point: generate conformers and write them as SDF.

    Parses ``args``, loads the molecule from a mol2 file or a SMILES string
    (unsanitized), sanitizes it, assigns a name, generates conformers with
    ``generate_conformers`` and writes the selected ones to ``output`` with
    ``dump_conformers_sdf``.

    :param args: list of command-line arguments (without the program name)
    :param output: stream the SD file is written to
    :param log: logger-like object providing ``info`` and ``critical``
    :returns: 0 on success
    :raises SystemExit: with status 2 when the molecule cannot be parsed or
        sanitized (argparse also exits on invalid arguments)
    """
    # The text must be passed as ``description``: the first positional
    # argument of ArgumentParser is ``prog`` (the program name shown in the
    # usage line), which is where it previously ended up.
    parser = argparse.ArgumentParser(
        description="""RDKit-based conformer generation proof-of-concept.
        This program accepts either a mol2 file or a SMILES string and produces an SD file
        """)
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('-m', '--mol2', type=str,
                             help="Mol2 file to gererate conformers for")
    input_group.add_argument('-s', '--smiles', type=str,
                             help="SMILES string of molecule")
    parser.add_argument('-N', '--name', type=str, default=None,
                        help="Molecule name")
    parser.add_argument('-H', '--no-hydrogens', action='store_true', default=False,
                        help="Do NOT explicitly add implicit Hydrogens to conformers [default: %(default)s]")
    parser.add_argument('-r', '--rmsd-threshold', type=float, default=2.0,
                        help="Only accept conformers that have an RMSD of at least this value from previously seen conformers [default: %(default)s")
    parser.add_argument('-n', '--num-conformers', type=int, default=None,
                        help="Number of conformers to initially generate [default: auto]")
    parser.add_argument('-F', '--forcefield', type=str, default=DEFAULT_FORCEFIELD,
                        choices=FORCEFIELDS.keys(),
                        help="Forcefield to use for optimization [default: %(default)s]")
    parser.add_argument('-P', '--parallelism', type=int, default=None,
                        help="Number of processes to use [default: 1]")
    params = parser.parse_args(args)

    # Load input molecule.  argparse always defines ``params.mol2``, so a
    # None check is sufficient.
    if params.mol2 is not None:
        mol = MolFromMol2File(params.mol2, sanitize=False)
    else:
        mol = MolFromSmiles(params.smiles, sanitize=False)

    try:
        SanitizeMol(mol)
    except ValueError as e:
        log.critical("Could not sanitize molecule: {0}:".format(str(e)))
        sys.exit(2)
    except Exception:
        # This is `Boost.Python.ArgumentError`
        log.critical("Could not parse molecule!")
        sys.exit(2)

    # Assign user-provided name if applicable
    if params.name is not None:
        mol.SetProp(RD_NAME, params.name)
    elif not mol.HasProp(RD_NAME):
        mol.SetProp(RD_NAME, 'Ligand')

    # Generate 3D conformers
    embedded, selected = generate_conformers(mol,
                                             add_hydrogens=not params.no_hydrogens,
                                             rmsd_threshold=params.rmsd_threshold,
                                             num_conformers=params.num_conformers,
                                             parallelism=params.parallelism,
                                             forcefield=params.forcefield,
                                             log=log)

    log.info("Conformers selected: {0}".format(len(selected)))
    log.info("Energy: min={0:.4f} kcal/mol max={1:.4f} kcal/mol".format(selected[0][1], selected[-1][1]))

    # Conformer ids, in the order given by ``selected``
    sorted_by_energy = [item[0] for item in selected]

    # Render SDF file
    names = dump_conformers_sdf(embedded, output, conf_ids=sorted_by_energy, renumber=True)

    for name, (conf_id, energy) in zip(names, selected):
        log.info("\t{0}: {1:0.4f} kcal/mol".format(name, energy))

    return 0
def remove_isotopes(mol, sanitize=True):
    """Reset the isotope of every atom of ``mol`` to 0, in place.

    :param mol: molecule whose atoms are modified
    :param sanitize: when true, ``SanitizeMol`` is run on ``mol`` afterwards
    :returns: None; ``mol`` itself is modified
    """
    # The former ``EditableMol(mol)`` copy was never used and has been
    # dropped; the atoms of ``mol`` are edited directly, as before.
    for atom in mol.GetAtoms():
        atom.SetIsotope(0)
    if sanitize:
        SanitizeMol(mol)
def _sanitize(mol, sanitizeOps=SANITIZE_ALL):
    """Run SanitizeMol on ``mol`` with the given operations.

    ``sanitizeOps`` defaults to ``SANITIZE_ALL``.  Whatever SanitizeMol
    returns is passed back to the caller.
    """
    outcome = SanitizeMol(mol, sanitizeOps=sanitizeOps)
    return outcome