def depict(self, filename=None, ipython=False):
    """
    Draws the stored atoms, environment atoms and their bonds as an SVG image.

    A scratch molecule is assembled from ``self.atoms``, ``self.enviroments``,
    ``self.Bonds`` and ``self.bondsenvironments``, embedded with
    ``EmbedMolecule`` and rendered on a 400x200 SVG canvas.

    Parameters
    ----------
    filename: str
        If not None, the SVG text is written to this path
    ipython: bool
        Set to True to return the SVG wrapped in an IPython ``SVG`` object

    Returns
    -------
    ipython_svg: SVG object if ipython is set to True, otherwise None
    """
    # NOTE(review): IPythonConsole is not referenced below; the import is kept in
    # case its import-time effects are relied upon - confirm before removing.
    from rdkit.Chem.Draw import IPythonConsole  # noqa: F401
    from rdkit.Chem.Draw import rdMolDraw2D
    from rdkit.Chem.AllChem import EmbedMolecule
    from IPython.display import SVG
    from rdkit.Chem import RWMol, MolFromSmiles, ChiralType

    # The editable molecule starts from a placeholder carbon at index 0, so the
    # atoms added below receive indices 1, 2, ...; the placeholder is removed
    # once every bond has been added.
    rmol = RWMol(MolFromSmiles('C'))
    dict_old_new_idx = {}
    n = 1

    for a in self.atoms:
        old_idx = a.GetIdx()
        rmol.AddAtom(a)
        dict_old_new_idx[old_idx] = n
        n += 1

    for a in self.enviroments:
        old_idx = a.GetIdx()
        # Environment atoms are added without chirality and aromaticity flags
        a.SetChiralTag(ChiralType.CHI_UNSPECIFIED)
        a.SetIsAromatic(0)
        rmol.AddAtom(a)
        dict_old_new_idx[old_idx] = n
        n += 1

    # Bonds are re-created using the translated (new) atom indices
    for b in self.Bonds:
        rmol.AddBond(dict_old_new_idx[b.GetBeginAtomIdx()],
                     dict_old_new_idx[b.GetEndAtomIdx()],
                     b.GetBondType())
    for b in self.bondsenvironments:
        rmol.AddBond(dict_old_new_idx[b.GetBeginAtomIdx()],
                     dict_old_new_idx[b.GetEndAtomIdx()],
                     b.GetBondType())

    rmol.RemoveAtom(0)

    EmbedMolecule(rmol)
    drawer = rdMolDraw2D.MolDraw2DSVG(400, 200)
    drawer.DrawMolecule(rmol)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()

    if filename is not None:
        # The context manager closes the file even when the write fails
        with open(filename, 'w') as f:
            f.write(svg)

    if ipython:
        svg = svg.replace('svg:', '')
        return SVG(svg)
    return None
def get_all_features(self, df, mol_list):
    """
    Computes node, edge and global features for every molecule and stores them in df.

    For each molecule the results of ``self.get_node_features``,
    ``self.get_edge_features`` and ``self.get_global_features`` are written to
    the columns 'x', 'edge_index', 'edge_attr' (plus 'xyz' when the module-level
    flag ``XYZ`` is not 1) and 'u'. The per-feature means over all molecules
    that produced at least one edge attribute are stored in ``self.x_mean`` and
    ``self.edge_attr_mean``. Rows whose 'edge_index' equals 'drop' are removed
    afterwards and the index is reset.

    Parameters
    ----------
    df: pandas.DataFrame
        One row per molecule; row ``i`` must correspond to ``mol_list[i]`` and
        the frame must have a 'refcode_csd' column
    mol_list: list
        The molecules, in the same order as the rows of df

    Returns
    -------
    df: pandas.DataFrame
        The frame with the feature columns filled in and dropped rows removed.
        It is also stored in ``self.df``.
    """
    df = df.astype('object')
    for column in ('x', 'edge_index', 'edge_attr', 'u'):
        df[column] = ''

    print("Length of df =", len(df), "Length of mol_list =", len(mol_list))

    # Running totals over every molecule that yields at least one edge attribute.
    # They are created once, before the loop: resetting them inside the loop
    # would make the final means describe the last molecule only.
    x_sum_all = None
    x_nbrAtoms_all = 0
    edge_attr_sum_all = None
    edge_attr_nbrBonds_all = 0

    for i, mol in enumerate(mol_list):
        # NOTE(review): the nesting of the addH branch is reconstructed - confirm
        # that `else` pairs with `0 == XYZ` (hydrogens are always added when XYZ
        # is set; addH only matters for embedded molecules).
        if 0 == XYZ:
            mol = Chem.AddHs(mol)
            EmbedMolecule(mol)
            if 0 == addH:
                mol = Chem.RemoveHs(mol)
        else:
            mol = Chem.AddHs(mol)

        print("Getting features for mol =", i, "refcode =", df.at[i, "refcode_csd"])

        df.at[i, "x"], x_sum, x_nbrAtoms = self.get_node_features(mol)
        # get_edge_features returns one extra value (the coordinates) when XYZ is not 1
        if 1 == XYZ:
            (df.at[i, "edge_index"], df.at[i, "edge_attr"],
             edge_attr_sum, edge_attr_nbrBonds) = self.get_edge_features(mol, i)
        else:
            (df.at[i, "edge_index"], df.at[i, "edge_attr"], df.at[i, "xyz"],
             edge_attr_sum, edge_attr_nbrBonds) = self.get_edge_features(mol, i)
        df.at[i, "u"] = self.get_global_features(mol)

        # Molecules without edge attributes do not contribute to the means
        if 0 == len(df.at[i, "edge_attr"]):
            continue

        if x_sum_all is None:
            x_sum_all = list(x_sum)
            edge_attr_sum_all = list(edge_attr_sum)
        else:
            x_sum_all = [total + value for total, value in zip(x_sum_all, x_sum)]
            edge_attr_sum_all = [total + value
                                 for total, value in zip(edge_attr_sum_all, edge_attr_sum)]
        x_nbrAtoms_all = x_nbrAtoms_all + x_nbrAtoms
        edge_attr_nbrBonds_all = edge_attr_nbrBonds_all + edge_attr_nbrBonds

    if x_sum_all is None:
        # No molecule contributed (empty input or no edge attributes at all)
        self.x_mean = []
        self.edge_attr_mean = []
    else:
        self.x_mean = [total / x_nbrAtoms_all for total in x_sum_all]
        self.edge_attr_mean = [total / edge_attr_nbrBonds_all for total in edge_attr_sum_all]

    # Now drop the rows that are marked with 'drop'
    old_len = len(df)
    df = df[df.edge_index != 'drop']
    df = df.reset_index(drop=True)
    new_len = len(df)
    if old_len != new_len:
        dropped = old_len - new_len
        print(dropped, "rows were deleted because the coordinates in mol and XYZ did not have the same order")

    self.df = df
    return df
def dock_mol(self, mol):
    """
    Runs a vina docking for *mol* and lets the molecule record the outcome.

    The molecule returned by ``mol.cap_with_h()`` is embedded, stored on
    ``mol.docking_mol`` and written to 'temp.pdb'. ``export_pdbqt('temp')``
    supplies the ligand file, and the vina output and log are placed next to
    it as 'temp-out.pdbqt' and 'temp-out.txt'. The command line is printed and
    executed through ``os_command``; afterwards ``mol.get_energy()`` and
    ``mol.export_data(self.name)`` are called.

    Parameters
    ----------
    mol:
        Object providing ``cap_with_h``, ``get_energy`` and ``export_data``
    """
    capped = mol.cap_with_h()
    EmbedMolecule(capped)
    mol.docking_mol = capped
    MolToPDBFile(capped, Path('temp.pdb').as_posix())

    ligand_pdbqt = export_pdbqt('temp')
    docked_pdbqt = Path(ligand_pdbqt.parent, 'temp-out.pdbqt')
    log_file = Path(ligand_pdbqt.parent, 'temp-out.txt')

    # The config file is looked up as <self.dir>/<self.name>-config.txt
    vina_command = " ".join([
        "vina",
        f"--config {self.dir}/{self.name}-config.txt",
        f"--ligand {ligand_pdbqt}",
        f"--out {docked_pdbqt}",
        f"--log {log_file}",
    ])
    print(vina_command)
    os_command(vina_command)

    mol.get_energy()
    mol.export_data(self.name)
def createXYZ_from_SMILES(df, mol_list):
    """
    Generates an XYZ block for every SMILES in df and drops the rows where that fails.

    Each SMILES is parsed, hydrogens are added, the molecule is embedded and
    its XYZ block is stored in the 'xyz' column. Rows whose SMILES cannot be
    parsed or whose XYZ block is empty are removed, together with the
    matching entry of mol_list.

    Parameters
    ----------
    df: pandas.DataFrame
        Must have a 'smiles' column. The index labels are used to look up
        mol_list, so df is presumably expected to carry a 0..n-1 index
        matching mol_list - verify against the caller.
    mol_list: list
        One entry per row of df

    Returns
    -------
    (df, new_mol_list): tuple
        The filtered frame (index reset) and the entries of mol_list that were kept
    """
    new_mol_list = []
    print("Creating XYZ coordinates from SMILES")
    df = df.astype('object')
    df['xyz'] = ''

    for i, row in df.iterrows():
        xyz = ''
        mol = Chem.MolFromSmiles(row['smiles'])
        # MolFromSmiles returns None for a SMILES it cannot parse; such rows are
        # dropped like any other row without coordinates instead of crashing.
        if mol is not None:
            mol = Chem.AddHs(mol)
            EmbedMolecule(mol)
            xyz = Chem.rdmolfiles.MolToXYZBlock(mol)

        # Compare by value: `is ''` tests object identity, which is not
        # guaranteed to hold for an empty string returned by the library
        if xyz == '':
            print(i, "Unable to create XYZ coordinates for", row['smiles'], "Droping it from the dataframe")
            df.at[i, "xyz"] = 'drop'
        else:
            new_mol_list.append(mol_list[i])
            df.at[i, "xyz"] = xyz

    df = df[df["xyz"] != 'drop']
    df = df.reset_index(drop=True)
    return (df, new_mol_list)
def processline(t, step, line):
    """
    Runs the benchmark operation selected by *step* on one input line.

    The size of whatever the step produces is added to the module-level
    counter ``lensum``. For every step other than 0 the line is first parsed
    with ``MolFromSmiles``.

    Steps: 0 and 100 add the line length; 105 adds the length of the line's
    SHA-256 hex digest; 110 writes the line to ``tmpname`` and adds the file
    size; 120 does the same but also forces the data to disk; 210 / 220 add
    the atom / bond count; 300 adds the length of the regenerated SMILES; 400
    adds the length of the mol block; 420 adds hydrogens, embeds with seed
    2020, removes the hydrogens again and adds the length of the resulting
    mol block; 600 / 610 add the result of ``mol2file`` for 'svg' / 'png'.

    Parameters
    ----------
    t:
        Object whose ``incr()`` is called once per line; a truthy result
        makes this function return immediately
    step: int
        The benchmark step to run
    line:
        The input line (a SMILES)

    Returns
    -------
    int: 1 if ``t.incr()`` was truthy (nothing was processed), otherwise 0

    Raises
    ------
    ValueError: if *step* is not one of the implemented steps
    """
    global lensum
    if t.incr():
        return 1

    if step == 0:
        lensum += len(line)
    else:
        m = MolFromSmiles(line)
        if step == 100:
            lensum += len(line)
        elif step == 105:
            lensum += len(sha256(line).hexdigest())
        elif step in (110, 120):
            with open(tmpname, 'wb+') as f:
                print(line, file=f)
                if step == 120:
                    # fsync only pushes what the OS already holds; the data is
                    # still in Python's own buffer until it is flushed.
                    f.flush()
                    os.fsync(f.fileno())
            lensum += os.stat(tmpname).st_size
        elif step == 210:
            lensum += m.GetNumAtoms()
        elif step == 220:
            lensum += m.GetNumBonds()
        elif step == 300:
            lensum += len(MolToSmiles(m))
        elif step == 400:
            lensum += len(MolToMolBlock(m))
        elif step == 420:
            m2 = AddHs(m)
            EmbedMolecule(m2, randomSeed=2020)
            m2 = RemoveHs(m2)
            m2.SetProp("_Name", "test")
            lensum += len(MolToMolBlock(m2))
        elif step == 600:
            lensum += mol2file(m, 'svg')
        elif step == 610:
            lensum += mol2file(m, 'png')
        else:
            raise ValueError("Not implemented step " + str(step))
    return 0
def depictFGs(self, fgs, filename=None, ipython=False, optimize=False):
    """
    Draws the molecule with the atoms of the given functional groups highlighted.

    The atoms listed in ``AtomsIdx`` and ``EnviromentsIdx`` of every group are
    highlighted; the ``AtomsIdx`` atoms of group ``i`` are colored with
    ``self._colors[i % len(self._colors)]``. The drawing uses a 400x200 SVG
    canvas.

    Parameters
    ----------
    fgs: list
        The functional groups; each must expose ``AtomsIdx`` and ``EnviromentsIdx``
    filename: str
        If not None, the SVG text is written to this path
    ipython: bool
        Set to True to return the SVG wrapped in an IPython ``SVG`` object
    optimize: bool
        Set to True to run ``EmbedMolecule`` on ``self._mol._mol`` before
        drawing. This modifies that molecule in place.

    Returns
    -------
    ipython_svg: SVG object if ipython is set to True, otherwise None
    """
    # NOTE(review): IPythonConsole is not referenced below; the import is kept in
    # case its import-time effects are relied upon - confirm before removing.
    from rdkit.Chem.Draw import IPythonConsole  # noqa: F401
    from rdkit.Chem.Draw import rdMolDraw2D
    from IPython.display import SVG
    from rdkit.Chem.AllChem import EmbedMolecule

    drawer = rdMolDraw2D.MolDraw2DSVG(400, 200)

    highlightAtoms = [a for fg in fgs for a in fg.AtomsIdx]
    highlightAtoms += [a for fg in fgs for a in fg.EnviromentsIdx]

    # Colors cycle when there are more groups than colors; environment atoms
    # get no explicit color entry.
    highlightColors = {
        a: self._colors[i % len(self._colors)]
        for i, fg in enumerate(fgs)
        for a in fg.AtomsIdx
    }

    if optimize:
        EmbedMolecule(self._mol._mol)

    drawer.DrawMolecule(self._mol._mol, highlightAtoms=highlightAtoms, highlightBonds=[],
                        highlightAtomColors=highlightColors)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()

    if filename is not None:
        # The context manager closes the file even when the write fails
        with open(filename, 'w') as f:
            f.write(svg)

    if ipython:
        svg = svg.replace('svg:', '')
        return SVG(svg)
    return None
def depict(self, sketch=True, filename=None, ipython=False, optimize=False, optimizemode='std', removeHs=True,
           atomlabels=None, highlightAtoms=None, resolution=(400, 200)):
    """
    Depicts the molecule. It is possible to save it into an svg file and also to generate a Jupyter notebook
    rendering

    Parameters
    ----------
    sketch: bool
        Set to True for 2D depiction
    filename: str
        Set the filename for the svg file
    ipython: bool
        Set to True to return the Jupyter notebook rendering
    optimize: bool
        Set to True to optimize the conformation. Works only with 3D.
    optimizemode: ['std', 'mmff']
        Set the optimization mode for 3D conformation
    removeHs: bool
        Set to True to hide hydrogens in the depiction
    atomlabels: str
        Accepts any combination of the following parameters as a single string '%a%i%c%*'
        a:atom name, i:atom index, c:sign of the atom formal charge (+/-), *:chiral (* if atom is chiral)
    highlightAtoms: list
        List of atoms to highlight. It can also be a list of atom lists, in which case different colors will be
        used
    resolution: tuple of integers
        Resolution in pixels: (X, Y)

    Returns
    -------
    ipython_svg: SVG object if ipython is set to True

    Raises
    ------
    ValueError: if sketch and optimize are both set, if optimizemode is not 'std' or 'mmff', or if atomlabels
        contains an unknown label code

    Example
    -------
    >>> sm.depict(ipython=True, optimize=True, optimizemode='std')  # doctest: +SKIP
    >>> sm.depict(ipython=True, sketch=True)  # doctest: +SKIP
    >>> sm.depict(ipython=True, sketch=True)  # doctest: +SKIP
    >>> sm.depict(ipython=True, sketch=True, atomlabels="%a%i%c")  # doctest: +SKIP
    >>> ids = np.intersect1d(sm.get('idx', 'hybridization SP2'), sm.get('idx', 'element C'))  # doctest: +SKIP
    >>> sm.depict(ipython=True, sketch=True,highlightAtoms=ids.tolist(), removeHs=False)  # doctest: +SKIP
    """
    # Validate the arguments first so that a bad call fails before the heavy
    # imports and the deep copy of the molecule.
    if sketch and optimize:
        raise ValueError('Impossible to use optimization in 2D sketch representation')

    if optimizemode not in ['std', 'mmff']:
        raise ValueError('Optimization mode {} not understood. Can be "std" or "mmff"'.format(optimizemode))

    from rdkit import Chem
    from rdkit.Chem.AllChem import Compute2DCoords, EmbedMolecule, MMFFOptimizeMolecule, ETKDG
    from copy import deepcopy

    elements = self._element
    indexes = self._idx
    formalcharges = self._formalcharge
    chirals = self._chiral

    # Work on a copy so the depiction never alters the stored molecule
    _mol = deepcopy(self._mol)

    if sketch:
        Compute2DCoords(_mol)

    if removeHs:
        _mol = Chem.RemoveHs(_mol)
        # The per-atom properties must be filtered the same way so that they
        # still line up with the atoms of the hydrogen-free molecule.
        elements = self.get('element', 'element H', invert=True)
        indexes = self.get('idx', 'element H', invert=True)
        formalcharges = self.get('formalcharge', 'element H', invert=True)
        chirals = self.get('chiral', 'element H', invert=True)

    _labelsFunc = ['a', 'i', 'c', '*']

    if atomlabels is not None:
        labels = atomlabels.split('%')[1:]
        # Only the sign of the charge is shown; every positive charge (not just
        # +1) maps to "+" and every negative charge to "-".
        formalcharges = ['' if c == 0 else '+' if c > 0 else '-' for c in formalcharges]
        chirals = ['' if c == '' else '*' for c in chirals]
        values = [elements, indexes, formalcharges, chirals]

        # Pick the requested property lists in the order given by the caller
        # and concatenate them atom by atom.
        labels_required = [values[_labelsFunc.index(label)] for label in labels]
        atomlabels = ["".join(str(part) for part in parts) for parts in zip(*labels_required)]

    if optimize:
        if optimizemode == 'std':
            EmbedMolecule(_mol, ETKDG())
        elif optimizemode == 'mmff':
            MMFFOptimizeMolecule(_mol)

    return _depictMol(_mol, filename=filename, ipython=ipython, atomlabels=atomlabels,
                      highlightAtoms=highlightAtoms, resolution=resolution)
canvas.addCanvasText('%s\r\nMolWt: %g\tTPSA: %g' % (s, MolWt(m), TPSA(m)), pos, font) with open('xx' + s + '.png', 'w') as f: canvas.flush() img.save(f) if __name__ == '__main__': drawmol('CN1CCC[C@H]1c2cccnc2') drawmol('CC(=O)OC1=CC=CC=C1C(=O)O') drawmol('O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5') sys.exit(0) # sample code to use new drawing API (older rdkit do not have DrawString) from rdkit.Chem.AllChem import EmbedMolecule assert EmbedMolecule(m) >= 0 x = Draw.rdMolDraw2D.MolDraw2DSVG(200, 250) x.DrawMolecule(m) x.DrawString('Test String', 20, 200) x.FinishDrawing() print(x.GetDrawingText()) # sample code to generate a legend legstr = '' if molname: legstr += molname + '\n' legstr += '%s\nWt=%g LogP=%g TPSA=%g\nHBA=%d HBD=%d RotBond=%d\n' % \ (smiles, MolWt(mol), MolLogP(mol), TPSA(mol), NumHAcceptors(mol), NumHDonors(mol), NumRotatableBonds(mol))
def depict(
    self,
    ids=None,
    sketch=True,
    filename=None,
    ipython=False,
    optimize=False,
    optimizemode="std",
    removeHs=True,
    legends=None,
    highlightAtoms=None,
    mols_perrow=3,
):
    """
    Depicts the molecules into a grid. It is possible to save it into an svg file and also to generate a
    Jupyter notebook rendering

    Parameters
    ----------
    ids: list
        The index of the molecules to depict
    sketch: bool
        Set to True for 2D depiction
    filename: str
        Set the filename for the svg file
    ipython: bool
        Set to True to return the Jupyter notebook rendering
    optimize: bool
        Set to True to optimize the conformation. Works only with 3D.
    optimizemode: ['std', 'mmff']
        Set the optimization mode for 3D conformation
    removeHs: bool
        Set to True to hide hydrogens in the depiction
    legends: str
        The name to use for each molecule. Can be 'names': the name of the molecules themselves; or 'items':
        an incremental id
    highlightAtoms: list
        A list of atoms to highlight for each molecule. It can also be a list of atom lists, in which case
        different colors will be used
    mols_perrow: int
        The number of molecules to depict per row of the grid

    Returns
    -------
    ipython_svg: SVG object if ipython is set to True

    Raises
    ------
    ValueError: if sketch and optimize are both set, if legends is not None, 'names' or 'items', or if
        highlightAtoms does not have one entry per depicted molecule
    """
    # Validate the plain arguments first so that a bad call fails before the
    # heavy imports.
    if sketch and optimize:
        raise ValueError(
            "Impossible to use optmization in 2D sketch representation")

    if legends is not None and legends not in ["names", "items"]:
        raise ValueError('The "legends" should be "names" or "items"')

    from rdkit.Chem.AllChem import (
        Compute2DCoords,
        EmbedMolecule,
        MMFFOptimizeMolecule,
        ETKDG,
    )
    from rdkit.Chem import RemoveHs
    from moleculekit.smallmol.util import depictMultipleMols

    _smallmols = self.getMols(ids)

    if ids is None:
        _mols = [m._mol for m in self._mols]
    else:
        _mols = [m._mol for m in self.getMols(ids)]

    if highlightAtoms is not None:
        if len(highlightAtoms) != len(_mols):
            raise ValueError(
                "The highlightAtoms {} should have the same length of the "
                "mols {}".format(len(highlightAtoms), len(_mols)))

    if sketch:
        for _m in _mols:
            Compute2DCoords(_m)

    if removeHs:
        _mols = [RemoveHs(_m) for _m in _mols]

    # activate 3D coords optimization
    if optimize:
        if optimizemode == "std":
            # The ETKDG parameters belong to the embedding call;
            # MMFFOptimizeMolecule takes no embedding parameters.
            for _m in _mols:
                EmbedMolecule(_m, ETKDG())
        elif optimizemode == "mmff":
            for _m in _mols:
                MMFFOptimizeMolecule(_m)

    legends_list = []
    if legends == "names":
        legends_list = [_m.getProp("ligname") for _m in _smallmols]
    elif legends == "items":
        legends_list = [str(n + 1) for n in range(len(_smallmols))]

    return depictMultipleMols(
        _mols,
        ipython=ipython,
        legends=legends_list,
        highlightAtoms=highlightAtoms,
        filename=filename,
        mols_perrow=mols_perrow,
    )
#!/usr/bin/python2
# Little harness for timing how long it takes to embed a molecule
# which seems extremely variable on one machine,
#
# Reads one SMILES per line from stdin and embeds each molecule.
# Environment variables (all integers, default 0):
#   MOLEMBED_TIME  non-zero: prefix each echoed line with the milliseconds
#                  elapsed since the previous line was echoed
#   MOLEMBED_ADDH  non-zero: add hydrogens before embedding
#   MOLEMBED_SEED  value passed to EmbedMolecule as randomSeed
from __future__ import print_function, division

import sys, time, os

from rdkit.Chem import MolFromSmiles, AddHs, RemoveHs
from rdkit.Chem.AllChem import EmbedMolecule

if __name__ == "__main__":
    dotimestamp = int(os.getenv('MOLEMBED_TIME', '0'))
    doaddh = int(os.getenv('MOLEMBED_ADDH', '0'))
    rseed = int(os.getenv('MOLEMBED_SEED', '0'))

    t0 = time.time()
    # The whole input is read before the loop starts, so waiting for stdin
    # is not counted in the per-line intervals.
    for line in sys.stdin.readlines():
        s = line.strip()
        if dotimestamp:
            # The interval printed here covers the work done for the previous line
            t1 = time.time()
            dt = (t1 - t0) * 1e3
            print('%.3f' % dt, s)
            t0 = t1
        else:
            print(s)

        m = MolFromSmiles(s)
        if m is None:
            # MolFromSmiles returns None for a SMILES it cannot parse; report
            # it and keep timing the remaining input instead of crashing.
            print('skipping unparsable SMILES:', s, file=sys.stderr)
            continue

        m2 = AddHs(m) if doaddh else m
        EmbedMolecule(m2, randomSeed=rseed)