def PrintAsBase64PNGString(x, renderer=None):
    """Render molecule ``x`` as an HTML/SVG snippet.

    When the module-level ``molRepresentation`` is ``'svg'`` (case-insensitive)
    the SVG markup produced by ``Draw._moltoSVG`` is returned; otherwise an
    ``<img>`` tag embedding the output of ``_get_image`` is returned.
    Atoms stored on the molecule as ``__sssAtoms`` are highlighted when
    ``highlightSubstructures`` is truthy. ``renderer`` is accepted but unused.
    """
    highlightAtoms = []
    if highlightSubstructures and hasattr(x, '__sssAtoms'):
        highlightAtoms = x.__sssAtoms

    if molRepresentation.lower() != 'svg':
        png = Draw._moltoimg(x, molSize, highlightAtoms, "", returnPNG=True, kekulize=True)
        return '<img src="data:image/png;base64,%s" alt="Mol"/>' % _get_image(png)

    # Imported lazily so IPython is only needed for the SVG path.
    from IPython.display import SVG
    markup = Draw._moltoSVG(x, molSize, highlightAtoms, "", True)
    return SVG(markup).data
def _toPNG(mol):
    """Draw ``mol`` through ``Draw._moltoimg`` with ``returnPNG=True``.

    Atoms stored on the molecule as ``__sssAtoms`` are highlighted; the
    module-level ``molSize`` and ``kekulizeStructures`` settings are applied.
    """
    highlightAtoms = getattr(mol, '__sssAtoms', [])
    return Draw._moltoimg(
        mol,
        molSize,
        highlightAtoms,
        "",
        returnPNG=True,
        kekulize=kekulizeStructures,
    )
def make_similarity_maps(mol, weights, colorMap=cm.PiYG, scale=-1, size=(250, 250), sigma=None,
                         coordScale=1.5, step=0.01, colors='k', contourLines=10, alpha=0.5,
                         **kwargs):
    """Draw a similarity map for ``mol`` from per-atom ``weights``.

    Heavily based on the similarity map function in the RDKit, with a few
    changes to deal with exceptions and change rendering (a colorbar with
    blank tick labels is added).

    Takes an RDKit molecule and a list of atom-based weights and returns the
    matplotlib figure. Raises ``ValueError`` if the molecule has fewer than
    two atoms. When ``sigma`` is None it is derived from the 2D distance
    between the atoms of the first bond (or atoms 0 and 1 if there are no
    bonds). ``kwargs`` are forwarded to both ``Draw.MolToMPL`` and ``contour``.
    """
    if mol.GetNumAtoms() < 2:
        raise ValueError("too few atoms")

    fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)

    if sigma is None:
        if mol.GetNumBonds() > 0:
            first_bond = mol.GetBondWithIdx(0)
            a_idx = first_bond.GetBeginAtomIdx()
            b_idx = first_bond.GetEndAtomIdx()
        else:
            a_idx, b_idx = 0, 1
        pos_a = mol._atomPs[a_idx]
        pos_b = mol._atomPs[b_idx]
        distance = math.sqrt(sum((pos_a[k] - pos_b[k]) ** 2 for k in range(2)))
        sigma = round(0.3 * distance, 2)

    x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)

    # scaling: a non-positive scale means "use the largest absolute peak"
    maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) if scale <= 0.0 else scale

    # coloring
    axes = fig.axes[0]
    cax = axes.imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower',
                      extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale)
    cbar = fig.colorbar(cax, shrink=.75, pad=.02, ticks=[-maxScale, 0, maxScale],
                        orientation='vertical')
    cbar.ax.set_yticklabels(['', '', ''])

    # contour lines
    axes.contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
    return fig
def GetSimilarityMapFromWeights(
    mol,
    weights,
    colorMap=cm.PiYG,  # @UndefinedVariable #pylint: disable=E1101
    scale=-1,
    size=(250, 250),
    sigma=None,
    coordScale=1.5,
    step=0.01,
    colors="k",
    contourLines=10,
    alpha=0.5,
    **kwargs
):
    """
    Generates the similarity map for a molecule given the atomic weights.

    Parameters:
      mol -- the molecule of interest
      weights -- the per-atom weights
      colorMap -- the matplotlib color map scheme
      scale -- the scaling: scale <= 0 -> the absolute maximum weight is used as maximum scale
                            scale = double -> this is the maximum scale
      size -- the size of the figure
      sigma -- the sigma for the Gaussians (derived from the first bond length if None)
      coordScale -- scaling factor for the coordinates
      step -- the step for calcAtomGaussian
      colors -- color of the contour lines
      contourLines -- if integer number N: N contour lines are drawn
                      if list(numbers): contour lines at these numbers are drawn
      alpha -- the alpha blending value for the contour lines
      kwargs -- additional arguments for drawing

    Returns the matplotlib figure; raises ValueError for molecules with
    fewer than two atoms.
    """
    if mol.GetNumAtoms() < 2:
        raise ValueError("too few atoms")

    fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)

    if sigma is None:
        if mol.GetNumBonds() > 0:
            first_bond = mol.GetBondWithIdx(0)
            a_idx = first_bond.GetBeginAtomIdx()
            b_idx = first_bond.GetEndAtomIdx()
        else:
            a_idx, b_idx = 0, 1
        pos_a = mol._atomPs[a_idx]
        pos_b = mol._atomPs[b_idx]
        distance = math.sqrt(sum((pos_a[k] - pos_b[k]) ** 2 for k in range(2)))
        sigma = round(0.3 * distance, 2)

    x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)

    # scaling
    maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) if scale <= 0.0 else scale

    # coloring
    axes = fig.axes[0]
    axes.imshow(
        z,
        cmap=colorMap,
        interpolation="bilinear",
        origin="lower",
        extent=(0, 1, 0, 1),
        vmin=-maxScale,
        vmax=maxScale,
    )

    # contour lines
    # only draw them when at least one weight is not zero
    if any(w != 0.0 for w in weights):
        axes.contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
    return fig
def _toSVG(mol):
    """Return SVG output for ``mol``, or None when SVG rendering is disabled.

    Rendering is gated on the module-level ``ipython_useSVG`` flag. Atoms
    stored on the molecule as ``__sssAtoms`` are highlighted.
    """
    if not ipython_useSVG:
        return None
    highlightAtoms = getattr(mol, '__sssAtoms', [])
    # NOTE(review): `kekulize` is not bound inside this function, so it must be
    # a module-level name; if none exists this raises NameError. A sibling
    # renderer reads `kekulizeStructures` instead -- confirm which is intended.
    return Draw._moltoSVG(mol, molSize, highlightAtoms, "", kekulize)
def GetSimilarityMapFromWeights(mol, weights, colorMap=None, scale=-1, size=(250, 250),
                                sigma=None, coordScale=1.5, step=0.01, colors='k',
                                contourLines=10, alpha=0.5, **kwargs):
    """
    Generates the similarity map for a molecule given the atomic weights.

    Parameters:
      mol -- the molecule of interest
      weights -- the per-atom weights
      colorMap -- the matplotlib color map scheme, default is custom PiWG color map
      scale -- the scaling: scale <= 0 -> the absolute maximum weight is used as maximum scale
                            scale = double -> this is the maximum scale
      size -- the size of the figure
      sigma -- the sigma for the Gaussians (derived from the first bond length if None)
      coordScale -- scaling factor for the coordinates
      step -- the step for calcAtomGaussian
      colors -- color of the contour lines
      contourLines -- if integer number N: N contour lines are drawn
                      if list(numbers): contour lines at these numbers are drawn
      alpha -- the alpha blending value for the contour lines
      kwargs -- additional arguments for drawing

    Returns the matplotlib figure (with its first axes switched off); raises
    ValueError for molecules with fewer than two atoms.
    """
    if mol.GetNumAtoms() < 2:
        raise ValueError("too few atoms")

    fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)

    if sigma is None:
        if mol.GetNumBonds() > 0:
            first_bond = mol.GetBondWithIdx(0)
            a_idx = first_bond.GetBeginAtomIdx()
            b_idx = first_bond.GetEndAtomIdx()
        else:
            a_idx, b_idx = 0, 1
        pos_a = mol._atomPs[a_idx]
        pos_b = mol._atomPs[b_idx]
        distance = math.sqrt(sum((pos_a[k] - pos_b[k])**2 for k in range(2)))
        sigma = round(0.3 * distance, 2)

    x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)

    # scaling
    maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) if scale <= 0.0 else scale

    # coloring: default to a pink-white-green map built from the PiYG end colors
    if colorMap is None:
        base_cmap = cm.get_cmap('PiYG', 2)
        colorMap = LinearSegmentedColormap.from_list(
            'PiWG', [base_cmap(0), (1.0, 1.0, 1.0), base_cmap(1)], N=255)

    fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower',
                       extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale)

    # contour lines
    # only draw them when at least one weight is not zero
    if any(w != 0.0 for w in weights):
        contourset = fig.axes[0].contour(x, y, z, contourLines, colors=colors, alpha=alpha,
                                         **kwargs)
        for j, collection in enumerate(contourset.collections):
            level = contourset.levels[j]
            if level == 0.0:
                # hide the zero-level line
                collection.set_linewidth(0.0)
            elif level < 0:
                # negative levels are drawn dashed
                collection.set_dashes([(0, (3.0, 3.0))])

    fig.axes[0].set_axis_off()
    return fig
def MolToImage(mol, max_size=(1000, 1000), kekulize=True, options=None, canvas=None, **kwargs):
    """Wrapper for RDKit's MolToImage that can also draw reaction arrows.

    If ``mol == '->'`` a forward arrow is drawn on a 160x160 canvas; if
    ``mol`` is None a double-shafted (retro) arrow is drawn; any other value
    is forwarded to ``Draw.MolToImage`` with ``size=max_size``.
    """
    if not options:
        options = defaultDrawOptions()

    is_forward_arrow = mol == '->'
    if not is_forward_arrow and mol is not None:
        return Draw.MolToImage(mol, size=max_size, kekulize=kekulize, options=options,
                               canvas=canvas, **kwargs)

    subImgSize = (160, 160)
    width, height = subImgSize
    mid = height // 2
    img, canvas = Draw._createCanvas(subImgSize)

    if is_forward_arrow:
        tip = (width - 10, mid)
        segments = [
            ((10, mid), tip),
            ((width - 20, mid - 10), tip),
            ((width - 20, mid + 10), tip),
        ]
    else:
        # retro arrow or error
        tip = (width - 10, mid)
        segments = [
            ((10, mid - 7), (width - 17, mid - 7)),
            ((10, mid + 7), (width - 17, mid + 7)),
            ((width - 24, mid - 14), tip),
            ((width - 24, mid + 14), tip),
        ]

    for start, end in segments:
        canvas.addCanvasLine(start, end, lineWidth=2, color=(0, 0, 0))

    # Canvas backends differ: some expose flush(), others save().
    if hasattr(canvas, 'flush'):
        canvas.flush()
    else:
        canvas.save()
    return img
def MycreateCanvas(size, color='white'):
    """Create an RGBA image of ``size`` filled with ``color`` plus a canvas on it.

    Returns ``(img, canvas)`` when ``Draw._getCanvas()`` reports an AGG or
    Cairo backend, otherwise ``(None, None)``.
    """
    # noinspection PyProtectedMember
    useAGG, useCairo, Canvas = Draw._getCanvas()
    if not (useAGG or useCairo):
        return None, None

    # Prefer the legacy top-level ``Image`` module, fall back to Pillow.
    try:
        import Image
    except ImportError:
        from PIL import Image

    img = Image.new("RGBA", size, color)
    return img, Canvas(img)
def generateSimilarityMaps(mols, weights, fp):
    """Generate and save a similarity map for each molecule.

    ``weights[i]`` holds the atom weights for ``mols[i]``. Each figure is
    written to ``pics/mol<N>_<fp>.png`` with N counting from 1.
    """
    # colormap to use
    colormap = cm.PiYG

    for idx, molecule in enumerate(mols):
        figure = Draw.MolToMPL(molecule, coordScale=1.5, size=(250, 250))
        # the values 0.02 and 0.01 can be adjusted for the size of the molecule
        x, y, z = Draw.calcAtomGaussians(molecule, 0.02, step=0.01, weights=weights[idx])
        # use the maximum absolute peak as maximum scale
        peak = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z)))
        axes = figure.axes[0]
        # this does the coloring
        axes.imshow(z, cmap=colormap, interpolation='bilinear', origin='lower',
                    extent=(0, 1, 0, 1), vmin=-peak, vmax=peak)
        # this draws 10 contour lines
        # alternatively also the z values for the lines can be specified
        axes.contour(x, y, z, 10, colors='k', alpha=0.5)
        # this writes the figure in a file
        out_path = 'pics/mol' + str(idx + 1) + '_' + fp + '.png'
        figure.savefig(out_path, bbox_inches='tight')
def getMapping(moli, molj, hydrogens=False, fname=None, time_out=150):
    """
    Compute the MCS between two passed molecules

    Parameters
    ----------

    moli : RDKit molecule object
        the first molecule used to perform the MCS calculation
    molj : RDKit molecule object
        the second molecule used to perform the MCS calculation
    hydrogens : bool
        include or not the hydrogens in the MCS calculation
    fname : string
        the filename used to output a png file depicting the MCS mapping
    time_out: int
        the max time in seconds used to compute the MCS

    Returns:
    --------
    map_moli_molj: python list of tuple [...(i,j)...]
        the list of tuple which contains the atom mapping indexes between
        the two molecules. The indexes (i,j) are respectively related to
        the first (moli) and the second (molj) passed molecules

    Raises
    ------
    ValueError
        if the MCS search times out, finds no atoms, cannot be sanitized,
        or cannot be mapped back onto one of the molecules
    """

    # Molecule copies
    moli_c = Chem.Mol(moli)
    molj_c = Chem.Mol(molj)

    if not hydrogens:
        moli_c = AllChem.RemoveHs(moli_c)
        molj_c = AllChem.RemoveHs(molj_c)

    # MCS calculation. In RDKit the MCS is a smart string. Ring atoms are
    # always mapped in ring atoms.
    mcs = rdFMCS.FindMCS([moli_c, molj_c],
                         timeout=time_out,
                         atomCompare=rdFMCS.AtomCompare.CompareAny,
                         bondCompare=rdFMCS.BondCompare.CompareAny,
                         matchValences=False,
                         ringMatchesRingOnly=True,
                         completeRingsOnly=False,
                         matchChiralTag=False)

    # Checking
    if mcs.canceled:
        raise ValueError('Timeout! No MCS found between passed molecules')

    if mcs.numAtoms == 0:
        raise ValueError('No MCS was found between the molecules')

    # The found MCS pattern (smart strings) is converted to a RDKit molecule
    mcs_mol = Chem.MolFromSmarts(mcs.smartsString)

    try:
        Chem.SanitizeMol(mcs_mol)
    except Exception:
        # if not, try to recover the atom aromaticity which is
        # important for the ring counter
        sanitFail = Chem.SanitizeMol(
            mcs_mol,
            sanitizeOps=Chem.SanitizeFlags.SANITIZE_SETAROMATICITY,
            catchErrors=True)
        if sanitFail:
            # if not, the MCS is skipped
            raise ValueError('Sanitization Failed...')

    # GetSubstructMatch returns an empty tuple when there is no match, so a
    # single call both tests for and retrieves the match.

    # mcs indexes mapped back to the first molecule moli
    moli_sub = moli_c.GetSubstructMatch(mcs_mol)
    if not moli_sub:
        raise ValueError('RDkit MCS Subgraph first molecule search failed')

    # mcs indexes mapped back to the second molecule molj
    molj_sub = molj_c.GetSubstructMatch(mcs_mol)
    if not molj_sub:
        raise ValueError(
            'RDkit MCS Subgraph second molecule search failed')

    mcs_sub = mcs_mol.GetSubstructMatch(mcs_mol)
    if not mcs_sub:
        raise ValueError('RDkit MCS Subgraph search failed')

    # Map between the two molecules. Materialised as a list: on Python 3 a
    # bare zip() is a one-shot iterator, which would not match the documented
    # return type and could not be indexed or traversed twice.
    map_moli_to_molj = list(zip(moli_sub, molj_sub))

    # depict the mapping by using a .png file
    if fname:
        AllChem.Compute2DCoords(moli_c)
        AllChem.Compute2DCoords(molj_c)
        AllChem.Compute2DCoords(mcs_mol)

        moli_fname = 'Moli'
        molj_fname = 'Molj'
        mcs_fname = 'Mcs'

        DrawingOptions.includeAtomNumbers = True
        try:
            img = Draw.MolsToGridImage(
                [moli_c, molj_c, mcs_mol],
                molsPerRow=3,
                subImgSize=(400, 400),
                legends=[moli_fname, molj_fname, mcs_fname],
                highlightAtomLists=[moli_sub, molj_sub, mcs_sub])
            img.save(fname)
        finally:
            # Reset the global drawing flag even if drawing or saving fails.
            DrawingOptions.includeAtomNumbers = False

    return map_moli_to_molj
def visualize_mol(path, new_mol):
    """Compute 2D coordinates for ``new_mol`` and draw it to the file ``path``.

    The destination path is echoed to stdout before the image is written.
    """
    # kk: output/draw the dividetree
    AllChem.Compute2DCoords(new_mol)
    print(path)
    Draw.MolToFile(new_mol, path)
def find_minimum_subgraph(smiles, selected_atoms, vis_dir=None):
    """Return the SMILES of a reduced subgraph that keeps ``selected_atoms``.

    The molecule is split into clusters by ``find_clusters``; clusters are
    chosen so every selected atom is covered, unselected clusters are then
    pruned iteratively, and the atoms of the remaining clusters are passed to
    ``extract_subgraph``.

    Parameters:
        smiles: SMILES string of the full molecule.
        selected_atoms: atom indices that must be covered by a cluster.
        vis_dir: optional directory; when given, three PNGs are written
            (selected atoms, retained atoms, extracted molecule).

    Returns:
        The first element of the tuple returned by ``extract_subgraph``.

    NOTE(review): assumes ``find_clusters`` returns (list of atom-index
    collections, per-atom list of cluster indices) -- confirm against its
    definition.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, atom_cls = find_clusters(mol)
    selected_clusters = set()
    cluster_votes = {}

    # First iteration: select a cluster when,
    # 1. An atom uniquely belongs to this cluster,
    # 2. Two atoms belong to this cluster.
    for atom in selected_atoms:
        assert len(atom_cls[atom]) > 0
        if len(atom_cls[atom]) == 1:
            selected_clusters.add(atom_cls[atom][0])
        else:
            for cls in atom_cls[atom]:
                if cls not in cluster_votes:
                    cluster_votes[cls] = 0
                cluster_votes[cls] += 1
                if cluster_votes[cls] >= 2:
                    selected_clusters.add(cls)

    # Second iteration: randomly select a cluster for the remaining atoms.
    # (In practice the first cluster listed for the atom is taken.)
    for atom in selected_atoms:
        selected = False
        for cls in atom_cls[atom]:
            if cls in selected_clusters:
                selected = True
                break
        if not selected:
            selected_clusters.add(atom_cls[atom][0])

    # Cluster adjacency: two clusters are neighbors when they share an atom.
    cluster_neighbor = {}
    for i in range(len(clusters)):
        cluster_neighbor[i] = set()
        for atom in clusters[i]:
            cluster_neighbor[i].update(atom_cls[atom])
        cluster_neighbor[i].remove(i)

    # remove degree-1 unselected clusters iteratively
    leaf_clusters = set()
    while True:
        updated = False
        for i in range(len(clusters)):
            if i in selected_clusters or i in leaf_clusters:
                continue
            if len(cluster_neighbor[i]) > 1:
                # A cluster with several neighbors is only removable when all
                # of its neighbors are also neighbors of each other.
                removable = True
                neighbor_pairs = [(j, k) for j in cluster_neighbor[i] for k in cluster_neighbor[i] if j < k]
                for j, k in neighbor_pairs:
                    if j not in cluster_neighbor[k] or k not in cluster_neighbor[j]:
                        removable = False
                        break
                if not removable:
                    continue
            leaf_clusters.add(i)
            # Detach the removed cluster from its neighbors so their degree
            # drops for the next pass.
            for j in cluster_neighbor[i]:
                cluster_neighbor[j].remove(i)
            updated = True
        if not updated:
            break

    # Atoms of every cluster that survived pruning.
    minimum_atoms = set()
    for i in range(len(clusters)):
        if i not in leaf_clusters:
            minimum_atoms.update(clusters[i])

    minimum_smiles, _ = extract_subgraph(smiles, minimum_atoms)
    # print(f'{smiles} --> {minimum_smiles}')

    if vis_dir is not None:
        # File names are keyed by the number of selected atoms.
        png_f = f'atoms_selected{len(selected_atoms)}.png'
        Draw.MolToFile(mol, filename=os.path.join(vis_dir, png_f), highlightAtoms=selected_atoms)
        png_f = f'atoms_minimum{len(selected_atoms)}.png'
        Draw.MolToFile(mol, filename=os.path.join(vis_dir, png_f), highlightAtoms=minimum_atoms)
        png_f = f'atoms_minimum_extracted{len(selected_atoms)}.png'
        Draw.MolToFile(Chem.MolFromSmiles(minimum_smiles), filename=os.path.join(vis_dir, png_f))

    return minimum_smiles
def PrintAsBase64PNGString(x, renderer=None):
    """Return an HTML ``<img>`` tag embedding the image drawn for molecule ``x``.

    The image comes from ``Draw.MolToImage`` and is encoded by ``_get_image``.
    ``renderer`` is accepted but unused.
    """
    encoded = _get_image(Draw.MolToImage(x))
    return '<img src="data:image/png;base64,%s" alt="Mol"/>' % encoded
# Print summary information about the dataset and its first sample.
print("Dataset target: ", dataset.num_classes)
print("Dataset length: ", dataset.len)
print("Dataset sample: ", dataset[0])
print("Sample nodes: ", dataset[0].num_nodes)
print("Sample edges: ", dataset[0].num_edges)
print("Edges indexes: ", dataset[0].edge_index.t())
print(dataset[0].x)
print(dataset[0].edge_index.t())

from rdkit import Chem
from rdkit.Chem import Draw

# Draw the first sample from its "smiles" entry as a 360x360 image and save it.
molecule = Chem.MolFromSmiles(dataset[0]["smiles"])
fig = Draw.MolToImage(molecule, size=(360, 360))
# NOTE(review): hard-coded absolute output path (with a doubled slash).
fig.save('/home/anaconda3/work//molecule_first.png')

#data.num_classes
#data.num_edges
#data.num_node_features
#data.contains_isolated_nodes()
#data.contains_self_loops()
#data.is_directed()
# Unique SMILES for this latent point: the original molecule first, followed
# by novel valid molecules decoded from perturbed latent vectors.
s_list = [s_origin[i_latent]]

### Generated Molecules by VAE
s_z = np.random.normal(
    0.0, 1.0, [batch_size, latent_dim]) * perturb + latent_vector[i_latent]
mol_gen = model.generate_molecule(s_z)

for generated in mol_gen:
    # Decoding is stochastic: retry up to 100 times until a SMILES string
    # that RDKit can parse is produced, then move on to the next sample.
    for _attempt in range(100):
        s = stochastic_convert_to_smiles(generated, char)
        try:
            m = Chem.MolFromSmiles(s)
        except Exception:
            # Best effort: a string that cannot even be handed to the parser
            # is treated like an invalid molecule and retried.
            continue
        if m:
            if s not in s_list:
                s_list.append(s)
            break

print(len(s_list))
# s_list holds strings, so iterate over the values directly; indexing the
# list with its own elements raised TypeError.
for s in s_list:
    print(s)

mol_list = [Chem.MolFromSmiles(s) for s in s_list]
img = Draw.MolsToGridImage(mol_list, molsPerRow=5)
img.save('./figures/convVAE_' + str(i_latent) + '_' + str(perturb) + '.png')
def main(argv): sqlitefile = '' # Process command line options try: opts, args = getopt.getopt(argv, 't:s:d:', ['targetID=', 'sqlitefile=', 'panddadir=']) except getopt.GetoptError as err: print err print 'process.py -t <TargetID> -s <SQLiteFile> -d <PANDDA dir>' sys.exit(2) if len(opts) < 3: print 'Missing arguments:' print 'process.py -t <TargetID> -s <SQLiteFile> -d <PANDDA dir>' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'process.py -t <TargetID> -s <SQLiteFile>' sys.exit() elif opt in ("-t", "--targetID"): targetID = arg elif opt in ("-s", "--sqlitefile"): sqlitefile = arg elif opt in ("-d", "--panddadir"): panddadir = arg # Create directory structure if not os.path.exists(panddadir + "/compoundImages"): os.makedirs(panddadir + "/compoundImages") if not os.path.exists(panddadir + "/icbs"): os.makedirs(panddadir + "/icbs") if not os.path.exists(panddadir + "/pdbs"): os.makedirs(panddadir + "/pdbs") if not os.path.exists(panddadir + "/maps"): os.makedirs(panddadir + "/maps") if not os.path.exists(panddadir + "/residueplots"): os.makedirs(panddadir + "/residueplots") if not os.path.exists(panddadir + "/mapImages"): os.makedirs(panddadir + "/mapImages") # Create HTML file and write header htmlfile = open(panddadir + "/index.html", "w") htmlfile.write("<html>\n") htmlfile.write("<head>\n") htmlfile.write( '<link rel="stylesheet" type="text/css" href="css/jquery.dataTables.min.css">\n' ) htmlfile.write( '<meta http-equiv="Content-type" content="text/html; charset=utf-8">\n' ) htmlfile.write( '<meta name="viewport" content="width=device-width,initial-scale=1">\n' ) htmlfile.write('<title>' + targetID + ' Fragment Hits</title>\n') htmlfile.write( '<script type="text/javascript" language="javascript" src="js/jquery-1.12.3.min.js">\n' ) htmlfile.write('</script>\n') htmlfile.write( '<script type="text/javascript" language="javascript" src="js/jquery.dataTables.min.js">\n' ) htmlfile.write('</script>\n') htmlfile.write('<script type="text/javascript" 
class="init">\n') htmlfile.write('$(document).ready(function() {\n') htmlfile.write("$('#example').DataTable( {\n") htmlfile.write("'bautoWidth': false,\n") htmlfile.write("'columns': [\n") htmlfile.write("{ 'width': '6%' },\n") htmlfile.write("{ 'width': '6%' },\n") htmlfile.write("{ 'width': '7%' },\n") htmlfile.write("{ 'width': '8%' },\n") htmlfile.write("{ 'width': '6%' },\n") htmlfile.write("{ 'width': '6%' },\n") htmlfile.write("{ 'width': '9%' },\n") htmlfile.write("{ 'width': '9%' },\n") htmlfile.write("{ 'width': '12%' },\n") htmlfile.write("{ 'width': '5%' },\n") htmlfile.write("{ 'width': '4%' },\n") htmlfile.write("{ 'width': '6%' },\n") htmlfile.write("{ 'width': '6%' },\n") htmlfile.write("{ 'width': '3%' },\n") htmlfile.write("{ 'width': '4%' },\n") htmlfile.write("{ 'width': '3%' }\n") htmlfile.write("]\n") htmlfile.write('} )\n') htmlfile.write('} );\n') htmlfile.write('</script>\n') htmlfile.write("</head>\n") htmlfile.write("<body>\n") htmlfile.write("<H3>Ligand-bound models for " + targetID + "</h3>") htmlfile.write("""<h4>Interpreting 'Ligand confidence'</h4> <p><u>4 - High Confidence:</u> The expected ligand was easily interpretable from clear density, and subsequent refinement was well-behaved. This ligand can be trusted. <br><u>3 - Clear density, unexpected ligand:</u> Density very clearly showed a well-defined ligand, but that ligand was unexpected in that crystal/dataset. The observed ligand was modelled anyway, because its presence could be explained in some way. <br><u>2 - Correct ligand, weak density:</u> Though density was weak, it was possible to model the expected ligand, possibly including other circumstantial evidence (e.g. similar ligand in another model). <br><u>1 - Low Confidence:</u> The ligand model is to be treated with scepticism, because the evidence (density, identity, pose) were not convincing. <h4>Interpreting 'Model status':</h4> <p><u>6 - Deposited:</u> The model has been deposited in the PDB. 
<br><u>5 - Deposition ready:</u> The model is fully error-free, in every residue, and is ready for deposition. <br><u>4 - CompChem ready:</u> The model is complete and correct in the region of the bound ligand. There may be remaining small errors elsewhere in the structure, but they are far away and unlikely to be relevant to any computational analysis or compound design. <h4>Interpreting 'Ligand validation' spider plots:</h4> Each axis represents one of the values described below; small is better, and large values on any axis implies that further investigation is warranted. <p><u>Quality (RSCC)</u> reflects the fit of the atoms to the experimental density, and should typically be greater than 0.7. <br><u>Accuracy (RSZD)</u> measures the amount of difference density that is found around these atoms, and should be below 3. <br><u>B-factor ratio</u> measures the consistency of the model with surrounding protein, and is calculated from the B factors of respectively the changed atoms and all side-chain atoms within 4Å. Large values (>3) reflect poor evidence for the model, and intermediate values (1.5+) indicate errors in refinement or modelling; for weakly-binding ligands, systematically large ratios may be justifiable. <br><u>RMSD</u> compares the positions of all atoms built into event density, with their positions after final refinement, and should be below 1Å. <br><u>Precision (RSZO/OCC)</u> measures how clear the density is after refinement. (This is not a quality indicator, but is related to strength of binding but not in a straightforward way.) 
<p></p>\n""") htmlfile.write("<h4>Download data</h4>\n") htmlfile.write("<ul>\n") htmlfile.write( "<li><a href='pdbs/allPDBs.zip'>Download all PDB model files<a></li>\n" ) htmlfile.write( "<li><a href='maps/allEventMaps.zip'>Download all Event Map files<a></li>\n" ) htmlfile.write("</ul>") htmlfile.write('<table id="example" class="display" cellspacing="0">\n') htmlfile.write("<thead>\n") htmlfile.write("<tr>\n") htmlfile.write("<th>Model Name</th>\n") htmlfile.write("<th>Compound SMILES</th>\n") htmlfile.write("<th>Compound Structure</th>\n") htmlfile.write("<th>Site Name</th>\n") htmlfile.write("<th>Ligand Confidence</th>\n") htmlfile.write("<th>Model Status</th>\n") htmlfile.write("<th>Ligand Validation</th>\n") htmlfile.write("<th>Event Map 3D</th>\n") htmlfile.write("<th>Comment</th>\n") htmlfile.write("<th>PDB Identifier</th>\n") htmlfile.write("<th>Resol</th>\n") htmlfile.write("<th>Spacegroup</th>\n") htmlfile.write("<th>Cell</th>\n") htmlfile.write("<th>PDB</th>\n") htmlfile.write("<th>MTZ</th>\n") htmlfile.write("<th>Event Map</th>\n") htmlfile.write("</tr>\n") htmlfile.write("</thead>\n") htmlfile.write("<tfoot>\n") htmlfile.write("<tr>\n") htmlfile.write("<th>Model Name</th>\n") htmlfile.write("<th>Compound SMILES</th>\n") htmlfile.write("<th>Compound Structure</th>\n") htmlfile.write("<th>Site Name</th>\n") htmlfile.write("<th>Ligand Confidence</th>\n") htmlfile.write("<th>Model Status</th>\n") htmlfile.write("<th>Ligand Validation</th>\n") htmlfile.write("<th>Event Map 3D</th>\n") htmlfile.write("<th>Comment</th>\n") htmlfile.write("<th>PDB Identifier</th>\n") htmlfile.write("<th>Resol</th>\n") htmlfile.write("<th>Spacegroup</th>\n") htmlfile.write("<th>Cell</th>\n") htmlfile.write("<th>PDB</th>\n") htmlfile.write("<th>MTZ</th>\n") htmlfile.write("<th>Event Map</th>\n") htmlfile.write("</tr>\n") htmlfile.write("</tfoot>\n") htmlfile.write("<tbody>\n") # Now walk through the input data with open('foricm.csv', 'wb') as f: with 
sqlite3.connect(sqlitefile) as c: c.row_factory = sqlite3.Row cur = c.cursor() # sql = ( "select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index " # " as ModelName,m.CompoundCode,m.CompoundSMILES,p.PANDDA_site_name,p.PANDDA_site_confidence " # " as LigandConfidence,p.RefinementOutcome " # " as ModelStatus,p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, " # " p.PANDDA_site_spider_plot,m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup," # " m.DataProcessingUnitCell,m.RefinementPDB_latest,m.RefinementMTZ_latest,p.PANDDA_site_event_map " # " from panddaTable as p, mainTable as m " # " where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' and " # " (LigandConfidence like '1%' or LigandConfidence like '2%' or LigandConfidence like '3%' or LigandConfidence like '4%') " # " order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index" # ) # query below is without the LigandConfidence being constrained; this is because some older DBs don't have a starting digit # here we constrain RefinementOutcome of site # cur.execute("select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index as ModelName,m.CompoundCode,m.CompoundSMILES,m.Deposition_PDB_ID,p.PANDDA_site_name,p.PANDDA_site_confidence as LigandConfidence,p.RefinementOutcome as ModelStatus,p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, p.PANDDA_site_spider_plot,m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,m.DataProcessingUnitCell,m.RefinementBoundConformation,m.RefinementMTZ_latest,p.PANDDA_site_event_map from panddaTable as p, mainTable as m where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' and (p.RefinementOutcome like '4%' or p.RefinementOutcome like '5%' or p.RefinementOutcome like '6%') order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index") sql = ( "select 
p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index " " as ModelName,m.CompoundCode,m.CompoundSMILES,m.Deposition_PDB_ID,p.PANDDA_site_name," " p.PANDDA_site_confidence as LigandConfidence," " p.RefinementOutcome as ModelStatus," " p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, p.PANDDA_site_spider_plot," " m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,m.DataProcessingUnitCell," " m.RefinementBoundConformation,m.RefinementMTZ_latest," " p.PANDDA_site_event_map from panddaTable as p, " " mainTable as m where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' " " and (p.RefinementOutcome like '4%' or p.RefinementOutcome like '5%' or p.RefinementOutcome like '6%') " " and (LigandConfidence like '1%' or LigandConfidence like '2%' or LigandConfidence like '3%' or LigandConfidence like '4%')" " order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index" ) sql = ( "select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index " " as ModelName,m.CompoundCode,m.CompoundSMILES,m.Deposition_PDB_ID,p.PANDDA_site_name," " p.PANDDA_site_confidence as LigandConfidence," " p.RefinementOutcome as ModelStatus," " p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, p.PANDDA_site_spider_plot," " m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,m.DataProcessingUnitCell," " m.RefinementBoundConformation,m.RefinementMTZ_latest," " p.PANDDA_site_event_map from panddaTable as p, " " mainTable as m where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' " " and (m.RefinementOutcome like '4%' or m.RefinementOutcome like '5%' or m.RefinementOutcome like '6%') " " and (LigandConfidence like '1%' or LigandConfidence like '2%' or LigandConfidence like '3%' or LigandConfidence like '4%')" " order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index" ) cur.execute(sql) rows = cur.fetchall() if not rows: 
print '==> WARNING: none of your samples seems to be at least CompChem ready (4)' return None writer = csv.DictWriter(f, fieldnames=rows[1].keys()) writer.writeheader() for row in rows: # Make compound structure print row['ModelName'], row['PANDDA_site_spider_plot'] compound = Chem.MolFromSmiles( row['CompoundSMILES'].encode("ascii")) Draw.MolToFile( compound, panddadir + '/compoundImages/' + row['CompoundCode'] + '.png', (150, 150)) # Write out table information for event eventID = row['ModelName'] + "_" + row['CompoundCode'] actID = (row['ModelName'] + row['CompoundCode']).replace( targetID + '-', '') writeTableRow(row, htmlfile) writeICBPage(row, panddadir) try: shutil.copy( row['RefinementBoundConformation'], panddadir + "/pdbs/" + row['ModelName'] + ".pdb") shutil.copy( row['RefinementMTZ_latest'], panddadir + "/maps/" + row['ModelName'] + ".mtz") shutil.copy( row['PANDDA_site_event_map'], panddadir + "/maps/" + row['ModelName'] + ".ccp4") if row['PANDDA_site_spider_plot'] is not None: shutil.copy( row['PANDDA_site_spider_plot'], panddadir + "/residueplots/" + row['ModelName'] + ".png") except (IOError, TypeError): print '*** WARNING: cannot find PDB and/or MTZ of ' + row[ 'ModelName'] + ' ***' print 'PDB bound :', row['RefinementBoundConformation'] print 'MTZ :', row['RefinementMTZ_latest'] print 'event map :', row['PANDDA_site_event_map'] print 'spider plot:', row['PANDDA_site_spider_plot'] pass # shutil.copy(row['RefinementPDB_latest'],panddadir+"/pdbs/"+row['ModelName']+".pdb") # if row['PANDDA_site_spider_plot'] is not None: # shutil.copy(row['PANDDA_site_spider_plot'],panddadir+"/residueplots/"+row['ModelName']+".png") # Write row to CSV for ICM writer.writerow(dict(row)) # Conclude HTML htmlfile.write("</tbody>\n") htmlfile.write("</table>\n") htmlfile.write("</body>\n") htmlfile.write("</html>\n") htmlfile.close() # Copy JS & CSS files if not os.path.exists(panddadir + "/js"): os.makedirs(panddadir + "/js") if not os.path.exists(panddadir + "/css"): 
os.makedirs(panddadir + "/css") shutil.copy( os.path.join(os.getenv('XChemExplorer_DIR'), "web/jscss/css/jquery.dataTables.min.css"), panddadir + "/css/jquery.dataTables.min.css") shutil.copy( os.path.join(os.getenv('XChemExplorer_DIR'), "web/jscss/js/jquery-1.12.3.min.js"), panddadir + "/js/jquery-1.12.3.min.js") shutil.copy( os.path.join(os.getenv('XChemExplorer_DIR'), "web/jscss/js/jquery.dataTables.min.js"), panddadir + "/js/jquery.dataTables.min.js") # Create zip files print "Creating zipfile of PDBs..." os.chdir(panddadir + "/pdbs") zf = zipfile.ZipFile("allPDBs.zip", "w") for pdb in glob.glob("*.pdb"): zf.write(pdb) zf.close() print "Creatig zipfile of event maps..." os.chdir("../maps") zf = zipfile.ZipFile("allEventMaps.zip", "w") for pdb in glob.glob("*.mtz"): zf.write(pdb) zf.close() # change folder permissions os.system('chmod -R 775 {0!s}'.format(panddadir)) return
def draw_dummy_core_ixns(mol, core, bonds, dummy_group, color_blind=False):
    """
    Draw a grid of molecules with interactions between atoms in dummy_group
    and the core highlighted.

    One grid cell is drawn per entry of ``bonds``. Every atom is highlighted
    with a background colour (core / dummy_group / remaining atoms), the atoms
    taking part in the interaction get a stronger colour and an atom-map
    number label, and the bonds joining consecutive interaction atoms are
    highlighted as well.

    Parameters
    ----------
    mol
        Molecule to depict; it is copied with ``Chem.Mol`` for every cell, so
        the atom-map labels are only set on the copies.
    core
        Iterable of atom indices forming the core.
    bonds
        Iterable of atom-index sequences, one per interaction. Length 2 is
        labelled "bond", 3 "angle" and 4 "proper" (or "improper" when two
        consecutive atoms are not bonded). The cells are ordered by sequence
        length; the argument itself is left unmodified.
    dummy_group
        Iterable of atom indices forming the dummy group; must be disjoint
        from ``core``.
    color_blind
        Use the alternative palette built with ``rgb_to_decimal``.

    Returns
    -------
    The value returned by ``Draw.MolsToGridImage(..., useSVG=True)``.

    Raises
    ------
    AssertionError
        If ``core`` and ``dummy_group`` overlap, if consecutive atoms of a
        2- or 3-atom interaction are not bonded, or if an interaction has an
        unsupported number of atoms.
    """
    if color_blind:
        COLOR_DUMMY_IXN = rgb_to_decimal(230, 159, 0)
        COLOR_DUMMY_ACTIVE = rgb_to_decimal(240, 228, 66)
        COLOR_DUMMY_INACTIVE = rgb_to_decimal(0, 158, 115)
        COLOR_CORE_ACTIVE = rgb_to_decimal(213, 94, 0)
        COLOR_CORE_INACTIVE = rgb_to_decimal(204, 121, 167)
        COLOR_BOND = (0.96, 0.74, 0)
    else:
        COLOR_DUMMY_IXN = (0, 0.7, 0)
        COLOR_DUMMY_ACTIVE = (0.6, 1, 0.6)
        COLOR_DUMMY_INACTIVE = (0.188, 0.835, 0.784)
        COLOR_CORE_ACTIVE = (0.9, 0.5, 0.5)
        COLOR_CORE_INACTIVE = (1, 0.8, 0.8)
        COLOR_BOND = (0.96, 0.74, 0)

    # Sets give O(1) membership tests inside the per-atom loops below.
    core_atoms = set(core)
    dummy_atoms = set(dummy_group)
    # Raised explicitly (rather than via ``assert``) so the check survives -O.
    if core_atoms & dummy_atoms:
        raise AssertionError("core and dummy_group must not share atoms")

    labels_by_size = {2: "bond", 3: "angle", 4: "proper"}

    highlightAtomLists = []
    highlightBondLists = []
    highlightAtomColorsLists = []
    highlightBondColorsLists = []
    all_mols = []
    legends = []

    # sorted() keeps the original ordering semantics (stable, by length)
    # without reordering the caller's list in place.
    for atom_idxs in sorted(bonds, key=len):
        mol_copy = Chem.Mol(mol)
        num_atoms = mol_copy.GetNumAtoms()
        idxs = [int(a) for a in atom_idxs]

        # default colors
        highlightAtomColors = {}
        for a in range(num_atoms):
            if a in core_atoms:
                highlightAtomColors[a] = COLOR_CORE_INACTIVE
            elif a in dummy_atoms:
                highlightAtomColors[a] = COLOR_DUMMY_ACTIVE
            else:
                highlightAtomColors[a] = COLOR_DUMMY_INACTIVE

        # interacting atoms
        for a in idxs:
            atom = mol_copy.GetAtomWithIdx(a)
            atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
            if a in core_atoms:
                highlightAtomColors[a] = COLOR_CORE_ACTIVE
            else:
                highlightAtomColors[a] = COLOR_DUMMY_IXN

        highlightBonds = set()
        highlightBondColors = {}
        is_improper = False
        for first, second in zip(idxs, idxs[1:]):
            bond = mol_copy.GetBondBetweenAtoms(first, second)
            # this may be none if we have an improper torsion
            if bond is None:
                if len(idxs) != 4:
                    raise AssertionError("Bad idxs")
                is_improper = True
            else:
                highlightBonds.add(bond.GetIdx())
                highlightBondColors[bond.GetIdx()] = COLOR_BOND

        if is_improper:
            label = "improper"
        elif len(idxs) in labels_by_size:
            label = labels_by_size[len(idxs)]
        else:
            raise AssertionError(
                "unsupported interaction size: %d" % len(idxs))

        highlightAtomLists.append(list(range(num_atoms)))
        highlightBondLists.append(list(highlightBonds))
        highlightAtomColorsLists.append(highlightAtomColors)
        highlightBondColorsLists.append(highlightBondColors)
        all_mols.append(mol_copy)
        legends.append(label + " " + repr(idxs))

    return Draw.MolsToGridImage(
        all_mols,
        molsPerRow=4,
        highlightAtomLists=highlightAtomLists,
        highlightAtomColors=highlightAtomColorsLists,
        highlightBondLists=highlightBondLists,
        highlightBondColors=highlightBondColorsLists,
        subImgSize=(250, 250),
        legends=legends,
        useSVG=True,
    )
#!/usr/bin/env python
import rdkit.Chem.Draw as Draw
import rdkit.Chem.Crippen as Crippen
import rdkit.Chem as Chem
import matplotlib.cm as cm

if __name__ == "__main__":
    # Depict a three-carbon chain with matplotlib, then overlay per-atom
    # weighted Gaussians as a heat map plus contour lines.
    molecule = Chem.MolFromSmiles("CCC")
    figure = Draw.MolToMPL(molecule)
    grid_x, grid_y, grid_z = Draw.calcAtomGaussians(
        molecule,
        0.03,
        step=0.01,
        weights=(40, 1, 3),
    )

    axes = figure.axes[0]
    axes.imshow(
        grid_z,
        cmap=cm.Oranges,
        interpolation='bilinear',
        origin='lower',
        extent=(0, 1, 0, 1),
    )
    axes.contour(grid_x, grid_y, grid_z, 20, colors='k', alpha=0.5)

    figure.savefig('coumlogps.colored.png', bbox_inches='tight')
def testGithubIssue54(self):
    """Check that a molecule containing a radical can be rendered to an image.

    The test selects the 'sping' canvas through the RDKIT_CANVAS environment
    variable and restores the previous value afterwards, so the setting does
    not leak into other tests running in the same process.
    """
    # Assert that radicals depict with PIL
    previous_canvas = os.environ.get('RDKIT_CANVAS')
    os.environ['RDKIT_CANVAS'] = 'sping'
    try:
        mol = Chem.MolFromSmiles('c1([O])ccc(O)cc1')
        img = Draw.MolToImage(mol)
        self.assertTrue(img)
    finally:
        # Put the environment back exactly as it was found.
        if previous_canvas is None:
            os.environ.pop('RDKIT_CANVAS', None)
        else:
            os.environ['RDKIT_CANVAS'] = previous_canvas
def testGithub1829(self):
    """Drawing an empty collection of molecules to SVG must not raise."""
    no_molecules = ()
    drawer = Draw.MolDraw2DSVG(300, 300, 100, 100)
    drawer.DrawMolecules(no_molecules)
    drawer.FinishDrawing()
    # The text itself is not inspected; completing the call is the check.
    drawer.GetDrawingText()
def main():
    """Render the Streamlit page for browsing molecules by MW and cLogP.

    Loads the molecule table with ``load_df()``, writes the explanatory text,
    shows four sliders (min/max molecular weight, min/max cLogP) and, when the
    "Show sample" button is pressed, draws a random sample of up to 24
    molecules that satisfy the selected ranges.
    """
    #first things first, load the dataframe of molecules and their properties:
    df = load_df()

    # Slider bounds: the observed range padded by one unit on EACH side, so
    # that the most extreme molecules remain selectable.
    mwmin = float(df['mw'].min()-1)
    mwmax = float(df['mw'].max()+1)
    clogpmin = float(df['clogp'].min()-1)
    clogpmax = float(df['clogp'].max()+1)

    #print out some explanation stuff in the sidebar:
    st.sidebar.title("WDMPLL?")
    st.sidebar.write("If you want to see your favourite molecular property included, drop a line at [@lewischewis](https://twitter.com/lewischewis) or ljmartin at hey dot com, or open a github issue")
    st.sidebar.write("""If you ask 'but why?' or 'but how?', see the readme at the [github page](https://github.com/ljmartin/what_do_mol_prop_look_like)""")
    st.sidebar.write('Click the ✖️ to close this bar and widen the view')

    #and some intro text in the main frame:
    st.title('What do molecular properties look like?')
    st.write("""The [Lipinski Ro5](https://en.wikipedia.org/wiki/Lipinski%27s_rule_of_five) helps people focus their drug discovery efforts on the molecules most likely to make good therapeutic drugs.""")
    st.write("""But, [increasingly](https://doi.org/10.1021/acs.jmedchem.8b00686), drug-like molecules break the Ro5, so it's helpful to push the boundaries of molecular properties when considering a molecule library. One way to get a feel for how far they can be pushed is to just stare at molecules in a certain property-space and decide if they look reasonable or not.""")
    st.write("""### Instructions""")
    st.write('There are sliders below that set the minimum or maximum Molecular Weight (MW) or calculated logP (cLogP). First, set a desired range. Then, click the "**Show Sample**" button. A small sample of 24 molecules satisfying the filters will be chosen and visualized. Just click it again to get a new batch.')
    st.write("""### Histograms """)
    st.write("If you set an unrealistic range, there won't be any molecules left. There are 500k molecules in the set, but the distribution isn't uniform. Here's a guide to help:")
    st.image('density.svg')
    st.write("""### Filters:""")

    ###now the app:
    #property sliders (defaults sit at 5% / 95% of each padded range):
    mw_min = st.slider('Molecular weight (MW) min:',
                       min_value = mwmin,
                       max_value = mwmax,
                       value = (mwmax-mwmin)*0.05 + mwmin,
                       step=0.05
                       )
    mw_max = st.slider('Molecular weight (MW) max:',
                       min_value = mwmin,
                       max_value = mwmax,
                       value = (mwmax-mwmin)*0.95 + mwmin,
                       )
    clogp_min = st.slider('cLogP min',
                          min_value = clogpmin,
                          max_value = clogpmax,
                          value = (clogpmax-clogpmin)*0.05 + clogpmin,
                          )
    clogp_max = st.slider('cLogP max',
                          min_value = clogpmin,
                          max_value = clogpmax,
                          value = (clogpmax-clogpmin)*0.95 + clogpmin
                          )

    # Rows that fall inside both selected ranges (bounds inclusive).
    mask = (df['mw'] <= mw_max) & (df['mw'] >= mw_min) \
        & (df['clogp'] <= clogp_max) & (df['clogp'] >= clogp_min)
    n_matching = mask.sum()

    st.write("""### Molecules:""")
    st.write('Number of molecules left: ', n_matching)

    #this is the main event. Based on the filters/sliders above:
    #1. select a random sample of N ligands that meet the selected filter.
    #2. turn them into molecules,
    #3. and draw!
    N = 24
    if st.button('Show sample'):
        ##1.5: quick error check:
        if not n_matching > 0:
            st.write('Set the property filters again - there are no molecules that fit those parameters')
        ##good to go.
        else:
            ##1: never ask for more rows than are available
            sample = df[mask].sample(min(N, n_matching))
            ##2:
            mols = [Chem.MolFromSmiles(i) for i in sample['smiles']]
            ##3:
            st.image(Draw.MolsToGridImage(mols,
                                          molsPerRow=6,
                                          legends=list(sample['zinc_id'])))
def testRepeatedPrepareForDrawingCalls(self):
    """Check that calling Draw.PrepareMolForDrawing twice gives the same result.

    The bond type and bond direction of the bonds between atoms (2, 1) and
    (2, 7) are asserted after the first call and again after the function is
    applied to its own output.
    """
    m = Chem.MolFromMolBlock(""" 11280715312D 1 1.00000 0.00000 0 33 36 0 1 0 999 V2000 7.6125 -5.7917 0.0000 C 0 0 0 0 0 0 0 0 0 7.0917 -6.0917 0.0000 C 0 0 1 0 0 0 0 0 0 6.4792 -6.8917 0.0000 C 0 0 2 0 0 0 0 0 0 8.1292 -6.0792 0.0000 N 0 0 0 0 0 0 0 0 0 5.5042 -6.8917 0.0000 C 0 0 3 0 0 0 0 0 0 11.2375 -4.8542 0.0000 N 0 0 0 0 0 0 0 0 0 9.6792 -5.1667 0.0000 N 0 0 3 0 0 0 0 0 0 5.9917 -6.5417 0.0000 C 0 0 0 0 0 0 0 0 0 7.6042 -5.1917 0.0000 O 0 0 0 0 0 0 0 0 0 10.7167 -5.1625 0.0000 C 0 0 0 0 0 0 0 0 0 6.2917 -7.4667 0.0000 C 0 0 0 0 0 0 0 0 0 6.5750 -5.7917 0.0000 C 0 0 0 0 0 0 0 0 0 10.2000 -4.8667 0.0000 C 0 0 0 0 0 0 0 0 0 8.6500 -5.7792 0.0000 C 0 0 3 0 0 0 0 0 0 8.6417 -5.1792 0.0000 C 0 0 0 0 0 0 0 0 0 9.1667 -6.0750 0.0000 C 0 0 0 0 0 0 0 0 0 9.6875 -5.7667 0.0000 C 0 0 0 0 0 0 0 0 0 9.1542 -4.8750 0.0000 C 0 0 0 0 0 0 0 0 0 5.6917 -7.4667 0.0000 C 0 0 0 0 0 0 0 0 0 5.2042 -7.4042 0.0000 F 0 0 0 0 0 0 0 0 0 4.9875 -6.5917 0.0000 F 0 0 0 0 0 0 0 0 0 7.5167 -6.5167 0.0000 O 0 0 0 0 0 0 0 0 0 11.7542 -5.1500 0.0000 C 0 0 0 0 0 0 0 0 0 11.2417 -6.0542 0.0000 C 0 0 0 0 0 0 0 0 0 10.7250 -5.7625 0.0000 C 0 0 0 0 0 0 0 0 0 6.5750 -5.1917 0.0000 C 0 0 0 0 0 0 0 0 0 6.0542 -6.0917 0.0000 C 0 0 0 0 0 0 0 0 0 11.7667 -5.7542 0.0000 C 0 0 0 0 0 0 0 0 0 12.2750 -4.8417 0.0000 C 0 0 0 0 0 0 0 0 0 6.0542 -4.8917 0.0000 C 0 0 0 0 0 0 0 0 0 5.5375 -5.7917 0.0000 C 0 0 0 0 0 0 0 0 0 5.5375 -5.1917 0.0000 C 0 0 0 0 0 0 0 0 0 6.3167 -6.3042 0.0000 H 0 0 0 0 0 0 0 0 0 2 1 1 0 0 0 3 2 1 0 0 0 4 1 1 0 0 0 5 8 1 0 0 0 6 10 1 0 0 0 7 17 1 0 0 0 8 3 1 0 0 0 9 1 2 0 0 0 10 13 1 0 0 0 11 3 1 0 0 0 12 2 1 0 0 0 13 7 1 0 0 0 14 4 1 0 0 0 15 14 1 0 0 0 16 14 1 0 0 0 17 16 1 0 0 0 18 15 1 0 0 0 19 11 1 0 0 0 20 5 1 0 0 0 21 5 1 0 0 0 2 22 1 6 0 0 23 6 2 0 0 0 24 25 1 0 0 0 25 10 2 0 0 0 26 12 1 0 0 0 27 12 2 0 0 0 28 24 2 0 0 0 29 23 1 0 0 0 30 26 2 0 0 0 31 27 1 0 0 0 32 31 2 0 0 0 3 33 1 6 0 0 7 18 1 0 0 0 19 5 1 0 0 0 32 30 1 0 0 0 28 23 1 0 
0 0 M END""")
    # First preparation: bond (2, 1) is a plain single bond, bond (2, 7) is a
    # single bond carrying a BEGINWEDGE direction.
    nm = Draw.PrepareMolForDrawing(m)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondDir(), Chem.BondDir.NONE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondDir(), Chem.BondDir.BEGINWEDGE)
    # Second preparation, applied to the already-prepared molecule: the same
    # four expectations must still hold.
    nm = Draw.PrepareMolForDrawing(nm)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondDir(), Chem.BondDir.NONE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondDir(), Chem.BondDir.BEGINWEDGE)
samples = pd.read_csv( f'../cbas/slurm/results/{name}/docking_results/{step}.csv') samples = samples.sort_values('norm_score') smiles = samples.smile scores = samples.norm_score mols = [Chem.MolFromSmiles(s) for s in smiles] mols = mols[-N_top:] scores = scores[-N_top:] save_smiles['smile'] += list(samples.smile[-N_top:]) img = Draw.MolsToGridImage(mols, molsPerRow=4, legends=[ f'{sc:.2f}, {q:.2f}' for i, (sc, q) in enumerate(zip(scores, qeds)) ]) soft_mkdir('plots') img.save(f'plots/cbas_{name}_mols_{step}.png') df = pd.DataFrame.from_dict(save_smiles) df.to_csv('clogp_smiles.csv') #======= # name = 'clogp_adam_clamp_less' # # norm_scores = False # set to true for clogp
#! /usr/bin/python
# coding: utf-8
# @Time: 2020-05-29 14:36:04
# @Author: zeoy
# Modifying molecules with RDKit

# 1. Import the required libraries
from rdkit import Chem
from rdkit.Chem import Draw

# 2. Adding and removing H atoms
mol = Chem.MolFromSmiles('OC1C2C1CC2')
# Draw the molecular structure
Draw.MolToImageFile(
    mol,
    '/drug_development/studyRdkit/st_rdcit/img/mol5.jpg'
)

# 2.1 The function for adding H atoms, explained
# Adds hydrogens to the molecular graph.
#
# The signature below is reference material copied from the API
# documentation. It is not valid Python, so it is kept as a comment to let
# this script parse and run:
#
#   rdkit.Chem.rdmolops.AddHs(
#       (Mol)mol                                   # the molecule to modify
#       [, (bool) explicitOnly=False               # (optional) if set, only explicit Hs are added to the
#                                                  # molecule. Default is 0 (add implicit and explicit Hs).
#       [, (bool) addCoords=False                  # (optional) if set, the Hs are given 3D coordinates.
#                                                  # Default is 0 (no 3D coordinates).
#       [, (AtomPairsParameters) onlyOnAtoms=None  # (optional) if this sequence is provided, only these
#                                                  # atoms are considered for having Hs added.
#       [, (bool) addResidueInfo=False             # (optional) if true, residue information is added to
#                                                  # the hydrogen atoms (useful for PDB files).
#       ]]]]
#   )

# 2.2 Adding H atoms
react_atoms, react_atom_tags = get_tagged_atoms_from_mol(mol_react) react_atoms_index = [atom.GetIdx() for atom in react_atoms] react_atom_index_all = [atom.GetIdx() for atom in mol_react.GetAtoms()] atoms_to_remove = [ idx for idx in react_atom_index_all if idx not in react_atoms_index ] atoms_to_remove.sort(reverse=True) emol = Chem.EditableMol(mol_react) for atom in atoms_to_remove: emol.RemoveAtom(atom) mol_new = emol.GetMol() Chem.SanitizeMol(mol_new) smi_react = Chem.MolToSmiles(mol_new) print('smi_react:', smi_react) react_mols.append(mol_new) react_smis.append(smi_react) img = Draw.MolsToGridImage(react_mols, molsPerRow=1, subImgSize=(200, 200), legends=react_smis) plt.imshow(img) plt.tight_layout() plt.axis('off') plt.show()
# In[4]: Cnlist = f.get_Cnlist_from_label2("C(C)-C(C)-C(C-C-C-C-C-C)-C-C-C-C-C-C-C-C-C") # In[5]: print(Cnlist) # In[6]: for Cn in Cnlist: with open('KNApSAck_mol/%s.mol' % (Cn)) as fi: mol = Chem.MolFromMolBlock(fi.read()) rdDepictor.Compute2DCoords(mol) filename = f.gene + "/" + Cn + "back.png" Draw.MolToFile(mol, filename) break # Cnlist内のCnの図を全て保存するためのコード # とりあえず必要なのだけ抜く # In[9]: for Cn in ["C00017726", "C00026595", "C00026596", "C00015229", "C00015228"]: with open('KNApSAck_mol/%s.mol' % (Cn)) as fi: mol = Chem.MolFromMolBlock(fi.read()) rdDepictor.Compute2DCoords(mol) filename = f.gene + "/" + Cn + "back.png" Draw.MolToFile(mol, filename, size=(500, 500))
# Command-line script (Python 2 print statements): draw a SMILES string to an
# image file.
#
# Positional arguments, as read below:
#   sys.argv[1]  SMILES string
#   sys.argv[2]  output file path
#   sys.argv[3]  first component of the image size (converted with int)
#   sys.argv[4]  second component of the image size (converted with int)
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
import sys

# Echo the raw command line for debugging.
print 'Number of arguments:', len(sys.argv), 'arguments.'
print 'Argument List:', str(sys.argv)
print sys.argv[1]
#smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
smiles = sys.argv[1]
# NOTE(review): `file` shadows the Python 2 builtin of the same name.
file = sys.argv[2]
x = int(sys.argv[3])
y = int(sys.argv[4])
print smiles
mol = AllChem.MolFromSmiles(smiles)
# The two integers are passed to RDKit as size=(x, y).
Draw.MolToFile(mol, file, size=(x, y))
def diversity_plots(dset_key, datastore=True, bucket='gsk_ml', title_prefix=None, ecfp_radius=4, out_dir=None,
                    id_col='compound_id', smiles_col='rdkit_smiles', max_for_mcs=300):
    """
    Plot visualizations of diversity for an arbitrary table of compounds. At minimum, the file should contain
    columns for a compound ID and a SMILES string.

    Compounds for which no base molecule can be built are reported and then
    left out of every distance matrix and plot, so that the molecule list,
    the fingerprints and the compound IDs always stay aligned.

    Args:
        dset_key: Dataset key passed to the datastore, or a CSV path when
            ``datastore`` is false. Its base name (without extension) is used
            as the prefix of the output files.
        datastore: Load through ``dsf.retrieve_dataset_by_datasetkey`` when
            true, otherwise through ``pd.read_csv``.
        bucket: Bucket passed to the datastore lookup.
        title_prefix: Prefix for plot titles; defaults to the file prefix
            with underscores replaced by spaces.
        ecfp_radius: Radius passed to the Morgan fingerprint generator.
        out_dir: Directory for CSV, PNG and PDF output. When None, nothing is
            written to disk.
        id_col: Name of the compound ID column.
        smiles_col: Name of the SMILES column.
        max_for_mcs: The MCS-based analysis is only run when the number of
            usable compounds does not exceed this value.
    """
    # Load table of compound names, IDs and SMILES strings
    if datastore:
        cmpd_df = dsf.retrieve_dataset_by_datasetkey(dset_key, bucket)
    else:
        cmpd_df = pd.read_csv(dset_key, index_col=False)
    file_prefix = os.path.splitext(os.path.basename(dset_key))[0]
    if title_prefix is None:
        title_prefix = file_prefix.replace('_', ' ')
    compound_ids = cmpd_df[id_col].values
    smiles_strs = cmpd_df[smiles_col].values
    ncmpds = len(smiles_strs)
    print(ncmpds)

    # Strip salts, canonicalize SMILES strings and create RDKit Mol objects
    print("Canonicalizing molecules...")
    base_mols = [struct_utils.base_mol_from_smiles(smiles) for smiles in smiles_strs]
    usable = []
    for i, mol in enumerate(base_mols):
        if mol is None:
            print('Unable to get base molecule for compound %d = %s' % (i, compound_ids[i]))
        else:
            usable.append(i)
    if len(usable) < ncmpds:
        # Drop failed compounds from the molecules AND the IDs together;
        # filtering only one of them leaves the distance matrices and the
        # ID labels with different lengths.
        base_mols = [base_mols[i] for i in usable]
        compound_ids = [compound_ids[i] for i in usable]
        ncmpds = len(base_mols)
    print("Done")

    # Generate ECFP fingerprints
    print("Computing fingerprints...")
    fps = [AllChem.GetMorganFingerprintAsBitVect(mol, ecfp_radius, 1024) for mol in base_mols]
    print("Done")

    if ncmpds <= max_for_mcs:
        # Get MCS distance matrix and draw a heatmap
        print("Computing MCS distance matrix...")
        mcs_dist = dm.mcs(base_mols)
        print("Done")
        cmpd1 = []
        cmpd2 = []
        dist = []
        ind1 = []
        ind2 = []
        for i in range(ncmpds-1):
            for j in range(i+1, ncmpds):
                cmpd1.append(compound_ids[i])
                cmpd2.append(compound_ids[j])
                dist.append(mcs_dist[i,j])
                ind1.append(i)
                ind2.append(j)
        dist_df = pd.DataFrame({'compound_1' : cmpd1, 'compound_2' : cmpd2, 'dist' : dist,
                                'i' : ind1, 'j' : ind2})
        dist_df = dist_df.sort_values(by='dist')
        print(dist_df.head(10))
        if out_dir is not None:
            dist_df.to_csv('%s/%s_mcs_dist_table.csv' % (out_dir, file_prefix), index=False)
            # Draw the (up to) ten closest pairs; small inputs have fewer
            # than ten pairs, so cap the loop at the table length.
            for k in range(min(10, len(dist_df))):
                mol_i = base_mols[dist_df.i.values[k]]
                mol_j = base_mols[dist_df.j.values[k]]
                img_file_i = '%s/%d_%s.png' % (out_dir, k, compound_ids[dist_df.i.values[k]])
                img_file_j = '%s/%d_%s.png' % (out_dir, k, compound_ids[dist_df.j.values[k]])
                Draw.MolToFile(mol_i, img_file_i, size=(500,500), fitImage=False)
                Draw.MolToFile(mol_j, img_file_j, size=(500,500), fitImage=False)

        mcs_linkage = linkage(mcs_dist, method='complete')
        mcs_df = pd.DataFrame(mcs_dist, columns=compound_ids, index=compound_ids)
        if out_dir is not None:
            pdf_path = '%s/%s_mcs_clustermap.pdf' % (out_dir, file_prefix)
            pdf = PdfPages(pdf_path)
        g = sns.clustermap(mcs_df, row_linkage=mcs_linkage, col_linkage=mcs_linkage, figsize=(12,12), cmap='plasma')
        if out_dir is not None:
            pdf.savefig(g.fig)
            pdf.close()

        # Draw a UMAP projection based on MCS distance
        mapper = umap.UMAP(n_neighbors=10, n_components=2, metric='precomputed', random_state=17)
        reps = mapper.fit_transform(mcs_dist)
        rep_df = pd.DataFrame.from_records(reps, columns=['x', 'y'])
        rep_df['compound_id'] = compound_ids
        if out_dir is not None:
            pdf_path = '%s/%s_mcs_umap_proj.pdf' % (out_dir, file_prefix)
            pdf = PdfPages(pdf_path)
        fig, ax = plt.subplots(figsize=(12,12))
        sns.scatterplot(x='x', y='y', data=rep_df, ax=ax)
        ax.set_title("%s, 2D projection based on MCS distance" % title_prefix)
        if out_dir is not None:
            pdf.savefig(fig)
            pdf.close()
            rep_df.to_csv('%s/%s_mcs_umap_proj.csv' % (out_dir, file_prefix), index=False)

    # Get Tanimoto distance matrix
    print("Computing Tanimoto distance matrix...")
    tani_dist = dm.tanimoto(fps)
    print("Done")

    # Draw a UMAP projection based on Tanimoto distance
    mapper = umap.UMAP(n_neighbors=10, n_components=2, metric='precomputed', random_state=17)
    reps = mapper.fit_transform(tani_dist)
    rep_df = pd.DataFrame.from_records(reps, columns=['x', 'y'])
    rep_df['compound_id'] = compound_ids
    if out_dir is not None:
        pdf_path = '%s/%s_tani_umap_proj.pdf' % (out_dir, file_prefix)
        pdf = PdfPages(pdf_path)
    fig, ax = plt.subplots(figsize=(12,12))
    sns.scatterplot(x='x', y='y', data=rep_df, ax=ax)
    ax.set_title("%s, 2D projection based on Tanimoto distance" % title_prefix)
    if out_dir is not None:
        pdf.savefig(fig)
        pdf.close()

    # Draw a cluster heatmap based on Tanimoto distance
    tani_linkage = linkage(tani_dist, method='complete')
    tani_df = pd.DataFrame(tani_dist, columns=compound_ids, index=compound_ids)
    if out_dir is not None:
        pdf_path = '%s/%s_tanimoto_clustermap.pdf' % (out_dir, file_prefix)
        pdf = PdfPages(pdf_path)
    g = sns.clustermap(tani_df, row_linkage=tani_linkage, col_linkage=tani_linkage, figsize=(12,12), cmap='plasma')
    if out_dir is not None:
        pdf.savefig(g.fig)
        pdf.close()
def compare_datasets_tsne(args: Args):
    """Embed several SMILES datasets with t-SNE and save a comparison plot.

    Each file in ``args.smiles_paths`` is loaded (and randomly subsampled to
    ``args.max_per_dataset`` molecules when larger), Morgan fingerprints are
    computed for all molecules, and a 2D t-SNE embedding with the Jaccard
    metric is fitted and rescaled to the unit square. The plot is coloured
    either by HDBSCAN cluster (``args.cluster``) or by dataset, drawing
    molecule depictions instead of points when ``args.plot_molecules`` is
    set. The figure is written to ``args.save_path``.

    :param args: Options object providing the attributes read above, plus
                 ``colors``, ``sizes``, ``smiles_column`` and ``scale``.
    :raises ValueError: If there are more datasets than colors or sizes.
    """
    num_datasets = len(args.smiles_paths)
    if num_datasets > len(args.colors) or num_datasets > len(args.sizes):
        raise ValueError(
            'Must have at least as many colors and sizes as datasets')

    # Fixed seed so the random subsampling is repeatable
    np.random.seed(0)

    # Read every dataset, remembering which slice of `smiles` it occupies
    print('Loading data')
    smiles = []
    slices = []
    labels = []
    for smiles_path in args.smiles_paths:
        # Dataset label is the file name without its .csv suffix
        label = os.path.basename(smiles_path).replace('.csv', '')

        new_smiles = get_smiles(path=smiles_path,
                                smiles_columns=args.smiles_column,
                                flatten=True)
        print(f'{label}: {len(new_smiles):,}')

        # Oversized datasets are cut down to the configured maximum
        if len(new_smiles) > args.max_per_dataset:
            print(f'Subsampling to {args.max_per_dataset:,} molecules')
            new_smiles = np.random.choice(new_smiles,
                                          size=args.max_per_dataset,
                                          replace=False).tolist()

        first = len(smiles)
        slices.append(slice(first, first + len(new_smiles)))
        labels.append(label)
        smiles += new_smiles

    # Fingerprint every molecule
    print('Computing Morgan fingerprints')
    morgan_generator = get_features_generator('morgan')
    morgans = [
        morgan_generator(smile) for smile in tqdm(smiles, total=len(smiles))
    ]

    print('Running t-SNE')
    start = time.time()
    embedder = TSNE(n_components=2,
                    init='pca',
                    random_state=0,
                    metric='jaccard')
    X = embedder.fit_transform(morgans)
    print(f'time = {time.time() - start:.2f} seconds')

    if args.cluster:
        import hdbscan  # pip install hdbscan
        print('Running HDBSCAN')
        start = time.time()
        clusterer = hdbscan.HDBSCAN(min_cluster_size=5, gen_min_span_tree=True)
        colors = clusterer.fit_predict(X)
        print(f'time = {time.time() - start:.2f} seconds')

    print('Plotting t-SNE')
    # Rescale each embedding axis to [0, 1]
    x_min = np.min(X, axis=0)
    x_max = np.max(X, axis=0)
    X = (X - x_min) / (x_max - x_min)

    makedirs(args.save_path, isfile=True)

    plt.clf()
    fontsize = 50 * args.scale
    fig = plt.figure(figsize=(64 * args.scale, 48 * args.scale))
    plt.title('t-SNE using Morgan fingerprint with Jaccard similarity',
              fontsize=2 * fontsize)
    ax = fig.gca()
    handles = []
    legend_kwargs = {'loc': 'upper right', 'fontsize': fontsize}

    if args.cluster:
        plt.scatter(X[:, 0],
                    X[:, 1],
                    s=150 * np.mean(args.sizes),
                    c=colors,
                    cmap='nipy_spectral')
    else:
        per_dataset = zip(slices, args.colors, labels, args.sizes)
        for slc, color, label, size in per_dataset:
            if not args.plot_molecules:
                # One scatter series per dataset
                plt.scatter(X[slc, 0],
                            X[slc, 1],
                            s=150 * size,
                            color=color,
                            label=label)
                continue

            # One framed molecule image per point, plus a legend patch
            handles.append(mpatches.Patch(color=color, label=label))
            for smile, (x, y) in zip(smiles[slc], X[slc]):
                img = Draw.MolsToGridImage([Chem.MolFromSmiles(smile)],
                                           molsPerRow=1,
                                           subImgSize=(200, 200))
                imagebox = offsetbox.AnnotationBbox(
                    offsetbox.OffsetImage(img), (x, y),
                    bboxprops=dict(color=color))
                ax.add_artist(imagebox)

    if args.plot_molecules:
        legend_kwargs['handles'] = handles

    plt.legend(**legend_kwargs)
    plt.xticks([])
    plt.yticks([])

    print('Saving t-SNE')
    plt.savefig(args.save_path)
def pixmap_from_smiles(self, s):
    """Parse the SMILES string ``s`` and return the result of Draw.MolToQPixmap for it."""
    return Draw.MolToQPixmap(Chem.MolFromSmiles(s))
def draw_multi(smiles):
    """Draw a list of SMILES strings as one grid image.

    The grid has 7 molecules per row, at most 75 molecules, 100x100 cells,
    and each cell is labelled with the molecule's position in ``smiles``.
    """
    mols = []
    for smi in smiles:
        mols.append(Chem.MolFromSmiles(smi))
    index_labels = list(map(str, range(len(mols))))
    return Draw.MolsToGridImage(
        mols,
        molsPerRow=7,
        maxMols=75,
        subImgSize=(100, 100),
        legends=index_labels,
    )
def smiles_to_image_file(smiles, path):
    """Parse ``smiles`` with RDKit and write its depiction to ``path``."""
    Draw.MolToFile(Chem.MolFromSmiles(smiles), path)
def mols2grid_image(mols, molsPerRow):
    """Draw ``mols`` as a grid image with 150x150 cells.

    ``None`` entries are replaced by an empty ``Chem.RWMol`` so they occupy a
    cell, and 2D coordinates are computed for every molecule before drawing.
    """
    drawable = []
    for entry in mols:
        if entry is None:
            drawable.append(Chem.RWMol())
        else:
            drawable.append(entry)

    for mol in drawable:
        AllChem.Compute2DCoords(mol)

    cell_size = (150, 150)
    return Draw.MolsToGridImage(drawable,
                                molsPerRow=molsPerRow,
                                subImgSize=cell_size)
import rdkit.Chem.Draw as draw import rdkit.Chem.AllChem as ac # FIXME: ugly hack for filename in glob.glob('%s/*.mol2' % args.mol2_dir[0]): mol = rd.MolFromMol2File(filename, **_mol_params) dirname = os.path.dirname(filename) basename = os.path.splitext(os.path.basename(filename))[0] outname = os.path.join(dirname, basename + os.extsep + 'svg') tmp = ac.Compute2DCoords(mol) draw.MolToFile(mol, outname, wedgeBonds=False, size=(150, 150), fitImage=True, kekulize=False) with open(args.graph[0], 'rb') as pfile: mst = pickle.load(pfile) mol_names = pickle.load(pfile) dir_names = pickle.load(pfile) mst_a = mst.toarray() draw_graph(mst, mst_a, mol_names, dir_names, args.method[0]) else: # FIXME: other file types mol2_files = glob.glob('%s/*.mol2' % args.mol2_dir[0]) if not mol2_files:
smile = np.array(smile).astype('str')[0] print("Second:", smile) mol = MolFromSmiles(smile) mols.append(mol) second_best_score = 1e10 if np.any(scores == third_best_score): smile = smiles[scores == third_best_score] smile = np.array(smile).astype('str')[0] print("Third:", smile) mol = MolFromSmiles(smile) mols.append(mol) third_best_score = 1e10 img = Draw.MolsToGridImage(mols, molsPerRow=len(mols), subImgSize=(300, 300), useSVG=True) with open("molecule_images/best_grammar_molecule.svg", "w") as text_file: text_file.write(img) results_character = np.zeros((n_simulations, 3)) for j in range(1, n_simulations + 1): best_value = 1e10 n_valid = 0 max_value = 0 for i in range(iteration): smiles = load_object( 'simulation{}/character/results/valid_smiles{}.dat'.format(j, i)) scores = load_object( 'simulation{}/character/results/scores{}.dat'.format(j, i)) n_valid += len([x for x in smiles if x is not None])
def calc_MST(filenames, method, do_draw=True, parallel=False):
    """Score all molecule pairs and compute the minimum spanning tree.

    Every mol2 file is read with RDKit, 2D coordinates are computed and,
    optionally, an SVG depiction is written next to the input file. The
    scoring function selected by ``method`` is then evaluated for each pair
    (i, j) with i < j, the upper-triangular score matrix is turned into a
    minimum spanning tree, and the tree plus the molecule and directory names
    are pickled to ``MST_PICKLE_FILE``.

    :param filenames: sequence of mol2 file paths
    :param method: key into ``valid_methods`` selecting the scoring function
    :param do_draw: write ``<basename>.svg`` next to each input file
    :param parallel: evaluate the scores in a multiprocessing pool with one
                     worker per CPU
    :return: tuple ``(mst, mst_a, mol_names, dir_names)`` where ``mst`` is the
             sparse tree and ``mst_a`` its dense array form
    """
    from functools import partial

    import numpy as np
    from scipy.sparse import csr_matrix
    from scipy.sparse.csgraph import minimum_spanning_tree
    import rdkit.Chem.AllChem as ac

    score = valid_methods[method]

    N = len(filenames)
    simmat = np.zeros(shape=(N, N), dtype=np.float32)

    mols = []
    mol_names = []
    dir_names = []

    print('Reading input files...')

    for filename in filenames:
        mol = rd.MolFromMol2File(filename, **_mol_params)
        dirname = os.path.dirname(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]
        outname = os.path.join(dirname, basename + os.extsep + 'svg')

        mols.append(mol)
        mol_names.append(basename)
        dir_names.append(dirname)

        ac.Compute2DCoords(mol)

        if do_draw:
            import rdkit.Chem.Draw as draw
            draw.MolToFile(mol, outname, wedgeBonds=False, size=(150, 150),
                           fitImage=True, kekulize=False)

    print('Computing similarity matrix using %s...' % method)

    pool = mp.Pool(mp.cpu_count()) if parallel else None
    map_func = pool.imap if parallel else map

    # try/finally so the worker pool is always shut down, even when a
    # scoring call raises.
    try:
        results = []

        for i in range(N - 1):
            print('%s...' % mol_names[i])

            partial_func = partial(score, mols[i])
            results.append(map_func(partial_func, mols[i + 1:N]))

        # Only the upper triangle is filled; the (possibly lazy) result
        # iterators are consumed here.
        for i, row in enumerate(results):
            simmat[i][i + 1:N] = list(row)
    finally:
        if pool is not None:
            pool.close()
            pool.join()

    print('similarity score matrix:\n', simmat)

    # NOTE: this removes edges with the larger weight
    mst = minimum_spanning_tree(csr_matrix(simmat))

    mst_a = mst.toarray()

    print('\nminimal spanning tree (MST):\n', mst_a)
    print('\nsuggested mappings from MST:')

    edge_rows, edge_cols = mst.nonzero()
    for cnt, (i, j) in enumerate(zip(edge_rows, edge_cols), start=1):
        # A separate name keeps the scoring function `score` from being
        # rebound to a float.
        edge_weight = mst_a[i][j]
        print('%6i) %s <> %s (%f)\n' % (cnt, mol_names[i], mol_names[j],
                                         edge_weight), end='')

    with open(MST_PICKLE_FILE, 'wb') as pfile:
        pickle.dump(mst, pfile, pickle.HIGHEST_PROTOCOL)
        pickle.dump(mol_names, pfile, pickle.HIGHEST_PROTOCOL)
        pickle.dump(dir_names, pfile, pickle.HIGHEST_PROTOCOL)

    return mst, mst_a, mol_names, dir_names
def rdkit2d(smiles: str, tag: str):
    """Draw ``smiles`` to the PNG file ``GRAPH_DIR + tag + '.png'``."""
    parsed = Chem.MolFromSmiles(smiles)
    png_path = GRAPH_DIR + tag + '.png'
    Draw.MolToFile(parsed, png_path)
# -*- coding: utf-8 -*- """ @author: hkaneko """ from rdkit import Chem from rdkit.Chem import Draw molecule = Chem.MolFromSmiles( 'CC(N)C(=O)O') # SMILES の読み込み。'CC(N)C(=O)O' はアラニン Draw.MolToFile(molecule, 'molecule.png') # 右の IPython コンソールに # Draw.MolToImage(molecule) # と入力して実行しても、分子を描画できます
def testDrawReaction(self):
    """Rendering a reaction built from SMARTS must complete without raising."""
    reaction_smarts = "[c;H1:3]1:[c:4]:[c:5]:[c;H1:6]:[c:7]2:[nH:8]:[c:9]:[c;H1:1]:[c:2]:1:2.O=[C:10]1[#6;H2:11][#6;H2:12][N:13][#6;H2:14][#6;H2:15]1>>[#6;H2:12]3[#6;H1:11]=[C:10]([c:1]1:[c:9]:[n:8]:[c:7]2:[c:6]:[c:5]:[c:4]:[c:3]:[c:2]:1:2)[#6;H2:15][#6;H2:14][N:13]3"
    reaction = AllChem.ReactionFromSmarts(reaction_smarts)
    # this shouldn't throw an exception...
    Draw.ReactionToImage(reaction)
def draw_mol(
    smiles,
    height=200,
    width=200,
    img_type=None,
    highlightAtoms=[],
    atomcolors=[],
    highlightBonds=[],
    bondcolors={},
    mol=None,
):
    """
    Draw a molecule from a smiles
    :param smiles: the SMILES to render (ignored when ``mol`` is given)
    :param height: the height in px (falsy values fall back to 200)
    :param width: the width in px (falsy values fall back to 200)
    :param img_type: "png" for a PNG HTTP response, anything else for SVG
    :param highlightAtoms: atoms to highlight (SVG output only)
    :param atomcolors: highlight colours for atoms (SVG output only)
    :param highlightBonds: bonds to highlight
    :param bondcolors: highlight colours for bonds
    :param mol: an already-built molecule to draw instead of parsing ``smiles``;
                note that it is modified in place (2D coords, kekulization)
    :return: the string "None Mol" when the SMILES cannot be parsed; an
             ``HttpResponse`` holding a PNG with white pixels made transparent
             when ``img_type == "png"``; otherwise an SVG as a string
    """
    # NOTE: the list/dict defaults are only ever read, never mutated, so
    # sharing them between calls is harmless here.
    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return "None Mol"
    AllChem.Compute2DCoords(mol)
    Chem.Kekulize(mol)
    if not height:
        height = 200
    if not width:
        width = 200
    if img_type == "png":
        # NOTE(review): this branch does not use height/width or
        # highlightAtoms, so MolToImage renders at its own default size -
        # confirm whether that is intended.
        img = Draw.MolToImage(
            mol,
            options=options,
            highlightBonds=highlightBonds,
            highlightBondColors=bondcolors,
        )
        img = img.convert("RGBA")
        # Turn every pure-white pixel fully transparent.
        newData = [
            (255, 255, 255, 0)
            if item[0] == 255 and item[1] == 255 and item[2] == 255
            else item
            for item in img.getdata()
        ]
        img.putdata(newData)
        response = HttpResponse(content_type="image/png")
        img.save(response, "PNG")
        return response
    else:
        # MolDraw2DSVG takes (width, height) in that order; the arguments
        # used to be passed swapped, which transposed non-square images.
        drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
        drawopt = drawer.drawOptions()
        drawopt.clearBackground = False
        drawer.DrawMolecule(
            mol,
            highlightAtoms=highlightAtoms,
            highlightAtomColors=atomcolors,
            highlightBonds=highlightBonds,
            highlightBondColors=bondcolors,
        )
        # NOTE(review): the molecule is drawn a second time without
        # highlights on top of the highlighted drawing; kept as-is because
        # removing it would change the produced SVG - confirm intent.
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
        return drawer.GetDrawingText().replace("svg:", "")