def _mcs(data, asSmiles, atomCompare, bondCompare, threshold, ringMatchesRingOnly, completeRingsOnly,
         sanitize=True, removeHs=True, strictParsing=True, isomericSmiles=False, canonical=True,
         kekuleSmiles=False):
    """Compute the maximum common substructure (MCS) of the molecules in ``data``.

    ``data`` is parsed with ``_parseMolData`` using ``sanitize``, ``removeHs`` and
    ``strictParsing``.

    Returns:
        ``None`` when no molecule could be parsed; for a single molecule, that
        molecule rendered as SMILES (``asSmiles`` truthy) or SMARTS; otherwise the
        SMARTS of the MCS, or -- when ``asSmiles`` is truthy -- the SMILES of the
        matching fragment of the first input molecule that contains the MCS.
    """
    ms = _parseMolData(data, sanitize=sanitize, removeHs=removeHs, strictParsing=strictParsing)
    if not ms:
        return
    if len(ms) == 1:
        # A single molecule is returned as-is; the label names the format
        # that is actually produced.
        if asSmiles:
            print('SMILES')
            return Chem.MolToSmiles(ms[0])
        print('SMARTS')
        return Chem.MolToSmarts(ms[0])
    if threshold:
        threshold = float(threshold)
    options = dict(atomCompare=atomCompare, bondCompare=bondCompare,
                   ringMatchesRingOnly=ringMatchesRingOnly,
                   completeRingsOnly=completeRingsOnly)
    try:
        mcs = MCS.FindMCS(ms, threshold=threshold, **options)
    except TypeError:
        # presumably a FindMCS that does not accept `threshold` -- retry without it
        mcs = MCS.FindMCS(ms, **options)
    res = mcs.smarts
    if asSmiles:
        p = Chem.MolFromSmarts(res)
        for m in ms:
            # GetSubstructMatch returns an empty tuple when there is no match,
            # so one search per molecule is enough.
            match = m.GetSubstructMatch(p)
            if match:
                res = Chem.MolFragmentToSmiles(m, atomsToUse=match, isomericSmiles=isomericSmiles,
                                               canonical=canonical, kekuleSmiles=kekuleSmiles)
                break
    return res

#-----------------------------------------------------------------------------------------------------------------------
def mcs(self, fragments):
    """Return the maximum common substructure shared by ``fragments``.

    The many options of ``rdkit.Chem.MCS.FindMCS`` are not exposed
    (see http://www.rdkit.org/Python_Docs/rdkit.Chem.MCS-module.html).
    The SMARTS match naturally covers heavy atoms only.

    :param fragments: two or more fragments containing a common substructure;
        each must expose a ``molecule`` attribute
    :type fragments: list
    :return: the result object of ``MCS.FindMCS``
    :rtype: MCSResult
    """
    global MCS, rdk
    try:
        # Probe for the lazily imported module; import on first use only.
        MCS
    except NameError:
        from rdkit.Chem import MCS
        from cinfony import rdk
    rdkit_mols = []
    for fragment in fragments:
        rdkit_mols.append(rdk.Molecule(fragment.molecule).Mol)
    return MCS.FindMCS(rdkit_mols)
def test_timeout_negative(self):
    """FindMCS must reject a negative timeout with ValueError."""
    try:
        MCS.FindMCS(lengthy_mols, timeout=-1)
    except ValueError:
        return
    raise AssertionError("bad range check for timeout")
def test_min_atoms_1(self):
    """FindMCS must reject minNumAtoms=1 with ValueError."""
    try:
        MCS.FindMCS(simple_mols, minNumAtoms=1)
    except ValueError:
        return
    raise AssertionError("should have raised an exception")
def _mcs(data, params):
    """Compute the maximum common substructure (MCS) of the molecules in ``data``.

    ``params`` is a mapping of string-valued options:

    * ``asSmiles`` ('0'/'1', default '0') -- return SMILES instead of SMARTS
    * ``atomCompare`` (default 'elements'), ``bondCompare`` (default 'bondtypes')
    * ``ringMatchesRingOnly``, ``completeRingsOnly`` ('0'/'1', default '0')
    * ``threshold`` (optional, converted to float when truthy)

    Returns:
        ``None`` when no molecule could be parsed; for a single molecule, that
        molecule rendered as SMILES or SMARTS; otherwise the SMARTS of the MCS,
        or -- with ``asSmiles`` -- the SMILES of the matching fragment of the
        first input molecule that contains the MCS.
    """
    ms = _parseMolData(data)
    if not ms:
        return
    # Parse the flag once; it is needed on both return paths.
    as_smiles = bool(int(params.get('asSmiles', '0')))
    if len(ms) == 1:
        # A single molecule is returned as-is; the label names the format
        # that is actually produced.
        if as_smiles:
            print('SMILES')
            return Chem.MolToSmiles(ms[0])
        print('SMARTS')
        return Chem.MolToSmarts(ms[0])
    options = dict(
        atomCompare=params.get('atomCompare', 'elements'),
        bondCompare=params.get('bondCompare', 'bondtypes'),
        ringMatchesRingOnly=bool(int(params.get('ringMatchesRingOnly', '0'))),
        completeRingsOnly=bool(int(params.get('completeRingsOnly', '0'))),
    )
    threshold = params.get('threshold', None)
    if threshold:
        threshold = float(threshold)
    try:
        mcs = MCS.FindMCS(ms, threshold=threshold, **options)
    except TypeError:
        # presumably a FindMCS that does not accept `threshold` -- retry without it
        mcs = MCS.FindMCS(ms, **options)
    res = mcs.smarts
    if as_smiles:
        p = Chem.MolFromSmarts(res)
        for m in ms:
            # GetSubstructMatch returns an empty tuple when there is no match,
            # so one search per molecule is enough.
            match = m.GetSubstructMatch(p)
            if match:
                res = Chem.MolFragmentToSmiles(m, atomsToUse=match, isomericSmiles=True, canonical=False)
                break
    return res

#-----------------------------------------------------------------------------------------------------------------------
def test_timeout(self):
    """A 0.1 timeout yields an incomplete but non-trivial result in under 0.5 s."""
    started = time.time()
    result = MCS.FindMCS(lengthy_mols, timeout=0.1)
    self.assert_result(result, completed=0)
    self.assertTrue(result.numAtoms > 1)
    # The partial match must have at least numAtoms - 1 bonds.
    atom_count, bond_count = result.numAtoms, result.numBonds
    self.assertTrue(bond_count >= atom_count - 1, (atom_count, bond_count))
    elapsed = time.time() - started
    self.assertTrue(elapsed < 0.5, elapsed)
def make2dcanv(mmpcomps):
    """Function to make a 2D canv from MMPComps.

    For every matched pair in ``mmpcomps`` the crystal and ChEMBL molecules are
    depicted aligned on their maximum common substructure (MCS). Pairs are then
    ordered by the MCS SMARTS and drawn with ``draw_acts``.

    Returns the rendered image as PNG data.
    """
    rdmols = []
    acts = []
    subs = []
    smsubs = []
    donemols = set()
    for m in mmpcomps:
        # Ensure that this is not just the same comparison. The key uses the
        # compound ids of both sides so the membership test and the stored
        # pair agree.
        xtal_cmpd = m.xtal_mol.cmpd_id.pk
        chembl_cmpd = m.chembl_mol.cmpd_id.pk
        if (xtal_cmpd, chembl_cmpd) in donemols or (chembl_cmpd, xtal_cmpd) in donemols:
            # Don't do the same comparison twice
            continue
        donemols.add((xtal_cmpd, chembl_cmpd))
        # Parse only the pairs that are actually going to be drawn
        mol1 = Chem.MolFromMolBlock(str(m.xtal_mol.sdf_info))
        mol2 = Chem.MolFromMolBlock(str(m.chembl_mol.sdf_info))
        # Set the molecule name for the 3D display
        acts.append(render_act(m.xtal_act))
        acts.append(render_act(m.chembl_act))
        # Generate the two-d depictions after canonicalising the smiles
        mol1 = Chem.MolFromSmiles(Chem.MolToSmiles(mol1, isomericSmiles=True))
        mol2 = Chem.MolFromSmiles(Chem.MolToSmiles(mol2, isomericSmiles=True))
        smp = MCS.FindMCS([mol1, mol2], completeRingsOnly=True, ringMatchesRingOnly=True,
                          timeout=0.5).smarts
        p = Chem.MolFromSmarts(smp)
        subs.append(p)
        smsubs.append(smp)
        AllChem.Compute2DCoords(p)
        AllChem.GenerateDepictionMatching2DStructure(mol1, p, acceptFailure=True)
        AllChem.GenerateDepictionMatching2DStructure(mol2, p, acceptFailure=True)
        rdmols.extend([mol1, mol2])
    # So now we have the mols in a list with activity information in a list
    # Order this list of molecules based on scaffold (p)
    # Get a list of the indices of rdmols to rearrange
    myinds = sorted(range(len(smsubs)), key=lambda x: smsubs[x])
    nmols = []
    nacts = []
    nsubs = []
    # Now rearrange everything to suit: pair k occupies slots 2k and 2k + 1
    for ind_m in myinds:
        nmols.extend([rdmols[ind_m * 2], rdmols[ind_m * 2 + 1]])
        nacts.extend([acts[ind_m * 2], acts[ind_m * 2 + 1]])
        nsubs.append(subs[ind_m])
    image = draw_acts(nmols, nacts, nsubs)
    output = StringIO.StringIO()
    image.save(output, format="PNG")
    contents = output.getvalue()
    return contents
def assert_search(self, smiles, numAtoms, numBonds, smarts=_ignore, **kwargs):
    """Run FindMCS on ``smiles`` with ``kwargs`` and check a completed result.

    The result is passed to ``assert_result`` together with ``completed=1``
    and the expected ``numAtoms``, ``numBonds`` and ``smarts``.
    """
    expected = dict(completed=1, numAtoms=numAtoms, numBonds=numBonds, smarts=smarts)
    found = MCS.FindMCS(smiles, **kwargs)
    self.assert_result(found, **expected)
def get_decoys(pdb_file, mol_file, num_atoms, init='get_decoys_init'):
    """For each binding ligand, gets a list of decoy ligands.

    Candidates are filtered by number of atoms and by maximum common
    substructure (MCS): a candidate is accepted when ``FindMCS`` reports
    ``numAtoms == -1`` for ``minNumAtoms=init.max_substruct``. Conformers are
    generated for each accepted decoy, which is then written to the
    decoy_ligands folder.

    :param pdb_file: path of the binding ligand's PDB file; decoy paths are derived from it
    :param mol_file: path of the binding ligand's mol/SD file
    :param num_atoms: atom count of the binding ligand
    :param init: expression string evaluated to obtain the shared settings object
    :return: list of ``[candidate_pdb_file, decoy_file]`` pairs
    """
    # NOTE(review): eval() executes whatever expression `init` holds -- only
    # ever pass a trusted, hard-coded name here.
    init = eval(init)
    reader = SDMolSupplier(mol_file)
    mol = reader[0]
    output = []
    # Materialise the indices so shuffle() also works where range() is lazy
    iterator = list(range(len(init.all_mols)))
    random.shuffle(iterator)
    for i in iterator:
        if (init.all_mol_files[i] == mol_file or
                abs(init.all_num_atoms[i] - num_atoms) > init.max_atom_dif):
            continue
        mcs = MCS.FindMCS([init.all_mols[i], mol], minNumAtoms=init.max_substruct,
                          ringMatchesRingOnly=True, completeRingsOnly=True, timeout=1)
        if mcs.numAtoms == -1:
            # save the mol object as a PDB file in the decoys folder
            decoy_file = pdb_file.replace('/binding_ligands/', '/decoy_ligands/').replace(
                '.pdb', str(len(output)) + '.pdb')
            # generate the decoy and its conformers
            decoy2 = Chem.AddHs(init.all_mols[i])
            conf_ids = AllChem.EmbedMultipleConfs(decoy2, init.num_conformers)
            for cid in conf_ids:
                AllChem.MMFFOptimizeMolecule(decoy2, confId=cid)
            decoy = Chem.RemoveHs(decoy2)
            # Open the writer only once there is something to write, and
            # always close it, so a failure cannot leak the file handle.
            pdb_writer = PDBWriter(decoy_file)
            try:
                pdb_writer.write(decoy)
            finally:
                pdb_writer.close()
            output.append([init.all_pdb_files[i], decoy_file])
            if len(output) >= init.max_num_decoys:
                break
    print('Got the decoys for one ligand')
    return output
def get_decoys(pdb_file, mol_file, num_atoms, init='get_decoys_init'):
    """
    For each binding ligand, get a list of decoy ligands.

    Candidates are filtered by number of atoms and by maximum common
    substructure (MCS): a candidate is accepted when ``FindMCS`` reports
    ``numAtoms == -1`` for ``minNumAtoms=init.max_substruct``.

    :param pdb_file: pdb format ligand
    :param mol_file: mol format ligand
    :param num_atoms: ligand's atom number
    :param init: expression string evaluated to obtain the shared settings object
    :return: nested list ``[[pdb_file, decoy_files]]`` where ``decoy_files`` is a
        comma-separated string of ``init.max_num_decoys`` decoy pdb paths
    :raises Exception: if fewer than ``init.max_num_decoys`` decoys are found
    """
    # NOTE(review): eval() executes whatever expression `init` holds -- only
    # ever pass a trusted, hard-coded name here.
    init = eval(init)
    reader = SDMolSupplier(mol_file)
    mol = reader[0]
    decoys = []
    # Shuffle which ligands we sample to avoid biases in decoy ligands.
    # The indices are materialised so shuffle() also works where range() is lazy.
    iterator = list(range(len(init.all_mols)))
    random.shuffle(iterator)
    for i in iterator:
        if (init.all_mol_files[i] == mol_file or
                abs(init.all_num_atoms[i] - num_atoms) > init.max_atom_dif):
            continue
        # FIXME: one MCS search per candidate dominates the running time
        mcs = MCS.FindMCS([init.all_mols[i], mol], minNumAtoms=init.max_substruct,
                          ringMatchesRingOnly=True, completeRingsOnly=True, timeout=1)
        if mcs.numAtoms == -1:
            decoys.append(init.all_pdb_files[i])
            if len(decoys) == init.max_num_decoys:
                break
    # Check to make sure there are enough decoys
    if len(decoys) < init.max_num_decoys:
        raise Exception("Not enough decoys for ligand " + pdb_file)
    print('Got the decoys for one ligand')
    return [[pdb_file, ','.join(decoys)]]
def create_lexicon(molecule1, molecule2):
    """Map atom indices of ``molecule1`` to those of ``molecule2`` over their MCS.

    The maximum common substructure of the two molecules is computed with
    ``matchValences=True`` and matched against each molecule.

    Returns:
        A sorted list of ``(index_in_molecule1, index_in_molecule2)`` tuples,
        one per atom of the common substructure pattern.
    """
    patt1 = Chem.MolFromSmarts(MCS.FindMCS([molecule2, molecule1], matchValences=True).smarts)
    # Both matches are ordered like the pattern's atoms, so position k in one
    # corresponds to position k in the other.
    matching1 = molecule2.GetSubstructMatch(patt1)
    matching2 = molecule1.GetSubstructMatch(patt1)
    lexicon = sorted(zip([int(i) for i in matching2], [int(i) for i in matching1]))
    return lexicon
def pattern_findersub(steroid1, steroid2, exceptions):
    """List the atoms of ``steroid1`` outside its MCS with ``steroid2``.

    :param steroid1: SMILES of the substrate
    :param steroid2: SMILES of the product
    :param exceptions: atom indices of ``steroid1`` to treat as matched
    :return: ``pandas.DataFrame`` with one row per substrate-specific atom and
        the columns 'Substrate Unique Atoms', 'Connections to be deleted'
        (neighbouring substrate-specific atoms) and 'Bonds to be deleted'
        (the bond type names of those connections)
    """
    m1 = Chem.MolFromSmiles(steroid1)
    m2 = Chem.MolFromSmiles(steroid2)
    patt1 = Chem.MolFromSmarts(MCS.FindMCS([m1, m2]).smarts)
    matching1 = list(m1.GetSubstructMatch(patt1))
    # important exception line: caller-supplied atoms count as matched
    matching1.extend(exceptions)
    # these are the atoms in the substrate that are NOT in the product
    index1 = range(m1.GetNumAtoms())
    substrate_specific_atoms = list(set(index1) - set(matching1))
    specific_set = set(substrate_specific_atoms)
    del_bonds = []
    del_connections = []
    for i in substrate_specific_atoms:
        atom = m1.GetAtomWithIdx(i)
        # keep only the neighbours that are themselves outside the common substructure
        neighbors = [x.GetIdx() for x in atom.GetNeighbors()]
        extra_bonds = list(set(neighbors) & specific_set)
        # get the types of the bonds of these atoms that need to be deleted
        bondtype = []
        for other in extra_bonds:
            name = str(m1.GetBondBetweenAtoms(i, other).GetBondType())
            bondtype.append(name.replace('rdkit.Chem.rdchem.BondType.', ''))
        del_connections.append(extra_bonds)
        del_bonds.append(bondtype)
    substrate_modifications = pd.DataFrame({'Substrate Unique Atoms': substrate_specific_atoms,
                                            'Connections to be deleted': del_connections,
                                            'Bonds to be deleted': del_bonds})
    return substrate_modifications
def moonshot():
    """Build a paired-molecule dataset from ``covid_submissions_all_info.csv``.

    Rows without ``f_avg_pIC50`` are dropped. Every unordered pair of remaining
    rows whose maximum common substructure has more than 15 atoms contributes
    one sample ``(graph_of_second, graph_of_first, pIC50_of_second,
    pIC50_of_first)``.

    Returns:
        ``(ds_tr, ds_te)`` -- the first 500 samples and the remainder.
    """
    import itertools
    import os

    import pandas as pd
    from dgllife.utils import mol_to_bigraph, CanonicalAtomFeaturizer
    from rdkit import Chem
    from rdkit.Chem import MCS

    df = pd.read_csv(
        os.path.dirname(graca.data.collections.__file__) +
        "/covid_submissions_all_info.csv")
    df = df.dropna(subset=["f_avg_pIC50"])

    # Parse every SMILES once instead of once per pair.
    records = [(Chem.MolFromSmiles(smiles), activity)
               for smiles, activity in zip(df["SMILES"], df["f_avg_pIC50"])]

    ds = []
    # Pair by position: after dropna() the index labels are no longer
    # contiguous, so they cannot be used as positional offsets.
    for (mol0, y0), (mol1, y1) in itertools.combinations(records, 2):
        res = MCS.FindMCS([mol0, mol1])
        if res.numAtoms > 15:
            ds.append((
                mol_to_bigraph(mol1,
                               node_featurizer=CanonicalAtomFeaturizer(
                                   atom_data_field='feat')),
                mol_to_bigraph(mol0,
                               node_featurizer=CanonicalAtomFeaturizer(
                                   atom_data_field='feat')),
                y1,
                y0,
            ))
    ds_tr = ds[:500]
    ds_te = ds[500:]
    return ds_tr, ds_te
def modify_metastructure(product_modifications, metastructure, steroid2):
    """Graft product-specific atoms onto ``metastructure`` and return the new molecule.

    :param product_modifications: DataFrame read through the columns
        'Connections to be added', 'Neighbors', 'Atomic Number' and
        'Bonds to be added'
    :param metastructure: RDKit molecule that is edited (via an EditableMol copy)
    :param steroid2: SMILES of the product used to build the index lexicon
    :return: the edited molecule after radicals are cleared and it is sanitized

    NOTE(review): ``Series.irow`` is used below; it only exists in old pandas
    releases -- confirm the pandas version this runs against.
    """
    m2 = Chem.MolFromSmiles(steroid2)
    #msubstrate = Chem.MolFromSmiles(steroid1)
    # NOTE(review): patt1 is not used again in this function
    patt1 = Chem.MolFromSmarts(MCS.FindMCS([metastructure, m2], matchValences=True).smarts)
    #convert number of product specific atom to our metastructure
    anchors = []
    #anchors are in the MCS, they will ultimately be deleted but are important for figuring out where to add bonds
    anchortype = []
    # lexicon entries are (metastructure index, product index) pairs
    lexicon = create_lexicon(metastructure, m2)
    for i in product_modifications['Connections to be added'].tolist():
        for k in i: #connections in list if there are multiple
            for j in lexicon:
                if k == j[1]:
                    anchors.append( j[0] )
                    atom = metastructure.GetAtomWithIdx(int(j[0]))
                    anchortype.append( atom.GetAtomicNum())
    neighbors = []
    #for every index gives the neighbors of the same index in the pandas DF earlier
    for i in anchors:
        adjacent_atoms = []
        atom = metastructure.GetAtomWithIdx(i)
        adjacent_atoms = [x.GetIdx() for x in atom.GetNeighbors()]
        neighbors.append( adjacent_atoms )
    #add the product-specific atoms
    em = Chem.EditableMol(metastructure)
    newindexes = []
    newanchors = []
    ###KEEP THESE DELETED####
    # The bare string below is disabled code kept for reference; it is never executed.
    '''for i in product_modifications['Atomic Number'].tolist(): newidx = em.AddAtom(Chem.Atom( int(i) )) newindexes.append( newidx ) for i in range(len(anchors)): newanchor = em.AddAtom(Chem.Atom( anchortype[i] )) newanchors.append( newanchor )'''
    #####logic gate for if a carboxyl like addition is going on
    # Every row matches itself, so similar_indices always holds at least one
    # entry per row of product_modifications.
    similar_indices = []
    for i in range(len(product_modifications)):
        for j in range(len(product_modifications)):
            if product_modifications['Connections to be added'].irow(i) == product_modifications['Connections to be added'].irow(j):
                similar_indices.append( i )
    if len(similar_indices) > 2:
        #translate neighbor number to what it corresponds to in m1
        m1 = em.GetMol()
        for i in lexicon:
            if product_modifications['Neighbors'].irow( similar_indices[0] )[0] == i[1]:
                neighbor = i[0]
        # NOTE(review): `neighbor` is only bound if the loop above found a match -- confirm
        atom = m1.GetAtomWithIdx(neighbor)
        neighbortype = atom.GetAtomicNum()
        neighbor_of_neighbor = [x.GetIdx() for x in atom.GetNeighbors()]
        # one replacement anchor atom receives all the new atoms
        neighboranchor = em.AddAtom(Chem.Atom( int(neighbortype) ))
        new_atoms = []
        for i in range(len(product_modifications)):
            new_atom = em.AddAtom( Chem.Atom( int(product_modifications['Atomic Number'].irow(i)) ))
            new_atoms.append( new_atom )
        # bond each new atom to the replacement anchor with the recorded bond order
        for i in range(len(product_modifications)):
            if str(product_modifications['Bonds to be added'].irow(i)[0]) == 'DOUBLE':
                em.AddBond( int(neighboranchor),int(new_atoms[i]), Chem.BondType.DOUBLE)
            elif str(product_modifications['Bonds to be added'].irow(i)[0]) == 'SINGLE':
                em.AddBond(int(neighboranchor),int(new_atoms[i]), Chem.BondType.SINGLE)
        # reconnect the replacement anchor to the old anchor's neighbours
        for i in neighbor_of_neighbor:
            em.AddBond(int(i), int(neighboranchor), Chem.BondType.SINGLE)
        #get rid of old anchor
        for i in list(set(anchors)):
            em.RemoveAtom(i)
    else:
        #add the product-specific atoms
        em = Chem.EditableMol(metastructure)
        newindexes = []
        newanchors = []
        for i in product_modifications['Atomic Number'].tolist():
            newidx = em.AddAtom(Chem.Atom( int(i) ))
            newindexes.append( newidx )
        for i in range(len(anchors)):
            newanchor = em.AddAtom(Chem.Atom( anchortype[i] ))
            newanchors.append( newanchor )
        mref = em.GetMol()
        #combine the new atom with its new anchor
        # NOTE(review): this branch indexes the column by label ([i]) while the
        # branch above uses positional irow(i) -- confirm both are intended
        for i in range(len(newindexes)):
            if str(product_modifications['Bonds to be added'][i][0]) == 'DOUBLE':
                em.AddBond(int(newindexes[i]), int(newanchors[i]), Chem.BondType.DOUBLE)
            elif str(product_modifications['Bonds to be added'][i][0]) == 'SINGLE':
                em.AddBond(int(newindexes[i]), int(newanchors[i]), Chem.BondType.SINGLE)
        #combine new structure (newanchor + new atom) to the neighbors of the old anchor
        for i in range(len(anchors)):
            for j in range(len(neighbors[i])):
                atom = mref.GetAtomWithIdx( int(neighbors[i][j]) )
                em.AddBond(int(newanchors[i]), int(neighbors[i][j]), Chem.BondType.SINGLE)
        #get rid of old anchor
        for i in anchors:
            em.RemoveAtom(i)
    m1 = em.GetMol()
    # clear radical electrons on every atom before sanitizing
    for atom in m1.GetAtoms():
        atom.SetNumRadicalElectrons(0)
    Chem.SanitizeMol(m1)
    return m1
def modify_substrate(substrate_modifications, steroid1, steroid2, steroids):
    """Edit the molecule parsed from ``steroid1`` using the ``steroids`` -> ``steroid2`` change.

    :param substrate_modifications: DataFrame read through the column
        'Substrate Unique Atoms'
    :param steroid1: SMILES of the molecule that is edited (``m1``)
    :param steroid2: SMILES of the product (``m2``)
    :param steroids: SMILES of the reference substrate (``ms``)
    :return: tuple ``(m1, bondindices, bondindicessub, AROMATIC_FLAG)``;
        ``AROMATIC_FLAG`` is ``'GO'`` when an aromatic bond was found between
        product-only atoms and ``None`` otherwise; the two bond tables fall
        back to ``''`` when the final phase raises

    NOTE(review): ``irow`` is used below; it only exists in old pandas
    releases -- confirm the pandas version this runs against.
    """
    m1 = Chem.MolFromSmiles(steroid1)
    m2 = Chem.MolFromSmiles(steroid2)
    ms = Chem.MolFromSmiles(steroids)
    ############################################################
    #removes atoms that are removed via lyase activity, first must find atoms that are removed from the native substrate to the product
    #then we have to compare those atoms to our non-native substrate then systematically remove them the tricky thing here will be indexing (as always)
    native_lexicon = create_lexicon(ms, m2)
    substrates_lexicon = create_lexicon(m1, ms)
    ms_atoms = []
    ms_m2_atoms = []
    m2matchingatoms = []
    for atom in ms.GetAtoms():
        ms_atoms.append( atom.GetIdx() )
    for i in native_lexicon:
        ms_m2_atoms.append( i[0] )
        m2matchingatoms.append( i[1] )
    #see which atoms don't have overlap i.e. things that need to be deleted
    ms_cleaved = []
    for i in ms_atoms:
        if i not in ms_m2_atoms:
            ms_cleaved.append( i )
    mp_atoms = []
    for atom in m2.GetAtoms():
        mp_atoms.append( atom )
    mp_unique_atoms = []
    for i in mp_atoms:
        if i.GetIdx() not in m2matchingatoms:
            mp_unique_atoms.append( i.GetIdx() )
    #aromatic atoms will screw this code up, we need to make sure the atoms we're going to delete are due to aromaticity
    aromatic_check = []
    aromatic_idxi = []
    aromatic_idxj = []
    for i in mp_unique_atoms:
        for j in mp_unique_atoms:
            # the bare except skips atom pairs for which the bond lookup fails
            try:
                aromatic_check.append( str( m2.GetBondBetweenAtoms(i, j).GetIsAromatic() ) )
                aromatic_idxi.append( i )
                aromatic_idxj.append( j )
            except:
                pass
    #translate ms_cleaved to our target
    target_specifics = []
    for i in ms_cleaved:
        for j in substrates_lexicon:
            if i == j[1]:
                target_specifics.append( j[0] )
    if 'True' not in aromatic_check:
        AROMATIC_FLAG = None
        temp_lex = create_lexicon( ms, m1 )
        em = Chem.EditableMol(m1)
        for i in range(len(ms_cleaved)):
            # the lexicon is rebuilt against the current m1 on every pass
            temp_lex = create_lexicon( ms, m1 )
            for j in temp_lex:
                if j[0] == ms_cleaved[i]:
                    deletion_atom = j[1]
            # NOTE(review): `deletion_atom` keeps its previous value (or is unbound) when no entry matched -- confirm
            em.RemoveAtom(deletion_atom)
            m1 = em.GetMol()
        # round-trip through SMILES, cleaned by clean_smiles
        m1smiles = Chem.MolToSmiles( m1 )
        m1smiles = clean_smiles( m1smiles )
        try:
            m1 = Chem.MolFromSmiles( m1smiles )
        except:
            pass
    else:
        AROMATIC_FLAG = 'GO'
    #create lexicon to compare atom indices
    lexicon = create_lexicon(m1, m2)
    ############################################################
    #This will be the double bonds specific to the substrate
    #I have to do this because double bonds seem to be more specific than single bonds in RDkit,
    #By knowing the exact position of the double bonds I need to remove and add, I can more accurately transform the molecule
    m2bondtypessub = []
    m2bondidxsub = []
    m2bondstartsub = []
    m2bondendsub = []
    m1bondtypessub = []
    m1bondidxsub = []
    m1bondstartsub = []
    m1bondendsub = []
    # NOTE(review): the lexicon holds atom indices, but they are passed to GetBondWithIdx here -- confirm this is intended
    for i in lexicon:
        idx1 = i[0]
        m1bondidxsub.append( idx1 )
        m1bondtypessub.append( m1.GetBondWithIdx(idx1).GetBondType() )
    for i in m1bondidxsub:
        m1bondstartsub.append( m1.GetBondWithIdx(int(i)).GetBeginAtomIdx() )
        m1bondendsub.append( m1.GetBondWithIdx(int(i)).GetEndAtomIdx() )
    for i in m1bondstartsub:
        for j in lexicon:
            if i == j[0]:
                m2bondstartsub.append( j[1] )
    for i in m1bondendsub:
        for j in lexicon:
            if i == j[0]:
                m2bondendsub.append( j[1] )
    for i in m1bondidxsub:
        for j in lexicon:
            if i == j[0]:
                m2bondidxsub.append( j[1] )
    for i in m2bondidxsub:
        m2bondtypessub.append( m2.GetBondWithIdx(int(i)).GetBondType() )
    bondindices = ''
    bondindicessub = ''
    em = Chem.EditableMol(m1)
    if len(BondIndex( ms, m2 )) != 0 and AROMATIC_FLAG != 'GO':
        #compares bonds between both the substrate, product, and target to determine if anything needs to be added
        bondindicessub = BondIndex( m1, m2 )
        # keep only the rows whose bond type differs between m1 and m2
        bondindicessub = bondindicessub[bondindicessub['M1 Bond Type'] != bondindicessub['M2 Bond Type']]
        #bondindices = bondindices[bondindices['M1 Bond Index'] != bondindices['M1 Bond Start']]
        em = Chem.EditableMol(m1)
        if len(BondIndex(m1, m2)) != 0:
            ###This logic gate passes the bond formation if the substrate > product chemistry has no bond changes. I may actually destroy this step entirely...
            for i in range(len(bondindicessub)):
                if str(bondindicessub['M1 Bond Type'].irow(i)) == str('SINGLE') and str(bondindicessub['M2 Bond Type'].irow(i)) == str('DOUBLE'):
                    em.RemoveBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)))
                    em.AddBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)), Chem.BondType.DOUBLE)
                else:
                    # NOTE(review): the comparison below is evaluated and discarded, so every
                    # other differing bond is rewritten as SINGLE -- possibly meant as an elif
                    str(bondindicessub['M1 Bond Type'].irow(i)) == str('DOUBLE')
                    em.RemoveBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)))
                    em.AddBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)), Chem.BondType.SINGLE)
        else:
            pass
    m1 = em.GetMol()
    ############################################################iterate through bonds in both molecules to see if we need to delete any
    #This will be the double bonds specific to the product
    m2bondtypes = []
    m2bondidx = []
    m2bondstart = []
    m2bondend = []
    m1bondtypes = []
    m1bondidx = []
    m1bondstart = []
    m1bondend = []
    for i in lexicon:
        idx2 = i[1]
        m2bondidx.append( idx2 )
        m2bondtypes.append( m2.GetBondWithIdx(idx2).GetBondType() )
    for i in m2bondidx:
        m2bondstart.append( m2.GetBondWithIdx(int(i)).GetBeginAtomIdx() )
        m2bondend.append( m2.GetBondWithIdx(int(i)).GetEndAtomIdx() )
    m1bondtypes = []
    m1bondidx = []
    m1bondstart = []
    m1bondend = []
    for i in m2bondstart:
        for j in lexicon:
            if i == j[1]:
                m1bondstart.append( j[0] )
    for i in m2bondend:
        for j in lexicon:
            if i == j[1]:
                m1bondend.append( j[0] )
    for i in m2bondidx:
        for j in lexicon:
            if i == j[1]:
                m1bondidx.append( j[0] )
    for i in m1bondidx:
        m1bondtypes.append( m1.GetBondWithIdx(int(i)).GetBondType() )
    #exceptions are atoms that extend from the start bond that actually are irrelevant because where they should go don't exist in the starting molecule
    #this is problematic because they could add bonds to places they shouldn't be
    exceptions = []
    for i in range(len(m2bondend)):
        if m2bondend[i] not in [int(j[1]) for j in lexicon]:
            exceptions.append( m2bondend[i] )
    try:
        #this try skips the deconstruction phase if it doesn't need to happen
        bondindices = BondIndex( m2, m1 )
        patt1 = Chem.MolFromSmarts(MCS.FindMCS([Chem.MolFromSmiles(steroid1), Chem.MolFromSmiles(steroid2)]).smarts)
        #remove specified atoms from the substrate
        to_delete_atoms = substrate_modifications['Substrate Unique Atoms'].tolist()
        #deletes atoms that are in the substrate but not the product, could be from lyases or whatnot
        for i in range(len(substrate_modifications['Substrate Unique Atoms'])):
            # any failure inside one pass is swallowed by the bare except below
            try:
                patt1 = Chem.MolFromSmarts(MCS.FindMCS([m1, Chem.MolFromSmiles(steroid2)]).smarts)
                matching1 = m1.GetSubstructMatch(patt1)
                matching1 = list(matching1)
                #below is indices in m, ordered as patt's atoms
                index1 = range(m1.GetNumAtoms())
                #these are the atoms in the substrate that are NOT in the product
                substrate_specific_atoms = list(set(index1) - set(matching1))
                #delete just the first substrate_specific_atom because the whole molecule will reindex
                em = Chem.EditableMol(m1)
                em.RemoveAtom(to_delete_atoms[0])
                #need to fix the valences of the atoms we deleted earlier, for whatever reason a radical or a hydrogen is thrown on and the valence is incorrect.
                #Using bondindicessub because it's coordinates are 100% reliable
                for j in range(len(bondindicessub)):
                    if int(bondindicessub['M1 Bond End'].irow(j)) == int(substrate_specific_atoms[0]):
                        deletion_target_num = int(bondindicessub['M1 Bond Start'].irow(j))
                        deletion_target = m1.GetAtomWithIdx( int(bondindicessub['M1 Bond Start'].irow(j)) )
                        deletion_neighbors = [x.GetIdx() for x in deletion_target.GetNeighbors()]
                        #get the bonds that are connected to indexed atom but not the ones that are in the 'meta-structure'
                        em = Chem.EditableMol(m1)
                        # a fresh carbon (atomic number 6) replaces the target and is
                        # single-bonded to up to three of the target's neighbours
                        newidx = em.AddAtom(Chem.Atom(6))
                        if len(deletion_neighbors) == 1:
                            em.AddBond(newidx, deletion_neighbors[0], Chem.BondType.SINGLE)
                        elif len(deletion_neighbors) == 2:
                            em.AddBond(newidx, deletion_neighbors[0], Chem.BondType.SINGLE)
                            em.AddBond(newidx, deletion_neighbors[1], Chem.BondType.SINGLE)
                        elif len(deletion_neighbors) == 3:
                            em.AddBond(newidx, deletion_neighbors[0], Chem.BondType.SINGLE)
                            em.AddBond(newidx, deletion_neighbors[1], Chem.BondType.SINGLE)
                            em.AddBond(newidx, deletion_neighbors[2], Chem.BondType.SINGLE)
                        em.RemoveAtom(deletion_target_num)
            except:
                pass
    except:
        # the deconstruction phase failed: report empty bond tables
        bondindices = ''
        bondindicessub = ''
    m1 = em.GetMol()
    #Chem.SanitizeMol(m1)
    return m1, bondindices, bondindicessub, AROMATIC_FLAG
def do_lloommppaa_proc(target_id, pdb_protein, smiles, mol2_protein=None, reactants=None, products=None, context=None): """Function to do the processing for LLOOMMPPAA""" from LLOOMMPPAA.models import PossReact from LLOOMMPPAA.reactions import run_react_proc, define_reacts, load_in_reagents, load_in_follow_ups, find_follow_ups, define_reaction_process # Load the data load_wonka_prot(target_id, pdb_protein, smiles) if reactants: # Find the potential synthesis vectors define_reacts() find_follow_ups(target_id=target_id) # Show the sides -> USER MUST SELECT A SIDE poss_reacts = PossReact.objects.filter( mol_id__prot_id__target_id=target_id) # Loop through all the possible reactions for ps_r in poss_reacts: print ps_r.react_id.name print "1):", ps_r.replaced_frag print "2):", ps_r.retained_frag print "3):", "SKIP" choice = int(raw_input("Select a fragment to replace...")) if choice == 1: context = ps_r.retained_frag_context react_frag = ps_r.retained_frag this_react = ps_r.react_id break elif choice == 2: context = ps_r.replaced_frag_context react_frag = ps_r.replaced_frag this_react = ps_r.react_id break else: continue if not context: print "YOU MUST SPECIFY A CONTEXT" if products: print "AUTO GENERATING FROM PRODUCTS" print products context = Chem.CanonSmiles( Chem.MolToSmiles(Chem.MolFromSmarts( MCS.FindMCS(Chem.SDMolSupplier(products)).smarts), isomericSmiles=True)) print context else: return if reactants: mol_id = ps_r.mol_id prot_id = ps_r.mol_id.prot_id else: mol_id = Molecule.objects.filter(prot_id__target_id=target_id, smiles=smiles)[0] prot_id = mol_id.prot_id my_prots = [prot_id] # Set the mol2 protein for this target -> throw a warning if this doesn't happen if mol2_protein: from PLIFS.models import PlifProtein pp = PlifProtein() pp.prot_id = prot_id pp.mol2_data = open(mol2_protein).read() pp.save() # Define the reactants and products if reactants: react_id = load_in_reagents("RUN_DEF", reactants, ps_r.react_id) else: react_id = None if products: my_react 
= Reaction.objects.get_or_create(name="DUMMY", react_smarts="DUMMY", retro_smarts="DUMMY", cont_smarts="DUMMY")[0] this_react = my_react react_frag = context prod_id = load_in_follow_ups("RUN_PROD", products, my_react, mol_id) else: prod_id = None # Now set up the reaction itself react_proc = define_reaction_process(mol_id, context, my_prots, this_react, context, react_frag, products_id=prod_id, reactants_id=react_id) if react_id: react_proc.proc_stage = "RUN_REACTION" react_proc.save() if products: react_proc.proc_stage = "GENERATE CONFS" react_proc.save() # Now run the reaction and analysis itself run_react_proc(react_proc)
# xyz to Mol ## call babel ## babel -ixyz query_mol = readfile(file1) template_mol = readfile(file2) # mcs skeleton_mcs = MCS.FindMCS([query_mol, template_mol]) skeleton_mol = Chem.MolFromSmarts(skeleton_mcs.smarts) min_overlap = 6 # Require the overlap to be at least min_overlap atoms if ( len(skeleton_mol.GetAtoms()) < min_overlap): print("These molecules share less than min_overlap atoms. Quitting.") query_mcs_matches = query_mol.GetSubstructMatches(skeleton_mol) print("query_mol contains overlapping fragment " + len(query_mcs_matches) + " times.") template_mcs_matches = template_mol.GetSubstructMatches(skeleton_mol) print("template_mol contains overlapping fragment " + len(template_mcs_matches) + " times.") for query_mcs_match_index in range(len(query_mcs_matches)):
# Pick the molecule supplier from the input file's extension
if arg[2].endswith('.smi'):
    optSuppl = Chem.SmilesMolSupplier(arg[2])
elif arg[2].endswith('sdf'):
    optSuppl = Chem.SDMolSupplier(arg[2])
else:
    print('File type not supported')
    sys.exit()

w = Chem.SDWriter('alignLeadsOut.sdf')

# Use align mol
# Get common substructure for lead and .smi
# Align copy to lead
# Use AlignMolConformers with argument to specific atom IDs of substructure
mols = [lead, optSuppl[0]]
# Calculates the maximum common substructure and outputs a SMARTS pattern
res = MCS.FindMCS(mols, threshold=0.9, completeRingsOnly=True)
# Creates mol object of the common substructure
p = Chem.MolFromSmarts(res.smarts)
# Core = lead without hydrogens, side chains replaced and the dummy atoms removed
core = AllChem.DeleteSubstructs(
    AllChem.ReplaceSidechains(Chem.RemoveHs(lead), p),
    Chem.MolFromSmiles('*'))
core.UpdatePropertyCache()

for mol in optSuppl:
    try:
        AllChem.ConstrainedEmbed(mol, core, useTethers=False)
        w.write(mol)
    except Exception:
        # Best effort: molecules that cannot be embedded onto the core are
        # skipped, while Ctrl-C / SystemExit still propagate.
        pass
def fmcstimeout(p, q):
    """Return the SMARTS of the MCS of ``p`` and ``q``, calling FindMCS with ``timeout=0.01``."""
    pair = [p, q]
    result = MCS.FindMCS(pair, timeout=0.01)
    return result.smarts
def fmcstimeout(p, q):
    """Return the SMARTS of the MCS of ``p`` and ``q`` (no timeout is passed to FindMCS)."""
    pair = [p, q]
    result = MCS.FindMCS(pair)
    return result.smarts
def view_2dmol(option, maps=None, out_put=None, target_id=None, legends=None, extra=None):
    """Function to render a mol image from a smiles.

    The input (option) could be
    1) a list of smiles
    2) a smiles
    3) pdb_code

    :param option: the value to draw; it is converted to ``str`` and then, when
        possible, parsed back as a Python literal
    :param maps: optional value; when given for a single SMILES, a vertical bar
        whose colour is derived from ``float(maps)`` is drawn on the image
    :param out_put: unused
    :param target_id: unused
    :param legends: legends passed to the grid image for a list of smiles
    :param extra: ``"SIMPLE"`` for a plain image, ``None`` for a 100x100 image,
        otherwise a SMILES of a substructure to highlight
    :return: the molecule image as PNG data, or the string ``"NOT VALID TYPE"``
    """
    option = str(option)
    print(option)
    try:
        option = ast.literal_eval(option)
    except Exception:
        # Not a Python literal (e.g. a bare SMILES or PDB code): keep the string
        pass
    if type(option) is list:
        mols = [Chem.MolFromSmiles(str(x)) for x in option]
        # Align every depiction on the maximum common substructure
        p = Chem.MolFromSmarts(MCS.FindMCS(mols).smarts)
        AllChem.Compute2DCoords(p)
        for x in mols:
            AllChem.GenerateDepictionMatching2DStructure(x, p)
        image = Draw.MolsToGridImage(mols, 2, legends=legends)
    # If it's a PDB code
    elif Chem.MolFromSmiles(str(option)) is None and type(option) is str:
        # Fetch the database row once and reuse it for both SMILES and mol block
        db_mol = Molecule.objects.filter(prot_id__code=option)[0]
        mol = Chem.MolFromSmiles(str(db_mol.cmpd_id.smiles))
        AllChem.GenerateDepictionMatching3DStructure(
            mol, Chem.MolFromMolBlock(str(db_mol.sdf_info)))
        image = Draw.MolToImage(mol)
    # If it's a single SMILES
    elif type(option) is str:
        mol = Chem.MolFromSmiles(str(option))
        if extra == "SIMPLE":
            image = Draw.MolToImage(mol)
        elif extra is None:
            h_map = None
            image = Draw.MolToImage(mol, size=(100, 100), fitImage=True, highlightMap=h_map)
        else:
            sub = Chem.MolFromSmiles(str(extra))
            h_map = get_h_map(mol, sub)
            image = Draw.MolToImage(mol, highlightMap=h_map)
        if maps is not None:
            maps = float(maps)
            draw = ImageDraw.Draw(image)
            # Vertical bar at x=20 over the full image height
            dim = (20, 0) + (20, image.size[1])
            draw.line(dim, fill=(255 - int(255 * maps), int(255 * maps), 0), width=10)
    else:
        print("NOT VALID TYPE")
        return "NOT VALID TYPE"
    output = StringIO.StringIO()
    image.save(output, format="PNG")
    contents = output.getvalue()
    return contents