def test4Serialize(self):
    """Check that a fragment catalog survives a pickle round trip.

    A catalog is built from the molecules supplied from ``self.smiName``,
    pickled and unpickled, and the copy must report the same number of
    entries / fingerprint length and yield the same on-bits as the original
    for every input molecule.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original left it to the garbage collector).
    # NOTE(review): the lines read here are not used below; the read is kept
    # so the file access performed by the original is preserved.
    with open(self.smiName, 'r') as smiF:
        smiLines = smiF.readlines()

    fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
    fcat = FragmentCatalog.FragCatalog(fparams)
    fgen = FragmentCatalog.FragCatGenerator()
    suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
    smiles = []
    for mol in suppl:
        fgen.AddFragsFromMol(mol, fcat)
        smiles.append(Chem.MolToSmiles(mol))
    assert fcat.GetNumEntries() == 21
    assert fcat.GetFPLength() == 21, fcat.GetFPLength()

    # Round-trip the populated catalog through pickle.
    pkl = cPickle.dumps(fcat)
    fcat2 = cPickle.loads(pkl)
    assert fcat2.GetNumEntries() == 21
    assert fcat2.GetFPLength() == 21, fcat2.GetFPLength()

    # Fingerprints from the original and the restored catalog must agree.
    fpgen = FragmentCatalog.FragFPGenerator()
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        fp1 = fpgen.GetFPForMol(mol, fcat)
        fp2 = fpgen.GetFPForMol(mol, fcat2)
        assert fp1.GetNumOnBits() == fp2.GetNumOnBits()
        obl1 = fp1.GetOnBits()
        obl2 = fp2.GetOnBits()
        assert tuple(obl1) == tuple(obl2)
def test3FPgenerator(self):
    """Check fragment fingerprints against hard-coded expectations.

    A catalog is built from the molecules supplied from ``self.smiName``;
    each molecule is then fingerprinted against that catalog and, for the
    molecules that have an expectation, the number of on-bits and the on-bit
    ids are compared with ``obits`` / ``obls``.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original left it to the garbage collector).
    # NOTE(review): the lines read here are not used below; the read is kept
    # so the file access performed by the original is preserved.
    with open(self.smiName, 'r') as smiF:
        smiLines = smiF.readlines()

    fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
    fcat = FragmentCatalog.FragCatalog(fparams)
    fgen = FragmentCatalog.FragCatGenerator()
    suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
    smiles = []
    for mol in suppl:
        fgen.AddFragsFromMol(mol, fcat)
        smiles.append(Chem.MolToSmiles(mol))
    assert fcat.GetNumEntries() == 21
    assert fcat.GetFPLength() == 21, fcat.GetFPLength()

    fpgen = FragmentCatalog.FragFPGenerator()
    # Expected on-bit counts and on-bit ids, indexed by molecule position.
    obits = [3, 2, 3, 3, 2, 3, 5, 5, 5, 4, 5, 6]
    obls = [(0, 1, 2), (1, 3), (1, 4, 5), (1, 6, 7), (0, 8), (0, 6, 9),
            (0, 1, 2, 3, 10), (0, 1, 2, 8, 11), (1, 3, 4, 5, 12),
            (1, 4, 5, 13), (1, 3, 6, 7, 14), (0, 1, 6, 7, 9, 15)]
    for i, smi in enumerate(smiles):
        mol = Chem.MolFromSmiles(smi)
        fp = fpgen.GetFPForMol(mol, fcat)
        # Molecules beyond the end of the expectation lists are fingerprinted
        # but not checked.
        if i < len(obits):
            assert fp.GetNumOnBits() == obits[i], '%s: %s' % (
                smi, str(fp.GetOnBits()))
        obl = fp.GetOnBits()
        if i < len(obls):
            assert tuple(obl) == obls[i], '%s: %s' % (smi, obl)
def setUp(self):
    """Create the SMILES fixtures and the fragment-catalog objects.

    Attributes set on the instance:

    - ``smiList``: nine larger SMILES strings
    - ``smiList2``: sixteen small SMILES strings
    - ``list2Acts``: sixteen 0/1 values (same length as ``smiList2``;
      presumably activity labels - TODO confirm against the tests using it)
    - ``list2Obls``: twelve tuples of integer ids
    - ``catParams`` / ``fragCat`` / ``fgen``: catalog parameters (fragment
      lengths 1 to 6, functional groups from RDKit's data directory), an
      empty catalog built from them, and a catalog generator
    """
    larger_smiles = [
        "S(SC1=NC2=CC=CC=C2S1)C3=NC4=C(S3)C=CC=C4",
        "CC1=CC(=O)C=CC1=O",
        "OC1=C(Cl)C=C(C=C1[N+]([O-])=O)[N+]([O-])=O",
        "[O-][N+](=O)C1=CNC(=N)S1",
        "NC1=CC2=C(C=C1)C(=O)C3=C(C=CC=C3)C2=O",
        "OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br",
        "CN(C)C1=C(Cl)C(=O)C2=C(C=CC=C2)C1=O",
        "CC1=C(C2=C(C=C1)C(=O)C3=CC=CC=C3C2=O)[N+]([O-])=O",
        "CC(=NO)C(C)=NO",
    ]
    self.smiList = larger_smiles

    small_smiles = [
        'OCCC', 'CCC', 'C=CC', 'OC=CC',
        'CC(O)C', 'C=C(O)C', 'OCCCC', 'CC(O)CC',
        'C=CCC', 'CC=CC', 'OC=CCC', 'CC=C(O)C',
        'OCC=CC', 'C=C(O)CC', 'C=CC(O)C', 'C=CCCO',
    ]
    self.smiList2 = small_smiles

    self.list2Acts = [1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]

    self.list2Obls = [
        (0, 1, 2),
        (1, 3),
        (1, 4, 5),
        (1, 6, 7),
        (0, 8),
        (0, 6, 9),
        (0, 1, 2, 3, 10),
        (0, 1, 2, 8, 11),
        (1, 3, 4, 5, 12),
        (1, 4, 5, 13),
        (1, 3, 6, 7, 14),
        (0, 1, 6, 7, 9, 15),
    ]

    # Catalog machinery shared by the tests.
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    self.catParams = FragmentCatalog.FragCatParams(1, 6, group_file)
    self.fragCat = FragmentCatalog.FragCatalog(self.catParams)
    self.fgen = FragmentCatalog.FragCatGenerator()
def calculate_fragments(smiles):
    """
    Return the fragment descriptions RDKit's fragment catalog finds in a SMILES.

    Background: the fragments are meant to be imported into Neo4j according
    to the project ontology. The approach follows Adam's "mol_frag.ipynb"
    (deepml branch), which in turn follows the RDKit guide at
    https://www.rdkit.org/docs/GettingStartedInPython.html.

    The catalog parameters, the catalog and the generator are all created
    anew on every call. The original author observed that sharing this setup
    across SMILES produced too few fragments; presumably that is because
    ``AddFragsFromMol`` is counted against what the catalog already holds -
    TODO confirm.

    :param smiles: SMILES string of the molecule to fragment
    :return: list holding the catalog's entry description for each fragment
             counted by ``AddFragsFromMol``
    """
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    # TODO: the fragment length bounds (0, 4) still need research and tuning;
    # they control how many fragments are produced per SMILES.
    params = FragmentCatalog.FragCatParams(0, 4, group_file)
    # The fragments are stored as entries of this catalog.
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()

    molecule = MolFromSmiles(smiles)
    n_frags = generator.AddFragsFromMol(molecule, catalog)
    # print("This SMILES, %s, has %d fragments" % (smiles, n_frags))

    # List of molecular fragments, one description per catalog entry.
    return [catalog.GetEntryDescription(entry) for entry in range(n_frags)]
def generate_geneset():
    """Build the hard-coded GeneSet of atoms and fragment-catalog objects.

    Returns
    ----------
    GeneSet : object
        constructed from, in order: the atom list, the fragment-catalog
        parameters (fragment lengths 1 to 5, RDKit's FunctionalGroups.txt)
        and a fragment catalog populated from the molecule 'CCCC'
    """
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    frag_params = FragmentCatalog.FragCatParams(1, 5, group_file)
    frag_catalog = FragmentCatalog.FragCatalog(frag_params)

    # Seed the catalog with the fragments of butane.
    generator = FragmentCatalog.FragCatGenerator()
    seed_mol = Chem.MolFromSmiles('CCCC')
    generator.AddFragsFromMol(seed_mol, frag_catalog)

    # The integers look like atomic numbers (C, N, O, F, B, P, S, Cl) -
    # TODO confirm against GeneSet.
    atom_list = [6, 7, 8, 9, 5, 15, 16, 17]
    return GeneSet(atom_list, frag_params, frag_catalog)
def test1Catalog(self):
    """A freshly built catalog is empty and remembers its parameters."""
    params = FragmentCatalog.FragCatParams(1, 6, self.fName, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)

    # Nothing has been added yet.
    self.assertEqual(catalog.GetNumEntries(), 0)
    self.assertEqual(catalog.GetFPLength(), 0)

    # The parameters handed back by the catalog carry the length bounds
    # it was constructed with.
    stored = catalog.GetCatalogParams()
    self.assertEqual(stored.GetLowerFragLength(), 1)
    self.assertEqual(stored.GetUpperFragLength(), 6)
def test1Catalog(self):
    """A freshly built catalog is empty and remembers its parameters."""
    params = FragmentCatalog.FragCatParams(1, 6, self.fName, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)

    # Nothing has been added yet.
    assert catalog.GetNumEntries() == 0
    assert catalog.GetFPLength() == 0

    # The parameters handed back by the catalog carry the length bounds
    # it was constructed with.
    stored = catalog.GetCatalogParams()
    assert stored.GetLowerFragLength() == 1
    assert stored.GetUpperFragLength() == 6
def test8Issue118(self):
    """Regression test for issue 118.

    Building a catalog (fragment lengths 2 to 4) from the single molecule
    'CCN(C(N)=O)N=O' must give a fingerprint of length 1 whose only bit is
    described as 'CCN(<-C(=O)N>)<-N=O>'.
    """
    smiList = ['CCN(C(N)=O)N=O']
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    supplier = Chem.SmilesMolSupplierFromText('\n'.join(smiList), ',', 0, -1, 0)

    params = FragmentCatalog.FragCatParams(2, 4, group_file, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    for molecule in supplier:
        generator.AddFragsFromMol(molecule, catalog)

    self.assertEqual(catalog.GetFPLength(), 1)
    self.assertEqual(catalog.GetBitDescription(0), 'CCN(<-C(=O)N>)<-N=O>')
def test6DownEntries(self):
    """Check the down-entry ids the catalog reports for entries 0 and 1.

    The catalog is built from the molecules supplied from ``self.smiName``
    and is expected to hold 21 entries / 21 fingerprint bits.
    """
    params = FragmentCatalog.FragCatParams(1, 6, self.fName, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    for molecule in Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0):
        generator.AddFragsFromMol(molecule, catalog)

    assert catalog.GetNumEntries() == 21
    assert catalog.GetFPLength() == 21

    down_ids_0 = tuple(catalog.GetEntryDownIds(0))
    assert down_ids_0 == (2, 8, 9, 16)
    down_ids_1 = tuple(catalog.GetEntryDownIds(1))
    assert down_ids_1 == (2, 3, 5, 7)
def BuildCatalog(suppl, maxPts=-1, groupFileName=None, minPath=2, maxPath=6, reportFreq=10):
    """ builds a fragment catalog from the molecules provided by a supplier

      **Arguments**

        - suppl: a mol supplier

        - maxPts: (optional) if provided (> 0), this will set an upper bound on
          the number of molecules to be considered

        - groupFileName: (optional) name of the file containing functional
          group information; defaults to FunctionalGroups.txt in
          RDConfig.RDDataDir

        - minPath, maxPath: (optional) the minimum and maximum path lengths to
          be considered

        - reportFreq: (optional) how often (in molecules) to display status
          information; 0 disables the status messages

      **Returns**

        a FragmentCatalog.FragCatalog

    """
    if groupFileName is None:
        groupFileName = os.path.join(RDConfig.RDDataDir, "FunctionalGroups.txt")

    fpParams = FragmentCatalog.FragCatParams(minPath, maxPath, groupFileName)
    catalog = FragmentCatalog.FragCatalog(fpParams)
    fgen = FragmentCatalog.FragCatGenerator()

    # Number of molecules to process: the explicit bound if given, else the
    # supplier's length when it has one, else -1 meaning "unknown / no bound".
    if maxPts > 0:
        nPts = maxPts
    elif hasattr(suppl, '__len__'):
        nPts = len(suppl)
    else:
        nPts = -1

    for i, mol in enumerate(suppl):
        if i == nPts:
            break
        # The leading `reportFreq` test guards the modulo: reportFreq == 0
        # previously raised ZeroDivisionError, now it just turns reporting off.
        if reportFreq and i and not i % reportFreq:
            if nPts > -1:
                message('Done %d of %d, %d paths\n' % (i, nPts, catalog.GetFPLength()))
            else:
                message('Done %d, %d paths\n' % (i, catalog.GetFPLength()))
        fgen.AddFragsFromMol(mol, catalog)
    return catalog
def test5FPsize(self):
    """Check the single length-6 fragment found in 'C1CCCOC1O'.

    With both fragment length bounds set to 6 the catalog must end up with
    exactly one fingerprint bit, of order 6, described as "C1CCOC<-O>C1" and
    carrying the functional group ids (1,).
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original left it to the garbage collector).
    # NOTE(review): the lines read here are not used below; the read is kept
    # so the file access performed by the original is preserved.
    with open(self.smiName, 'r') as smiF:
        smiLines = smiF.readlines()

    fparams = FragmentCatalog.FragCatParams(6, 6, self.fName)
    fcat = FragmentCatalog.FragCatalog(fparams)
    fgen = FragmentCatalog.FragCatGenerator()
    suppl = [Chem.MolFromSmiles('C1CCCOC1O')]
    for mol in suppl:
        fgen.AddFragsFromMol(mol, fcat)

    assert fcat.GetFPLength() == 1
    for i in range(fcat.GetFPLength()):
        assert fcat.GetBitOrder(i) == 6
        assert fcat.GetBitDescription(i) == "C1CCOC<-O>C1", fcat.GetBitDescription(i)
        assert tuple(fcat.GetBitFuncGroupIds(i)) == (1, )
def test2Generator(self):
    """Check that entry-based and bit-based catalog accessors agree.

    After adding the molecules supplied from ``self.smiName`` the catalog
    must hold 21 entries / 21 fingerprint bits. For every entry, its bit id
    must equal its index, and its order, description and functional group
    ids must match those reported for the bit with the same index.
    """
    params = FragmentCatalog.FragCatParams(1, 6, self.fName, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    for molecule in Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0):
        generator.AddFragsFromMol(molecule, catalog)

    self.assertEqual(catalog.GetNumEntries(), 21)
    self.assertEqual(catalog.GetFPLength(), 21)

    # `idx` rather than `id`, so the builtin is not shadowed.
    for idx in range(catalog.GetNumEntries()):
        self.assertEqual(catalog.GetEntryBitId(idx), idx)
        self.assertEqual(catalog.GetEntryOrder(idx), catalog.GetBitOrder(idx))
        self.assertEqual(catalog.GetEntryDescription(idx),
                         catalog.GetBitDescription(idx))
        entry_groups = tuple(catalog.GetEntryFuncGroupIds(idx))
        bit_groups = tuple(catalog.GetBitFuncGroupIds(idx))
        self.assertEqual(entry_groups, bit_groups)
def test5FPsize(self):
    """Check the single length-6 fragment found in 'C1CCCOC1O'.

    With both fragment length bounds set to 6 the catalog must end up with
    exactly one fingerprint bit, of order 6, described as "C1CC<-O>OCC1" and
    carrying the functional group ids (1,).
    """
    # The SMILES file is read but its lines are not used further down.
    with open(self.smiName, 'r') as handle:
        smiLines = handle.readlines()

    params = FragmentCatalog.FragCatParams(6, 6, self.fName)
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    molecules = [Chem.MolFromSmiles('C1CCCOC1O')]
    for molecule in molecules:
        generator.AddFragsFromMol(molecule, catalog)

    self.assertEqual(catalog.GetFPLength(), 1)
    for bit in range(catalog.GetFPLength()):
        self.assertEqual(catalog.GetBitOrder(bit), 6)
        self.assertEqual(catalog.GetBitDescription(bit), "C1CC<-O>OCC1")
        group_ids = tuple(catalog.GetBitFuncGroupIds(bit))
        self.assertEqual(group_ids, (1, ))
def test7Issue116(self):
    """Regression test for issue 116.

    A catalog with both fragment length bounds set to 2 is built from
    'Cc1ccccc1'; it must have two bits, the first described as 'ccC'.
    Fingerprinting 'Cc1ccccc1' sets both bits, while fingerprinting
    'c1ccccc1-c1ccccc1' sets only the second.
    """
    smiList = ['Cc1ccccc1']
    supplier = Chem.SmilesMolSupplierFromText('\n'.join(smiList), ',', 0, -1, 0)
    params = FragmentCatalog.FragCatParams(2, 2, self.fName, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    for molecule in supplier:
        generator.AddFragsFromMol(molecule, catalog)

    self.assertEqual(catalog.GetFPLength(), 2)
    self.assertEqual(catalog.GetBitDescription(0), 'ccC')

    fp_generator = FragmentCatalog.FragFPGenerator()

    # The molecule the catalog was built from sets both bits.
    toluene = Chem.MolFromSmiles('Cc1ccccc1')
    toluene_fp = fp_generator.GetFPForMol(toluene, catalog)
    self.assertEqual(toluene_fp[0], 1)
    self.assertEqual(toluene_fp[1], 1)

    # The second molecule sets only bit 1.
    biphenyl = Chem.MolFromSmiles('c1ccccc1-c1ccccc1')
    biphenyl_fp = fp_generator.GetFPForMol(biphenyl, catalog)
    self.assertEqual(biphenyl_fp[0], 0)
    self.assertEqual(biphenyl_fp[1], 1)
def fragment_database(self):
    """Create the fragment catalog and the BRICS fragment list for this object.

    Sets two attributes:

    - ``self.fcat``: an empty ``FragCatalog`` with fragment lengths 1 to 6
    - ``self.synthetic_frag_database``: de-duplicated list of the BRICS
      fragments of every molecule listed in ``data/smiles_database.csv``
      (second comma-separated column, first row skipped as a header);
      the list order is not defined because it comes from a ``set``
    """
    # NOTE(review): hard-coded, machine-specific path to RDKit's functional
    # group file - consider deriving it from the installed RDKit instead.
    fName = 'C:/RDKit_2017_03_2/Data/FunctionalGroups.txt'
    fparams = FragmentCatalog.FragCatParams(1, 6, fName)
    self.fcat = FragmentCatalog.FragCatalog(fparams)
    ## macrocycle_file = 'macrocycles_IDs.csv'
    ## suppl = [i.split(',')[0] for i in open(self.directory+name,'r').read().splitlines()][1:] # read all the macrocycle smiles from file
    ## ms = [Chem.MolFromSmiles(i) for i in suppl] # mols of macrocycles

    zinc_file = 'data/smiles_database.csv'
    # Read via a context manager so the file handle is closed promptly
    # (the original never closed it).
    with open(self.directory + zinc_file, 'r') as handle:
        rows = handle.read().splitlines()
    # Drop the header row before splitting, so a header without a comma can
    # no longer raise IndexError.
    zinc_suppl = [row.split(',')[1] for row in rows[1:]]
    zinc_ms = [Chem.MolFromSmiles(smi) for smi in zinc_suppl]

    pre_synthetic_frag_database = [BRICS.BRICSDecompose(mol) for mol in zinc_ms]
    self.synthetic_frag_database = list(
        set(chain.from_iterable(pre_synthetic_frag_database)))
def generate_geneset():
    """
    Build the GeneSet of atoms and fragments used by the engine.

    The atoms and fragment settings are currently hardcoded here; this
    will probably be adapted in future versions.

    Parameters
    ----------
    None

    Returns
    ----------
    GeneSet : object
        instance of the GeneSet class constructed from the atom list, the
        RDKit fragment-catalog parameters and a fragment catalog populated
        from the molecule 'CCCC'
    """
    # Catalog parameters: fragment lengths 1 to 5, functional groups taken
    # from RDKit's data directory.
    functional_groups = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    catalog_params = FragmentCatalog.FragCatParams(1, 5, functional_groups)

    # Catalog seeded with the fragments of butane.
    catalog = FragmentCatalog.FragCatalog(catalog_params)
    frag_generator = FragmentCatalog.FragCatGenerator()
    butane = Chem.MolFromSmiles('CCCC')
    frag_generator.AddFragsFromMol(butane, catalog)

    return GeneSet([6, 7, 8, 9, 5, 15, 16, 17], catalog_params, catalog)
#!/usr/bin/env python
# Demo script: list the fragments RDKit's fragment catalog finds in one
# hard-coded molecule.
import os

from rdkit import Chem
from rdkit import RDConfig
from rdkit.Chem import FragmentCatalog

# Catalog parameters: fragment lengths 1 to 6, functional groups taken from
# the FunctionalGroups.txt file in RDKit's data directory.
fName = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
fparams = FragmentCatalog.FragCatParams(1, 6, fName)
print('found %d functional groups in catalog' % fparams.GetNumFuncGroups())

fcat = FragmentCatalog.FragCatalog(fparams)
fcgen = FragmentCatalog.FragCatGenerator()

smiles = 'OCC=CC(=O)O'
m = Chem.MolFromSmiles(smiles)
print('examining molecule: ' + smiles)

# Add the molecule's fragments to the (initially empty) catalog, then print
# the description of each entry that was counted.
frag_count = fcgen.AddFragsFromMol(m, fcat)
print('identified %d fragments' % frag_count)
for entry_idx in range(frag_count):
    print(fcat.GetEntryDescription(entry_idx))