def testRandomSample(self):
    """Smoke-test RandomSampleStrategy on a 10 x 5 x 6 reagent grid.

    Checks that the strategy reports 10 * 5 * 6 permutations after
    initialization, draws samples from it, and prints (does not assert) how
    many distinct samples were seen, since the sampling is random.  Finally
    verifies that the strategy object can be shallow-copied without raising.
    """
    log("testRandomSample")
    rgroups = [[Chem.MolFromSmiles("C")] * 10,
               [Chem.MolFromSmiles("N")] * 5,
               [Chem.MolFromSmiles("O")] * 6]
    # Derived once so the expected counts cannot drift from the fixture.
    sizes = [len(group) for group in rgroups]  # [10, 5, 6]
    num_permutations = sizes[0] * sizes[1] * sizes[2]  # 300

    rxn = rdChemReactions.ChemicalReaction()

    # Round 1: draw as many samples as there are permutations and report
    # how many distinct ones came back.
    randProd = rdChemReactions.RandomSampleStrategy()
    randProd.Initialize(rxn, rgroups)
    self.assertEqual(randProd.GetNumPermutations(), num_permutations)
    groups = [tuple(randProd.next()) for _ in range(num_permutations)]
    print(len(set(groups)), "out of", num_permutations)

    # Round 2: a fresh strategy, only 10 draws; report per-position variety.
    randProd = rdChemReactions.RandomSampleStrategy()
    randProd.Initialize(rxn, rgroups)
    self.assertEqual(randProd.GetNumPermutations(), num_permutations)
    groups = [tuple(randProd.next()) for _ in range(10)]
    for position, size in enumerate(sizes):
        # presumably each sample holds one entry per reagent group, so
        # position indexes the reagent group -- confirm against the binding.
        print(position, len({g[position] for g in groups}), "out of", size)

    # Result intentionally discarded: this only checks copying does not raise.
    copy.copy(randProd)
def testTimings(self):
    """Print how long each enumeration strategy takes to Skip 10000000 entries.

    Builds three large reagent lists (17000, 50000 and 4000 entries),
    initializes a CartesianProductStrategy, a RandomSampleStrategy and a
    RandomSampleAllBBsStrategy with them, and reports the wall-clock time of
    one Skip call per strategy.  Nothing is asserted.
    """
    log("testTimings")
    rxn = rdChemReactions.ChemicalReaction()
    carbons = [Chem.MolFromSmiles("C")] * 17000
    nitrogens = [Chem.MolFromSmiles("N")] * 50000
    oxygens = [Chem.MolFromSmiles("O")] * 4000
    rgroups = [carbons, nitrogens, oxygens]

    strategies = (
        rdChemReactions.CartesianProductStrategy(),
        rdChemReactions.RandomSampleStrategy(),
        rdChemReactions.RandomSampleAllBBsStrategy(),
    )
    skip_count = 10000000
    for strategy in strategies:
        strategy.Initialize(rxn, rgroups)
        started = time.time()
        strategy.Skip(skip_count)
        finished = time.time()
        print("%s Skipped %s in %s seconds" % (strategy, skip_count, finished - started))
def testRGroupState(self):
    """Check that EnumerateLibrary state can be saved, restored and reset.

    For the default strategy, RandomSampleStrategy and
    RandomSampleAllBBsStrategy: capture the state, pull two results with
    nextSmiles(), restore the state, and require the same two results again.
    Then, with the default strategy, skip ahead, call ResetState() and require
    that iterating yields the full expected product list in order.

    Returns early (after printing a notice to stderr) when
    EnumerateLibraryCanSerialize() is false.
    """
    if not rdChemReactions.EnumerateLibraryCanSerialize():
        print("-- Skipping testRGroupState, serialization of EnumerateLibrary not enabled",
              file=sys.stderr)
        return

    log("testRGroupState")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [
            Chem.MolFromSmiles('C=CCN=C=S'),
            Chem.MolFromSmiles('CC=CCN=C=S'),
        ],
        [
            Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
        ],
    ]

    def tostr(products):
        # Convert the nested result of nextSmiles() into plain lists of str
        # so two results can be compared with ==.
        return [[str(x) for x in v] for v in products]

    def check_state_roundtrip(enumerator):
        # Save state, advance twice, rewind, and expect the same two results.
        state = enumerator.GetState()
        p = enumerator.nextSmiles()
        p2 = enumerator.nextSmiles()
        enumerator.SetState(state)
        self.assertEqual(tostr(enumerator.nextSmiles()), tostr(p))
        self.assertEqual(tostr(enumerator.nextSmiles()), tostr(p2))

    check_state_roundtrip(rdChemReactions.EnumerateLibrary(rxn, reagents))
    check_state_roundtrip(
        rdChemReactions.EnumerateLibrary(rxn, reagents,
                                         rdChemReactions.RandomSampleStrategy()))
    check_state_roundtrip(
        rdChemReactions.EnumerateLibrary(rxn, reagents,
                                         rdChemReactions.RandomSampleAllBBsStrategy()))

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents)
    smiresults = [
        'C=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
    ]
    # Round-trip through RDKit so the expected strings are in the same
    # form that MolToSmiles produces for the enumerated products.
    smiresults = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults]

    # Move the enumerator forward, then reset: iteration below must still
    # produce every expected product from the beginning.
    enumerator.GetEnumerator().Skip(10)
    enumerator.ResetState()

    results = []
    for result in enumerator:
        for prodSet in result:
            for mol in prodSet:
                results.append(Chem.MolToSmiles(mol))

    self.assertEqual(results, smiresults)
def testRandomEnumerateLibrary(self):
    """Exercise EnumerateLibrary with RandomSampleStrategy.

    Part 1: sample randomly until every expected product SMILES has been
    seen, failing if that takes more than 100000 draws.

    Part 2 (only when EnumerateLibraryCanSerialize() is true): serialize an
    enumerator, restore it into a second one, and require both to produce the
    same products for 10 steps.  The state serialized after the second step is
    restored into a third enumerator, which must reproduce the remaining
    products.
    """
    log("testRandomEnumerateLibrary")
    smirks_thiourea = "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:2]"
    rxn = rdChemReactions.ReactionFromSmarts(smirks_thiourea)
    reagents = [
        [
            Chem.MolFromSmiles('C=CCN=C=S'),
            Chem.MolFromSmiles('CC=CCN=C=S'),
        ],
        [
            Chem.MolFromSmiles('NCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCc1ncc(Cl)cc1Br'),
            Chem.MolFromSmiles('NCCCc1ncc(Cl)cc1Br'),
        ],
    ]

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                  rdChemReactions.RandomSampleStrategy())
    self.assertTrue(enumerator)

    smiresults = [
        'C=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCc1ncc(Cl)cc1Br',
        'C=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
        'CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br',
    ]
    # Round-trip through RDKit so the expected strings are in the same
    # form that MolToSmiles produces for the enumerated products.
    results = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiresults]
    expected = set(results)  # built once instead of on every loop test

    enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                  rdChemReactions.RandomSampleStrategy())
    iteren = iter(enumerator)
    res = set()
    count = 0
    while res != expected:
        count += 1
        if count > 100000:
            print("Unable to find enumerate set with 100,000 random samples!", file=sys.stderr)
            # res != expected here, so this assertion fails and ends the test.
            self.assertEqual(res, expected)

        prod = iteren.next()
        for mols in prod:
            smi1 = Chem.MolToSmiles(mols[0])
            res.add(smi1)

    if rdChemReactions.EnumerateLibraryCanSerialize():
        enumerator = rdChemReactions.EnumerateLibrary(rxn, reagents,
                                                      rdChemReactions.RandomSampleStrategy())
        serialized = enumerator.Serialize()
        enumerator2 = rdChemReactions.EnumerateLibrary()
        enumerator2.InitFromString(serialized)
        self.assertEqual(enumerator.GetEnumerator().Type(),
                         enumerator2.GetEnumerator().Type())

        iteren = iter(enumerator)
        iteren2 = iter(enumerator2)
        outsmiles = []
        for i in range(10):
            prods1 = iteren.next()
            prods2 = iteren2.next()
            self.assertEqual(len(prods1), len(prods2))
            for mols1, mols2 in zip(prods1, prods2):
                self.assertEqual(len(mols1), 1)
                smi1 = Chem.MolToSmiles(mols1[0])
                self.assertEqual(smi1, Chem.MolToSmiles(mols2[0]))
                outsmiles.append(smi1)

            if i == 1:
                # Snapshot after two steps; replayed from this point below.
                pickle_at_2 = enumerator.Serialize()

        # make sure we can pickle the state as well
        enumerator3 = rdChemReactions.EnumerateLibrary()
        enumerator3.InitFromString(pickle_at_2)
        iteren3 = iter(enumerator3)
        outsmiles2 = []
        for i in range(8):
            prods3 = iteren3.next()
            for mols3 in prods3:
                self.assertEqual(len(mols3), 1)
                smi1 = Chem.MolToSmiles(mols3[0])
                # NOTE(review): this compares the SMILES of mols3[0] with
                # itself; the meaningful check is the list comparison below.
                self.assertEqual(smi1, Chem.MolToSmiles(mols3[0]))
                outsmiles2.append(smi1)

        # The 8 replayed steps must match steps 3..10 of the original run.
        self.assertEqual(outsmiles2, outsmiles[2:])