def test0FilterCatalogEntry(self):
    """Check SmartsMatcher validity, entry descriptions and ExclusionList.

    Covers, in order: a name-only SmartsMatcher is invalid until a pattern
    is set; min/max count accessors; FilterCatalogEntry description
    get/set; an unnamed matcher built directly from a pattern; and an
    ExclusionList wrapping that matcher.
    """
    matcher = FilterCatalog.SmartsMatcher("Aromatic carbon chain")
    # Built from a name only, so no pattern has been attached yet.
    self.assertFalse(matcher.IsValid())

    pat = Chem.MolFromSmarts("c:c:c:c:c")
    matcher.SetPattern(pat)
    matcher.SetMinCount(1)

    entry = FilterCatalog.FilterCatalogEntry("Bar", matcher)
    if FilterCatalog.FilterCatalogCanSerialize():
        # Smoke check only: the result is not inspected, the call just
        # has to succeed when serialization is available.
        entry.Serialize()

    self.assertEqual(entry.GetDescription(), "Bar")
    self.assertEqual(matcher.GetMinCount(), 1)
    self.assertEqual(matcher.GetMaxCount(), 2**32 - 1)
    self.assertTrue(matcher.IsValid())

    entry.SetDescription("Foo")
    self.assertEqual(entry.GetDescription(), "Foo")

    mol = Chem.MolFromSmiles("c1ccccc1")
    self.assertTrue(matcher.HasMatch(mol))

    unnamed = FilterCatalog.SmartsMatcher(pat)
    self.assertEqual(str(unnamed), "Unnamed SmartsMatcher")
    self.assertEqual(unnamed.GetMinCount(), 1)
    self.assertTrue(unnamed.HasMatch(mol))
    self.assertTrue(unnamed.GetMatches(mol))

    # The exclusion list must wrap the SMARTS matcher. Previously the name
    # `matcher` was rebound to the ExclusionList before being passed in,
    # so the list was handed itself and the pattern was never excluded.
    exclusion = FilterCatalog.ExclusionList()
    exclusion.SetExclusionPatterns([unnamed])
    self.assertFalse(exclusion.HasMatch(mol))
def testSmartsMatcherAPI(self):
    """Check count-limited SmartsMatchers and their negation on carbon chains.

    A 40-carbon chain must be rejected by the "too many" matcher and
    accepted by the "ok" matcher; a 41-carbon chain flips both results.
    The Not() wrapper around the "ok" matcher must always give the
    opposite answer to the matcher it wraps.
    """
    too_many = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 41)
    within_limit = FilterCatalog.SmartsMatcher("ok # carbons", "[#6]", 0, 40)
    outside_limit = FilterCatalog.FilterMatchOps.Not(within_limit)

    # (chain length, whether the chain exceeds the 40-carbon limit)
    for carbon_count, exceeds in ((40, False), (41, True)):
        chain = Chem.MolFromSmiles("C" * carbon_count)
        self.assertEqual(bool(too_many.HasMatch(chain)), exceeds)
        self.assertEqual(bool(within_limit.HasMatch(chain)), not exceeds)
        self.assertEqual(bool(outside_limit.HasMatch(chain)), exceeds)
def test4CountTests(self):
    """Check the count bounds of a carbon SmartsMatcher.

    With bounds (0, 2) molecules with zero, one or two carbons match and
    three carbons do not; with bounds (1, 2) a carbon-free molecule no
    longer matches.
    """
    zero_to_two = FilterCatalog.SmartsMatcher("Carbon", "[#6]", 0, 2)
    expectations = (
        ("N", True),
        ("C", True),
        ("CC", True),
        ("CCC", False),
    )
    for smiles, should_match in expectations:
        mol = Chem.MolFromSmiles(smiles)
        self.assertEqual(bool(zero_to_two.HasMatch(mol)), should_match)

    one_to_two = FilterCatalog.SmartsMatcher("Carbon", "[#6]", 1, 2)
    no_carbon = Chem.MolFromSmiles("N")
    self.assertFalse(one_to_two.HasMatch(no_carbon))
def testAddEntry(self):
    """Add one entry to a fresh catalog, then drop the entry and the catalog.

    There are no assertions; the test passes as long as none of the calls
    or deletions raise.
    """
    carbon_limit = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 41)
    bar_entry = FilterCatalog.FilterCatalogEntry("Bar", carbon_limit)

    catalog = FilterCatalog.FilterCatalog()
    catalog.AddEntry(bar_entry)

    # Release the entry first and the catalog second, in that order.
    del bar_entry
    del catalog
def test3ExclusionFilter(self):
    """Check that an ExclusionList inverts the matcher added to it.

    The aromatic matcher hits benzene and reports matches; once added to
    an ExclusionList, the list neither matches benzene nor reports any
    matches for it.
    """
    benzene = Chem.MolFromSmiles("c1ccccc1")
    query = Chem.MolFromSmarts("c:c:c:c:c")

    aromatic = FilterCatalog.SmartsMatcher("Five aromatic carbons", query)
    self.assertEqual(aromatic.GetMinCount(), 1)
    self.assertTrue(aromatic.HasMatch(benzene))
    direct_hits = aromatic.GetMatches(benzene)

    excluder = FilterCatalog.ExclusionList()
    excluder.AddPattern(aromatic)
    self.assertFalse(excluder.HasMatch(benzene))
    excluded_hits = excluder.GetMatches(benzene)

    self.assertTrue(direct_hits)
    self.assertFalse(excluded_hits)
def buildFilterCatalog(
        csv_path='SubstructureFilter_HitTriaging_wPubChemExamples.csv'):
    """Build a FilterCatalog from a CSV table of substructure filters.

    The table is read with pandas and must provide the columns
    ``PATTERN_NAME``, ``SET_NAME``, ``SMARTS``, ``MIN_COUNT``,
    ``SEVERITY_SCORE``, ``COVALENT`` and ``SPECIAL_MOL``. Each row becomes
    one catalog entry backed by a SmartsMatcher.

    Args:
        csv_path: Path of the CSV file to load. Defaults to the file name
            that was previously hard-coded, so existing no-argument
            callers are unaffected.

    Returns:
        The populated FilterCatalog. Every entry is named
        ``<PATTERN_NAME>_min(<count>)__<SEVERITY_SCORE>__<COVALENT>__<SPECIAL_MOL>``
        and carries a ``Scope`` property holding the row's ``SET_NAME``.
    """
    filters = pd.read_csv(csv_path)
    catalog = FilterCatalog.FilterCatalog()
    for i in range(len(filters)):
        # A MIN_COUNT of 0 in the table means "use the default of 1".
        raw_min_count = filters['MIN_COUNT'][i]
        min_count = int(raw_min_count) if raw_min_count != 0 else 1

        set_name = filters['SET_NAME'][i]
        entry_name = '{0}_min({1})__{2}__{3}__{4}'.format(
            filters['PATTERN_NAME'][i], min_count,
            filters['SEVERITY_SCORE'][i], filters['COVALENT'][i],
            filters['SPECIAL_MOL'][i])

        smarts_matcher = FilterCatalog.SmartsMatcher(
            entry_name, filters['SMARTS'][i], min_count)
        catalog.AddEntry(
            FilterCatalog.FilterCatalogEntry(entry_name, smarts_matcher))
        # Entries are added one per row, so row i is looked up as entry i.
        catalog.GetEntry(i).SetProp('Scope', set_name)
    return catalog
def test1FilterMatchOps(self):
    """Check And / Or / Not combinations of a matcher and its exclusion.

    The aromatic matcher hits benzene while the ExclusionList built from
    it does not, so And() of the two fails, Not(And()) succeeds and Or()
    succeeds. The combined matchers are printed at the end.
    """
    benzene = Chem.MolFromSmiles("c1ccccc1")
    query = Chem.MolFromSmarts("c:c:c:c:c")

    aromatic = FilterCatalog.SmartsMatcher("Five aromatic carbons", query)
    self.assertEqual(aromatic.GetMinCount(), 1)
    self.assertTrue(aromatic.HasMatch(benzene))
    aromatic.GetMatches(benzene)

    excluded = FilterCatalog.ExclusionList()
    excluded.SetExclusionPatterns([aromatic])
    self.assertFalse(excluded.HasMatch(benzene))

    both = FilterMatchOps.And(aromatic, excluded)
    self.assertFalse(both.HasMatch(benzene))

    not_both = FilterMatchOps.Not(both)
    self.assertTrue(not_both.HasMatch(benzene))

    either = FilterMatchOps.Or(aromatic, excluded)
    self.assertTrue(either.HasMatch(benzene))

    for combined in (both, either, not_both):
        print(combined)
def testFilterHierarchyMatcher(self):
    """Check which nodes of a halogen filter hierarchy are reported per molecule.

    Builds a FilterHierarchyMatcher tree rooted at a generic "Halogen"
    matcher with three children ("Halogen.Aromatic", "Halogen.NotFluorine"
    and "Halogen.Bromine"), the latter two having children of their own.
    It then verifies the names and number of matches returned by the root
    for several small molecules.
    """
    root = FilterCatalog.FilterHierarchyMatcher()
    halogen = FilterCatalog.SmartsMatcher(
        "Halogen", "[$([F,Cl,Br,I]-!@[#6]);!$([F,Cl,Br,I]"
        "-!@C-!@[F,Cl,Br,I]);!$([F,Cl,Br,I]-[C,S]"
        "(=[O,S,N]))]", 1)
    root.SetPattern(halogen)

    def hierarchy(matcher):
        """Wrap *matcher* in a hierarchy node and check the name carries over."""
        node = FilterCatalog.FilterHierarchyMatcher(matcher)
        self.assertEqual(matcher.GetName(), node.GetName())
        return node

    root.AddChild(
        hierarchy(
            FilterCatalog.SmartsMatcher("Halogen.Aromatic",
                                        "[F,Cl,Br,I;$(*-!@c)]")))

    not_fluorine = hierarchy(
        FilterCatalog.SmartsMatcher(
            "Halogen.NotFluorine", "[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]"
            "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S]"
            "(=[O,S,N]))]"))
    not_fluorine_matchers = [
        FilterCatalog.SmartsMatcher(
            "Halogen.NotFluorine.Aliphatic", "[$([Cl,Br,I]-!@C);!$([Cl,Br,I]"
            "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]"),
        FilterCatalog.SmartsMatcher("Halogen.NotFluorine.Aromatic",
                                    "[$([Cl,Br,I]-!@c)]"),
    ]
    # Wrap every child before attaching any of them, as the original did.
    not_fluorine_children = [hierarchy(m) for m in not_fluorine_matchers]
    for child in not_fluorine_children:
        not_fluorine.AddChild(child)
    root.AddChild(not_fluorine)

    bromine = hierarchy(
        FilterCatalog.SmartsMatcher(
            "Halogen.Bromine", "[Br;$([Br]-!@[#6]);!$([Br]-!@C-!@[F,Cl,Br,I])"
            ";!$([Br]-[C,S](=[O,S,N]))]", 1))
    bromine_matchers = [
        FilterCatalog.SmartsMatcher(
            "Halogen.Bromine.Aliphatic",
            "[Br;$(Br-!@C);!$(Br-!@C-!@[F,Cl,Br,I]);"
            "!$(Br-[C,S](=[O,S,N]))]"),
        FilterCatalog.SmartsMatcher("Halogen.Bromine.Aromatic",
                                    "[Br;$(Br-!@c)]"),
        FilterCatalog.SmartsMatcher("Halogen.Bromine.BromoKetone",
                                    "[Br;$(Br-[CH2]-C(=O)-[#6])]"),
    ]
    bromine_children = [hierarchy(m) for m in bromine_matchers]
    for child in bromine_children:
        bromine.AddChild(child)
    root.AddChild(bromine)

    def matched_names(mol):
        """Return the names of the filters the root reports for *mol*.

        Also checks that the plain "Halogen" matcher hits the molecule.
        self.assertTrue is used rather than a bare assert so the check
        still runs under ``python -O``.
        """
        self.assertTrue(halogen.HasMatch(mol))
        return [match.filterMatch.GetName() for match in root.GetMatches(mol)]

    # assertEqual replaces the assertEquals alias, which was removed from
    # unittest in Python 3.12.
    names = matched_names(Chem.MolFromSmiles("CCl"))
    self.assertEqual(len(names), 1)
    self.assertEqual(names, ['Halogen.NotFluorine.Aliphatic'])

    names = matched_names(Chem.MolFromSmiles("c1ccccc1Cl"))
    self.assertEqual(len(names), 2)

    names = matched_names(Chem.MolFromSmiles("c1ccccc1Br"))
    self.assertEqual(len(names), 3)
    self.assertEqual(names, [
        'Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic',
        'Halogen.Bromine.Aromatic'
    ])

    names = matched_names(Chem.MolFromSmiles("c1ccccc1F"))
    self.assertEqual(len(names), 1)
    self.assertEqual(names, ['Halogen.Aromatic'])

    names = matched_names(Chem.MolFromSmiles("CBr"))
    self.assertEqual(
        names,
        ['Halogen.NotFluorine.Aliphatic', 'Halogen.Bromine.Aliphatic'])