Exemplo n.º 1
0
    def testThreadedPythonFilter(self):
        class MWFilter(FilterCatalog.FilterMatcher):
            def __init__(self, minMw, maxMw):
                FilterCatalog.FilterMatcher.__init__(self, "MW violation")
                self.minMw = minMw
                self.maxMw = maxMw

            def IsValid(self):
                return True

            def HasMatch(self, mol):
                mw = rdMolDescriptors.CalcExactMolWt(mol)
                res = not self.minMw <= mw <= self.maxMw
                Chem.MolFromSmiles("---")
                Chem.LogErrorMsg("dasfsadf")
                return res

        path = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol',
                            'test_data', 'pains.smi')
        with open(path) as f:
            smiles = [f.strip() for f in f.readlines()][1:]

        print("1")
        self.assertEqual(len(smiles), 3)

        print("2")
        entry = FilterCatalog.FilterCatalogEntry("MW Violation",
                                                 MWFilter(100, 500))
        fc = FilterCatalog.FilterCatalog()
        fc.AddEntry(entry)
        self.assertTrue(entry.GetDescription() == "MW Violation")

        print("running")
        results = FilterCatalog.RunFilterCatalog(fc, smiles * 10, numThreads=3)
Exemplo n.º 2
0
  def testFunctionalGroupHierarchy(self):
    fc = FilterCatalog.GetFunctionalGroupHierarchy()

    matches = [(Chem.MolFromSmiles("CCl"), ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic']),
               (Chem.MolFromSmiles("c1ccccc1Cl"),
                ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic']),
               (Chem.MolFromSmiles("c1ccccc1F"), ['Halogen.Aromatic']), (
                 Chem.MolFromSmiles("CBr"), ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic',
                                             'Halogen.Bromine.Aliphatic'])]

    catalogs = [fc]
    if FilterCatalog.FilterCatalogCanSerialize():
      pickle = fc.Serialize()
      fc2 = FilterCatalog.FilterCatalog(pickle)
      catalogs.append(fc2)

    for fc in catalogs:
      # test GetMatches API
      for mol, res in matches:
        entries = list(fc.GetMatches(mol))
        for entry in entries:
          hits = [match.filterMatch.GetName() for match in entry.GetFilterMatches(mol)]
          self.assertEquals(res, hits)

      # test GetFilterMatches API
      for mol, res in matches:
        self.assertEquals(res, [match.filterMatch.GetName() for match in fc.GetFilterMatches(mol)])
Exemplo n.º 3
0
    def testThreadedRunner(self):
        path = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol',
                            'test_data', 'pains.smi')
        with open(path) as f:
            smiles = [f.strip() for f in f.readlines()][1:]

        self.assertEquals(len(smiles), 3)
        params = FilterCatalog.FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
        fc = FilterCatalog.FilterCatalog(params)

        results = FilterCatalog.RunFilterCatalog(fc, smiles)
        self.assertEquals(len(results), 3)

        descriptions = [
            "hzone_phenol_A(479)", "cyano_imine_B(17)", "keto_keto_gamma(5)"
        ]

        for i, res in enumerate(results):
            self.assertTrue(len(res) > 0)
            self.assertEquals(res[0].GetDescription(), descriptions[i])

        # Test with some bad input
        smiles = ['mydoghasfleas']
        results = FilterCatalog.RunFilterCatalog(fc, smiles, numThreads=3)
        self.assertEquals(len(results[0]), 1)
        self.assertEquals(results[0][0].GetDescription(),
                          "no valid RDKit molecule")
Exemplo n.º 4
0
    def testRemoveEntry(self):
        params = FilterCatalog.FilterCatalogParams(
            FilterCatalogParams.FilterCatalogs.ZINC)
        catalog = FilterCatalog.FilterCatalog(params)
        entry = catalog.GetEntryWithIdx(10)
        desc = entry.GetDescription()
        count = 0
        descs = set([
            catalog.GetEntryWithIdx(i).GetDescription()
            for i in range(catalog.GetNumEntries())
        ])
        for i in range(catalog.GetNumEntries()):
            if catalog.GetEntryWithIdx(i).GetDescription() == desc:
                count += 1
        print("Count", count)
        sz = catalog.GetNumEntries()
        print("*" * 44)
        self.assertTrue(catalog.RemoveEntry(entry))
        del entry
        self.assertTrue(catalog.GetNumEntries() == sz - 1)

        descs2 = set([
            catalog.GetEntryWithIdx(i).GetDescription()
            for i in range(catalog.GetNumEntries())
        ])
        print(descs - descs2)

        newcount = 0
        for i in range(catalog.GetNumEntries()):
            if catalog.GetEntryWithIdx(i).GetDescription() == desc:
                newcount += 1
        self.assertEquals(count, newcount + 1)
Exemplo n.º 5
0
    def get_filters_list(self):
        """
        This loads in the filters which will be used.

        Returns:
        :returns: rdkit.Chem.rdfiltercatalog.FilterCatalog filters: A set of
            RDKit Filters
        """

        # Make a list of all the different PAINS Filters. PAINS should include
        # PAINS_A,PAINS_B, and PAINS_C, but because RDKit documentation
        # doesn't specify this explicitly we have included all 4 of the PAINS
        # FilterCatalogs for precaution.
        params_PAINS_A = FilterCatalogParams()
        params_PAINS_A.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
        params_PAINS_B = FilterCatalogParams()
        params_PAINS_B.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
        params_PAINS_C = FilterCatalogParams()
        params_PAINS_C.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
        params_PAINS = FilterCatalogParams()
        params_PAINS.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)

        params_list = [
            params_PAINS_A, params_PAINS_B, params_PAINS_C, params_PAINS
        ]
        filters_list = []
        for param in params_list:
            filter = FilterCatalog.FilterCatalog(param)
            filters_list.append(filter)

        return filters_list
Exemplo n.º 6
0
 def testAddEntry(self):
     sm = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 40 + 1)
     entry = FilterCatalog.FilterCatalogEntry("Bar", sm)
     fc = FilterCatalog.FilterCatalog()
     fc.AddEntry(entry)
     del entry
     del fc
Exemplo n.º 7
0
  def testZinc(self):
    params = FilterCatalog.FilterCatalogParams(FilterCatalogParams.FilterCatalogs.ZINC)
    catalog = FilterCatalog.FilterCatalog(params)
    self.assertTrue(catalog.GetNumEntries())

    m = Chem.MolFromSmiles("C" * 41)
    entry = catalog.GetFirstMatch(m)
    self.assertTrue(entry.GetDescription(), "Non-Hydrogen_atoms")

    m = Chem.MolFromSmiles("CN" * 20)
    entry = catalog.GetFirstMatch(m)
    self.assertEquals(catalog.GetFirstMatch(m), None)
Exemplo n.º 8
0
    def get_filters(self):
        """
        This loads in the filters which will be used.

        Returns:
        :returns: rdkit.Chem.rdfiltercatalog.FilterCatalog filters: A set of
            RDKit Filters
        """

        # Make a list of the NIH filter.
        params = FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
        # This is our set of all the NIH filters
        filters = FilterCatalog.FilterCatalog(params)
        return filters
Exemplo n.º 9
0
    def pains(self):
        """
        Baell and Holloway (2010) New Substructure Filters for Removal of Pan
        Assay Interference Compounds (PAINS) from Screening Libraries and for
        Their Exclusion in Bioassays

        This filter finds promiscuous compounds that are likely to show activity
        regardless of the target.

        Returns:
            Boolean of whether the molecule triggers the PAINS filter.
        """
        params = FilterCatalog.FilterCatalogParams()
        params.AddCatalog(
            FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS)
        catalog = FilterCatalog.FilterCatalog(params)
        return catalog.HasMatch(self.mol)
Exemplo n.º 10
0
    def brenk(self):
        """
        Brenk (2008) Lessons Learnt from Assembling Screening Libraries for
        Drug Discovery for Neglected Diseases

        Brenk's Structural Alert filter finds fragments "putatively toxic,
        chemically reactive, metabolically unstable or to bear properties
        responsible for poor pharmacokinetics."

        Returns:
            Boolean of whether the molecule triggers the Brenk filter.
        """
        params = FilterCatalog.FilterCatalogParams()
        params.AddCatalog(
            FilterCatalog.FilterCatalogParams.FilterCatalogs.BRENK)
        catalog = FilterCatalog.FilterCatalog(params)
        return catalog.HasMatch(self.mol)
Exemplo n.º 11
0
 def generate_fingerprints_and_create_list(self):
     #generate fingerprints of predicted ligands and known ligands:
     gen_mo = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                        radius=2)
     predicted_fps = [
         gen_mo.GetFingerprint(mol) for mol in self.predicted['molecules']
     ]
     true_fps = [
         gen_mo.GetFingerprint(mol) for mol in self.true_pos['molecules']
     ]
     similarities = list()
     for count, mol in enumerate(predicted_fps):
         tanimoto_values = ([
             DataStructs.TanimotoSimilarity(mol, i) for i in true_fps
         ])
         index_of_highest = np.argmax(tanimoto_values)
         similarities.append(tanimoto_values[index_of_highest])
     #module code is in: https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     sa_score = [
         sascorer.calculateScore(i)
         for i in list(self.predicted['molecules'])
     ]
     #create a list holding the QED drug-likeness score
     #reference: https://doi.org/10.1038/nchem.1243
     qeds = [qed(mol) for mol in self.predicted['molecules']]
     #create a list holding logp:
     logp = [Descriptors.MolLogP(m) for m in self.predicted['molecules']]
     #filter catalog usage instructions are here: https://github.com/rdkit/rdkit/pull/536
     params = FilterCatalogParams()
     params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
     catalog = FilterCatalog(params)
     self.brenk = np.array(
         [catalog.HasMatch(m) for m in self.predicted['molecules']])
     #add these lists as columns to the 'predicted' pd.DataFrame
     self.predicted['similarities'] = similarities
     self.predicted['sa_score'] = sa_score
     self.predicted['qeds'] = qeds
     self.predicted['logp'] = logp
     print(self.predicted['logp'] < 6)
     shortlist_mask = ((self.predicted['similarities'] < 0.2) &
                       (self.predicted['sa_score'] < 4) &
                       (self.predicted['qeds'] > 0.25) &
                       (self.predicted['logp'] < 6) & (~self.brenk))
Exemplo n.º 12
0
def pains(filtered_df):
	filteredData = filtered_df
	params = FilterCatalogParams()
	# Build a catalog from all PAINS (A, B and C)
	params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
	catalog = FilterCatalog(params)
	# Create empty dataframes for filtered data
	rdkit_highLightFramePAINS = pd.DataFrame(columns=('CompID', 'CompMol', 'unwantedID'))
	rdkit_noPAINS = pd.DataFrame(columns=('ChEMBL_ID', 'smiles','pIC50'))
	rdkit_withPAINS = pd.DataFrame(columns=('ChEMBL_ID', 'smiles', 'pIC50','unwantedID'))
	# For index and row in the filtered df
	for i,row in filteredData.iterrows():
		curMol = Chem.MolFromSmiles(row.smiles) # Current molecule
		match = False # Set match to false
		rdkit_PAINSList = []
		# Get the first match
		entry = catalog.GetFirstMatch(curMol)
		if entry!=None:
			# Add name of current unwanted subsftructure to list
			rdkit_PAINSList.append(entry.GetDescription().capitalize())
			# Add relevant matching information to dataframe
			rdkit_highLightFramePAINS.loc[len(rdkit_highLightFramePAINS)] = [row.molecule_chembl_id, curMol,
			entry.GetDescription().capitalize()]
			match = True
		if not match:
			# Add to frame of PAINS free compounds
			rdkit_noPAINS.loc[len(rdkit_noPAINS)] = [row.molecule_chembl_id, row.smiles, row.pIC50]
		else: 
			# Add to frame of compounds that contain PAINS
			# Put the relevant information in the dataframe with the unwanted substructures
			rdkit_withPAINS.loc[len(rdkit_withPAINS)] = [row.molecule_chembl_id, row.smiles, row.pIC50, entry.GetDescription().capitalize()]
	df = rdkit_noPAINS
	# Drop unnecessary columns
	## df_new = df.drop(['units', 'IC50'], axis=1)
	df_new = df
	# Create molecules from smiles and their fingerprints
	create_mol(df_new, 2048)
	# Add column for activity
	df_new['active'] = np.zeros(len(df_new))
	# Mark every molecule as active with an pIC50 of > 6.3
	df_new.loc[df_new[df_new.pIC50 >= 6.3].index, 'active'] = 1.0
	return df_new
Exemplo n.º 13
0
    def testPyFilter(self):
        class MyFilterMatcher(FilterCatalog.FilterMatcher):
            def IsValid(self):
                return True

            def HasMatch(self, mol):
                return True

            def GetMatches(self, mol, vect):
                v = FilterCatalog.MatchTypeVect()
                v.append(FilterCatalog.IntPair(1, 1))
                match = FilterCatalog.FilterMatch(self, v)
                vect.append(match)
                return True

        func = MyFilterMatcher("FilterMatcher")
        self.assertEquals(func.GetName(), "FilterMatcher")
        mol = Chem.MolFromSmiles("c1ccccc1")
        self.assertEquals(func.HasMatch(mol), True)

        or_match = FilterMatchOps.Or(func, func)
        self.assertEquals([[tuple(x) for x in filtermatch.atomPairs]
                           for filtermatch in or_match.GetMatches(mol)],
                          [[(1, 1)], [(1, 1)]])

        not_match = FilterMatchOps.Not(func)
        print(not_match)
        self.assertEquals(not_match.HasMatch(mol), False)
        # test memory
        del func

        self.assertEquals(not_match.HasMatch(mol), False)
        self.assertEquals([[tuple(x) for x in filtermatch.atomPairs]
                           for filtermatch in not_match.GetMatches(mol)], [])

        entry = FilterCatalog.FilterCatalogEntry(
            "Bar", MyFilterMatcher("FilterMatcher"))
        fc = FilterCatalog.FilterCatalog()
        fc.AddEntry(entry)

        catalogEntry = fc.GetFirstMatch(mol)
        print(catalogEntry.GetDescription())
Exemplo n.º 14
0
def buildFilterCatalog():

    inhousefilter = pd.read_csv(
        'SubstructureFilter_HitTriaging_wPubChemExamples.csv')
    inhouseFiltersCat = FilterCatalog.FilterCatalog()
    for i in range(inhousefilter.shape[0]):
        mincount = 1
        if inhousefilter['MIN_COUNT'][i] != 0:
            mincount = int(inhousefilter['MIN_COUNT'][i])
        pname = inhousefilter['PATTERN_NAME'][i]
        sname = inhousefilter['SET_NAME'][i]
        pname_final = '{0}_min({1})__{2}__{3}__{4}'.format(
            pname, mincount, inhousefilter['SEVERITY_SCORE'][i],
            inhousefilter['COVALENT'][i], inhousefilter['SPECIAL_MOL'][i])
        fil = FilterCatalog.SmartsMatcher(pname_final,
                                          inhousefilter['SMARTS'][i], mincount)
        inhouseFiltersCat.AddEntry(
            FilterCatalog.FilterCatalogEntry(pname_final, fil))
        inhouseFiltersCat.GetEntry(i).SetProp('Scope', sname)
    return inhouseFiltersCat
Exemplo n.º 15
0
def painspredict(thefile, theoutput):

    os.remove('output.txt')
    f1 = open(theoutput, 'w+')

    mySMILESinput = pd.DataFrame(columns=['ID', 'my_smiles'])

    params = FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
    catalog = FilterCatalog(params)
    suppl = Chem.SmilesMolSupplier(thefile)
    with open(thefile, 'r') as inf:
        first_line = inf.readline()
        inf.close()

    with open(thefile, 'a') as inf:

        inf.write(first_line)
        inf.close()

    inf = open(thefile, 'r')

    sub_strct = [line.rstrip().split(" ") for line in inf]

    ms = [x for x in suppl if x is not None]
    i = 0

    for mol in ms:
        entry = catalog.GetFirstMatch(mol)
        sphybrid = Chem.rdMolDescriptors.CalcFractionCSP3(mol)
        if (entry is not None):
            print(i,
                  sub_strct[i],
                  "PAINS",
                  entry.GetDescription(),
                  "Fsp3",
                  sphybrid,
                  file=f1)
        else:
            print(i, sub_strct[i], "PAINS OK", "Fsp3", sphybrid, file=f1)
        i += 1
Exemplo n.º 16
0
    def testMWFilter(self):
        class MWFilter(FilterCatalog.FilterMatcher):
            def __init__(self, minMw, maxMw):
                FilterCatalog.FilterMatcher.__init__(self, "MW violation")
                self.minMw = minMw
                self.maxMw = maxMw

            def IsValid(self):
                return True

            def HasMatch(self, mol):
                mw = rdMolDescriptors.CalcExactMolWt(mol)
                return not self.minMw <= mw <= self.maxMw

        entry = FilterCatalog.FilterCatalogEntry("MW Violation",
                                                 MWFilter(100, 500))
        fc = FilterCatalog.FilterCatalog()
        fc.AddEntry(entry)
        self.assertTrue(entry.GetDescription() == "MW Violation")

        mol = Chem.MolFromSmiles("c1ccccc1")
        catalogEntry = fc.GetFirstMatch(mol)
Exemplo n.º 17
0
import json, gzip
from ijson import items
from django.core.exceptions import ValidationError
from threading import Thread
import CloseableQueue
#from multiprocessing.dummy import Pool
from mol_parsing.functions import find_lib_type, read_input, write_json_results, write_results
from mol_parsing.rdkit_parse import parse_mol_json, generate_mols_from_json

from StringIO import StringIO

params = FilterCatalog.FilterCatalogParams()
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog = FilterCatalog.FilterCatalog(params)


def request_params(request):
    """Function to handle the request parameters"""
    #screen_lib, mol_type = find_lib_type(request)
    screen_lib, mol_type = read_input(request)
    if "filter" in request.GET:
        filter = request.GET["filter"]
        if not (filter == "INCLUDE_MATCHING" or filter
                == "INCLUDE_NON_MATCHING" or filter == "INCLUDE_ALL"):
            raise ValidationError("Invalid filter value " + filter)
    else:
        filter = "FILTER_NONE"
    return screen_lib, mol_type, filter
Exemplo n.º 18
0
 def __init__(self, n_cores=-1):
     params = FilterCatalog.FilterCatalogParams()
     params.AddCatalog(FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS)
     self.filter = FilterCatalog.FilterCatalog(params)
     self.n_cores = n_cores
Exemplo n.º 19
0
        smiles_.append(row[0])

data = get_data_from_smiles(smiles)
data = data.mols(flatten=True)

params = [
    FilterCatalogParams(),
    FilterCatalogParams(),
    FilterCatalogParams(),
    FilterCatalogParams()
]
params[0].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
params[1].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params[2].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params[3].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog_pains = FilterCatalog(params[0])
catalog_painsA = FilterCatalog(params[1])
catalog_painsB = FilterCatalog(params[2])
catalog_painsC = FilterCatalog(params[3])

entries_pains = []
entries_painsA = []
entries_painsB = []
entries_painsC = []
entries_painsAll = []

for i in range(len(data)):
    if catalog_pains.HasMatch(data[i]):
        entries_pains.append(i)
    if catalog_painsA.HasMatch(data[i]):
        entries_painsA.append(i)
Exemplo n.º 20
0
    def test2FilterCatalogTest(self):
        tests = ((FilterCatalogParams.FilterCatalogs.PAINS_A,
                  16), (FilterCatalogParams.FilterCatalogs.PAINS_B,
                        55), (FilterCatalogParams.FilterCatalogs.PAINS_C, 409),
                 (FilterCatalogParams.FilterCatalogs.PAINS, 409 + 16 + 55))

        for catalog_idx, num in tests:
            params = FilterCatalog.FilterCatalogParams()
            print("*" * 44)
            print("Testing:", catalog_idx, int(catalog_idx))
            self.assertTrue(params.AddCatalog(catalog_idx))
            catalog1 = FilterCatalog.FilterCatalog(params)

            if FilterCatalog.FilterCatalogCanSerialize():
                pickle = catalog1.Serialize()
                catalog2 = FilterCatalog.FilterCatalog(pickle)
                catalogs = [catalog1, catalog2]
            else:
                catalogs = [catalog1]

            catalogs.append(FilterCatalog.FilterCatalog(catalog_idx))
            for index, catalog in enumerate(catalogs):
                self.assertEqual(catalog.GetNumEntries(), num)

                if catalog_idx in [
                        FilterCatalogParams.FilterCatalogs.PAINS_A,
                        FilterCatalogParams.FilterCatalogs.PAINS
                ]:
                    # http://chemistrycompass.com/chemsearch/58909/
                    mol = Chem.MolFromSmiles(
                        "O=C(Cn1cnc2c1c(=O)n(C)c(=O)n2C)N/N=C/c1c(O)ccc2c1cccc2"
                    )
                    entry = catalog.GetFirstMatch(mol)
                    for key in entry.GetPropList():
                        if key == "Reference":
                            self.assertEquals(
                                entry.GetProp(key),
                                "Baell JB, Holloway GA. New Substructure Filters for "
                                "Removal of Pan Assay Interference Compounds (PAINS) "
                                "from Screening Libraries and for Their Exclusion in "
                                "Bioassays. J Med Chem 53 (2010) 2719D40. "
                                "doi:10.1021/jm901137j.")
                        elif key == "Scope":
                            self.assertEquals(entry.GetProp(key),
                                              "PAINS filters (family A)")

                    self.assertEqual(entry.GetDescription(),
                                     "hzone_phenol_A(479)")
                    result = catalog.GetMatches(mol)
                    self.assertEquals(len(result), 1)

                    for entry in result:
                        for filtermatch in entry.GetFilterMatches(mol):
                            self.assertEquals(str(filtermatch.filterMatch),
                                              "hzone_phenol_A(479)")
                            atomPairs = [
                                tuple(x) for x in filtermatch.atomPairs
                            ]
                            self.assertEquals(atomPairs, [(0, 23), (1, 22),
                                                          (2, 20), (3, 19),
                                                          (4, 25), (5, 24),
                                                          (6, 18), (7, 17),
                                                          (8, 16), (9, 21)])

                elif catalog_idx == FilterCatalogParams.FilterCatalogs.PAINS_B:
                    mol = Chem.MolFromSmiles(
                        "FC(F)(F)Oc1ccc(NN=C(C#N)C#N)cc1")  # CHEMBL457504
                    entry = catalog.GetFirstMatch(mol)
                    self.assertTrue(entry)
                    self.assertEquals(entry.GetDescription(),
                                      "cyano_imine_B(17)")

                elif catalog_idx == FilterCatalogParams.FilterCatalogs.PAINS_C:
                    mol = Chem.MolFromSmiles(
                        "O=C1C2OC2C(=O)c3cc4CCCCc4cc13")  # CHEMBL476649
                    entry = catalog.GetFirstMatch(mol)
                    self.assertTrue(entry)
                    self.assertEquals(entry.GetDescription(),
                                      "keto_keto_gamma(5)")