Exemplo n.º 1
0
    def testUnpBatchFetchGetRequests(self):
        """UniProt batch fetch (uploadlists) get test (requests)

        Sends the first ten identifiers of the fixture list to the "uploadlists"
        endpoint via the unwrapped GET call and requires the XML reply to contain
        at least as many "<entry " occurrences as identifiers submitted.
        """
        # Alternate host noted in earlier revisions: "https://pir3.uniprot.org"
        serviceUrl = "https://www.uniprot.org"
        servicePath = "uploadlists"
        accessionList = self.__unpIdList1[:10]
        try:
            requestHeaders = {"Accept": "application/xml"}
            queryParams = {"from": "ACC+ID", "to": "ACC", "format": "xml"}
            queryParams["query"] = " ".join(accessionList)
            requester = UrlRequestUtil()
            # using unwrapped (requests) version
            xmlText, statusCode = requester.getUnWrapped(
                serviceUrl, servicePath, queryParams, headers=requestHeaders, sslCert="enable"
            )
            logger.debug("XML result %r", xmlText)
            entryCount = xmlText.count("<entry ")
            logger.info("Result count %d status code %r", entryCount, statusCode)
            self.assertGreaterEqual(entryCount, len(accessionList))
        except Exception as err:
            # Any error (including a failed assertion) is logged and reported as a test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 2
0
 def doGeneLookup(self, geneName, taxId, reviewed=False):
     """Query the primary service's "uniprot" endpoint for a gene name within a taxonomy.

     Args:
         geneName (str): gene name substituted into the query
         taxId (str): taxonomy identifier substituted into the query
         reviewed (bool, optional): when True the query adds "and reviewed:yes". Defaults to False.

     Returns:
         (list, retCode): non-empty lines of the "list" format response and the
                          return code from the request; ([], None) if an exception is raised
     """
     try:
         serviceUrl = self.__urlPrimary
         if reviewed:
             queryText = 'gene:"%s" and taxonomy:%s and reviewed:yes' % (geneName, taxId)
         else:
             queryText = 'gene:"%s" and taxonomy:%s' % (geneName, taxId)
         params = {"query": queryText, "format": "list"}
         requester = UrlRequestUtil()
         # No Accept header is sent for the plain list format.
         replyText, statusCode = requester.get(serviceUrl, "uniprot", params, headers=[])
         if not replyText:
             return [], statusCode
         # Drop the empty strings produced by blank or trailing lines.
         return [line for line in replyText.split("\n") if line], statusCode
     except Exception as err:
         logger.exception("Failing with %s", str(err))
     return [], None
Exemplo n.º 3
0
 def getAccessionMapping(self, wurcsTupL):
     """Fetch GlyTouCan accessions for the input WURCS descriptor list.

     Args:
         wurcsTupL (list): items that unpack as (entityId, wurcs)

     Returns:
         (dict): {<wurcs>: [<accession id>, ...], ...} holding only descriptors for
                 which the service returned at least one record with an "id" key
     """
     resultD = {}
     logger.info("Fetching (%d) WURCS descriptors", len(wurcsTupL))
     total = len(wurcsTupL)
     serviceUrl = "https://api.glycosmos.org"
     servicePath = "glytoucan/sparql/wurcs2gtcids"
     for count, (entityId, wurcs) in enumerate(wurcsTupL, 1):
         try:
             requester = UrlRequestUtil()
             hitL, statusCode = requester.post(serviceUrl, servicePath, {"wurcs": wurcs}, returnContentType="JSON")
             logger.debug(" %r wurcs fetch result (%r) %r", entityId, statusCode, hitL)
             for hitD in hitL or []:
                 if "id" not in hitD:
                     logger.info("%r fetch fails (%r) (%r) %r", entityId, statusCode, wurcs, hitL)
                     continue
                 resultD.setdefault(wurcs, []).append(hitD["id"])
             # Progress message on every fifth descriptor.
             if count % 5 == 0:
                 logger.info("Fetched %d/%d", count, total)
         except Exception as err:
             # A failure on one descriptor is logged and the loop moves on to the next.
             logger.exception("Failing for (%r) wurcs (%r) with %s", entityId, wurcs, str(err))
     return resultD
Exemplo n.º 4
0
 def testGetChemSearchRequests(self):
     """ChemSearch repetition GET protocol test (using requests module)

     Repeats one InChI fingerprint-similarity query 100 times against each host
     and logs every reply whose "matchedIdList" length differs from the expected
     count. The test fails only when a request or the reply handling raises.
     """
     # Earlier revisions listed individual dev/production instance addresses here;
     # only the two public host names are exercised.
     hostList = ["https://chemsearch-west.rcsb.org", "https://chemsearch-east.rcsb.org"]
     servicePath = "chem-match-v1/InChI"
     expectedCount = 13
     inchi = "InChI=1S/C9H15N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3-4,6,12,15-16H,2H2,1H3,(H4,10,11,13,14,17)/t3-,4-,6-/m1/s1"
     try:
         for host in hostList:
             params = {"query": inchi, "matchType": "fingerprint-similarity"}
             for attempt in range(100):
                 requester = UrlRequestUtil()
                 reply, statusCode = requester.getUnWrapped(
                     host, servicePath, params, headers={}, sslCert="enable", returnContentType="JSON"
                 )
                 if len(reply["matchedIdList"]) == expectedCount:
                     continue
                 logger.info(">>> %3d (%r) (%r) result length %r", attempt, host, statusCode, len(reply["matchedIdList"]))
     except Exception as err:
         logger.exception("Failing with %s", str(err))
         self.fail()
Exemplo n.º 5
0
 def __doRequestPrimary(self, idList):
     """Request PubMed records in XML from the NCBI E-utilities efetch service.

     Example request:
       https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=ID1,ID2,...

     Args:
         idList (list): identifier strings, comma-joined into the "id" parameter

     Returns:
         the value returned by UrlRequestUtil.get() for this request
     """
     # Use the HTTPS form of the E-utilities host rather than plain HTTP
     # (NOTE(review): NCBI documents HTTPS as the required scheme - confirm).
     baseUrl = "https://eutils.ncbi.nlm.nih.gov"
     endPoint = "entrez/eutils/efetch.fcgi"
     hL = [("Accept", "application/xml")]
     pD = {"db": "pubmed", "retmode": "xml", "id": ",".join(idList)}
     ureq = UrlRequestUtil()
     return ureq.get(baseUrl, endPoint, pD, headers=hL)
Exemplo n.º 6
0
 def __doRequestSecondary(self, idList):
     """Request XML records for the input accessions from the secondary service.

     Args:
         idList (list): accession strings, comma-joined into the "accession" parameter

     Returns:
         the value returned by UrlRequestUtil.get() for the "proteins/api/proteins" endpoint
     """
     serviceUrl = self.__urlSecondary
     params = {"size": "-1", "accession": ",".join(idList)}
     requester = UrlRequestUtil()
     return requester.get(serviceUrl, "proteins/api/proteins", params, headers=[("Accept", "application/xml")])
Exemplo n.º 7
0
 def __doRequestPrimary(self, idList):
     """Request XML records for the input identifiers from the primary service.

     Args:
         idList (list): identifier strings, space-joined into the "query" parameter

     Returns:
         the value returned by UrlRequestUtil.get() for the "uploadlists" endpoint
     """
     serviceUrl = self.__urlPrimary
     params = {"from": "ACC+ID", "to": "ACC", "format": "xml", "query": " ".join(idList)}
     requester = UrlRequestUtil()
     return requester.get(serviceUrl, "uploadlists", params, headers=[("Accept", "application/xml")])
Exemplo n.º 8
0
 def getStatusDetails(self):
     """Fetch the database version and release date from the ChEMBL status service.

     Returns:
         (str, str): version (the text between the first and second "_" of
                     "chembl_db_version") and the "chembl_release_date" value;
                     either is None when it is missing or cannot be obtained
     """
     # Initialised before the try block so the final return is always well defined.
     version = releaseDateString = None
     try:
         baseUrl = "https://www.ebi.ac.uk"
         endPoint = "chembl/api/data/status.json"
         ureq = UrlRequestUtil()
         ret, retCode = ureq.get(baseUrl, endPoint, {}, headers=[], returnContentType="JSON")
         logger.info("retCode %r ret %r", retCode, ret)
         if ret:
             # Read the release date first so that a malformed version string
             # cannot prevent it from being returned.
             releaseDateString = ret["chembl_release_date"] if "chembl_release_date" in ret else None
             tS = ret["chembl_db_version"] if "chembl_db_version" in ret else None
             # A value without "_" yields no version instead of raising IndexError.
             fields = tS.split("_") if tS else []
             version = fields[1] if len(fields) > 1 and fields[1] else None
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return version, releaseDateString
Exemplo n.º 9
0
    def testUnpBatchFetchFail(self):
        """UniProt batch fetch (proteins) get test (expected failure)

        Sends the request to the host "www0.ebi.ac.uk" and requires that both the
        response and the status code come back as None.
        """
        serviceUrl = "https://www0.ebi.ac.uk"
        servicePath = "proteins/api/proteins"
        accessionList = self.__unpIdList1[:10]
        try:
            xmlHeaders = [("Accept", "application/xml")]
            params = {"size": "-1", "accession": ",".join(accessionList)}
            requester = UrlRequestUtil()
            reply, statusCode = requester.get(serviceUrl, servicePath, params, headers=xmlHeaders)
            logger.debug("XML result %r", reply)
            logger.debug("Result status code %r", statusCode)
            # Neither a payload nor a status code is expected.
            for value in (reply, statusCode):
                self.assertEqual(value, None)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 10
0
    def testUnpBatchFetchGetEbi(self):
        """UniProt batch fetch (proteins) get test (EBI endpoint)

        Requires the XML reply to contain at least (number of identifiers - 1)
        occurrences of "<entry ".
        """
        serviceUrl = "https://www.ebi.ac.uk"
        servicePath = "proteins/api/proteins"
        accessionList = self.__unpIdList1[:10]
        try:
            xmlHeaders = [("Accept", "application/xml")]
            params = {"size": "-1", "accession": ",".join(accessionList)}
            requester = UrlRequestUtil()
            xmlText, statusCode = requester.get(serviceUrl, servicePath, params, headers=xmlHeaders)
            logger.debug("XML result %r", xmlText)
            entryCount = xmlText.count("<entry ")
            logger.info("Result count %d status code %r", entryCount, statusCode)
            # The threshold tolerates one missing entry.
            self.assertGreaterEqual(entryCount, len(accessionList) - 1)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 11
0
 def testUnpBatchFetchPost(self):
     """UniProt batch fetch (ebi dbfetch) post test

     Posts the first ten fixture identifiers to the dbfetch endpoint and requires
     at least one "<entry " occurrence per identifier in the reply.
     """
     serviceUrl = "https://www.ebi.ac.uk"
     servicePath = "Tools/dbfetch/dbfetch"
     accessionList = self.__unpIdList1[:10]
     try:
         params = {
             "db": "uniprotkb",
             "id": ",".join(accessionList),
             "format": "uniprotxml",
             "style": "raw",
         }
         requester = UrlRequestUtil()
         xmlText, statusCode = requester.post(serviceUrl, servicePath, params)
         logger.debug("XML result %r", xmlText)
         entryCount = xmlText.count("<entry ")
         logger.info("Result count %d status code %r", entryCount, statusCode)
         self.assertGreaterEqual(entryCount, len(accessionList))
     except Exception as err:
         logger.exception("Failing with %s", str(err))
         self.fail()
Exemplo n.º 12
0
 def __doSequenceRequestSecondary(self, unpIdList):
     """Fetch FASTA sequence data from the secondary service, one request per identifier.

     Args:
         unpIdList (list): identifier strings appended to the "proteins/api/proteins/" path

     Returns:
         (bool, dict): False if any request did not return code 200 with content (True otherwise),
                       and a dictionary of parsed results keyed by the sequence identifier
                       returned by __parseFastaResponse()
     """
     serviceUrl = self.__urlSecondary
     fastaHeaders = {"Accept": "text/x-fasta"}
     params = {}
     sequenceD = {}
     allOk = True
     for unpId in unpIdList:
         servicePath = "proteins/api/proteins/" + unpId
         requester = UrlRequestUtil()
         reply, statusCode = requester.getUnWrapped(serviceUrl, servicePath, params, headers=fastaHeaders)
         if statusCode not in [200] or not reply or len(reply) <= 0:
             allOk = False
             continue
         parsedOk, seqId, recordD = self.__parseFastaResponse(reply)
         if not parsedOk:
             # A parsing problem is logged but does not change the returned status.
             logger.error("Parsing error in sequence data for %r", unpId)
             continue
         sequenceD[seqId] = recordD
     return allOk, sequenceD
Exemplo n.º 13
0
    def testNcbiFetchEntryPost(self):
        """NCBI batch fetch (efetch) get test

        Requests two Nucleotide entries as XML and requires at least one
        "<GBSeq_length>" occurrence per identifier in the reply.
        """
        accessionList = ["AP012306.1", "U53879.1"]
        serviceUrl = "https://eutils.ncbi.nlm.nih.gov"
        servicePath = "entrez/eutils/efetch.fcgi"
        try:
            xmlHeaders = [("Accept", "application/xml")]
            params = {"db": "Nucleotide", "id": ",".join(accessionList), "retmode": "xml"}
            requester = UrlRequestUtil()
            xmlText, statusCode = requester.get(serviceUrl, servicePath, params, headers=xmlHeaders)
            recordCount = xmlText.count("<GBSeq_length>")
            logger.debug("XML result %r", xmlText)
            logger.info("Result count %d status code %r", recordCount, statusCode)
            self.assertGreaterEqual(recordCount, len(accessionList))
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 14
0
 def doLookup(self, itemList, itemKey="GENENAME"):
     """Map the input items to accessions through the primary service's "uploadlists" endpoint.

     Args:
         itemList (list): item strings, space-joined into the "query" parameter
         itemKey (str, optional): value sent as the "from" parameter. Defaults to "GENENAME".

     Returns:
         (list, retCode): non-empty lines of the "list" format response and the
                          return code from the request; ([], None) if an exception is raised
     """
     try:
         serviceUrl = self.__urlPrimary
         params = {"from": itemKey, "to": "ACC", "format": "list", "query": " ".join(itemList)}
         requester = UrlRequestUtil()
         # No Accept header is sent for the plain list format.
         replyText, statusCode = requester.get(serviceUrl, "uploadlists", params, headers=[])
         if not replyText:
             return [], statusCode
         # Drop the empty strings produced by blank or trailing lines.
         return [line for line in replyText.split("\n") if line], statusCode
     except Exception as err:
         logger.exception("Failing with %s", str(err))
     return [], None
Exemplo n.º 15
0
 def __doSequenceRequestPrimary(self, unpIdList):
     """Fetch FASTA sequence data from the primary service, one request per identifier.

     Args:
         unpIdList (list): identifier strings used to build the "uniprot/<id>.fasta" path

     Returns:
         (bool, dict): False if any request did not return code 200 with content (True otherwise),
                       and a dictionary of parsed results keyed by the sequence identifier
                       returned by __parseFastaResponse()
     """
     suffix = "." + "fasta"
     serviceUrl = self.__urlPrimary
     fastaHeaders = {"Accept": "text/x-fasta"}
     params = {}
     # A single request helper is shared by every identifier.
     requester = UrlRequestUtil()
     sequenceD = {}
     allOk = True
     for unpId in unpIdList:
         servicePath = "uniprot/" + unpId + suffix
         reply, statusCode = requester.getUnWrapped(serviceUrl, servicePath, params, headers=fastaHeaders)
         logger.debug("unpId %r url %s endpoint %r ret %r retCode %r", unpId, serviceUrl, servicePath, reply, statusCode)
         if statusCode not in [200] or not reply or len(reply) <= 0:
             allOk = False
             continue
         parsedOk, seqId, recordD = self.__parseFastaResponse(reply)
         if not parsedOk:
             # A parsing problem is logged but does not change the returned status.
             logger.error("Parsing error in sequence data for %r", unpId)
             continue
         sequenceD[seqId] = recordD
     return allOk, sequenceD
    def __fetchDescriptors(self, ccIdList, ccidxP, chunkSize=100):
        """Fetch transformed SMILES descriptors from the ChemAxon webservice.

            Args:
                ccIdList (list, str): chemical component identifier list
                ccidxP (object): instance of the ChemCompIndexProvider()
                chunkSize (int, optional): number of SMILES per request. Defaults to 100.

            Returns:
                (dict): dictionary {<ccId>: [<transformed SMILES>, ...], ...}
                        Only descriptors that differ from every input SMILES of the
                        component are included. Results gathered before an
                        exception are still returned.

        Example API parameter data:
                            {
                            "errorHandlingMode": "FAIL_ON_ERROR",
                            "inputParams": "smiles",
                            "outputParams": "smiles",
                            "structures": [
                                "CC(C)[C@H](N)C=O",
                                "CC[C@H](C)[C@H](N)C=O",
                                "CC(C)C[C@H](N)C=O"
                            ]
                            }

        Example query:
        curl -X POST "https://jchem-microservices.chemaxon.com/jwsio/rest-v1/molconvert/batch" -H "accept: */*"
               -H "Content-Type: application/json" -d "{ \"errorHandlingMode\": \"FAIL_ON_ERROR\", \"inputParams\": \"smiles\",
               \"outputParams\": \"mrv\", \"structures\": [ \"CC(C)[C@H](N)C=O\", \"CC[C@H](C)[C@H](N)C=O\", \"CC(C)C[C@H](N)C=O\" ]}"
        """
        descrD = {}
        # smilesCcIdD: ccId -> input SMILES; smilesD: SMILES -> ccIds that share it
        smilesCcIdD = {}
        smilesD = {}
        for ccId in ccIdList:
            smiL = list(
                set(
                    ccidxP.getSMILES(ccId,
                                     smiTypeList=[
                                         "oe-iso-smiles", "oe-smiles",
                                         "cactvs-iso-smiles", "cactvs-smiles"
                                     ])))
            smilesCcIdD.setdefault(ccId, []).extend(smiL)
            for smi in smiL:
                smilesD.setdefault(smi, []).append(ccId)
        #
        logger.info("Translating (%d) SMILES for components (%d)",
                    len(smilesD), len(smilesCcIdD))
        # ----
        # Materialise the unique SMILES once; rebuilding the key list for every
        # chunk repeats the full copy for each slice.
        uniqueSmiL = list(smilesD)
        smiLL = [
            uniqueSmiL[i:i + chunkSize]
            for i in range(0, len(uniqueSmiL), chunkSize)
        ]
        # ---
        baseUrl = "https://jchem-microservices.chemaxon.com"
        endPoint = "jwsio/rest-v1/molconvert/batch"
        hD = {"Accept": "application/json", "Content-Type": "application/json"}
        try:
            pD = {
                "errorHandlingMode": "SKIP_ERROR",
                "inputParams": "smiles",
                "outputParams": "smiles"
            }
            #
            for iCount, smiL in enumerate(smiLL, 1):
                ureq = UrlRequestUtil()
                pD["structures"] = smiL
                logger.debug("pD %r", pD)
                rDL, retCode = ureq.postUnWrapped(
                    baseUrl,
                    endPoint,
                    pD,
                    headers=hD,
                    sendContentType="application/json",
                    returnContentType="application/json")
                logger.debug("API result (%r) %r", retCode, rDL)
                # Results are matched to inputs by position, so the lengths must agree.
                if rDL and len(rDL) == len(smiL):
                    for smi, rD in zip(smiL, rDL):
                        if not ("structure" in rD and "successful" in rD
                                and rD["successful"]):
                            continue
                        newSmi = rD["structure"]
                        # Skip results identical to the submitted SMILES.
                        if newSmi == smi:
                            continue
                        for ccId in smilesD[smi]:
                            # Skip descriptors already recorded or already among the inputs.
                            if ccId in descrD and newSmi in descrD[ccId]:
                                continue
                            if newSmi in smilesCcIdD[ccId]:
                                continue
                            descrD.setdefault(ccId, []).append(newSmi)
                else:
                    # rDL can be None/empty here; len(None) would raise and the
                    # outer handler would then abandon all remaining chunks.
                    logger.info("Chunk %d failed (%d)", iCount,
                                len(rDL) if rDL else 0)
                if iCount % 10 == 0:
                    logger.info("Completed processing chunk (%d/%d)", iCount,
                                len(smiLL))

            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return descrD
Exemplo n.º 17
0
    def testPubChemFetchClassification(self):
        """PubChem fetch classification test - can timeout

        For each (identifier, expected status, expected cid, return type) case the
        request is issued once as GET and once as POST. "record" cases use the
        get()/post() calls, "classification" cases use getUnWrapped()/postUnWrapped().
        The status code must equal the expected one and, for successful "record"
        requests, the compound id in the reply must match.
        """
        caseList = [
            ("2244", 200, "2244", "record"),
            ("123631", 200, "123631", "record"),
            ("2244", 200, "2244", "classification"),
            ("123631", 200, "123631", "classification"),
        ]
        nameSpace = "cid"
        domain = "compound"
        searchType = "lookup"
        outputType = "JSON"
        baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
        catchCodes = [404]

        try:
            for identifier, expectedCode, expectedCid, returnType in caseList:
                for requestType in ("GET", "POST"):
                    logger.info("namespace %r identifier %r returnType %r requestType %r", nameSpace, identifier, returnType, requestType)
                    reply, statusCode = None, None
                    headerL = []
                    requester = UrlRequestUtil()
                    isLookup = searchType in ["lookup"]
                    wantsRecord = nameSpace in ["cid", "name", "inchikey"] and returnType in ["record"] and isLookup
                    wantsClassification = nameSpace in ["cid"] and returnType in ["classification"] and isLookup
                    # Every endpoint shares this path prefix.
                    pathParts = ["rest", "pug", domain, nameSpace]
                    if wantsRecord and requestType == "GET":
                        pathParts += [quote(identifier.encode("utf8")), outputType]
                        reply, statusCode = requester.get(
                            baseUrl, "/".join(pathParts), {}, headers=headerL, httpCodesCatch=catchCodes, returnContentType="JSON"
                        )
                    elif wantsRecord and requestType == "POST":
                        pathParts.append(outputType)
                        reply, statusCode = requester.post(
                            baseUrl, "/".join(pathParts), {nameSpace: identifier}, headers=headerL, httpCodesCatch=catchCodes, returnContentType="JSON"
                        )
                    elif wantsClassification and requestType == "GET":
                        # Needs to be specifically targeted on a particular compound ...
                        pathParts += [quote(identifier.encode("utf8")), returnType, outputType]
                        reply, statusCode = requester.getUnWrapped(
                            baseUrl, "/".join(pathParts), {}, headers={}, httpCodesCatch=catchCodes, returnContentType="JSON"
                        )
                    elif wantsClassification and requestType == "POST":
                        # This is a long request; server return codes of 500 may be observed.
                        pathParts += [returnType, outputType]
                        reply, statusCode = requester.postUnWrapped(
                            baseUrl, "/".join(pathParts), {nameSpace: identifier}, headers={}, httpCodesCatch=catchCodes, returnContentType="JSON"
                        )
                    #
                    logger.debug("Result status code %r", statusCode)
                    self.assertEqual(statusCode, expectedCode)
                    if statusCode == 200 and returnType == "record":
                        foundCid = str(reply["PC_Compounds"][0]["id"]["id"]["cid"])
                        self.assertEqual(foundCid, expectedCid)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 18
0
    def testPubChemFetch(self):
        """PubChem fetch test

        Looks up two InChIKeys, each with GET and POST through the unwrapped calls.
        The first is expected to return 404 and the second 200 with compound id
        78579. With the fixed settings below (namespace "inchikey", return type
        "record") only the two "record" branches can be taken.
        """
        caseList = [
            ("JTOKYIBTLUQVQV-FGHQGBLESA-N", 404, None),
            ("CXHHBNMLPJOKQD-UHFFFAOYSA-N", 200, 78579),
        ]
        nameSpace = "inchikey"
        domain = "compound"
        searchType = "lookup"
        returnType = "record"
        outputType = "JSON"
        baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
        catchCodes = [404]

        try:
            for identifier, expectedCode, expectedCid in caseList:
                for requestType in ("GET", "POST"):
                    reply, statusCode = None, None
                    requester = UrlRequestUtil()
                    # Keyword arguments common to every request variant.
                    commonKw = {
                        "headers": {},
                        "httpCodesCatch": catchCodes,
                        "returnContentType": "JSON",
                        "sslCert": "enable",
                    }
                    isLookup = searchType in ["lookup"]
                    wantsRecord = nameSpace in ["cid", "name", "inchikey"] and returnType in ["record"] and isLookup
                    wantsClassification = nameSpace in ["cid"] and returnType in ["classification"] and isLookup
                    pathParts = ["rest", "pug", domain, nameSpace]
                    if wantsRecord and requestType == "GET":
                        pathParts += [quote(identifier.encode("utf8")), outputType]
                        reply, statusCode = requester.getUnWrapped(baseUrl, "/".join(pathParts), {}, **commonKw)
                    elif wantsRecord and requestType == "POST":
                        pathParts.append(outputType)
                        reply, statusCode = requester.postUnWrapped(baseUrl, "/".join(pathParts), {nameSpace: identifier}, **commonKw)
                    elif wantsClassification and requestType == "GET":
                        # Needs to be specifically targeted on a particular compound ...
                        pathParts += [quote(identifier.encode("utf8")), returnType, outputType]
                        reply, statusCode = requester.getUnWrapped(
                            baseUrl, "/".join(pathParts), {"classification_type": "simple"}, **commonKw
                        )
                    elif wantsClassification and requestType == "POST":
                        # This is a long request; server return codes of 500 may be observed.
                        pathParts += [returnType, outputType]
                        reply, statusCode = requester.postUnWrapped(
                            baseUrl, "/".join(pathParts), {nameSpace: identifier, "classification_type": "simple"}, **commonKw
                        )
                    #
                    logger.debug("Result status code %r", statusCode)
                    self.assertEqual(statusCode, expectedCode)
                    if statusCode == 200:
                        foundCid = reply["PC_Compounds"][0]["id"]["id"]["cid"]
                        self.assertEqual(foundCid, expectedCid)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()