Exemplo n.º 1
0
    def testPubChemFetch(self):
        """Check PubChem compound record lookups by InChIKey using GET and POST.

        Each identifier is requested once per HTTP method. The returned status
        code must match the expected code and, for a 200 response, the CID of
        the first compound in the payload must match the expected CID. Any
        exception (including an assertion failure) is logged and reported as a
        test failure.
        """
        baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
        domain = "compound"
        nameSpace = "inchikey"
        searchType = "lookup"
        returnType = "record"
        outputType = "JSON"
        httpCodesCatch = [404]
        # (identifier, expected status code, expected PubChem CID)
        idTupList = [
            ("JTOKYIBTLUQVQV-FGHQGBLESA-N", 404, None),
            ("CXHHBNMLPJOKQD-UHFFFAOYSA-N", 200, 78579),
        ]
        # The request flavor depends only on the fixed settings above, so it is resolved once.
        isLookup = searchType == "lookup"
        isRecordLookup = isLookup and returnType == "record" and nameSpace in ("cid", "name", "inchikey")
        isClassificationLookup = isLookup and returnType == "classification" and nameSpace == "cid"

        try:
            for identifier, testRetCode, testPcId in idTupList:
                for requestType in ("GET", "POST"):
                    ret = retCode = None
                    pD = {}
                    hL = {}
                    ureq = UrlRequestUtil()
                    if isRecordLookup or isClassificationLookup:
                        isGet = requestType == "GET"
                        pathL = ["rest", "pug", domain, nameSpace]
                        if isGet:
                            # GET: the URL-quoted identifier is part of the endpoint path
                            pathL.append(quote(identifier.encode("utf8")))
                        else:
                            # POST: the identifier is passed in the parameter dictionary
                            pD[nameSpace] = identifier
                        if isClassificationLookup:
                            # Needs to be specifically targeted on a particular compound ...
                            # (note carried over from the original: this is a long request
                            # and server return codes of 500 may be observed)
                            pathL.append(returnType)
                            pD["classification_type"] = "simple"
                        pathL.append(outputType)
                        endPoint = "/".join(pathL)
                        sendRequest = ureq.getUnWrapped if isGet else ureq.postUnWrapped
                        ret, retCode = sendRequest(
                            baseUrl,
                            endPoint,
                            pD,
                            headers=hL,
                            httpCodesCatch=httpCodesCatch,
                            returnContentType="JSON",
                            sslCert="enable",
                        )
                    #
                    logger.debug("Result status code %r", retCode)
                    self.assertEqual(retCode, testRetCode)
                    if retCode != 200:
                        continue
                    # Only a successful response carries a compound payload to verify
                    pcId = ret["PC_Compounds"][0]["id"]["id"]["cid"]
                    self.assertEqual(pcId, testPcId)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Exemplo n.º 2
0
    def testPubChemFetchClassification(self):
        """PubChem fetch record and classification test by CID - can timeout

        Each (identifier, return type) case is requested with both GET and
        POST. Record lookups go through the get()/post() calls and
        classification lookups through getUnWrapped()/postUnWrapped(). The
        status code is always checked; the returned CID is checked only for
        successful record lookups. Any exception (including an assertion
        failure) is logged and reported as a test failure.
        """
        baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
        domain = "compound"
        nameSpace = "cid"
        searchType = "lookup"
        outputType = "JSON"
        httpCodesCatch = [404]
        # (identifier, expected status code, expected CID as a string, return type)
        idTupList = [
            ("2244", 200, "2244", "record"),
            ("123631", 200, "123631", "record"),
            ("2244", 200, "2244", "classification"),
            ("123631", 200, "123631", "classification"),
        ]

        try:
            for identifier, testRetCode, testPcId, returnType in idTupList:
                # The request flavor is fixed per case; only the HTTP method varies below.
                isLookup = searchType == "lookup"
                isRecord = isLookup and returnType == "record" and nameSpace in ("cid", "name", "inchikey")
                isClassification = isLookup and returnType == "classification" and nameSpace == "cid"
                for requestType in ("GET", "POST"):
                    logger.info(
                        "namespace %r identifier %r returnType %r requestType %r",
                        nameSpace, identifier, returnType, requestType)
                    ret = retCode = None
                    pD = {}
                    hL = []
                    ureq = UrlRequestUtil()
                    if isRecord or isClassification:
                        isGet = requestType == "GET"
                        pathL = ["rest", "pug", domain, nameSpace]
                        if isGet:
                            # GET: the URL-quoted identifier is part of the endpoint path
                            pathL.append(quote(identifier.encode("utf8")))
                        else:
                            # POST: the identifier is passed in the parameter dictionary
                            pD = {nameSpace: identifier}
                        if isRecord:
                            sendRequest = ureq.get if isGet else ureq.post
                            headers = hL
                        else:
                            # Needs to be specifically targeted on a particular compound ...
                            # No "classification_type" parameter is sent for these requests.
                            # (note carried over from the original: this is a long request
                            # and server return codes of 500 may be observed)
                            pathL.append(returnType)
                            sendRequest = ureq.getUnWrapped if isGet else ureq.postUnWrapped
                            headers = {}
                        pathL.append(outputType)
                        endPoint = "/".join(pathL)
                        ret, retCode = sendRequest(
                            baseUrl,
                            endPoint,
                            pD,
                            headers=headers,
                            httpCodesCatch=httpCodesCatch,
                            returnContentType="JSON",
                        )
                    #
                    logger.debug("Result status code %r", retCode)
                    self.assertEqual(retCode, testRetCode)
                    if retCode == 200 and returnType == "record":
                        # The payload CID is compared as a string against the expected value
                        pcId = str(ret["PC_Compounds"][0]["id"]["id"]["cid"])
                        self.assertEqual(pcId, testPcId)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
    def __fetchDescriptors(self, ccIdList, ccidxP, chunkSize=100):
        """Fetch transformed SMILES descriptors from the ChemAxon webservice.

        The SMILES of every component are collected from ccidxP, de-duplicated
        and posted in chunks to the molconvert batch endpoint. A returned
        structure is recorded for a component only when the conversion is
        flagged successful, the structure differs from the submitted SMILES,
        and it is neither one of the component's input SMILES nor already
        recorded for that component.

        A chunk with an empty/missing result, or a result whose length differs
        from the request, is logged and skipped. Any exception raised while
        processing is logged and the descriptors collected so far are returned.

            Args:
                ccIdList (list, str): chemical component identifier list
                ccidxP (object): instance of the ChemCompIndexProvider()
                chunkSize (int, optional): number of SMILES per request. Defaults to 100.

            Returns:
                (dict): dictionary {<ccId>: [<transformed SMILES>, ...], ...}

        Example API parameter data:
                            {
                            "errorHandlingMode": "FAIL_ON_ERROR",
                            "inputParams": "smiles",
                            "outputParams": "smiles",
                            "structures": [
                                "CC(C)[C@H](N)C=O",
                                "CC[C@H](C)[C@H](N)C=O",
                                "CC(C)C[C@H](N)C=O"
                            ]
                            }

        Example query:
        curl -X POST "https://jchem-microservices.chemaxon.com/jwsio/rest-v1/molconvert/batch" -H "accept: */*"
               -H "Content-Type: application/json" -d "{ \"errorHandlingMode\": \"FAIL_ON_ERROR\", \"inputParams\": \"smiles\",
               \"outputParams\": \"mrv\", \"structures\": [ \"CC(C)[C@H](N)C=O\", \"CC[C@H](C)[C@H](N)C=O\", \"CC(C)C[C@H](N)C=O\" ]}"
        """
        descrD = {}
        # {ccId: [input SMILES, ...]} and the reverse map {SMILES: [ccId, ...]}
        smilesCcIdD = {}
        smilesD = {}
        for ccId in ccIdList:
            smiL = list(
                set(
                    ccidxP.getSMILES(ccId,
                                     smiTypeList=[
                                         "oe-iso-smiles", "oe-smiles",
                                         "cactvs-iso-smiles", "cactvs-smiles"
                                     ])))
            smilesCcIdD.setdefault(ccId, []).extend(smiL)
            for smi in smiL:
                smilesD.setdefault(smi, []).append(ccId)
        #
        logger.info("Translating (%d) SMILES for components (%d)",
                    len(smilesD), len(smilesCcIdD))
        # ----
        # Materialize the unique SMILES once, then slice into request-sized chunks
        uniqueSmiL = list(smilesD)
        smiLL = [
            uniqueSmiL[i:i + chunkSize]
            for i in range(0, len(uniqueSmiL), chunkSize)
        ]
        # ---
        baseUrl = "https://jchem-microservices.chemaxon.com"
        endPoint = "jwsio/rest-v1/molconvert/batch"
        hD = {"Accept": "application/json", "Content-Type": "application/json"}
        try:
            pD = {
                "errorHandlingMode": "SKIP_ERROR",
                "inputParams": "smiles",
                "outputParams": "smiles"
            }
            #
            for iCount, smiL in enumerate(smiLL, start=1):
                ureq = UrlRequestUtil()
                pD["structures"] = smiL
                logger.debug("pD %r", pD)
                rDL, retCode = ureq.postUnWrapped(
                    baseUrl,
                    endPoint,
                    pD,
                    headers=hD,
                    sendContentType="application/json",
                    returnContentType="application/json")
                logger.debug("API result (%r) %r", retCode, rDL)
                # Results are matched to inputs by position, so the lengths must agree
                if rDL and len(rDL) == len(smiL):
                    for smi, rD in zip(smiL, rDL):
                        if not ("structure" in rD and "successful" in rD
                                and rD["successful"]):
                            continue
                        newSmi = rD["structure"]
                        if newSmi == smi:
                            # Conversion returned the input unchanged - nothing new
                            continue
                        for ccId in smilesD[smi]:
                            if newSmi in descrD.get(ccId, []):
                                continue
                            if newSmi in smilesCcIdD[ccId]:
                                continue
                            descrD.setdefault(ccId, []).append(newSmi)
                else:
                    # rDL may be None/empty here; len(None) would raise and the
                    # outer handler would then abandon all remaining chunks.
                    logger.info("Chunk %d failed (%d)", iCount,
                                len(rDL) if rDL else 0)
                if iCount % 10 == 0:
                    logger.info("Completed processing chunk (%d/%d)", iCount,
                                len(smiLL))

            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return descrD