def testUnpBatchFetchGetRequests(self):
    """UniProt batch fetch (uploadlists) get test (requests)"""
    baseUrl = "https://www.uniprot.org"
    endPoint = "uploadlists"
    accessionL = self.__unpIdList1[:10]
    try:
        headerD = {"Accept": "application/xml"}
        paramD = {
            "from": "ACC+ID",
            "to": "ACC",
            "format": "xml",
            "query": " ".join(accessionL),
        }
        # Exercise the unwrapped (requests-based) GET implementation
        reqUtil = UrlRequestUtil()
        rspText, statusCode = reqUtil.getUnWrapped(baseUrl, endPoint, paramD, headers=headerD, sslCert="enable")
        logger.debug("XML result %r", rspText)
        entryCount = rspText.count("<entry ")
        logger.info("Result count %d status code %r", entryCount, statusCode)
        # Expect one XML <entry> per requested accession
        self.assertGreaterEqual(entryCount, len(accessionL))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def doGeneLookup(self, geneName, taxId, reviewed=False):
    """Return the UniProt accession list matching a gene name and taxonomy id.

    Returns:
        (list, int): matching id list and HTTP status code, or ([], None) on failure.
    """
    rL = []
    try:
        baseUrl = self.__urlPrimary
        endPoint = "uniprot"
        headerL = []
        # Restrict to reviewed (Swiss-Prot) entries when requested
        qFmt = 'gene:"%s" and taxonomy:%s and reviewed:yes' if reviewed else 'gene:"%s" and taxonomy:%s'
        paramD = {"query": qFmt % (geneName, taxId), "format": "list"}
        reqUtil = UrlRequestUtil()
        rspTxt, retCode = reqUtil.get(baseUrl, endPoint, paramD, headers=headerL)
        # One accession per line; drop empty lines
        idList = [t for t in rspTxt.split("\n") if t] if rspTxt else []
        return idList, retCode
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return rL, None
def getAccessionMapping(self, wurcsTupL):
    """Fetch GlyTouCan accessions for the input WURCS descriptor list.

    Returns:
        (dict): {WURCS descriptor: [GlyTouCan accession, ...], ...}
    """
    accessionMapD = {}
    logger.info("Fetching (%d) WURCS descriptors", len(wurcsTupL))
    baseUrl = "https://api.glycosmos.org"
    endPoint = "glytoucan/sparql/wurcs2gtcids"
    numDescriptors = len(wurcsTupL)
    for ii, (entityId, wurcs) in enumerate(wurcsTupL, 1):
        try:
            paramD = {"wurcs": wurcs}
            reqUtil = UrlRequestUtil()
            resultL, retCode = reqUtil.post(baseUrl, endPoint, paramD, returnContentType="JSON")
            logger.debug(" %r wurcs fetch result (%r) %r", entityId, retCode, resultL)
            if not resultL:
                logger.info("%r fetch fails (%r) (%r) %r", entityId, retCode, wurcs, resultL)
            else:
                for resultD in resultL:
                    if "id" in resultD:
                        accessionMapD.setdefault(wurcs, []).append(resultD["id"])
            # Progress marker every 5 descriptors
            if ii % 5 == 0:
                logger.info("Fetched %d/%d", ii, numDescriptors)
        except Exception as e:
            logger.exception("Failing for (%r) wurcs (%r) with %s", entityId, wurcs, str(e))
    return accessionMapD
def testGetChemSearchRequests(self):
    """ChemSearch repetition GET protocol test (using requests module)"""
    # Production east and west instances
    baseUrlList = ["https://chemsearch-west.rcsb.org", "https://chemsearch-east.rcsb.org"]
    endPoint = "chem-match-v1/InChI"
    resultLen = 13
    descr = "InChI=1S/C9H15N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3-4,6,12,15-16H,2H2,1H3,(H4,10,11,13,14,17)/t3-,4-,6-/m1/s1"
    try:
        for baseUrl in baseUrlList:
            paramD = {"query": descr, "matchType": "fingerprint-similarity"}
            # Repeat the identical query and report any run with an unexpected match count
            for trial in range(100):
                reqUtil = UrlRequestUtil()
                rspD, statusCode = reqUtil.getUnWrapped(baseUrl, endPoint, paramD, headers={}, sslCert="enable", returnContentType="JSON")
                matchCount = len(rspD["matchedIdList"])
                if matchCount != resultLen:
                    logger.info(">>> %3d (%r) (%r) result length %r", trial, baseUrl, statusCode, matchCount)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __doRequestPrimary(self, idList):
    """Fetch PubMed records for idList via NCBI efetch, e.g.

    http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=ID1,ID2,...
    """
    baseUrl = "http://eutils.ncbi.nlm.nih.gov"
    endPoint = "entrez/eutils/efetch.fcgi"
    headerL = [("Accept", "application/xml")]
    paramD = {"db": "pubmed", "retmode": "xml", "id": ",".join(idList)}
    return UrlRequestUtil().get(baseUrl, endPoint, paramD, headers=headerL)
def __doRequestSecondary(self, idList):
    """Fetch entries for idList from the secondary (EBI proteins API) endpoint (XML).

    Returns:
        (str, int): response text and HTTP status code (per UrlRequestUtil.get()).
    """
    baseUrl = self.__urlSecondary
    endPoint = "proteins/api/proteins"
    # Bug fix: hL was referenced below but its assignment was commented out,
    # raising NameError at runtime. Restore the XML Accept header used by the
    # sibling __doRequestPrimary() methods.
    hL = [("Accept", "application/xml")]
    pD = {}
    pD["size"] = "-1"  # -1 => no paging limit on the result set
    pD["accession"] = ",".join(idList)
    ureq = UrlRequestUtil()
    return ureq.get(baseUrl, endPoint, pD, headers=hL)
def __doRequestPrimary(self, idList):
    """Map the input id list to UniProt accessions (XML) via the uploadlists service."""
    baseUrl = self.__urlPrimary
    endPoint = "uploadlists"
    headerL = [("Accept", "application/xml")]
    paramD = {
        "from": "ACC+ID",
        "to": "ACC",
        "format": "xml",
        "query": " ".join(idList),
    }
    return UrlRequestUtil().get(baseUrl, endPoint, paramD, headers=headerL)
def getStatusDetails(self):
    """Return version and release date details from the ChEMBL status service.

    Returns:
        (str, str): ChEMBL database version number (e.g. "29" from "chembl_29")
                    and release date string, or (None, None) on failure.
    """
    version = releaseDateString = None
    try:
        baseUrl = "https://www.ebi.ac.uk"
        endPoint = "chembl/api/data/status.json"
        hL = []
        pD = {}
        ureq = UrlRequestUtil()
        ret, retCode = ureq.get(baseUrl, endPoint, pD, headers=hL, returnContentType="JSON")
        logger.info("retCode %r ret %r", retCode, ret)
        if ret:
            tS = ret.get("chembl_db_version")
            # Bug fix: the original tested "tS.split('_')[1]" inside a conditional
            # expression, which raised IndexError whenever the version string
            # contained no underscore. Guard the field count explicitly.
            fL = tS.split("_") if tS else []
            version = fL[1] if len(fL) > 1 and fL[1] else None
            releaseDateString = ret.get("chembl_release_date")
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return version, releaseDateString
def testUnpBatchFetchFail(self):
    """UniProt batch fetch (proteins) get test (expected failure)"""
    # Deliberately bogus host (www0) -- the request should yield (None, None)
    baseUrl = "https://www0.ebi.ac.uk"
    endPoint = "proteins/api/proteins"
    accessionL = self.__unpIdList1[:10]
    try:
        headerL = [("Accept", "application/xml")]
        paramD = {"size": "-1", "accession": ",".join(accessionL)}
        reqUtil = UrlRequestUtil()
        rsp, statusCode = reqUtil.get(baseUrl, endPoint, paramD, headers=headerL)
        logger.debug("XML result %r", rsp)
        logger.debug("Result status code %r", statusCode)
        self.assertEqual(rsp, None)
        self.assertEqual(statusCode, None)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testUnpBatchFetchGetEbi(self):
    """UniProt batch fetch (proteins) get test (EBI endpoint)"""
    baseUrl = "https://www.ebi.ac.uk"
    endPoint = "proteins/api/proteins"
    accessionL = self.__unpIdList1[:10]
    try:
        headerL = [("Accept", "application/xml")]
        paramD = {"size": "-1", "accession": ",".join(accessionL)}
        reqUtil = UrlRequestUtil()
        rsp, statusCode = reqUtil.get(baseUrl, endPoint, paramD, headers=headerL)
        logger.debug("XML result %r", rsp)
        entryCount = rsp.count("<entry ")
        logger.info("Result count %d status code %r", entryCount, statusCode)
        # Allow one missing/merged entry in the response
        self.assertGreaterEqual(entryCount, len(accessionL) - 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testUnpBatchFetchPost(self):
    """UniProt batch fetch (ebi dbfetch) post test"""
    baseUrl = "https://www.ebi.ac.uk"
    endPoint = "Tools/dbfetch/dbfetch"
    accessionL = self.__unpIdList1[:10]
    try:
        paramD = {
            "db": "uniprotkb",
            "id": ",".join(accessionL),
            "format": "uniprotxml",
            "style": "raw",
        }
        reqUtil = UrlRequestUtil()
        rsp, statusCode = reqUtil.post(baseUrl, endPoint, paramD)
        logger.debug("XML result %r", rsp)
        entryCount = rsp.count("<entry ")
        logger.info("Result count %d status code %r", entryCount, statusCode)
        # Expect one XML <entry> per requested accession
        self.assertGreaterEqual(entryCount, len(accessionL))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __doSequenceRequestSecondary(self, unpIdList):
    """Fetch FASTA sequence data for each id from the secondary endpoint.

    Returns:
        (bool, dict): overall success flag and {seqId: parsed record, ...}
    """
    sD = {}
    baseUrl = self.__urlSecondary
    headerD = {"Accept": "text/x-fasta"}
    ok = True
    for unpId in unpIdList:
        endPoint = "proteins/api/proteins/" + unpId
        rsp, statusCode = UrlRequestUtil().getUnWrapped(baseUrl, endPoint, {}, headers=headerD)
        if statusCode not in [200] or not rsp:
            ok = False
            continue
        rOk, seqId, rD = self.__parseFastaResponse(rsp)
        if rOk:
            sD[seqId] = rD
        else:
            logger.error("Parsing error in sequence data for %r", unpId)
    return ok, sD
def testNcbiFetchEntryPost(self):
    """NCBI batch fetch (efetch) get test"""
    idList = ["AP012306.1", "U53879.1"]
    database = "Nucleotide"
    baseUrl = "https://eutils.ncbi.nlm.nih.gov"
    endPoint = "entrez/eutils/efetch.fcgi"
    try:
        headerL = [("Accept", "application/xml")]
        paramD = {"db": database, "id": ",".join(idList), "retmode": "xml"}
        reqUtil = UrlRequestUtil()
        rsp, statusCode = reqUtil.get(baseUrl, endPoint, paramD, headers=headerL)
        seqCount = rsp.count("<GBSeq_length>")
        logger.debug("XML result %r", rsp)
        logger.info("Result count %d status code %r", seqCount, statusCode)
        # Expect one GBSeq record per requested id
        self.assertGreaterEqual(seqCount, len(idList))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def doLookup(self, itemList, itemKey="GENENAME"):
    """Map the input items (default: gene names) to UniProt accessions.

    Returns:
        (list, int): accession list and HTTP status code, or ([], None) on failure.
    """
    rL = []
    try:
        baseUrl = self.__urlPrimary
        endPoint = "uploadlists"
        headerL = []
        paramD = {
            "from": itemKey,
            "to": "ACC",
            "format": "list",
            "query": " ".join(itemList),
        }
        reqUtil = UrlRequestUtil()
        rspTxt, retCode = reqUtil.get(baseUrl, endPoint, paramD, headers=headerL)
        # One accession per line; drop empty lines
        idList = [t for t in rspTxt.split("\n") if t] if rspTxt else []
        return idList, retCode
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return rL, None
def __doSequenceRequestPrimary(self, unpIdList):
    """Fetch FASTA sequence data for each id from the primary endpoint.

    Returns:
        (bool, dict): overall success flag and {seqId: parsed record, ...}
    """
    sD = {}
    fmt = "fasta"
    baseUrl = self.__urlPrimary
    headerD = {"Accept": "text/x-fasta"}
    reqUtil = UrlRequestUtil()
    ok = True
    for unpId in unpIdList:
        endPoint = "uniprot/" + unpId + "." + fmt
        rsp, statusCode = reqUtil.getUnWrapped(baseUrl, endPoint, {}, headers=headerD)
        logger.debug("unpId %r url %s endpoint %r ret %r retCode %r", unpId, baseUrl, endPoint, rsp, statusCode)
        if statusCode not in [200] or not rsp:
            ok = False
            continue
        rOk, seqId, rD = self.__parseFastaResponse(rsp)
        if rOk:
            sD[seqId] = rD
        else:
            logger.error("Parsing error in sequence data for %r", unpId)
    return ok, sD
def __fetchDescriptors(self, ccIdList, ccidxP, chunkSize=100):
    """Fetch transformed SMILES descriptors from the ChemAxon webservice.

    Args:
        ccIdList (list, str): chemical component identifier list
        ccidxP (object): instance of the ChemCompIndexProvider()
        chunkSize (int, optional): number of SMILES per request. Defaults to 100.

    Returns:
        (dict): dictionary {<ccId>: [<transformed SMILES>, ...], ...}

    Example API parameter data:
        {
            "errorHandlingMode": "FAIL_ON_ERROR",
            "inputParams": "smiles",
            "outputParams": "smiles",
            "structures": ["CC(C)[C@H](N)C=O", "CC[C@H](C)[C@H](N)C=O", "CC(C)C[C@H](N)C=O"]
        }

    Example query:
        curl -X POST "https://jchem-microservices.chemaxon.com/jwsio/rest-v1/molconvert/batch"
             -H "accept: */*" -H "Content-Type: application/json"
             -d "{ \\"errorHandlingMode\\": \\"FAIL_ON_ERROR\\", \\"inputParams\\": \\"smiles\\", \\"outputParams\\": \\"mrv\\",
                   \\"structures\\": [ \\"CC(C)[C@H](N)C=O\\", \\"CC[C@H](C)[C@H](N)C=O\\", \\"CC(C)C[C@H](N)C=O\\" ]}"
    """
    descrD = {}       # return value: {ccId: [transformed SMILES, ...], ...}
    smilesCcIdD = {}  # forward index: {ccId: [input SMILES, ...], ...}
    smilesD = {}      # reverse index: {SMILES: [ccId, ...], ...} for mapping results back
    # Collect the unique SMILES variants for each component
    for ccId in ccIdList:
        smiL = list(set(ccidxP.getSMILES(ccId, smiTypeList=["oe-iso-smiles", "oe-smiles", "cactvs-iso-smiles", "cactvs-smiles"])))
        smilesCcIdD.setdefault(ccId, []).extend(smiL)
        for smi in smiL:
            smilesD.setdefault(smi, []).append(ccId)
    #
    logger.info("Translating (%d) SMILES for components (%d)", len(smilesD), len(smilesCcIdD))
    # ---- partition the unique SMILES into request chunks of at most chunkSize
    smiLL = [list(smilesD.keys())[i:i + chunkSize] for i in range(0, len(smilesD), chunkSize)]
    # ---
    baseUrl = "https://jchem-microservices.chemaxon.com"
    endPoint = "jwsio/rest-v1/molconvert/batch"
    # hL = [("Accept", "application/json"), ("Content-Type", "application/json")]
    hD = {"Accept": "application/json", "Content-Type": "application/json"}
    try:
        # "structures" is filled in per chunk below
        pD = {"errorHandlingMode": "SKIP_ERROR", "inputParams": "smiles", "outputParams": "smiles"}
        #
        iCount = 0
        for smiL in smiLL:
            iCount += 1
            ureq = UrlRequestUtil()
            pD["structures"] = smiL
            logger.debug("pD %r", pD)
            rDL, retCode = ureq.postUnWrapped(baseUrl, endPoint, pD, headers=hD, sendContentType="application/json", returnContentType="application/json")
            logger.debug("API result (%r) %r", retCode, rDL)
            # Results are positional: rDL[ii] corresponds to input smiL[ii]
            if rDL and len(rDL) == len(smiL):
                for ii, rD in enumerate(rDL):
                    if "structure" in rD and "successful" in rD and rD["successful"]:
                        # Skip results identical to the submitted SMILES
                        if smiL[ii] == rD["structure"]:
                            continue
                        for ccId in smilesD[smiL[ii]]:
                            # Skip duplicates already recorded for this component
                            if ccId in descrD and rD["structure"] in descrD[ccId]:
                                continue
                            # Skip results matching one of the component's own input SMILES
                            if rD["structure"] in smilesCcIdD[ccId]:
                                continue
                            descrD.setdefault(ccId, []).append(rD["structure"])
            else:
                # NOTE(review): len(rDL) raises TypeError when rDL is None --
                # confirm postUnWrapped() always returns a list on failure
                logger.info("Chunk %d failed (%d)", iCount, len(rDL))
            if iCount % 10 == 0:
                logger.info("Completed processing chunk (%d/%d)", iCount, len(smiLL))
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return descrD
def testPubChemFetchClassification(self):
    """PubChem fetch classification test - can timeout

    Exercises record and classification lookups for each compound id using
    both GET and POST request protocols.
    """
    # (identifier, expected status code, expected cid, return type)
    idTupList = [("2244", 200, "2244", "record"), ("123631", 200, "123631", "record"), ("2244", 200, "2244", "classification"), ("123631", 200, "123631", "classification")]
    nameSpace = "cid"
    domain = "compound"
    searchType = "lookup"
    # returnType = "record"
    requestType = "GET"
    outputType = "JSON"
    baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
    # 404 responses are caught/returned rather than raised
    httpCodesCatch = [404]
    try:
        for (identifier, testRetCode, testPcId, returnType) in idTupList:
            for requestType in ["GET", "POST"]:
                logger.info("namespace %r identifier %r returnType %r requestType %r", nameSpace, identifier, returnType, requestType)
                ret, retCode = None, None
                pD = {}
                hL = []
                ureq = UrlRequestUtil()
                # Record lookup via GET: identifier is URL-encoded into the path
                if nameSpace in ["cid", "name", "inchikey"] and returnType in ["record"] and searchType in ["lookup"] and requestType == "GET":
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, outputType])
                    ret, retCode = ureq.get(baseUrl, endPoint, pD, headers=hL, httpCodesCatch=httpCodesCatch, returnContentType="JSON")
                # Record lookup via POST: identifier is carried in the form payload
                elif nameSpace in ["cid", "name", "inchikey"] and returnType in ["record"] and searchType in ["lookup"] and requestType == "POST":
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, outputType])
                    pD = {nameSpace: identifier}
                    ret, retCode = ureq.post(baseUrl, endPoint, pD, headers=hL, httpCodesCatch=httpCodesCatch, returnContentType="JSON")
                #
                # Classification lookup via GET
                elif nameSpace in ["cid"] and returnType in ["classification"] and searchType in ["lookup"] and requestType == "GET":
                    # Needs to be specifically targeted on a particular compound ...
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, returnType, outputType])
                    # pD = {"classification_type": "simple"}
                    pD = {}
                    # pD = {nameSpace: identifier}
                    ret, retCode = ureq.getUnWrapped(baseUrl, endPoint, pD, headers={}, httpCodesCatch=httpCodesCatch, returnContentType="JSON")
                #
                # Classification lookup via POST
                elif nameSpace in ["cid"] and returnType in ["classification"] and searchType in ["lookup"] and requestType == "POST":
                    # Needs to be specifically targeted on a particular compound ...
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, returnType, outputType])
                    # This is a long request return server codes may be observed 500
                    # pD = {nameSpace: identifier, "classification_type": "simple"}
                    pD = {nameSpace: identifier}
                    ret, retCode = ureq.postUnWrapped(baseUrl, endPoint, pD, headers={}, httpCodesCatch=httpCodesCatch, returnContentType="JSON")
                #
                #
                logger.debug("Result status code %r", retCode)
                self.assertEqual(retCode, testRetCode)
                # Only record responses carry the PC_Compounds payload checked here
                if retCode == 200 and returnType == "record":
                    pcId = str(ret["PC_Compounds"][0]["id"]["id"]["cid"])
                    self.assertEqual(pcId, testPcId)
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testPubChemFetch(self):
    """PubChem fetch test

    Fetches compound records by InChIKey using both GET and POST protocols;
    includes one identifier expected to return 404.
    """
    # (identifier, expected status code, expected cid or None)
    idTupList = [("JTOKYIBTLUQVQV-FGHQGBLESA-N", 404, None), ("CXHHBNMLPJOKQD-UHFFFAOYSA-N", 200, 78579)]
    nameSpace = "inchikey"
    domain = "compound"
    searchType = "lookup"
    returnType = "record"
    requestType = "GET"
    outputType = "JSON"
    baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
    # 404 responses are caught/returned rather than raised
    httpCodesCatch = [404]
    try:
        for (identifier, testRetCode, testPcId) in idTupList:
            for requestType in ["GET", "POST"]:
                ret, retCode = None, None
                pD = {}
                # NOTE(review): hL is a dict here but a list in the sibling
                # classification test -- confirm both forms are accepted as headers
                hL = {}
                ureq = UrlRequestUtil()
                # Record lookup via GET: identifier is URL-encoded into the path
                if nameSpace in ["cid", "name", "inchikey"] and returnType in ["record"] and searchType in ["lookup"] and requestType == "GET":
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, outputType])
                    ret, retCode = ureq.getUnWrapped(baseUrl, endPoint, pD, headers=hL, httpCodesCatch=httpCodesCatch, returnContentType="JSON", sslCert="enable")
                # Record lookup via POST: identifier is carried in the form payload
                elif nameSpace in ["cid", "name", "inchikey"] and returnType in ["record"] and searchType in ["lookup"] and requestType == "POST":
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, outputType])
                    pD = {nameSpace: identifier}
                    ret, retCode = ureq.postUnWrapped(baseUrl, endPoint, pD, headers=hL, httpCodesCatch=httpCodesCatch, returnContentType="JSON", sslCert="enable")
                #
                # Classification lookup via GET (unreachable here: returnType is fixed to "record")
                elif nameSpace in ["cid"] and returnType in ["classification"] and searchType in ["lookup"] and requestType == "GET":
                    # Needs to be specifically targeted on a particular compound ...
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, returnType, outputType])
                    pD = {"classification_type": "simple"}
                    # pD = {nameSpace: identifier}
                    ret, retCode = ureq.getUnWrapped(baseUrl, endPoint, pD, headers=hL, httpCodesCatch=httpCodesCatch, returnContentType="JSON", sslCert="enable")
                #
                # Classification lookup via POST (unreachable here: returnType is fixed to "record")
                elif nameSpace in ["cid"] and returnType in ["classification"] and searchType in ["lookup"] and requestType == "POST":
                    # Needs to be specifically targeted on a particular compound ...
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, returnType, outputType])
                    # This is a long request return server codes may be observed 500
                    pD = {nameSpace: identifier, "classification_type": "simple"}
                    # pD = {nameSpace: identifier}
                    ret, retCode = ureq.postUnWrapped(baseUrl, endPoint, pD, headers=hL, httpCodesCatch=httpCodesCatch, returnContentType="JSON", sslCert="enable")
                #
                #
                logger.debug("Result status code %r", retCode)
                self.assertEqual(retCode, testRetCode)
                if retCode == 200:
                    pcId = ret["PC_Compounds"][0]["id"]["id"]["cid"]
                    self.assertEqual(pcId, testPcId)
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()