def full_heatfile(disease, network):
    """
    Write a heatfile with an association score for every node in a network.

    The network ID frame is read from ``./networks/<network>IDFrame.csv`` and
    indexed by its ``geneID`` column. The associations Open Targets returns
    for ``disease`` are left-joined onto it, so genes without an association
    are kept; every missing value in the joined frame is filled with 1e-10.
    One ``<geneID> <score>`` line per gene is written to ``./data/heatfile``.

    disease = disease of interest (EFO code)
    network = source network (stringdb or omnipath)
    """
    # ``basestring`` only exists on Python 2. Fall back to ``str`` so the
    # index check below does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Read in network info frame
    idFrame = pd.read_csv('./networks/' + network + 'IDFrame.csv')
    idFrame = idFrame.set_index('geneID')

    ot = OpenTargetsClient()
    all_associations = ot.filter_associations(disease=disease)

    # Build the association frame in one step; growing a DataFrame row by row
    # with ``.loc`` copies the frame for every association.
    assocFrame = pd.DataFrame(
        [(r['target']['id'], r['association_score']['overall'])
         for r in all_associations],
        columns=['geneID', 'score'],
    )

    # Join with sample data
    idFrame = idFrame.join(
        assocFrame.set_index('geneID'), how='left').fillna(0.0000000001)

    # Output to heatfile
    # NOTE(review): the earlier docstring called this file tab-separated, but
    # the format string separates the fields with a single space - confirm
    # which one the consumer of the heatfile expects.
    with open('./data/heatfile', 'w') as outfile:
        for index, row in idFrame.iterrows():
            # Only rows indexed by a string are written; presumably this
            # skips rows whose geneID is missing - verify against the CSV.
            if isinstance(index, string_types):
                outfile.write("{} {}\n".format(index, row['score']))
def run_analysis(queryType, identifier, verbose=False):
    """
    Query Open Targets associations and summarise their scores.

    Args:
        queryType (str): name of the filter keyword handed to the
            association query (the original documentation names "disease"
            or "target").
        identifier (str): value for that filter, i.e. a disease ID or a
            target ID.
        verbose (bool): print progress and warning messages to STDOUT.

    Returns:
        dict with the keys "queryTerm" (the identifier),
        "target-disease-pairs", "score_max", "score_min", "score_mean" and
        "score_std". Everything except "queryTerm" stays None when the
        query returns no associations.
    """
    # Start from an all-None result so that an empty query can return early.
    analysisOutput = {"queryTerm": identifier, "target-disease-pairs": None}
    for stat in ("max", "min", "mean", "std"):
        analysisOutput["score_" + stat] = None

    # Fetch the associations and narrow them down to the queried entity.
    all_associations = OpenTargetsClient().filter_associations()
    matching = all_associations.filter(**{queryType: identifier})

    # Hand the filtered result over to the parser.
    OT_parser = OTAR_result_parser(matching, verbose=verbose)

    # Without any association there is nothing to compute statistics on.
    if len(OT_parser) == 0:
        if verbose:
            print('[Warning] The result set is empty. Can\'t calculate stats.')
        return analysisOutput

    if verbose:
        print('[Info] Number of associations: {}'.format(len(OT_parser)))

    # Target-disease pairs first, then the score statistics in a fixed order.
    analysisOutput['target-disease-pairs'] = OT_parser.get_target_disease_pairs()
    for stat in ("max", "min", "mean", "std"):
        getter = getattr(OT_parser, 'get_association_score_' + stat)
        analysisOutput['score_' + stat] = getter()

    return analysisOutput
def simple_heatfile(disease):
    """
    Write a heatfile with one line per association returned for a disease.

    Each line of ``./data/heatfile`` holds the target ID followed by the
    overall association score; targets without an association are absent.

    disease = disease of interest (EFO code)
    """
    associations = OpenTargetsClient().filter_associations(disease=disease)

    with open('./data/heatfile', 'w') as heatfile:
        for association in associations:
            gene_id = association['target']['id']
            overall_score = association['association_score']['overall']
            heatfile.write("{} {}\n".format(gene_id, overall_score))
class OpenTargetClientTest(unittest.TestCase):
    """Tests for ``OpenTargetsClient`` and its ``Connection``.

    The tests call the client end to end and compare against concrete
    identifiers (e.g. BRAF / ENSG00000157764), so they presumably need a
    reachable Open Targets API - confirm before running them offline.
    """

    _AUTO_GET_TOKEN = 'auto'

    def setUp(self):
        self.client = OpenTargetsClient()
        self.http2_client = OpenTargetsClient(use_http2=True)
        self.auth_client = OpenTargetsClient(
            auth_app_name='test',
            auth_secret='test',
        )

    def tearDown(self):
        # Close every client opened in setUp, not only the default one.
        for client in (self.client, self.http2_client, self.auth_client):
            client.close()

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _assertBrafIsFirstSearchHit(self, client):
        """Check that searching 'BRAF' with ``client`` returns the BRAF target first."""
        target_symbol = 'BRAF'
        response = client.search(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-target')
        self.assertEqual(result['id'], 'ENSG00000157764')
        self.assertEqual(result['data']['approved_symbol'], target_symbol)

    def _assertSearchYieldsAllResults(self, client):
        """Check that iterating a 'BRAF' search yields ``len(response)`` items."""
        response = client.search('BRAF')
        total_results = len(response)
        self.assertGreater(total_results, 0)
        self.assertEqual(total_results, sum(1 for _ in response))

    def _brafAssociationsWithFields(self):
        """Return the BRAF associations restricted to the export fields."""
        return self.client.get_associations_for_target(
            'BRAF',
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])

    def _removeOnCleanup(self, filename):
        """Delete ``filename`` once the test ends, even if an assertion failed."""
        def remove_if_present():
            if os.path.isfile(filename):
                os.remove(filename)

        self.addCleanup(remove_if_present)

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------
    def testSearchTargetCorrectResult(self):
        self._assertBrafIsFirstSearchHit(self.client)

    def testSearchTargetFetchAllResults(self):
        self._assertSearchYieldsAllResults(self.client)

    def testSearchTargetFetchAllResultsAuth(self):
        self._assertSearchYieldsAllResults(self.auth_client)

    def testSearchTargetCorrectResultHTTP2(self):
        self._assertBrafIsFirstSearchHit(self.http2_client)

    def testSearchTargetFetchAllResultsHTTP2(self):
        self._assertSearchYieldsAllResults(self.http2_client)

    def testSearchDiseaseCorrectResult(self):
        disease_label = 'cancer'
        response = self.client.search(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-disease')
        self.assertEqual(result['id'], 'EFO_0000311')

    # #this takes a lot to run
    # def testSearchDiseaseFetchAllResults(self):
    #     disease_label = 'cancer'
    #     response = self.client.search(disease_label, size = 100)
    #     total_results = len(response)
    #     self.assertGreater(total_results,0)
    #     c=0
    #     for i in response:
    #         c+=1
    #     self.assertEqual(total_results, c)
    #     print(total_results, c)

    # ------------------------------------------------------------------
    # Associations
    # ------------------------------------------------------------------
    def testGetAssociation(self):
        association_id = "ENSG00000157764-EFO_0005803"
        response = self.client.get_association(association_id)
        # assertEqual, not the assertEquals alias removed in Python 3.12.
        self.assertEqual(len(response), 1)
        self.assertEqual(association_id, response[0]['id'])

    def testFilterAssociations(self):
        response = self.client.filter_associations()
        self.assertGreater(len(response), 0)

        # Each additional filter has to shrink the result set.
        total = response.info.total
        response.filter(target='ENSG00000157764')
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(direct=True)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(scorevalue_min=0.2)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(therapeutic_area='efo_0000701')
        self.assertLess(len(response), total)

        results = []
        for i, r in enumerate(response):
            print(i, r['id'], r['association_score']['overall'],
                  r['disease']['efo_info']['label'])
            results.append(r)

        # All filters passed at once must give the same results ...
        response_multi = self.client.filter_associations(
            target='ENSG00000157764',
            direct=True,
            scorevalue_min=0.2,
            therapeutic_area='efo_0000701')
        self.assertEqual(len(response_multi), response.info.total)
        for i, r in enumerate(response_multi):
            self.assertEqual(results[i]['id'], r['id'])

        # ... and so must the same filters chained in a different order.
        response_chained = self.client.filter_associations().filter(
            target='ENSG00000157764').filter(direct=True).filter(
                therapeutic_area='efo_0000701').filter(scorevalue_min=0.2)
        self.assertEqual(len(response_chained), response.info.total)
        for i, r in enumerate(response_chained):
            self.assertEqual(results[i]['id'], r['id'])

    def testGetAssociationsForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol,
                                                           size=30)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'],
                             target_symbol)
            # Only a prefix of the results is inspected.
            if i > 90:
                break

    def testGetAssociationsForDisease(self):
        disease_label = 'cancer'
        response = self.client.get_associations_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        for result in response:
            self.assertEqual(result['disease']['efo_info']['label'],
                             disease_label)

    # ------------------------------------------------------------------
    # Evidence
    # ------------------------------------------------------------------
    @unittest.expectedFailure
    def testGetEvidence(self):
        evidence_id = "03fba0599655b9040012b29cf0de8060"
        response = self.client.get_evidence(evidence_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(evidence_id, response[0]['id'])

    def testFilterEvidence(self):
        response = self.client.filter_evidence()
        self.assertGreater(len(response), 0)

    def testGetEvidenceForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_evidence_for_target(target_symbol,
                                                       size=1000)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'],
                             target_symbol)
            # Only a prefix of the results is inspected.
            if i > 100:
                break

    def testGetSimilarTargets(self):
        target_symbol = 'BRAF'
        response = self.client.get_similar_target(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], target_symbol)
        self.assertEqual(result['object']['label'], 'KRAS')

    def testGetSimilarDisease(self):
        disease_label = 'ulcerative colitis'
        response = self.client.get_similar_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], disease_label)
        self.assertEqual(result['object']['label'], "Crohn's disease")

    def testGetEvidenceForDisease(self):
        disease_label = 'medulloblastoma'
        response = self.client.get_evidence_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['disease']['efo_info']['label'],
                         disease_label)

    # ------------------------------------------------------------------
    # Serialisation
    # ------------------------------------------------------------------
    def testSerialiseToJson(self):
        target_symbol = 'BRAF'

        # test iterable version
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json()
        parsed_json = [json.loads(i) for i in json_output]
        self.assertEqual(items, len(parsed_json))

        # test non iterable version
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json(iterable=False)
        parsed_json = json.loads(json_output)
        self.assertEqual(items, len(parsed_json))

    def testResultToPandasDataFrame(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        dataframe = response.to_dataframe()
        self.assertEqual(len(dataframe), items)

    def testResultToPandasCSV(self):
        response = self._brafAssociationsWithFields()
        items = len(response)
        self.assertGreater(len(response), 0)
        csv = response.to_csv()
        filename = 'braf_associations.csv'
        self._removeOnCleanup(filename)
        # Close the handle deterministically instead of relying on GC.
        with open(filename, 'wb') as csv_file:
            csv_file.write(csv.encode('utf-8'))
        self.assertEqual(len(csv.split('\n')), items + 2)
        self.assertTrue(os.path.isfile(filename))

    def testResultToPandasExcel(self):
        response = self._brafAssociationsWithFields()
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.xls'
        self._removeOnCleanup(filename)
        response.to_excel(filename)
        self.assertTrue(os.path.isfile(filename))

    def testResultToFile(self):
        response = self._brafAssociationsWithFields()
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.json.gz'
        self._removeOnCleanup(filename)
        response.to_file(filename)
        self.assertTrue(os.path.isfile(filename))

    def testSerialiseToObject(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        obj_output = list(response.to_object())
        for result in obj_output:
            self.assertIsNotNone(result.target.id)
        # Compare lengths directly; a loop index would be undefined if the
        # object list came back empty.
        self.assertEqual(items, len(obj_output))

    # ------------------------------------------------------------------
    # Stats, connection and scoring
    # ------------------------------------------------------------------
    def testGetStats(self):
        response = self.client.get_stats()
        self.assertEqual(len(response), 0)

    def testAutodetectPost(self):
        self.assertFalse(
            Connection._auto_detect_post({'target': ['ENSG00000157764']}))
        self.assertTrue(
            Connection._auto_detect_post({
                'target': [
                    'ENSG00000157764',
                    'ENSG00000171862',
                    'ENSG00000136997',
                    'ENSG00000012048',
                    'ENSG00000139618',
                ]
            }))

    def testGetToPost(self):
        response = self.client.conn.get('/platform/public/association/filter',
                                        params={
                                            'target': [
                                                'ENSG00000157764',
                                                'ENSG00000171862',
                                                'ENSG00000136997',
                                                'ENSG00000012048',
                                                'ENSG00000139618',
                                            ]
                                        })
        self.assertGreater(len(response), 0)

    def testCustomScore(self):
        def score_with_datatype_subset(datatypes, results):
            # Yield (rounded score, disease id, per-datatype scores) for
            # every result whose harmonic sum over ``datatypes`` is truthy.
            for r in results:
                datatype_scores = r['association_score']['datatypes']
                filtered_scores = [datatype_scores[dt] for dt in datatypes]
                custom_score = HarmonicSumScorer.harmonic_sum(filtered_scores)
                if custom_score:
                    yield (round(custom_score, 3), r['disease']['id'],
                           dict(zip(datatypes, filtered_scores)))

        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        self.assertGreater(len(response), 0)
        scored = 0
        for scored, filtered_data in enumerate(
                score_with_datatype_subset(
                    ['genetic_association', 'known_drug', 'somatic_mutation'],
                    response),
                start=1):
            self.assertGreater(filtered_data[0], 0.)
        # There can never be more scored rows than associations. Counting
        # from 1 keeps this check well-defined when nothing was scored.
        self.assertLessEqual(scored, len(response))

    def testGetAvailableEndpoints(self):
        endpoints = self.client.conn.get_api_endpoints()
        self.assertIn('/platform/public/search', endpoints)

    def testGetEndpointDocs(self):
        docs = self.client.conn.api_endpoint_docs('/platform/public/search')
        self.assertGreater(len(docs['get']['parameters']), 0)

    def testPing(self):
        response = self.client.conn.ping()
        if isinstance(response, bool):
            self.assertTrue(response)
        else:
            self.assertIsNotNone(response)
import json ot = OpenTargetsClient() data = {} #codes = ['EFO_0000249', 'EFO_0003885', 'EFO_0000685', 'EFO_0000313', 'EFO_0001071', 'EFO_0000305', 'EFO_0002890', 'EFO_0000478', 'EFO_1001516', 'EFO_0007460', 'Orphanet_2781', 'Orphanet_3261'] #uniqueEFOs - disease id extracted from every OpenTargets evidence object and reduced to set of unique values codes = set() with open('uniqueEFOs.txt') as infile: for line in infile: codes.add(line.rstrip()) otdata = ot.filter_associations() j = 0 k = len(codes) for code in codes: j += 1 #if (j>333) : exit() print(j, "/", k) label = otdata.filter(disease=code)[0]['disease']['efo_info']['label'] assocs = otdata.filter(disease=code).total if 'EFO_' in code: url = 'https://www.ebi.ac.uk/ols/api/ontologies/efo/terms?iri=http://www.ebi.ac.uk/efo/' + code elif 'Orpha' in code: url = 'https://www.ebi.ac.uk/ols/api/ontologies/efo/terms?iri=http://www.orpha.net/ORDO/' + code elif 'HP_' in code: url = 'https://www.ebi.ac.uk/ols/api/ontologies/efo/terms?iri=http://purl.obolibrary.org/obo/' + code else:
disease_id = datadict_disease.get("disease") #First, print an error message if no argument is provided. #Then, run appropriat code depending on input argument (target or disase) if target_id == None and disease_id == None: print( "Error: target_id or disease_id must be provided. Provide a target_id such as ENSG00000197386 or disease_id such as Orphanet_399." ) elif target_id != None and disease_id != None: print( "Error: One argument should be provided at a time. Provide either a target_id such as ENSG00000197386 or disease_id such as Orphanet_399." ) else: #get associations client = OpenTargetsClient() response = client.filter_associations() #filter, calculate and print required information for a target or disease if target_id != None: #filter for target_id. Print target_id, disease_id & association_score.overall. filtered = response.filter(target=target_id) target_filtered = filtered.to_dataframe() target_fin = target_filtered[[ "target.id", "disease.id", "association_score.overall" ]] print(target_fin) #calculate and print maximum, minimum and average and standard deviation values of association_score.overall maximum = target_fin["association_score.overall"].max() minimum = target_fin["association_score.overall"].min() average = target_fin["association_score.overall"].mean() std_dev = target_fin["association_score.overall"].std()