def generate_targets_file(disease_id, outpath, anno_type: str = 'entrezgene') -> None:
    """Write the known-drug targets of a disease to a file, one gene per line.

    Fetches the associations of ``disease_id`` from OpenTargets, filtered with
    ``datatype='known_drug'``, looks the target ids up through MyGene.info and
    writes the requested annotation of every target that has one. Targets
    whose lookup result lacks ``anno_type`` are skipped.

    :param disease_id: EFO code from the disease.
    :param outpath: Path of the file to write; an existing file is overwritten.
    :param anno_type: `entrezgene` for Entrez Id or `symbol` for Gene symbol.
    :return: None
    """
    ot = OpenTargetsClient()
    # NOTE(review): the field name 'association_scoredatatypes' is kept as
    # found; it may have been meant as 'association_score.datatypes' -- confirm
    # against the OpenTargets API before changing it.
    assoc = ot.get_associations_for_disease(
        disease_id,
        fields=['association_scoredatatypes', 'target.id'],
    ).filter(datatype='known_drug')
    ensembl_list = [a['target']['id'] for a in assoc]

    # TODO use the converters.get_converter_to_entrez
    mg = mygene.MyGeneInfo()
    id_mappings = mg.getgenes(ensembl_list, fields=anno_type)

    with open(outpath, 'w+') as outfile:
        for mapping in id_mappings:
            if anno_type in mapping:
                # Format instead of writing the raw value: file.write() only
                # accepts str and would raise TypeError on a numeric id.
                outfile.write(f'{mapping[anno_type]}\n')
def generate_disease_gene_association_file(disease_id, outpath, anno_type: str = 'entrezgene'):
    """Write the OpenTargets association scores of a disease to a TSV file.

    Each output line has the form ``<converted gene id>\\t<overall score>``.
    Targets that the converter returned by ``get_converter_to_entrez`` does
    not contain are left out.

    :param disease_id: The EFO code to the disease.
    :param outpath: The path to the file to be created.
    :param anno_type: `entrezgene` for Entrez Id or `symbol` for Gene symbol.
        NOTE(review): accepted but not read anywhere in this function.
    :return: None
    """
    client = OpenTargetsClient()
    associations = client.get_associations_for_disease(
        disease_id, fields=['association_scoreoverall', 'target.id'])

    # Collect (target id, overall score) pairs in a single pass over the response.
    scored_targets = []
    for record in associations:
        target_id = record['target']['id']
        overall = record['association_score']['overall']
        scored_targets.append((target_id, overall))

    # Convert all target ids at once, then keep only the ids that were converted.
    converter = get_converter_to_entrez([target_id for target_id, _ in scored_targets])
    rows = []
    for target_id, overall in scored_targets:
        if target_id in converter:
            rows.append((converter[target_id], overall))

    with open(outpath, 'w+') as outfile:
        for gene, overall in rows:
            outfile.write(f'{gene}\t{overall}\n')
def search_disease(x):
    """Return the OpenTargets associations for a disease as a dataframe.

    :param x: Disease identifier handed to ``get_associations_for_disease``.
    :return: Whatever ``to_dataframe()`` returns for that response.
    """
    # Imported locally, as in the original, so the module loads without the
    # ``opentargets`` package being installed.
    from opentargets import OpenTargetsClient

    client = OpenTargetsClient()
    return client.get_associations_for_disease(x).to_dataframe()
def download_for_disease(disease_id, outpath):
    """Write the Entrez ids of a disease's known-drug targets, one per line.

    Fetches the associations of ``disease_id`` from OpenTargets, filtered with
    ``datatype='known_drug'``, looks the target ids up through MyGene.info and
    writes the ``entrezgene`` value of every target that has one.

    :param disease_id: Disease identifier handed to
        ``get_associations_for_disease``.
    :param outpath: Path of the file to write; an existing file is overwritten.
    :return: None
    """
    ot = OpenTargetsClient()
    # NOTE(review): the field name 'associationscore.datatypes' is kept as
    # found; it may have been meant as 'association_score.datatypes' -- confirm
    # against the OpenTargets API before changing it.
    assoc = ot.get_associations_for_disease(
        disease_id,
        fields=['associationscore.datatypes', 'target.id'],
    ).filter(datatype='known_drug')
    ensembl_list = [a['target']['id'] for a in assoc]

    mg = mygene.MyGeneInfo()
    id_mappings = mg.getgenes(ensembl_list, fields="entrezgene")

    with open(outpath, 'w+') as outfile:
        for mapping in id_mappings:
            if 'entrezgene' in mapping:
                # Format instead of writing the raw value: file.write() only
                # accepts str and would raise TypeError on a numeric id.
                outfile.write(f"{mapping['entrezgene']}\n")
target_as = ot.get_associations_for_target(target_id) print(target_as) l = [] print('Target id associations:\n') for a in target_as: print(a['id'], a['association_score']['overall']) l.append(a['association_score']['overall']) print('Maximum target id association:', np.max(l)) print('Minimum target id association:', np.min(l)) print('Mean target id association:', np.mean(l)) print('Standard deviation of target id association:', np.std(l)) except ValueError: print('Target id not found') #Checks for a disease id as input if disease_id is not None: try: disease_as = ot.get_associations_for_disease(disease_id) l = [] for a in disease_as: print(a['id'], a['association_score']['overall']) l.append(a['association_score']['overall']) print('Maximum disease id association:', np.max(l)) print('Minimum disease id association:', np.min(l)) print('Mean disease id association:', np.mean(l)) print('Standard deviation of disease id association:', np.std(l)) except ValueError: print('Disease id not found')
class OpenTargetClientTest(unittest.TestCase):
    """Tests for ``OpenTargetsClient`` and its connection helpers.

    NOTE(review): the assertions use concrete ids and labels (BRAF,
    ENSG00000157764, EFO_0000311, ...), so these presumably run against a
    reachable OpenTargets service -- confirm before running offline.
    """

    _AUTO_GET_TOKEN = 'auto'

    def setUp(self):
        self.client = OpenTargetsClient()
        self.http2_client = OpenTargetsClient(use_http2=True)
        self.auth_client = OpenTargetsClient(
            auth_app_name='test',
            auth_secret='test',
        )

    def tearDown(self):
        # Close every client created in setUp, not only the default one.
        for client in (self.client, self.http2_client, self.auth_client):
            client.close()

    def testSearchTargetCorrectResult(self):
        target_symbol = 'BRAF'
        response = self.client.search(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-target')
        self.assertEqual(result['id'], 'ENSG00000157764')
        self.assertEqual(result['data']['approved_symbol'], target_symbol)

    def testSearchTargetFetchAllResults(self):
        target_symbol = 'BRAF'
        response = self.client.search(target_symbol)
        total_results = len(response)
        self.assertGreater(total_results, 0)
        # Iterating must yield exactly as many items as len() reported.
        fetched = sum(1 for _ in response)
        self.assertEqual(total_results, fetched)

    def testSearchTargetFetchAllResultsAuth(self):
        target_symbol = 'BRAF'
        response = self.auth_client.search(target_symbol)
        total_results = len(response)
        self.assertGreater(total_results, 0)
        fetched = sum(1 for _ in response)
        self.assertEqual(total_results, fetched)

    def testSearchTargetCorrectResultHTTP2(self):
        target_symbol = 'BRAF'
        response = self.http2_client.search(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-target')
        self.assertEqual(result['id'], 'ENSG00000157764')
        self.assertEqual(result['data']['approved_symbol'], target_symbol)

    def testSearchTargetFetchAllResultsHTTP2(self):
        target_symbol = 'BRAF'
        response = self.http2_client.search(target_symbol)
        total_results = len(response)
        self.assertGreater(total_results, 0)
        fetched = sum(1 for _ in response)
        self.assertEqual(total_results, fetched)

    def testSearchDiseaseCorrectResult(self):
        disease_label = 'cancer'
        response = self.client.search(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-disease')
        self.assertEqual(result['id'], 'EFO_0000311')

    # #this takes a lot to run
    # def testSearchDiseaseFetchAllResults(self):
    #     disease_label = 'cancer'
    #     response = self.client.search(disease_label, size = 100)
    #     total_results = len(response)
    #     self.assertGreater(total_results,0)
    #     c=0
    #     for i in response:
    #         c+=1
    #     self.assertEqual(total_results, c)
    #     print(total_results, c)

    def testGetAssociation(self):
        association_id = "ENSG00000157764-EFO_0005803"
        response = self.client.get_association(association_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(association_id, response[0]['id'])

    def testFilterAssociations(self):
        response = self.client.filter_associations()
        self.assertGreater(len(response), 0)

        # Each additional filter is expected to shrink the result set.
        total = response.info.total
        response.filter(target='ENSG00000157764')
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(direct=True)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(scorevalue_min=0.2)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(therapeutic_area='efo_0000701')
        self.assertLess(len(response), total)

        results = []
        for i, r in enumerate(response):
            print(i, r['id'], r['association_score']['overall'],
                  r['disease']['efo_info']['label'])
            results.append(r)

        # The same filters given in one call must return the same results.
        response_multi = self.client.filter_associations(
            target='ENSG00000157764',
            direct=True,
            scorevalue_min=0.2,
            therapeutic_area='efo_0000701')
        self.assertEqual(len(response_multi), response.info.total)
        for i, r in enumerate(response_multi):
            self.assertEqual(results[i]['id'], r['id'])

        # ... and so must the same filters chained in a different order.
        response_chained = self.client.filter_associations().filter(
            target='ENSG00000157764').filter(direct=True).filter(
            therapeutic_area='efo_0000701').filter(scorevalue_min=0.2)
        self.assertEqual(len(response_chained), response.info.total)
        for i, r in enumerate(response_chained):
            self.assertEqual(results[i]['id'], r['id'])

    def testGetAssociationsForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol, size=30)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'], target_symbol)
            # Only the first results are checked to keep the test short.
            if i > 90:
                break

    def testGetAssociationsForDisease(self):
        disease_label = 'cancer'
        response = self.client.get_associations_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        for result in response:
            self.assertEqual(result['disease']['efo_info']['label'], disease_label)

    @unittest.expectedFailure
    def testGetEvidence(self):
        evidence_id = "03fba0599655b9040012b29cf0de8060"
        response = self.client.get_evidence(evidence_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(evidence_id, response[0]['id'])

    def testFilterEvidence(self):
        response = self.client.filter_evidence()
        self.assertGreater(len(response), 0)

    def testGetEvidenceForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_evidence_for_target(target_symbol, size=1000)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'], target_symbol)
            if i > 100:
                break

    def testGetSimilarTargets(self):
        target_symbol = 'BRAF'
        response = self.client.get_similar_target(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], target_symbol)
        self.assertEqual(result['object']['label'], 'KRAS')

    def testGetSimilarDisease(self):
        disease_label = 'ulcerative colitis'
        response = self.client.get_similar_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], disease_label)
        self.assertEqual(result['object']['label'], "Crohn's disease")

    def testGetEvidenceForDisease(self):
        disease_label = 'medulloblastoma'
        response = self.client.get_evidence_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['disease']['efo_info']['label'], disease_label)

    def testSerialiseToJson(self):
        target_symbol = 'BRAF'

        # test iterable version
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json()
        parsed_json = [json.loads(i) for i in json_output]
        self.assertEqual(items, len(parsed_json))

        # test non iterable version
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json(iterable=False)
        parsed_json = json.loads(json_output)
        self.assertEqual(items, len(parsed_json))

    def testResultToPandasDataFrame(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        dataframe = response.to_dataframe()
        self.assertEqual(len(dataframe), items)

    def testResultToPandasCSV(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        items = len(response)
        self.assertGreater(len(response), 0)
        csv = response.to_csv()
        filename = 'braf_associations.csv'
        # Context manager so the handle is closed before the file is removed.
        with open(filename, 'wb') as csv_file:
            csv_file.write(csv.encode('utf-8'))
        self.assertEqual(len(csv.split('\n')), items + 2)
        self.assertTrue(os.path.isfile(filename))
        os.remove(filename)

    def testResultToPandasExcel(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.xls'
        response.to_excel(filename)
        self.assertTrue(os.path.isfile(filename))
        os.remove(filename)

    def testResultToFile(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.json.gz'
        response.to_file(filename)
        self.assertTrue(os.path.isfile(filename))
        os.remove(filename)

    def testSerialiseToObject(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        obj_output = list(response.to_object())
        for result in obj_output:
            self.assertIsNotNone(result.target.id)
        # Compare against the list length rather than a loop index, which
        # would be undefined if no object were produced.
        self.assertEqual(items, len(obj_output))

    def testGetStats(self):
        response = self.client.get_stats()
        self.assertEqual(len(response), 0)

    def testAutodetectPost(self):
        self.assertFalse(
            Connection._auto_detect_post({'target': ['ENSG00000157764']}))
        self.assertTrue(
            Connection._auto_detect_post({
                'target': [
                    'ENSG00000157764',
                    'ENSG00000171862',
                    'ENSG00000136997',
                    'ENSG00000012048',
                    'ENSG00000139618',
                ]
            }))

    def testGetToPost(self):
        response = self.client.conn.get(
            '/platform/public/association/filter',
            params={
                'target': [
                    'ENSG00000157764',
                    'ENSG00000171862',
                    'ENSG00000136997',
                    'ENSG00000012048',
                    'ENSG00000139618',
                ]
            })
        self.assertGreater(len(response), 0)

    def testCustomScore(self):
        def score_with_datatype_subset(datatypes, results):
            # Yield (rounded score, disease id, per-datatype scores) for every
            # result whose harmonic-sum score over `datatypes` is truthy.
            for r in results:
                datatype_scores = r['association_score']['datatypes']
                filtered_scores = [datatype_scores[dt] for dt in datatypes]
                custom_score = HarmonicSumScorer.harmonic_sum(filtered_scores)
                if custom_score:
                    yield (round(custom_score, 3), r['disease']['id'],
                           dict(zip(datatypes, filtered_scores)))

        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        self.assertGreater(len(response), 0)
        scored = score_with_datatype_subset(
            ['genetic_association', 'known_drug', 'somatic_mutation'],
            response)
        # Starts at -1 so the final check is well defined even when nothing
        # is yielded.
        last_index = -1
        for last_index, filtered_data in enumerate(scored):
            self.assertGreater(filtered_data[0], 0.)
        self.assertLess(last_index, len(response))

    def testGetAvailableEndpoints(self):
        endpoints = self.client.conn.get_api_endpoints()
        self.assertTrue('/platform/public/search' in endpoints)

    def testGetEndpointDocs(self):
        docs = self.client.conn.api_endpoint_docs('/platform/public/search')
        self.assertGreater(len(docs['get']['parameters']), 0)

    def testPing(self):
        response = self.client.conn.ping()
        if isinstance(response, bool):
            self.assertTrue(response)
        else:
            self.assertIsNotNone(response)
def _print_association_scores(associations):
    """Print one line per association, then summary statistics of the overall scores."""
    import statistics

    scores = []
    for a in associations:
        overall = a['association_score']['overall']
        scores.append(overall)
        print(a['target']['id'], a['disease']['id'], overall)

    print('-----')
    if not scores:
        # max()/min()/mean() all raise on an empty sequence.
        print('No associations found')
        return
    print('Max:', max(scores))
    print('Min:', min(scores))
    print('Ave:', statistics.mean(scores))
    # statistics.stdev() needs at least two data points.
    print('SD:', statistics.stdev(scores) if len(scores) > 1 else 'n/a')


def main(t, d):
    """Print the association scores and their statistics for a target or a disease.

    Exactly one of ``t`` and ``d`` must be non-empty. The associations are
    fetched through ``OpenTargetsClient``; if the query recorded on the
    response does not equal the requested id, a hint is printed and the
    process exits with status 1.

    :param t: Target id to query, or ``''`` when a disease is queried.
    :param d: Disease id to query, or ``''`` when a target is queried.
    :return: None
    :raises SystemExit: with code 1 when none or both ids are given, or when
        the service resolved the query to a different id.
    """
    import sys

    # Quick Check - arguments (none/both)
    if (t == '') == (d == ''):
        print('exactly one of target (t) or disease (d) must be given',
              file=sys.stderr)
        sys.exit(1)

    # Load Func re opentargets querying ( pip install opentargets )
    # https://opentargets.readthedocs.io/en/stable/index.html
    from opentargets import OpenTargetsClient
    ot = OpenTargetsClient()

    if t != '':
        # Run Analysis re TARGET (t)
        associations = ot.get_associations_for_target(t)
        queried = associations.info['query']['target']
        # Quick Check - correct 'target' queried
        if queried != [t]:
            print("please check best match (exited) -", [t], " vs ", queried)
            sys.exit(1)
    else:
        # Run Analysis re DISEASE (d)
        associations = ot.get_associations_for_disease(d)
        queried = associations.info['query']['disease']
        # Quick Check - correct 'disease' queried
        if queried != [d]:
            print("please check best match (exited) - ", [d], " vs ", queried)
            sys.exit(1)

    _print_association_scores(associations)