Exemplo n.º 1
0
def full_heatfile(disease, network):
    """
    Write a heat file with one "<geneID> <score>" line per network node.

    Every node in the network's ID frame is written.  Nodes without an
    association for the disease are not dropped: their missing score is
    replaced by a tiny placeholder (1e-10).  Fields are separated by a
    single space.

    disease = disease of interest (EFO code)
    network = source network (stringdb or omnipath); selects the file
              ./networks/<network>IDFrame.csv, which must have a
              'geneID' column

    The result is written to ./data/heatfile (overwritten on each call);
    nothing is returned.
    """
    # `basestring` only exists on Python 2.  Fall back to `str` so the
    # index type check below also works on Python 3 instead of raising
    # NameError.
    try:
        string_types = basestring  # noqa: F821 (Python 2 only)
    except NameError:
        string_types = str

    # Read in network info frame
    idFrame = pd.read_csv('./networks/' + network + 'IDFrame.csv')
    idFrame = idFrame.set_index('geneID')

    ot = OpenTargetsClient()
    all_associations = ot.filter_associations(disease=disease)

    # Build the frame of associations in one go; growing a DataFrame one
    # row at a time re-allocates on every insert.
    rows = [(r['target']['id'], r['association_score']['overall'])
            for r in all_associations]
    assocFrame = pd.DataFrame(rows, columns=['geneID', 'score'])

    # Left join keeps every network node; fillna replaces every missing
    # value in the joined frame (including unmatched scores).
    idFrame = idFrame.join(assocFrame.set_index('geneID'),
                           how='left').fillna(0.0000000001)

    # Output to heatfile
    with open('./data/heatfile', 'w') as outfile:
        for index, row in idFrame.iterrows():
            # Skip rows whose gene ID is not a string (e.g. NaN index)
            if isinstance(index, string_types):
                outfile.write("{} {}\n".format(index, row['score']))
def run_analysis(queryType, identifier, verbose = False):
    """Fetch associations for one disease or target and summarise their scores.

    Args:
        queryType (str): name of the filter keyword passed to the association
            query (the original documentation lists disease or target).
        identifier (str): disease ID or target ID, matching the query type.
        verbose (bool): print progress/warning messages to STDOUT when True.
    Returns:
        Dictionary:
        {
            "queryTerm" : <str>
            "target-disease-pairs" : <pandas.dataframe>,
            "score_max" : <float>,
            "score_min" : <float>,
            "score_mean" : <float>,
            "score_std" : <float>
        }

        Every entry except "queryTerm" stays None when the query returns
        no associations.
    """

    # Result skeleton: the query term plus placeholders for each statistic.
    analysisOutput = {"queryTerm": identifier}
    for pending_key in ("target-disease-pairs", "score_max", "score_min",
                        "score_mean", "score_std"):
        analysisOutput[pending_key] = None

    # Start from the unfiltered association query, then narrow it down
    # with the requested filter.
    associations = OpenTargetsClient().filter_associations()
    selection = associations.filter(**{queryType: identifier})

    # Hand the narrowed result over to the parser.
    parser = OTAR_result_parser(selection, verbose=verbose)

    # Nothing to summarise for an empty result set.
    if len(parser) == 0:
        if verbose:
            print("[Warning] The result set is empty. Can't calculate stats.")
        return analysisOutput

    if verbose:
        print('[Info] Number of associations: {}'.format(len(parser)))

    # Collect the pairs first, then the score statistics (values are
    # evaluated in the order written).
    analysisOutput.update({
        'target-disease-pairs': parser.get_target_disease_pairs(),
        'score_max': parser.get_association_score_max(),
        'score_min': parser.get_association_score_min(),
        'score_mean': parser.get_association_score_mean(),
        'score_std': parser.get_association_score_std(),
    })

    return analysisOutput
Exemplo n.º 3
0
def simple_heatfile(disease):
    """
    Write one "<target id> <overall score>" line per association of a disease.

    Only targets returned by the association query are written; the two
    fields on each line are separated by a single space.

    disease = disease of interest (EFO code)

    Output goes to ./data/heatfile (overwritten on each call).
    """
    client = OpenTargetsClient()
    associations = client.filter_associations(disease=disease)
    with open('./data/heatfile', 'w') as heatfile:
        for assoc in associations:
            gene_id = assoc['target']['id']
            overall = assoc['association_score']['overall']
            heatfile.write("{} {}\n".format(gene_id, overall))
Exemplo n.º 4
0
class OpenTargetClientTest(unittest.TestCase):
    """Tests for ``OpenTargetsClient`` and its ``Connection``.

    Each test gets three fresh clients (default, HTTP/2 and authenticated)
    from ``setUp``.  Nothing is mocked in this class, so the tests
    presumably need access to the live API -- confirm before running them
    in an offline environment.
    """
    _AUTO_GET_TOKEN = 'auto'

    def setUp(self):
        self.client = OpenTargetsClient()
        self.http2_client = OpenTargetsClient(use_http2=True)
        self.auth_client = OpenTargetsClient(
            auth_app_name='test',
            auth_secret='test',
        )

    def tearDown(self):
        # Close every client created in setUp, not only the default one,
        # so no connection is left open between tests.
        for client in (self.client, self.http2_client, self.auth_client):
            client.close()

    # -- shared checks ----------------------------------------------------

    def _assertFirstHitIsBraf(self, response, target_symbol):
        # The first search hit must be the BRAF target entry.
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-target')
        self.assertEqual(result['id'], 'ENSG00000157764')
        self.assertEqual(result['data']['approved_symbol'], target_symbol)

    def _assertIteratesAllResults(self, response):
        # Iterating the response must yield exactly len(response) items.
        total_results = len(response)
        self.assertGreater(total_results, 0)
        self.assertEqual(total_results, sum(1 for _ in response))

    # -- search -----------------------------------------------------------

    def testSearchTargetCorrectResult(self):
        target_symbol = 'BRAF'
        self._assertFirstHitIsBraf(self.client.search(target_symbol),
                                   target_symbol)

    def testSearchTargetFetchAllResults(self):
        self._assertIteratesAllResults(self.client.search('BRAF'))

    def testSearchTargetFetchAllResultsAuth(self):
        self._assertIteratesAllResults(self.auth_client.search('BRAF'))

    def testSearchTargetCorrectResultHTTP2(self):
        target_symbol = 'BRAF'
        self._assertFirstHitIsBraf(self.http2_client.search(target_symbol),
                                   target_symbol)

    def testSearchTargetFetchAllResultsHTTP2(self):
        self._assertIteratesAllResults(self.http2_client.search('BRAF'))

    def testSearchDiseaseCorrectResult(self):
        disease_label = 'cancer'
        response = self.client.search(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['type'], 'search-object-disease')
        self.assertEqual(result['id'], 'EFO_0000311')

    # #this takes a lot to run
    # def testSearchDiseaseFetchAllResults(self):
    #     disease_label = 'cancer'
    #     response = self.client.search(disease_label, size = 100)
    #     total_results = len(response)
    #     self.assertGreater(total_results,0)
    #     c=0
    #     for i in response:
    #         c+=1
    #     self.assertEqual(total_results, c)
    #     print(total_results, c)

    # -- associations -----------------------------------------------------

    def testGetAssociation(self):
        association_id = "ENSG00000157764-EFO_0005803"
        response = self.client.get_association(association_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(association_id, response[0]['id'])

    def testFilterAssociations(self):
        # filter() is applied to the same response object each time; every
        # extra filter must shrink the result set.
        response = self.client.filter_associations()
        self.assertGreater(len(response), 0)
        total = response.info.total
        response.filter(target='ENSG00000157764')
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(direct=True)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(scorevalue_min=0.2)
        self.assertLess(len(response), total)
        total = response.info.total
        response.filter(therapeutic_area='efo_0000701')
        self.assertLess(len(response), total)
        results = []
        for i, r in enumerate(response):
            print(i, r['id'], r['association_score']['overall'],
                  r['disease']['efo_info']['label'])
            results.append(r)
        # Passing all filters at once must give the same results ...
        response_multi = self.client.filter_associations(
            target='ENSG00000157764',
            direct=True,
            scorevalue_min=0.2,
            therapeutic_area='efo_0000701')
        self.assertEqual(len(response_multi), response.info.total)
        for i, r in enumerate(response_multi):
            self.assertEqual(results[i]['id'], r['id'])
        # ... and so must chaining them, regardless of filter order.
        response_chained = self.client.filter_associations().filter(
            target='ENSG00000157764').filter(direct=True).filter(
                therapeutic_area='efo_0000701').filter(scorevalue_min=0.2)
        self.assertEqual(len(response_chained), response.info.total)
        for i, r in enumerate(response_chained):
            self.assertEqual(results[i]['id'], r['id'])

    def testGetAssociationsForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol,
                                                           size=30)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'],
                             target_symbol)
            # Only check the first ~90 results to bound the run time.
            if i > 90:
                break

    def testGetAssociationsForDisease(self):
        disease_label = 'cancer'
        response = self.client.get_associations_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        for result in response:
            self.assertEqual(result['disease']['efo_info']['label'],
                             disease_label)

    # -- evidence ---------------------------------------------------------

    @unittest.expectedFailure
    def testGetEvidence(self):
        evidence_id = "03fba0599655b9040012b29cf0de8060"
        response = self.client.get_evidence(evidence_id)
        self.assertEqual(len(response), 1)
        self.assertEqual(evidence_id, response[0]['id'])

    def testFilterEvidence(self):
        response = self.client.filter_evidence()
        self.assertGreater(len(response), 0)

    def testGetEvidenceForTarget(self):
        target_symbol = 'BRAF'
        response = self.client.get_evidence_for_target(target_symbol,
                                                       size=1000)
        self.assertGreater(len(response), 0)
        for i, result in enumerate(response):
            self.assertEqual(result['target']['gene_info']['symbol'],
                             target_symbol)
            # Only check the first ~100 results to bound the run time.
            if i > 100:
                break

    def testGetSimilarTargets(self):
        target_symbol = 'BRAF'
        response = self.client.get_similar_target(target_symbol)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], target_symbol)
        self.assertEqual(result['object']['label'], 'KRAS')

    def testGetSimilarDisease(self):
        disease_label = 'ulcerative colitis'
        response = self.client.get_similar_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['subject']['label'], disease_label)
        self.assertEqual(result['object']['label'], "Crohn's disease")

    def testGetEvidenceForDisease(self):
        disease_label = 'medulloblastoma'
        response = self.client.get_evidence_for_disease(disease_label)
        self.assertGreater(len(response), 0)
        result = next(response)
        self.assertEqual(result['disease']['efo_info']['label'], disease_label)

    # -- serialisation ----------------------------------------------------

    def testSerialiseToJson(self):
        target_symbol = 'BRAF'
        # Iterable version: one JSON document per result.
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json()
        parsed_json = [json.loads(i) for i in json_output]
        self.assertEqual(items, len(parsed_json))
        # Non-iterable version: a single JSON document holding all results.
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        json_output = response.to_json(iterable=False)
        parsed_json = json.loads(json_output)
        self.assertEqual(items, len(parsed_json))

    def testResultToPandasDataFrame(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        dataframe = response.to_dataframe()
        self.assertEqual(len(dataframe), items)

    def testResultToPandasCSV(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        items = len(response)
        self.assertGreater(len(response), 0)
        csv = response.to_csv()
        filename = 'braf_associations.csv'
        # Context manager guarantees the handle is flushed and closed
        # before the file is inspected.
        with open(filename, 'wb') as csv_file:
            csv_file.write(csv.encode('utf-8'))
        try:
            # One header line plus the empty string after the final
            # newline account for the "+ 2".
            self.assertEqual(len(csv.split('\n')), items + 2)
            self.assertTrue(os.path.isfile(filename))
        finally:
            # Do not leave the file behind when an assertion fails.
            if os.path.isfile(filename):
                os.remove(filename)

    def testResultToPandasExcel(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.xls'
        response.to_excel(filename)
        self.assertTrue(os.path.isfile(filename))
        os.remove(filename)

    def testResultToFile(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(
            target_symbol,
            fields=[
                'association_score.*', 'target.gene_info.symbol',
                'disease.efo_info.*'
            ])
        self.assertGreater(len(response), 0)
        filename = 'braf_associations.json.gz'
        response.to_file(filename)
        self.assertTrue(os.path.isfile(filename))
        os.remove(filename)

    def testSerialiseToObject(self):
        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        items = len(response)
        self.assertGreater(len(response), 0)
        obj_output = list(response.to_object())
        for result in obj_output:
            self.assertIsNotNone(result.target.id)
        # Compare against the list length rather than a leaked loop index,
        # which would be undefined if no object was produced.
        self.assertEqual(items, len(obj_output))

    # -- stats, connection and scoring --------------------------------------

    def testGetStats(self):
        response = self.client.get_stats()
        self.assertEqual(len(response), 0)

    def testAutodetectPost(self):
        # A single target is expected not to trigger POST, five are.
        self.assertFalse(
            Connection._auto_detect_post({'target': ['ENSG00000157764']}))
        self.assertTrue(
            Connection._auto_detect_post({
                'target': [
                    'ENSG00000157764',
                    'ENSG00000171862',
                    'ENSG00000136997',
                    'ENSG00000012048',
                    'ENSG00000139618',
                ]
            }))

    def testGetToPost(self):
        response = self.client.conn.get('/platform/public/association/filter',
                                        params={
                                            'target': [
                                                'ENSG00000157764',
                                                'ENSG00000171862',
                                                'ENSG00000136997',
                                                'ENSG00000012048',
                                                'ENSG00000139618',
                                            ]
                                        })
        self.assertGreater(len(response), 0)

    def testCustomScore(self):
        def score_with_datatype_subset(datatypes, results):
            # Yield (score, disease id, per-datatype scores) for every
            # result whose harmonic-sum score over `datatypes` is non-zero.
            for r in results:
                datatype_scores = r['association_score']['datatypes']
                filtered_scores = [datatype_scores[dt] for dt in datatypes]
                custom_score = HarmonicSumScorer.harmonic_sum(filtered_scores)
                if custom_score:
                    yield (round(custom_score, 3), r['disease']['id'],
                           dict(zip(datatypes, filtered_scores)))

        target_symbol = 'BRAF'
        response = self.client.get_associations_for_target(target_symbol)
        self.assertGreater(len(response), 0)
        scored = 0
        for filtered_data in score_with_datatype_subset(
                ['genetic_association', 'known_drug', 'somatic_mutation'],
                response):
            self.assertGreater(filtered_data[0], 0.)
            scored += 1

        # At least one result must be scored (previously an empty run hit
        # an undefined loop variable), and never more than were fetched.
        self.assertGreater(scored, 0)
        self.assertLessEqual(scored, len(response))

    def testGetAvailableEndpoints(self):
        endpoints = self.client.conn.get_api_endpoints()
        self.assertIn('/platform/public/search', endpoints)

    def testGetEndpointDocs(self):
        docs = self.client.conn.api_endpoint_docs('/platform/public/search')
        self.assertGreater(len(docs['get']['parameters']), 0)

    def testPing(self):
        response = self.client.conn.ping()
        if isinstance(response, bool):
            self.assertTrue(response)
        else:
            self.assertIsNotNone(response)
import json

# For every disease code listed in uniqueEFOs.txt, look up its label and
# number of associations, then pick the ontology term URL that matches the
# code's prefix.
ot = OpenTargetsClient()

# Not written to in the visible part of this script -- presumably filled
# further down; verify.
data = {}

#codes = ['EFO_0000249', 'EFO_0003885', 'EFO_0000685', 'EFO_0000313', 'EFO_0001071', 'EFO_0000305', 'EFO_0002890', 'EFO_0000478', 'EFO_1001516', 'EFO_0007460', 'Orphanet_2781', 'Orphanet_3261']

#uniqueEFOs - disease id extracted from every OpenTargets evidence object and reduced to set of unique values

# One code per line; a set drops duplicates (iteration order is arbitrary).
codes = set()
with open('uniqueEFOs.txt') as infile:
    for line in infile:
        codes.add(line.rstrip())

# Unfiltered association query that is re-filtered for each code below.
otdata = ot.filter_associations()
j = 0  # progress counter
k = len(codes)
for code in codes:
    j += 1
    #if (j>333) : exit()
    print(j, "/", k)
    # NOTE(review): filter() is called twice with the same code, i.e. once
    # per looked-up value -- confirm whether the result could be reused, and
    # whether repeated filter() calls on the shared `otdata` object
    # accumulate state across loop iterations.
    label = otdata.filter(disease=code)[0]['disease']['efo_info']['label']
    assocs = otdata.filter(disease=code).total
    # Choose the term IRI base from the code prefix (EFO, Orphanet or HP).
    if 'EFO_' in code:
        url = 'https://www.ebi.ac.uk/ols/api/ontologies/efo/terms?iri=http://www.ebi.ac.uk/efo/' + code
    elif 'Orpha' in code:
        url = 'https://www.ebi.ac.uk/ols/api/ontologies/efo/terms?iri=http://www.orpha.net/ORDO/' + code
    elif 'HP_' in code:
        url = 'https://www.ebi.ac.uk/ols/api/ontologies/efo/terms?iri=http://purl.obolibrary.org/obo/' + code
    else:
Exemplo n.º 6
0
# `datadict_disease` and `target_id` are defined earlier in the script
# (not visible in this excerpt); disease_id is None when no "disease" key
# is present.
disease_id = datadict_disease.get("disease")

# First, print an error message if no argument is provided, or if both are.
# Then, run the appropriate code depending on the input argument (target or
# disease).
if target_id == None and disease_id == None:
    print(
        "Error: target_id or disease_id must be provided. Provide a target_id such as ENSG00000197386 or disease_id such as Orphanet_399."
    )
elif target_id != None and disease_id != None:
    print(
        "Error: One argument should be provided at a time. Provide either a target_id such as ENSG00000197386 or disease_id such as Orphanet_399."
    )
else:
    # Exactly one of target_id / disease_id is set from here on.
    # Get the unfiltered association query.
    client = OpenTargetsClient()
    response = client.filter_associations()

    # Filter, calculate and print the required information for a target or disease.
    if target_id != None:
        # Filter for target_id. Print target_id, disease_id & association_score.overall.
        filtered = response.filter(target=target_id)
        target_filtered = filtered.to_dataframe()
        # Keep only the three columns that are reported.
        target_fin = target_filtered[[
            "target.id", "disease.id", "association_score.overall"
        ]]
        print(target_fin)
        # Calculate the maximum, minimum, average and standard deviation of association_score.overall.
        maximum = target_fin["association_score.overall"].max()
        minimum = target_fin["association_score.overall"].min()
        average = target_fin["association_score.overall"].mean()
        std_dev = target_fin["association_score.overall"].std()