Exemplo n.º 1
0
    def expand_chemical_substance(self, node):
        """Expand a chemical-substance node with targets, indications and side effects.

        Nodes are created through ``self.add_node_smart`` and linked through
        ``self.orangeboard.add_rel``:

        * protein targets from ``QueryChEMBL`` (with a probability) and from
          ``QueryPharos`` -> ``physically_interacts_with``;
        * terms from ``DrugMapper.map_drug_to_ontology`` ->
          ``indicated_for`` / ``contraindicated_for``;
        * terms from ``DrugMapper.map_drug_to_hp_with_side_effects`` ->
          ``causes_or_contributes_to``.

        A relationship is only added when ``add_node_smart`` returned a node.

        :param node: node whose ``nodetype`` is ``"chemical_substance"``;
            ``node.desc`` drives the target queries and ``node.name`` the
            ontology mappings.
        """
        assert node.nodetype == "chemical_substance"
        compound_desc = node.desc

        target_uniprot_ids = QueryChEMBL.get_target_uniprot_ids_for_drug(compound_desc)
        if target_uniprot_ids is not None:
            # Take the probability straight from the mapping: looking it up with
            # the prefix-stripped id fails whenever the keys carry "UniProtKB:".
            for target_uniprot_id_curie, probability in target_uniprot_ids.items():
                target_uniprot_id = target_uniprot_id_curie.replace("UniProtKB:", "")
                gene_names = self.query_mygene_obj.convert_uniprot_id_to_gene_symbol(target_uniprot_id)
                # Same fallback as the Pharos branch below when no symbol is known.
                node_desc = ';'.join(list(gene_names)) if gene_names is not None else ''
                target_node = self.add_node_smart('protein', target_uniprot_id, desc=node_desc)
                if target_node is not None:
                    self.orangeboard.add_rel('physically_interacts_with', 'ChEMBL', node, target_node,
                                             prob=probability, extended_reltype='targets')

        targets = QueryPharos.query_drug_name_to_targets(compound_desc)
        if targets is not None:
            for target in targets:
                uniprot_id = QueryPharos.query_target_uniprot_accession(str(target["id"]))
                assert '-' not in uniprot_id
                gene_symbol = self.query_mygene_obj.convert_uniprot_id_to_gene_symbol(uniprot_id)
                if gene_symbol is not None:
                    gene_symbol = ';'.join(list(gene_symbol))
                else:
                    gene_symbol = ''
                target_node = self.add_node_smart('protein', uniprot_id, desc=gene_symbol)
                if target_node is not None:
                    self.orangeboard.add_rel('physically_interacts_with', 'Pharos', node, target_node,
                                             extended_reltype="targets")

        res_dict = DrugMapper.map_drug_to_ontology(node.name)
        # Indications and contraindications are handled identically except for
        # the relationship type, so process them with one loop.
        term_sets_by_reltype = (
            ('indicated_for', res_dict['indications']),
            ('contraindicated_for', res_dict['contraindications']),
        )
        for reltype, ont_terms in term_sets_by_reltype:
            for ont_term in ont_terms:
                if ont_term.startswith(('DOID:', 'OMIM:')):
                    ont_name = QueryBioLink.get_label_for_disease(ont_term)
                    ont_node = self.add_node_smart('disease', ont_term, desc=ont_name)
                elif ont_term.startswith('HP:'):
                    ont_name = QueryBioLink.get_label_for_phenotype(ont_term)
                    ont_node = self.add_node_smart('phenotypic_feature', ont_term, desc=ont_name)
                else:
                    # terms from other ontologies are ignored
                    continue
                if ont_node is not None:
                    self.orangeboard.add_rel(reltype, 'MyChem.info', node, ont_node,
                                             extended_reltype=reltype)

        res_hp_set = DrugMapper.map_drug_to_hp_with_side_effects(node.name)
        for hp_term in res_hp_set:
            if hp_term.startswith('HP:'):
                hp_name = QueryBioLink.get_label_for_phenotype(hp_term)
                hp_node = self.add_node_smart('phenotypic_feature', hp_term, desc=hp_name)
                if hp_node is not None:
                    self.orangeboard.add_rel('causes_or_contributes_to', 'SIDER', node, hp_node,
                                             extended_reltype="causes_or_contributes_to")
Exemplo n.º 2
0
    def test_get_label_for_disease(self):
        """Check the labels ``QBL.get_label_for_disease`` returns for two disease ids."""
        # NOTE: the check for an unknown id ('XXX' -> 'UNKNOWN') is currently disabled.

        # DOID:1498 is cholera; no label is expected for it
        cholera_label = QBL.get_label_for_disease('DOID:1498')
        self.assertIsNone(cholera_label)

        # OMIM:605543 is Parkinson's disease 4
        parkinson_label = QBL.get_label_for_disease('OMIM:605543')
        self.assertEqual(parkinson_label, "Parkinson Disease 4, Autosomal Dominant")
Exemplo n.º 3
0
    def test_get_bio_process_entity(self):
        """Check ``QBL.get_bio_process_entity`` for a valid and an invalid GO id."""
        valid_resp = QBL.get_bio_process_entity('GO:0097289')
        self.assertIsNotNone(valid_resp)
        # the literal string "None" is treated as "nothing to compare"
        if valid_resp != "None":
            actual = json.loads(valid_resp)
            expected = json.loads(get_from_test_file('GO:0097289'))
            self.assertEqual(actual, expected)

        # an invalid id (code == 500) must come back as the string "None"
        invalid_resp = QBL.get_bio_process_entity('GO:00972890')
        self.assertIsNotNone(invalid_resp)
        self.assertEqual(invalid_resp, "None")
Exemplo n.º 4
0
    def test_get_disease_entity(self):
        """Check ``QBL.get_disease_entity`` for a valid and an invalid DOID id."""
        valid_resp = QBL.get_disease_entity('DOID:3965')
        self.assertIsNotNone(valid_resp)
        # the literal string "None" is treated as "nothing to compare"
        if valid_resp != "None":
            actual = json.loads(valid_resp)
            expected = json.loads(get_from_test_file('DOID:3965'))
            self.assertEqual(actual, expected)

        # an invalid id (code == 500) must come back as the string "None"
        invalid_resp = QBL.get_disease_entity('DOID:39650')
        self.assertIsNotNone(invalid_resp)
        self.assertEqual(invalid_resp, "None")
Exemplo n.º 5
0
    def test_get_phenotype_entity(self):
        """Check ``QBL.get_phenotype_entity`` for a valid and an invalid HP id."""
        valid_resp = QBL.get_phenotype_entity('HP:0011515')
        self.assertIsNotNone(valid_resp)
        # the literal string "None" is treated as "nothing to compare"
        if valid_resp != "None":
            actual = json.loads(valid_resp)
            expected = json.loads(get_from_test_file('HP:0011515'))
            self.assertEqual(actual, expected)

        # an invalid id (code == 500) must come back as the string "None"
        invalid_resp = QBL.get_phenotype_entity('HP:00115150')
        self.assertIsNotNone(invalid_resp)
        self.assertEqual(invalid_resp, "None")
Exemplo n.º 6
0
    def test_get_anatomy_entity(self):
        """Check ``QBL.get_anatomy_entity`` for a valid and an invalid UBERON id."""
        valid_resp = QBL.get_anatomy_entity('UBERON:0004476')
        self.assertIsNotNone(valid_resp)
        # the literal string "None" is treated as "nothing to compare"
        if valid_resp != "None":
            actual = json.loads(valid_resp)
            expected = json.loads(get_from_test_file('UBERON:0004476'))
            self.assertDictEqual(actual, expected)

        # an invalid id (code == 500) must come back as the string "None"
        invalid_resp = QBL.get_anatomy_entity('UBERON:000447600')
        self.assertIsNotNone(invalid_resp)
        self.assertEqual(invalid_resp, "None")
Exemplo n.º 7
0
    def expand_ncbigene_microrna(self, node):
        """Expand a microRNA gene node identified by an ``NCBIGene:`` CURIE.

        Adds to ``self.orangeboard``:

        * ``anatont_anatomy`` nodes (``is_expressed_in``, source BioLink);
        * ``omim_disease`` / ``disont_disease`` and ``phenont_phenotype``
          nodes (``gene_assoc_with``, source BioLink);
        * ``uniprot_protein`` or ``ncbigene_microrna`` nodes for the genes
          regulated by the microRNA's mature forms
          (``controls_expression_of``, source miRGate).

        :param node: node whose ``name`` contains ``'NCBIGene:'``.
        """
        gene_curie = node.name
        assert 'NCBIGene:' in gene_curie

        # gene -> anatomy
        for anat_id, anat_desc in QueryBioLink.get_anatomies_for_gene(gene_curie).items():
            anat_node = self.orangeboard.add_node('anatont_anatomy', anat_id, desc=anat_desc)
            self.orangeboard.add_rel('is_expressed_in', 'BioLink', node, anat_node)

        # gene -> disease; the node type depends on the id's namespace
        for dis_id, dis_desc in QueryBioLink.get_diseases_for_gene_desc(gene_curie).items():
            if 'OMIM:' in dis_id:
                dis_nodetype = 'omim_disease'
            elif 'DOID:' in dis_id:
                dis_nodetype = 'disont_disease'
            else:
                print('Warning: unexpected disease ID: ' + dis_id)
                continue
            dis_node = self.orangeboard.add_node(dis_nodetype, dis_id, desc=dis_desc)
            self.orangeboard.add_rel('gene_assoc_with', 'BioLink', node, dis_node)

        # gene -> phenotype
        for phe_id, phe_desc in QueryBioLink.get_phenotypes_for_gene_desc(gene_curie).items():
            phe_node = self.orangeboard.add_node('phenont_phenotype', phe_id, desc=phe_desc)
            self.orangeboard.add_rel('gene_assoc_with', 'BioLink', node, phe_node)

        # microRNA -> regulated genes, via miRBase mature ids and miRGate
        entrez_number = int(gene_curie.replace('NCBIGene:', ''))
        for mirbase_id in self.query_mygene_obj.convert_entrez_gene_ID_to_mirbase_ID(entrez_number):
            for mature_id in QueryMiRBase.convert_mirbase_id_to_mature_mir_ids(mirbase_id):
                for regulated_symbol in QueryMiRGate.get_gene_symbols_regulated_by_microrna(mature_id):
                    protein_ids = self.query_mygene_obj.convert_gene_symbol_to_uniprot_id(regulated_symbol)
                    for protein_id in protein_ids:
                        assert '-' not in protein_id
                        protein_node = self.orangeboard.add_node('uniprot_protein', protein_id,
                                                                 desc=regulated_symbol)
                        self.orangeboard.add_rel('controls_expression_of', 'miRGate', node, protein_node)

                    # only a symbol without any protein may itself be a microRNA
                    if len(protein_ids) != 0 or not BioNetExpander.is_mir(regulated_symbol):
                        continue
                    regulated_entrez_ids = self.query_mygene_obj.convert_gene_symbol_to_entrez_gene_ID(
                        regulated_symbol)
                    for regulated_entrez_id in regulated_entrez_ids:
                        mir_node = self.orangeboard.add_node('ncbigene_microrna',
                                                             'NCBIGene:' + str(regulated_entrez_id),
                                                             desc=regulated_symbol)
                        # do not link the microRNA to itself
                        if mir_node != node:
                            self.orangeboard.add_rel('controls_expression_of', 'miRGate', node, mir_node)
Exemplo n.º 8
0
    def test_get_anatomies_for_phenotype(self):
        """Check the anatomy mapping returned for phenotype HP:0000003."""
        # HP:0000003 is multicystic kidney dysplasia
        actual_anatomies = QBL.get_anatomies_for_phenotype('HP:0000003')
        expected_anatomies = {'UBERON:0002113': 'kidney'}
        self.assertDictEqual(actual_anatomies, expected_anatomies)
Exemplo n.º 9
0
def expand_disont_disease(orangeboard, node):
    """Expand a Disease Ontology disease node on ``orangeboard``.

    Adds child diseases (``is_parent_of``, source DiseaseOntology), proteins
    found through the disease's MeSH ids (``gene_assoc_with``, source
    DisGeNet) and phenotypes (``phenotype_assoc_with``, source BioLink).

    :param orangeboard: object providing ``add_node`` and ``add_rel``.
    :param node: disease node; ``node.name`` is used as the Disease Ontology id.
    """
    disease_id = node.name

    # disease -> child diseases
    children = QueryDisont.query_disont_to_child_disonts_desc(disease_id)
    for child_id, child_desc in children.items():
        child_node = orangeboard.add_node('disont_disease', child_id, desc=child_desc)
        orangeboard.add_rel('is_parent_of', 'DiseaseOntology', node, child_node)

    # disease -> MeSH ids -> proteins; the protein is the relationship source
    for mesh_id in QueryDisont.query_disont_to_mesh_id(disease_id):
        proteins = QueryDisGeNet.query_mesh_id_to_uniprot_ids_desc(mesh_id)
        for protein_id, protein_desc in proteins.items():
            protein_node = orangeboard.add_node('uniprot_protein', protein_id, desc=protein_desc)
            orangeboard.add_rel('gene_assoc_with', 'DisGeNet', protein_node, node)

    # phenotypes associated with this disease; the phenotype is the source
    phenotypes = QueryBioLink.get_phenotypes_for_disease_desc(disease_id)
    for phenotype_id, phenotype_desc in phenotypes.items():
        phenotype_node = orangeboard.add_node('phenont_phenotype', phenotype_id, desc=phenotype_desc)
        orangeboard.add_rel('phenotype_assoc_with', 'BioLink', phenotype_node, node)
Exemplo n.º 10
0
    def test_add_disease_has_phenotype_relations(self):
        """Spot-check disease/phenotype relations in Neo4j against BioLink.

        Reads the Neo4j credentials from ``config.json``, picks disease nodes
        at the indexes returned by ``random_int_list(0, len - 1, 10)``, asks
        BioLink for each disease's phenotypes and asserts that
        ``conn.count_has_phenotype_relation`` returns 1 for every pair.
        """
        # context manager: the handle is released even if reading/parsing fails
        with open('config.json', 'r') as f:
            config = json.load(f)

        conn = Neo4jConnection(config['url'], config['username'], config['password'])
        try:
            disease_nodes = conn.get_disease_nodes()

            # generate random number array
            random_indexes = random_int_list(0, len(disease_nodes) - 1, 10)

            #   query BioLink
            relation_array = []
            for random_index in random_indexes:
                d_id = disease_nodes[random_index]
                hp_array = QueryBioLink.map_disease_to_phenotype(d_id)
                relation_array.extend({"d_id": d_id, "p_id": hp_id} for hp_id in hp_array)

            #   query Neo4j Database
            for relation_item in relation_array:
                result = conn.count_has_phenotype_relation(relation_item)
                self.assertEqual(result, 1)
        finally:
            # close the connection even when a query or an assertion fails
            conn.close()
Exemplo n.º 11
0
    def test_get_genes_for_anatomy(self):
        """Check the gene ids returned for anatomy UBERON:0000006, ignoring order."""
        # UBERON:0000006 is the islet of Langerhans
        returned_genes = QBL.get_genes_for_anatomy('UBERON:0000006')
        expected_genes = [
            'HGNC:1298', 'ENSEMBL:ENSG00000221639', 'HGNC:6357', 'HGNC:37207',
            'HGNC:378', 'MGI:108094', 'HGNC:40742', 'MGI:3694898',
            'MGI:3697701', 'HGNC:16713', 'ENSEMBL:ENSG00000260329',
            'MGI:1351502', 'MGI:1277193', 'MGI:1914926', 'HGNC:6081',
            'HGNC:29161', 'HGNC:16523', 'HGNC:16015', 'MGI:1920185',
            'HGNC:24483', 'HGNC:2458', 'HGNC:23472', 'HGNC:25538',
            'MGI:1924233', 'HGNC:31602', 'HGNC:7517', 'HGNC:28510',
            'HGNC:9772', 'HGNC:41140', 'HGNC:4057', 'HGNC:17407', 'HGNC:29859',
            'HGNC:51653', 'HGNC:20711', 'MGI:88588', 'MGI:3642232',
            'HGNC:42000', 'MGI:1916998', 'HGNC:491', 'HGNC:28177',
            'MGI:2177763', 'MGI:1914721', 'HGNC:18003', 'HGNC:13812',
            'HGNC:23817', 'HGNC:13452', 'MGI:2148019', 'HGNC:3391',
            'HGNC:15518', 'HGNC:28145', 'MGI:96432', 'HGNC:23488',
            'ENSEMBL:ENSG00000233895', 'HGNC:28695', 'MGI:3036267',
            'MGI:5477162', 'MGI:88175', 'HGNC:10808', 'HGNC:23467',
            'MGI:109589', 'HGNC:26777', 'MGI:108471', 'HGNC:3528',
            'HGNC:18817', 'ENSEMBL:ENSG00000177764', 'HGNC:5192', 'MGI:109124',
            'MGI:1336885', 'MGI:88610', 'HGNC:25629', 'HGNC:17859',
            'MGI:2685955', 'HGNC:21222', 'HGNC:52164', 'HGNC:29612',
            'HGNC:24913', 'MGI:2159649', 'HGNC:6532', 'HGNC:29125',
            'HGNC:1706', 'MGI:1917904', 'HGNC:1388', 'HGNC:1960',
            'ENSEMBL:ENSG00000260526', 'HGNC:16275', 'MGI:1922469',
            'HGNC:3518', 'HGNC:6172', 'MGI:97010', 'ENSEMBL:ENSG00000121848',
            'HGNC:24045', 'HGNC:6003', 'HGNC:24172', 'MGI:2429955',
            'HGNC:6130', 'MGI:1927126', 'HGNC:11513', 'MGI:1922935',
            'MGI:1922977', 'HGNC:26460'
        ]

        # compare as sets: the order of the returned ids is irrelevant
        returned_set = set(returned_genes)
        expected_set = set(expected_genes)
        self.assertSetEqual(returned_set, expected_set)
Exemplo n.º 12
0
    def test_get_phenotypes_for_gene(self):
        """Check the phenotype ids returned for gene NCBIGene:4750, ignoring order."""
        # NCBIGene:4750 is NEK1 (NIMA related kinase 1)
        returned_phenotypes = QBL.get_phenotypes_for_gene('NCBIGene:4750')
        expected_phenotypes = [
            'HP:0000003', 'HP:0000023', 'HP:0000054', 'HP:0000062',
            'HP:0000089', 'HP:0000105', 'HP:0000110', 'HP:0000171',
            'HP:0000204', 'HP:0000248', 'HP:0000256', 'HP:0000286',
            'HP:0000348', 'HP:0000358', 'HP:0000369', 'HP:0000377',
            'HP:0000470', 'HP:0000695', 'HP:0000773', 'HP:0000774',
            'HP:0000800', 'HP:0000882', 'HP:0000888', 'HP:0000895',
            'HP:0001169', 'HP:0001274', 'HP:0001302', 'HP:0001320',
            'HP:0001360', 'HP:0001395', 'HP:0001405', 'HP:0001511',
            'HP:0001538', 'HP:0001539', 'HP:0001541', 'HP:0001561',
            'HP:0001629', 'HP:0001631', 'HP:0001643', 'HP:0001655',
            'HP:0001744', 'HP:0001762', 'HP:0001769', 'HP:0001773',
            'HP:0001789', 'HP:0001831', 'HP:0002023', 'HP:0002089',
            'HP:0002093', 'HP:0002240', 'HP:0002323', 'HP:0002350',
            'HP:0002557', 'HP:0002566', 'HP:0002979', 'HP:0002980',
            'HP:0003016', 'HP:0003022', 'HP:0003026', 'HP:0003038',
            'HP:0003811', 'HP:0005054', 'HP:0005257', 'HP:0005349',
            'HP:0005766', 'HP:0005817', 'HP:0005873', 'HP:0006426',
            'HP:0006488', 'HP:0006610', 'HP:0006956', 'HP:0008501',
            'HP:0008873', 'HP:0009381', 'HP:0010306', 'HP:0010442',
            'HP:0010454', 'HP:0010579', 'HP:0012368', 'HP:0100259',
            'HP:0100750'
        ]

        # compare as sets: the order of the returned ids is irrelevant
        returned_set = set(returned_phenotypes)
        expected_set = set(expected_phenotypes)
        self.assertSetEqual(returned_set, expected_set)
Exemplo n.º 13
0
    def expand_phenotypic_feature(self, node):
        """Expand a phenotype node with its anatomies and sub-phenotypes.

        Anatomies from BioLink are linked ``node -affects-> anatomy``;
        sub-terms from SciGraph are linked ``sub_term -subclass_of-> node``.
        Terms for which ``add_node_smart`` returns ``None`` are skipped.

        :param node: phenotype node; ``node.name`` is used as the phenotype id.
        """
        phenotype_curie = node.name

        # phenotype => anatomy
        anatomies = QueryBioLink.get_anatomies_for_phenotype(phenotype_curie)
        for anat_id, anat_desc in anatomies.items():
            anat_node = self.add_node_smart("anatomical_entity", anat_id, desc=anat_desc)
            if anat_node is None:
                continue
            self.orangeboard.add_rel("affects", 'BioLink', node, anat_node,
                                     extended_reltype="affects")

        # sub-phenotype => phenotype
        sub_terms = QuerySciGraph.query_sub_ontology_terms_for_ontology_term(phenotype_curie)
        for sub_id, sub_desc in sub_terms.items():
            sub_node = self.add_node_smart("phenotypic_feature", sub_id, desc=sub_desc)
            if sub_node is None:
                continue
            self.orangeboard.add_rel("subclass_of", 'Monarch_SciGraph', sub_node, node,
                                     extended_reltype="subclass_of")
Exemplo n.º 14
0
    def test_update_phenotype_entity(self):
        """Compare stored ``extended_info_json`` of sampled phenotype nodes with BioLink.

        Reads the Neo4j credentials from ``config.json`` and picks phenotype
        nodes at the indexes returned by ``random_int_list(0, len - 1, 100)``.
        For each one it asserts that the Neo4j record has the same ``id`` and
        a non-null ``extended_info_json``; unless that value is the literal
        string ``"None"``, it must equal the JSON returned by BioLink.
        """
        # context manager: the handle is released even if reading/parsing fails
        with open('config.json', 'r') as f:
            config = json.load(f)

        conn = Neo4jConnection(config['url'], config['username'],
                               config['password'])
        try:
            nodes = conn.get_phenotype_nodes()

            # generate random number array
            random_indexes = random_int_list(0, len(nodes) - 1, 100)

            # show complete diffs when a JSON comparison fails
            self.maxDiff = None

            for i in random_indexes:
                # retrieve data from BioLink API
                node_id = nodes[i]
                extended_info_json_from_api = QueryBioLink.get_phenotype_entity(
                    node_id)

                # retrieve data from Neo4j
                node = conn.get_phenotype_node(node_id)
                self.assertIsNotNone(node['n']['id'])
                self.assertIsNotNone(node['n']['extended_info_json'])
                self.assertEqual(node_id, node['n']['id'])
                if node['n']['extended_info_json'] != "None":
                    self.assertEqual(json.loads(extended_info_json_from_api),
                                     json.loads(node['n']['extended_info_json']))
        finally:
            # close the connection even when a query or an assertion fails
            conn.close()
Exemplo n.º 15
0
def expand_phenont_phenotype(orangeboard, node):
    """Link a phenotype node to the anatomies BioLink reports for it.

    :param orangeboard: object providing ``add_node`` and ``add_rel``.
    :param node: phenotype node; ``node.name`` is used as the phenotype id.
    """
    # phenotype -> anatomy
    anatomies = QueryBioLink.get_anatomies_for_phenotype(node.name)
    for anat_id, anat_desc in anatomies.items():
        anat_node = orangeboard.add_node('anatont_anatomy', anat_id, desc=anat_desc)
        orangeboard.add_rel('phenotype_assoc_with', 'BioLink', node, anat_node)
Exemplo n.º 16
0
    def test_get_anatomies_for_gene(self):
        """Check the anatomy mapping returned for gene NCBIGene:407053."""
        # NCBIGene:407053 is MIR96
        actual_anatomies = QBL.get_anatomies_for_gene('NCBIGene:407053')
        expected_anatomies = {
            'UBERON:0000007': 'pituitary gland',
            'UBERON:0001301': 'epididymis',
            'UBERON:0000074': 'renal glomerulus',
            'UBERON:0000006': 'islet of Langerhans'
        }
        self.assertDictEqual(actual_anatomies, expected_anatomies)
Exemplo n.º 17
0
    def expand_phenont_phenotype(self, node):
        """Expand a phenotype node with its anatomies and sub-phenotypes.

        Anatomies from BioLink are linked with ``phenotype_assoc_with``;
        sub-phenotypes from SciGraph are linked ``node -is_parent_of-> sub``.

        :param node: phenotype node; ``node.name`` is used as the phenotype id.
        """
        phenotype_curie = node.name

        # phenotype => anatomy
        anatomies = QueryBioLink.get_anatomies_for_phenotype(phenotype_curie)
        for anat_id, anat_desc in anatomies.items():
            anat_node = self.orangeboard.add_node('anatont_anatomy', anat_id, desc=anat_desc)
            self.orangeboard.add_rel('phenotype_assoc_with', 'BioLink', node, anat_node)

        # phenotype => sub-phenotypes
        sub_phenotypes = QuerySciGraph.query_sub_phenotypes_for_phenotype(phenotype_curie)
        for sub_id, sub_desc in sub_phenotypes.items():
            sub_node = self.orangeboard.add_node('phenont_phenotype', sub_id, desc=sub_desc)
            self.orangeboard.add_rel('is_parent_of', 'Monarch_SciGraph', node, sub_node)
Exemplo n.º 18
0
    def expand_disease(self, node):
        """Expand a disease node.

        Gene Ontology terms from SciGraph are always added (``affects``).
        OMIM and MONDO diseases are then handed to
        ``expand_genetic_condition`` / ``expand_mondo_disease``. Any other id
        is treated as a Disease Ontology id and expanded with child diseases,
        DisGeNet proteins (via MeSH ids) and BioLink phenotypes.

        :param node: node whose ``nodetype`` is ``"disease"``.
        """
        assert node.nodetype == "disease"
        disease_curie = node.name

        go_terms = QuerySciGraph.get_gene_ontology_curie_ids_for_disease_curie_id(disease_curie)
        for go_curie, go_term in go_terms.items():
            go_nodetype = go_term["ontology"].replace(" ", "_")
            go_node = self.add_node_smart(go_nodetype, go_curie, desc=go_term["name"])
            if go_node is None:
                continue
            predicate = go_term["predicate"].replace(" ", "_")
            self.orangeboard.add_rel("affects", "Monarch_SciGraph", node, go_node,
                                     extended_reltype=predicate)

        # OMIM and MONDO diseases have dedicated expanders
        if "OMIM:" in disease_curie:
            self.expand_genetic_condition(node)
            return
        if "MONDO:" in disease_curie:
            self.expand_mondo_disease(node)
            return

        # from here on, the id is handled as a Disease Ontology id

        # child disease -> this disease
        children = QueryDisont.query_disont_to_child_disonts_desc(disease_curie)
        for child_id, child_desc in children.items():
            child_node = self.add_node_smart('disease', child_id, desc=child_desc)
            if child_node is None:
                continue
            self.orangeboard.add_rel('subclass_of', 'DiseaseOntology', child_node, node,
                                     extended_reltype="subclass_of")

        # protein -> this disease, looked up through the disease's MeSH ids
        for mesh_id in QueryDisont.query_disont_to_mesh_id(disease_curie):
            proteins = QueryDisGeNet.query_mesh_id_to_uniprot_ids_desc(mesh_id)
            for protein_id, protein_desc in proteins.items():
                assert '-' not in protein_id
                protein_node = self.add_node_smart('protein', protein_id, desc=protein_desc)
                if protein_node is None:
                    continue
                self.orangeboard.add_rel("gene_associated_with_condition", "DisGeNet",
                                         protein_node, node,
                                         extended_reltype="gene_associated_with_condition")

        # this disease -> phenotypes
        phenotypes = QueryBioLink.get_phenotypes_for_disease_desc(disease_curie)
        for phenotype_id, phenotype_desc in phenotypes.items():
            phenotype_node = self.add_node_smart("phenotypic_feature", phenotype_id,
                                                 desc=phenotype_desc)
            if phenotype_node is None:
                continue
            self.orangeboard.add_rel("has_phenotype", 'BioLink', node, phenotype_node,
                                     extended_reltype="has_phenotype")
Exemplo n.º 19
0
 def expand_mondo_disease(self, node):
     """Link a MONDO disease node to proteins of its associated HGNC genes.

     For every ``HGNC:`` gene id returned by BioLink, the first UniProt id
     and the first gene symbol yielded by the MyGene lookups are used to
     create a ``protein`` node, which is linked
     ``protein -gene_associated_with_condition-> node``. Genes without a
     UniProt id or gene symbol are skipped, as are proteins for which
     ``add_node_smart`` returns ``None``.

     :param node: disease node; ``node.name`` is passed to BioLink.
     """
     genes_list = QueryBioLink.get_genes_for_disease_desc(node.name)
     for hgnc_gene_id in genes_list:
         if not hgnc_gene_id.startswith("HGNC:"):
             continue
         uniprot_id_set = self.query_mygene_obj.convert_hgnc_gene_id_to_uniprot_id(hgnc_gene_id)
         if len(uniprot_id_set) == 0:
             continue
         uniprot_id = next(iter(uniprot_id_set))
         gene_symbol_set = self.query_mygene_obj.convert_uniprot_id_to_gene_symbol(uniprot_id)
         if len(gene_symbol_set) == 0:
             continue
         protein_node = self.add_node_smart('protein', uniprot_id,
                                            desc=next(iter(gene_symbol_set)))
         # same guard as the other expanders: add_node_smart may yield no node
         if protein_node is not None:
             self.orangeboard.add_rel("gene_associated_with_condition",
                                      "BioLink",
                                      protein_node, node, extended_reltype="associated_with_disease")
Exemplo n.º 20
0
    def test_get_phenotypes_for_gene_desc(self):
        """Check the phenotype id -> label mapping for gene NCBIGene:1080 (issue #22)."""
        # NCBIGene:1080 is CFTR (cystic fibrosis transmembrane conductance regulator)
        actual_phenotypes = QBL.get_phenotypes_for_gene_desc('NCBIGene:1080')
        expected_phenotypes = {
            'HP:0000952': 'Jaundice',
            'HP:0011227': 'Elevated C-reactive protein level',
            'HP:0030247': 'Splanchnic vein thrombosis',
            'HP:0012379': 'Abnormal enzyme/coenzyme activity',
            'HP:0001974': 'Leukocytosis'
        }
        self.assertDictEqual(actual_phenotypes, expected_phenotypes)
Exemplo n.º 21
0
    def test_map_disease_to_phenotype(self):
        """Check ``QBL.map_disease_to_phenotype`` for valid and invalid disease ids."""
        omim_phenotypes = QBL.map_disease_to_phenotype("OMIM:605543")
        self.assertIsNotNone(omim_phenotypes)
        expected_omim_phenotypes = [
            'HP:0000726', 'HP:0000738', 'HP:0001278', 'HP:0001300',
            'HP:0001824', 'HP:0002459', 'HP:0011999', 'HP:0100315'
        ]
        self.assertEqual(expected_omim_phenotypes, omim_phenotypes)

        doid_phenotypes = QBL.map_disease_to_phenotype("DOID:3218")
        self.assertIsNotNone(doid_phenotypes)
        self.assertEqual(57, len(doid_phenotypes))

        # invalid parameters (non-string, or malformed ids) yield an empty list
        for invalid_id in (605543, "OMIM_605543", "DOID_14477"):
            invalid_result = QBL.map_disease_to_phenotype(invalid_id)
            self.assertEqual([], invalid_result)
Exemplo n.º 22
0
    def test_get_phenotypes_for_disease_desc(self):
        """Check the phenotype id -> label mapping for disease OMIM:605543."""
        # OMIM:605543 is Parkinson's disease 4
        actual_phenotypes = QBL.get_phenotypes_for_disease_desc('OMIM:605543')
        expected_phenotypes = {
            'HP:0000726': 'Dementia',
            'HP:0001824': 'Weight loss',
            'HP:0002459': 'Dysautonomia',
            'HP:0100315': 'Lewy bodies',
            'HP:0011999': 'Paranoia',
            'HP:0001278': 'Orthostatic hypotension',
            'HP:0000738': 'Hallucinations',
            'HP:0001300': 'Parkinsonism'
        }
        self.assertDictEqual(actual_phenotypes, expected_phenotypes)
Exemplo n.º 23
0
    def expand_genetic_condition(self, node):
        """Expand an OMIM disease node with its genes and phenotypes.

        The OMIM lookup yields gene symbols and UniProt ids. Each gene symbol
        is converted to UniProt ids; a symbol without any UniProt id that
        ``BioNetExpander.is_mir`` recognises is added as ``microRNA`` node(s)
        instead. All collected UniProt ids become ``protein`` nodes. Genes
        are linked ``gene -gene_mutations_contribute_to-> node`` and BioLink
        phenotypes ``node -has_phenotype-> phenotype``.

        :param node: node whose ``name`` starts with ``"OMIM:"``.
        """
        assert node.name.startswith("OMIM:")
        res_dict = self.query_omim_obj.disease_mim_to_gene_symbols_and_uniprot_ids(node.name)
        uniprot_ids = res_dict['uniprot_ids']
        gene_symbols = res_dict['gene_symbols']
        if len(uniprot_ids) == 0 and len(gene_symbols) == 0:
            return  # nothing else to do, for this MIM number

        uniprot_ids_to_gene_symbols_dict = dict()
        for gene_symbol in gene_symbols:
            # Use a separate name here: rebinding ``uniprot_ids`` would discard
            # the ids reported by OMIM, which the loop further down must see.
            symbol_uniprot_ids = self.query_mygene_obj.convert_gene_symbol_to_uniprot_id(gene_symbol)
            if len(symbol_uniprot_ids) == 0:
                # this might be a microRNA
                if BioNetExpander.is_mir(gene_symbol):
                    entrez_gene_ids = self.query_mygene_obj.convert_gene_symbol_to_entrez_gene_ID(gene_symbol)
                    for entrez_gene_id in entrez_gene_ids:
                        curie_entrez_gene_id = 'NCBIGene:' + str(entrez_gene_id)
                        node2 = self.add_node_smart('microRNA',
                                                    curie_entrez_gene_id,
                                                    desc=gene_symbol)
                        if node2 is not None:
                            self.orangeboard.add_rel("gene_mutations_contribute_to",
                                                     "OMIM", node2, node,
                                                     extended_reltype="gene_mutations_contribute_to")
            for uniprot_id in symbol_uniprot_ids:
                uniprot_ids_to_gene_symbols_dict[uniprot_id] = gene_symbol

        # UniProt ids reported directly by OMIM; their description is the
        # ';'-joined gene symbols returned for the id, when available
        for uniprot_id in uniprot_ids:
            gene_symbol = self.query_mygene_obj.convert_uniprot_id_to_gene_symbol(uniprot_id)
            if gene_symbol is not None:
                gene_symbol_str = ';'.join(gene_symbol)
                uniprot_ids_to_gene_symbols_dict[uniprot_id] = gene_symbol_str

        source_node = node
        for uniprot_id, protein_desc in uniprot_ids_to_gene_symbols_dict.items():
            assert '-' not in uniprot_id
            target_node = self.add_node_smart('protein', uniprot_id, desc=protein_desc)
            if target_node is not None:
                self.orangeboard.add_rel("gene_mutations_contribute_to",
                                         "OMIM", target_node, source_node,
                                         extended_reltype="gene_mutations_contribute_to")

        # query for phenotypes associated with this disease
        phenotype_id_dict = QueryBioLink.get_phenotypes_for_disease_desc(node.name)
        for phenotype_id_str, phenotype_desc in phenotype_id_dict.items():
            phenotype_node = self.add_node_smart("phenotypic_feature", phenotype_id_str, desc=phenotype_desc)
            if phenotype_node is not None:
                self.orangeboard.add_rel("has_phenotype", 'BioLink', node, phenotype_node, extended_reltype="has_phenotype")
Exemplo n.º 24
0
    def add_disease_has_phenotype_relations():
        """Create disease/phenotype relations in Neo4j from BioLink data.

        Reads the Neo4j credentials from ``config.json``, queries BioLink for
        the phenotypes of every disease node, creates the relations in
        batches of 10000, then removes duplicated relations. Timing
        information is printed for each phase.

        NOTE(review): declared without ``self``; presumably used as a static
        method or plain function -- confirm against the enclosing scope.
        """
        from time import time

        # context manager: the handle is released even if reading/parsing fails
        with open('config.json', 'r') as f:
            config = json.load(f)

        conn = Neo4jConnection(config['url'], config['username'],
                               config['password'])
        try:
            disease_nodes = conn.get_disease_nodes()
            print("disease nodes count: " + str(len(disease_nodes)))

            t = time()

            array = []
            for d_id in disease_nodes:
                hp_array = QueryBioLink.map_disease_to_phenotype(d_id)
                if hp_array:
                    for hp_id in hp_array:
                        array.append({'d_id': d_id, 'p_id': hp_id})

            print("time for querying: %f" % (time() - t))
            t = time()

            print("relations count = " + str(len(array)))
            chunk_size = 10000
            # Step by start offset so no empty trailing batch is sent when the
            # relation count is a multiple of chunk_size (including zero).
            for start in range(0, len(array), chunk_size):
                conn.create_disease_has_phenotype(array[start:start + chunk_size])

            print("time for creating relations: %f" % (time() - t))
            t = time()

            #   remove duplicated relations
            conn.remove_duplicate_has_phenotype_relations()
            print("time for remove duplicate relations: %f" % (time() - t))
        finally:
            # close the connection even when a query fails part-way
            conn.close()
Exemplo n.º 25
0
    def test_get_phenotypes_for_gene(self):
        """Check the phenotype ids returned for gene NCBIGene:4750, ignoring order."""
        # NCBIGene:4750 is NEK1 (NIMA related kinase 1)
        returned_phenotypes = QBL.get_phenotypes_for_gene('NCBIGene:4750')
        expected_phenotypes = [
            'HP:0000003', 'HP:0000054', 'HP:0000062', 'HP:0000105',
            'HP:0000110', 'HP:0000171', 'HP:0000204', 'HP:0000248',
            'HP:0000773', 'HP:0000774', 'HP:0000888', 'HP:0000895',
            'HP:0001274', 'HP:0001302', 'HP:0001320', 'HP:0001395',
            'HP:0001629', 'HP:0001631', 'HP:0001762', 'HP:0001789',
            'HP:0002023', 'HP:0002089', 'HP:0002350', 'HP:0002566',
            'HP:0002980', 'HP:0003016', 'HP:0003022', 'HP:0003038',
            'HP:0005054', 'HP:0005257', 'HP:0005349', 'HP:0005766',
            'HP:0005817', 'HP:0005873', 'HP:0006426', 'HP:0006956',
            'HP:0010454', 'HP:0010579', 'HP:0100259'
        ]

        # compare as sets: the order of the returned ids is irrelevant
        returned_set = set(returned_phenotypes)
        expected_set = set(expected_phenotypes)
        self.assertSetEqual(returned_set, expected_set)
Exemplo n.º 26
0
    def add_disease_has_phenotype_relations():
        """Create disease-to-phenotype relations in Neo4j from BioLink data.

        For every disease id returned by ``conn.get_disease_nodes()``,
        ``QueryBioLink.map_disease_to_phenotype`` is queried and each resulting
        (disease id, phenotype id) pair is collected. The pairs are written with
        ``conn.create_disease_has_phenotype`` in batches of at most 10000, then
        ``conn.remove_duplicate_has_phenotype_relations()`` is run. The elapsed
        time of each phase is printed.

        The connection is closed even if one of the steps raises, and no empty
        batch is submitted when the number of pairs is zero or an exact
        multiple of the batch size.
        """
        from time import time

        # create the RTXConfiguration object
        rtxConfig = RTXConfiguration()

        conn = Neo4jConnection(rtxConfig.neo4j_bolt, rtxConfig.neo4j_username,
                               rtxConfig.neo4j_password)
        try:
            disease_nodes = conn.get_disease_nodes()
            print("disease nodes count: " + str(len(disease_nodes)))

            t = time()

            array = []
            for d_id in disease_nodes:
                hp_array = QueryBioLink.map_disease_to_phenotype(d_id)
                # a falsy lookup result contributes no relations
                if hp_array:
                    array.extend({'d_id': d_id, 'p_id': hp_id}
                                 for hp_id in hp_array)

            print("time for querying: %f" % (time() - t))
            t = time()

            print("relations count = " + str(len(array)))
            chunk_size = 10000
            # Stepping by chunk_size yields exactly the non-empty slices;
            # slicing past the end of the list is clamped automatically.
            for start in range(0, len(array), chunk_size):
                conn.create_disease_has_phenotype(
                    array[start:start + chunk_size])

            print("time for creating relations: %f" % (time() - t))
            t = time()

            #   remove duplicated relations
            conn.remove_duplicate_has_phenotype_relations()
            print("time for remove duplicate relations: %f" % (time() - t))
        finally:
            # release the connection on both success and failure
            conn.close()
Exemplo n.º 27
0
    def test_update_phenotype_entity(self):
        """Spot-check phenotype nodes stored in Neo4j against the BioLink API.

        A sample of node indexes is drawn with
        ``random_int_list(0, len(nodes)-1, 10)``. For each sampled node id the
        test asserts that the Neo4j node has a non-null ``id`` and
        ``extended_info_json``, that the id matches, and, unless the stored
        value is the string ``"None"``, that the stored JSON equals what
        ``QueryBioLink.get_phenotype_entity`` returns.

        The Neo4j connection is closed even when an assertion fails.
        """
        conn = Neo4jConnection(self.rtxConfig.neo4j_bolt, self.rtxConfig.neo4j_username, self.rtxConfig.neo4j_password)
        try:
            nodes = conn.get_phenotype_nodes()

            # generate random number array
            random_indexes = random_int_list(0, len(nodes)-1, 10)

            # show complete diffs for the JSON comparison below
            self.maxDiff = None

            for i in random_indexes:
                # retrieve data from BioLink API
                node_id = nodes[i]
                extended_info_json_from_api = QueryBioLink.get_phenotype_entity(node_id)

                # retrieve data from Neo4j
                node = conn.get_phenotype_node(node_id)
                stored = node['n']
                self.assertIsNotNone(stored['id'])
                self.assertIsNotNone(stored['extended_info_json'])
                self.assertEqual(node_id, stored['id'])
                # the literal string "None" is skipped rather than parsed
                if stored['extended_info_json'] != "None":
                    self.assertEqual(json.loads(extended_info_json_from_api), json.loads(stored['extended_info_json']))
        finally:
            # a failed assertion must not leak the connection
            conn.close()
Exemplo n.º 28
0
    def test_add_disease_has_phenotype_relations(self):
        """Verify sampled disease-phenotype relations exist exactly once in Neo4j.

        Disease indexes are sampled with
        ``random_int_list(0, len(disease_nodes) - 1, 10)``. For each sampled
        disease, ``QueryBioLink.map_disease_to_phenotype`` supplies phenotype
        ids, and every resulting pair must be counted exactly once by
        ``conn.count_has_phenotype_relation``.

        A falsy BioLink result is treated as "no phenotypes", and the
        connection is closed even when an assertion fails.
        """
        conn = Neo4jConnection(self.rtxConfig.neo4j_bolt,
                               self.rtxConfig.neo4j_username,
                               self.rtxConfig.neo4j_password)
        try:
            disease_nodes = conn.get_disease_nodes()

            # generate random number array
            random_indexes = random_int_list(0, len(disease_nodes) - 1, 10)

            #   query BioLink
            relation_array = []
            for random_index in random_indexes:
                d_id = disease_nodes[random_index]
                hp_array = QueryBioLink.map_disease_to_phenotype(d_id)
                if not hp_array:
                    # nothing to verify for this disease
                    continue
                relation_array.extend(
                    {"d_id": d_id, "p_id": hp_id} for hp_id in hp_array)

            #   query Neo4j Database
            for relation_item in relation_array:
                result = conn.count_has_phenotype_relation(relation_item)
                self.assertEqual(result, 1)
        finally:
            # a failed assertion must not leak the connection
            conn.close()
Exemplo n.º 29
0
    def test_get_genes_for_anatomy(self):
        """Check the genes returned for the anatomy term ``UBERON:0000006``.

        The result of ``QBL.get_genes_for_anatomy`` is compared with a fixed
        collection of gene identifiers; ordering and duplicates are ignored
        because both sides are compared as sets.
        """
        expected_genes = {
            'MGI:1929735', 'HGNC:28826', 'HGNC:31579',
            'HGNC:28995', 'HGNC:24172', 'HGNC:6130',
            'MGI:2429955', 'MGI:1922935', 'MGI:1927126',
            'HGNC:11513', 'MGI:3647725', 'HGNC:1960',
            'MGI:1922977', 'HGNC:6172', 'MGI:1922469',
            'HGNC:3518', 'MGI:97010', 'ENSEMBL:ENSG00000237404',
            'HGNC:20253', 'HGNC:14676', 'MGI:1915416',
            'MGI:1891697', 'HGNC:30237', 'HGNC:12970',
            'HGNC:30766', 'HGNC:51534', 'ENSEMBL:ENSG00000271946',
            'HGNC:17013', 'HGNC:25481', 'MGI:104755',
            'MGI:1336199', 'ENSEMBL:ENSG00000261159', 'MGI:98003',
            'MGI:1916344', 'HGNC:28412', 'MGI:88082',
            'MGI:2656825', 'MGI:1918345', 'HGNC:20189',
            'HGNC:33526', 'HGNC:31481', 'ENSEMBL:ENSG00000248632',
            'MGI:3615306', 'MGI:109177', 'HGNC:2697',
            'MGI:1342304', 'HGNC:3816', 'HGNC:11656',
            'HGNC:29332', 'HGNC:6294', 'HGNC:13787',
            'HGNC:18994', 'MGI:2387188', 'MGI:3584508',
            'ENSEMBL:ENSG00000244306', 'HGNC:29101', 'MGI:1919247',
            'HGNC:23433', 'HGNC:31393', 'MGI:1347084',
            'MGI:1316650', 'MGI:2179507', 'MGI:96163',
            'MGI:2146012', 'MGI:1916043', 'HGNC:10435',
            'HGNC:48629', 'HGNC:23094', 'HGNC:25139',
            'MGI:2444946', 'HGNC:34236', 'HGNC:26466',
            'HGNC:3725', 'MGI:2141207', 'HGNC:50580',
            'HGNC:18294', 'HGNC:33754', 'MGI:1924150',
            'HGNC:12499', 'HGNC:17451', 'NCBIGene:100506691',
            'HGNC:2522', 'MGI:106199', 'HGNC:17811',
            'HGNC:8001', 'ENSEMBL:ENSG00000167765', 'HGNC:33520',
            'HGNC:7200', 'HGNC:11996', 'HGNC:29503',
            'HGNC:30021', 'MGI:2139593', 'HGNC:24825',
            'ENSEMBL:ENSMUSG00000093459', 'ENSEMBL:ENSG00000265179', 'HGNC:18696',
            'MGI:5504148', 'HGNC:1553', 'MGI:1353654',
            'MGI:88139',
        }

        # islet of Langerhans
        observed_genes = QBL.get_genes_for_anatomy('UBERON:0000006')

        # Only membership matters, not the order in which ids come back.
        self.assertSetEqual(set(observed_genes), expected_genes)
Exemplo n.º 30
0
    def expand_protein(self, node):
        """Expand a protein node by adding related nodes and relationships.

        ``node.name`` is used as the UniProt identifier for a series of
        lookups: Reactome pathways, GeneProf transcription-factor regulators,
        miRGate microRNA regulators, BioLink anatomy / disease / phenotype
        associations, Reactome interaction partners, and Gene Ontology terms.
        Each hit is turned into a node with ``self.add_node_smart`` and, when
        that call returns a node, linked to the protein with
        ``self.orangeboard.add_rel``.

        :param node: the node to expand; its ``nodetype`` must be
            ``"protein"`` (checked with an ``assert``).
        """
        assert node.nodetype == "protein"
        uniprot_id_str = node.name

        # # SAR:  I suspect these pathways are too high-level and not useful:
        # pathways_set_from_pc2 = QueryPC2.uniprot_id_to_reactome_pathways(uniprot_id_str)
        # doesn't provide pathway descriptions; see if we can get away with not using it?
        # pathways_set_from_uniprot = QueryUniprot.uniprot_id_to_reactome_pathways(uniprot_id_str)

        # protein-pathway membership:
        pathways_dict_from_reactome = QueryReactome.query_uniprot_id_to_reactome_pathway_ids_desc(uniprot_id_str)
        # every pathway id found above is attributed to the source 'reactome'
        pathways_dict_sourcedb = dict.fromkeys(pathways_dict_from_reactome.keys(), 'reactome')
        # node1 is an alias of node; both names are used below
        node1 = node
        for pathway_id in pathways_dict_from_reactome.keys():
            target_node = self.add_node_smart('pathway',
                                              "REACT:" + pathway_id,
                                              desc=pathways_dict_from_reactome[pathway_id])
            if target_node is not None:
                self.orangeboard.add_rel('participates_in', pathways_dict_sourcedb[pathway_id], node1, target_node, extended_reltype="participates_in")
        gene_symbols_set = self.query_mygene_obj.convert_uniprot_id_to_gene_symbol(uniprot_id_str)
        for gene_symbol in gene_symbols_set:
            # protein-DNA (i.e., gene regulatory) interactions:
            regulator_gene_symbols_set = QueryGeneProf.gene_symbol_to_transcription_factor_gene_symbols(gene_symbol)
            for reg_gene_symbol in regulator_gene_symbols_set:
                reg_uniprot_ids_set = self.query_mygene_obj.convert_gene_symbol_to_uniprot_id(reg_gene_symbol)
                for reg_uniprot_id in reg_uniprot_ids_set:
                    assert '-' not in reg_uniprot_id
                    node2 = self.add_node_smart('protein', reg_uniprot_id, desc=reg_gene_symbol)
                    # do not add an edge from the protein to itself
                    if node2 is not None and node2.uuid != node1.uuid:
                        self.orangeboard.add_rel('regulates', 'GeneProf', node2, node1, extended_reltype="regulates_expression_of")

            # microrna-gene interactions:
            microrna_regulators = QueryMiRGate.get_microrna_ids_that_regulate_gene_symbol(gene_symbol)
            for microrna_id in microrna_regulators:
                mir_gene_symbol = QueryMiRBase.convert_mirbase_id_to_mir_gene_symbol(microrna_id)
                if mir_gene_symbol is not None:
                    mir_entrez_gene_ids = self.query_mygene_obj.convert_gene_symbol_to_entrez_gene_ID(mir_gene_symbol)
                    if len(mir_entrez_gene_ids) > 0:
                        for mir_entrez_gene_id in mir_entrez_gene_ids:
                            mir_node = self.add_node_smart('microRNA',
                                                           'NCBIGene:' + str(mir_entrez_gene_id),
                                                           desc=mir_gene_symbol)
                            if mir_node is not None:
                                self.orangeboard.add_rel('regulates', 'miRGate', mir_node, node, extended_reltype="regulates_expression_of")

        entrez_gene_id = self.query_mygene_obj.convert_uniprot_id_to_entrez_gene_ID(uniprot_id_str)
        if len(entrez_gene_id) > 0:
            # only the first id produced by iterating the collection is used
            entrez_gene_id_str = 'NCBIGene:' + str(next(iter(entrez_gene_id)))

            # protein-to-anatomy associations:
            anatomy_dict = QueryBioLink.get_anatomies_for_gene(entrez_gene_id_str)
            for anatomy_id, anatomy_desc in anatomy_dict.items():
                anatomy_node = self.add_node_smart("anatomical_entity", anatomy_id, desc=anatomy_desc)
                if anatomy_node is not None:
                    self.orangeboard.add_rel('expressed_in', 'BioLink', node, anatomy_node, extended_reltype="expressed_in")

            # protein-disease associations:
            # ids containing 'DOID:' or 'OMIM:' are handled identically;
            # ids containing neither are ignored
            disont_id_dict = QueryBioLink.get_diseases_for_gene_desc(entrez_gene_id_str)
            for disont_id in disont_id_dict.keys():
                if 'DOID:' in disont_id:
                    node2 = self.add_node_smart('disease', disont_id, desc=disont_id_dict[disont_id])
                    if node2 is not None:
                        self.orangeboard.add_rel('gene_associated_with_condition', 'BioLink', node1, node2, extended_reltype="associated_with_disease")
                else:
                    if 'OMIM:' in disont_id:
                        node2 = self.add_node_smart('disease', disont_id, desc=disont_id_dict[disont_id])
                        if node2 is not None:
                            self.orangeboard.add_rel('gene_associated_with_condition', 'BioLink', node1, node2, extended_reltype="associated_with_disease")

            # protein-phenotype associations:
            phenotype_id_dict = QueryBioLink.get_phenotypes_for_gene_desc(entrez_gene_id_str)
            for phenotype_id_str in phenotype_id_dict.keys():
                node2 = self.add_node_smart("phenotypic_feature", phenotype_id_str,
                                            desc=phenotype_id_dict[phenotype_id_str])
                if node2 is not None:
                    self.orangeboard.add_rel('has_phenotype', 'BioLink', node1, node2, extended_reltype="has_phenotype")

        # protein-protein interactions:
        int_dict = QueryReactome.query_uniprot_id_to_interacting_uniprot_ids_desc(uniprot_id_str)
        for int_uniprot_id in int_dict.keys():
            if self.query_mygene_obj.uniprot_id_is_human(int_uniprot_id):
                int_alias = int_dict[int_uniprot_id]
                # aliases without 'BINDSGENE:' become physical interactions;
                # aliases with it become 'regulates' edges to the named gene
                if 'BINDSGENE:' not in int_alias:
                    node2 = self.add_node_smart('protein', int_uniprot_id, desc=int_alias)
                    if node2 is not None and node2.uuid != node1.uuid:
                        self.orangeboard.add_rel('physically_interacts_with', 'reactome', node1, node2, extended_reltype="physically_interacts_with")
                else:
                    # the gene symbol is the second ':'-separated field of the alias
                    target_gene_symbol = int_alias.split(':')[1]
                    target_uniprot_ids_set = self.query_mygene_obj.convert_gene_symbol_to_uniprot_id(target_gene_symbol)
                    for target_uniprot_id in target_uniprot_ids_set:
                        assert '-' not in target_uniprot_id
                        node2 = self.add_node_smart('protein', target_uniprot_id, desc=target_gene_symbol)
                        # NOTE(review): compares node objects directly, whereas the
                        # other self-link checks compare .uuid; the source label is
                        # also 'Reactome' here but 'reactome' above — confirm intent
                        if node2 is not None and node2 != node1:
                            self.orangeboard.add_rel('regulates', 'Reactome', node1, node2, extended_reltype="regulates_expression_of")

        # protein-to-GO (biological process):
        # NOTE(review): the node type is whatever the 'ont' field holds, so this
        # is not restricted to biological process despite the heading above
        go_dict = self.query_mygene_obj.get_gene_ontology_ids_for_uniprot_id(uniprot_id_str)
        for go_id, go_term_dict in go_dict.items():
            go_term = go_term_dict.get('term', None)
            ontology_name_str = go_term_dict.get('ont', None)
            # entries missing either the term or the ontology name are skipped
            if go_term is not None and ontology_name_str is not None:
                node2 = self.add_node_smart(ontology_name_str, go_id, desc=go_term)
                if node2 is not None:
                    # the relationship type is looked up by ontology name
                    predicate = self.GO_ONTOLOGY_TO_PREDICATE[ontology_name_str]
                    self.orangeboard.add_rel(predicate,
                                             'gene_ontology', node1, node2, extended_reltype=predicate)