def open_ontology(path=None):
    """ load the HPO ontology and assemble it into a graph

    Args:
        path: location of the ontology data, handed unchanged to
            load_hpo_database(). Defaults to None; how None is treated is
            decided by load_hpo_database().

    Returns:
        tuple of (graph, alt_ids, obsolete_ids): the ICSimilarity graph with
        the header values stored as graph attributes, a dict for alternate
        IDs and a set for obsolete IDs. The dict and set start out empty
        and are handed to add_entry() for every parsed entry.
    """

    header, entries = load_hpo_database(path)

    ontology_graph = ICSimilarity()

    # expose each hpo header value as an attribute of the graph
    for key in header:
        ontology_graph.graph[key] = header[key]

    # HPO IDs double as node IDs, so alternate and obsolete IDs are kept in
    # their own containers, which add_entry() receives for every entry
    alternates = {}
    obsolete = set()
    for entry in entries:
        add_entry(ontology_graph, entry, alternates, obsolete)

    return ontology_graph, alternates, obsolete
def setUp(self):
    """ build the ICSimilarity fixture used by the unit tests

    The ontology is read from data/obo.txt in the directory of this file,
    and the resulting graph is combined with a small set of HPO terms for
    three people.
    """

    obo_path = os.path.join(os.path.dirname(__file__), "data", "obo.txt")
    graph = Ontology(obo_path).get_graph()

    # HPO terms recorded for each person
    self.hpo_terms = {
        "person_01": ["HP:0000924"],
        "person_02": ["HP:0000118", "HP:0002011"],
        "person_03": ["HP:0000707", "HP:0002011"]
    }

    self.hpo_graph = ICSimilarity(self.hpo_terms, graph)
def get_graph(self):
    """ assemble the parsed obo entries into a graph

    Header values are stored as graph attributes. Every entry that is not
    obsolete becomes a node keyed by its HPO ID, with an edge running from
    each of its "is_a" terms to that node. Obsolete entries are left out of
    the graph, and their IDs are added to self.obsolete_ids.

    Returns:
        ICSimilarity graph object
    """

    hpo_graph = ICSimilarity()

    # expose each hpo header value as an attribute of the graph
    for key in self.hpo_header:
        hpo_graph.graph[key] = self.hpo_header[key]

    for entry in self.hpo_list:
        tags = entry.tags

        # obsolete HPO entries are recorded, but kept out of the graph
        if self.is_obsolete(tags):
            self.obsolete_ids.add(str(tags["id"][0]))
            continue

        term_id = str(tags["id"][0])
        hpo_graph.add_node(term_id)

        # allow conversion between the HPO ID and its alternate IDs
        self.track_alt_ids(tags, term_id)

        # attach the attribute data for the term to its node
        self.add_hpo_attributes_to_node(hpo_graph, term_id, tags)

        # entries without "is_a" tags get no incoming edges
        if "is_a" not in tags:
            continue

        # edges run from each predecessor term to this term
        for parent in tags["is_a"]:
            hpo_graph.add_edge(str(parent), term_id)

    return hpo_graph
class TestICSimilarityPy(unittest.TestCase):
    """ unit tests for the ICSimilarity class
    """

    def setUp(self):
        """ build the ICSimilarity fixture used by the unit tests

        The ontology is read from data/obo.txt in the directory of this
        file, and the resulting graph is combined with a small set of HPO
        terms for three probands.
        """

        obo_path = os.path.join(os.path.dirname(__file__), "data", "obo.txt")
        graph = Ontology(obo_path).get_graph()

        # HPO terms recorded for each proband
        self.hpo_terms = {
            "person_01": ["HP:0000924"],
            "person_02": ["HP:0000118", "HP:0002011"],
            "person_03": ["HP:0000707", "HP:0002011"]
        }

        self.hpo_graph = ICSimilarity(self.hpo_terms, graph)

    def test_get_term_count(self):
        """ check that get_term_count works correctly

        All of the counts here are derived from their usage in
        self.hpo_terms
        """

        term_count = self.hpo_graph.get_term_count

        # the count for a term covers its own usage plus that of its subterms
        self.assertEqual(term_count("HP:0000118"), 5)
        self.assertEqual(term_count("HP:0000707"), 3)
        self.assertEqual(term_count("HP:0002011"), 2)

        # a terminal node, used only once in the probands, has a count of 1
        self.assertEqual(term_count("HP:0000924"), 1)

        # a term that isn't used within any of the probands, but which all
        # of the used terms descend from, still gets the term/subterm count
        self.assertEqual(term_count("HP:0000001"), 5)

    def test_calculate_information_content(self):
        """ check that calculate_information_content works correctly
        """

        information_content = self.hpo_graph.calculate_information_content

        # the top node has an information content of 0
        self.assertEqual(information_content("HP:0000001"), 0)

        # information content for a terminal node
        self.assertAlmostEqual(information_content("HP:0000924"), 1.6094379)

        # information content for a node that is somewhat distant, but
        # which has some descendant nodes that need to be included in the
        # term count
        self.assertAlmostEqual(information_content("HP:0000707"), 0.5108256)

    def test_get_most_informative_ic(self):
        """ check that get_most_informative_ic works correctly
        """

        most_informative_ic = self.hpo_graph.get_most_informative_ic

        # two nodes where every common ancestor is the ancestor of all terms
        # used in the probands
        self.assertAlmostEqual(
            most_informative_ic("HP:0000707", "HP:0000924"), 0)

        # two nodes which are both somewhat down the HPO graph
        self.assertAlmostEqual(
            most_informative_ic("HP:0000707", "HP:0002011"), 0.5108256)

        # two identical nodes
        self.assertAlmostEqual(
            most_informative_ic("HP:0000924", "HP:0000924"), 1.6094379)