def is_mesh_subroot_node(bio_ontology, node):
    """Return the MeSH root letter for a sub-root node, otherwise None.

    A node qualifies when it is in the ``MESH`` namespace, has exactly one
    tree number, and that tree number contains no ``.`` separator. In that
    case the first character of the tree number is returned.

    Parameters
    ----------
    bio_ontology :
        Ontology object providing ``get_ns_id(node)``.
    node :
        The ontology node to inspect.

    Returns
    -------
    str or None
        The first character of the node's single tree number, or None if
        the node does not qualify.
    """
    db_ns, db_id = bio_ontology.get_ns_id(node)
    if db_ns != 'MESH':
        return None

    tree_numbers = mesh_client.get_mesh_tree_numbers(db_id)
    if len(tree_numbers) != 1:
        return None

    only_tree_number = tree_numbers[0]
    # A dot means the term sits deeper than the first level of its tree.
    if '.' in only_tree_number:
        return None
    return only_tree_number[0]
def add_mesh_parents(bio_ontology: BioOntology):
    """Add missing root level nodes to the MeSH ontology.

    One node is created per entry of ``mesh_roots_map``. Then two kinds of
    ``isa`` edges are collected over all nodes and added in a single call:

    - from each sub-root node (as decided by ``is_mesh_subroot_node``) to
      the root node of the returned letter;
    - from each MESH node whose ID starts with ``C`` (supplementary
      concepts) to the ``S`` root node.
    """
    for root_letter, root_name in mesh_roots_map.items():
        root_label = bio_ontology.label('MESH', root_letter)
        bio_ontology.add_node(root_label, name=root_name)

    isa_edges = []
    for node in bio_ontology.nodes():
        # Sub-tree root nodes hang off the root node of their tree letter.
        root_letter = is_mesh_subroot_node(bio_ontology, node)
        if root_letter is not None:
            parent = bio_ontology.label('MESH', root_letter)
            isa_edges.append((node, parent, {'type': 'isa'}))

        # Supplementary concepts hang off the 'S' root. The bare 'C' ID is
        # excluded because it is one of the root nodes added above.
        db_ns, db_id = bio_ontology.get_ns_id(node)
        is_supplementary = (
            db_ns == 'MESH' and db_id.startswith('C') and db_id != 'C'
        )
        if is_supplementary:
            parent = bio_ontology.label('MESH', 'S')
            isa_edges.append((node, parent, {'type': 'isa'}))

    bio_ontology.add_edges_from(isa_edges)
def get_categories(fplx_node):
    """Return category labels for a given protein family ontology node.

    The HGNC children of the node are looked up in the module-level
    ontology, their names resolved, and each name that has an entry in
    ``categories`` contributes its category to the result.

    Returns
    -------
    set
        The distinct categories of the node's HGNC children; empty if none
        of the children has a known category.
    """
    ns_id = bio_ontology.get_ns_id(fplx_node)
    hgnc_children = bio_ontology.get_children(*ns_id, ns_filter='HGNC')
    gene_names = {bio_ontology.get_name(*child) for child in hgnc_children}

    found = set()
    for gene_name in gene_names:
        if gene_name in categories:
            found.add(categories[gene_name])
    return found
def get_category(node):
    """Return a category label for a given specific protein ontology node.

    The node's name is looked up in ``categories``; when a (truthy)
    category is found it is translated through ``category_map``.
    Otherwise None is returned.
    """
    ns_id = bio_ontology.get_ns_id(node)
    category = categories.get(bio_ontology.get_name(*ns_id))
    return category_map[category] if category else None
def add_ido_parents(bio_ontology: BioOntology):
    """Attach every parentless IDO node to an artificial IDO root node.

    A root node labeled with namespace ``IDO`` and ID ``0`` is added to the
    ontology, and each other node in the ``IDO`` namespace that currently
    has no parents gets an ``isa`` edge pointing to that root.

    Parameters
    ----------
    bio_ontology :
        The ontology to modify in place.
    """
    ido_root = bio_ontology.label('IDO', '0')
    bio_ontology.add_node(ido_root, name='infectious disease concept')

    edges_to_add = []
    for node in bio_ontology.nodes():
        # The freshly added root is itself a parentless IDO node; skip it
        # so that it does not end up with an isa edge pointing to itself.
        if node == ido_root:
            continue
        if bio_ontology.get_ns(node) != 'IDO':
            continue
        if not bio_ontology.get_parents(*bio_ontology.get_ns_id(node)):
            edges_to_add.append((node, ido_root, {'type': 'isa'}))
    bio_ontology.add_edges_from(edges_to_add)
def add_efo_parents(bio_ontology):
    """Attach every parentless EFO node to the EFO root node.

    Each node in the ``EFO`` namespace that currently has no parents, other
    than the root ``EFO:0000001`` itself, gets an ``isa`` edge pointing to
    the root. The number of edges added is printed.

    Parameters
    ----------
    bio_ontology :
        The ontology to modify in place.
    """
    efo_root = 'EFO:0000001'
    edges_to_add = []
    for node in bio_ontology.nodes():
        # The root is a parentless EFO node too; skip it so that it does
        # not end up with an isa edge pointing to itself.
        if node == efo_root:
            continue
        if bio_ontology.get_ns(node) != 'EFO':
            continue
        if not bio_ontology.get_parents(*bio_ontology.get_ns_id(node)):
            edges_to_add.append((node, efo_root, {'type': 'isa'}))
    print('Adding %d EFO isa edges.' % len(edges_to_add))
    bio_ontology.add_edges_from(edges_to_add)
def _make_famplex_lookup():
    """Create a famplex lookup dictionary.

    Keys are sorted tuples of HGNC gene names and values are the
    corresponding FamPlex ID.

    The module-level ontology is initialized first. FamPlex nodes whose
    HGNC children have the same names (including nodes with no HGNC
    children, which all produce the empty tuple) share a key; the node
    visited last determines the stored ID.
    """
    bio_ontology.initialize()
    lookup = {}
    for node in bio_ontology.nodes:
        db_ns, db_id = bio_ontology.get_ns_id(node)
        if db_ns != 'FPLX':
            continue
        gene_names = sorted(
            bio_ontology.get_name(*child)
            for child in bio_ontology.get_children(db_ns, db_id)
            if child[0] == 'HGNC'
        )
        lookup[tuple(gene_names)] = db_id
    return lookup
def has_fplx_parents(bio_ontology, node):
    """Return True if the given ontology node has FamPlex parents.

    A parent counts as a FamPlex parent when the first element of its
    entry, as returned by ``bio_ontology.get_parents``, is ``'FPLX'``.
    """
    ns_id = bio_ontology.get_ns_id(node)
    parent_namespaces = (
        parent[0] for parent in bio_ontology.get_parents(*ns_id)
    )
    return any(ns == 'FPLX' for ns in parent_namespaces)