def set_all_information_content_values(ontology: Ontology, relations: List[str] = None):
    """Compute the ``IC`` attribute for every term reachable from the ontology roots.

    The prerequisite node attributes (``num_subsumers``, ``num_leaves`` and ``depth``)
    are computed first, one attribute at a time over all roots, and only for roots
    that do not already carry the attribute. The information content is then set
    for every root subgraph, normalised by that root's own ``num_leaves``.

    Args:
        ontology (Ontology): the ontology whose nodes are annotated in place
        relations (List[str]): relations used to find the roots and forwarded to
            the per-subgraph helpers
    """
    logger.info("calculating information content for all terms in ontology")
    root_ids = ontology.get_roots(relations=relations)

    # attribute that marks the work as done -> helper that computes it for a subgraph
    prerequisites = (
        ("num_subsumers", _set_num_subsumers_in_subgraph),
        ("num_leaves", _set_num_leaves_in_subgraph),
        ("depth", set_all_depths_in_subgraph),
    )
    for attribute, populate_subgraph in prerequisites:
        for root_id in root_ids:
            if attribute not in ontology.node(root_id):
                populate_subgraph(ontology=ontology, root_id=root_id, relations=relations)

    for root_id in root_ids:
        _set_information_content_in_subgraph(
            ontology=ontology,
            root_id=root_id,
            maxleaves=ontology.node(root_id)["num_leaves"],
            relations=relations)
def get_all_paths_to_root(node_id: str, ontology: Ontology, min_distance_from_root: int = 0,
                          relations: List[str] = None, nodeids_blacklist: List[str] = None,
                          previous_path: Union[None, List[str]] = None,
                          root_node=None) -> Set[Tuple[str]]:
    """get all possible paths connecting a go term to its root terms

    Args:
        node_id (str): a valid GO id for the starting term
        ontology (Ontology): the go ontology
        min_distance_from_root (int): return only terms at a specified minimum distance from root terms
        relations (List[str]): the list of relations to be used
        nodeids_blacklist (List[str]): a list of node ids to exclude from the paths
        previous_path (Union[None, List[str]]): the path to get to the current node
        root_node: when set, only parents whose ``OIO:hasOBONamespace`` property equals this value
            are followed

    Returns:
        Set[Tuple[str]]: the set of paths connecting the specified term to its root terms, each of which contains a
        sequence of terms ids
    """
    new_path = list(previous_path) if previous_path else []
    if not nodeids_blacklist or node_id not in nodeids_blacklist:
        new_path.append(node_id)

    parents = [parent for parent in ontology.parents(node=node_id, relations=relations)
               if ontology.node(parent)["depth"] >= min_distance_from_root]

    if root_node:
        parents_same_root = []
        for parent in parents:
            # ontology.parents() yields node ids, so the metadata has to be read from the
            # node dictionary and not from the id string itself
            parent_node = ontology.node(parent)
            parent_root = None
            if "meta" in parent_node and "basicPropertyValues" in parent_node["meta"]:
                for basic_prop_val in parent_node["meta"]["basicPropertyValues"]:
                    if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                        parent_root = basic_prop_val["val"]
            if parent_root == root_node:
                parents_same_root.append(parent)
        parents = parents_same_root

    if parents:
        # go up the tree, following a depth first visit
        paths_to_return = set()
        for parent in parents:
            paths_to_return |= get_all_paths_to_root(
                node_id=parent, ontology=ontology, previous_path=new_path,
                min_distance_from_root=min_distance_from_root, relations=relations,
                nodeids_blacklist=nodeids_blacklist, root_node=root_node)
        return paths_to_return
    # the path is empty only when every visited node (this one included) was blacklisted
    if not new_path:
        return {(node_id, )}
    return {tuple(new_path)}
def translate_file_to_ontology(handle, **args):
    """Load an ontology from a local file, picking the parser from the file extension.

    ``.json`` files are read directly as obograph JSON and ``.ttl`` files go through
    ``RdfMapper``. Anything else is converted to obograph JSON with the external
    ``owltools`` command; the converted file is cached under ``/tmp`` using the
    SHA-256 of the handle as file name and reused on later calls.

    Args:
        handle: path of the file to load
        **args: extra keyword arguments forwarded to the JSON converter or to ``RdfMapper``

    Returns:
        the ontology object built from the file
    """
    if handle.endswith(".json"):
        graph = obograph_util.convert_json_file(handle, **args)
        return Ontology(handle=handle, payload=graph)

    if handle.endswith(".ttl"):
        from ontobio.sparql.rdf2nx import RdfMapper
        logging.info("RdfMapper: {}".format(args))
        return RdfMapper(**args).convert(handle, 'ttl')

    if not handle.endswith((".obo", ".owl")):
        logging.info(
            "Attempting to parse non obo or owl file with owltools: " + handle)

    digest = hashlib.sha256(handle.encode()).hexdigest()
    logging.info(" encoded: " + str(digest))
    cache_path = '/tmp/' + digest
    if os.path.isfile(cache_path):
        logging.info("using cached file: " + cache_path)
    else:
        # check=True: a failing owltools run raises instead of being ignored
        completed = subprocess.run(
            ['owltools', handle, '-o', '-f', 'json', cache_path], check=True)
        logging.info(completed)
    graph = obograph_util.convert_json_file(cache_path, **args)
    return Ontology(handle=handle, payload=graph)
def get_all_common_ancestors(node_ids: List[str], ontology: Ontology, min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """Collect the ancestors shared by the provided nodes, plus the nodes themselves.

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): ancestors with a ``depth`` below this value are discarded
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        a list of ``(ancestor id, ancestor label, set of covered starting nodes)`` tuples; an
        ancestor is kept when it covers more than one starting node or is itself a starting node

    Raises:
        ValueError: if the starting nodes carry different ``OIO:hasOBONamespace`` values
    """

    def namespaces_of(term):
        # yields every OIO:hasOBONamespace value stored in the node metadata, in order
        if "meta" in term and "basicPropertyValues" in term["meta"]:
            for prop in term["meta"]["basicPropertyValues"]:
                if prop["pred"] == "OIO:hasOBONamespace":
                    yield prop["val"]

    # check if all ids are connected to the same root node
    common_root = None
    for node_id in node_ids:
        for namespace in namespaces_of(ontology.node(node_id)):
            if common_root and common_root != namespace:
                raise ValueError(
                    "Cannot get common ancestors of nodes connected to different roots"
                )
            common_root = namespace

    covered_by_ancestor = defaultdict(list)
    for node_id in node_ids:
        for ancestor in ontology.ancestors(node=node_id, reflexive=True):
            ancestor_node = ontology.node(ancestor)
            ancestor_root = None
            for namespace in namespaces_of(ancestor_node):
                ancestor_root = namespace  # the last value wins
            if not (ancestor_node["depth"] >= min_distance_from_root):
                continue
            if ancestor_root and ancestor_root != common_root:
                continue
            if nodeids_blacklist and ancestor in nodeids_blacklist:
                continue
            covered_by_ancestor[ancestor].append(node_id)

    result = []
    for ancestor, covered in covered_by_ancestor.items():
        if len(covered) > 1 or ancestor == covered[0]:
            result.append((ancestor, ontology.label(ancestor), set(covered)))
    return result
def get_best_nodes_ic(
        node_ids: List[str],
        ontology: Ontology,
        max_number_of_terms: int = 3,
        min_distance_from_root: int = 0,
        slim_terms_ic_bonus_perc: int = 0,
        slim_set: set = None,
        nodeids_blacklist: List[str] = None
) -> Tuple[bool, List[Tuple[str, Set[str]]]]:
    """trim the list of terms by selecting the best combination of terms from the initial list or their common
    ancestors based on information content

    Args:
        node_ids (List[str]): the list of nodes to merge by common ancestor
        max_number_of_terms (int): maximum number of terms passed to the set covering step
        ontology (Ontology): the ontology
        min_distance_from_root (int): candidates that are not starting nodes and whose depth is below this value
            get a value of zero and are discarded
        slim_terms_ic_bonus_perc (int): the IC of terms that appear in the slim set is multiplied by
            ``1 + slim_terms_ic_bonus_perc``
        slim_set (set): set of terms that belong to the slim for the provided ontology
        nodeids_blacklist (List[str]): a list of node ids to be excluded from common ancestors list

    Returns:
        Tuple[bool, List[Tuple[str, Set[str]]]]: a flag that is True when the selected terms do not cover exactly
        the provided nodes, and the selected terms, each with the set of starting nodes it covers
    """
    common_ancestors = get_all_common_ancestors(
        node_ids=node_ids, ontology=ontology, nodeids_blacklist=nodeids_blacklist)
    # guard against an empty candidate list (no nodes provided, or everything blacklisted)
    if common_ancestors and "IC" not in ontology.node(common_ancestors[0][0]):
        logger.warning(
            "ontology terms do not have information content values set")
        set_all_information_content_values(ontology=ontology)

    starting_nodes = set(node_ids)
    values = []
    for candidate in common_ancestors:
        candidate_id = candidate[0]
        term = ontology.node(candidate_id)
        if candidate_id not in starting_nodes and term["depth"] < min_distance_from_root:
            # too close to the root and not one of the original terms: never select it
            values.append(0)
        elif slim_set and candidate_id in slim_set:
            values.append(term["IC"] * (1 + slim_terms_ic_bonus_perc))
        else:
            values.append(term["IC"])

    if slim_set and any(candidate[0] in slim_set for candidate in common_ancestors):
        logger.debug("some candidates are present in the slim set")

    # remove ancestors with zero IC, keeping candidates and values aligned
    kept = [(candidate, value) for candidate, value in zip(common_ancestors, values) if value > 0]
    common_ancestors = [candidate for candidate, _ in kept]
    values = [value for _, value in kept]

    best_terms = find_set_covering(subsets=common_ancestors,
                                   max_num_subsets=max_number_of_terms,
                                   value=values,
                                   ontology=ontology)
    covered_terms = {
        covered_node for _, covered_by_term in best_terms for covered_node in covered_by_term
    }
    return covered_terms != starting_nodes, best_terms
def create_ontology(handle=None, **args):
    """Create an ontology object from a handle, choosing the loading strategy from its form.

    Strategies, in the order they are tried:

    * ``a+b+...``: every part is loaded on its own and merged into the last one
    * a path containing a dot that exists on disk: parsed via ``translate_file_to_ontology``
    * ``obo:<name>``: fetched from the OBO PURL with ``owltools`` and cached under ``/tmp``
    * ``wdq:...``: Wikidata; ``scigraph:...``: SciGraph
    * ``http:...``: converted with ``owltools`` and cached under ``/tmp`` (file named by SHA-256 of the handle)
    * anything else: remote SPARQL

    Args:
        handle: string identifying the ontology to load
        **args: forwarded to ``translate_file_to_ontology`` for local files

    Returns:
        the loaded ontology object
    """
    logging.info("Determining strategy to load '{}' into memory...".format(handle))

    if handle.find("+") != -1:
        loaded = [create_ontology(part) for part in handle.split("+")]
        merged = loaded.pop()
        merged.merge(loaded)
        return merged

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logging.info("Fetching obograph-json file from filesystem")
        return translate_file_to_ontology(handle, **args)

    if handle.startswith("obo:"):
        logging.info("Fetching from OBO PURL")
        if handle.find(".") == -1:
            handle += '.owl'
        cache_path = '/tmp/'+handle
        if os.path.isfile(cache_path):
            logging.info("using cached file: "+cache_path)
        else:
            purl = handle.replace("obo:","http://purl.obolibrary.org/obo/")
            completed = subprocess.run(['owltools',purl,'-o','-f','json',cache_path], check=True)
            logging.info(completed)
        graph = obograph_util.convert_json_file(cache_path)
        return Ontology(handle=handle, payload=graph)

    if handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logging.info("Fetching from Wikidata")
        return EagerWikidataOntology(handle=handle)

    if handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logging.info("Fetching from SciGraph")
        return RemoteScigraphOntology(handle=handle)

    if handle.startswith("http:"):
        logging.info("Fetching from Web PURL: "+handle)
        digest = hashlib.sha256(handle.encode()).hexdigest()
        logging.info(" encoded: "+str(digest))
        cache_path = '/tmp/'+digest
        if os.path.isfile(cache_path):
            logging.info("using cached file: "+cache_path)
        else:
            completed = subprocess.run(['owltools',handle,'-o','-f','json',cache_path], check=True)
            logging.info(completed)
        graph = obograph_util.convert_json_file(cache_path)
        return Ontology(handle=handle, payload=graph)

    logging.info("Fetching from SPARQL")
    return EagerRemoteSparqlOntology(handle=handle)
def set_ontology(self, ontology_type: DataType, ontology: Ontology, config: GenedescConfigParser,
                 slim_cache_path: str = None) -> None:
    """Store the ontology for the given data type and run the post-load processing steps.

    The ontology is restricted to the relations configured on this object for the data
    type, when any are configured. Afterwards terms are renamed (if the module defines
    renaming rules), depths are set on all nodes, the IC structure is computed when the
    module's trimming algorithm is "ic", and the slim is loaded when a cache path is given.

    Args:
        ontology_type (DataType): the type of ontology to set
        ontology (Ontology): the ontology object to store
        config (GenedescConfigParser): configuration object where to read properties
        slim_cache_path (str): path to slim file to use
    """
    if ontology_type == DataType.GO:
        logger.info("Setting GO ontology")
        self.go_ontology = (ontology.subontology(relations=self.go_relations)
                            if self.go_relations else ontology)
    elif ontology_type == DataType.DO:
        logger.info("Setting DO ontology")
        self.do_ontology = (ontology.subontology(relations=self.do_relations)
                            if self.do_relations else ontology)
    elif ontology_type == DataType.EXPR:
        logger.info("Setting Expression ontology")
        self.expression_ontology = (ontology.subontology(relations=self.expr_relations)
                                    if self.expr_relations else ontology)

    module = get_module_from_data_type(ontology_type)
    # from here on work on the ontology as stored on this object
    stored_ontology = self.get_ontology(data_type=ontology_type)

    rename_rules = config.get_module_property(
        module=module, prop=ConfigModuleProperty.RENAME_TERMS)
    if rename_rules:
        self.rename_ontology_terms(ontology=stored_ontology,
                                   terms_replacement_regex=rename_rules)

    set_all_depths(ontology=stored_ontology,
                   relations=self.get_relations(ontology_type))

    trimming_algorithm = config.get_module_property(
        module=module, prop=ConfigModuleProperty.TRIMMING_ALGORITHM)
    if trimming_algorithm == "ic":
        set_ic_ontology_struct(ontology=stored_ontology,
                               relations=self.get_relations(ontology_type))

    if slim_cache_path:
        slim_url = config.get_module_property(
            module=module, prop=ConfigModuleProperty.SLIM_URL)
        self.load_slim(module=module,
                       slim_url=slim_url,
                       slim_cache_path=slim_cache_path)
def set_all_depths(ontology: Ontology, relations: List[str] = None, comparison_func=max):
    """Set the ``depth`` attribute on every node of the ontology.

    Depths are propagated from each root that either has no ``type`` attribute or is
    of type "CLASS"; any node left without a depth afterwards gets depth 0.

    Args:
        ontology (Ontology): the ontology whose nodes are annotated in place
        relations (List[str]): relations to follow while descending from the roots
        comparison_func: function choosing between an existing depth and a new one
            when a node is reachable through several paths
    """
    for root_id in ontology.get_roots():
        is_untyped = "type" not in ontology.node(root_id)
        if is_untyped or ontology.node_type(root_id) == "CLASS":
            set_all_depths_in_subgraph(ontology=ontology,
                                       root_id=root_id,
                                       relations=relations,
                                       comparison_func=comparison_func)

    # nodes not reached from any eligible root default to depth 0
    for _, attributes in ontology.nodes().items():
        if "depth" not in attributes:
            attributes["depth"] = 0
def _set_information_content_in_subgraph(ontology: Ontology, root_id: str, maxleaves: int, relations: List[str] = None): node = ontology.node(root_id) node["IC"] = -math.log( (float(node["num_leaves"]) / node["num_subsumers"] + 1) / (maxleaves + 1)) for child_id in ontology.children(node=root_id, relations=relations): _set_information_content_in_subgraph(ontology=ontology, root_id=child_id, maxleaves=maxleaves, relations=relations)
def rdfgraph_to_ontol(rg):
    """
    Return an Ontology object from an rdflib graph object

    Every subject typed as an OWL class becomes a node (added once per label it
    has), and every ``rdfs:subClassOf`` statement becomes an edge pointing from
    the superclass to the class.

    Status: Incomplete
    """
    from rdflib.namespace import RDF

    digraph = networkx.MultiDiGraph()
    label_map = {}  # filled below but not read anywhere in this function
    for owl_class in rg.subjects(RDF.type, OWL.Class):
        class_id = contract_uri_wrap(owl_class)
        logger.info("C={}".format(class_id))
        for literal in rg.objects(owl_class, RDFS.label):
            label_map[class_id] = literal.value
            digraph.add_node(class_id, label=literal.value)
        for superclass in rg.objects(owl_class, RDFS.subClassOf):
            # todo - blank nodes
            superclass_id = contract_uri_wrap(superclass)
            digraph.add_edge(superclass_id, class_id, pred='subClassOf')
    logger.info("G={}".format(digraph))
    return Ontology(handle='wd', payload={'graph': digraph})
def node_is_in_branch(ontology: Ontology, node_id: str, branch_root_ids: List[str]):
    """Tell whether a node belongs to one of the given branches.

    Args:
        ontology (Ontology): the ontology containing the node
        node_id (str): id of the node to check
        branch_root_ids (List[str]): ids of the branch roots

    Returns:
        bool: True when the node itself or one of its ancestors is among the branch roots
    """
    lineage = ontology.ancestors(node=node_id, reflexive=True)
    return not set(branch_root_ids).isdisjoint(lineage)
def convert(self, filename=None, format='ttl'):
    """Build an ontology from the triples held by this mapper.

    Args:
        filename: optional RDF file to parse before converting; when None only
            the triples already held by the mapper are used
        format: serialisation format handed to the RDF parser

    Returns:
        a new Ontology backed by an empty MultiDiGraph that ``add_triples`` has populated
    """
    if filename is not None:
        self.parse_rdf(filename=filename, format=format)
    ontology = Ontology(graph=networkx.MultiDiGraph())
    self.add_triples(ontology)
    return ontology
def protein_complex_sublcass_closure(ontology: Ontology) -> Set[str]:
    """Return GO:0032991 together with all of its ``subClassOf`` descendants.

    Args:
        ontology (Ontology): the ontology to query

    Returns:
        Set[str]: ids of the term and of everything below it through ``subClassOf``
    """
    complex_root = str(association.Curie(namespace="GO", identity="0032991"))
    return set(ontology.descendants(complex_root, relations=["subClassOf"], reflexive=True))
def set_all_depths_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None,
                               comparison_func=max, current_depth: int = 0):
    """Recursively set the ``depth`` attribute of all terms in a branch of the ontology.

    Args:
        ontology (Ontology): the ontology
        root_id (str): the ID of the root term of the branch to process
        relations (List[str]): list of relations to consider
        comparison_func: function used to combine the depth already stored on a node with the
            depth of the current path when the node is reached more than once. max keeps the
            length of the longest path, min the one of the shortest
        current_depth (int): the depth assigned to ``root_id`` on this call
    """
    node = ontology.node(root_id)
    if "depth" in node:
        node["depth"] = comparison_func(node["depth"], current_depth)
    else:
        node["depth"] = current_depth

    next_depth = current_depth + 1
    for child_id in ontology.children(node=root_id, relations=relations):
        set_all_depths_in_subgraph(ontology=ontology,
                                   root_id=child_id,
                                   relations=relations,
                                   comparison_func=comparison_func,
                                   current_depth=next_depth)
def get_all_common_ancestors(node_ids: List[str], ontology: Ontology, min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """
    Retrieve all common ancestors for the provided list of nodes

    An ancestor is reported when it covers more than one starting node, or when it is
    itself a starting node. Starting nodes are exempt from the minimum-distance filter.

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): minimum distance from root node
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        List[CommonAncestor]: list of common ancestors

    Raises:
        ValueError: if the starting nodes are connected to different roots
    """
    common_root = nodes_have_same_root(node_ids=node_ids, ontology=ontology)
    if common_root is False:
        raise ValueError(
            "Cannot get common ancestors of nodes connected to different roots"
        )

    covered_by_ancestor = defaultdict(list)
    for node_id in node_ids:
        for ancestor in ontology.ancestors(node=node_id, reflexive=True):
            ancestor_node = ontology.node(ancestor)
            ancestor_root = None
            if "meta" in ancestor_node and "basicPropertyValues" in ancestor_node["meta"]:
                for prop in ancestor_node["meta"]["basicPropertyValues"]:
                    if prop["pred"] == "OIO:hasOBONamespace":
                        ancestor_root = prop["val"]

            # too close to the root, unless it is one of the starting nodes
            if ancestor not in node_ids and not (ancestor_node["depth"] >= min_distance_from_root):
                continue
            # belongs to a different namespace than the starting nodes
            if ancestor_root and ancestor_root != common_root:
                continue
            if nodeids_blacklist and ancestor in nodeids_blacklist:
                continue
            covered_by_ancestor[ancestor].append(node_id)

    common_ancestors = []
    for ancestor, covered in covered_by_ancestor.items():
        if len(covered) > 1 or ancestor == covered[0]:
            common_ancestors.append(
                CommonAncestor(node_id=ancestor,
                               node_label=ontology.label(ancestor),
                               covered_starting_nodes=set(covered)))
    return common_ancestors
def rename_ontology_terms(
        ontology: Ontology,
        terms_replacement_regex: Dict[str, str] = None) -> None:
    """Rewrite the labels of ontology terms using regular expression substitutions.

    Args:
        ontology (Ontology): the ontology containing the terms to be renamed
        terms_replacement_regex (Dict[str, str]): substitution rules. Each key is a regular
            expression used both to search for the terms to rename and as the pattern to
            replace inside their label; the associated value is the replacement expression
    """
    logger.info("Renaming ontology terms")
    if not terms_replacement_regex:
        return
    for pattern, replacement in terms_replacement_regex.items():
        for node_id in ontology.search(pattern, is_regex=True):
            term = ontology.node(node_id)
            term["label"] = re.sub(pattern, replacement, term["label"])
def materialize_inferences(ontology_graph: ontol.Ontology, annotation):
    """Derive new annotations by following has_part links from the ancestors of the annotated term.

    For every ancestor of the annotation's object term (as returned by ``ancestors``),
    the terms linked through ``HAS_PART`` are looked up and the annotation is
    re-targeted to each of them with ``transform_relation``.

    Args:
        ontology_graph (ontol.Ontology): ontology used for ancestor, has_part and label lookups
        annotation: annotation providing ``annotation["object"]["id"]`` (the term) and
            ``annotation["subject"]["id"]`` (the annotated entity)

    Returns:
        list: the new annotations, one per (ancestor, has_part term) pair found
    """
    global __ancestors_cache

    materialized_annotations = []  # (gp, new_mf)
    mf = annotation["object"]["id"]
    gp = annotation["subject"]["id"]
    mf_ancestors = ancestors(mf, ontology_graph, __ancestors_cache)

    # one (gp, term, ancestor, has_part terms) record per ancestor that has parts
    messages = []
    for mf_anc in mf_ancestors:
        has_part_mfs = neighbor_by_relation(ontology_graph, mf_anc, HAS_PART)
        if not has_part_mfs:
            continue
        messages.append((gp, mf, mf_anc, has_part_mfs))
        for new_mf in has_part_mfs:
            materialized_annotations.append(
                transform_relation(annotation, new_mf, ontology_graph))

    for gp_id, term, parent, parts in messages:
        logger.info("\nFor {gp} -> {term} \"{termdef}\":".format(
            gp=gp_id, term=term, termdef=ontology_graph.label(term)))
        # report the ancestor that carries the has_part links, not the term itself
        logger.info("\tHas Parent --> {parent} \"{parentdef}\"".format(
            parent=parent, parentdef=ontology_graph.label(parent)))
        for part in parts:
            logger.info("\t\t has_part --> {part} \"{partdef}\"".format(
                part=part, partdef=ontology_graph.label(part)))
    return materialized_annotations
def __init__(self, wsmap=default_wsmap(), config=None):
    """
    Arguments
    ---------
    wsmap: dict
        maps words to normalized synonyms.
    config: dict
        A configuration conforming to LexicalMapConfigSchema
    """
    # NOTE(review): the default for `wsmap` is evaluated once, when this method is
    # defined, so all instances created without an explicit `wsmap` share one
    # object - confirm that nothing mutates it.

    # maps label or syn value to Synonym object
    self.lmap = {}
    # maps node id to synonym objects
    self.smap = {}
    self.wsmap = wsmap
    # raw string: same pattern as before, without the invalid "\W" string escape
    self.npattern = re.compile(r'[\W_]+')
    self.exclude_obsolete = True
    self.ontology_pairs = None
    self.id_to_ontology_map = defaultdict(list)
    self.merged_ontology = Ontology()
    self.config = config if config is not None else {}
    self.stats = {}
def _set_num_subsumers_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None): if "num_subsumers" not in ontology.node(root_id): parents = set(ontology.parents(root_id)) parents.discard(root_id) parents = list(parents) if not parents or all( ["set_subsumers" in ontology.node(parent) for parent in parents]): subsumers = {subsumer for parent in parents for subsumer in ontology.node(parent)["set_subsumers"]} | \ {root_id} ontology.node(root_id)["num_subsumers"] = len(subsumers) ontology.node(root_id)["set_subsumers"] = subsumers for child_id in ontology.children(node=root_id): _set_num_subsumers_in_subgraph(ontology, child_id, relations)
def _set_information_content_in_subgraph(ontology: Ontology, root_id: str, maxleaves: int, relations: List[str] = None): node = ontology.node(root_id) if str(root_id) == root_id and "ARTIFICIAL_NODE:" in root_id: node["IC"] = 0 else: if "num_leaves" in node and "num_subsumers" in node: node["IC"] = -math.log( (float(node["num_leaves"]) / node["num_subsumers"] + 1) / (maxleaves + 1)) else: logger.warning("Disconnected node: " + root_id) node["IC"] = 0 children = set(ontology.children(node=root_id, relations=relations)) children.discard(root_id) children = list(children) for child_id in children: _set_information_content_in_subgraph(ontology=ontology, root_id=child_id, maxleaves=maxleaves, relations=relations)
def _set_num_subsumers_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None): parents = ontology.parents(root_id) if len(parents) == 1: ontology.node(root_id)["num_subsumers"] = ontology.node( parents[0])["num_subsumers"] + 1 else: ontology.node(root_id)["num_subsumers"] = len( ontology.ancestors(node=root_id, relations=relations, reflexive=True)) for child_id in ontology.children(node=root_id, relations=relations): _set_num_subsumers_in_subgraph(ontology=ontology, root_id=child_id, relations=relations)
def set_ontology(self, ontology_type: DataType, ontology: Ontology,
                 terms_replacement_regex: Dict[str, str] = None) -> None:
    """Store the ontology for the given data type, rename its terms and set node depths.

    GO and DO ontologies are restricted to the relations configured on this object;
    the expression ontology is copied with ``subontology()`` and its nodes are passed
    through ``DataManager.add_article_to_expression_nodes``.

    Args:
        ontology_type (DataType): the type of ontology to set
        ontology (Ontology): the ontology object to store
        terms_replacement_regex (Dict[str, str]): a dictionary containing the regular expression to be applied
            for renaming terms. Each key must be a regular expression to search for terms and the associated
            value another regular expression that defines the final result
    """
    stored_ontology = None
    if ontology_type == DataType.GO:
        logger.info("Setting GO ontology")
        self.go_ontology = ontology.subontology(relations=self.go_relations)
        stored_ontology = self.go_ontology
    elif ontology_type == DataType.DO:
        logger.info("Setting DO ontology")
        self.do_ontology = ontology.subontology(relations=self.do_relations)
        stored_ontology = self.do_ontology
    elif ontology_type == DataType.EXPR:
        logger.info("Setting Expression ontology")
        self.expression_ontology = ontology.subontology()
        DataManager.add_article_to_expression_nodes(self.expression_ontology)
        stored_ontology = self.expression_ontology

    self.rename_ontology_terms(ontology=stored_ontology,
                               terms_replacement_regex=terms_replacement_regex)

    # depths are computed over every relation kept in the stored ontology
    for root_id in stored_ontology.get_roots():
        set_all_depths_in_subgraph(ontology=stored_ontology, root_id=root_id, relations=None)
def ancestors(term: str, ontology: ontol.Ontology, cache) -> Set[str]:
    """Return the ``subClassOf`` ancestors of a term (the term included), using a cache.

    Progress is reported through ``click.echo``. The term equal to ``MF`` is treated
    as having no ancestors and is never cached.

    Args:
        term (str): id of the term to look up
        ontology (ontol.Ontology): ontology used to compute the ancestors on a cache miss
        cache: mapping from term id to its ancestor set; updated in place on a miss

    Returns:
        Set[str]: the ancestor ids
    """
    click.echo("Computing ancestors for {}".format(term))
    if term == MF:
        click.echo("Found 0")
        return set()

    if term in cache:
        cached = cache[term]
        click.echo("Found {} (from cache)".format(len(cached)))
        return cached

    computed = set(
        ontology.ancestors(term, relations=["subClassOf"], reflexive=True))
    cache[term] = computed
    click.echo("Found {} (from adding to cache: {} terms added)".format(
        len(computed), len(cache)))
    return computed
def find_set_covering(
        subsets: List[Tuple[str, str, Set[str]]],
        value: List[float] = None,
        max_num_subsets: int = None,
        ontology: Ontology = None) -> Union[None, List[Tuple[str, Set[str]]]]:
    """greedy algorithm to solve set covering problem

    At every step the subset with the highest score is selected, where the score is the
    number of still uncovered elements it contains, multiplied by its value when values
    are provided. Ties are broken by subset name.

    Args:
        subsets (List[Tuple[str, str, Set[str]]]): list of subsets, each of which must contain a tuple with the first
            element being the ID of the subset, the second being the name, and the third the actual set of elements
        value (List[float]): list of values of the subsets, aligned with ``subsets``
        max_num_subsets (int): maximum number of subsets in the final list
        ontology (Ontology): ontology to use to remove possible parent-child relationships in the result set

    Returns:
        Union[None, List[Tuple[str, Set[str]]]]: the selected subsets as (ID, covered elements) pairs, or None when
        the number of values does not match the number of distinct subset IDs
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset[0] for subset in subsets}
    if value and len(value) != len(elem_to_process):
        return None
    universe = {e for subset in subsets for e in subset[2]}
    # without explicit values every subset weighs the same
    weights = value if value else [1] * len(subsets)
    included_elmts = set()
    included_sets = []
    while elem_to_process and included_elmts != universe and \
            (not max_num_subsets or len(included_sets) < max_num_subsets):
        candidates = [(w * len(s[2] - included_elmts), s[2], s[1], s[0])
                      for s, w in zip(subsets, weights) if s[0] in elem_to_process]
        if not candidates:
            break
        # highest score first, then alphabetical by name; on a full tie the first candidate wins
        _, best_elements, _, best_id = min(candidates, key=lambda c: (-c[0], c[2]))
        elem_to_process.remove(best_id)
        if ontology:
            # drop previously selected terms that are descendants of the new one. The list is
            # rebuilt instead of being modified while iterating, which would skip entries
            included_sets = [selected for selected in included_sets
                             if best_id not in ontology.ancestors(selected[0])]
        included_elmts |= best_elements
        included_sets.append((best_id, best_elements))
    logger.debug("finished set covering optimization")
    return included_sets
def expand_tsv(input: str, ontology: Ontology = None, outfile=None, sep='\t',
               cols: List[str] = None) -> None:
    """
    Adds additional columns to a TSV by performing additional ontology lookups

    For example, given a TSV with a column `term`, this can add a column `term_label`
    in future it may also add closures

    The header row is written as soon as the input header has been read, so an input
    with a header but no data rows still produces a header. When `cols` is empty or
    None the rows are copied unchanged.

    :param input: filename of a TSV (must have column headers); always read as tab-delimited
    :param ontology: used for lookup; required when `cols` is not empty
    :param outfile: writable file-like object that receives the output
    :param sep: delimiter used for the output
    :param cols: names of the columns holding ids for which a `<name>_label` column is added
    :return:
    """
    label_cols = set(cols) if cols else set()
    with open(input, newline='') as handle:
        reader = csv.DictReader(handle, delimiter='\t')
        outwriter = csv.writer(outfile, delimiter=sep)
        if reader.fieldnames is None:
            # completely empty input: nothing to write
            return

        header = []
        # dict.fromkeys collapses repeated column names the same way the row dicts do
        for name in dict.fromkeys(reader.fieldnames):
            header.append(name)
            if name in label_cols:
                header.append(f'{name}_label')
        outwriter.writerow(header)

        for row in reader:
            vals = []
            for k, v in row.items():
                vals.append(v)
                if k in label_cols:
                    label = ontology.label(v)
                    if label is None:
                        logging.warning(f"No id: {v}")
                    vals.append(label)
            outwriter.writerow(vals)
def _set_tot_annots_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None): if "tot_annot_genes" not in ontology.node(root_id): children = set(ontology.children(root_id, relations=relations)) children.discard(root_id) children = list(children) ontology.node(root_id)["tot_annot_genes"] = ontology.node( root_id)["rel_annot_genes"] | set([ annot_gene for child_id in children for annot_gene in _set_tot_annots_in_subgraph( ontology, child_id) ]) return ontology.node(root_id)["tot_annot_genes"]
def _set_num_leaves_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None): num_leaves = 0 for child_id in ontology.children(node=root_id): if "num_leaves" not in ontology.node(child_id): _set_num_leaves_in_subgraph(ontology=ontology, root_id=child_id, relations=relations) if ontology.node(child_id)["num_leaves"] == 0: num_leaves += 1 else: num_leaves += ontology.node(child_id)["num_leaves"] ontology.node(root_id)["num_leaves"] = num_leaves
def nodes_have_same_root(node_ids: List[str], ontology: Ontology) -> Union[bool, str]:
    """
    Check whether all provided nodes are connected to the same root only

    The root of a node is read from the ``OIO:hasOBONamespace`` entries of its
    ``basicPropertyValues`` metadata; nodes without such an entry are ignored.

    Args:
        node_ids (List[str]): List of nodes to be checked
        ontology (Ontology): the ontology to which the provided nodes belong

    Returns:
        Union[bool, str]: the common root if every namespace found is the same, False as soon
        as two different ones are found, None when no node carries a namespace
    """
    common_root = None
    for node_id in node_ids:
        term = ontology.node(node_id)
        if "meta" not in term or "basicPropertyValues" not in term["meta"]:
            continue
        for prop in term["meta"]["basicPropertyValues"]:
            if prop["pred"] != "OIO:hasOBONamespace":
                continue
            namespace = prop["val"]
            if common_root and namespace != common_root:
                return False
            common_root = namespace
    return common_root
def _set_num_leaves_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None): if "set_leaves" in ontology.node(root_id): return ontology.node(root_id)["set_leaves"] children = set(ontology.children(node=root_id)) children.discard(root_id) children = list(children) if not children: leaves = {root_id} num_leaves = 0 else: leaves = { leaf for child_id in children for leaf in _set_num_leaves_in_subgraph( ontology=ontology, root_id=child_id, relations=relations) } num_leaves = len(leaves) ontology.node(root_id)["num_leaves"] = num_leaves ontology.node(root_id)["set_leaves"] = leaves return leaves
def neighbor_by_relation(ontology_graph: ontol.Ontology, term, relation):
    """Return the parents of ``term`` that are linked to it through ``relation`` only.

    Args:
        ontology_graph (ontol.Ontology): the ontology to query
        term: id of the term whose neighbours are requested
        relation: the single relation to follow

    Returns:
        whatever ``ontology_graph.parents`` returns for that term and relation
    """
    followed_relations = [relation]
    return ontology_graph.parents(term, relations=followed_relations)