Exemplo n.º 1
0
def _iter_obo_namespaces(onto_node):
    """Yield, in order, each ``OIO:hasOBONamespace`` value found under a node's ``meta`` entry."""
    if "meta" in onto_node and "basicPropertyValues" in onto_node["meta"]:
        for basic_prop_val in onto_node["meta"]["basicPropertyValues"]:
            if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                yield basic_prop_val["val"]


def get_all_common_ancestors(node_ids: List[str],
                             ontology: Ontology,
                             min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """
    Retrieve all common ancestors for the provided list of nodes

    An ancestor is reported when it is reachable (reflexively) from more than one starting node, or when it is
    itself the only starting node it covers. Ancestors are dropped when their stored ``depth`` is lower than
    ``min_distance_from_root``, when they declare an OBO namespace different from the one shared by the starting
    nodes, or when they appear in ``nodeids_blacklist``.

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): minimum value of the ``depth`` entry an ancestor must have to be kept
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        List[Tuple[str, str, Set[str]]]: one tuple per retained ancestor, containing the ancestor ID, its label,
        and the set of starting nodes it covers

    Raises:
        ValueError: if the starting nodes declare different OBO namespaces
    """
    # check if all ids are connected to the same root node
    common_root = None
    for node_id in node_ids:
        for namespace in _iter_obo_namespaces(ontology.node(node_id)):
            if common_root and common_root != namespace:
                raise ValueError(
                    "Cannot get common ancestors of nodes connected to different roots"
                )
            common_root = namespace
    # The blacklist is consulted once per (starting node, ancestor) pair, so use hash-based lookups.
    blacklist = frozenset(nodeids_blacklist) if nodeids_blacklist else frozenset()
    ancestors = defaultdict(list)
    for node_id in node_ids:
        for ancestor in ontology.ancestors(node=node_id, reflexive=True):
            onto_anc = ontology.node(ancestor)
            # When several namespace values are present, the last one wins.
            namespaces = list(_iter_obo_namespaces(onto_anc))
            onto_anc_root = namespaces[-1] if namespaces else None
            if onto_anc["depth"] >= min_distance_from_root \
                    and (not onto_anc_root or onto_anc_root == common_root) \
                    and ancestor not in blacklist:
                ancestors[ancestor].append(node_id)
    # Keep ancestors shared by several starting nodes, plus each starting node itself.
    return [(ancestor, ontology.label(ancestor), set(covered_nodes))
            for ancestor, covered_nodes in ancestors.items()
            if len(covered_nodes) > 1 or ancestor == covered_nodes[0]]
def node_is_in_branch(ontology: Ontology, node_id: str,
                      branch_root_ids: List[str]):
    """
    Tell whether a node belongs to at least one of the given branches

    Args:
        ontology (Ontology): the ontology used to look up the ancestors of the node
        node_id (str): the ID of the node to test
        branch_root_ids (List[str]): IDs of the nodes at the top of the branches of interest

    Returns:
        bool: True if the node itself or any of its ancestors is one of the branch roots
    """
    roots = set(branch_root_ids)
    # reflexive=True makes the node count as part of its own lineage
    lineage = ontology.ancestors(node=node_id, reflexive=True)
    return not roots.isdisjoint(lineage)
Exemplo n.º 3
0
def _set_num_subsumers_in_subgraph(ontology: Ontology,
                                   root_id: str,
                                   relations: List[str] = None):
    """
    Store a ``num_subsumers`` value on a node and, recursively, on every node below it

    A node with exactly one parent receives its parent's ``num_subsumers`` plus one, so the parent must already
    carry that value. Any other node receives the size of its reflexive ancestor list.

    Args:
        ontology (Ontology): the ontology whose node dictionaries are updated in place
        root_id (str): the ID of the node where the traversal starts
        relations (List[str]): relations passed on to the ancestor and children lookups
    """
    direct_parents = ontology.parents(root_id)
    if len(direct_parents) != 1:
        subsumer_count = len(
            ontology.ancestors(node=root_id, relations=relations, reflexive=True))
    else:
        only_parent = ontology.node(direct_parents[0])
        subsumer_count = only_parent["num_subsumers"] + 1
    ontology.node(root_id)["num_subsumers"] = subsumer_count
    # Descend after storing the value: single-parent children read it from this node.
    for descendant_id in ontology.children(node=root_id, relations=relations):
        _set_num_subsumers_in_subgraph(ontology=ontology,
                                       root_id=descendant_id,
                                       relations=relations)
Exemplo n.º 4
0
def ancestors(term: str, ontology: ontol.Ontology, cache) -> Set[str]:
    """
    Return the ``subClassOf`` ancestors of a term (the term itself included), memoized in ``cache``

    Progress messages are written with ``click.echo``.

    Args:
        term (str): the ID of the term to look up
        ontology (ontol.Ontology): the ontology queried on a cache miss
        cache: mapping from term ID to its ancestor set; a missing entry is added by this call

    Returns:
        Set[str]: an empty set when ``term`` equals ``MF``, otherwise the cached or newly computed ancestor set
    """
    click.echo("Computing ancestors for {}".format(term))

    # MF is answered with an empty set, bypassing both the ontology and the cache
    if term == MF:
        click.echo("Found 0")
        return set()

    if term in cache:
        known = cache[term]
        click.echo("Found {} (from cache)".format(len(known)))
        return known

    computed = set(
        ontology.ancestors(term, relations=["subClassOf"], reflexive=True))
    cache[term] = computed
    click.echo("Found {} (from adding to cache: {} terms added)".format(
        len(computed), len(cache)))
    return computed
Exemplo n.º 5
0
def find_set_covering(
        subsets: List[Tuple[str, str, Set[str]]],
        value: List[float] = None,
        max_num_subsets: int = None,
        ontology: Ontology = None) -> Union[None, List[Tuple[str, Set[str]]]]:
    """greedy algorithm to solve set covering problem

    At each step the not-yet-selected subset that contributes the most uncovered elements (multiplied by its entry
    in ``value``, when provided) is selected. Ties are broken by subset name and then by position in ``subsets``.
    The search stops when every element of the universe is covered, when no subset is left, or when
    ``max_num_subsets`` subsets have been selected.

    Args:
        subsets (List[Tuple[str, str, Set[str]]]): list of subsets, each of which must contain a tuple with the first
        element being the ID of the subset, the second being the name, and the third the actual set of elements
        value (List[float]): list of costs of the subsets
        max_num_subsets (int): maximum number of subsets in the final list
        ontology (Ontology): ontology to use to remove possible parent-child relationships in the result set
    Returns:
        Union[None, List[Tuple[str, Set[str]]]]: the selected subsets, in selection order, as tuples made of the
        subset ID and its set of elements; None if ``value`` is provided and its length differs from the number of
        distinct subset IDs
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset[0] for subset in subsets}
    # NOTE: the length check is made against the number of distinct subset IDs
    if value and len(value) != len(elem_to_process):
        return None
    universe = {e for subset in subsets for e in subset[2]}
    # Without explicit values every subset weighs the same, which leaves the ranking by coverage unchanged
    weights = value if value else [1] * len(subsets)
    included_elmts = set()
    included_sets = []
    while elem_to_process and included_elmts != universe and \
            (not max_num_subsets or len(included_sets) < max_num_subsets):
        candidates = [(weight * len(subset[2] - included_elmts), subset)
                      for subset, weight in zip(subsets, weights)
                      if subset[0] in elem_to_process]
        if not candidates:
            # only reachable when ``value`` is shorter than ``subsets``: nothing selectable is left
            break
        # min() returns the first of several equally ranked candidates, like taking the head of a stable sort
        best_id, _, best_elements = min(candidates, key=lambda c: (-c[0], c[1][1]))[1]
        elem_to_process.remove(best_id)
        if ontology:
            # Drop already selected descendants of the new subset. The list is rebuilt instead of being mutated
            # during iteration, which would skip the element following each removed one.
            included_sets = [elem for elem in included_sets
                             if best_id not in ontology.ancestors(elem[0])]
        included_elmts |= best_elements
        included_sets.append((best_id, best_elements))
    logger.debug("finished set covering optimization")
    return included_sets
def get_all_common_ancestors(node_ids: List[str],
                             ontology: Ontology,
                             min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """
    Retrieve all common ancestors for the provided list of nodes

    An ancestor is reported when it is reachable (reflexively) from more than one starting node, or when it is
    itself the only starting node it covers. Starting nodes are exempt from the ``min_distance_from_root`` filter.
    Ancestors declaring an OBO namespace different from the common root of the starting nodes, or listed in
    ``nodeids_blacklist``, are dropped.

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): minimum distance from root node
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        List[CommonAncestor]: list of common ancestors

    Raises:
        ValueError: if ``nodes_have_same_root`` returns False for the starting nodes
    """
    common_root = nodes_have_same_root(node_ids=node_ids, ontology=ontology)
    if common_root is False:
        raise ValueError(
            "Cannot get common ancestors of nodes connected to different roots"
        )
    # Both membership tests run once per (starting node, ancestor) pair, so use hash-based lookups
    # instead of scanning the argument lists every time.
    starting_nodes = frozenset(node_ids)
    blacklist = frozenset(nodeids_blacklist) if nodeids_blacklist else frozenset()
    ancestors = defaultdict(list)
    for node_id in node_ids:
        for ancestor in ontology.ancestors(node=node_id, reflexive=True):
            onto_anc = ontology.node(ancestor)
            # When several namespace values are present, the last one wins.
            onto_anc_root = None
            if "meta" in onto_anc and "basicPropertyValues" in onto_anc["meta"]:
                for basic_prop_val in onto_anc["meta"]["basicPropertyValues"]:
                    if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                        onto_anc_root = basic_prop_val["val"]
            far_enough = (ancestor in starting_nodes
                          or onto_anc["depth"] >= min_distance_from_root)
            same_root = not onto_anc_root or onto_anc_root == common_root
            if far_enough and same_root and ancestor not in blacklist:
                ancestors[ancestor].append(node_id)
    # Keep ancestors shared by several starting nodes, plus each starting node itself.
    return [
        CommonAncestor(node_id=ancestor,
                       node_label=ontology.label(ancestor),
                       covered_starting_nodes=set(covered_nodes))
        for ancestor, covered_nodes in ancestors.items()
        if len(covered_nodes) > 1 or ancestor == covered_nodes[0]
    ]