def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet):
    logger.info("Setting information content values based on annotation frequency")
    # clear any previously computed annotation sets and IC values
    for node_id in ontology.nodes():
        node_prop = ontology.node(node_id)
        if "rel_annot_genes" in node_prop:
            del node_prop["rel_annot_genes"]
        if "tot_annot_genes" in node_prop:
            del node_prop["tot_annot_genes"]
        if "IC" in node_prop:
            del node_prop["IC"]
    for root_id in ontology.get_roots():
        if "depth" not in ontology.node(root_id) and (
                "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS"):
            set_all_depths_in_subgraph(ontology=ontology, root_id=root_id)
    # map each term to the set of genes directly annotated to it
    node_gene_map = defaultdict(set)
    for subj, obj in annotations.associations_by_subj_obj.keys():
        node_gene_map[obj].add(subj)
    for node_id in ontology.nodes():
        node_pr = ontology.node(node_id)
        node_pr["rel_annot_genes"] = node_gene_map[node_id]
    # propagate annotations up each subgraph to get total (direct + indirect) sets
    for root_id in ontology.get_roots():
        _set_tot_annots_in_subgraph(ontology, root_id)
    for node_prop in ontology.nodes().values():
        if "tot_annot_genes" not in node_prop:
            node_prop["tot_annot_genes"] = set()
    tot_annots = len(set(gene for set_genes in node_gene_map.values() for gene in set_genes))
    # smallest non-empty annotation set; default=0 avoids min() raising on an
    # empty sequence, and the fallback below then substitutes 1
    min_annots = min((len(node["tot_annot_genes"]) for node in ontology.nodes().values()
                      if "tot_annot_genes" in node and len(node["tot_annot_genes"]) > 0),
                     default=0)
    if not min_annots:
        min_annots = 1
    for node_prop in ontology.nodes().values():
        node_prop["IC"] = -math.log(len(node_prop["tot_annot_genes"]) / tot_annots) if \
            len(node_prop["tot_annot_genes"]) > 0 else -math.log(min_annots / (tot_annots + 1))
    logger.info("Finished setting information content values")
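
# Usage sketch (illustrative only, not part of the module API): assuming an
# Ontology and AssociationSet built with ontobio's factories, IC values can be
# computed and read back off the node properties. The helper name below and
# the taxon used are hypothetical placeholders.
def _example_set_ic_annot_freq():
    from ontobio import OntologyFactory
    from ontobio.assoc_factory import AssociationSetFactory
    ont = OntologyFactory().create("go")  # assumes network access to fetch GO
    assocs = AssociationSetFactory().create(ontology=ont,
                                            subject_category="gene",
                                            object_category="function",
                                            taxon="NCBITaxon:559292")
    set_ic_annot_freq(ont, assocs)
    # frequently annotated (general) terms get low IC; rarely annotated terms high IC
    return {n: ont.node(n)["IC"] for n in ont.nodes()}
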
def set_all_depths(ontology: Ontology, relations: List[str] = None, comparison_func=max):
    for root_id in ontology.get_roots():
        if "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS":
            set_all_depths_in_subgraph(ontology=ontology, root_id=root_id,
                                       relations=relations,
                                       comparison_func=comparison_func)
    # nodes not reached from any processed root default to depth 0
    for node_content in ontology.nodes().values():
        if "depth" not in node_content:
            node_content["depth"] = 0
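
# Usage sketch (illustrative only): set_all_depths assigns a "depth" property
# to every CLASS node, with comparison_func arbitrating when a node is
# reachable from a root by paths of different lengths (max keeps the longest
# path, min the shortest). The helper below is hypothetical.
def _example_set_all_depths():
    from ontobio import OntologyFactory
    ont = OntologyFactory().create("go")  # assumes network access to fetch GO
    set_all_depths(ont, comparison_func=max)
    # roots get depth 0; unreachable or non-CLASS nodes also default to 0
    return {n: ont.node(n).get("depth") for n in ont.nodes()}
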
class LexicalMapEngine():
    """
    generates lexical matches between pairs of ontology classes
    """

    SCORE = 'score'
    LEXSCORE = 'lexscore'
    SIMSCORES = 'simscores'
    CONDITIONAL_PR = 'cpr'

    def __init__(self, wsmap=default_wsmap(), config=None):
        """
        Arguments
        ---------
        wsmap: dict
            maps words to normalized synonyms.
        config: dict
            A configuration conforming to LexicalMapConfigSchema
        """
        # maps label or syn value to Synonym object
        self.lmap = {}
        # maps node id to synonym objects
        self.smap = {}
        self.wsmap = wsmap
        self.npattern = re.compile(r'[\W_]+')
        self.exclude_obsolete = True
        self.ontology_pairs = None
        self.id_to_ontology_map = defaultdict(list)
        self.merged_ontology = Ontology()
        self.config = config if config is not None else {}
        self.stats = {}

    def index_ontologies(self, onts):
        logging.info('Indexing: {}'.format(onts))
        for ont in onts:
            self.index_ontology(ont)

    def index_ontology(self, ont):
        """
        Adds an ontology to the index

        This iterates through all labels and synonyms in the ontology, creating an index
        """
        self.merged_ontology.merge([ont])
        syns = ont.all_synonyms(include_label=True)
        include_id = self._is_meaningful_ids()
        logging.info("Include IDs as synonyms: {}".format(include_id))
        if include_id:
            for n in ont.nodes():
                v = n
                # Get fragment
                if v.startswith('http'):
                    v = re.sub('.*/', '', v)
                    v = re.sub('.*#', '', v)
                syns.append(Synonym(n, val=v, pred='label'))
        logging.info("Indexing {} syns in {}".format(len(syns), ont))
        logging.info("Distinct lexical values: {}".format(len(self.lmap.keys())))
        for syn in syns:
            self.index_synonym(syn, ont)
        for nid in ont.nodes():
            self.id_to_ontology_map[nid].append(ont)

    def label(self, nid):
        return self.merged_ontology.label(nid)

    def index_synonym(self, syn, ont):
        """
        Index a synonym

        Typically not called from outside this object; called by `index_ontology`
        """
        if not syn.val:
            if syn.pred == 'label':
                if not self._is_meaningful_ids():
                    if not ont.is_obsolete(syn.class_id):
                        pass
                        #logging.error('Use meaningful ids if label not present: {}'.format(syn))
            else:
                logging.warning("Incomplete syn: {}".format(syn))
            return

        if self.exclude_obsolete and ont.is_obsolete(syn.class_id):
            return

        syn.ontology = ont
        prefix, _ = ont.prefix_fragment(syn.class_id)

        v = syn.val
        caps_match = re.match('[A-Z]+', v)
        if caps_match:
            # if the leading run of caps covers at least a third of the length,
            # assume abbreviation
            if caps_match.span()[1] >= len(v) / 3:
                syn.is_abbreviation(True)

        # chebi 'synonyms' are often not real synonyms
        # https://github.com/ebi-chebi/ChEBI/issues/3294
        if not re.match('.*[a-zA-Z]', v):
            if prefix != 'CHEBI':
                logging.warning('Ignoring suspicious synonym: {}'.format(syn))
            return

        v = self._standardize_label(v)

        # TODO: do this once ahead of time
        wsmap = {}
        for w, s in self.wsmap.items():
            wsmap[w] = s
        for ss in self._get_config_val(prefix, 'synsets', []):
            # TODO: weights
            wsmap[ss['synonym']] = ss['word']
        nv = self._normalize_label(v, wsmap)

        self._index_synonym_val(syn, v)
        nweight = self._get_config_val(prefix, 'normalized_form_confidence', 0.8)
        if nweight > 0 and not syn.is_abbreviation():
            if nv != v:
                nsyn = Synonym(syn.class_id,
                               val=syn.val,
                               pred=syn.pred,
                               lextype=syn.lextype,
                               ontology=ont,
                               confidence=syn.confidence * nweight)
                self._index_synonym_val(nsyn, nv)

    def _index_synonym_val(self, syn, v):
        lmap = self.lmap
        smap = self.smap
        cid = syn.class_id
        if v not in lmap:
            lmap[v] = []
        lmap[v].append(syn)
        if cid not in smap:
            smap[cid] = []
        smap[cid].append(syn)

    def _standardize_label(self, v):
        # Add spaces separating camelcased strings
        v = re.sub('([a-z])([A-Z])', r'\1 \2', v)
        # always use lowercase when comparing; we may want to make this
        # configurable in future
        v = v.lower()
        return v

    def _normalize_label(self, s, wsmap):
        """
        normalized form of a synonym
        """
        toks = []
        for tok in list(set(self.npattern.sub(' ', s).split(' '))):
            if tok in wsmap:
                tok = wsmap[tok]
            if tok != "":
                toks.append(tok)
        toks.sort()
        return " ".join(toks)

    def _get_config_val(self, prefix, k, default=None):
        v = None
        for oc in self.config.get('ontology_configurations', []):
            if prefix == oc.get('prefix', ''):
                v = oc.get(k, None)
        if v is None:
            v = self.config.get(k, None)
        if v is None:
            v = default
        return v

    def _is_meaningful_ids(self):
        return self.config.get('meaningful_ids', False)

    def find_equiv_sets(self):
        return self.lmap

    def get_xref_graph(self):
        """
        Generate mappings based on lexical properties and return as nx graph.

        Algorithm
        ~~~~~~~~~

        - A dictionary is stored between ref:`Synonym` values and synonyms.
          See ref:`index_synonym`. Note that Synonyms include the primary label

        - Each key in the dictionary is examined to determine if there exist
          two Synonyms from different ontology classes

        This avoids N^2 pairwise comparisons: instead the time taken is linear

        After the initial mapping is made, additional scoring is performed on each mapping

        Edge properties
        ~~~~~~~~~~~~~~~
        The return object is a nx graph, connecting pairs of ontology classes.

        Edges are annotated with metadata about how the match was found:

        syns: pair
            pair of `Synonym` objects, corresponding to the synonyms for the two nodes
        score: int
            score indicating strength of mapping, between 0 and 100

        Returns
        -------
        Graph
            nx graph (bidirectional)
        """
        # initial graph; all matches
        g = nx.MultiDiGraph()

        # lmap collects all syns by token
        items = self.lmap.items()
        logging.info("collecting initial xref graph, items={}".format(len(items)))
        i = 0
        sum_nsyns = 0
        n_skipped = 0
        has_self_comparison = False
        if self.ontology_pairs:
            for (o1id, o2id) in self.ontology_pairs:
                if o1id == o2id:
                    has_self_comparison = True
        for (v, syns) in items:
            sum_nsyns += len(syns)
            i += 1
            if i % 1000 == 1:
                logging.info('{}/{} lexical items avgSyns={}, skipped={}'.format(
                    i, len(items), sum_nsyns / len(items), n_skipped))
            if len(syns) < 2:
                n_skipped += 1
                continue
            if len(syns) > 10:
                logging.info('Syns for {} = {}'.format(v, len(syns)))
            for s1 in syns:
                s1oid = s1.ontology.id
                s1cid = s1.class_id
                for s2 in syns:
                    # optimization step: although this is redundant with _is_comparable,
                    # we avoid inefficient additional calls
                    if s1oid == s2.ontology.id and not has_self_comparison:
                        continue
                    if s1cid != s2.class_id:
                        if self._is_comparable(s1, s2):
                            g.add_edge(s1.class_id, s2.class_id, syns=(s1, s2))

        logging.info("getting best supporting synonym pair for each match")
        # graph of best matches
        xg = nx.Graph()
        for i in g.nodes():
            for j in g.neighbors(i):
                best = 0
                bestm = None
                for m in g.get_edge_data(i, j).values():
                    (s1, s2) = m['syns']
                    score = self._combine_syns(s1, s2)
                    if score > best:
                        best = score
                        bestm = m
                syns = bestm['syns']
                xg.add_edge(i, j,
                            score=best,
                            lexscore=best,
                            syns=syns,
                            idpair=(i, j))

        self.score_xrefs_by_semsim(xg)
        self.assign_best_matches(xg)
        if self.merged_ontology.xref_graph is not None:
            self.compare_to_xrefs(xg, self.merged_ontology.xref_graph)
        else:
            logging.error("No xref graph for merged ontology")
        logging.info("finished xref graph")
        return xg

    # true if syns s1 and s2 should be compared.
    # - if ontology_pairs is set, then only consider (s1,s2) if their respective
    #   source ontologies are in the list of pairs
    # - otherwise compare all classes, but only in one direction
    def _is_comparable(self, s1, s2):
        if s1.class_id == s2.class_id:
            return False
        if self.ontology_pairs is not None:
            #logging.debug('TEST: {}{} in {}'.format(s1.ontology.id, s2.ontology.id, self.ontology_pairs))
            return (s1.ontology.id, s2.ontology.id) in self.ontology_pairs
        else:
            return s1.class_id < s2.class_id

    def _blanket(self, nid):
        nodes = set()
        for ont in self.id_to_ontology_map[nid]:
            nodes.update(ont.ancestors(nid))
            nodes.update(ont.descendants(nid))
        return list(nodes)

    def score_xrefs_by_semsim(self, xg, ont=None):
        """
        Given an xref graph (see ref:`get_xref_graph`), this will adjust scores
        based on the semantic similarity of matches.
        """
        logging.info("scoring xrefs by semantic similarity for {} nodes in {}".format(
            len(xg.nodes()), ont))
        for (i, j, d) in xg.edges(data=True):
            pfx1 = self._id_to_ontology(i)
            pfx2 = self._id_to_ontology(j)
            ancs1 = self._blanket(i)
            ancs2 = self._blanket(j)
            s1, _, _ = self._sim(xg, ancs1, ancs2, pfx1, pfx2)
            s2, _, _ = self._sim(xg, ancs2, ancs1, pfx2, pfx1)
            s = 1 - ((1 - s1) * (1 - s2))
            logging.debug("Score {} x {} = {} x {} = {} // {}".format(i, j, s1, s2, s, d))
            xg[i][j][self.SIMSCORES] = (s1, s2)
            xg[i][j][self.SCORE] *= s

    def _sim(self, xg, ancs1, ancs2, pfx1, pfx2):
        """
        Compare two lineages
        """
        xancs1 = set()
        for a in ancs1:
            if a in xg:
                # TODO: restrict this to neighbors in single ontology
                for n in xg.neighbors(a):
                    pfx = self._id_to_ontology(n)
                    if pfx == pfx2:
                        xancs1.add(n)
        logging.debug('SIM={}/{} ## {} // {}'.format(
            len(xancs1.intersection(ancs2)), len(xancs1),
            xancs1.intersection(ancs2), xancs1))
        n_shared = len(xancs1.intersection(ancs2))
        n_total = len(xancs1)
        return (1 + n_shared) / (1 + n_total), n_shared, n_total

    # given an ontology class id,
    # return map keyed by ontology id, value is a list of (score, ext_class_id) pairs
    def _neighborscores_by_ontology(self, xg, nid):
        xrefmap = defaultdict(list)
        for x in xg.neighbors(nid):
            score = xg[nid][x][self.SCORE]
            for ont in self.id_to_ontology_map[x]:
                xrefmap[ont.id].append((score, x))
        return xrefmap

    # normalize direction
    def _dirn(self, edge, i, j):
        if edge['idpair'] == (i, j):
            return 'fwd'
        elif edge['idpair'] == (j, i):
            return 'rev'
        else:
            return None

    def _id_to_ontology(self, id):
        return self.merged_ontology.prefix(id)
        #onts = self.id_to_ontology_map[id]
        #if len(onts) > 1:
        #    logging.warning(">1 ontology for {}".format(id))

    def compare_to_xrefs(self, xg1, xg2):
        """
        Compares a base xref graph with another one
        """
        ont = self.merged_ontology
        for (i, j, d) in xg1.edges(data=True):
            ont_left = self._id_to_ontology(i)
            ont_right = self._id_to_ontology(j)
            unique_lr = True
            num_xrefs_left = 0
            same_left = False
            if i in xg2:
                for j2 in xg2.neighbors(i):
                    ont_right2 = self._id_to_ontology(j2)
                    if ont_right2 == ont_right:
                        unique_lr = False
                        num_xrefs_left += 1
                        if j2 == j:
                            same_left = True
            unique_rl = True
            num_xrefs_right = 0
            same_right = False
            if j in xg2:
                for i2 in xg2.neighbors(j):
                    ont_left2 = self._id_to_ontology(i2)
                    if ont_left2 == ont_left:
                        unique_rl = False
                        num_xrefs_right += 1
                        if i2 == i:
                            same_right = True
            (x, y) = d['idpair']
            xg1[x][y]['left_novel'] = num_xrefs_left == 0
            xg1[x][y]['right_novel'] = num_xrefs_right == 0
            xg1[x][y]['left_consistent'] = same_left
            xg1[x][y]['right_consistent'] = same_right

    def assign_best_matches(self, xg):
        """
        For each node in the xref graph, tag best match edges
        """
        logging.info("assigning best matches for {} nodes".format(len(xg.nodes())))
        for i in xg.nodes():
            xrefmap = self._neighborscores_by_ontology(xg, i)
            for (ontid, score_node_pairs) in xrefmap.items():
                score_node_pairs.sort(reverse=True)
                (best_score, best_node) = score_node_pairs[0]
                logging.info("BEST for {}: {} in {} from {}".format(
                    i, best_node, ontid, score_node_pairs))
                edge = xg[i][best_node]
                dirn = self._dirn(edge, i, best_node)
                best_kwd = 'best_' + dirn
                if len(score_node_pairs) == 1 or score_node_pairs[0] > score_node_pairs[1]:
                    edge[best_kwd] = 2
                else:
                    edge[best_kwd] = 1
                for (score, j) in score_node_pairs:
                    edge_ij = xg[i][j]
                    dirn_ij = self._dirn(edge_ij, i, j)
                    edge_ij['cpr_' + dirn_ij] = score / sum(
                        [s for s, _ in score_node_pairs])
        for (i, j, edge) in xg.edges(data=True):
            # reciprocal score is set if (A) i is best for j, and (B) j is best for i
            rs = 0
            if 'best_fwd' in edge and 'best_rev' in edge:
                rs = edge['best_fwd'] * edge['best_rev']
            edge['reciprocal_score'] = rs
            edge['cpr'] = edge['cpr_fwd'] * edge['cpr_rev']

    def _best_match_syn(self, sx, sys, scope_map):
        """
        The best match is determined by the highest magnitude weight
        """
        SUBSTRING_WEIGHT = 0.2
        WBEST = None
        sbest = None
        sxv = self._standardize_label(sx.val)
        sxp = self._id_to_ontology(sx.class_id)
        for sy in sys:
            syv = self._standardize_label(sy.val)
            syp = self._id_to_ontology(sy.class_id)
            W = None
            if sxv == syv:
                confidence = sx.confidence * sy.confidence
                if sx.is_abbreviation() or sy.is_abbreviation():
                    confidence *= self._get_config_val(sxp, 'abbreviation_confidence', 0.5)
                    confidence *= self._get_config_val(syp, 'abbreviation_confidence', 0.5)
                W = scope_map[sx.scope()][sy.scope()] + logit(confidence / 2)
            elif sxv in syv:
                W = np.array((-SUBSTRING_WEIGHT, SUBSTRING_WEIGHT, 0, 0))
            elif syv in sxv:
                W = np.array((SUBSTRING_WEIGHT, -SUBSTRING_WEIGHT, 0, 0))
            if W is not None:
                # The best match is determined by the highest magnitude weight
                if WBEST is None or max(abs(W)) > max(abs(WBEST)):
                    WBEST = W
                    sbest = sy
        return WBEST, sbest

    def weighted_axioms(self, x, y, xg):
        """
        return a tuple (sub,sup,equiv,other) indicating estimated prior
        probabilities for an interpretation of a mapping between x and y.

        See kboom paper
        """
        # TODO: allow additional weighting
        # weights are log odds w=log(p/(1-p))
        # (Sub, Sup, Eq, Other)
        scope_pairs = [
            ('label',   'label',   0.0,  0.0,  3.0, -0.8),
            ('label',   'exact',   0.0,  0.0,  2.5, -0.5),
            ('label',   'broad',  -1.0,  1.0,  0.0,  0.0),
            ('label',   'narrow',  1.0, -1.0,  0.0,  0.0),
            ('label',   'related', 0.0,  0.0,  0.0,  0.0),
            ('exact',   'exact',   0.0,  0.0,  2.5, -0.5),
            ('exact',   'broad',  -1.0,  1.0,  0.0,  0.0),
            ('exact',   'narrow',  1.0, -1.0,  0.0,  0.0),
            ('exact',   'related', 0.0,  0.0,  0.0,  0.0),
            ('related', 'broad',  -0.5,  0.5,  0.0,  0.0),
            ('related', 'narrow',  0.5, -0.5,  0.0,  0.0),
            ('related', 'related', 0.0,  0.0,  0.0,  0.0),
            ('broad',   'broad',   0.0,  0.0,  0.0,  1.0),
            ('broad',   'narrow', -0.5,  0.5,  0.0,  0.2),
            ('narrow',  'narrow',  0.0,  0.0,  0.0,  0.0)
        ]
        # populate symmetric lookup matrix
        scope_map = defaultdict(dict)
        for (l, r, w1, w2, w3, w4) in scope_pairs:
            l = l.upper()
            r = r.upper()
            scope_map[l][r] = np.array((w1, w2, w3, w4))
            scope_map[r][l] = np.array((w2, w1, w3, w4))

        # TODO: get prior based on ontology pair
        # cumulative sum of weights
        WS = None
        pfx1 = self._id_to_ontology(x)
        pfx2 = self._id_to_ontology(y)
        for mw in self.config.get('match_weights', []):
            mpfx1 = mw.get('prefix1', '')
            mpfx2 = mw.get('prefix2', '')
            X = np.array(mw['weights'])
            if mpfx1 == pfx1 and mpfx2 == pfx2:
                WS = X
            elif mpfx2 == pfx1 and mpfx1 == pfx2:
                WS = self._flipweights(X)
            elif mpfx1 == pfx1 and mpfx2 == '' and WS is None:
                WS = X
            elif mpfx2 == pfx1 and mpfx1 == '' and WS is None:
                WS = self._flipweights(X)
        if WS is None:
            WS = np.array((0.0, 0.0, 0.0, 0.0))

        # defaults
        WS += np.array(self.config.get('default_weights', [0.0, 0.0, 1.5, -0.1]))
        logging.info('WS defaults={}'.format(WS))

        for xw in self.config.get('xref_weights', []):
            left = xw.get('left', '')
            right = xw.get('right', '')
            X = np.array(xw['weights'])
            if x == left and y == right:
                WS += X
                logging.info('MATCH: {} for {}-{}'.format(X, x, y))
            elif y == left and x == right:
                WS += self._flipweights(X)
                logging.info('IMATCH: {}'.format(X))

        smap = self.smap
        # TODO: symmetrical
        WT = np.array((0.0, 0.0, 0.0, 0.0))
        WBESTMAX = np.array((0.0, 0.0, 0.0, 0.0))
        n = 0
        for sx in smap[x]:
            WBEST, _ = self._best_match_syn(sx, smap[y], scope_map)
            if WBEST is not None:
                WT += WBEST
                n += 1
                if max(abs(WBEST)) > max(abs(WBESTMAX)):
                    WBESTMAX = WBEST
        for sy in smap[y]:
            WBEST, _ = self._best_match_syn(sy, smap[x], scope_map)
            if WBEST is not None:
                WT += WBEST
                n += 1
        # average best match
        if n > 0:
            logging.info('Adding BESTMAX={}'.format(WBESTMAX))
            WS += WBESTMAX

        # TODO: xref, many to many
        WS += self._graph_weights(x, y, xg)

        # TODO: include additional defined weights, eg ORDO
        logging.info('Adding WS, gw={}'.format(WS))

        # jaccard similarity
        (ss1, ss2) = xg[x][y][self.SIMSCORES]
        WS[3] += ((1 - ss1) + (1 - ss2)) / 2

        # reciprocal best hits are higher confidence of equiv
        rs = xg[x][y]['reciprocal_score']
        if rs == 4:
            WS[2] += 0.5
        if rs == 0:
            WS[2] -= 0.2

        #P = scipy.special.expit(WS)
        P = 1 / (1 + np.exp(-WS))
        logging.info('Final WS={}, init P={}'.format(WS, P))
        # probs should sum to 1.0
        P = P / np.sum(P)
        return P

    def _graph_weights(self, x, y, xg):
        ont = self.merged_ontology
        xancs = ont.ancestors(x)
        yancs = ont.ancestors(y)
        pfx = self._id_to_ontology(x)
        pfy = self._id_to_ontology(y)
        xns = [n for n in xg.neighbors(y)
               if n != x and pfx == self._id_to_ontology(n)]
        yns = [n for n in xg.neighbors(x)
               if n != y and pfy == self._id_to_ontology(n)]
        pweight = 1.0
        W = np.array((0.0, 0.0, 0.0, 0.0))
        card = '11'
        if len(xns) > 0:
            card = 'm1'
        for x2 in xns:
            if x2 in xancs:
                W[0] += pweight
            if x in ont.ancestors(x2):
                W[1] += pweight
        if len(yns) > 0:
            if card == '11':
                card = '1m'
            else:
                card = 'mm'
        for y2 in yns:
            if y2 in yancs:
                W[1] += pweight
            if y in ont.ancestors(y2):
                W[0] += pweight
        logging.debug('CARD: {}/{} <-> {}/{} = {} // X={} Y={} // W={}'.format(
            x, pfx, y, pfy, card, xns, yns, W))

        invcard = card
        if card == '1m':
            invcard = 'm1'
        elif card == 'm1':
            invcard = '1m'

        CW = None
        DEFAULT_CW = None
        for cw in self.config.get('cardinality_weights', []):
            if 'prefix1' not in cw and 'prefix2' not in cw:
                if card == cw['cardinality']:
                    DEFAULT_CW = np.array(cw['weights'])
                if invcard == cw['cardinality']:
                    DEFAULT_CW = self._flipweights(np.array(cw['weights']))
            if 'prefix1' in cw and 'prefix2' in cw:
                if pfx == cw['prefix1'] and pfy == cw['prefix2'] and card == cw['cardinality']:
                    CW = np.array(cw['weights'])
                if pfx == cw['prefix2'] and pfy == cw['prefix1'] and invcard == cw['cardinality']:
                    CW = self._flipweights(np.array(cw['weights']))
        if CW is None:
            if DEFAULT_CW is not None:
                CW = DEFAULT_CW
            elif card == '11':
                CW = np.array((0.0, 0.0, 1.0, 0.0))
            elif card == '1m':
                CW = np.array((0.6, 0.4, 0.0, 0.0))
            elif card == 'm1':
                CW = np.array((0.4, 0.6, 0.0, 0.0))
            elif card == 'mm':
                CW = np.array((0.2, 0.2, 0.0, 0.5))
        return W + CW

    def _flipweights(self, W):
        return np.array((W[1], W[0], W[2], W[3]))

    def grouped_mappings(self, id):
        """
        return all mappings for a node, grouped by ID prefix
        """
        g = self.get_xref_graph()
        m = {}
        for n in g.neighbors(id):
            [prefix, local] = n.split(':', 1)
            if prefix not in m:
                m[prefix] = []
            m[prefix].append(n)
        return m

    def unmapped_nodes(self, xg, rs_threshold=0):
        unmapped_set = set()
        for nid in self.merged_ontology.nodes():
            if nid in xg:
                for (j, edge) in xg[nid].items():
                    rs = edge.get('reciprocal_score', 0)
                    if rs < rs_threshold:
                        unmapped_set.add(nid)
            else:
                unmapped_set.add(nid)
        return unmapped_set

    def unmapped_dataframe(self, xg, **args):
        unodes = self.unmapped_nodes(xg, **args)
        ont = self.merged_ontology
        eg = ont.equiv_graph()
        items = []
        for n in unodes:
            mapped_equivs = ''
            if n in eg:
                equivs = set(eg.neighbors(n))
                mapped_equivs = list(equivs - unodes)
            items.append(dict(id=n, label=ont.label(n), mapped_equivs=mapped_equivs))
        df = pd.DataFrame(items, columns=['id', 'label', 'mapped_equivs'])
        df = df.sort_values(["id"])
        return df

    # scores a pairwise combination of synonyms. This will be a mix of
    # * individual confidence in the synonyms themselves
    # * confidence of equivalence based on scopes
    # TODO: unify this with probabilistic calculation
    def _combine_syns(self, s1, s2):
        cpred = self._combine_preds(s1.pred, s2.pred)
        s = self._pred_score(cpred)
        s *= s1.confidence * s2.confidence
        if s1.is_abbreviation() or s2.is_abbreviation():
            s *= self._get_config_val(self._id_to_ontology(s1.class_id),
                                      'abbreviation_confidence', 0.5)
            s *= self._get_config_val(self._id_to_ontology(s2.class_id),
                                      'abbreviation_confidence', 0.5)
        logging.debug("COMBINED: {} + {} = {}/{}".format(s1, s2, cpred, s))
        return round(s)

    def _rollup(self, p):
        if p == 'label':
            return LABEL_OR_EXACT
        if p == 'hasExactSynonym':
            return LABEL_OR_EXACT
        return p

    def _combine_preds(self, p1, p2):
        if p1 == p2:
            return p1
        if self._rollup(p1) == self._rollup(p2):
            return self._rollup(p1)
        return p1 + p2

    ## TODO: allow this to be weighted by ontology
    def _pred_score(self, p):
        if p == 'label':
            return 100
        if p == LABEL_OR_EXACT:
            return 90
        if p == 'hasExactSynonym':
            return 90
        return 50

    def _in_clique(self, x, cliques):
        for s in cliques:
            if x in s:
                return s
        return set()

    def as_dataframe(self, xg):
        cliques = self.cliques(xg)
        ont = self.merged_ontology
        items = []
        for (x, y, d) in xg.edges(data=True):
            # xg is a non-directional Graph object.
            # to get a deterministic ordering we use the idpair key
            (x, y) = d['idpair']
            (s1, s2) = d['syns']
            (ss1, ss2) = d['simscores']
            clique = self._in_clique(x, cliques)
            #ancs = nx.ancestors(g,x)
            left_label = ont.label(x)
            right_label = ont.label(y)
            if ont.is_obsolete(x) and not left_label.startswith('obsolete'):
                left_label = "obsolete " + left_label
            if ont.is_obsolete(y) and not right_label.startswith('obsolete'):
                right_label = "obsolete " + right_label
            P = self.weighted_axioms(x, y, xg)
            item = {
                'left': x, 'left_label': left_label,
                'right': y, 'right_label': right_label,
                'score': d['score'],
                'left_match_type': s1.pred,
                'right_match_type': s2.pred,
                'left_match_val': s1.val,
                'right_match_val': s2.val,
                'left_simscore': ss1,
                'right_simscore': ss2,
                'reciprocal_score': d.get('reciprocal_score', 0),
                'conditional_pr_equiv': d.get('cpr'),
                'pr_subClassOf': P[0],
                'pr_superClassOf': P[1],
                'pr_equivalentTo': P[2],
                'pr_other': P[3],
                'left_novel': d.get('left_novel'),
                'right_novel': d.get('right_novel'),
                'left_consistent': d.get('left_consistent'),
                'right_consistent': d.get('right_consistent'),
                'equiv_clique_size': len(clique)
            }
            items.append(item)
        ix = [
            'left', 'left_label', 'right', 'right_label',
            'left_match_type', 'right_match_type',
            'left_match_val', 'right_match_val',
            'score', 'left_simscore', 'right_simscore',
            'reciprocal_score', 'conditional_pr_equiv',
            'pr_subClassOf', 'pr_superClassOf', 'pr_equivalentTo', 'pr_other',
            'left_novel', 'right_novel',
            'left_consistent', 'right_consistent',
            'equiv_clique_size'
        ]
        df = pd.DataFrame(items, columns=ix)
        df = df.sort_values(["left", "score", "right"])
        return df

    def cliques(self, xg):
        """
        Return all equivalence set cliques, assuming each edge in the xref graph
        is treated as equivalent, and all edges in ontology are subClassOf

        Arguments
        ---------
        xg : Graph
            an xref graph

        Returns
        -------
        list of sets
        """
        g = nx.DiGraph()
        for (x, y) in self.merged_ontology.get_graph().edges():
            g.add_edge(x, y)
        for (x, y) in xg.edges():
            g.add_edge(x, y)
            g.add_edge(y, x)
        return list(nx.strongly_connected_components(g))
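
# End-to-end usage sketch (illustrative only): index two ontologies, derive
# the lexical xref graph, and export the scored mappings. The .obo paths are
# hypothetical placeholders; ontology handles come from ontobio's
# OntologyFactory, which this module already builds on.
def _example_lexmap():
    from ontobio import OntologyFactory
    factory = OntologyFactory()
    ont_a = factory.create("hp.obo")  # placeholder path
    ont_b = factory.create("mp.obo")  # placeholder path
    lexmap = LexicalMapEngine()
    lexmap.index_ontologies([ont_a, ont_b])
    # optionally restrict matching to directed ontology pairs
    lexmap.ontology_pairs = [(ont_a.id, ont_b.id)]
    xg = lexmap.get_xref_graph()
    # each edge carries score, simscores, reciprocal_score, cpr, ...
    return lexmap.as_dataframe(xg)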