def intensional_proposal(best, A, B):
    """Build a proposal dict of articulations from the tipward matches.

    The matches come from `tipward(best, A, B)` and are grouped with
    `index_by_target`.  For every key `y` of that index which also has
    a back match `y -> x0` in `matches`, the sizes of `incoming[y]`
    (`arts`) and `incoming[x0]` (`revarts`) select one of four cases:

    * both have more than one entry: the tangle is logged and the back
      match is proclaimed with `rel.eq`;
    * only `arts` has more than one entry: the matches whose domain
      shares `x0`'s parent are kept; a single survivor is proclaimed
      with `rel.eq`, several are recorded as a split/lump under `y`;
    * only `revarts` has more than one entry: nothing is recorded;
    * otherwise: the back match is proclaimed with `rel.eq`.

    Returns the dict that `art.proclaim` / `art.half_proclaim` were
    called on.
    """
    matches = tipward(best, A, B)
    result = {}
    incoming = index_by_target(matches)
    # Suppose `node` x comes from the A checklist, and there is a split
    # such that x matches multiple nodes y1, y2 in the B checklist.
    # Modify the relation for all approximate-match nodes.
    for y in incoming:  # Many x's, one y
        arts = incoming[y]
        # Canonical.  back.cod will be among the incoming, by construction.
        back = matches.get(y)  # back : y -> x
        if not back: continue
        x0 = back.cod  # Back match y -> x -> y
        # NOTE(review): plain indexing; presumably x0 is always a key of
        # `incoming` (see "by construction" above) -- confirm.
        revarts = incoming[x0]
        if len(arts) > 1:  # multiple x's
            if len(revarts) > 1:
                art.proclaim(result, art.set_relation(back, rel.eq))
                dribble.log("** Tangle:\n %s\n %s" %
                            ("\n ".join(map(art.express, arts)),
                             ("\n ".join(map(art.express, revarts)))))
                # NOTE(review): same articulation as the proclaim just
                # before the log call -- confirm the repeat is intended.
                art.proclaim(result, art.set_relation(back, rel.eq))
            else:
                # OK.  We're going to just throw away all non-sibling matches.
                rent = cl.get_parent(x0)
                sibs = [ar for ar in arts if cl.get_parent(ar.dom) == rent]
                # e.g. ar: x2 -> y
                # Don't even try to do anything with N->M node tangles.
                if len(sibs) == 1:
                    art.proclaim(result, art.set_relation(back, rel.eq))
                else:
                    for sib in sibs:
                        ar = art.change_relation(sib, rel.lt, "merge", "split")
                        # Only the match whose domain is x0 is fully
                        # proclaimed; the others are half-proclaimed.
                        if sib.dom == x0:
                            art.proclaim(result, ar)  # gt
                        else:
                            art.half_proclaim(result, ar)
                    # Relate y to the shared parent of the siblings.
                    art.half_proclaim(
                        result, art.bridge(y, rent, rel.lt, "split", "merge"))
                    # Report!
                    dribble.log(
                        "# Split/lump %s < %s < %s" %
                        (" + ".join(map(lambda e: cl.get_unique(e.dom), sibs)),
                         cl.get_unique(y), cl.get_unique(rent)))
        elif len(revarts) > 1:  # multiple y's
            # Deliberately records nothing for this y.
            pass
        else:
            # n.b. arts[0] is reverse of back
            art.proclaim(result, art.set_relation(back, rel.eq))
    return result
def choose_best_match(arts):  # => art
    """Return the single best match among `arts`, or None.

    None is returned when `arts` is empty, and also when
    `skim_best_matches` leaves more than one candidate; the latter case
    is logged so that a tie-breaker can be added later.
    """
    assert is_matches(arts)
    if len(arts) == 0:
        return None

    candidates = skim_best_matches(arts)
    front = candidates[0]
    if len(candidates) != 1:
        # Several equally good candidates survive: report and give up.
        dribble.log("** Multiple least-bad matches. Need to find tie-breakers.")
        targets = [cl.get_unique(candidate.cod) for candidate in candidates]
        dribble.log(" %s -> %s" % (cl.get_unique(front.dom), targets))
        return None
    return front
def process(node):
    """Record where the merged image of `node` hangs, then recur.

    The merged node is `inject(node, al)`.  Unless it already has an
    entry in `parents`, `merged_parent` is consulted: a truthy result is
    stored in `parents`, otherwise the merged node is appended to
    `roots` (once).  Every child of `node` is then processed the same
    way.  `al`, `parents` and `roots` are free variables of this
    function.
    """
    merged = inject(node, al)
    if merged not in parents:
        pair = merged_parent(merged, al)
        if not pair:
            # No merged parent: it's a root.
            if dribble.watch(node):
                dribble.log("# No merge(%s)" % cl.get_unique(node))
            if merged not in roots:
                roots.append(merged)
        else:
            if dribble.watch(node):
                (left, right) = pair
                dribble.log("# Merged parent(%s) = (%s, %s)" %
                            (cl.get_unique(node), cl.get_unique(left),
                             cl.get_unique(right)))
            parents[merged] = pair
    for kid in cl.get_children(node):
        process(kid)
def dump_alignment(al, out):
    """Write the articulations of alignment `al` to `out`, one per line.

    An articulation is written when its domain orders before its
    codomain, or when `is_mutual(ar, al)` is false for it.  Lines are
    ordered by the (smaller, larger) pair of the two end nodes and have
    the form "[dom relation cod]".  A final empty line ends the dump.
    """

    def ordered_ends(ar):
        # Same key whichever direction the articulation points in.
        return (ar.dom, ar.cod) if ar.dom < ar.cod else (ar.cod, ar.dom)

    selected = []
    for ar in al.values():
        if ar.dom < ar.cod or not is_mutual(ar, al):
            selected.append(ar)
    selected.sort(key=ordered_ends)

    for ar in selected:
        line = "[%s %s %s]\n" % (cl.get_unique(ar.dom),
                                 ar.relation.name,
                                 cl.get_unique(ar.cod))
        out.write(line)
    out.write("\n")
def filter(node):
    """Keep only the tipward-most matches in the subtree rooted at `node`.

    All children are visited first.  If any of them reports a match,
    `node` itself is not kept and the last such match is passed upward.
    Otherwise, when `node` has an entry in `amap`, that articulation is
    stored in `tw` under its domain and returned.  Returns None when
    neither `node` nor any descendant is matched.  `amap` and `tw` are
    free variables of this function.
    """
    tracing = dribble.watch(node)

    below = None
    for kid in cl.get_children(node):
        hit = filter(kid)
        if hit:
            below = hit

    if below:
        # Some descendant is a particle
        if tracing:
            dribble.log("# %s: descendant matches, not keeping: %s" %
                        (cl.get_unique(node), art.express(below)))
        return below

    if node not in amap:
        if tracing:
            dribble.log("# %s is unmatched" % cl.get_unique(node))
        return None

    own = amap[node]
    tw[own.dom] = own
    if tracing:
        dribble.log("# %s is a tipward match, keeping: %s" %
                    (cl.get_unique(node), art.express(own)))
    return own
def analyze_cross_mrcas(A, B, tipwards):
    """Compute a cross-MRCA for the nodes of checklists `A` and `B`.

    A node with an entry in `tipwards` maps to that articulation's
    codomain.  Any other node maps to the `cl.mrca` of its children's
    cross-MRCAs, ignoring children that have none.  Nodes for which
    nothing is found get no entry.

    Returns a dict from node to its cross-MRCA, covering both
    checklists.  After both passes a sanity check logs every entry whose
    cross-MRCA has no entry of its own.
    """
    cross_mrcas = {}

    def half_analyze_cross_mrcas(checklist, other):
        """Fill `cross_mrcas` for every tree rooted in `checklist`."""

        def subanalyze_cross_mrcas(node, other):
            """Return the cross-MRCA of `node` (or None) and record it."""
            result = None
            probe = tipwards.get(node)
            if probe:
                # Could be: = < or >
                result = probe.cod
            else:
                children = cl.get_children(node)
                if children:
                    m = None  # None is the identity for mrca
                    for child in children:
                        m2 = subanalyze_cross_mrcas(child, other)
                        # Identity tests: never dispatch to a node's own
                        # __eq__/__ne__ just to detect "no value yet".
                        if m2 is not None:
                            m = cl.mrca(m, m2) if m is not None else m2
                    if m is not None:
                        result = m
            if result:
                # A cross-MRCA must live in the opposite checklist.
                assert cl.get_checklist(result) != cl.get_checklist(node)
                if dribble.watch(node):
                    dribble.log("# Cross-mrca(%s) = %s" %
                                (cl.get_unique(node), cl.get_unique(result)))
                cross_mrcas[node] = result
            return result  # in B

        for root in cl.get_roots(checklist):
            subanalyze_cross_mrcas(root, other)

    half_analyze_cross_mrcas(A, B)
    half_analyze_cross_mrcas(B, A)

    # Sanity check: following a cross-MRCA twice should land back in the
    # checklist we started from.
    for node, cross in cross_mrcas.items():
        probe = cross_mrcas.get(cross)
        if probe:
            assert cl.get_checklist(probe) == cl.get_checklist(node)
        else:
            dribble.log("# No return cross-MRCA for %s -> %s -> ..." %
                        (cl.get_unique(node), cl.get_unique(cross)))
    return cross_mrcas
def subanalyze_cross_mrcas(node, other):
    """Return the cross-MRCA of `node`, recording it in `cross_mrcas`.

    A node with an entry in `tipwards` yields that articulation's
    codomain.  Otherwise the result is the `cl.mrca` of the cross-MRCAs
    of its children, skipping children that have none.  Returns None
    (and records nothing) when no cross-MRCA is found.  `other` is
    passed through the recursion unchanged and is not otherwise used.
    `tipwards` and `cross_mrcas` are free variables of this function.
    """
    result = None
    probe = tipwards.get(node)
    if probe:
        # Could be: = < or >
        result = probe.cod
    else:
        children = cl.get_children(node)
        if children:
            m = None  # None is the identity for mrca
            for child in children:
                m2 = subanalyze_cross_mrcas(child, other)
                # Identity tests: never dispatch to a node's own
                # __eq__/__ne__ just to detect "no value yet".
                if m2 is not None:
                    m = cl.mrca(m, m2) if m is not None else m2
            if m is not None:
                result = m
    if result:
        # A cross-MRCA must live in the opposite checklist.
        assert cl.get_checklist(result) != cl.get_checklist(node)
        if dribble.watch(node):
            dribble.log("# Cross-mrca(%s) = %s" %
                        (cl.get_unique(node), cl.get_unique(result)))
        cross_mrcas[node] = result
    return result  # in B
def express_proof(proof):
    """Describe and log a refinement conflict, if `proof` shows one.

    `proof` is a triple `(c, d, e)`.  Only when all three members are
    truthy is there a conflict: the expression
    ">< yk [c, d, e]" is built, logged together with `x` and `y`, and
    returned.  In every other case None is returned and nothing is
    logged.  `x`, `y` and `yk` are free variables of this function.
    """
    (c, d, e) = proof
    # Assume resolution (x < y) until conflict is proven
    # assume potential child until proven otherwise
    if not (c and d and e):
        # No conflict: there is no expression to build, so say so
        # explicitly rather than reach an unbound local.
        return None

    proof_expression = (">< %s [%s, %s, %s]" %
                        (cl.get_unique(yk), cl.get_unique(c),
                         cl.get_unique(d), cl.get_unique(e)))
    dribble.log(
        "** %s doesn't refine %s because\n %s\n yk [in x, in both, in yk]" %
        (cl.get_unique(x), cl.get_unique(y), proof_expression))
    # Should squirrel away the proof somewhere!
    return proof_expression
def express(ar):
    """Return a one-line "dom relation cod" rendering of articulation `ar`.

    A missing articulation (None) is rendered as the string "none".
    """
    # Identity test, so an articulation's own __eq__ is never consulted.
    if ar is None:
        return "none"
    return "%s %s %s" % (cl.get_unique(ar.dom), ar.relation.name,
                         cl.get_unique(ar.cod))
def extensional_match(node, xmrcas):
    """Derive an extensional articulation between `node` and its partner.

    `xmrcas` maps a node to its cross-MRCA.  The partner is
    `xmrcas.get(node)` and `back` is the partner's own cross-MRCA.

    Returns None when `node` has no partner, or when the partner has no
    way back (the latter is always logged).  Otherwise the relation
    between `node` and `back` is mapped as follows and the articulation
    built by `art.extensional(node, partner, how, reason)` is returned:

    * `rel.eq`       -> `rel.matches`, "mutual-cross-mrca"
    * `rel.gt`       -> `rel.matches`, "monotypic-inversion"
    * `rel.disjoint` -> unchanged,     "particle-set-exclusion"
    * anything else  -> unchanged,     "refinement", unless a child of
      the partner is found by `cross_compare` to straddle `node`, in
      which case the relation becomes `rel.conflict`.
    """
    partner = xmrcas.get(node)  # node in other checklist; 'conode'
    if not partner:
        # Descendant of a particle
        if dribble.watch(node):
            dribble.log("# EM: %s is not tipward." % cl.get_unique(node))
        return None

    back = xmrcas.get(partner)  # 'bounce'
    if not back:
        # Not sure how this can happen but it does (NCBI vs. GBIF)
        dribble.log("%s <= %s <= nowhere" %
                    (cl.get_unique(node), cl.get_unique(partner)))
        if dribble.watch(node):
            dribble.log("# EM: %s killed because aborted round trip." %
                        cl.get_unique(node))
        return None

    # node <= partner <= back
    how = cl.how_related(node, back)  # Always rcc5
    if how == rel.eq:
        # Should end up being eq iff name match or unique match
        # Can test for unique match by looking at xmrca of parent
        # Could be part of a 'monotypic' chain; fix later
        how = rel.matches
        reason = "mutual-cross-mrca"
    elif how == rel.gt:
        how = rel.matches
        reason = "monotypic-inversion"
    elif how == rel.disjoint:
        reason = "particle-set-exclusion"
    else:  # must be rel.lt
        # Assume resolution (node < partner) until conflict is proven
        reason = "refinement"
        # Look for an intersection between any partner-child and node
        # x is in A checklist, y is in B checklist
        for pchild in cl.get_children(partner):
            pchild_back = xmrcas.get(pchild)
            if pchild_back is None:
                # pchild ! node
                continue
            (d, e) = cross_compare(node, pchild, xmrcas)
            # d < node while e ! node
            if d and e:
                how = rel.conflict
                reason = ("%s is in; its sibling %s is not" %
                          (cl.get_unique(d), cl.get_unique(e)))
                dribble.log("** %s conflicts with %s because\n"
                            " %s ! %s\n (but sibling %s < %s)" %
                            (cl.get_unique(node), cl.get_unique(partner),
                             cl.get_unique(e), cl.get_unique(node),
                             cl.get_unique(d), cl.get_unique(node)))
                # One conflicting child is enough.
                break
            elif e:
                # Not a conflict, but remember why this child is outside.
                reason = ("%s is not in it" % cl.get_unique(e))

    ar = art.extensional(node, partner, how, reason)
    if dribble.watch(node):
        dribble.log("# Extensional articulation %s" % art.express(ar))
    return ar
def node_data(node):
    """Return `(taxon id, unique name, nominal rank)` for `node`.

    A falsy `node` yields a triple of Nones instead.
    """
    if not node:
        return (None, None, None)
    taxon_id = cl.get_taxon_id(node)
    unique = cl.get_unique(node)
    rank = cl.get_nominal_rank(node)
    return (taxon_id, unique, rank)