Пример #1
0
def intensional_proposal(best, A, B):
    """Build a proposed set of articulations from the tipward matches.

    The matches come from ``tipward(best, A, B)`` and are grouped by target
    with ``index_by_target``.  For every target ``y`` the function looks at
    how many matches arrive at ``y`` and how many arrive at ``y``'s own
    back-match ``x0``, and records articulations in ``result`` through
    ``art.proclaim`` / ``art.half_proclaim`` accordingly.

    Returns ``result``, which starts as an empty dict and is only ever
    modified by those ``art`` helpers.
    NOTE(review): the key/value layout of ``result`` is decided inside
    ``art.proclaim`` / ``art.half_proclaim``, which are not visible here.
    """
    matches = tipward(best, A, B)

    result = {}

    # presumably maps each target node to the list of matches pointing at
    # it -- TODO confirm against index_by_target
    incoming = index_by_target(matches)

    # Suppose `node` x comes from the A checklist, and there is a split
    # such that x matches multiple nodes y1, y2 in the B checklist.

    # Modify the relation for all approximate-match nodes.
    for y in incoming:  # Many x's, one y
        arts = incoming[y]

        # Canonical.  back.cod will be among the incoming, by construction.
        back = matches.get(y)  # back : y -> x
        # No match leaving y: nothing to anchor on, skip this target.
        if not back: continue
        x0 = back.cod  # Back match y -> x -> y

        # Matches arriving at x0, i.e. the view from the other side.
        # NOTE(review): raises KeyError if x0 is not a key of `incoming`;
        # the comment above says this cannot happen by construction.
        revarts = incoming[x0]

        if len(arts) > 1:  # multiple x's
            if len(revarts) > 1:
                # Many-to-many: keep only the canonical pair as an equality
                # and log both sides of the tangle.
                art.proclaim(result, art.set_relation(back, rel.eq))
                dribble.log("** Tangle:\n   %s\n   %s" %
                            ("\n   ".join(map(art.express, arts)),
                             ("\n   ".join(map(art.express, revarts)))))
                # NOTE(review): this repeats the proclaim made just before
                # the log call; it looks redundant -- confirm art.proclaim
                # is idempotent before removing it.
                art.proclaim(result, art.set_relation(back, rel.eq))
            else:

                # OK.  We're going to just throw away all non-sibling matches.

                # Keep only the incoming matches whose source node has the
                # same parent as x0.
                rent = cl.get_parent(x0)
                sibs = [ar for ar in arts if cl.get_parent(ar.dom) == rent]
                # e.g. ar: x2 -> y
                # Don't even try to do anything with N->M node tangles.
                if len(sibs) == 1:
                    # Only one sibling survives: record the back match as
                    # an equality.
                    art.proclaim(result, art.set_relation(back, rel.eq))
                else:
                    # NOTE(review): this branch is also taken when `sibs`
                    # is empty; confirm that is intended.
                    for sib in sibs:
                        # Re-label each surviving sibling match as rel.lt.
                        ar = art.change_relation(sib, rel.lt, "merge", "split")
                        if sib.dom == x0:
                            # The canonical sibling gets a full proclaim.
                            art.proclaim(result, ar)  # gt
                        else:
                            art.half_proclaim(result, ar)
                    # Also relate y to the siblings' shared parent.
                    art.half_proclaim(
                        result, art.bridge(y, rent, rel.lt, "split", "merge"))
                    # Report!
                    dribble.log(
                        "# Split/lump %s < %s < %s" %
                        (" + ".join(map(lambda e: cl.get_unique(e.dom), sibs)),
                         cl.get_unique(y), cl.get_unique(rent)))

        elif len(revarts) > 1:  # multiple y's
            # Nothing is recorded for this y in this branch.
            pass
        else:
            # One match in each direction: record the equality.
            # n.b. arts[0] is reverse of back
            art.proclaim(result, art.set_relation(back, rel.eq))

    return result
Пример #2
0
def choose_best_match(arts):  # => art
    """Pick the single best match out of `arts`, if there is exactly one.

    `arts` must satisfy `is_matches`.  The candidates are narrowed with
    `skim_best_matches`; when exactly one remains it is returned.

    Returns None when `arts` is empty, and also when several candidates
    remain after skimming (the tie is reported through `dribble.log`).
    """
    assert is_matches(arts)
    if len(arts) == 0:
        return None

    candidates = skim_best_matches(arts)
    first = candidates[0]
    if len(candidates) == 1:
        return first

    # More than one candidate survived: report the tie and give up.
    dribble.log("** Multiple least-bad matches. Need to find tie-breakers.")
    source_name = cl.get_unique(first.dom)
    target_names = [cl.get_unique(candidate.cod) for candidate in candidates]
    dribble.log("   %s -> %s" % (source_name, target_names))
    return None
Пример #3
0
 def process(node):
     """Record the merged parent (or root status) of `node`, then recurse.

     `node` is injected into the alignment with `inject`; unless the merged
     node already has an entry in `parents`, `merged_parent` is consulted.
     A truthy answer is stored in `parents`; otherwise the merged node is
     appended to `roots` (once).  All children of `node` are then processed
     the same way.

     NOTE: `al`, `parents` and `roots` are free variables taken from the
     enclosing scope, which is not visible in this excerpt.
     """
     merged = inject(node, al)
     if merged not in parents:
         parent_pair = merged_parent(merged, al)
         watched = dribble.watch(node)
         if parent_pair:
             if watched:
                 (x, y) = parent_pair
                 dribble.log("# Merged parent(%s) = (%s, %s)" %
                             (cl.get_unique(node), cl.get_unique(x),
                              cl.get_unique(y)))
             parents[merged] = parent_pair
         else:
             # No merged parent was found, so this is a root.
             if watched:
                 dribble.log("# No merge(%s)" % cl.get_unique(node))
             if merged not in roots:
                 roots.append(merged)

     for child in cl.get_children(node):
         process(child)
Пример #4
0
def dump_alignment(al, out):
    """Write the articulations of alignment `al` to `out` as text.

    An articulation is written when its `dom` is less than its `cod`, or
    when `is_mutual(ar, al)` is false for it.  Lines are ordered by the
    (smaller, larger) pair of the two endpoints and have the form
    ``[<dom> <relation name> <cod>]``.  A blank line ends the dump.
    """

    def endpoint_pair(ar):
        # Order-independent key: smaller endpoint first.
        return (ar.dom, ar.cod) if ar.dom < ar.cod else (ar.cod, ar.dom)

    selected = []
    for ar in al.values():
        if ar.dom < ar.cod or not is_mutual(ar, al):
            selected.append(ar)
    selected.sort(key=endpoint_pair)

    for ar in selected:
        line = "[%s %s %s]\n" % (cl.get_unique(ar.dom),
                                 ar.relation.name,
                                 cl.get_unique(ar.cod))
        out.write(line)
    out.write("\n")
Пример #5
0
 def filter(node):
     """Keep only the most tipward matches in the subtree under `node`.

     Every child is visited first.  If any child reports a match, `node`
     itself is not kept and the last such match is passed upward.
     Otherwise, if `node` has an entry in `amap`, that articulation is
     stored in `tw` under its `dom` and returned.  Returns None when
     neither `node` nor any descendant is matched.

     NOTE: `amap` and `tw` are free variables taken from the enclosing
     scope, which is not visible in this excerpt.
     """
     watched = dribble.watch(node)

     # Visit all children (each visit may record into `tw`); the last
     # truthy result wins.
     descendant_match = None
     for child in cl.get_children(node):
         child_match = filter(child)
         if child_match:
             descendant_match = child_match

     if descendant_match:    # Some descendant is a particle
         if watched:
             dribble.log("# %s: descendant matches, not keeping: %s" %
                         (cl.get_unique(node), art.express(descendant_match)))
         return descendant_match

     if node not in amap:
         if watched:
             dribble.log("# %s is unmatched" % cl.get_unique(node))
         return None

     own_match = amap[node]
     tw[own_match.dom] = own_match
     if watched:
         dribble.log("# %s is a tipward match, keeping: %s" %
                     (cl.get_unique(node), art.express(own_match)))
     return own_match
Пример #6
0
def analyze_cross_mrcas(A, B, tipwards):
    """Compute, for nodes of A and B, their cross-MRCA in the other checklist.

    Both checklists are walked from their roots (`cl.get_roots`).  For a
    node with an entry in `tipwards`, the cross-MRCA is that entry's `cod`.
    For any other node it is the `cl.mrca` of the cross-MRCAs of those
    children that have one.  Nodes for which nothing is found get no entry.

    After both walks a sanity pass checks that following a cross-MRCA back
    again lands in the original node's checklist, and logs every node whose
    cross-MRCA has no cross-MRCA of its own.

    Returns the dict mapping node -> cross-MRCA node.
    """
    cross_mrcas = {}

    def half_analyze_cross_mrcas(checklist, other):
        def subanalyze_cross_mrcas(node, other):
            result = None
            probe = tipwards.get(node)
            if probe:
                # Could be: = < or >
                result = probe.cod
            else:
                children = cl.get_children(node)
                if children:
                    m = None      # None is the identity for mrca
                    for child in children:
                        m2 = subanalyze_cross_mrcas(child, other)
                        if m2 is not None:
                            m = m2 if m is None else cl.mrca(m, m2)
                    if m is not None:
                        result = m
            if result:
                # The cross-MRCA must live in the opposite checklist.
                assert cl.get_checklist(result) != cl.get_checklist(node)
                if dribble.watch(node):
                    dribble.log("# Cross-mrca(%s) = %s" %
                                (cl.get_unique(node), cl.get_unique(result)))
                cross_mrcas[node] = result
            return result             # in B

        for root in cl.get_roots(checklist):
            subanalyze_cross_mrcas(root, other)

    half_analyze_cross_mrcas(A, B)
    half_analyze_cross_mrcas(B, A)

    # Sanity check
    for node, cross in cross_mrcas.items():
        probe = cross_mrcas.get(cross)
        if probe:
            assert cl.get_checklist(probe) == cl.get_checklist(node)
        else:
            dribble.log("# No return cross-MRCA for %s -> %s -> ..." %
                        (cl.get_unique(node), cl.get_unique(cross)))
    return cross_mrcas
Пример #7
0
 def subanalyze_cross_mrcas(node, other):
     """Find and record the cross-MRCA of `node`, recursing into children.

     If `tipwards` has an entry for `node`, the cross-MRCA is that entry's
     `cod`.  Otherwise it is the `cl.mrca` of the cross-MRCAs of those
     children that have one.  A truthy result is stored in `cross_mrcas`
     under `node`.

     Returns the cross-MRCA, or None when none was found.  `other` is
     passed along on recursion but not otherwise used here.

     NOTE: `tipwards` and `cross_mrcas` are free variables taken from the
     enclosing scope, which is not visible in this excerpt.
     """
     result = None
     probe = tipwards.get(node)
     if probe:
         # Could be: = < or >
         result = probe.cod
     else:
         children = cl.get_children(node)
         if children:
             m = None      # None is the identity for mrca
             for child in children:
                 m2 = subanalyze_cross_mrcas(child, other)
                 if m2 is not None:
                     m = m2 if m is None else cl.mrca(m, m2)
             if m is not None:
                 result = m
     if result:
         # The cross-MRCA must live in the opposite checklist.
         assert cl.get_checklist(result) != cl.get_checklist(node)
         if dribble.watch(node):
             dribble.log("# Cross-mrca(%s) = %s" %
                         (cl.get_unique(node), cl.get_unique(result)))
         cross_mrcas[node] = result
     return result             # in B
Пример #8
0
def express_proof(proof):
    """Render a non-refinement proof as text, log it, and return the text.

    `proof` is a triple ``(c, d, e)``.  Only when all three elements are
    truthy is there a conflict to report; in that case the expression is
    built, logged through `dribble.log`, and returned.

    Returns None when any element of the triple is falsy.  (Previously
    this case raised UnboundLocalError, because the result variable was
    only assigned inside the conflict branch.)

    NOTE: `yk`, `x` and `y` are free variables that are not defined in
    this block; they must be supplied by the surrounding scope.
    """
    (c, d, e) = proof
    # Assume resolution (x < y) until conflict is proven
    # assume potential child until proven otherwise
    if not (c and d and e):
        return None

    proof_expression = (">< %s [%s, %s, %s]" %
                        (cl.get_unique(yk), cl.get_unique(c),
                         cl.get_unique(d), cl.get_unique(e)))
    dribble.log(
        "** %s doesn't refine %s because\n   %s\n   yk [in x, in both, in yk]"
        % (cl.get_unique(x), cl.get_unique(y), proof_expression))
    # Should squirrel away the proof somewhere!
    return proof_expression
Пример #9
0
def express(ar):
    """Return a one-line text rendering of articulation `ar`.

    The text is ``"<dom> <relation name> <cod>"``, with both endpoints
    rendered by `cl.get_unique`.  Returns the string ``"none"`` when `ar`
    is None.
    """
    if ar is None:
        return "none"
    return "%s %s %s" % (cl.get_unique(ar.dom),
                         ar.relation.name,
                         cl.get_unique(ar.cod))
Пример #10
0
def extensional_match(node, xmrcas):
    """Build the extensional articulation between `node` and its partner.

    The partner is `xmrcas.get(node)`, and `back` is the partner's own
    entry in `xmrcas`.  The relation between `node` and `back`
    (`cl.how_related`) decides the outcome:

    * rel.eq       -> rel.matches, reason "mutual-cross-mrca"
    * rel.gt       -> rel.matches, reason "monotypic-inversion"
    * rel.disjoint -> kept as is, reason "particle-set-exclusion"
    * otherwise    -> kept as is with reason "refinement", unless
      `cross_compare` against one of the partner's children yields both a
      `d` and an `e`, in which case the relation becomes rel.conflict.

    Returns the articulation made by `art.extensional`, or None when
    `node` has no partner or the partner has no entry of its own.
    """
    partner = xmrcas.get(node)      # node in other checklist; 'conode'
    if not partner:
        # Descendant of a particle
        if dribble.watch(node):
            dribble.log("# EM: %s is not tipward." % cl.get_unique(node))
        return None

    back = xmrcas.get(partner)    # 'bounce'
    if not back:
        # Not sure how this can happen but it does (NCBI vs. GBIF)
        dribble.log("%s <= %s <= nowhere" % (cl.get_unique(node),
                                             cl.get_unique(partner)))
        if dribble.watch(node):
            dribble.log("# EM: %s killed because aborted round trip." % cl.get_unique(node))
        return None

    # node <= partner <= back
    how = cl.how_related(node, back)    # Always rcc5
    if how == rel.eq:
        # Should end up being eq iff name match or unique match
        # Can test for unique match by looking at xmrca of parent

        # Could be part of a 'monotypic' chain; fix later
        how = rel.matches
        reason = "mutual-cross-mrca"
    elif how == rel.gt:
        how = rel.matches
        reason = "monotypic-inversion"
    elif how == rel.disjoint:
        reason = "particle-set-exclusion"
    else:               # must be rel.lt
        # Assume resolution (node < partner) until conflict is proven
        reason = "refinement"
        # Look for an intersection between any partner-child and node
        # x is in A checklist, y is in B checklist
        for pchild in cl.get_children(partner):
            if xmrcas.get(pchild) is None:
                # pchild ! node
                continue
            (d, e) = cross_compare(node, pchild, xmrcas)
            # d < node while e ! node
            if d and e:
                how = rel.conflict
                reason = ("%s is in; its sibling %s is not" %
                          (cl.get_unique(d), cl.get_unique(e)))
                dribble.log("** %s conflicts with %s because\n"
                            "   %s ! %s\n   (but sibling %s < %s)" %
                            (cl.get_unique(node),
                             cl.get_unique(partner),
                             cl.get_unique(e),
                             cl.get_unique(node),
                             cl.get_unique(d),
                             cl.get_unique(node)))
                # One conflicting child is enough.
                break
            elif e:
                reason = ("%s is not in it" % cl.get_unique(e))

    ar = art.extensional(node, partner, how, reason)
    if dribble.watch(node):
        dribble.log("# Extensional articulation %s" % art.express(ar))
    return ar
Пример #11
0
def node_data(node):
    """Return the (taxon id, unique name, nominal rank) triple for `node`.

    The three values come from `cl.get_taxon_id`, `cl.get_unique` and
    `cl.get_nominal_rank`.  A falsy `node` yields (None, None, None).
    """
    if not node:
        return (None, None, None)
    taxon_id = cl.get_taxon_id(node)
    unique_name = cl.get_unique(node)
    nominal_rank = cl.get_nominal_rank(node)
    return (taxon_id, unique_name, nominal_rank)