示例#1
0
文件: align.py 项目: JosiePark/biopy
def mpat(tr, seqs) :
  """ Build a profile for the root of C{tr} by aligning progressively up the tree.

  C{seqs} is anything C{dict()} accepts, mapping a tip's taxon name to its
  sequence. Every node visited gets C{data.seq} set to a pair
  C{(profile, sequence)} in which exactly one slot is used: tips carry
  C{(None, sequence)}, internal nodes carry C{(profile, None)}.

  Each internal node must have exactly two children. Returns the profile slot
  of the last node visited, which is asserted to be the root (so a tree that
  is a single tip yields C{None}).
  """
  byTaxon = dict(seqs)
  for node in getPostOrder(tr) :
    if not node.succ :
      # Tip: keep the raw sequence, no profile yet.
      node.data.seq = (None, byTaxon[node.data.taxon])
      continue

    first, second = [tr.node(c).data.seq for c in node.succ]

    if first[1] and second[1] :
      # Two raw sequences: pairwise global alignment, then make a profile.
      pairwise = calign.globalAlign(first[1], second[1])
      node.data.seq = (calign.createProfile(pairwise), None)
      continue

    # At least one child already holds a profile; lift a raw sequence (if
    # any) to a profile so both sides can be profile-aligned.
    profA = calign.createProfile(first[1:]) if first[1] else first[0]
    profB = calign.createProfile(second[1:]) if second[1] else second[0]
    merged = calign.prof2profAlign(profA, profB)
    # trimp is defined elsewhere -- presumably trims the merged profile.
    node.data.seq = (trimp(merged), None)

  # Post-order traversal must finish on the root.
  assert node.id == tr.root
  return node.data.seq[0]
示例#2
0
文件: align.py 项目: JosiePark/biopy
def mpa(tr, seqs, scores = defaultMatchScores, trimEnd = None) :
  """ Build a profile for the root of C{tr} by aligning progressively up the tree.

  C{seqs} is anything C{dict()} accepts, mapping taxon name to sequence; tip
  taxon names are looked up with surrounding single quotes stripped.
  C{scores} is forwarded to every C{calign} alignment call. When C{trimEnd}
  is not None, each profile-to-profile result is passed through
  C{trimendsp(profile, trimEnd)} before being stored.

  Every node visited gets C{data.seq} set to C{(profile, sequence)} with
  exactly one slot used: C{(None, sequence)} on tips, C{(profile, None)} on
  internal nodes. Each internal node must have exactly two children.

  Returns the profile slot of the last node visited, which is asserted to be
  the root.
  """
  byTaxon = dict(seqs)
  for node in getPostOrder(tr) :
    if not node.succ :
      # Tip: keep the raw sequence, no profile yet.
      node.data.seq = (None, byTaxon[node.data.taxon.strip("'")])
      continue

    first, second = [tr.node(c).data.seq for c in node.succ]

    if first[1] and second[1] :
      # Two raw sequences: pairwise global alignment, then make a profile.
      pairwise = calign.globalAlign(first[1], second[1], scores = scores)
      node.data.seq = (calign.createProfile(pairwise), None)
      continue

    # At least one child already holds a profile; lift a raw sequence (if
    # any) to a profile so both sides can be profile-aligned.
    profA = calign.createProfile(first[1:]) if first[1] else first[0]
    profB = calign.createProfile(second[1:]) if second[1] else second[0]
    merged = calign.prof2profAlign(profA, profB, scores = scores)
    if trimEnd is not None :
      merged = trimendsp(merged, trimEnd)
    node.data.seq = (merged, None)

  # Post-order traversal must finish on the root.
  assert node.id == tr.root
  return node.data.seq[0]
示例#3
0
def _populateTreeWithNodeToTipDistances(tree) :
  """ Attach C{data.dtips} (distances to tips) to every node of C{tree}.

  Bottom-up pass: a tip gets C{[[[tip,0]],[],[]]}. An internal node gets one
  list per child, holding C{[tip, distance]} pairs taken from the child's
  first two C{dtips} slots with the child's branch length added to each
  distance. Every internal node except the root gets one extra empty list
  appended.

  Finishes by calling C{_populateTipDistancesFromParent} from the root
  (defined elsewhere -- presumably fills the remaining slot top-down).
  """
  for node in getPostOrder(tree) :
    if not node.succ :
      node.data.dtips = [[[node,0]],[],[]]
      continue

    children = [tree.node(c) for c in node.succ]
    viaChildren = []
    for child in children :
      shifted = []
      # Tips reachable through this child: its first two slots, each
      # distance extended by the branch leading to the child.
      for group in (child.data.dtips[0], child.data.dtips[1]) :
        for entry in group :
          shifted.append([entry[0], entry[1] + child.data.branchlength])
      viaChildren.append(shifted)

    if node.id != tree.root :
      # Extra (initially empty) slot for every internal node but the root.
      viaChildren.append([])
    node.data.dtips = viaChildren

  _populateTipDistancesFromParent(tree, tree.node(tree.root), [])
示例#4
0
文件: treeul.py 项目: jheled/biopy
def _UMTree_slsqp(tin, targetHeight = None, useDerives = True,
                 doInit = False, normed = True,
                 niter=1000, r0x = None, verb = 0, internals = False) :
  """ Fit per-branch rates on a copy of C{tin} with SLSQP under height constraints.

  The tree is deep-copied; C{tin} itself is not modified. Source code for two
  functions is generated and exec'ed into the module globals:

    - C{fx(r)} returns C{(constraints, heights)}: the equality-constraint
      values and the C{h<id>} value of every node listed in C{nmap}.
    - C{fxp(r)} returns the rows of the constraint Jacobian.

  Layout of the variable vector C{r} (C{nr+2} entries, C{nr} = number of
  branches of a binary rooted tree):

    - C{r[0:nr-2]}  multipliers of branches not attached to the root,
      bounded to C{[1e-10, 10]};
    - C{r[nr-2], r[nr-1]}  the same for the first/second root child;
    - C{r[nr], r[nr+1]}  additive terms in the height expressions of the
      first/second root child, bounded to C{[0, targetHeight]}.

  Constraints: for every non-root internal node the C{h} values of its two
  children are equal; both root children have C{h == targetHeight}; and
  C{r[nr-2]*r[nr] + r[nr-1]*r[nr+1]} equals the summed branch length of the
  two root children.

  @param tin: input tree; every internal node must have exactly two children.
  @param targetHeight: required height; defaults to C{treeHeightEstimate(t)}.
  @param useDerives: pass analytic derivatives (objective and constraints)
         to the optimizer.
  @param doInit: when no C{r0x} is given, derive the start point from
         C{calcNoderateMultipliers} instead of a constant vector.
  @param normed: select C{_targetNormedVar} (True) or C{_targetVar} (False)
         as the objective, with the matching derivative function.
  @param niter: iteration limit handed to the optimizer.
  @param r0x: explicit start vector (overrides C{doInit}).
  @param verb: optimizer verbosity (C{iprint}).
  @param internals: also return the solution vector and the functions used.
  @return: C{(t, fm(r))}, or C{(t, fm(r), r, (fx, fxp, fm, fmp))} when
           C{internals} is true. On C{t}, every non-root node gets
           C{data.ph}, C{data.subsbranchlength} (the original branch
           length), C{data.attributes['clockrate']} and a new, non-negative
           C{data.branchlength}.
  @raise RuntimeError: the optimizer stopped with a non-zero exit mode that
         is not one of the tolerated cases (see below).
  """
  t = copy.deepcopy(tin)
  
  nmap = []
  code = []
  primecode = []
  codeNodes = []
  # Number of branches 
  nr = 2*(len(t.get_terminals()) - 1)

  if targetHeight is None :
    targetHeight = treeHeightEstimate(t)
  assert float(targetHeight) == targetHeight and targetHeight > 0
  
  for n in getPostOrder(t) :
    if n.id != t.root :
      if n.prev != t.root :
        # Ordinary branch: owns variable r[myindx]; its contribution to the
        # height is branchlength * r[myindx].
        n.data.myindx = len(nmap)
        n.data.hexpr = "%.14f * r[%d]" % (n.data.branchlength, n.data.myindx)
        # Gradient of that term: zero everywhere but at myindx.
        c = ["0"]*(nr+2)
        c[n.data.myindx] = "%.14f" % n.data.branchlength
        n.data.hpexpr = ",".join(c)
        nmap.append(n.id)
      else :
        # Root children are completed when the root itself is visited.
        n.data.hexpr = ""
        n.data.hpexpr = ""
      if n.succ :
        # Height continues through the first child (siblings are forced
        # equal by the c<id> constraint added at the end of the loop body).
        n.data.hexpr += ' + h%d' % n.succ[0]
        # cmbn is defined elsewhere -- presumably combines this gradient
        # expression with hp<child>.
        n.data.hpexpr = cmbn(n.data.hpexpr, n.succ[0])
        
      if n.prev != t.root and n.data.hexpr :
        n.data.chexpr = "h%d = %s" % (n.id, n.data.hexpr)
        code.append(n.data.chexpr)
        primecode.append("hp%d = [%s]" % (n.id, n.data.hpexpr))
        
    else :
      # left,right rates, left right branch : len(nmap)-2 to len(nmap)+1
      assert nr - 2 == len(nmap)
      
      lft = t.node(n.succ[0])
      lft.data.myindx = nr-2
      lft.data.hexpr = "r[%d] " % (nr) + lft.data.hexpr
      lft.data.chexpr = "h%d = %s" % (lft.id,lft.data.hexpr)

      sss = ",".join(["0"]*(nr) + ["1","0"])
      if lft.data.hpexpr :
        lft.data.hpexpr = "x1+y1 for x1,y1 in zip([%s], %s)" % \
                          (sss,lft.data.hpexpr)
      else :
        lft.data.hpexpr = sss
      
      rht = t.node(n.succ[1])
      rht.data.myindx = nr-1
      rht.data.hexpr = "r[%d] " % (nr+1) + rht.data.hexpr
      rht.data.chexpr = "h%d = %s" % (rht.id,rht.data.hexpr)

      code.append(rht.data.chexpr)
      code.append("ch1 = h%d - %.14f" % (rht.id, targetHeight))
      
      sss = ",".join(["0"]*(nr) + ["0","1"])
      if rht.data.hpexpr :
        rht.data.hpexpr = "x2+y2 for x2,y2 in zip([%s], %s)" % \
                          (sss, rht.data.hpexpr)
      else :
        rht.data.hpexpr = sss
      
      nmap.extend(n.succ)
      code.append(lft.data.chexpr)
      primecode.append("cph1 = hp%d = [%s]" % (rht.id, rht.data.hpexpr))
      primecode.append("cph2 = hp%d = [%s]" % (lft.id, lft.data.hpexpr))

      code.append("ch2 = h%d - %.14f" % (lft.id, targetHeight))

      # Constraint tying the four root variables to the total length of the
      # two root branches.
      brr = rht.data.branchlength + lft.data.branchlength
      code.append("crt = (r[%d] * r[%d] + r[%d] * r[%d]) - %.15f" % (nr-2,nr,nr-1,nr+1,brr))

      sss = ",".join(["0"]*(nr-2)) + ",r[%d],r[%d],r[%d],r[%d]" % (nr,nr+1,nr-2,nr-1)
      primecode.append("cprt = [%s]" % sss)
      
    if n.succ and n.id != t.root:
      # Both children must reach the same height. The tuple formatting
      # requires exactly two children.
      code.append("c%d = h%d - h%d" % ((n.id,) + tuple(n.succ)))
      primecode.append("cp%d = [x-y for x,y in zip(hp%d, hp%d)]" % ((n.id,) + tuple(n.succ)))
      codeNodes.append(n.id)

  cd = ["def fx(r) :"]
  cd.extend(["  " + x for x in code])
  ccs = ["c%d" % k for k in codeNodes] + ['ch1','ch2','crt']
  hhs = ["h%d" % k for k in nmap]
  cd.append("  return [" + ",".join(ccs) + "], [" + ",".join(hhs) + "]")

  # Python 2 exec statement: defines fx as a module-level global.
  # NOTE(review): under Python 3 this line parses as a comparison and fx
  # would not become a global -- revisit if the module is ported.
  exec ( "\n".join(cd) ) in globals()

  cdp = ["def fxp(r) :"]
  cdp.extend(["  " + x for x in primecode])
  ccs = ["cp%d" % k for k in codeNodes] + ['cph1','cph2','cprt']
  cdp.append("  return [" + ",".join(ccs) + "]")

  # Same as above, for the constraint Jacobian fxp.
  exec ( "\n".join(cdp) ) in globals()

  # optimization target

  if r0x is not None :
    r0 = r0x
  else :
    if doInit:
      calcNoderateMultipliers(t, targetHeight)    
      cl,cr = t.node(t.root).succ
      btl,btr = (t.node(cl).data.timebranchlength , t.node(cr).data.timebranchlength)
      r0 = [t.node(c).data.timebranchlength/t.node(c).data.branchlength for c in nmap] + \
           [btl, btr]
      # The two root-child entries (positions nr-2, nr-1) are inverted.
      r0[-3] = 1/r0[-3]
      r0[-4] = 1/r0[-4]
    else :
      r0 = [1]*(nr) + [targetHeight*.1]*2

  # most are constant, can improve
  #derv = array(fxp(0))
  
  assert nr == len(nmap)
  
  # Objective and its derivative are needed on both paths (and for the
  # return value), so choose them before branching on useDerives.
  fm,fmp = (_targetNormedVar,_targetNormedVarDerivatives) if normed \
           else (_targetVar, _targetVarDerivatives)

  # Public entry point of the SLSQP optimizer.
  slsqp = scipy.optimize.fmin_slsqp
  bounds = [(1e-10,10)]*nr + [(0,targetHeight)]*2
  if useDerives :
    re = slsqp(fm, r0,
               fprime = lambda x : array(fmp(x)),
               f_eqcons = lambda x : array(fx(x)[0]),
               fprime_eqcons = lambda x : array(fxp(x)),
               bounds = bounds,
               iter = niter, iprint=verb, full_output=1)
  else :
    re = slsqp(fm, r0,
               f_eqcons = lambda x : array(fx(x)[0]),
               bounds = bounds,
               iter = niter, iprint=verb, full_output=1)
  # full_output layout: (x, fx, iterations, exit mode, exit message).
  r = re[0]
  if re[3] != 0 :
    # Tolerated: mode 8 when one of the last two variables sits on its
    # upper bound (targetHeight) ...
    if not (re[3] == 8 and (abs(targetHeight - r[-2]) < 1e-9 or
                            abs(targetHeight - r[-1]) < 1e-9)) :
      # ... or mode 8/9 when every equality constraint is met to 1e-8.
      ok = re[3] in [8,9] and all([abs(x) < 1e-8 for x in fx(r)[0]])
      if not ok :
        raise RuntimeError(re[4])
  
  hs = fx(r)[1]
  # nmap is in post-order with the root children last, so a child's ph is
  # always set before its parent reads it.
  for k,i in enumerate(nmap) :
    n = t.node(i)
    n.data.ph = hs[k] 
    n.data.subsbranchlength = n.data.branchlength
    n.data.attributes = {'clockrate' : r[n.data.myindx]}
    
    if n.succ :
      n.data.branchlength = n.data.ph - t.node(n.succ[0]).data.ph
    else :
      n.data.branchlength = n.data.ph
    # Guard against tiny negative lengths from numerical error.
    n.data.branchlength = max(n.data.branchlength, 0)
    
  if internals :
    return t, fm(r), r, (fx, fxp, fm, fmp)
  return t, fm(r)