Пример #1
0
def run_anc(input_dict,run_id = None):
  """Build a tree for one taxon's sequences and reconstruct ancestral sequences.

  Steps, in order: fetch the taxonomy node for ``input_dict['taxid']``,
  collect sequence nodes beneath it with ``BT.investigatePhylum``, align the
  resulting records, build a tree with ``phyml``, run ``paml`` on the tree and
  alignment, and parse the resulting rst file into an annotated tree.

  Parameters:
    input_dict: mapping that must contain the keys ``'rank_name'``,
      ``'taxid'`` and ``'aliname'``. Only ``'taxid'`` is used below.
    run_id: identifier forwarded to ``phyml.tree`` and ``paml.run_paml``.
      Must be truthy.

  Returns:
    dict with keys ``anc_tree`` (parsed paml tree), ``anc_align`` (one
    SeqRecord per non-terminal node of ``anc_tree``), ``term_tree`` (the
    phyml tree) and ``term_align`` (the input alignment).

  Raises:
    ValueError: if ``run_id`` is missing or falsy.
    KeyError: if ``input_dict`` lacks one of the required keys.
  """
  # Explicit check instead of ``assert``: asserts are removed under ``-O``,
  # which would let a missing run_id reach phyml/paml.
  if not run_id:
    raise ValueError('run_anc requires a non-empty run_id')

  # rank_name and aliname are not used below; the lookups are kept so that a
  # missing key still raises KeyError before any work is done.
  rank_name = input_dict['rank_name']
  taxon_id  = input_dict['taxid']
  aliname = input_dict['aliname']

  BT = getBTOL()
  p_node = ncbi.get_node(taxon_id)
  seqnodes = BT.investigatePhylum(p_node = p_node)
  # seq_recs returns three values; only the records are needed here.
  recs, _seqelts, _seqtuples = seq_recs(seqnodes)

  align = align_seqnodes(recs)
  tree = phyml.tree(align, run_id = run_id)
  rstfile = paml.run_paml(tree, align, run_id = run_id)
  anc_tree = paml.rst_parser(rstfile)

  # One record per internal node, with that node's 'probs' stored as the
  # 'scores' annotation.
  anc_alignment = [SeqRecord(elt.m['seq'],
                             id = None,
                             name = elt.name,
                             annotations = {'scores': elt.m['probs']})
                   for elt in anc_tree.get_nonterminals()]

  return dict(anc_tree = anc_tree,
              anc_align = anc_alignment,
              term_tree = tree,
              term_align = align)
Пример #2
0
 def set_taxnodes(**kwargs):
   """Return one taxonomy node (or None) per sequence from ``get_seqs``.

   ``dbname`` is not a parameter; it is resolved from the surrounding scope
   (not visible in this block). ``kwargs`` are passed through ``mem.sr``
   before being forwarded to ``get_seqs``.

   Returns:
     list aligned with the sequences: ``ncbi.get_node(source_taxon)`` for
     sequences with a truthy ``source_taxon``, ``None`` otherwise.
   """
   all_seqs = get_seqs(dbname, **mem.sr(kwargs))
   # Any falsy source_taxon (None, 0, '') is normalised to None, as before.
   seq_taxa = [s.source_taxon or None for s in all_seqs]
   # Identity test against the None singleton rather than ``!= None``.
   return [ncbi.get_node(taxon) if taxon is not None else None
           for taxon in seq_taxa]
Пример #3
0
  def investigatePhylum(self,
                        aliname = 'group2.stk',
                        p_node = None, **kwargs):
    """Build SeqNode records for tree leaves and alignment entries in a phylum.

    Alignment entries (from ``ali``) and tree leaves (from ``self``) are kept
    when their phylum-level taxon equals ``p_node``. For each kept entry the
    '16s' database is queried for Sequence rows whose ``source_taxon`` is the
    entry's node id; when none exist, the node's ancestors are tried in turn
    until one has rows.

    Parameters:
      aliname: alignment name passed to the ``ali`` lookups and embedded in
        the alignment node ids.
      p_node: phylum node to select. When falsy, the node returned by
        ``ncbi.taxon_with_name('phylum', 'Thermotogae')`` is used.
      **kwargs: passed through ``mem.sr`` to the ``ali`` / ``self`` lookups.

    Returns:
      list of SeqNode: the selected tree leaves first, then the selected
      alignment entries.
    """
    if not p_node:
      p_node = ncbi.taxon_with_name('phylum', 'Thermotogae')

    # Alignment side: sequences, their taxonomy nodes and phylum-level taxa.
    ali_seqs = ali.get_seqs(aliname, **mem.sr(kwargs))
    ali_nodes = array(ali.get_taxnodes(aliname, **mem.sr(kwargs)))
    ali_phyla = array(ali.get_taxon_forall(aliname,**mem.sr(kwargs, rank = 'phylum')))
    ali_inds = nonzero(equal(ali_phyla, p_node))[0]

    # Tree side: terminals, their taxonomy nodes and phylum-level taxa.
    leaf_terminals = self.t.get_terminals()
    leaf_nodes = array(self.leafNodes(**mem.sr(kwargs)))
    leaf_phyla = array(self.getTaxon('phylum', **mem.sr(kwargs)))
    leaf_inds = nonzero(equal(leaf_phyla, p_node))[0]

    # Genus / species taxa restricted to the selected entries.
    ag_sub = array(ali.get_taxon_forsome(ali_nodes[ali_inds],'genus','thermo',
                                         **mem.sr(kwargs)))
    lg_sub = array(self.getTaxon('genus', **mem.sr(kwargs)))[leaf_inds]

    # NOTE(review): unlike the genus lookup above, this call does not forward
    # **mem.sr(kwargs). Left unchanged; confirm whether that is intentional.
    as_sub = array(ali.get_taxon_forsome(ali_nodes[ali_inds], 'species', 'thermo'))
    ls_sub = array(self.getTaxon('species',**mem.sr(kwargs)))[leaf_inds]

    db16 = cbdb.getName('16s')

    def sequences_for(taxon_id):
      # All Sequence rows in the 16s database attributed to taxon_id.
      return db16.S.q(db16.Sequence).filter_by(source_taxon = taxon_id).all()

    def backfill(seq_lists, start_nodes):
      # For every empty result list, climb parent links from the matching
      # node and take the first ancestor's rows that are non-empty.
      # NOTE(review): there is no stop condition if no ancestor has rows.
      for seqs, cur_node in zip(seq_lists, start_nodes):
        while not seqs:
          cur_node = cur_node.parent
          seqs.extend(sequences_for(cur_node.id))

    def seq_tuples(seqs):
      # (sequence, gb_id, source_taxon, rank of source_taxon) per row.
      return [(x.sequence, x.gb_id, x.source_taxon,
               ncbi.get_node(x.source_taxon).rank)
              for x in seqs]

    a_16s = [sequences_for(n.id) for n in ali_nodes[ali_inds]]
    l_16s = [sequences_for(n.id) for n in leaf_nodes[leaf_inds]]

    #fill any empty nodes... (those lacking 16s rRNA)
    backfill(a_16s, ali_nodes[ali_inds])
    backfill(l_16s, leaf_nodes[leaf_inds])

    leaf_sns = [ SeqNode(lg_sub[i], ls_sub[i], leaf_nodes[idx],
                         seq_tuples(l_16s[i]),
                         src = leaf_terminals[idx],
                         node_id = 'btol:default:{0}'.format(leaf_terminals[idx].m['id']))
                 for i, idx in enumerate(leaf_inds)]
    ali_sns = [ SeqNode(ag_sub[i], as_sub[i], ali_nodes[idx],
                        seq_tuples(a_16s[i]),
                        src = ali_seqs[idx],
                        node_id = 'ali:{0}:{1}'.format(aliname,ali_seqs[idx].id))
                for i, idx in enumerate(ali_inds)]

    return leaf_sns + ali_sns
Пример #4
0
 def setLeafNodes(**kwargs):
   """Return one taxonomy node (or None) per terminal of ``self.t``.

   ``self`` is resolved from the surrounding scope (not visible in this
   block). ``kwargs`` is accepted but not used.

   Returns:
     list aligned with ``self.t.get_terminals()``: for a terminal whose ``m``
     mapping has a ``'taxid'`` key, ``ncbi.get_node`` of that id looked up
     with the 'taxdmp' database handle; ``None`` for the others.
   """
   terminals = self.t.get_terminals()
   taxdmp = cbdb.getName('taxdmp')

   nodes = []
   for leaf in terminals:
     if 'taxid' in leaf.m.keys():
       nodes.append(ncbi.get_node(leaf.m['taxid'], taxdmp))
     else:
       # Terminal without a taxid: keep the slot so indices stay aligned.
       nodes.append(None)
   return nodes