Пример #1
def read_phyloxml(input_file):
    """
    Parse the phylogenetic tree(s) stored in a PhyloXML file.

    :param str input_file: path to the PhyloXML file
    :return: the phylogenies of the loaded project, as returned by
        ``Phyloxml.get_phylogeny()``
    """
    from ete2 import Phyloxml
    project = Phyloxml()
    # Populate the project from the file; without this call the project
    # stays empty and ``input_file`` is never read.
    project.build_from_file(input_file)
    return project.get_phylogeny()
Пример #2
 def getRelevantEdges( adjGraph, t1, t2 ):
   """
   Restrict an interaction graph to the extant leaves of two trees.

   :param adjGraph: graph whose ``edges()`` yields ``(u, v)`` pairs
   :param t1: path to the first tree file (loaded as PhyloXML)
   :param t2: path to the second tree file (loaded as PhyloXML)
   :return: ``(newGraph, crossValidationEdges)`` where ``newGraph`` holds the
       non-lost leaves of both trees plus every edge of ``adjGraph`` whose two
       endpoints are such leaves, and ``crossValidationEdges`` lists the edges
       joining a leaf of the first tree with a leaf of the second
   """
   pxml = Phyloxml()
   # Load both trees; they become pxml.phylogeny[0] and pxml.phylogeny[1].
   pxml.build_from_file( t1 )
   pxml.build_from_file( t2 )

   # Leaves whose name contains 'LOST' are dropped.
   la = [ name for name in pxml.phylogeny[0].get_leaf_names() if 'LOST' not in name ]
   lb = [ name for name in pxml.phylogeny[1].get_leaf_names() if 'LOST' not in name ]

   # Sets give constant-time membership tests while scanning the edges.
   inA = set( la )
   inB = set( lb )
   extant = inA | inB

   crossValidationEdges = []
   relevantEdges = []
   for edge in adjGraph.edges():
     x, y = edge
     if x in extant and y in extant:
       relevantEdges.append( edge )
     if (x in inA and y in inB) or (y in inA and x in inB):
       crossValidationEdges.append( edge )

   newGraph = nx.Graph()
   newGraph.add_nodes_from( la + lb )
   newGraph.add_edges_from( relevantEdges )
   return newGraph, crossValidationEdges
Пример #3
def getTreeFromPhyloxml(xml, saveToFile="default.xml", delFile=True):
	"""
	Read phylogeny tree(s) from a phyloxml string and return a single TreeClass
	object, or a list of TreeClass objects when the result does not hold
	exactly one tree.

	NOTE(review): this excerpt looks truncated. ``xml`` and ``delFile`` are never
	used, ``treeList`` is never defined and the loop below has no body. The
	notes inline mark the gaps; confirm against the full source.
	"""
	project = Phyloxml()
	# NOTE(review): the handle is neither written to nor closed in the visible
	# code; presumably ``xml`` is written to ``saveToFile`` here and the project
	# is then built from that file - confirm.
	fo=open(saveToFile, "w+")
	for tree in project.get_phylogeny():
		# NOTE(review): loop body missing; presumably each tree is converted and
		# appended to ``treeList`` - confirm.

	# A single tree is returned bare, anything else as the list itself.
	if len(treeList)==1:
		return treeList[0]
	return treeList
Пример #4
def readScoreFile(fname, noself, randomize=False):
  """
  Rank the held-out ("target") edge among the scored candidate edges of a file.

  The path encodes the inputs: the second-to-last '@'-separated field of
  ``fname`` holds the two target protein names joined by '#', and the first two
  '@'-separated fields of the base name are the two orthology group names.

  :param fname: path to the score file; every line is whitespace separated with
      the two protein names in columns 0 and 1 and the score in column 3
  :param noself: when true, self-interactions (u == v) are never considered
  :param randomize: when true, every score read is replaced by a uniform random
      value in [0, 1]
  :return: ``(None, None)`` when either tree file does not exist, otherwise
      ``(rank, n - 1)`` where ``rank`` is the 0-based position of the target
      edge in descending score order and ``n`` the number of scored edges
  :raises ValueError: when the target edge is not among the scored edges
  """
  import random

  # The strings naming the proteins whose interaction was removed in this
  # input, upper-cased and turned into an edge.
  tstring = fname.split('@')[-2].split("#")
  tedge = (tstring[0].upper(), tstring[1].upper())

  # Locate the phylogenies of the two orthology groups.
  treeDir = "../../Parana2Data/HerpesPPIs/trees/rearranged"
  baseFile = fname.split('/')[-1]
  orthoGroup1 = baseFile.split('@')[0]
  orthoGroup2 = baseFile.split('@')[1]
  treeTemplate = "{0}/{1}.xml.rooting.0.ntg.reconciled.0.ntg.rearrange.0.ntg"
  t1 = treeTemplate.format(treeDir, orthoGroup1)
  t2 = treeTemplate.format(treeDir, orthoGroup2)

  if not (os.path.exists(t1) and os.path.exists(t2)):
    return None, None

  # Load both trees; they become pxml.phylogeny[0] and pxml.phylogeny[1].
  pxml = Phyloxml()
  pxml.build_from_file(t1)
  pxml.build_from_file(t2)

  def extantLeaves(tree):
    # Upper-cased leaf names, without the leaves marked as LOST.
    upper = [name.upper() for name in tree.get_leaf_names()]
    return [name for name in upper if 'LOST' not in name]

  la = extantLeaves(pxml.phylogeny[0])
  lb = extantLeaves(pxml.phylogeny[1])

  # The set of all possible interactions among the two homology groups.
  if orthoGroup1 == orthoGroup2:
    possibleEndpoints = combinationsWithSelf(la)
  else:
    possibleEndpoints = (combinationsWithSelf(la)
                         + list(itertools.product(la, lb))
                         + combinationsWithSelf(lb))

  # Only protein pairs sharing the same last '_'-separated name field (the
  # species, per the original comment) represent a potential edge.
  allPossibleEdges = [pair for pair in possibleEndpoints
                      if pair[0].split('_')[-1] == pair[1].split('_')[-1]]

  # Potential edges that don't appear in the input network ...
  nonPresentEdgesMinusTarget = list(set(
      pair for pair in allPossibleEdges
      if not ExtantNetwork.has_edge(pair[0], pair[1])))
  # ... plus our target edge.
  nonPresentEdges = nonPresentEdgesMinusTarget + [tedge]
  # Set copy for constant-time lookups while reading the score file.
  potentialEdges = set(nonPresentEdges)

  def inPotentialEdges(u, v):
    # Is u,v (in either orientation) one of the edges we wish to consider?
    contains = (u, v) in potentialEdges or (v, u) in potentialEdges
    if noself:
      return u != v and contains
    # Previously this return was nested under the `if` above and unreachable,
    # so every edge was rejected whenever `noself` was false.
    return contains

  def isEdge(se, p1, p2):
    # Does the scored edge join p1 and p2, in either orientation?
    return (se.p1 == p1 and se.p2 == p2) or (se.p1 == p2 and se.p2 == p1)

  scoredEdges = []

  nonEdgesWithProb = set(nonPresentEdges)
  with open(fname, 'rb') as ifile:
    for l in ifile:
      toks = l.rstrip().split()
      p1 = toks[0].upper()
      p2 = toks[1].upper()
      s = float(toks[3])
      if inPotentialEdges(p1, p2):
        if randomize:
          s = random.uniform(0.0, 1.0)
        scoredEdges.append(ScoredEdge(p1, p2, s))

  # NOTE(review): nonEdgesWithProb is never modified, so this difference is
  # always empty and the loop never runs. Presumably it was meant to give a
  # default score to potential edges absent from the file - confirm intent
  # before changing it, since it affects the returned rank and total.
  for u, v in (nonEdgesWithProb - set(nonPresentEdges)):
    s = random.uniform(0.0, 1.0) if randomize else 0.0
    scoredEdges.append(ScoredEdge(u, v, s))

  # Highest score first; the enumerate index is the rank.
  scoredEdges = list(enumerate(sorted(scoredEdges, key=lambda e: e.score, reverse=True)))

  for rank, se in scoredEdges:
    if isEdge(se, tedge[0], tedge[1]):
      return (rank, float(len(scoredEdges) - 1))

  # Previously an unreachable `raise 'Hell'` placed after the return; a string
  # cannot be raised, and the function silently returned None instead.
  raise ValueError("target edge {0} was not scored in {1}".format(tedge, fname))
Пример #5
def main ():
    """
    Load a Newick tree and a taxonomy table and prepare the tree for drawing.

    Reads the module-level ``options`` and ``args``: ``args[0]`` is the Newick
    file, ``args[1]`` the taxonomy file, ``options.depth`` an optional file of
    whitespace-separated ``id depth`` pairs and ``options.verbose`` switches on
    timestamped progress messages.

    NOTE(review): several statements appear to be missing from this excerpt
    (empty ``with`` and ``if``/``elif`` bodies, ``depth`` never initialised,
    the tree is never shown); the inline notes mark each gap.
    """

    global options, args

    if options.verbose: print time.asctime(),
    if options.verbose: print "load and parse newick file"
    # Read the Newick file given as first argument.
    tree = Phylo.read(args[0],'newick') 
    # TODO: convert newick to phyloxml
    # NOTE(review): nothing is written into treeXML in the visible code, so
    # getvalue() below yields an empty string - confirm the missing write.
    treeXML = StringIO()
    # TODO: read phyloxml as ete object
    hPhylotree = Phyloxml()
    # NOTE(review): the body of this `with` is missing; presumably the
    # temporary file is loaded into hPhylotree here - confirm.
    with tempinput(treeXML.getvalue()) as tempfilename:   
    # Take the first phylogeny of the project.
    tree2 = hPhylotree.get_phylogeny()[0]
    if options.verbose: print time.asctime(),
    if options.verbose: print "load and parse taxonomy file"
    # Read the taxonomy file given as second argument.
    tax = get_taxonomy(args[1])
    # Refine the taxonomy annotation of internal nodes.
    tree2 = add_taxonomy_for_internal_branch(tree2,tax)
    # Refine the label of every internal node: walking the ranks from kingdom
    # to species, keep the last entry that is longer than 3 characters.
    #tree2 = add_node_label(tree2,tax)
    for node in tree2.traverse():
        if not node.is_leaf():
            label = "null"
            for t in ['kingdom','phylum','class','order','family','genus','species']:
                if len(tax[node.id][t])>3:
                    label = tax[node.id][t]
            # NOTE(review): `label` is never attached to the node in the
            # visible code.

    # Read the per-node depth table: one "id depth" pair per line.
    # NOTE(review): `depth` is not initialised anywhere in this excerpt.
    if options.depth:
        with open(options.depth) as f:
            for line in f:
                (id,dep) = line.split()
                depth[id] = float(dep)
    # Colour the leaves by depth bucket: [10, 100), [100, 1000),
    # [1000, 5000) and >= 5000.
    # NOTE(review): the body of every branch below is missing.
    if options.depth:
        for node in tree2.iter_leaves():
            if depth[node.id] >= 10 and depth[node.id] < 100:
            elif depth[node.id] >= 100 and depth[node.id] < 1000:
            elif depth[node.id] >= 1000 and depth[node.id] < 5000:
            elif depth[node.id] >= 5000:
    # Tree style: hide the default leaf names and use the custom layout.
    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.layout_fn = tree_layout
    # TODO: show tree2
Пример #6
from ete2 import Phyloxml
project = Phyloxml()
# Load a PhyloXML file into the project; without this call the project holds
# no phylogeny and the loop below never runs. "apaf.xml" is the sample file
# used by the ETE tutorial - replace it with your own path.
project.build_from_file("apaf.xml")

# Each tree contains the same methods as a PhyloTree object
for tree in project.get_phylogeny():
    print(tree)
    # Rendering options are available on the tree as well.
    # PhyloXML features are stored in the phyloxml_clade attribute
    for node in tree:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Node name: %s" % node.name)
        for seq in node.phyloxml_clade.get_sequence():
            for domain in seq.domain_architecture.get_domain():
                domain_data = [domain.valueOf_, domain.get_from(), domain.get_to()]
                print("  Domain: " + '\t'.join(map(str, domain_data)))
Пример #8
