def __create_taxon_set(self):
    """Return a new TaxonSet holding one taxon per sequence in ``self.aln``.

    Each taxon is created through ``TaxonSet.new_taxon`` and labelled with
    the ``id`` attribute of the corresponding sequence.
    """
    taxa = TaxonSet()
    for record in self.aln:
        taxa.new_taxon(label=record.id)
    return taxa
def testConflict(self):
    """Run ``kernelOfTest`` on three newick trees, as given and reoriented.

    The trees are first checked exactly as parsed.  They are then re-parsed
    50 times; on each pass every tree is randomly reoriented (using a single
    shared ``RepeatedRandom`` generator) before ``kernelOfTest`` is run again.
    """
    # Not referenced below; constructed exactly as in the original test.
    taxa = TaxonSet([str(label) for label in xrange(1, 7)])

    first_newick = '(1,5,(2,((3,6),4)));'
    second_newick = '(2,1,(3,(6,4)));'
    newick_strings = [first_newick, second_newick, '(1,5,(2,(3,6,4)));']

    # Baseline: the trees exactly as parsed.
    self.kernelOfTest(trees_from_newick_str_list(newick_strings))

    # Same trees again, each one randomly reoriented before the check.
    rng = RepeatedRandom()
    for _ in xrange(50):
        reoriented = trees_from_newick_str_list(newick_strings)
        for tree in reoriented:
            randomly_reorient_tree(tree, rng=rng)
        self.kernelOfTest(reoriented)
def main(argv):
    """Print one constraint tree for each non-root internal node of a tree.

    ``argv[0]`` is a newick tree string.  For every internal node other than
    the seed node, a line of the form ``((a,b),c,d)`` is printed: the inner
    group lists the taxa of the leaves descended from that node, followed by
    the taxa of every other leaf of the tree, in the order returned by
    ``leaf_nodes()``.

    Parameters:
        argv: sequence whose first element is the newick string to read.
    """
    # Taxon set handed to the tree that is read in
    taxa = TaxonSet()

    # Reads in the tree string from the command line
    focalTree = Tree(stream=StringIO(argv[0]), schema="newick", rooted=False, taxon_set=taxa)

    # The tree is never modified below, so its full leaf list is built once
    # instead of once per internal node.
    allLeaves = focalTree.leaf_nodes()

    # Each internal node except the root generates one constraint
    for node in focalTree.internal_nodes():
        if node is focalTree.seed_node:
            continue

        # Leaves descended from the focal node (computed once per node)
        cladeLeaves = node.leaf_nodes()
        # Identity-based lookup so the remaining leaves can be filtered in a
        # single pass rather than by repeated list.remove() calls.
        cladeIds = set(id(leaf) for leaf in cladeLeaves)

        # Taxa descended from the focal node, comma separated
        inside = ",".join(str(leaf.taxon) for leaf in cladeLeaves)
        # Every other leaf, each preceded by a comma, in tree order
        outside = "".join(
            "," + str(leaf.taxon) for leaf in allLeaves if id(leaf) not in cladeIds
        )

        # Prints the constraint tree string to the screen
        print("((" + inside + ")" + outside + ")")
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a rooted dendropy.Tree.

    The content is read from `file_obj` (a file object) when it is given;
    otherwise `data` is parsed as RDF/XML.

    Every triple whose predicate is OBO[HAS_PARENT_PREDICATE] (the
    'has_Parent' term, see
    http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#) is read as
    "subject has parent object": one dendropy Node is created per distinct
    subject/object term and the subject's node is added as a child of the
    object's node.  A node gets a Taxon (labelled with the RDFS label) when
    its term has an OBO[REPRESENTS_TU_PREDICATE] value.

    Relies on rdflib and dendropy.

    Returns the rooted Tree whose seed node is the single parentless node,
    or None if no parentless node was found at all.

    Raises ValueError if more than one node lacks a parent.
    '''
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")

    def new_node(resource):
        # Builds the Node for one term; it carries a Taxon only when the
        # resource has a "represents TU" value.
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if not tu:
            return Node()
        taxon = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(taxon)
        return Node(taxon=taxon)

    # Keyed by the rdflib term itself (hashed/compared by value) rather than
    # by id(): equal terms may arrive as distinct Python objects, and ids can
    # be reused once an object has been garbage collected.
    nd_dict = {}
    parentless = set()
    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                parent = new_node(o)
                nd_dict[raw_o] = parent
                # first seen as a parent: a root candidate until it shows up
                # as the subject of some has_Parent triple
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                child = new_node(s)
                nd_dict[raw_s] = child
            elif child in parentless:
                parentless.remove(child)
            parent.add_child(child)
            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # close the debug log even if parsing a triple fails
        if out is not None:
            out.close()

    if len(parentless) != 1:
        message = "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        # we might want to put in a magic number here to suppress really long output
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(parentless)
        if 0 < len(parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n "
            for nd in parentless:
                if nd.label:
                    message += "\n " + nd.label
                else:
                    message += "\n <unlabeled>" + str(id(nd))
            raise ValueError(message)
        # no parentless node at all (e.g. no has_Parent triples were found)
        return None
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
'--schema', dest='schema', type='str', default="newick", help='The format/schema of the input data') parser.add_option('-g', '--gordon', dest='gordons', action="store_true", default=False, help="Specify to use the Gordon's strict consensus") (options, args) = parser.parse_args() if len(args) == 0: sys.exit("Expecting a filename as an argument") schema = options.schema.upper() trees = [] taxon_set = TaxonSet() dataset = DataSet(taxon_set=taxon_set) if schema == "PHYLIP": schema = "NEWICK" for f in args: fo = open(f, "rU") dataset.read(stream=fo, schema=schema) for tl in dataset.tree_lists: trees.extend(tl) o = inplace_strict_consensus_merge(trees, gordons_supertree=options.gordons) sys.stdout.write("%s;\n" % str(o))
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a rooted dendropy.Tree.

    The content is read from `file_obj` (a file object) when it is given;
    otherwise `data` is parsed as RDF/XML.

    Every triple whose predicate is OBO[HAS_PARENT_PREDICATE] (the
    'has_Parent' term, see
    http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#) is read as
    "subject has parent object": one dendropy Node is created per distinct
    subject/object term and the subject's node is added as a child of the
    object's node.  A node gets a Taxon (labelled with the RDFS label) when
    its term has an OBO[REPRESENTS_TU_PREDICATE] value.

    Relies on rdflib and dendropy.

    Returns the rooted Tree whose seed node is the single parentless node,
    or None if no parentless node was found at all.

    Raises ValueError if more than one node lacks a parent.
    '''
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")

    def new_node(resource):
        # Builds the Node for one term; it carries a Taxon only when the
        # resource has a "represents TU" value.
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if not tu:
            return Node()
        taxon = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(taxon)
        return Node(taxon=taxon)

    # Keyed by the rdflib term itself (hashed/compared by value) rather than
    # by id(): equal terms may arrive as distinct Python objects, and ids can
    # be reused once an object has been garbage collected.
    nd_dict = {}
    parentless = set()
    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                parent = new_node(o)
                nd_dict[raw_o] = parent
                # first seen as a parent: a root candidate until it shows up
                # as the subject of some has_Parent triple
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                child = new_node(s)
                nd_dict[raw_s] = child
            elif child in parentless:
                parentless.remove(child)
            parent.add_child(child)
            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # close the debug log even if parsing a triple fails
        if out is not None:
            out.close()

    if len(parentless) != 1:
        message = "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        # we might want to put in a magic number here to suppress really long output
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(parentless)
        if 0 < len(parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n "
            for nd in parentless:
                if nd.label:
                    message += "\n " + nd.label
                else:
                    message += "\n <unlabeled>" + str(id(nd))
            raise ValueError(message)
        # no parentless node at all (e.g. no has_Parent triples were found)
        return None
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
import os
import pandas as pd

# Path prefixes from the command line; they are prepended directly to the
# file names when the trees are loaded below.
o_file = sys.argv[2]
i_file = sys.argv[1]

# RAxML bipartition files found under each location
ilist = find_files(top=i_file, filename_filter='RAxML_bipartitions.*')
olist = find_files(top=o_file, filename_filter='RAxML_bipartitions.*')

# Drop the 19-character "RAxML_bipartitions." prefix from every base name;
# names from the second location additionally lose their last 4 characters
# (the ".phy" suffix that is re-added when those trees are loaded).
split1 = [os.path.basename(file)[19:] for file in ilist]
split2 = [os.path.basename(file)[19:-4] for file in olist]

part_bss = []
unpart_bss = []
SD = []
bs1 = []
tree3 = []
tree2 = []
shared_files = []

# One taxon set shared by every tree that is loaded
t = TaxonSet()

for file in split1:
    # Only names present in both locations are processed
    if file not in split2:
        continue
    print(file)

    # Tree from the o_file location goes into tree3
    tree1 = dendropy.Tree.get_from_path('%sRAxML_bipartitions.%s.phy' % (o_file, file), 'newick', taxon_set=t)
    tree3.append(tree1)
    print(tree1)

    # Matching tree from the i_file location goes into tree2
    tree1 = dendropy.Tree.get_from_path('%sRAxML_bipartitions.%s' % (i_file, file), 'newick', taxon_set=t)
    tree2.append(tree1)
    print(tree1)
def rdf2dendropyTree(filepath):
    """Parse the RDF document at `filepath` into a rooted dendropy.Tree.

    Every triple whose predicate is OBO[HAS_PARENT_PREDICATE] is read as
    "subject has parent object": one dendropy Node is created per distinct
    subject/object term and the subject's node is added as a child of the
    object's node.  A node gets a Taxon (labelled with the RDFS label) when
    its term has an OBO[REPRESENTS_TU_PREDICATE] value.

    Relies on rdflib and dendropy.

    Returns the rooted Tree whose seed node is the single parentless node.

    Raises ValueError if more than one node lacks a parent.  If no
    parentless node is found at all, the process is terminated through
    sys.exit("no parentless").
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)
    taxon_set = TaxonSet()
    OBO = Namespace(u"http://purl.obolibrary.org/obo/")

    def new_node(resource):
        # Builds the Node for one term; it carries a Taxon only when the
        # resource has a "represents TU" value.
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if not tu:
            return Node()
        taxon = Taxon(label=tu.value(rdflib.RDFS.label))
        taxon_set.append(taxon)
        return Node(taxon=taxon)

    # Keyed by the rdflib term itself (hashed/compared by value) rather than
    # by id(): equal terms may arrive as distinct Python objects, and ids can
    # be reused once an object has been garbage collected.
    nd_dict = {}
    parentless = set()
    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                raw_o = o
                o = rdflib.resource.Resource(graph, o)
                parent = new_node(o)
                nd_dict[raw_o] = parent
                # first seen as a parent: a root candidate until it shows up
                # as the subject of some has_Parent triple
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                raw_s = s
                s = rdflib.resource.Resource(graph, s)
                child = new_node(s)
                nd_dict[raw_s] = child
            elif child in parentless:
                parentless.remove(child)
            parent.add_child(child)
            if out is not None:
                out.write("%s %s %s\n" % (str(s), p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        # close the debug log even if parsing a triple fails
        if out is not None:
            out.close()

    if len(parentless) != 1:
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        # we might want to put in a magic number here to suppress really long output
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(parentless)
        if 0 < len(parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n "
            for nd in parentless:
                if nd.label:
                    message += "\n " + nd.label
                else:
                    message += "\n <unlabeled>" + str(id(nd))
            raise ValueError(message)
        # no parentless node at all: terminate, as the original code did
        sys.exit("no parentless")
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
def trees_from_newick_str_list(newick_list):
    """Return a TreeList parsed from a list of newick strings.

    The strings are joined with single spaces and read as one "NEWICK"
    stream; the resulting trees share a freshly created TaxonSet.
    """
    newick_stream = StringIO(" ".join(newick_list))
    return TreeList(stream=newick_stream, taxon_set=TaxonSet(), schema="NEWICK")
default=4, help='The number of leaves to generate.') parser.add_option('-s', '--seed', action='store', dest='seed', type=long, default=0, help='The seed of the random number generator') (opts, args) = parser.parse_args() num_leaves = opts.num_leaves if num_leaves < 3: sys.exit("The number of leaves must be greater than 2") seed = opts.seed if seed < 0: sys.exit("Seed must be positive") if seed < 1: seed = time.time() rng = random.Random() sys.stderr.write("seed = %ld\n" % seed) rng.seed(seed) outstr = sys.stdout taxa = TaxonSet(["t%d" % i for i in xrange(1, 1 + num_leaves)]) tree = uniform_pure_birth(taxa, rng=rng) for e in tree.preorder_edge_iter(): e.length = rng.random() outstr.write("%s;\n" % tree.as_newick_string(preserve_spaces=True))