Пример #1
0
    def __create_taxon_set(self):
        """Build a TaxonSet holding one taxon per sequence in ``self.aln``.

        Every taxon is labelled with the ``id`` attribute of its sequence.
        Returns the newly created TaxonSet.
        """
        taxa = TaxonSet()
        for record in self.aln:
            taxa.new_taxon(label=record.id)
        return taxa
Пример #2
0
 def testConflict(self):
     # Runs kernelOfTest on three newick trees: two differing input trees
     # plus a variant of the first in which the (3,6) clade is collapsed.
     # The check is done once on the trees as parsed and then 50 more times
     # on freshly parsed copies that have been randomly re-oriented.

     # Constructed exactly as before; the resulting object is not used below.
     taxa = TaxonSet([str(label) for label in xrange(1, 7)])
     first = '(1,5,(2,((3,6),4)));'
     second = '(2,1,(3,(6,4)));'
     newicks = [first, second, '(1,5,(2,(3,6,4)));']
     self.kernelOfTest(trees_from_newick_str_list(newicks))
     rng = RepeatedRandom()
     for _ in xrange(50):
         reoriented = trees_from_newick_str_list(newicks)
         for tree in reoriented:
             randomly_reorient_tree(tree, rng=rng)
         self.kernelOfTest(reoriented)
Пример #3
0
def main(argv):
    """Print one constraint tree for every non-root internal node.

    argv -- sequence whose first element is the newick string of the focal
            tree (it is parsed with rooted=False).

    For each internal node other than the seed node, one line of the form
    ``((<leaves below the node>),<all remaining leaves>)`` is printed, with
    leaves rendered as ``str(leaf.taxon)`` and separated by commas.
    """

    # Instantiates taxon set object for tree list
    taxa = TaxonSet()

    # Reads in tree string from the command line
    focalTree = Tree(stream=StringIO(argv[0]),
                     schema="newick",
                     rooted=False,
                     taxon_set=taxa)

    # The tree is never modified, so its leaves are collected only once
    allLeaves = focalTree.leaf_nodes()

    # Iterates over all internal nodes in the focal tree (generating one constraint each)
    for node in focalTree.internal_nodes():

        # The root does not define a constraint
        if node is focalTree.seed_node:
            continue

        # Leaves descended from the focal node, traversed a single time
        cladeLeaves = node.leaf_nodes()

        # Identity-based lookup; the leaves stay alive in cladeLeaves, so
        # their ids cannot be reused while this set is in use
        cladeIds = set(id(leaf) for leaf in cladeLeaves)

        # Taxa descended from the focal node, comma separated
        inside = ",".join(str(leaf.taxon) for leaf in cladeLeaves)

        # Every other leaf, in tree order, each preceded by a comma
        outside = "".join("," + str(leaf.taxon)
                          for leaf in allLeaves
                          if id(leaf) not in cladeIds)

        # Prints constraint tree string to the screen
        print("((" + inside + ")" + outside + ")")
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a dendropy.Tree.

    The content is read from `file_obj` (a file object) when it is given;
    otherwise `data` is parsed as RDF/XML.

    Uses the 'has_Parent' term in http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object: the subject of
    every has_Parent triple becomes a child of the triple's object. A node
    whose resource has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled
    with the rdfs:label of that value.

    Relies on rdflib and dendropy.
    Returns None if no parentless node is found.
    Raises ValueError if the graph implies more than 1 root node.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes are keyed by the RDF term itself rather than by id(term): an id
    # is only unique while the object is alive, so equal terms delivered as
    # distinct objects would otherwise yield duplicate or mixed-up nodes.
    nd_dict = {}
    parentless = set()

    def _node_for(term):
        # Create, register and return the Node for an RDF term not seen before.
        resource = rdflib.resource.Resource(graph, term)
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                # First sighting of this term: a root candidate until it
                # shows up as the subject (child) of some triple.
                parent = _node_for(o)
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = _node_for(s)
            else:
                parentless.discard(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Close the debug log even when building the tree fails part-way.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            return None
        # NOTE: every candidate is listed; a cutoff could be added here to
        # suppress really long output.
        message = "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        message += ":\n  "
        for nd in parentless:
            if nd.label:
                message += "\n  " + nd.label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
Пример #5
0
                      '--schema',
                      dest='schema',
                      type='str',
                      default="newick",
                      help='The format/schema of the input data')
    parser.add_option('-g',
                      '--gordon',
                      dest='gordons',
                      action="store_true",
                      default=False,
                      help="Specify to use the Gordon's strict consensus")
    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")
    schema = options.schema.upper()

    trees = []
    taxon_set = TaxonSet()
    dataset = DataSet(taxon_set=taxon_set)
    if schema == "PHYLIP":
        schema = "NEWICK"
    for f in args:
        fo = open(f, "rU")
        dataset.read(stream=fo, schema=schema)
    for tl in dataset.tree_lists:
        trees.extend(tl)

    o = inplace_strict_consensus_merge(trees,
                                       gordons_supertree=options.gordons)
    sys.stdout.write("%s;\n" % str(o))
Пример #6
0
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a dendropy.Tree.

    The content is read from `file_obj` (a file object) when it is given;
    otherwise `data` is parsed as RDF/XML.

    Uses the 'has_Parent' term in http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#
    to construct and return a rooted dendropy.Tree object: the subject of
    every has_Parent triple becomes a child of the triple's object. A node
    whose resource has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled
    with the rdfs:label of that value.

    Relies on rdflib and dendropy.
    Returns None if no parentless node is found.
    Raises ValueError if the graph implies more than 1 root node.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # Keyed by the RDF term itself, not id(term): ids are only unique for
    # the lifetime of an object, so they are not a reliable cache key for
    # terms handed out by the graph.
    nd_dict = {}
    parentless = set()

    def _make_node(term):
        # Build the Node for a term seen for the first time and cache it.
        resource = rdflib.resource.Resource(graph, term)
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                # A newly seen parent stays a root candidate until it
                # appears as the child in some triple.
                parent = _make_node(o)
                parentless.add(parent)

            child = nd_dict.get(s)
            if child is None:
                child = _make_node(s)
            else:
                parentless.discard(child)
            parent.add_child(child)

            if out is not None:
                out.write('%s %s %s\n' % (str(s), p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # The debug log is closed on every exit path, including errors.
        if out is not None:
            out.close()

    if len(parentless) == 1:
        tree = Tree(taxon_set=taxon_set)
        tree.seed_node = list(parentless)[0]
        tree.is_rooted = True
        return tree

    if not parentless:
        return None

    # NOTE: all candidates are listed; a cutoff could be introduced here to
    # suppress really long output.
    message = "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(
        parentless)
    message += ":\n  "
    for nd in parentless:
        if nd.label:
            message += "\n  " + nd.label
        else:
            message += "\n  <unlabeled>" + str(id(nd))
    raise ValueError(message)
Пример #7
0
import os
import pandas as pd
# Command-line arguments: argv[1] and argv[2] are the two top-level locations
# that are searched for RAxML bipartition files.
o_file = sys.argv[2]
i_file = sys.argv[1]
ilist = find_files(top=i_file, filename_filter='RAxML_bipartitions.*')
olist = find_files(top=o_file, filename_filter='RAxML_bipartitions.*')

# Reduce every path to its file name and drop the first 19 characters (the
# length of 'RAxML_bipartitions.'); names found under o_file additionally
# lose their last 4 characters.
split1 = [os.path.basename(file)[19:] for file in ilist]
split2 = [os.path.basename(file)[19:-4] for file in olist]

# Accumulators; only tree2 and tree3 are filled by the loop below.
part_bss, unpart_bss = [], []
SD = []
bs1 = []
tree3 = []
tree2 = []
shared_files = []

# One taxon set shared by every tree that is read.
t = TaxonSet()
for file in split1:
    # Only names present in both locations are processed.
    if file not in split2:
        continue
    print(file)

    o_path = '%sRAxML_bipartitions.%s.phy' % (o_file, file)
    tree1 = dendropy.Tree.get_from_path(o_path, 'newick', taxon_set=t)
    tree3.append(tree1)
    print(tree1)

    i_path = '%sRAxML_bipartitions.%s' % (i_file, file)
    tree1 = dendropy.Tree.get_from_path(i_path, 'newick', taxon_set=t)
    tree2.append(tree1)
    print(tree1)
def rdf2dendropyTree(filepath):
    """Parse the RDF at `filepath` into a dendropy.Tree.

    The subject of every has_Parent triple (HAS_PARENT_PREDICATE in the OBO
    namespace) becomes a child of the triple's object. A node whose resource
    has a REPRESENTS_TU_PREDICATE value gets a Taxon labelled with the
    rdfs:label of that value.

    Returns the Tree seeded at the single node that has no parent.
    Raises ValueError if more than one parentless node is found.
    Calls sys.exit("no parentless") if no parentless node is found.
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, Edge, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)
    taxon_set = TaxonSet()
    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes are keyed by the RDF term itself rather than by id(term): an id
    # is only unique while the object is alive, so equal terms delivered as
    # distinct objects would otherwise yield duplicate or mixed-up nodes.
    nd_dict = {}
    parentless = set()

    def _node_for(term):
        # Create, register and return the Node for an RDF term not seen before.
        resource = rdflib.resource.Resource(graph, term)
        tu = resource.value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node

    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent = nd_dict.get(o)
            if parent is None:
                # First sighting of this term: a root candidate until it
                # shows up as the subject (child) of some triple.
                parent = _node_for(o)
                parentless.add(parent)
            child = nd_dict.get(s)
            if child is None:
                child = _node_for(s)
            else:
                parentless.discard(child)
            parent.add_child(child)

            if out is not None:
                out.write("%s %s %s\n" % (str(s), p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        # Close the debug log even when building the tree fails part-way.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            sys.exit("no parentless")
        # NOTE: every candidate is listed; a cutoff could be added here to
        # suppress really long output.
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        message += ":\n  "
        for nd in parentless:
            if nd.label:
                message += "\n  " + nd.label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
Пример #9
0
def trees_from_newick_str_list(newick_list):
    """Parse a list of newick strings into one TreeList.

    The strings are joined with single spaces and read as one NEWICK
    stream; all resulting trees share a newly created TaxonSet.
    """
    combined = StringIO(" ".join(newick_list))
    shared_taxa = TaxonSet()
    return TreeList(stream=combined, taxon_set=shared_taxa, schema="NEWICK")
Пример #10
0
                      default=4,
                      help='The number of leaves to generate.')

    parser.add_option('-s',
                      '--seed',
                      action='store',
                      dest='seed',
                      type=long,
                      default=0,
                      help='The seed of the random number generator')

    (opts, args) = parser.parse_args()
    num_leaves = opts.num_leaves
    if num_leaves < 3:
        sys.exit("The number of leaves must be greater than 2")
    seed = opts.seed
    if seed < 0:
        sys.exit("Seed must be positive")
    if seed < 1:
        seed = time.time()
    rng = random.Random()
    sys.stderr.write("seed = %ld\n" % seed)
    rng.seed(seed)
    outstr = sys.stdout

    taxa = TaxonSet(["t%d" % i for i in xrange(1, 1 + num_leaves)])
    tree = uniform_pure_birth(taxa, rng=rng)
    for e in tree.preorder_edge_iter():
        e.length = rng.random()
    outstr.write("%s;\n" % tree.as_newick_string(preserve_spaces=True))