Пример #1
0
 def test_taxon_assignment_and_namespace(self):
     """Check that sampled trees use the taxon namespaces passed in.

     For six fixed seeds followed by ten randomly drawn ones, every model
     yielded by ``self.iter_psm_models`` is sampled once per termination
     condition. Each returned tree must reference the very namespace
     object supplied for it, and is then handed to ``self.check``.
     """
     fixed_seeds = (559, 631, 230, 212, 907, 237)
     # Lazy on purpose: the random seeds are only drawn once the fixed
     # seeds have been consumed by the chain below.
     random_seeds = (random.randint(0, 1000) for _ in range(10))
     termination_conditions = (
         ("max_time", 20),
         ("num_extant_orthospecies", 10),
         ("num_extant_lineages", 20),
     )
     for seed in itertools.chain(fixed_seeds, random_seeds):
         rng = random.Random(seed)
         for psm in self.iter_psm_models(rng=rng):
             for condition_name, condition_value in termination_conditions:
                 lineage_tns = dendropy.TaxonNamespace()
                 species_tns = dendropy.TaxonNamespace()
                 sample_kwargs = {
                     condition_name: condition_value,
                     "lineage_taxon_namespace": lineage_tns,
                     "species_taxon_namespace": species_tns,
                 }
                 lineage_tree, orthospecies_tree = psm.generate_sample(
                     **sample_kwargs)
                 self.assertIs(lineage_tree.taxon_namespace, lineage_tns)
                 self.assertIs(orthospecies_tree.taxon_namespace,
                               species_tns)
                 self.check(lineage_tree)
                 self.check(orthospecies_tree)
Пример #2
0
    def test_multiplePrune(self):
        """Reuse one TreePruner on several copied clades of ``self.tree``.

        The pruner is pointed at the taxon set B/C, then A/A2, then B/C
        again; each time it prunes a fresh tree built from a deep copy of
        the matching MRCA node. The namespaces of the last two pruned
        trees and of ``self.tree`` itself are then compared against the
        expected label lists.
        """
        pruner = TreePruner(re.compile("(.*)"), re.compile("(.*)"), True)

        def prune_copied_clade(labels):
            # Configure the pruner, copy the clade spanning `labels` out of
            # self.tree into a tree with its own namespace, and prune it.
            pruner.set_taxon_set(list(labels))
            clade_taxa = [
                leaf.taxon for leaf in self.tree.leaf_node_iter()
                if leaf.taxon.label in labels
            ]
            clade_root = self.tree.mrca(taxa=clade_taxa)
            clade_tree = dendropy.Tree(
                seed_node=copy.deepcopy(clade_root),
                taxon_namespace=dendropy.TaxonNamespace())
            pruner.prune(clade_tree)
            return clade_tree

        def namespace_labels(tree):
            return [taxon.label for taxon in tree.taxon_namespace]

        prune_copied_clade(["B", "C"])

        a_clade = prune_copied_clade(["A", "A2"])
        self.assertEqual(namespace_labels(a_clade), ["A", "A2"])

        bc_clade = prune_copied_clade(["B", "C"])
        self.assertEqual(namespace_labels(bc_clade), ["B", "C"])

        self.assertEqual(namespace_labels(self.tree), ["A", "B", "A2", "C"])
    def setUp(self):
        """Build the expected namespaces, tree lists and character matrices.

        ``expected_taxon_namespaces`` collects, in order, the two
        standalone namespaces, the namespace of each primary tree list and
        the namespace of each primary character matrix.
        ``expected_tree_lists`` and ``expected_char_matrices`` map every
        created object to its taxon namespace.
        """
        self.expected_taxon_namespaces = []
        self.standalone_taxon_namespaces = [
            dendropy.TaxonNamespace(labels)
            for labels in (["t1", "t2", "t3"], ["s1", "s2", "s3"])
        ]
        self.expected_taxon_namespaces += self.standalone_taxon_namespaces

        self.expected_tree_lists = collections.OrderedDict()
        for _ in range(2):
            primary_trees = curated_test_tree_list.get_tree_list(4)
            self.expected_tree_lists[primary_trees] = primary_trees.taxon_namespace
            self.expected_taxon_namespaces.append(primary_trees.taxon_namespace)
            # Two further tree lists are created on the primary's namespace.
            for _ in range(2):
                sibling_trees = curated_test_tree_list.get_tree_list(
                    4, taxon_namespace=primary_trees.taxon_namespace)
                self.expected_tree_lists[sibling_trees] = sibling_trees.taxon_namespace

        self.expected_char_matrices = collections.OrderedDict()
        dna_checker = standard_file_test_chars.DnaTestChecker
        protein_checker = standard_file_test_chars.ProteinTestChecker
        for _ in range(2):
            primary_matrix = dna_checker.get_char_matrix_from_class_data()
            self.expected_char_matrices[primary_matrix] = primary_matrix.taxon_namespace
            self.expected_taxon_namespaces.append(primary_matrix.taxon_namespace)
            # Two protein matrices are created on the DNA matrix's namespace.
            for _ in range(2):
                sibling_matrix = protein_checker.get_char_matrix_from_class_data(
                    taxon_namespace=primary_matrix.taxon_namespace)
                self.expected_char_matrices[sibling_matrix] = sibling_matrix.taxon_namespace
Пример #4
0
def setup(mainDir, conFiles, folders=True):
    """Prepare a working folder for every nexus alignment in ``mainDir``.

    For each file matching ``*nex`` in ``mainDir`` the function resolves
    the locus folder, changes into it, copies the ``*.constraint`` files
    from the parent directory, reads the alignment to collect its taxon
    labels and calls ``editFile`` once per entry of ``conFiles``.

    Parameters
    ----------
    mainDir : str
        Directory holding the nexus files. The process working directory
        is changed to it (and to each locus folder in turn).
    conFiles : iterable
        Constraint file names, each passed to ``editFile``.
    folders : bool
        When true, ``makeFolders`` creates the locus folder and returns
        its path and the locus name. Otherwise the locus folder is
        expected to be ``mainDir/<nexus file name without extension>``.
    """
    os.chdir(mainDir)

    # For each nexus file
    for nex in glob.glob('*nex'):

        if folders:
            # Make a folder and move the MSA file into it
            dirPath, fName = makeFolders(nex, mainDir)
        else:
            # The locus name is the file name without its extension. The
            # original code referenced an undefined name here and took the
            # directory part of the path instead of the stem.
            fName = os.path.splitext(nex)[0]
            dirPath = os.path.join(mainDir, fName)

        print(dirPath)

        # Move into the folder and grab the constraint files
        os.chdir(dirPath)
        os.system("cp ../*.constraint .")

        # Reading the alignment fills locusTaxa with the taxa it contains;
        # the matrix itself is not needed afterwards.
        locusTaxa = dendropy.TaxonNamespace()
        dendropy.DnaCharacterMatrix.get(path=nex, schema="nexus", taxon_namespace=locusTaxa)
        locusList = locusTaxa.labels()

        for f in conFiles:
            editFile(nex, fName, f, locusList)

        # When done with the locus, return to the main directory
        os.chdir(mainDir)
 def test1(self):
     """Compare coalescent tree scores against stored reference values.

     Every regime in ``multispecies_coalescent_test_data.json`` provides
     a species tree, a mapping from coalescent lineage labels to species
     labels, and coalescent trees with their expected log-likelihoods.
     Scores must agree with the references to two decimal places.
     """
     data_path = pathmap.other_source_path("multispecies_coalescent_test_data.json")
     with open(data_path) as src:
         test_regimes = json.load(src)
     for test_regime in test_regimes:
         species_tree = dendropy.Tree.get(
                 data=test_regime["species_tree"],
                 schema="newick",
                 rooting="force-rooted",
                 )
         species_tree.taxon_namespace.is_mutable = False
         msc = multispeciescoalescent.MultispeciesCoalescent(species_tree=species_tree)
         label_map = test_regime["coalescent_species_lineage_label_map"]

         def lineage_to_species(lineage_taxon):
             # Look up the species taxon for a coalescent lineage taxon.
             species_label = label_map[lineage_taxon.label]
             return species_tree.taxon_namespace.require_taxon(species_label)

         coalescent_taxa = dendropy.TaxonNamespace(sorted(label_map.keys()))
         coalescent_taxa.is_mutable = False
         for sub_regime in test_regime["coalescent_trees"]:
             coalescent_tree = dendropy.Tree.get(
                     data=sub_regime["coalescent_tree"],
                     schema="newick",
                     rooting="force-rooted",
                     taxon_namespace=coalescent_taxa,
                     )
             observed = msc.score_coalescent_tree(
                     coalescent_tree=coalescent_tree,
                     coalescent_species_lineage_map_fn=lineage_to_species,
                     )
             expected = sub_regime["log_likelihood"]
             self.assertAlmostEqual(observed, expected, 2)
Пример #6
0
def get_backbone_tree(tree1, tree2):
    """Constrain a copy of tree1 to the leaf set it shares with tree2

    The input trees are not modified; all changes are applied to a deep
    copy of tree1, which is moved onto a new taxon namespace built from
    the shared leaves and has its bipartitions encoded.

    Parameters
    ----------
    tree1 : dendropy tree object
    tree2 : dendropy tree object

    Returns
    -------
    backbone : dendropy tree object
        Copy of tree1 restricted to the shared leaves

    """
    backbone = deepcopy(tree1)

    own_leaves = njmergepair.get_leaf_set(backbone)
    other_leaves = njmergepair.get_leaf_set(tree2)
    common = list(own_leaves.intersection(other_leaves))

    common_namespace = dendropy.TaxonNamespace(common)

    backbone.retain_taxa_with_labels(common)
    backbone.migrate_taxon_namespace(common_namespace)
    backbone.encode_bipartitions()

    return backbone
Пример #7
0
    def setUp(self):
        """Create the shared fixtures: taxa, sequences, matrix and tree.

        The namespace holds five labels, but only four of them ("fish",
        "snake", "cat", "dog") are given to the taxa metadata, the
        sequence table and the DNA character matrix.
        """
        used_labels = ["fish", "snake", "cat", "dog"]

        self.namespace1 = dendropy.TaxonNamespace(
            ["dog", "cat", "snake", "fish", "tree"])
        self.taxa1 = spectraltree.TaxaMetadata(self.namespace1,
                                               list(used_labels),
                                               "DNA")
        self.dog = self.namespace1.get_taxon("dog")
        self.snake = self.namespace1.get_taxon("snake")

        self.array1 = np.array([
            [3, 1, 1, 0, 0],
            [3, 2, 1, 0, 0],
            [3, 2, 4, 0, 0],
            [2, 2, 0, 0, 0],
        ])

        # Row i of array1 belongs to the i-th entry of used_labels.
        self.tax2seq = {
            label: self.array1[row, :]
            for row, label in enumerate(used_labels)
        }

        sequences = {
            "fish": "TCCAA",
            "snake": "TGCAA",
            "cat": "TG-AA",
            "dog": "GGAAA",
        }
        self.dna_charmatrix = dendropy.DnaCharacterMatrix.from_dict(
            sequences, taxon_namespace=self.namespace1)

        self.tree = spectraltree.lopsided_tree(4, self.taxa1)
        self.dm = self.tree.phylogenetic_distance_matrix()
 def testAttachTaxonNamespaceOnGet(self):
     """Check namespace handling when one is passed to ``get_from_path``.

     While the supplied namespace stays attached, further reads must keep
     the data set at a single namespace of 33 taxa. After detaching, the
     FASTA read must add a second namespace with 114 taxa.
     """
     attached_tns = dendropy.TaxonNamespace()
     dataset = dendropy.DataSet.get_from_path(
         pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
         "nexus",
         taxon_namespace=attached_tns)

     def expect_namespace_sizes(*sizes):
         # One namespace per expected size, compared position by position.
         self.assertEqual(len(dataset.taxon_namespaces), len(sizes))
         for position, size in enumerate(sizes):
             self.assertEqual(len(dataset.taxon_namespaces[position]), size)

     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIsNot(dataset.attached_taxon_namespace, None)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     self.assertIs(dataset.attached_taxon_namespace, attached_tns)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     dataset.read(path=pathmap.tree_source_path('pythonidae.mle.nex'),
                  schema="nexus")
     expect_namespace_sizes(33)

     dataset.read(
         path=pathmap.tree_source_path('pythonidae.reference-trees.newick'),
         schema="newick")
     expect_namespace_sizes(33)

     dataset.detach_taxon_namespace()
     dataset.read_from_path(
         pathmap.char_source_path('caenophidia_mos.chars.fasta'),
         schema="fasta",
         data_type="protein")
     expect_namespace_sizes(33, 114)
Пример #9
0
def generate_contained_trees(
        containing_tree,
        contained_taxon_namespace=None,
        population_size=1,
        num_individuals_per_population=4,
        num_gene_trees=5,
        rng=None):
    """Simulate gene trees contained within ``containing_tree``.

    For every taxon in the containing tree's namespace,
    ``num_individuals_per_population`` gene taxa are created (or looked
    up) in ``contained_taxon_namespace``, tagged with ``population_label``
    and mapped back to their containing taxon. ``num_gene_trees`` trees
    are then produced with ``ContainingTree.embed_contained_kingman``.

    A new namespace is created when ``contained_taxon_namespace`` is None.
    Returns a ``dendropy.TreeList`` on the contained namespace.
    """
    if contained_taxon_namespace is None:
        contained_taxon_namespace = dendropy.TaxonNamespace()
    assert len(containing_tree.taxon_namespace) > 0

    gene_to_species = {}
    for species_taxon in containing_tree.taxon_namespace:
        for individual in range(1, num_individuals_per_population + 1):
            gene_label = "{sp}_{ind}^{sp}_{ind}".format(
                sp=species_taxon.label, ind=individual)
            gene_taxon = contained_taxon_namespace.require_taxon(label=gene_label)
            gene_taxon.population_label = species_taxon.label
            gene_to_species[gene_taxon] = species_taxon

    embedding = reconcile.ContainingTree(
            containing_tree=containing_tree,
            contained_taxon_namespace=contained_taxon_namespace,
            contained_to_containing_taxon_map=gene_to_species)

    gene_trees = dendropy.TreeList(taxon_namespace=contained_taxon_namespace)
    for _ in range(num_gene_trees):
        gene_trees.append(
            embedding.embed_contained_kingman(
                default_pop_size=population_size,
                rng=rng))
    return gene_trees
Пример #10
0
 def check(self, title, src_prefix):
     """Assert that two pruned tree files describe identical topologies.

     Loads ``<src_prefix>.dendropy-pruned.nex`` and
     ``<src_prefix>.paup-pruned.nex`` (the second on the first one's taxon
     namespace) and requires a symmetric difference of 0 for every pair of
     trees at the same position. ``title`` only appears in the debug log.
     """
     tns = dendropy.TaxonNamespace()
     input_ds = dendropy.DataSet.get_from_path(
         src=pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex"),
         schema='nexus',
         attached_taxon_namespace=tns)
     shared_taxa = input_ds.taxon_namespaces[0]
     output_ds = dendropy.DataSet.get_from_path(
         src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
         schema='nexus',
         taxon_namespace=shared_taxa)
     for set_idx, src_trees in enumerate(input_ds.tree_lists):
         ref_trees = output_ds.tree_lists[set_idx]
         for tree_idx, src_tree in enumerate(src_trees):
             progress = (title, set_idx + 1, len(input_ds.tree_lists),
                         tree_idx + 1, len(src_trees))
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % progress)
             ref_tree = ref_trees[tree_idx]
             difference = treecompare.symmetric_difference(src_tree, ref_tree)
             self.assertEqual(difference, 0)
Пример #11
0
 def test_basic_migration(self):
     """Migrate a character matrix to a new, case-sensitive namespace.

     After migrating without unifying taxa by label, the matrix must
     reference the new namespace, keep its number of sequences, carry the
     same multiset of labels, and map only taxa of the new namespace to
     the original sequence objects.
     """
     char_matrix = self.get_char_matrix()
     old_tns = char_matrix.taxon_namespace
     target_tns = dendropy.TaxonNamespace()
     target_tns.is_case_sensitive = True
     char_matrix.migrate_taxon_namespace(
             target_tns,
             unify_taxa_by_label=False)

     self.assertIsNot(char_matrix.taxon_namespace, old_tns)
     self.assertIs(char_matrix.taxon_namespace, target_tns)
     self.assertEqual(len(char_matrix), char_matrix.nseqs)
     self.assertEqual(len(char_matrix), len(char_matrix.original_seqs))
     assert len(char_matrix) == len(char_matrix._taxon_sequence_map)

     if len(char_matrix.taxon_namespace) != len(old_tns):
         # Diagnostic output: labels the old namespace has more often
         # than the migrated one.
         migrated_counts = collections.Counter(
             t.label for t in char_matrix.taxon_namespace)
         original_counts = collections.Counter(t.label for t in old_tns)
         print(original_counts - migrated_counts)
     self.assertEqual(len(char_matrix.taxon_namespace), len(old_tns))

     original_labels = [t.label for t in old_tns]
     new_labels = [t.label for t in target_tns]
     self.assertCountEqual(new_labels, original_labels)

     for taxon in char_matrix:
         self.assertIn(taxon, char_matrix.taxon_namespace)
         self.assertNotIn(taxon, old_tns)
         seq = char_matrix[taxon]
         self.assertIs(seq, seq.original_seq)
         self.assertIn(seq, char_matrix.original_seqs)
         # Tick off each sequence so that none may remain afterwards.
         char_matrix.original_seqs.remove(seq)
     self.assertEqual(char_matrix.original_seqs, [])
Пример #12
0
 def test_reconstruct_taxon_namespace_unifying_case_sensitive_fail(self):
     """Reconstruction with label unification must raise on this matrix.

     The matrix with colliding labels is pointed at a fresh case-sensitive
     namespace; rebuilding it with ``unify_taxa_by_label=True`` is expected
     to raise ``TaxonNamespaceReconstructionError``.
     """
     matrix = self.get_char_matrix_with_case_insensitive_and_case_sensitive_label_collisions()
     case_sensitive_tns = dendropy.TaxonNamespace()
     case_sensitive_tns.is_case_sensitive = True
     # Swap the namespace directly, bypassing any migration logic.
     matrix._taxon_namespace = case_sensitive_tns
     self.assertRaises(
         error.TaxonNamespaceReconstructionError,
         matrix.reconstruct_taxon_namespace,
         unify_taxa_by_label=True)
Пример #13
0
def calc_robinson_foulds_distance():
    """Compute weighted Robinson-Foulds distances between saved trees.

    Every unordered pair of files in ``out/phylogenetic_trees`` is read as
    nexus on a shared taxon namespace. The distance, rounded to four
    decimals, is printed and stored under ``result[first][second]`` where
    ``first`` sorts before ``second``. The nested mapping is written to
    ``out/robinson_foulds_distances_between_trees.json``.
    """
    print("Robinson Foulds Distances between Trees")
    print("---------------------------------------")
    pairwise = {}

    for first in os.listdir('out/phylogenetic_trees'):
        pairwise[first] = {}

        for second in os.listdir('out/phylogenetic_trees'):
            # Handle each unordered pair once and never compare a file
            # with itself.
            if first < second:
                shared_taxa = dendropy.TaxonNamespace()
                tree_a = dendropy.Tree.get_from_path(
                    'out/phylogenetic_trees/{}'.format(first),
                    'nexus',
                    taxon_namespace=shared_taxa,
                )
                tree_b = dendropy.Tree.get_from_path(
                    'out/phylogenetic_trees/{}'.format(second),
                    "nexus",
                    taxon_namespace=shared_taxa)

                tree_a.encode_bipartitions()
                tree_b.encode_bipartitions()
                raw = dendropy.calculate.treecompare.weighted_robinson_foulds_distance(
                    tree_a, tree_b)
                rounded = round(raw, 4)
                pairwise[first][second] = rounded
                print(first, "AND", second, rounded, "\n")

    with open('out/robinson_foulds_distances_between_trees.json', 'w') as handle:
        json.dump(pairwise, handle)
Пример #14
0
def generate_contained_trees(
        containing_tree,
        contained_taxon_namespace=None,
        population_size=1,
        total_number_of_individuals=200,
        num_gene_trees=5,
        rng=None):
    """Simulate gene trees contained within a processed containing tree.

    The containing tree is first replaced by the result of
    ``process_containing_tree_for_gene_samples``. For each of its leaves,
    ``num_individuals_sampled`` gene taxa are created (or looked up) in
    ``contained_taxon_namespace``, tagged with ``population_label`` and
    mapped to the leaf's taxon. ``num_gene_trees`` trees are then produced
    with ``ContainingTree.embed_contained_kingman``.

    A new namespace is created when ``contained_taxon_namespace`` is None.
    Returns the processed containing tree and a ``dendropy.TreeList`` of
    gene trees.
    """
    if contained_taxon_namespace is None:
        contained_taxon_namespace = dendropy.TaxonNamespace()
    assert len(containing_tree.taxon_namespace) > 0

    containing_tree = process_containing_tree_for_gene_samples(
            containing_tree=containing_tree,
            total_number_of_individuals=total_number_of_individuals,
            rng=rng)

    gene_to_species = {}
    for leaf in containing_tree.leaf_nodes():
        species_taxon = leaf.taxon
        for individual in range(1, leaf.num_individuals_sampled + 1):
            gene_label = "{sp}_{ind}^{sp}".format(
                sp=species_taxon.label, ind=individual)
            gene_taxon = contained_taxon_namespace.require_taxon(label=gene_label)
            gene_taxon.population_label = species_taxon.label
            gene_to_species[gene_taxon] = species_taxon

    embedding = reconcile.ContainingTree(
            containing_tree=containing_tree,
            contained_taxon_namespace=contained_taxon_namespace,
            contained_to_containing_taxon_map=gene_to_species)

    gene_trees = dendropy.TreeList(taxon_namespace=contained_taxon_namespace)
    for _ in range(num_gene_trees):
        gene_trees.append(
            embedding.embed_contained_kingman(
                default_pop_size=population_size,
                rng=rng))
    return containing_tree, gene_trees
Пример #15
0
def main(args=None):
    """Simulate a tree for a list of taxa and write it to stdout.

    Parameters
    ----------
    args : dict
        Option mapping (presumably produced by docopt -- confirm against
        the caller). The four rate options are converted to ``float`` in
        place. Keys read: ``--birth_rate``, ``--death_rate``,
        ``--birth_rate_sd``, ``--death_rate_sd``, ``<genome_list>``,
        ``<comm_file>``, ``--star`` and ``--outfmt``. Despite the ``None``
        default, a mapping must be supplied.

    Raises
    ------
    ValueError
        If neither ``<genome_list>`` nor ``<comm_file>`` is provided.
    AssertionError
        If ``--outfmt`` is not ``newick`` or ``nexus``.
    """
    for name in ('birth_rate', 'death_rate', 'birth_rate_sd', 'death_rate_sd'):
        option = '--' + name
        args[option] = float(args[option])

    # loading taxon list
    if args['<genome_list>'] is not None:
        taxa = Utils.parseGenomeList(args['<genome_list>'], check_exists=False)
        taxa = [x[0] for x in taxa]
    elif args['<comm_file>'] is not None:
        comm = CommTable.from_csv(args['<comm_file>'], sep='\t')
        taxa = comm.get_unique_taxon_names()
    else:
        # Without this branch `taxa` stayed unbound and the next statement
        # failed with an UnboundLocalError that hid the real cause.
        raise ValueError(
            'no taxon source: provide <genome_list> or <comm_file>')

    # init dendropy taxon namespace
    taxa = dendropy.TaxonNamespace(taxa, label='taxa')

    # simulating tree
    if args['--star']:
        tree = star_tree(taxon_set=taxa)
    else:
        tree = birth_death(args['--birth_rate'],
                           args['--death_rate'],
                           birth_rate_sd=args['--birth_rate_sd'],
                           death_rate_sd=args['--death_rate_sd'],
                           num_extant_tips=len(taxa))

    # writing tree
    outfmt = args['--outfmt'].lower()
    psbl_fmts = ['newick', 'nexus']
    assert outfmt in psbl_fmts, 'output file format not recognized.' +\
        ' Possible formats: {}'.format(', '.join(psbl_fmts))
    tree.write_to_stream(sys.stdout, outfmt)
Пример #16
0
 def runTest(self):
     """PureCoalescentTreeTest -- tree generation without checking [TODO: checks]

     Builds a pure Kingman coalescent tree over 100 taxa labelled
     "t1" .. "t100" and only asserts that the tree passes its own
     validity check.
     """
     mock_rng = MockRandom()
     labels = ["t{}".format(number) for number in range(1, 101)]
     namespace = dendropy.TaxonNamespace(labels)
     kingman_tree = coalescent.pure_kingman_tree(namespace, rng=mock_rng)
     assert kingman_tree._debug_tree_is_valid()
 def testBoundTaxonNamespaceDefault(self):
     """Check that reads reuse a namespace attached to an empty data set.

     With the namespace attached, every read must leave the data set with
     exactly one namespace: 33 taxa after the nexus and newick sources,
     147 after the protein FASTA file.
     """
     dataset = dendropy.DataSet()
     attached_tns = dendropy.TaxonNamespace()
     dataset.attach_taxon_namespace(attached_tns)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)

     def read_and_expect(path, expected_taxa, **read_kwargs):
         # Read one source, then require a single namespace of given size.
         dataset.read(path=path, **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), expected_taxa)

     read_and_expect(
         pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
         33,
         schema="nexus")
     read_and_expect(
         pathmap.tree_source_path('pythonidae.mle.nex'),
         33,
         schema="nexus")
     read_and_expect(
         pathmap.tree_source_path('pythonidae.reference-trees.newick'),
         33,
         schema="newick")
     read_and_expect(
         pathmap.char_source_path('caenophidia_mos.chars.fasta'),
         147,
         schema="fasta",
         data_type="protein")
Пример #18
0
def generate_star_tree2():
    """Build a star tree with a root "s0" and ten leaves "s1" .. "s10".

    The root gets ``X = 0`` and ``time = 0``. Every leaf hangs directly
    off the root with edge length 1, ``time = 1`` and an ``X`` value drawn
    from a standard normal distribution via ``random.gauss``.

    Returns
    -------
    dendropy.Tree
        The star tree; its namespace holds one taxon per node.
    """
    num_tips = 10
    branch_length = 1
    names = ["s" + str(i) for i in range(num_tips + 1)]

    taxon_namespace = dendropy.TaxonNamespace(names)
    tree = dendropy.Tree(taxon_namespace=taxon_namespace)

    root = tree.seed_node
    root.taxon = taxon_namespace.get_taxon(names[0])
    root.X = 0
    root.time = 0

    # The original loop tested a counter that was never incremented, so it
    # re-initialised the root on every pass and attached no leaves at all.
    for name in names[1:]:
        leaf = dendropy.Node(taxon=taxon_namespace.get_taxon(name))
        leaf.edge_length = branch_length
        leaf.X = random.gauss(0, 1)
        leaf.time = branch_length
        root.add_child(leaf)
    return tree
 def test_attached_taxon_namespace(self):
     """Load a multi-taxa nexus file onto one namespace and verify it.

     The actual checks are delegated to
     ``self.verify_attached_taxon_namespace``.
     """
     namespace = dendropy.TaxonNamespace()
     source_path = pathmap.mixed_source_path('multitaxa_mesquite.nex')
     dataset = dendropy.DataSet.get_from_path(
         source_path,
         "nexus",
         taxon_namespace=namespace)
     self.verify_attached_taxon_namespace(dataset, namespace)
Пример #20
0
def generate_birthdeath_tree(num_extinct, br, dr):
    """Simulate a birth-death tree in which every node carries a taxon.

    The tree is simulated with birth rate ``br`` and death rate ``dr``
    until ``num_extinct`` extinct tips exist, keeping the extinct tips.
    All nodes, internal ones included, are labelled "s1", "s2", ... in
    preorder. The tree is then passed through ``prune_nodes`` and
    ``calculate_times`` and the result is returned.
    """
    tree = treesim.birth_death_tree(birth_rate=br,
                                    death_rate=dr,
                                    num_extinct_tips=num_extinct,
                                    is_retain_extinct_tips=True,
                                    is_add_extinct_attr=True)

    # One label per node, numbered from 1 in preorder.
    labels = [
        "s" + str(position)
        for position, _ in enumerate(tree.preorder_node_iter(), start=1)
    ]

    # name all nodes instead of just leaves
    tree.taxon_namespace = dendropy.TaxonNamespace(labels)
    for label, node in zip(labels, tree.preorder_node_iter()):
        node.taxon = tree.taxon_namespace.get_taxon(label)

    tree = prune_nodes(tree)
    # distance to root
    tree = calculate_times(tree)

    return tree
Пример #21
0
    def getDiploid(self):
        """
        Let the user choose the diploid species.

        Shows a warning box and returns when ``self.inputFiles`` is empty.
        Otherwise a taxon namespace is built from ``self.taxa_names`` and
        handed to a ``DiploidListDlg``; if the dialog's ``exec_()`` returns
        a truthy value, its selection is stored in ``self.diploidList``.
        """
        if len(self.inputFiles) == 0:
            QMessageBox.warning(self, "Warning",
                                "Please select a file type and upload data!",
                                QMessageBox.Ok)
            return

        # Create a taxon_namespace object based on current taxa names set.
        species_namespace = dendropy.TaxonNamespace()
        for name in list(self.taxa_names):
            species_namespace.add_taxon(dendropy.Taxon(name))

        chooser = diploidList.DiploidListDlg(species_namespace,
                                             self.diploidList, self)

        if chooser.exec_():
            # If executed, update diploid species list.
            self.diploidList = chooser.getDiploidSpeciesList()
Пример #22
0
 def setUp(self):
     """Load the fixture tree and its continuous character matrix.

     Both are parsed from the inline strings below onto one shared taxon
     namespace and stored as ``self.tree`` and ``self.char_matrix``.
     """
     shared_namespace = dendropy.TaxonNamespace()
     newick_source = "[&R] ((((H**o:0.21,Bogus1:0.23,Pongo:0.21)N1:0.28,Bogus2:0.49,Macaca:0.49)N2:0.13,Bogus3:0.62,Ateles:0.62)N3:0.38,Galago:1.00)N4:0.0;"
     nexus_source = """
 #NEXUS
 BEGIN DATA;
     DIMENSIONS  NTAX=8 NCHAR=2;
     FORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;
     MATRIX
         H**o      4.09434   4.74493
         Pongo     3.61092   3.33220
         Macaca    2.37024   3.36730
         Ateles    2.02815   2.89037
         Galago   -1.46968   2.30259
         Bogus1    2.15      2.15
         Bogus2    2.15      2.15
         Bogus3    2.15      2.15
     ;
 END;
 """
     self.tree = dendropy.Tree.get_from_string(
         newick_source,
         'newick',
         taxon_namespace=shared_namespace)
     self.char_matrix = dendropy.ContinuousCharacterMatrix.get_from_string(
         nexus_source,
         'nexus',
         taxon_namespace=shared_namespace)
Пример #23
0
def rf_weighted(tree_object1, tree_object2):
    """Return the weighted Robinson-Foulds distance between two trees.

    Each input is serialised with its own ``newick(root)`` method and
    re-parsed by DendroPy as a rooted tree, both on one shared taxon
    collection.

    Parameters
    ----------
    tree_object1, tree_object2 :
        Objects exposing ``newick(node)`` and ``root``.

    Returns
    -------
    The value of
    ``dendropy.calculate.treecompare.weighted_robinson_foulds_distance``.
    """
    tree_newick1 = tree_object1.newick(tree_object1.root) + ";"
    tree_newick2 = tree_object2.newick(tree_object2.root) + ";"

    # Only DendroPy 3 uses the TaxonSet API. Every other major version is
    # treated like 4; previously anything but "3" or "4" left the trees
    # unbound and failed further down.
    major_version = dendropy.__version__.split(".")[0]
    if major_version == '3':
        taxa_kwargs = {'taxon_set': dendropy.TaxonSet()}
    else:
        taxa_kwargs = {'taxon_namespace': dendropy.TaxonNamespace()}

    # Both trees must share one taxon collection to be comparable.
    tree1 = dendropy.Tree.get(data=tree_newick1,
                              schema='newick',
                              rooting='force-rooted',
                              **taxa_kwargs)
    tree2 = dendropy.Tree.get(data=tree_newick2,
                              schema='newick',
                              rooting='force-rooted',
                              **taxa_kwargs)
    tree1.encode_bipartitions()
    tree2.encode_bipartitions()
    dist = dendropy.calculate.treecompare.weighted_robinson_foulds_distance(
        tree1, tree2)
    return dist
Пример #24
0
def dist_from_files(cl1, cl2, distance_method=False):
    """Compute a distance between two newick trees read from disk.

    Parameters
    ----------
    cl1 : str
        Name used to build the first path,
        ``treedir + "/" + cl1 + ".phy_phyml_tree.txt"`` (``treedir`` is a
        module-level name defined outside this function).
    cl2 : str
        Path of the second tree, read on the first tree's namespace.
    distance_method : str
        One of "symmetric", "weightedRF", "euclidean", "quartet" or
        "triplet".

    Returns
    -------
    The value returned by the selected distance function.

    Exits the process with status 1, after printing an error, when no
    method is given or the method name is unknown.
    """
    if not distance_method:
        print("ERROR: must provide distance method")
        sys.exit(1)

    distance_functions = {
        "symmetric": dp.calculate.treecompare.symmetric_difference,
        "weightedRF":
        dp.calculate.treecompare.weighted_robinson_foulds_distance,
        "euclidean": dp.calculate.treecompare.euclidean_distance,
        "quartet": quartet_distance,
        "triplet": triplet_distance
    }

    # Resolve the method before doing any work. Only the lookup is guarded,
    # so a KeyError raised inside a distance function is no longer reported
    # as an unknown method.
    try:
        distance_fn = distance_functions[distance_method]
    except KeyError:
        # The original built this message but never printed it.
        print("ERROR: distance method {0} not found".format(distance_method))
        sys.exit(1)

    t1 = dp.Tree.get_from_path(src=treedir + "/" + cl1 + ".phy_phyml_tree.txt",
                               schema="newick")
    t2 = dp.Tree.get_from_path(src=cl2,
                               schema="newick",
                               taxon_namespace=t1.taxon_namespace)

    return distance_fn(t1, t2)
Пример #25
0
 def get_random_tree():
     """Simulate a tree and record per-group leaf sets and subtrees.

     Ten taxa are created for every id in
     ``AssemblageInducedTreeManagerTests.GROUP_IDS`` and a pure-birth tree
     (death rate 0) is simulated over them. For each group the returned
     tree stores, at the same index, the set of matching leaf nodes in
     ``assemblage_leaf_sets`` and the tree extracted with that group's
     filter in ``assemblage_classification_regime_subtrees``.
     """
     from dendropy.model import birthdeath
     group_ids = AssemblageInducedTreeManagerTests.GROUP_IDS

     namespace = dendropy.TaxonNamespace()
     for group_id in group_ids:
         for member in range(10):
             namespace.require_taxon(
                 label="{}{}".format(group_id, member))

     tree = dendropy.simulate.birth_death_tree(birth_rate=0.1,
                                               death_rate=0.0,
                                               taxon_namespace=namespace,
                                               num_extant_tips=len(namespace))
     tree.assemblage_leaf_sets = []
     tree.assemblage_classification_regime_subtrees = []
     for group_id in group_ids:

         def belongs_to_group(nd):
             # Nodes without a taxon pass the filter as well.
             return nd.taxon is None or nd.taxon.label.startswith(group_id)

         group_subtree = tree.extract_tree(node_filter_fn=belongs_to_group)
         group_leaves = set()
         for leaf in tree.leaf_node_iter():
             if not belongs_to_group(leaf):
                 continue
             assert leaf.taxon.label.startswith(group_id)
             group_leaves.add(leaf)
         tree.assemblage_leaf_sets.append(group_leaves)
         tree.assemblage_classification_regime_subtrees.append(group_subtree)
     assert len(tree.assemblage_classification_regime_subtrees) == len(
         group_ids)
     return tree
Пример #26
0
def labeler(files, etalon_tree, tree_path=".", rebuild=False):
    """
    Label each alignment with the index of its best reconstruction method.

    For every alignment the NJ, UPGMA and RAxML trees are compared with
    the reference tree by symmetric difference; the label is the column of
    the smallest distance (0 = NJ, 1 = UPGMA, 2 = RAxML).

    :param files: an iterable with file paths to alignments
    :param etalon_tree: file name of the reference tree inside tree_path
    :param tree_path: directory where the built trees are stored
    :param rebuild: set it True to (re)build the trees before labelling
    :return: array with one label per alignment
    """
    tree_path = osp.abspath(tree_path)  # raxml needs absolute paths

    def alignment_name(path):
        # File name up to its first dot, e.g. "dir/abc.fasta" -> "abc".
        return path.split("/")[-1].split(".")[0]

    if rebuild:
        calculator = TreeConstruction.DistanceCalculator('blosum62')
        dist_constructor = TreeConstruction.DistanceTreeConstructor()

        # construct all trees with UPGMA, NJ and raxml
        for alignment_path in files:
            aln = AlignIO.read(alignment_path, 'fasta')
            upgma_tree = dist_constructor.upgma(calculator.get_distance(aln))
            name = alignment_name(alignment_path)
            Phylo.write(upgma_tree,
                        osp.join(tree_path, 'upgma_{}.tre'.format(name)),
                        'newick')
            nj_tree = dist_constructor.nj(calculator.get_distance(aln))
            Phylo.write(nj_tree,
                        osp.join(tree_path, 'nj_{}.tre'.format(name)),
                        'newick')
            raxml = RaxmlCommandline(sequences=osp.abspath(alignment_path),
                                     model='PROTCATWAG',
                                     name='{}.tre'.format(name),
                                     threads=3,
                                     working_dir=tree_path)
            _, stderr = raxml()
            print(stderr)
            print('{} finished'.format(name))

    # get best tree
    tns = dendropy.TaxonNamespace()
    act_tree = dendropy.Tree.get_from_path(osp.join(tree_path, etalon_tree),
                                           "newick",
                                           taxon_namespace=tns)
    act_tree.encode_bipartitions()

    # Column order of the distance table: NJ, UPGMA, RAxML.
    tree_file_templates = ("nj_{}.tre", "upgma_{}.tre", "RAxML_bestTree.{}.tre")
    symmetric_difference = dendropy.calculate.treecompare.symmetric_difference
    distances = np.zeros(shape=(len(files), 3))
    for row, alignment_path in enumerate(files):
        name = alignment_name(alignment_path)
        # Load all three trees before computing any distance.
        candidates = [
            dendropy.Tree.get_from_path(
                osp.join(tree_path, template.format(name)),
                "newick",
                taxon_namespace=tns)
            for template in tree_file_templates
        ]
        for column, candidate in enumerate(candidates):
            distances[row, column] = symmetric_difference(candidate, act_tree)
    return distances.argmin(1)
Пример #27
0
 def test_probs(self):
     """Compare marginal species probabilities with stored references.

     The reference file lists trees; for each tree it lists branch-length
     configurations, then speciation rates, then species configurations
     with their expected probability. Observed values must match to
     eight decimal places.
     """
     reference_path = os.path.join(_pathmap.TESTS_DATA_DIR,
                                   "marginal_probability_of_species.json")
     with open(reference_path) as src:
         test_ref = json.load(src)
     for tree_entry in test_ref:
         taxon_namespace = dendropy.TaxonNamespace(
             tree_entry["taxon_namespace"])
         tree = model.LineageTree.get(
             data=tree_entry["tree"],
             schema="newick",
             taxon_namespace=taxon_namespace,
         )
         tree.encode_bipartitions()
         for brlen_config in tree_entry["branch_length_configurations"]:
             # Branch lengths are keyed by split bitmask, stored as text.
             for bitmask_text, br_len in brlen_config["branch_lengths"].items():
                 split_bitmask = int(bitmask_text)
                 assert split_bitmask in tree.split_bitmask_edge_map, split_bitmask
                 tree.split_bitmask_edge_map[split_bitmask].length = br_len
             for rate_config in brlen_config["speciation_rate_configurations"]:
                 tree.speciation_completion_rate = rate_config["speciation_rate"]
                 for species_config in rate_config["species_configurations"]:
                     species_labels = species_config["species"]
                     expected = species_config["probability"]
                     observed = tree.calc_marginal_probability_of_species(
                         species_labels)
                     self.assertAlmostEqual(expected, observed, 8)
Пример #28
0
    def get_species_tree(self, ntax=10):
        """Simulate a species tree over ``ntax`` taxa and sanity-check it.

        Node ages and population sizes are drawn from a ``MockRandom``
        generator, the tree is built with ``popgensim.pop_gen_tree`` and
        the positive node ages found in the tree are compared with the
        requested ones before the tree is returned.
        """
        mock_rng = MockRandom()
        # Ages must be drawn before population sizes to keep the random
        # sequence unchanged.
        ages = sorted(mock_rng.randint(1000, 10000) for _ in range(ntax))
        pop_sizes = [mock_rng.randint(1000, 10000) for _ in range(2 * ntax + 1)]
        taxon_namespace = dendropy.TaxonNamespace(
            ["t{}".format(number) for number in range(1, ntax + 1)])
        species_tree = popgensim.pop_gen_tree(taxon_namespace=taxon_namespace,
                                              ages=ages,
                                              num_genes=4,
                                              pop_sizes=pop_sizes,
                                              rng=mock_rng)

        tip_distances = (node.distance_from_tip()
                         for node in species_tree.postorder_node_iter())
        observed_ages = sorted(dist for dist in tip_distances if dist > 0)
        # NOTE(review): the difference is not wrapped in abs(), so an
        # observed age larger than requested passes -- confirm intent.
        for index, observed in enumerate(observed_ages):
            assert (ages[index] - observed) < 10e-6

        # Not compared with anything; reading pop_size still requires the
        # attribute to exist on every edge.
        pop_sizes2 = sorted(edge.pop_size
                            for edge in species_tree.postorder_edge_iter())

        return species_tree
Пример #29
0
def compareDendropyTrees(tr1, tr2):
    """Compare two DendroPy trees on the leaf labels they share.

    When the two leaf-label sets differ, both trees are pruned IN PLACE to
    the common labels and migrated to a new shared ``TaxonNamespace`` built
    from those labels. Bipartitions are then refreshed on both trees before
    counting false positives and false negatives.

    Args:
        tr1: First tree (modified in place when label sets differ).
        tr2: Second tree (modified in place when label sets differ).

    Returns:
        A tuple ``(nl, ei1, ei2, fp, fn, rf)`` where ``nl`` is the number of
        shared leaf labels, ``ei1``/``ei2`` are the internal-edge counts of
        each tree (seed edge excluded), ``fp``/``fn`` are the values returned
        by ``false_positives_and_negatives(tr1, tr2)``, and ``rf`` is
        ``(fp + fn) / (ei1 + ei2)``. ``rf`` is ``0.0`` when neither tree has
        any internal edge, instead of raising ``ZeroDivisionError``.
    """
    from dendropy.calculate.treecompare \
        import false_positives_and_negatives

    lb1 = {leaf.taxon.label for leaf in tr1.leaf_nodes()}
    lb2 = {leaf.taxon.label for leaf in tr2.leaf_nodes()}

    com = list(lb1 & lb2)
    # The intersection equals both sets exactly when the sets are equal, so
    # pruning is only needed when the label sets differ.
    if lb1 != lb2:
        tns = dendropy.TaxonNamespace(com)

        tr1.retain_taxa_with_labels(com)
        tr1.migrate_taxon_namespace(tns)

        tr2.retain_taxa_with_labels(com)
        tr2.migrate_taxon_namespace(tns)

    tr1.update_bipartitions()
    tr2.update_bipartitions()

    nl = len(com)
    ei1 = len(tr1.internal_edges(exclude_seed_edge=True))
    ei2 = len(tr2.internal_edges(exclude_seed_edge=True))

    fp, fn = false_positives_and_negatives(tr1, tr2)

    # Guard against trees without internal edges (e.g. star trees or very
    # few shared taxa), where the normalising denominator is zero.
    total_internal = ei1 + ei2
    rf = float(fp + fn) / total_internal if total_internal else 0.0

    return (nl, ei1, ei2, fp, fn, rf)
Пример #30
0
def bootstrap_consensus_tree(corpus, trees=None, consensus_level=0.5):
    """Build a consensus tree from a collection of bootstrap trees.

    Each input tree is converted to an ETE tree via
    ``tree.dendrogram.to_ete`` and written as newick into a temporary
    directory; the files are then re-read with DendroPy under one shared
    ``TaxonNamespace`` built from ``corpus.titles``. Splits are counted over
    the loaded trees and a consensus tree is built from them.

    Args:
        corpus: Object exposing ``titles`` (used as leaf labels).
        trees: Iterable of objects exposing ``dendrogram.to_ete``; ``None``
            (the default) is treated as an empty collection.
        consensus_level: Minimum split frequency passed as ``min_freq`` to
            ``tree_from_splits``.

    Returns:
        An ``EteTree`` built from the consensus tree's newick string.

    Raises:
        IndexError: If no trees were supplied, since the namespace is taken
            from the first loaded tree.
    """
    import os
    import shutil

    source_trees = [] if trees is None else trees

    tmp_dir = mkdtemp()
    try:
        for idx, tree in enumerate(source_trees):
            t = tree.dendrogram.to_ete(labels=corpus.titles)
            t.write(outfile=os.path.join(tmp_dir,
                                         'tree_' + str(idx) + '.newick'))
        tns = dendropy.TaxonNamespace(corpus.titles, label="label")
        loaded_trees = [
            dendropy.Tree.get(path=filename,
                              schema='newick',
                              preserve_underscores=True,
                              taxon_namespace=tns)
            for filename in glob.glob(os.path.join(tmp_dir, '*.newick'))
        ]
    finally:
        # The newick files are only an exchange format between the two tree
        # libraries; remove them so repeated calls do not leak directories.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    tsum = TreeSummarizer(support_as_labels=True,
                          support_as_edge_lengths=False,
                          support_as_percentages=True,
                          add_node_metadata=True,
                          weighted_splits=True)
    taxon_namespace = loaded_trees[0].taxon_namespace
    split_distribution = dendropy.SplitDistribution(
        taxon_namespace=taxon_namespace)
    tsum.count_splits_on_trees(loaded_trees,
                               split_distribution=split_distribution,
                               is_bipartitions_updated=False)
    tree = tsum.tree_from_splits(
        split_distribution,
        min_freq=consensus_level,
        rooted=False,
        include_edge_lengths=False)  # this param is crucial
    ete_tree = EteTree(tree.as_string("newick").replace('[&U] ', '') + ';')
    return ete_tree