def test_majority_consensus(self):
    """Compare majority_consensus results with the reference trees on disk.

    The reference file is read lazily: its first tree is compared with the
    consensus of ``self.trees`` built with no extra argument, its second
    tree with the consensus built with ``1`` as the second argument.
    """
    references = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')
    # Extra positional arguments for each run, in reference-file order.
    for extra_args in ((), (1,)):
        expected = next(references)
        actual = Consensus.majority_consensus(self.trees, *extra_args)
        self.assertTrue(Consensus._equal_topology(actual, expected))
def test_majority_consensus(self):
    """Compare majority_consensus results with the stored reference trees.

    All reference trees are loaded up front; entry 0 is the expected result
    for the call without a second argument, entry 1 for the call with ``1``.
    """
    expected_trees = list(
        Phylo.parse("./TreeConstruction/majority_ref.tre", "newick")
    )

    # three trees, no second argument
    plain_consensus = Consensus.majority_consensus(self.trees)
    self.assertTrue(
        Consensus._equal_topology(plain_consensus, expected_trees[0])
    )

    # three trees, second argument set to 1
    strict_majority = Consensus.majority_consensus(self.trees, 1)
    self.assertTrue(
        Consensus._equal_topology(strict_majority, expected_trees[1])
    )
def test_upgma(self):
    """Build a UPGMA tree from ``self.dm`` and compare it with upgma.tre."""
    tree = self.constructor.upgma(self.dm)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    ref_tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
    self.assertTrue(Consensus._equal_topology(tree, ref_tree))
def test_built_tree(self):
    """Build a tree from ``self.aln`` and compare its topology with nj.tre."""
    tree = self.constructor.build_tree(self.aln)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    ref_tree = Phylo.read('./TreeConstruction/nj.tre', 'newick')
    self.assertTrue(Consensus._equal_topology(tree, ref_tree))
def _calculate_gsi(self):
    """
    Compute Gene Support Indices (GSIs) from the gene trees.

    Counts the ``.bcg`` files in the configured BCG directory, builds a
    majority consensus from ``all_gene.trees`` in the alignment output
    directory, prints the tree as ASCII art and writes it to a Newick file.

    :return: None
    """
    LOGGER.info("Calculating Gene Support Indices (GSIs)"
                " from the gene trees..")

    # Number of genomes == number of .bcg files in the BCG directory.
    bcg_dir = os.path.join(self._dirpath, self.config.bcg_dir)
    genome_num = sum(
        1 for entry in os.listdir(bcg_dir) if entry.endswith('.bcg')
    )

    gene_trees_path = os.path.join(self._align_output_dir, "all_gene.trees")
    gene_trees = Phylo.parse(gene_trees_path, 'newick')
    # NOTE(review): the cutoff is scaled by the genome count here; confirm
    # this matches the scale Consensus.majority_consensus expects.
    support_cutoff = (100 - self.config.gsi_threshold) * genome_num / 100
    tree = Consensus.majority_consensus(gene_trees, cutoff=support_cutoff)
    Phylo.draw_ascii(tree)

    ubcg_gsi_file = os.path.join(
        self._align_output_dir,
        f'UBCG_gsi({self._bcg_num}'
        f'){self.config.postfixes.align_tree_const}',
    )
    with open(ubcg_gsi_file, 'w') as handle:
        Phylo.write(tree, handle, 'newick')
    LOGGER.info("The final tree marked with GSI was written"
                " to %s", ubcg_gsi_file)
def test_get_support(self):
    """Check the confidence values get_support assigns to three clades.

    The first tree of ``self.trees`` is annotated against all of
    ``self.trees``; each listed pair of terminals identifies a clade through
    its common ancestor, whose confidence must equal ``hits * 100.0 / 3``.
    """
    support_tree = Consensus.get_support(self.trees[0], self.trees)
    # (terminal names spanning the clade, numerator of the expected support)
    expectations = (
        (("Beta", "Gamma"), 2),
        (("Alpha", "Beta"), 3),
        (("Delta", "Epsilon"), 2),
    )
    for terminal_names, hits in expectations:
        terminals = [support_tree.find_any(name=label) for label in terminal_names]
        clade = support_tree.common_ancestor(terminals)
        self.assertEqual(clade.confidence, hits * 100.0 / 3)
def test_bootstrap_consensus(self):
    """Run bootstrap_consensus on ``self.msa`` and write the resulting tree.

    Uses 100 replicates, an NJ constructor over a blosum62 distance
    calculator and majority_consensus; the tree is written in Newick format
    into ``temp_dir``.
    """
    calculator = DistanceCalculator("blosum62")
    constructor = DistanceTreeConstructor(calculator, "nj")
    tree = Consensus.bootstrap_consensus(
        self.msa, 100, constructor, Consensus.majority_consensus
    )
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    Phylo.write(tree, os.path.join(temp_dir, "bootstrap_consensus.tre"), "newick")
def test_bootstrap_consensus(self):
    """Run bootstrap_consensus on ``self.msa`` and check that it can be written.

    Uses 100 replicates, an NJ constructor over a blosum62 distance
    calculator and majority_consensus. The tree is written in Newick format
    to a temporary directory that is removed afterwards.
    """
    import os
    import tempfile

    calculator = DistanceCalculator('blosum62')
    constructor = DistanceTreeConstructor(calculator, 'nj')
    tree = Consensus.bootstrap_consensus(
        self.msa, 100, constructor, Consensus.majority_consensus
    )
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    # Write to a scratch directory rather than ./TreeConstruction so the
    # test neither depends on the working directory nor leaves an artifact
    # among the reference fixtures.
    with tempfile.TemporaryDirectory() as scratch_dir:
        Phylo.write(
            tree, os.path.join(scratch_dir, 'bootstrap_consensus.tre'), 'newick'
        )
def test_count_clades(self):
    """Check the per-clade counts returned by _count_clades for ``self.trees``.

    Six distinct bitstrings are expected; the first element of each entry
    must match the count listed below.
    """
    bitstr_counts = Consensus._count_clades(self.trees)
    self.assertEqual(len(bitstr_counts), 6)
    # (clade bitstring, expected first element of its entry)
    expected_counts = (
        ('11111', 3),
        ('11000', 2),
        ('00111', 3),
        ('00110', 2),
        ('00011', 1),
        ('01111', 1),
    )
    for bits, count in expected_counts:
        self.assertEqual(bitstr_counts[_BitString(bits)][0], count)
def test_count_clades(self):
    """Check both values returned by _count_clades for ``self.trees``.

    The second return value must equal ``len(self.trees)``; the first must
    hold six bitstrings whose entries start with the counts listed below.
    """
    bitstr_counts, len_trees = Consensus._count_clades(self.trees)
    self.assertEqual(len_trees, len(self.trees))
    self.assertEqual(len(bitstr_counts), 6)
    # clade bitstring -> expected first element of its entry
    expected_counts = {
        "11111": 3,
        "11000": 2,
        "00111": 3,
        "00110": 2,
        "00011": 1,
        "01111": 1,
    }
    for bits, count in expected_counts.items():
        self.assertEqual(bitstr_counts[_BitString(bits)][0], count)
def test_nj(self):
    """Check NJ tree construction on the full matrix and on a 2-entry matrix.

    The tree built from ``self.dm`` is compared with nj.tre. A second
    distance matrix is computed from ``self.aln``, trimmed to two entries,
    and its NJ tree is compared with nj_min.tre.
    """
    tree = self.constructor.nj(self.dm)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    ref_tree = Phylo.read('./TreeConstruction/nj.tre', 'newick')
    self.assertTrue(Consensus._equal_topology(tree, ref_tree))

    # create a matrix of length 2
    calculator = DistanceCalculator('blosum62')
    self.min_dm = calculator.get_distance(self.aln)
    # Drop the last entry until only two remain; the explicit positive index
    # is kept on purpose (no reliance on negative-index support).
    for _ in range(len(self.min_dm) - 2):
        del self.min_dm[len(self.min_dm) - 1]
    min_tree = self.constructor.nj(self.min_dm)
    self.assertIsInstance(min_tree, BaseTree.Tree)
    ref_min_tree = Phylo.read('./TreeConstruction/nj_min.tre', 'newick')
    self.assertTrue(Consensus._equal_topology(min_tree, ref_min_tree))
def test_nj(self):
    """Check NJ tree construction on the full matrix and on a 2-entry matrix.

    The tree built from ``self.dm`` is compared with nj.tre. A second
    distance matrix is computed from ``self.aln``, trimmed to two entries,
    and its NJ tree is compared with nj_min.tre.
    """
    tree = self.constructor.nj(self.dm)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    ref_tree = Phylo.read("./TreeConstruction/nj.tre", "newick")
    self.assertTrue(Consensus._equal_topology(tree, ref_tree))

    # create a matrix of length 2
    calculator = DistanceCalculator("blosum62")
    self.min_dm = calculator.get_distance(self.aln)
    # Remove trailing entries one at a time until two are left; the explicit
    # positive index is kept on purpose.
    while len(self.min_dm) > 2:
        del self.min_dm[len(self.min_dm) - 1]
    min_tree = self.constructor.nj(self.min_dm)
    self.assertIsInstance(min_tree, BaseTree.Tree)
    ref_min_tree = Phylo.read("./TreeConstruction/nj_min.tre", "newick")
    self.assertTrue(Consensus._equal_topology(min_tree, ref_min_tree))
def test_adam_consensus(self):
    """Compare adam_consensus results with the trees in adam_refs.tre.

    Covers all of ``self.trees`` (also via ``self.mcmc_trees`` with
    ``mcmc=True``), the first two trees, and every second tree.
    """
    expected = list(Phylo.parse("./TreeConstruction/adam_refs.tre", "newick"))

    # three trees
    all_three = Consensus.adam_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(all_three, expected[0]))
    all_three_mcmc = Consensus.adam_consensus(self.mcmc_trees, mcmc=True)
    self.assertTrue(Consensus._equal_topology(all_three_mcmc, expected[0]))

    # tree 1 and tree 2
    first_two = Consensus.adam_consensus(self.trees[:2])
    self.assertTrue(Consensus._equal_topology(first_two, expected[1]))
    # NOTE(review): this result is computed but never compared with a
    # reference tree; confirm whether an assertion is missing.
    consensus_tree_mcmc = Consensus.adam_consensus(self.mcmc_trees[:2], mcmc=True)

    # tree 1 and tree 3
    first_and_third = Consensus.adam_consensus(self.trees[::2])
    self.assertTrue(Consensus._equal_topology(first_and_third, expected[2]))
def test_majority_consensus(self):
    """Compare majority_consensus results with the reference trees on disk.

    The first reference tree is checked against the consensus of
    ``self.trees`` with no second argument. The second reference tree is
    checked against the consensus built with ``1`` as second argument, both
    for ``self.trees`` and for ``self.mcmc_trees`` with ``mcmc=True``.
    """
    references = Phylo.parse("./TreeConstruction/majority_ref.tre", "newick")

    expected_default = next(references)
    default_consensus = Consensus.majority_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(default_consensus, expected_default))

    expected_cutoff_one = next(references)
    cutoff_one = Consensus.majority_consensus(self.trees, 1)
    self.assertTrue(Consensus._equal_topology(cutoff_one, expected_cutoff_one))
    cutoff_one_mcmc = Consensus.majority_consensus(self.mcmc_trees, 1, mcmc=True)
    self.assertTrue(Consensus._equal_topology(cutoff_one_mcmc, expected_cutoff_one))
def bootstrap(self, afbased=True, basename='majorityTree', treebuilder='nj', bootstraps=1000, outgroup=None, useAllLoci=False):
    """Build one tree per bootstrap round and write their majority consensus.

    afbased: selects which population attributes are read
        ('allpolySites'/'pwm' when True, 'allpolySitesVCF'/'pwmVCF' when False)
        and is passed on to each population's bootstrap().
    basename: first component of the output file name.
    treebuilder: name of the DistanceTreeConstructor method to call
        (could be nj/upgma).
    bootstraps: number of rounds, i.e. number of trees built.
    outgroup: a population name or 'midpoint'; None leaves the tree unrooted.
    useAllLoci: when true, every round uses all loci instead of a resample.

    Side effects: sets self.dmNei (last round) and self.majorityTree, writes
    the consensus tree as Newick under resultDir, and prints progress plus an
    ASCII rendering of the tree.
    """
    # Attribute names to read from each population; the second is unused here.
    attribute_names = {True: ['allpolySites', 'pwm'], False: ['allpolySitesVCF', 'pwmVCF']}
    sites_attr, _pwm_attr = attribute_names[afbased]

    ## all loci that are variable in at least one population
    locus_pool = set()
    for population in self.populations:
        locus_pool = locus_pool.union(getattr(population, sites_attr))
    locus_pool = list(locus_pool)  ## sort it? Reduce to independent sites (www.pnas.org/content/93/23/13429, run LD?)
    sites = len(locus_pool)

    trees = []
    print("Bootstrapping, rounds:", end=' ')
    for round_no in range(bootstraps):  ## see also parallelized version
        print(round_no, end=' ')
        if useAllLoci:
            selected_loci = locus_pool
        else:
            # Resample locus indices with replacement, same count as the pool.
            picks = np.random.choice(range(len(locus_pool)), sites, replace=True)
            selected_loci = [locus_pool[pick] for pick in picks]

        per_population = [pop.bootstrap(selected_loci, afbased) for pop in self.populations]
        frame = pd.DataFrame(per_population, index=self.popnames)
        self.dmNei = neiDF(frame, [5] * (sites - 1))

        ## annoying conversion, BioPython couldnt be just more compatible with scipy/pdist?
        lower_triangle = [list(self.dmNei[row, :(row + 1)]) for row in range(len(self.dmNei))]
        matrix = _DistanceMatrix(self.popnames, lower_triangle)
        builder = DistanceTreeConstructor()  # could've passed treebuilder here too
        tree = getattr(builder, treebuilder)(matrix)

        if outgroup == 'midpoint':
            tree.root_at_midpoint()
        elif outgroup is not None:
            tree.root_with_outgroup({'name': outgroup})
        trees.append(tree)  ## use nj!

    ## debug info: print(f'selectedLoci: {selectedLoci[:30]}')
    ## see https://biopython.org/wiki/Phylo, turned out to be more suitable than dendropy/sumtrees
    ## also consider strict_consensus and adam_consensus (but they don't have bootstrap support values)
    self.majorityTree = Consensus.majority_consensus(trees)
    treefile = '%s/%s_%s_%s_%s.nwk' % (resultDir, basename, bootstraps, treebuilder, len(self.populations))
    Phylo.write(self.majorityTree, treefile, format='newick')
    print(f'wrote {treefile}')
    Phylo.draw_ascii(self.majorityTree)
def test_strict_consensus(self):
    """Compare strict_consensus results with the trees in strict_refs.tre.

    Reference tree i is the expected consensus for the i-th tree selection
    listed below.
    """
    ref_trees = list(Phylo.parse('./TreeConstruction/strict_refs.tre', 'newick'))
    selections = (
        self.trees,       # three trees
        self.trees[:2],   # tree 1 and tree 2
        self.trees[::2],  # tree 1 and tree 3
    )
    for position, selection in enumerate(selections):
        consensus_tree = Consensus.strict_consensus(selection)
        self.assertTrue(
            Consensus._equal_topology(consensus_tree, ref_trees[position])
        )
def test_strict_consensus(self):
    """Compare strict_consensus results with the trees in strict_refs.tre.

    Three selections of ``self.trees`` are checked, each against the
    reference tree at the matching position in the file.
    """
    expected = list(Phylo.parse('./TreeConstruction/strict_refs.tre', 'newick'))

    def check(selection, reference):
        # Build the strict consensus and require the reference topology.
        built = Consensus.strict_consensus(selection)
        self.assertTrue(Consensus._equal_topology(built, reference))

    # three trees
    check(self.trees, expected[0])
    # tree 1 and tree 2
    check(self.trees[:2], expected[1])
    # tree 1 and tree 3
    check(self.trees[::2], expected[2])
def test_adam_consensus(self):
    """Compare adam_consensus results with the trees in adam_refs.tre.

    Reference tree i is the expected consensus for the i-th tree selection
    listed below.
    """
    ref_trees = list(Phylo.parse("./TreeConstruction/adam_refs.tre", "newick"))
    selections = (
        self.trees,       # three trees
        self.trees[:2],   # tree 1 and tree 2
        self.trees[::2],  # tree 1 and tree 3
    )
    for position, selection in enumerate(selections):
        consensus_tree = Consensus.adam_consensus(selection)
        self.assertTrue(
            Consensus._equal_topology(consensus_tree, ref_trees[position])
        )
def test_bootstrap_consensus(self):
    """Run bootstrap_consensus on ``self.msa`` and write the resulting tree.

    Uses 100 replicates, an NJ constructor over a blosum62 distance
    calculator and majority_consensus; the tree is written in Newick format
    into ``temp_dir``.
    """
    calculator = DistanceCalculator('blosum62')
    constructor = DistanceTreeConstructor(calculator, 'nj')
    tree = Consensus.bootstrap_consensus(
        self.msa, 100, constructor, Consensus.majority_consensus
    )
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    Phylo.write(tree, os.path.join(temp_dir, 'bootstrap_consensus.tre'), 'newick')
def test_bootstrap_trees(self):
    """Check that bootstrap_trees yields 100 trees for 100 replicates."""
    calculator = DistanceCalculator('blosum62')
    constructor = DistanceTreeConstructor(calculator)
    trees = list(Consensus.bootstrap_trees(self.msa, 100, constructor))
    self.assertEqual(len(trees), 100)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(trees[0], BaseTree.Tree)
def test_bootstrap(self):
    """Check the number and dimensions of the replicates from bootstrap.

    100 replicates are requested; the first one must have the same length
    as ``self.msa`` and its first element the same length as ``self.msa[0]``.
    """
    replicates = list(Consensus.bootstrap(self.msa, 100))
    self.assertEqual(len(replicates), 100)
    first_replicate = replicates[0]
    self.assertEqual(len(first_replicate), len(self.msa))
    self.assertEqual(len(first_replicate[0]), len(self.msa[0]))
def test_maximum_clade_probability_consensus(self):
    """Compare maximum_clade_probability_consensus with its reference tree.

    The first element of the result is checked against the first reference
    tree, once for ``self.trees`` and once for ``self.mcmc_trees`` with
    ``mcmc=True``.
    """
    reference = list(
        Phylo.parse("./TreeConstruction/maximum_clade_probability_ref.tre", "newick")
    )[0]

    from_trees = Consensus.maximum_clade_probability_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(from_trees[0], reference))

    from_mcmc = Consensus.maximum_clade_probability_consensus(
        self.mcmc_trees, mcmc=True
    )
    self.assertTrue(Consensus._equal_topology(from_mcmc[0], reference))
the script parallelBootstrap.py, which dumps a list of trees that were created with BioPython's DistanceTreeConstructor. Alternative consensus construction that BioPython provides: * strict_consensus * adam_consensus They don't provide bootstrap support values, though. #run on HPC #source activate bio3 """ datadir = "/research/btc_bioinformatic/operations/HLA/FreqRT/Data" treebase = "maj_ABC_1_nj_92" if len(sys.path) > 2: treebase = sys.argv[-1] #maj_AB_min95_1_nj_246_0001.pcl treefiles = glob.glob("%s/%s*.pcl" % (datadir, treebase)) trees = [] for treefile in treefiles: with open(treefile, "rb") as tf: trees += pickle.load(tf) majorityTree = Consensus.majority_consensus(trees) Phylo.write(majorityTree, '../Data/%s.nwk' % treebase, format='newick') Phylo.draw_ascii(majorityTree)
def test_bootstrap_consensus(self):
    """Run bootstrap_consensus on ``self.msa`` and check that it can be written.

    Uses 100 replicates, an NJ constructor over a blosum62 distance
    calculator and majority_consensus. The tree is written in Newick format
    to a temporary directory that is removed afterwards.
    """
    import os
    import tempfile

    calculator = DistanceCalculator("blosum62")
    constructor = DistanceTreeConstructor(calculator, "nj")
    tree = Consensus.bootstrap_consensus(
        self.msa, 100, constructor, Consensus.majority_consensus
    )
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(tree, BaseTree.Tree)
    # Write to a scratch directory rather than ./TreeConstruction so the
    # test neither depends on the working directory nor leaves an artifact
    # among the reference fixtures.
    with tempfile.TemporaryDirectory() as scratch_dir:
        Phylo.write(
            tree, os.path.join(scratch_dir, "bootstrap_consensus.tre"), "newick"
        )