def test_setup_master_tree_alltips(self):
    """setup_master_tree keeps every tip when all support trees share them"""
    master_tree = parse_newick("((a:2,b:3):2,(c:1,d:2)foo:7);")
    t1 = parse_newick("((a:6,b:8.2):2,(c:1,d:2):7);")  # same structure
    t2 = parse_newick("((a:2,b:3,c:33):2,d:7);")  # abc are siblings
    exp = "((a:2.0,b:3.0)node1:2.0,(c:1.0,d:2.0)foo:7.0)node0;"

    new_master = tc.setup_master_tree(master_tree, [t1, t2])
    self.assertEqual(new_master.getNewick(with_distances=True), exp)

    def tips_under(node):
        return frozenset(node.getTipNames())

    self.assertEqual(tips_under(new_master), frozenset(["a", "b", "c", "d"]))

    # Walk the tree that setup_master_tree returned; the previous version
    # inspected the input tree for the grandchildren, which does not verify
    # the result.
    c0 = new_master.Children[0]
    c1 = new_master.Children[1]
    self.assertEqual(tips_under(c0), frozenset(["a", "b"]))
    self.assertEqual(tips_under(c1), frozenset(["c", "d"]))
    self.assertEqual(tips_under(c0.Children[0]), frozenset(["a"]))
    self.assertEqual(tips_under(c0.Children[1]), frozenset(["b"]))
    self.assertEqual(tips_under(c1.Children[0]), frozenset(["c"]))
    self.assertEqual(tips_under(c1.Children[1]), frozenset(["d"]))
def test_setup_master_tree_missingtips(self):
    """setup_master_tree drops a tip that one support tree lacks"""
    master_tree = parse_newick('((a:2,b:3):2,(c:1,d:2)foo:7);')
    t1 = parse_newick('((a:6,b:8.2):2,(c:1,d:2):7);')  # same structure
    t2 = parse_newick('((a:2,c:33):2,d:7);')  # where's b?
    exp = "((c:1.0,d:2.0)foo:7.0,a:4.0)node0;"

    new_master = tc.setup_master_tree(master_tree, [t1, t2])
    self.assertEqual(new_master.getNewick(with_distances=True), exp)

    # Expected tip sets, keyed by position in the returned tree.
    self.assertEqual(frozenset(new_master.getTipNames()),
                     frozenset(['a', 'c', 'd']))
    cd_clade = new_master.Children[0]
    self.assertEqual(frozenset(cd_clade.getTipNames()),
                     frozenset(['c', 'd']))
    self.assertEqual(frozenset(new_master.Children[1].getTipNames()),
                     frozenset(['a']))
    self.assertEqual(frozenset(cd_clade.Children[0].getTipNames()),
                     frozenset(['c']))
    self.assertEqual(frozenset(cd_clade.Children[1].getTipNames()),
                     frozenset(['d']))
def setUp(self):
    """Build the trees, support data and mapping data shared by the tests"""
    # plain tree and its pre-scaled counterpart
    self.newick = "((s1:0.2,s2:0.2):0.6,s3:0.8);"
    self.tree = parse_newick(self.newick, PhyloNode)
    self.newick_scaled = "((s1:25,s2:25):75,s3:100);"
    self.tree_scaled = parse_newick(self.newick_scaled, PhyloNode)
    self.tree_scaled.scaleBranchLengths(max_length=100, ultrametric=True)

    self.num_trees_considered = 10
    # (lower, upper) support bounds -> (colour, legend label)
    self.trans_values = {
        (None, None): ("#FFFFFF", ""),
        (None, 0.5): ("#dddddd", "< 50%"),
        (0.5, 0.7): ("#99CCFF", "50-70%"),
        (0.7, 0.9): ("#82FF8B", "70-90%"),
        (0.9, 0.999): ("#F8FE83", "90-99.9%"),
        (0.999, None): ("#FF8582", "> 99.9%"),
    }

    # the same trees with support values written on the internal nodes
    self.jack_newick = "((s1:0.2,s2:0.2)0.8:0.6,s3:0.8)1.0;"
    self.jack_tree = parse_newick(self.jack_newick, PhyloNode)
    self.jack_newick_scaled = "((s1:25,s2:25)0.8:75,s3:100)1.0;"
    self.jack_tree_scaled = parse_newick(self.jack_newick_scaled,
                                         PhyloNode)
    self.jack_tree_scaled.scaleBranchLengths(max_length=100,
                                             ultrametric=True)

    self.support = {
        'trees_considered': 10,
        'support_dict': {"node0": 1.0, "node1": 0.8},
    }

    self.qiime_config = load_qiime_config()
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
    self.output_file = get_tmp_filename(tmp_dir=self.tmp_dir)

    per_sample = {
        "s1": {
            'Description': 's1 test description',
            'NumIndividuals': '100',
            'BarcodeSequence': 'AAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value1',
            'ExampleHeader2': 'Val2',
        },
        "s2": {
            'Description': 's2 test description',
            'NumIndividuals': '200',
            'BarcodeSequence': 'CAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value2',
            'ExampleHeader2': 'Val1',
        },
        "s3": {
            'Description': 's3 test description',
            'NumIndividuals': '300',
            'BarcodeSequence': 'GAAAAAAAAACT',
            'LinkerPrimerSequence': 'AAAAAAAAAAAAAAAAAAAAA',
            'ExampleHeader1': 'Value2',
            'ExampleHeader2': 'Val3',
        },
    }
    self.mapping_data = [per_sample, "Example comment string for test"]

    self._paths_to_clean_up = []
def setUp(self):
    """Define some test data.

    Builds three OTU tables, writes each one to a temporary .biom file and
    parses two small trees.  Paths that need deleting afterwards are
    recorded in self.files_to_remove / self.dirs_to_remove.
    """
    self.qiime_config = load_qiime_config()
    self.dirs_to_remove = []
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
    if not exists(self.tmp_dir):
        makedirs(self.tmp_dir)
        # if test creates the temp dir, also remove it
        self.dirs_to_remove.append(self.tmp_dir)

    def write_tmp_biom(table):
        """Write table to a fresh temp .biom file and return its path."""
        fd, path = mkstemp(dir=self.tmp_dir,
                           prefix='alpha_diversity_tests',
                           suffix='.biom')
        close(fd)
        # A context manager closes (and flushes) the file deterministically;
        # open(...).write(...) left that to garbage collection.
        with open(path, 'w') as biom_file:
            biom_file.write(format_biom_table(table))
        return path

    self.otu_table1 = table_factory(
        data=array([[2, 0, 0, 1],
                    [1, 1, 1, 1],
                    [0, 0, 0, 0]]).T,
        sample_ids=list('XYZ'),
        observation_ids=list('abcd'),
        constructor=DenseOTUTable)
    self.otu_table1_fp = write_tmp_biom(self.otu_table1)

    # same counts, but the last observation id carries an underscore
    self.otu_table2 = table_factory(
        data=array([[2, 0, 0, 1],
                    [1, 1, 1, 1],
                    [0, 0, 0, 0]]).T,
        sample_ids=list('XYZ'),
        observation_ids=['a', 'b', 'c', 'd_'],
        constructor=DenseOTUTable)
    self.otu_table2_fp = write_tmp_biom(self.otu_table2)

    self.single_sample_otu_table = table_factory(
        data=array([[2, 0, 0, 1]]).T,
        sample_ids=list('X'),
        observation_ids=list('abcd'),
        constructor=DenseOTUTable)
    self.single_sample_otu_table_fp = write_tmp_biom(
        self.single_sample_otu_table)

    self.tree1 = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
    self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

    self.files_to_remove = [self.otu_table1_fp, self.otu_table2_fp,
                            self.single_sample_otu_table_fp]
def test_tree_support(self):
    """tree_support should correctly modify node.bootstrap_support"""
    master_tree = parse_newick('((a:2,b:3)ab:2,(c:1,d:2)cd:7)rt;')
    # Diagram kept from the original test:
    #
    #      /-------.5 /-a
    # ---1|           \-b
    #      \------.5 /-c
    #                \-d
    support_tree = parse_newick('((a:2,b:3,c:33)ho:2,d:7);')  # abc are siblings

    tc.tree_support(master_tree, support_tree)

    root = master_tree.getNodeMatchingName('rt')
    self.assertFloatEqual(root.bootstrap_support, 1.0)
def load_tree_files(master_tree_file, support_dir):
    """Load the master tree and every support tree in a directory.

    master_tree_file: path to the newick file of the master tree
    support_dir: directory whose visible files are all parsed as support
        trees

    If the support filenames can be parsed by parse_rarefaction_fname and
    yield more than one base name, a warning is issued, since the trees may
    come from different methods.

    Returns (master_tree, [support_trees]); each support tree gets a
    .filepath attribute holding its filename.

    Raises RuntimeError if no support trees are loaded.  Exits with status 1
    if a support tree file cannot be read.
    """
    # ignore invisible files like .DS_Store
    tree_file_names = [fname for fname in os.listdir(support_dir)
                       if not fname.startswith('.')]

    # try to warn user if using multiple types of trees
    try:
        base_names = []
        for fname in tree_file_names:
            base_names.append(qiime.parse.parse_rarefaction_fname(fname)[0])
    except ValueError:
        # filenames don't follow the rarefaction naming scheme; skip check
        pass
    else:
        if len(set(base_names)) > 1:
            warnstr = (
                " warning: support trees are named differently, please be"
                " sure you're not comparing trees generated in different"
                " manners, unless you're quite sure that's what you intend"
                " to do. types: " + str(set(base_names)) +
                " continuing anyway...")
            warn(warnstr)

    # 'with' closes the handle even when parsing raises
    with open(master_tree_file, 'U') as master_f:
        master_tree = parse_newick(master_f, PhyloNode)

    support_trees = []
    for fname in tree_file_names:
        try:
            with open(os.path.join(support_dir, fname), 'U') as f:
                tree = parse_newick(f, PhyloNode)
        except IOError:
            sys.stderr.write('error loading support tree ' + fname + '\n')
            # sys.exit rather than the site-provided exit() helper, which is
            # meant for interactive use and may be absent
            sys.exit(1)
        tree.filepath = fname
        support_trees.append(tree)

    if not support_trees:
        raise RuntimeError(
            'Error: no support trees loaded' +
            ', check that support tree directory has has valid trees')
    return master_tree, support_trees
def test_bootstrap_support(self):
    """bootstrap_support handles a tree with unlabeled internal nodes"""
    master_tree = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
    # Diagram kept from the original test:
    #
    #      /-------.5 /-a
    # ---1|           \-b
    #      \------.5 /-c
    #                \-d
    support_trees = [
        parse_newick('((a:6,b:8.2):2,(c:1,d:2):7);'),  # same structure
        parse_newick('((a:2,b:3,c:33):2,d:7);'),  # abc are siblings
    ]

    new_master, bootstraps = tc.bootstrap_support(master_tree, support_trees)

    observed = sorted(bootstraps.values())
    self.assertFloatEqual(observed, [0.5, 0.5, 1.0])
def main():
    """Compute the requested relatedness methods for a group of taxa.

    Reads the tree from opts.tree_fp and a comma-separated list of taxon ids
    from the first line of opts.taxa_fp, validates the ids against the tree,
    then writes one "<method>: <value>" line per requested method to
    opts.output_fp, or to stdout when no output path is given.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.tree_fp) as tree_file:
        tr = parse_newick(tree_file, PhyloNode)
    # all_nodes is list node objs
    tip_dists, all_nodes = tr.tipToTipDistances()
    all_ids = [node.Name for node in all_nodes]

    with open(opts.taxa_fp) as taxa_file:
        group_ids = [i.strip() for i in taxa_file.readline().split(",")]

    # check that there are at least 2 ids in the group, otherwise the math
    # fails
    if len(group_ids) < 2:
        option_parser.error(
            "Not enough taxa in the taxa file.You must have "
            "at least 2 taxa specified in the taxa file or "
            "the standard deviation of the distance will be "
            "zero, causing both NRI and NTI to fail."
        )

    # check that all_ids contains every group_id
    if not set(group_ids).issubset(all_ids):
        # option_parser.error() reports and exits itself; it is not an
        # exception to raise
        option_parser.error(
            "There are taxa in the taxa file which are "
            "not found in the tree. You may have "
            "specified an internal node."
        )

    # check that all_ids != group_ids
    if len(all_ids) == len(group_ids):  # must be same set if above passes
        option_parser.error(
            "The taxa_ids you specified contain every tip in "
            "the tree. The NRI and NTI formulas will fail "
            "because there is no standard deviation of mpd or "
            "mntd, and thus division by zero. In addition, "
            "the concept of over/under dispersion of a group "
            "of taxa (what NRI/NTI measure) is done in "
            "reference to the tree they are a part of. If the "
            "group being tested is the entire tree, the idea "
            "of over/under dispersion makes little sense."
        )

    # mapping from string of method name to function handle
    method_lookup = {"nri": nri, "nti": nti}
    methods = opts.methods
    for method in methods:
        if method not in method_lookup:
            option_parser.error("Unknown method: %s; valid methods are: %s" %
                                (method, ", ".join(method_lookup.keys())))

    # Open the output only after validation so that a rejected run does not
    # truncate an existing file.
    output_fp = opts.output_fp
    fd = open(output_fp, "w") if output_fp else stdout
    try:
        for method in methods:
            result = method_lookup[method](tip_dists, all_ids, group_ids,
                                           iters=opts.iters)
            fd.write("%s: %s\n" % (method, result))
    finally:
        # never close the process-wide stdout, only a file we opened
        if fd is not stdout:
            fd.close()
def get_order_from_tree(ids, tree_text):
    """Returns the indices that would sort ids by tree tip order

    ids: the ids to order (assumed hashable, e.g. strings)
    tree_text: newick source handed to parse_newick

    Tips whose name is not in ids are ignored.  The result is whatever
    names_to_indices returns for ids and the tip-ordered names.
    """
    tree = parse_newick(tree_text, PhyloNode)
    # build the lookup once instead of scanning ids for every tip
    wanted = frozenset(ids)
    ordered_ids = [tip.Name for tip in tree.iterTips() if tip.Name in wanted]
    return names_to_indices(ids, ordered_ids)
def main():
    """Write a PDF of the master tree using the parsed bootstrap support.

    Reads the master tree from opts.master_tree and the support values from
    opts.support, then passes both to write_pdf_bootstrap_tree with
    opts.output_file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # 'with' closes each input even if parsing raises
    with open(opts.master_tree, 'U') as tree_file:
        tree = parse_newick(tree_file)
    with open(opts.support) as support_file:
        bootstraps = parse_bootstrap_support(support_file)

    write_pdf_bootstrap_tree(tree, opts.output_file, bootstraps)
def test_bootstrap_support_labeled(self):
    """bootstrap_support handles a tree with labeled internal nodes"""
    master_tree = parse_newick("((a:2,b:3)ab:2,(c:1,d:2)cd:7)rt;")
    # Diagram kept from the original test:
    #
    #      /-------.5 /-a
    # ---1|           \-b
    #      \------.5 /-c
    #                \-d
    same_structure = parse_newick("((a:6,b:8.2)hi:2,(c:1,d:2):7);")
    abc_siblings = parse_newick("((a:2,b:3,c:33)ho:2,d:7);")

    new_master, bootstraps = tc.bootstrap_support(
        master_tree, [same_structure, abc_siblings])

    expected = {"ab": 0.5, "cd": 0.5, "rt": 1.0}
    self.assertDictEqual(bootstraps, expected)
def setUp(self):
    """Define some test data.

    Builds three OTU tables, writes each one to a temporary .biom file and
    parses two small trees.  Paths that need deleting afterwards are
    recorded in self.files_to_remove / self.dirs_to_remove.
    """
    self.qiime_config = load_qiime_config()
    self.dirs_to_remove = []
    self.tmp_dir = self.qiime_config["temp_dir"] or "/tmp/"
    if not exists(self.tmp_dir):
        makedirs(self.tmp_dir)
        # if test creates the temp dir, also remove it
        self.dirs_to_remove.append(self.tmp_dir)

    def write_tmp_biom(table):
        """Write table to a new temp .biom path and return that path."""
        path = get_tmp_filename(
            tmp_dir=self.tmp_dir,
            prefix="alpha_diversity_tests",
            suffix=".biom",
            result_constructor=str,
        )
        # A context manager closes (and flushes) the file deterministically;
        # open(...).write(...) left that to garbage collection.
        with open(path, "w") as biom_file:
            biom_file.write(format_biom_table(table))
        return path

    self.otu_table1 = table_factory(
        data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T,
        sample_ids=list("XYZ"),
        observation_ids=list("abcd"),
        constructor=DenseOTUTable,
    )
    self.otu_table1_fp = write_tmp_biom(self.otu_table1)

    # same counts, but the last observation id carries an underscore
    self.otu_table2 = table_factory(
        data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T,
        sample_ids=list("XYZ"),
        observation_ids=["a", "b", "c", "d_"],
        constructor=DenseOTUTable,
    )
    self.otu_table2_fp = write_tmp_biom(self.otu_table2)

    self.single_sample_otu_table = table_factory(
        data=array([[2, 0, 0, 1]]).T,
        sample_ids=list("X"),
        observation_ids=list("abcd"),
        constructor=DenseOTUTable,
    )
    self.single_sample_otu_table_fp = write_tmp_biom(
        self.single_sample_otu_table)

    self.tree1 = parse_newick("((a:2,b:3):2,(c:1,d:2):7);")
    self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

    self.files_to_remove = [
        self.otu_table1_fp,
        self.otu_table2_fp,
        self.single_sample_otu_table_fp,
    ]
def setUp(self):
    """Create the 19-sample / 9-taxon fixture, its biom files and its tree.

    Two OTU tables are written to temporary files: one with the plain taxon
    names and one where the first taxon name contains an underscore.  The
    temp paths are recorded in self.files_to_remove.
    """
    self.qiime_config = load_qiime_config()
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

    # rows are samples, columns are taxa
    self.l19_data = np.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0]
    ])
    self.l19_sample_names = [
        'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6',
        'sam7', 'sam8', 'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13',
        'sam14', 'sam15', 'sam16', 'sam17', 'sam18', 'sam19']
    self.l19_taxon_names = ['tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
                            'tax6', 'tax7', 'tax8', 'tax9']
    self.l19_taxon_names_w_underscore = ['ta_x1', 'tax2', 'tax3', 'tax4',
                                         'endbigtaxon', 'tax6', 'tax7',
                                         'tax8', 'tax9']

    # NOTE(review): the '.blom' suffix looks like a typo for '.biom'; it is
    # left untouched because it only names a temp file.
    l19 = Table(self.l19_data.T, self.l19_taxon_names,
                self.l19_sample_names)
    fd, self.l19_fp = mkstemp(dir=self.tmp_dir,
                              prefix='test_bdiv_otu_table',
                              suffix='.blom')
    os.close(fd)
    write_biom_table(l19, self.l19_fp)

    l19_w_underscore = Table(self.l19_data.T,
                             self.l19_taxon_names_w_underscore,
                             self.l19_sample_names)
    fd, self.l19_w_underscore_fp = mkstemp(dir=self.tmp_dir,
                                           prefix='test_bdiv_otu_table',
                                           suffix='.blom')
    os.close(fd)
    write_biom_table(l19_w_underscore, self.l19_w_underscore_fp)

    # Adjacent literals instead of a backslash continuation inside the
    # string: with the continuation, the indentation of the next source line
    # became part of the tree text.
    self.l19_tree_str = (
        '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4, '
        '((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, '
        '(tax9:0.3, endbigtaxon:.08));')
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

    self.files_to_remove = [self.l19_fp, self.l19_w_underscore_fp]
    self.folders_to_remove = []
def setUp(self):
    """Create three OTU tables on disk plus two trees for the tests"""
    self.tmp_dir = get_qiime_temp_dir()

    def new_biom_path():
        # reserve a temp file and hand back only its path
        handle, path = mkstemp(dir=self.tmp_dir,
                               prefix='alpha_diversity_tests',
                               suffix='.biom')
        close(handle)
        return path

    # three samples (X, Y, Z) over four observations
    self.otu_table1 = Table(
        data=array([[2, 0, 0, 1],
                    [1, 1, 1, 1],
                    [0, 0, 0, 0]]).T,
        sample_ids=list('XYZ'),
        observation_ids=list('abcd'))
    self.otu_table1_fp = new_biom_path()
    write_biom_table(self.otu_table1, self.otu_table1_fp)

    # identical counts; the last observation id ends in an underscore
    self.otu_table2 = Table(
        data=array([[2, 0, 0, 1],
                    [1, 1, 1, 1],
                    [0, 0, 0, 0]]).T,
        sample_ids=list('XYZ'),
        observation_ids=['a', 'b', 'c', 'd_'])
    self.otu_table2_fp = new_biom_path()
    write_biom_table(self.otu_table2, self.otu_table2_fp)

    # a table holding only sample X
    self.single_sample_otu_table = Table(
        data=array([[2, 0, 0, 1]]).T,
        sample_ids=list('X'),
        observation_ids=list('abcd'))
    self.single_sample_otu_table_fp = new_biom_path()
    write_biom_table(self.single_sample_otu_table,
                     self.single_sample_otu_table_fp)

    self.tree1 = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
    self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);")

    self.files_to_remove = [
        self.otu_table1_fp,
        self.otu_table2_fp,
        self.single_sample_otu_table_fp,
    ]
def test_bootstrap_support_subset(self):
    """bootstrap_support handles support trees that lack a master tip"""
    master_tree = parse_newick('((a:2,b:3)ab:2,(c:1,d:2)cd:7)rt;')
    # Diagram kept from the original test:
    #
    #      /-------.5 /-a
    # ---1|           \-b
    #      \------.5 /-c
    #                \-d
    support_trees = [
        parse_newick('((a:6,b:8.2)hi:2,(c:1,d:2):7);'),  # same structure
        parse_newick('((a:2,b:3,c:33)ho:2,d:7);'),  # abc are siblings
        parse_newick('((a:6)hi:2,(c:1,d:2):7);'),  # b missing
        parse_newick('(a:8,(c:1,d:2):7);'),  # b missing, and pruned
    ]

    new_master, bootstraps = tc.bootstrap_support(master_tree, support_trees)

    expected = {'cd': 0.75, 'rt': 1.0}
    self.assertFloatEqual(bootstraps, expected)
def test_write_pdf_bootstrap_tree(self):
    """write_pdf_bootstrap_tree should throw no errors and write output"""
    tree = parse_newick(
        "((tax7:0.1,tax3:0.2)node0:.98,tax8:.3, tax4:.3)node1:.4",
        PhyloNode)
    bootstraps = {"node0": 0.7, "node1": 0.4}

    fd, f = mkstemp(prefix="make_bootstrapped_tree_test", suffix=".pdf")
    close(fd)
    self._paths_to_clean_up.append(f)

    write_pdf_bootstrap_tree(tree, f, bootstraps)

    # TestCase assertions instead of a bare assert, which is stripped when
    # Python runs with -O.
    self.assertTrue(os.path.exists(f))
    # mkstemp already created an empty file at this path, so existence alone
    # cannot fail; require that something was written as well.
    self.assertTrue(os.path.getsize(f) > 0)
def setUp(self):
    """Create a small OTU table, its labels and a matching tree"""
    self.sample_names = list('XYZ')
    self.otu_names = list('abcd')
    # one row per sample, one column per OTU
    self.otu_table = array([
        [2, 0, 0, 1],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
    ])
    # the tuple carries the transposed table
    self.otu_tuple = (
        self.sample_names,
        self.otu_names,
        self.otu_table.T,
        None,
    )
    self.tree = parse_newick('((a:2,b:3):2,(c:1,d:2):7);')
def test_make_unifrac_metric(self):
    """unweighted unifrac metric runs and matches fast_unifrac"""
    tree = parse_newick(self.l19_treestr, PhyloNode)
    samples = self.l19_sample_names
    taxa = self.l19_taxon_names

    metric = make_unifrac_metric(False, unifrac, True)
    res = metric(self.l19_data, taxa, tree, samples)

    # reference result computed directly with fast_unifrac
    envs = make_envs_dict(self.l19_data, samples, taxa)
    reference = fast_unifrac(tree, envs, modes=['distance_matrix'])
    unifrac_mat, unifrac_names = reference['distance_matrix']
    reordered = _reorder_unifrac_res([unifrac_mat, unifrac_names], samples)

    self.assertFloatEqual(res, reordered)
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertNotEqual(res[0, 1], 1.0)
def test_make_unifrac_row_metric3(self):
    """row metric matches the full matrix when taxa are absent from the tree"""
    treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
               '((tax6:.09):0.43):0.5):.2,'
               '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
    tree = parse_newick(treestr, PhyloNode)

    otu_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0]
    ])

    unif = make_unifrac_metric(False, unifrac, True)
    # catch_warnings restores the previous filters on exit, even when the
    # body raises; resetwarnings() would instead discard every filter the
    # test runner had installed.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertEqual(res[0, 1], 1.0)

    # these have no data and are warned "meaningless".
    # I Would prefer if they matched res anyway though
    empty_samples = [0, 3, 4, 5, 8, 9]

    unif_row = make_unifrac_row_metric(False, unifrac, True)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        for i, sam_name in enumerate(self.l19_sample_names):
            if i in empty_samples:
                continue
            res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                               self.l19_sample_names, sam_name)
            for j in range(len(self.l19_sample_names)):
                if j in empty_samples:
                    continue  # ok if meaningless number in zero sample
                self.assertEqual(res_row[j], res[i, j])
def test_root_midpt2(self):
    """midpoint between nodes should behave correctly"""
    from qiime.make_phylogeny import root_midpt
    from qiime.parse import parse_newick

    tree = parse_newick('(a:1,((c:1,d:2.5)n3:1,b:1)n2:1)rt;')
    tmid = root_midpt(tree)

    # distances reported by the rerooted tree must equal those of the input
    self.assertEqual(tmid.getDistances(), tree.getDistances())
    self.assertTrue(tmid.isRoot())
    self.assertEqual(tmid.distance(tmid.getNodeMatchingName('d')), 2.75)
def test_write_pdf_bootstrap_tree(self):
    """write_pdf_bootstrap_tree should throw no errors and write output"""
    tree = parse_newick(
        "((tax7:0.1,tax3:0.2)node0:.98,tax8:.3, tax4:.3)node1:.4",
        PhyloNode)
    bootstraps = {'node0': .7, 'node1': .4}

    # the descriptor is used (it must be closed), so give it a real name
    # rather than the throwaway '_'
    fd, f = mkstemp(prefix='make_bootstrapped_tree_test',
                    suffix='.pdf')
    close(fd)
    self._paths_to_clean_up.append(f)

    write_pdf_bootstrap_tree(tree, f, bootstraps)

    # TestCase assertions survive python -O, a bare assert does not
    self.assertTrue(os.path.exists(f))
    # mkstemp pre-creates the (empty) file, so also check for content
    self.assertTrue(os.path.getsize(f) > 0)
def test_parse_newick(self):
    """parse_newick correctly matches escaped tip names to otu ids"""
    # confirm that it works without escaped names
    plain_tree = ("((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,"
                  "((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2,"
                  "(tax9:0.3, endbigtaxon:.08));")
    plain_tips = ['tax7', 'tax3', 'tax8', 'tax4', 'tax1',
                  'tax6', 'tax2', 'tax9', 'endbigtaxon']

    # throw some screwed up names in
    escaped_tree = ("((((tax7:0.1,tax3:0.2):.98,tax8:.3, 'tax4':.3):.4,"
                    "(('ta_______ x1':0.3, tax6:.09):0.43,tax2:0.4):0.5):.2,"
                    "(tax9:0.3, endbigtaxon:.08));")
    escaped_tips = ['tax7', 'tax3', 'tax8', 'tax4', 'ta_______ x1',
                    'tax6', 'tax2', 'tax9', 'endbigtaxon']

    cases = ((plain_tree, plain_tips), (escaped_tree, escaped_tips))
    for newick, tip_names in cases:
        wanted = set(tip_names)
        # both ways of listing tips must agree with the expected names
        self.assertEqual(set(parse_newick(newick).getTipNames()), wanted)
        self.assertEqual(
            set(tip.Name for tip in parse_newick(newick).tips()), wanted)
def test_make_unifrac_row_metric2(self):
    """samples with no seqs, and identical samples, should behave correctly"""
    tree = parse_newick(self.l19_treestr, PhyloNode)
    unif = make_unifrac_metric(False, unifrac, True)

    otu_data = numpy.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0]
    ])

    # catch_warnings restores the previous filters on exit, even when the
    # body raises; resetwarnings() would instead discard every filter the
    # test runner had installed.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 13], 0.0)
    self.assertEqual(res[12, 15], 0.0)
    self.assertEqual(res[0, 1], 1.0)

    # these have no data and are warned "meaningless".
    # I Would prefer if they matched res anyway though
    empty_samples = [0]

    unif_row = make_unifrac_row_metric(False, unifrac, True)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        for i, sam_name in enumerate(self.l19_sample_names):
            if i in empty_samples:
                continue
            res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                               self.l19_sample_names, sam_name)
            for j in range(len(self.l19_sample_names)):
                if j in empty_samples:
                    continue  # ok if meaningless number in zero sample
                self.assertEqual(res_row[j], res[i, j])
def main():
    """Write a PDF of the master tree using a tab-separated lookup file.

    Each non-blank line of opts.support is split on tabs; the first field is
    used as the key and the second as the value of the dict that is passed
    to write_pdf_bootstrap_tree together with the tree read from
    opts.master_tree and opts.output_file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # 'with' closes the tree file even if parsing raises
    with open(opts.master_tree, 'U') as tree_file:
        tree = parse_newick(tree_file)

    color_dict = {}
    with open(opts.support) as fp:
        for line in fp:
            stripped = line.rstrip()
            if not stripped:
                # a blank line (e.g. at end of file) used to raise IndexError
                continue
            tabs = stripped.split('\t')
            color_dict[tabs[0]] = tabs[1]

    write_pdf_bootstrap_tree(tree, opts.output_file, color_dict)
def test_write_pdf_bootstrap_tree(self):
    """write_pdf_bootstrap_tree should throw no errors"""
    tree = parse_newick(
        "((tax7:0.1,tax3:0.2)node0:.98,tax8:.3, tax4:.3)node1:.4",
        PhyloNode)
    bootstraps = {'node0': .7, 'node1': .4}

    # no backslashes needed: the call's parentheses already continue lines
    f = get_tmp_filename(
        prefix='make_bootstrapped_tree_test',
        suffix='.pdf',
        result_constructor=str)
    self._paths_to_clean_up.append(f)

    write_pdf_bootstrap_tree(tree, f, bootstraps)

    # a TestCase assertion is not stripped under python -O
    self.assertTrue(os.path.exists(f))
def test_write_pdf_bootstrap_tree(self):
    """write_pdf_bootstrap_tree should throw no errors"""
    newick = "((tax7:0.1,tax3:0.2)node0:.98,tax8:.3, tax4:.3)node1:.4"
    tree = parse_newick(newick, PhyloNode)
    bootstraps = {'node0': .7, 'node1': .4}

    f = get_tmp_filename(prefix='make_bootstrapped_tree_test',
                         suffix='.pdf',
                         result_constructor=str)
    self._paths_to_clean_up.append(f)

    write_pdf_bootstrap_tree(tree, f, bootstraps)

    # use the TestCase API: a bare assert disappears under python -O and
    # gives no failure message
    self.assertTrue(os.path.exists(f))
def test_make_unifrac_metric(self):
    """unweighted unifrac metric should run and agree with fast_unifrac"""
    tree = parse_newick(self.l19_treestr, PhyloNode)

    unif = make_unifrac_metric(False, unifrac, True)
    res = unif(
        self.l19_data,
        self.l19_taxon_names,
        tree,
        self.l19_sample_names,
    )

    envs = make_envs_dict(
        self.l19_data,
        self.l19_sample_names,
        self.l19_taxon_names,
    )
    fast_result = fast_unifrac(tree, envs, modes=['distance_matrix'])
    unifrac_mat, unifrac_names = fast_result['distance_matrix']

    # put the fast_unifrac rows into our sample order before comparing
    expected = _reorder_unifrac_res(
        [unifrac_mat, unifrac_names], self.l19_sample_names)
    self.assertFloatEqual(res, expected)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertNotEqual(res[0, 1], 1.0)
def getTree(self, tree_source):
    """Returns parsed tree from putative tree source

    tree_source: a PhyloNode (returned unchanged) or a path to a newick
        file.

    Raises TreeMissingError if tree_source is falsy or the file cannot be
    opened.
    """
    if isinstance(tree_source, PhyloNode):
        # accept tree object directly for tests
        return tree_source

    if not tree_source:
        raise TreeMissingError(
            str(self.Name) +
            " is a phylogenetic metric, but no tree was supplied.")

    try:
        f = open(tree_source, 'U')
    except (TypeError, IOError):
        raise TreeMissingError(
            "Couldn't read tree file at path: %s" % tree_source)

    # close the handle even when parsing fails
    try:
        return parse_newick(f, PhyloNode)
    finally:
        f.close()
def load_tree_files(tree_dir):
    """Load every tree found in a directory.

    tree_dir: directory whose visible files are all parsed as newick trees

    If the filenames can be parsed by parse_rarefaction_fname and yield more
    than one base name, a warning is issued, since the trees may come from
    different methods.

    Returns [trees]; each tree gets a .filepath attribute holding its
    filename.

    Raises RuntimeError if no trees are loaded.  Exits with status 1 if a
    tree file cannot be read.
    """
    # ignore invisible files like .DS_Store
    tree_file_names = [fname for fname in os.listdir(tree_dir)
                       if not fname.startswith('.')]

    # try to warn user if using multiple types of trees
    try:
        base_names = []
        for fname in tree_file_names:
            base_names.append(qiime.parse.parse_rarefaction_fname(fname)[0])
    except ValueError:
        # filenames don't follow the rarefaction naming scheme; skip check
        pass
    else:
        if len(set(base_names)) > 1:
            warnstr = (
                " warning: trees are named differently, please be sure"
                " you're not comparing trees generated in different"
                " manners, unless you're quite sure that's what you intend"
                " to do. types: " + str(set(base_names)) +
                " continuing anyway...")
            warn(warnstr)

    trees = []
    for fname in tree_file_names:
        try:
            # 'with' closes the handle even when parsing raises
            with open(os.path.join(tree_dir, fname), 'U') as f:
                tree = parse_newick(f, PhyloNode)
        except IOError:
            sys.stderr.write('error loading tree ' + fname + '\n')
            # sys.exit rather than the site-provided exit() helper, which is
            # meant for interactive use and may be absent
            sys.exit(1)
        tree.filepath = fname
        trees.append(tree)

    if not trees:
        raise RuntimeError('Error: no trees loaded' +
                           ', check that tree directory has has valid trees')
    return trees
def main():
    """Print the requested relatedness methods for a group of taxa.

    Reads the tree from opts.tree_fp and a comma-separated list of taxon ids
    from the first line of opts.taxa_fp, validates the ids against the tree,
    then prints one "<method>: <value>" line per method named in the
    comma-separated opts.methods.
    """
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    with open(opts.tree_fp) as tree_file:
        tr = parse_newick(tree_file, PhyloNode)
    # tipToTipDistances returns a list of actual node objects
    tip_dists, all_nodes = tr.tipToTipDistances()
    all_ids = [node.Name for node in all_nodes]

    with open(opts.taxa_fp) as taxa_file:
        group_ids = [i.strip() for i in taxa_file.readline().split(',')]

    # check that there are at least 2 ids in the group, otherwise the math
    # fails
    if len(group_ids) < 2:
        option_parser.error('you must have at least 2 taxa specified'
                            ' in the taxa file or the math will fail.')

    # make sure specified taxa are in the tree, break at first failure
    known_ids = set(all_ids)
    for i in group_ids:
        if i not in known_ids:
            option_parser.error('Taxa ' + i + ' not found in the tree. You may'
                                ' have specified an internal node.')

    # must be the same set of ids if above check passes
    if len(all_ids) == len(group_ids):
        option_parser.error(
            'The taxa_ids you specified contain every tip'
            ' in the tree. The NRI and NTI formulas will fail with these values'
            ' because there is no standard deviation of mpd or mntd, and thus'
            ' division by zero will occur. In addition, the concept of over/under'
            ' dispersion of a group of taxa (what NRI/NTI measure) is done in'
            ' reference to the tree they are a part of. If the group being tested'
            ' is the entire tree, the idea of over/under dispersion does not make'
            ' much sense.')

    # mapping from string of method name to function handle
    method_lookup = {'nri': nri, 'nti': nti}
    methods = opts.methods.split(',')
    for method in methods:
        if method not in method_lookup:
            option_parser.error("unknown method: %s; valid methods are: %s" %
                                (method, ', '.join(method_lookup.keys())))

    for method in methods:
        result = method_lookup[method](tip_dists, all_ids, group_ids,
                                       iters=opts.iters)
        # a single pre-formatted string prints identically whether print is
        # a statement or a function
        print("%s: %s" % (method, result))
def setUp(self):
    """Create the 19-sample / 9-taxon fixture, its OTU table text and tree"""
    # cleanup bookkeeping first; nothing is created on disk here
    self.files_to_remove = []
    self.folders_to_remove = []

    # rows are samples, columns are taxa
    self.l19_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])

    # sam1 .. sam19, except that the tenth sample is called 'sam_middle'
    sample_names = ['sam%d' % number for number in range(1, 20)]
    sample_names[9] = 'sam_middle'
    self.l19_sample_names = sample_names

    self.l19_taxon_names = [
        'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
        'tax6', 'tax7', 'tax8', 'tax9',
    ]
    # same taxa, but the first name contains an underscore
    self.l19_taxon_names_w_underscore = [
        'ta_x1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
        'tax6', 'tax7', 'tax8', 'tax9',
    ]

    taxa_by_sample = self.l19_data.T
    self.l19_str = format_otu_table(
        self.l19_sample_names, self.l19_taxon_names, taxa_by_sample)
    self.l19_str_w_underscore = format_otu_table(
        self.l19_sample_names, self.l19_taxon_names_w_underscore,
        self.l19_data.T)

    self.l19_tree_str = (
        '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4, '
        '((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, '
        '(tax9:0.3, endbigtaxon:.08));')
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)
def test_write_pdf_bootstrap_tree_escaped_names(self):
    """write_pdf_bootstrap_tree functions when newick names are escaped

    This test essentially is only checking that no failures arise from
    having escaped strings as nodes in the newick file. Visual inspection
    of the resulting PDFs shows that the coloring is occurring as expected
    but unfortunately there is not a great way to test for this.
    """
    tree = parse_newick(
        "((tax7:0.1,'tax3':0.2)'no__``!!:o de0':.98,'ta___x8':.3, tax4:.3)node1:.4",
        PhyloNode)
    bootstraps = {"no__``!!:o de0": 0.7, "node1": 0.4}

    fd, f = mkstemp(prefix="make_bootstrapped_tree_test", suffix=".pdf")
    close(fd)
    self._paths_to_clean_up.append(f)

    write_pdf_bootstrap_tree(tree, f, bootstraps)

    # TestCase assertions instead of a bare assert, which is stripped when
    # Python runs with -O.
    self.assertTrue(os.path.exists(f))
    # mkstemp already created an empty file at this path, so existence alone
    # cannot fail; require that something was written as well.
    self.assertTrue(os.path.getsize(f) > 0)
def main():
    """Print the requested relatedness methods for a group of taxa.

    Reads the tree from opts.tree_fp and a comma-separated list of taxon ids
    from the first line of opts.taxa_fp, validates the ids against the tree,
    then prints one "<method>: <value>" line per method in opts.methods.
    """
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    with open(opts.tree_fp) as tree_file:
        tr = parse_newick(tree_file, PhyloNode)
    # all_nodes is list node objs
    tip_dists, all_nodes = tr.tipToTipDistances()
    all_ids = [node.Name for node in all_nodes]

    with open(opts.taxa_fp) as taxa_file:
        group_ids = [i.strip() for i in taxa_file.readline().split(',')]

    # check that there are at least 2 ids in the group, otherwise the math
    # fails
    if len(group_ids) < 2:
        option_parser.error(
            'Not enough taxa in the taxa file.You must have '
            ' at least 2 taxa specified'
            ' in the taxa file or the standard deviation of the distance will '
            ' be zero, causing both NRI and NTI to fail.')

    # check that all_ids contains every group_id
    if not set(group_ids).issubset(all_ids):
        # option_parser.error() reports and exits itself; it is not an
        # exception to raise
        option_parser.error(
            'There are taxa in the taxa file which are '
            'not found in the tree. You may have specified an internal node.')

    # check that all_ids != group_ids
    if len(all_ids) == len(group_ids):  # must be same set if above passes
        option_parser.error(
            'The taxa_ids you specified contain every tip'
            ' in the tree. The NRI and NTI formulas will fail '
            ' because there is no standard deviation of mpd or mntd, and thus'
            ' division by zero. In addition, the concept of over/under'
            ' dispersion of a group of taxa (what NRI/NTI measure) is done in'
            ' reference to the tree they are a part of. If the group being'
            ' tested is the entire tree, the idea of over/under dispersion '
            ' makes little sense.')

    # mapping from string of method name to function handle
    method_lookup = {'nri': nri, 'nti': nti}
    methods = opts.methods
    for method in methods:
        if method not in method_lookup:
            option_parser.error("Unknown method: %s; valid methods are: %s" %
                                (method, ', '.join(method_lookup.keys())))

    for method in methods:
        result = method_lookup[method](tip_dists, all_ids, group_ids,
                                       iters=opts.iters)
        # a single pre-formatted string prints identically whether print is
        # a statement or a function
        print("%s: %s" % (method, result))
def test_write_pdf_bootstrap_tree_escaped_names(self):
    """write_pdf_bootstrap_tree functions when newick names are escaped

    This test essentially is only checking that no failures arise from
    having escaped strings as nodes in the newick file. Visual inspection
    of the resulting PDFs shows that the coloring is occurring as expected
    but unfortunately there is not a great way to test for this.
    """
    tree = parse_newick(
        "((tax7:0.1,'tax3':0.2)'no__``!!:o de0':.98,'ta___x8':.3, tax4:.3)node1:.4",
        PhyloNode)
    bootstraps = {"no__``!!:o de0": .7, 'node1': .4}

    # only the path is needed by the function under test, so the descriptor
    # returned by mkstemp is closed immediately
    fd, f = mkstemp(prefix='make_bootstrapped_tree_test', suffix='.pdf')
    close(fd)
    self._paths_to_clean_up.append(f)

    write_pdf_bootstrap_tree(tree, f, bootstraps)

    # unittest assertion instead of a bare ``assert`` (stripped under -O)
    self.assertTrue(os.path.exists(f))
def setUp(self):
    """Build the shared "l19" fixtures used by the tests.

    Sets up a 19-row by 9-column count matrix, the matching sample and
    taxon names (plus a taxon-name variant containing an underscore), the
    formatted OTU tables for both name sets, and the parsed reference tree.
    """
    count_rows = [
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ]
    self.l19_data = numpy.array(count_rows)

    self.l19_sample_names = [
        'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6', 'sam7', 'sam8',
        'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13', 'sam14', 'sam15',
        'sam16', 'sam17', 'sam18', 'sam19',
    ]
    self.l19_taxon_names = [
        'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon', 'tax6', 'tax7',
        'tax8', 'tax9',
    ]
    # same names, except the first taxon carries an underscore
    self.l19_taxon_names_w_underscore = \
        ['ta_x1'] + self.l19_taxon_names[1:]

    # the formatter is handed the transposed count matrix
    transposed_counts = self.l19_data.T
    self.l19_str = format_otu_table(
        self.l19_sample_names, self.l19_taxon_names, transposed_counts)
    self.l19_str_w_underscore = format_otu_table(
        self.l19_sample_names, self.l19_taxon_names_w_underscore,
        transposed_counts)

    self.l19_tree_str = (
        '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4, '
        '((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, '
        '(tax9:0.3, endbigtaxon:.08));')
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

    self.files_to_remove = []
def test_make_unifrac_metric3(self):
    """unifrac metric copes with samples whose taxa are absent from the tree

    The tree lacks taxa 1, 2 and 3, so samples 1 and 4 (which only have
    counts for tax1/tax2) contribute nothing that is on the tree.
    """
    treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
               '((tax6:.09):0.43):0.5):.2,'
               '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
    tree = parse_newick(treestr, PhyloNode)

    otu_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])

    unif = make_unifrac_metric(False, unifrac, True)

    # catch_warnings restores the previous filter state even if the metric
    # raises, and does not wipe filters installed elsewhere (which
    # warnings.resetwarnings() would do).
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertEqual(res[0, 1], 1.0)
def test_root_midpt(self):
    """midpoint should be selected correctly when it is an internal node

    Checks that rerooting keeps the distances reported by getDistances(),
    that the returned node is a root, and that tip 'd' ends up at distance
    3 from it.
    """
    from qiime.make_phylogeny import root_midpt
    from qiime.parse import parse_newick

    tree = parse_newick('(a:1,((c:1,d:3)n3:1,b:1)n2:1)rt;')
    tmid = root_midpt(tree)

    self.assertEqual(tmid.getDistances(), tree.getDistances())
    self.assertTrue(tmid.isRoot())
    self.assertEqual(tmid.distance(tmid.getNodeMatchingName('d')), 3)
def test_make_unifrac_metric2(self):
    """samples with no seqs, and identical samples, should behave correctly
    """
    tree = parse_newick(self.l19_treestr, PhyloNode)
    unif = make_unifrac_metric(False, unifrac, True)

    otu_data = numpy.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])

    # Scope the "ignore" filter to the metric call only. The previous
    # filterwarnings/resetwarnings pair left warnings silenced for later
    # tests whenever an assertion below failed.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    # two empty samples
    self.assertEqual(res[0, 13], 0.0)
    # two identical samples
    self.assertEqual(res[12, 15], 0.0)
    # empty sample vs. non-empty sample
    self.assertEqual(res[0, 1], 1.0)
make_option('--output_dir', type="new_dirpath", help='Output directory which will contain the scripts' + ' for the html file') ] script_info['optional_options'] = [] script_info['version'] = __version__ if __name__ == '__main__': option_parser, opts, args = parse_command_line_parameters(**script_info) tree_path = opts.tree_fp mapping_path = opts.mapping_fp html_fp = opts.html_fp output_dir = opts.output_dir # Create output directory try: os.mkdir(output_dir) except OSError: pass # Parse the tree tree = parse_newick(open(tree_path, 'U'), PhyloNode) # Parse mapping file mapping_data = parse_mapping_file_to_dict(open(mapping_path, 'U')) # Generate the HTML file make_interactive_sample_id_tree_file(tree, mapping_data, html_fp, output_dir)
def single_file_beta(input_path, metrics, tree_path, output_dir,
                     rowids=None, full_tree=False):
    """ does beta diversity calc on a single otu table

    uses name in metrics to name output beta diversity files: one file
    ``<metric>_<input basename>.txt`` is written to output_dir per metric.
    This function does not call getSubTree() itself; phylogenetic metrics
    are invoked with make_subtree=(not full_tree).

    inputs:
     input_path (str): path of the biom OTU table
     metrics (str, comma delimited if more than 1 metric; or list)
     tree_path (str): path of a newick tree; if falsy no tree is loaded and
      requesting a phylogenetic metric terminates with exit status 1
     output_dir (str)
     rowids (comma separated str): if given, only the dissimilarities
      d(rowid, *) are computed for each listed sample id and the rows are
      written with format_matrix instead of a full distance matrix
     full_tree (bool): see make_subtree above

    An unknown metric name writes a message to stderr and exits with
    status 1.
    """
    # metrics whose value for a pair of samples is not a function of that
    # pair alone, so the whole matrix must be computed to obtain one row
    whole_matrix_metrics = ('dist_chisq', 'dist_gower', 'dist_hellinger',
                            'binary_dist_chisq')

    metrics_list = metrics
    try:
        metrics_list = metrics_list.split(',')
    except AttributeError:
        # not a string: used as given (e.g. a list of metric names)
        pass

    with open(input_path, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)

    if isinstance(otu_table, DenseTable):
        otumtx = otu_table._data.T
    else:
        otumtx = asarray([v for v in otu_table.iterSampleData()])

    if tree_path:
        with open(tree_path, 'U') as tree_f:
            tree = parse_newick(tree_f, PhyloNode)
    else:
        tree = None

    input_filename = os.path.split(input_path)[1]
    input_basename = os.path.splitext(input_filename)[0]

    for metric in metrics_list:
        outfilepath = os.path.join(output_dir,
                                   metric + '_' + input_basename + '.txt')
        try:
            metric_f = get_nonphylogenetic_metric(metric)
            is_phylogenetic = False
        except AttributeError:
            try:
                metric_f = get_phylogenetic_metric(metric)
                is_phylogenetic = True
                if tree is None:
                    stderr.write(
                        "metric %s requires a tree, but none found\n" %
                        (metric, ))
                    exit(1)
            except AttributeError:
                stderr.write(
                    "Could not find metric %s.\n\nKnown metrics are: %s\n" %
                    (metric, ', '.join(list_known_metrics())))
                exit(1)

        if rowids is None:
            # standard, full way
            if is_phylogenetic:
                dissims = metric_f(otumtx, otu_table.ObservationIds,
                                   tree, otu_table.SampleIds,
                                   make_subtree=(not full_tree))
            else:
                dissims = metric_f(otumtx)
            with open(outfilepath, 'w') as out_f:
                out_f.write(
                    format_distance_matrix(otu_table.SampleIds, dissims))
        else:
            # only calc d(rowid1, *) for each rowid
            rowids_list = rowids.split(',')
            row_dissims = []  # same order as rowids_list
            for rowid in rowids_list:
                rowidx = otu_table.SampleIds.index(rowid)

                if metric_f.__name__ in whole_matrix_metrics:
                    warnings.warn(
                        'dissimilarity ' + metric_f.__name__ +
                        ' is not parallelized, calculating the whole matrix...'
                    )
                    row_dissims.append(metric_f(otumtx)[rowidx])
                    continue

                try:
                    row_metric = get_phylogenetic_row_metric(metric)
                except AttributeError:
                    # no row-wise implementation: do element by element,
                    # computing a 2-sample matrix per pair
                    dissims = []
                    for i, other_id in enumerate(otu_table.SampleIds):
                        pair_mtx = otumtx[[rowidx, i], :]
                        if is_phylogenetic:
                            dissim = metric_f(
                                pair_mtx,
                                otu_table.ObservationIds, tree,
                                [otu_table.SampleIds[rowidx], other_id],
                                make_subtree=(not full_tree))[0, 1]
                        else:
                            dissim = metric_f(pair_mtx)[0, 1]
                        dissims.append(dissim)
                    row_dissims.append(dissims)
                else:
                    # do whole row at once
                    dissims = row_metric(otumtx,
                                         otu_table.ObservationIds, tree,
                                         otu_table.SampleIds, rowid,
                                         make_subtree=(not full_tree))
                    row_dissims.append(dissims)

            with open(outfilepath, 'w') as out_f:
                out_f.write(
                    format_matrix(row_dissims, rowids_list,
                                  otu_table.SampleIds))
def setUp(self):
    """Create the "l19" fixtures shared by the tests.

    Builds a 19 x 9 count matrix with matching sample/taxon names, writes
    it out as two biom tables (one whose first taxon name contains an
    underscore) to temp files, parses the reference tree, and records the
    temp files for removal.
    """
    self.qiime_config = load_qiime_config()
    # fall back to /tmp/ when the config has no (or an empty) temp_dir
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

    self.l19_data = np.array([[7, 1, 0, 0, 0, 0, 0, 0, 0],
                              [4, 2, 0, 0, 0, 1, 0, 0, 0],
                              [2, 4, 0, 0, 0, 1, 0, 0, 0],
                              [1, 7, 0, 0, 0, 0, 0, 0, 0],
                              [0, 8, 0, 0, 0, 0, 0, 0, 0],
                              [0, 7, 1, 0, 0, 0, 0, 0, 0],
                              [0, 4, 2, 0, 0, 0, 2, 0, 0],
                              [0, 2, 4, 0, 0, 0, 1, 0, 0],
                              [0, 1, 7, 0, 0, 0, 0, 0, 0],
                              [0, 0, 8, 0, 0, 0, 0, 0, 0],
                              [0, 0, 7, 1, 0, 0, 0, 0, 0],
                              [0, 0, 4, 2, 0, 0, 0, 3, 0],
                              [0, 0, 2, 4, 0, 0, 0, 1, 0],
                              [0, 0, 1, 7, 0, 0, 0, 0, 0],
                              [0, 0, 0, 8, 0, 0, 0, 0, 0],
                              [0, 0, 0, 7, 1, 0, 0, 0, 0],
                              [0, 0, 0, 4, 2, 0, 0, 0, 4],
                              [0, 0, 0, 2, 4, 0, 0, 0, 1],
                              [0, 0, 0, 1, 7, 0, 0, 0, 0]])
    self.l19_sample_names = [
        'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6',
        'sam7', 'sam8', 'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13',
        'sam14', 'sam15', 'sam16', 'sam17', 'sam18', 'sam19'
    ]
    self.l19_taxon_names = [
        'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
        'tax6', 'tax7', 'tax8', 'tax9'
    ]
    # same as l19_taxon_names except for the underscore in the first name
    self.l19_taxon_names_w_underscore = [
        'ta_x1', 'tax2', 'tax3', 'tax4',
        'endbigtaxon', 'tax6', 'tax7', 'tax8', 'tax9'
    ]

    # the table is built from the transposed count matrix
    l19 = Table(self.l19_data.T, self.l19_taxon_names,
                self.l19_sample_names)
    # NOTE(review): suffix is '.blom', possibly a typo for '.biom' - confirm
    fd, self.l19_fp = mkstemp(dir=self.tmp_dir,
                              prefix='test_bdiv_otu_table',
                              suffix='.blom')
    # only the path is used afterwards; release the descriptor from mkstemp
    os.close(fd)
    write_biom_table(l19, self.l19_fp)

    l19_w_underscore = Table(self.l19_data.T,
                             self.l19_taxon_names_w_underscore,
                             self.l19_sample_names)
    fd, self.l19_w_underscore_fp = mkstemp(dir=self.tmp_dir,
                                           prefix='test_bdiv_otu_table',
                                           suffix='.blom')
    os.close(fd)
    write_biom_table(l19_w_underscore, self.l19_w_underscore_fp)

    # the backslash continues the string literal onto the next line, so the
    # continuation line's leading whitespace is part of the string
    self.l19_tree_str = '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,\
 ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, (tax9:0.3, endbigtaxon:.08));'
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

    # temp files created above, plus a (so far empty) list of folders
    self.files_to_remove = [self.l19_fp, self.l19_w_underscore_fp]
    self.folders_to_remove = []
def single_file_beta(input_path, metrics, tree_path, output_dir,
                     rowids=None, full_tree=False):
    """ does beta diversity calc on a single otu table

    uses name in metrics to name output beta diversity files: one file
    ``<metric>_<input filename>`` is written to output_dir per metric.

    Unless full_tree is set, the loaded tree is reduced with
    getSubTree(otuids, ignore_missing=True) before any metric is computed.

    inputs:
     input_path (str): path of the OTU table
     metrics (str, comma delimited if more than 1 metric)
     tree_path (str): path of a newick tree; if falsy no tree is loaded and
      requesting a phylogenetic metric terminates with exit status 1
     output_dir (str)
     rowids (comma separated str): if given, only the dissimilarities
      d(rowid, *) are computed for each listed sample id and the rows are
      written with format_matrix instead of a full distance matrix
     full_tree (bool): skip the getSubTree() reduction

    An unknown metric name writes a message to stderr and exits with
    status 1.
    """
    # metrics whose value for a pair of samples is not a function of that
    # pair alone, so the whole matrix must be computed to obtain one row
    whole_matrix_metrics = ('dist_chisq', 'dist_gower', 'dist_hellinger',
                            'binary_dist_chisq')

    with open(input_path, 'U') as otu_table_f:
        samids, otuids, otumtx, lineages = parse_otu_table(otu_table_f)
    # otu mtx is otus by samples; the metrics take samples by otus
    samples_by_otus = otumtx.T

    tree = None
    if tree_path:
        with open(tree_path, 'U') as tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        # only meaningful when a tree was actually loaded
        if not full_tree:
            tree = tree.getSubTree(otuids, ignore_missing=True)

    metrics_list = metrics.split(',')
    for metric in metrics_list:
        outfilepath = os.path.join(output_dir,
                                   metric + '_' + os.path.split(input_path)[1])
        try:
            metric_f = get_nonphylogenetic_metric(metric)
            is_phylogenetic = False
        except AttributeError:
            try:
                metric_f = get_phylogenetic_metric(metric)
                is_phylogenetic = True
                if tree is None:
                    stderr.write(
                        "metric %s requires a tree, but none found\n"
                        % (metric,))
                    exit(1)
            except AttributeError:
                stderr.write(
                    "Could not find metric %s.\n\nKnown metrics are: %s\n"
                    % (metric, ', '.join(list_known_metrics())))
                exit(1)

        if rowids is None:
            # standard, full way
            if is_phylogenetic:
                dissims = metric_f(samples_by_otus, otuids, tree, samids)
            else:
                dissims = metric_f(samples_by_otus)
            with open(outfilepath, 'w') as out_f:
                out_f.write(format_distance_matrix(samids, dissims))
        else:
            # only calc d(rowid1, *) for each rowid
            rowids_list = rowids.split(',')
            row_dissims = []  # same order as rowids_list
            for rowid in rowids_list:
                rowidx = samids.index(rowid)

                if metric_f.__name__ in whole_matrix_metrics:
                    row_dissims.append(metric_f(samples_by_otus)[rowidx])
                    continue

                try:
                    row_metric = get_phylogenetic_row_metric(metric)
                except AttributeError:
                    # no row-wise implementation: do element by element,
                    # computing a 2-sample matrix per pair
                    dissims = []
                    for i, other_id in enumerate(samids):
                        pair_mtx = samples_by_otus[[rowidx, i], :]
                        if is_phylogenetic:
                            dissim = metric_f(
                                pair_mtx, otuids, tree,
                                [samids[rowidx], other_id])[0, 1]
                        else:
                            dissim = metric_f(pair_mtx)[0, 1]
                        dissims.append(dissim)
                    row_dissims.append(dissims)
                else:
                    # do whole row at once
                    dissims = row_metric(samples_by_otus, otuids, tree,
                                         samids, rowid)
                    row_dissims.append(dissims)

            with open(outfilepath, 'w') as out_f:
                out_f.write(format_matrix(row_dissims, rowids_list, samids))
(0.7, 0.9): ("#82FF8B", "70-90%"), (0.9, 0.999): ("#F8FE83", "90-99.9%"), (0.999, None): ("#FF8582", "> 99.9%") } if __name__ == '__main__': option_parser, opts, args = parse_command_line_parameters(**script_info) support_fp = opts.support_fp tree_fp = opts.tree_fp mapping_fp = opts.mapping_fp html_fp = opts.output_html_fp output_dir = opts.output_dir # Create output directory try: os.mkdir(output_dir) except OSError: pass # Parse jackknife support file support = parse_jackknife_support_file(open(support_fp, 'U')) # Parse jackknife named nodes tree file tree = parse_newick(open(tree_fp, 'U'), PhyloNode) # Parse mapping file mapping_data = parse_mapping_file_to_dict(open(mapping_fp, 'U')) # Generate the html file make_jackknife_tree_html_file(tree, support, DICT_TRANS_VALUES, mapping_data, html_fp, output_dir)
def _tip_names_from_tree_file(tree_fp):
    """Return the tip names of the newick tree stored at tree_fp.

    Names are listed in the order produced by the tree's iterTips().
    Raises TreeMissingError if the file cannot be opened.
    """
    try:
        tree_f = open(tree_fp, 'U')
    except (TypeError, IOError):
        raise TreeMissingError("Couldn't read tree file at path: %s" %
                               tree_fp)
    with tree_f:
        tree = parse_newick(tree_f, PhyloNode)
    return [tip.Name for tip in tree.iterTips()]


def main():
    """Generate the OTU heatmap plots for the OTU table given on the CLI.

    Loads (and optionally log-transforms) the OTU table, copies the
    javascript support files into ``<output_dir>/js``, determines the
    ordering of OTUs and samples from the optional tree / mapping file
    inputs, and hands everything to generate_heatmap_plots.

    Raises TreeMissingError if a requested tree file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open and get coord data
    otu_table = get_otu_counts(opts.otu_table_fp)

    num_otu_hits = opts.num_otu_hits
    if opts.log_transform:
        otu_table = get_log_transform(otu_table)
        num_otu_hits = 0

    # determine whether fractional values are present in OTU table
    max_val = -1
    for val in otu_table.iter_data(axis='observation'):
        max_val = maximum(max_val, val.max())

    # the data cannot be of mixed types: if one is float, all are float
    fractional_values = (max_val.dtype.name == 'float32' or
                         max_val.dtype.name == 'float64')

    if fractional_values and max_val <= 1:
        if num_otu_hits > 0:
            print("Warning: OTU table appears to be using relative "
                  "abundances and num_otu_hits was set to %d. Setting "
                  "num_otu_hits to 0." % num_otu_hits)
            num_otu_hits = 0

    filepath = opts.otu_table_fp
    # file name without directory and without anything after the first '.'
    filename = filepath.strip().split('/')[-1].split('.')[0]

    dir_path = opts.output_dir
    create_dir(dir_path)

    js_dir_path = os.path.join(dir_path, 'js')
    create_dir(js_dir_path)

    qiime_dir = get_qiime_project_dir()
    js_path = os.path.join(qiime_dir, 'qiime/support_files/js')

    for js_file in ('overlib.js', 'otu_count_display.js', 'jquery.js',
                    'jquery.tablednd_0_5.js'):
        shutil.copyfile(os.path.join(js_path, js_file),
                        os.path.join(js_dir_path, js_file))

    # load tree for sorting OTUs
    ordered_otu_names = None
    if opts.tree is not None:
        ordered_otu_names = _tip_names_from_tree_file(opts.tree)

    ordered_sample_names = None
    if opts.sample_tree is not None:
        # load tree for sorting Samples
        ordered_sample_names = _tip_names_from_tree_file(opts.sample_tree)
    elif opts.map_fname is not None:
        # if there's no sample tree, load sample map for sorting samples
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        mapping_rows = parse_mapping_file(lines)[0]
        ordered_sample_names = [row[0] for row in mapping_rows]

    try:
        action = generate_heatmap_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(num_otu_hits, otu_table, ordered_otu_names,
               ordered_sample_names, dir_path, js_dir_path, filename,
               fractional_values)