def test_all_nonroot_branchlengths_0(self):
    """Trees whose non-root branch lengths are all 0 raise a ValueError.

    The two inputs differ only in the root's length (0 vs. 1); both are
    expected to be rejected with the same message.
    """
    for newick in ('((b:0)a:0)root:0;', '((b:0)a:0)root:1;'):
        parsed = TreeNode.read([newick])
        with self.assertRaisesRegex(
            ValueError, "must have a positive length"
        ):
            Tree.from_tree(parsed)
def test_nonroot_negative_branchlengths(self):
    """A negative length on any non-root node raises a ValueError."""
    negative_length_newicks = (
        '((b:-1)a:1)root:1;',
        '((b:100)a:-100)root:0;',
        '(b:1,c:-1)a:2;',
        '((b:-1)a:0)root;',
    )
    for newick in negative_length_newicks:
        parsed = TreeNode.read([newick])
        with self.assertRaisesRegex(
            ValueError, "must have nonnegative lengths"
        ):
            Tree.from_tree(parsed)
def test_from_tree_duplicate_internal_node_names(self):
    """Repeated internal node names trigger a TreeFormatWarning."""
    expected_msg = "Internal node names in the tree are not unique"
    duplicated_name_newicks = (
        # "c" is used by two different non-root internal nodes
        '((a:1,b:3)c:2,(d:2,e:3)c:5)r:2;',
        # "c" is used by the root and by a non-root internal node
        '((a:1,b:3)c:2,(d:2,e:3)f:5)c:2;',
    )
    for newick in duplicated_name_newicks:
        parsed = TreeNode.read([newick])
        with self.assertWarnsRegex(TreeFormatWarning, expected_msg):
            Tree.from_tree(parsed)
def test_match_inputs_filter_missing_features_override(self):
    """Check that filter_missing_features=True drops unmatched features.

    (This is the behavior behind --p-filter-missing-features.) A warning
    should be issued, and the returned table should only contain the
    features that remain.
    """
    tree = Tree.from_tree(self.tree)
    # "g" is the one feature ID the assertions below expect to be removed
    mismatched_table = self.table.copy()
    mismatched_table.index = ["a", "b", "e", "g"]
    # The literal parentheses have to be escaped for the regex, which is
    # why the first two pieces are raw strings.
    expected_warning = (
        r"1 feature\(s\) in the table were not present as tips in "
        r"the tree. These feature\(s\) have been removed from the "
        "visualization."
    )
    with self.assertWarnsRegex(
        tools.DataMatchingWarning, expected_warning
    ):
        out_table, out_sm, _tip_md, _int_md = tools.match_inputs(
            tree, mismatched_table, self.sample_metadata,
            filter_missing_features=True
        )
    kept_features = ["a", "b", "e"]
    self.assertCountEqual(out_table.index, kept_features)
    # The surviving rows of the table should be untouched...
    assert_frame_equal(
        out_table, self.table.loc[kept_features], check_like=True
    )
    # ... and so should the sample metadata.
    assert_frame_equal(out_sm, self.sample_metadata)
def test_fill_missing_node_names(self):
    """Check the node names present after tools.fill_missing_node_names().

    The expected postorder name list contains the placeholder names
    "EmpressNode0", "EmpressNode1" and "EmpressNode2" alongside the names
    the tree already had.
    """
    t = Tree.from_tree(self.tree)
    tools.fill_missing_node_names(t)
    expected_names = ['a', 'e', 'EmpressNode0', 'b', 'g', 'EmpressNode1',
                      'd', 'h', 'EmpressNode2']
    # Compare the whole traversal at once: a per-index loop would pass
    # silently if the traversal yielded fewer nodes than expected.
    observed_names = [node.name for node in t.postorder()]
    self.assertListEqual(observed_names, expected_names)
def test_match_inputs_feature_metadata_duplicate_name_internal_node(self):
    """Feature metadata for duplicate-named internal nodes is kept.

    The JS interface offers two ways to color nodes by feature metadata:
    1) color only tips (propagating clades with uniform feature metadata
    upwards), or 2) color every node that has feature metadata, internal
    nodes included. Under 2), internal nodes that share a name share a
    feature metadata color.
    """
    # Variant of self.tree in which the internal node names "i" and "g"
    # each appear twice
    dup_tree = Tree.from_tree(
        TreeNode.read(['(((a:1,e:2)i:1,b:2)g:1,(:1,d:3)g:2)i:1;'])
    )
    feature_md = self.feature_metadata.copy()
    feature_md.index = ["a", "g", "i"]
    f_table, f_sample_metadata, t_fm, i_fm = tools.match_inputs(
        dup_tree, self.table, self.sample_metadata, feature_md
    )
    assert_frame_equal(f_table, self.table)
    assert_frame_equal(f_sample_metadata, self.sample_metadata)

    expected_split = split_taxonomy(feature_md)
    # The point of the test: nothing is dropped from the feature metadata
    # even though "g" and "i" are duplicated node names.
    assert_frame_equal(t_fm, expected_split.loc[["a"]])
    assert_frame_equal(
        i_fm, expected_split.loc[["g", "i"]], check_like=True
    )
def test_from_tree_overlapping_tip_and_internal_node_names(self):
    """A name shared by a tip and an internal node raises a ValueError."""
    expected_msg = (
        "Tip names in the tree cannot overlap with internal node names"
    )
    overlapping_newicks = (
        # tip "a" collides with a non-root internal node
        '((a:1,b:3)a:2,d:5)e:2;',
        # tip "a" collides with the root
        '((a:1,b:3)c:2,d:5)a:2;',
        # tip "a" collides with a non-root internal node and the root
        '((a:1,b:3)a:2,d:5)a:2;',
    )
    for newick in overlapping_newicks:
        parsed = TreeNode.read([newick])
        with self.assertRaisesRegex(ValueError, expected_msg):
            Tree.from_tree(parsed)
def test_match_inputs_feature_metadata_some_features_dropped(self):
    """Feature metadata filtering where only some entries are removed.

    Some -- but not all -- of the feature metadata entries are expected to
    be filtered out, so no error should be raised.
    """
    tree = Tree.from_tree(self.tree)
    # Only "e" is expected to be kept: it is in the tree, whereas "asdf"
    # and "hjkl" are not.
    partial_fm = self.feature_metadata.copy()
    partial_fm.index = ["e", "asdf", "hjkl"]
    f_table, f_sample_metadata, t_fm, i_fm = tools.match_inputs(
        tree, self.table, self.sample_metadata, partial_fm
    )
    assert_frame_equal(f_table, self.table)
    assert_frame_equal(f_sample_metadata, self.sample_metadata)
    # "e" should end up alone in the tip metadata...
    assert_frame_equal(t_fm, self.split_tax_fm.loc[["e"]])
    # ... while the internal node metadata should have no rows.
    self.assertEqual(len(i_fm.index), 0)
    # Both outputs should still carry the same columns. (This probably
    # doesn't matter much -- the empty internal metadata ends up as an
    # empty dict/JSON object ({}) in the generated HTML -- but it's cheap
    # to verify.)
    for metadata in (t_fm, i_fm):
        self.assertListEqual(
            list(metadata.columns), self.exp_split_fm_cols
        )
def test_from_tree(self):
    """Check that Tree.from_tree() returns a Tree matching the input tree.

    Both trees are walked in preorder in lockstep, and the name and the
    branch length found at each position are compared.
    """
    t = Tree.from_tree(self.tree)
    self.assertEqual(t.__class__, Tree)
    # Iterate over the original and Empress trees simultaneously: see
    # https://stackoverflow.com/a/20910242/10730311
    for n1_index, n2_index in zip(t.preorder(), self.tree.preorder()):
        self.assertEqual(t.name(n1_index), self.tree.name(n2_index))
        # Query each tree with the index from its *own* traversal (the
        # original tree was previously queried with n1_index here).
        self.assertEqual(t.length(n1_index), self.tree.length(n2_index))
def test_match_inputs_nothing_dropped(self):
    """With fully matching inputs, match_inputs() returns them unchanged."""
    tree = Tree.from_tree(self.tree)
    out_table, out_sample_md, tip_md, int_md = tools.match_inputs(
        tree, self.table, self.sample_metadata
    )
    assert_frame_equal(out_table, self.table)
    assert_frame_equal(out_sample_md, self.sample_metadata)
    # No feature metadata went in, so none should come out
    for feature_md in (tip_md, int_md):
        self.assertIsNone(feature_md)
def test_match_inputs_ignore_missing_samples_error(self):
    """A table sample missing from the sample metadata raises an error."""
    tree = Tree.from_tree(self.tree)
    # "Whatever" is junk standing in for one of the real sample IDs
    renamed_table = self.table.copy()
    renamed_table.columns = ["Sample1", "Sample2", "Whatever", "Sample4"]
    expected_msg = (
        "The feature table contains samples that aren't present in the "
        "sample metadata."
    )
    with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
        tools.match_inputs(tree, renamed_table, self.sample_metadata)
def test_match_inputs_only_1_feature_in_table(self):
    """A table with a single feature is accepted.

    (That's allowed as long as the one feature is a tip of the tree.)
    """
    tree = Tree.from_tree(self.tree)
    single_feature_table = self.table.loc[["a"]]
    out_table, out_sample_md, tip_md, int_md = tools.match_inputs(
        tree, single_feature_table, self.sample_metadata
    )
    assert_frame_equal(out_table, single_feature_table)
    assert_frame_equal(out_sample_md, self.sample_metadata)
    for feature_md in (tip_md, int_md):
        self.assertIsNone(feature_md)
def test_nonroot_missing_branchlengths(self):
    """Non-root nodes without a branch length raise a ValueError.

    A root without a branch length, on the other hand, is accepted.
    """
    # About the fourth tree: it produces a missing-branch-length error
    # rather than a negative-branch-length error because the tree is
    # checked in postorder. That "precedence" could be changed later on
    # if desired.
    missing_length_newicks = (
        '((b)a:1)root:1;',
        '((b:1)a)root:0;',
        '(b,c)a;',
        '((b)a:-1)root:3;',
        '((b:0,c)a:0)root:0;',
    )
    for newick in missing_length_newicks:
        parsed = TreeNode.read([newick])
        with self.assertRaisesRegex(ValueError, "must have lengths"):
            Tree.from_tree(parsed)

    # A root *with* a missing branch length must not cause an error when
    # the tree is created
    for newick in ('((b:0,c:1)a:0)root;',):
        Tree.from_tree(TreeNode.read([newick]))
def test_match_inputs_filter_missing_features_error(self):
    """A table feature that isn't a tree tip raises an error by default."""
    tree = Tree.from_tree(self.tree)
    # Swap one tip ID in the table for an internal node ID ("g"), which
    # is not acceptable.
    mismatched_table = self.table.copy()
    mismatched_table.index = ["a", "b", "e", "g"]
    expected_msg = (
        "The feature table contains features that aren't present as tips "
        "in the tree."
    )
    with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
        tools.match_inputs(tree, mismatched_table, self.sample_metadata)
def test_missing_root_length_tree_rect_layout(self):
    """Rectangular layout of a straight-line tree with no root length.

    Checks that the layout still comes out as expected when the root node
    has no assigned branch length.
    """
    tree = Tree.from_tree(TreeNode.read(['((b:2)a:1)root;']))
    tree.coords(100, 100)
    self.check_coords(
        tree, "xr", "yr",
        [(100, 0.0), (100 / 3.0, 0.0), (0.0, 0.0)]
    )
    for internal in tree.non_tips():
        self.assertEqual(internal.lowest_child_yr, 0)
        self.assertEqual(internal.highest_child_yr, 0)
    self.check_basic_tree_rect_layout(tree)
def test_unrooted_layout(self):
    """Check the (x2, y2) coordinates computed for self.tree."""
    tree = Tree.from_tree(self.tree)
    tree.coords(500, 500)
    self.check_coords(
        tree, "x2", "y2",
        [
            (-10.222747306219219, 195.06163867407446),
            (118.00044943013512, 262.22444928198297),
            (36.73032180166217, 137.07942714215795),
            (184.76890317443747, 23.95196521134946),
            (40.6350638142365, 62.57251106991248),
            (-77.36538561589865, -199.6519382120705),
            (-290.23109682556253, -205.35762294073118),
            (-81.27012762847295, -125.14502213982503),
            (0.0, 0.0),
        ]
    )
def test_straightline_tree_rect_layout(self):
    """Rectangular layout of a tree without any "branches".

    Every node should still be drawn as expected when the tree is just a
    straight line.
    """
    # The root length is set to 100 to show/verify that it is not taken
    # into account. (If that behavior ever changes, this test will -- and
    # should -- need updating.)
    tree = Tree.from_tree(TreeNode.read(['((b:2)a:1)root:100;']))
    tree.coords(100, 100)
    self.check_coords(
        tree, "xr", "yr",
        [(100, 0.0), (100 / 3.0, 0.0), (0.0, 0.0)]
    )
    for internal in tree.non_tips():
        self.assertEqual(internal.lowest_child_yr, 0)
        self.assertEqual(internal.highest_child_yr, 0)
    self.check_basic_tree_rect_layout(tree)
def test_match_inputs_ignore_missing_samples_override(self):
    """Check that ignore_missing_samples=True adds placeholder metadata.

    (This is the behavior behind --p-ignore-missing-samples.) A warning
    should be issued, the table should be returned unchanged, and the
    sample missing from the metadata should receive placeholder values.
    """
    tree = Tree.from_tree(self.tree)
    # "Whatever" is junk standing in for one of the real sample IDs
    table_sample_ids = ["Sample1", "Sample2", "Whatever", "Sample4"]
    renamed_table = self.table.copy()
    renamed_table.columns = ["Sample1", "Sample2", "Whatever", "Sample4"]
    expected_warning = (
        r"1 sample\(s\) in the table were not present in the sample "
        r"metadata. These sample\(s\) have been assigned placeholder "
        "metadata."
    )
    with self.assertWarnsRegex(
        tools.DataMatchingWarning, expected_warning
    ):
        out_table, out_sm, _tip_md, _int_md = tools.match_inputs(
            tree, renamed_table, self.sample_metadata,
            ignore_missing_samples=True
        )
    self.assertCountEqual(out_table.columns, table_sample_ids)
    self.assertCountEqual(out_sm.index, table_sample_ids)
    # The table itself should come back as it went in
    assert_frame_equal(out_table, renamed_table)
    # "Whatever" should have received the placeholder value in every
    # sample metadata column
    placeholder_hits = (
        out_sm.loc["Whatever"] == "This sample has no metadata"
    )
    self.assertTrue(placeholder_hits.all())
    # Apart from that new row, the sample metadata should be the same as
    # before. Column dtypes are allowed to differ, because the placeholder
    # is a string and adding it can change a column's dtype. (That
    # shouldn't affect Empress, which -- at least as of writing -- stores
    # all sample metadata as strings.)
    known_ids = ["Sample1", "Sample2", "Sample4"]
    assert_frame_equal(
        out_sm.loc[known_ids],
        self.sample_metadata.loc[known_ids],
        check_dtype=False
    )
def test_match_inputs_feature_metadata_no_features_in_tree(self):
    """Feature metadata sharing nothing with the tree raises an error.

    Feature names that match neither a tip nor an internal node get
    filtered out of the feature metadata; when that removes every
    feature, a DataMatchingError is expected.
    """
    tree = Tree.from_tree(self.tree)
    unmatched_fm = self.feature_metadata.copy()
    unmatched_fm.index = range(len(self.feature_metadata.index))
    expected_msg = (
        "No features in the feature metadata are present in the tree, "
        "either as tips or as internal nodes."
    )
    with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
        tools.match_inputs(
            tree, self.table, self.sample_metadata, unmatched_fm
        )
def test_circular_layout(self):
    """Check that the circular layout computes what we expect it to.

    For each node, the circular layout computes:
        (xc0, yc0) - the starting location of the node
        (xc1, yc1) - the ending location of the node

    In addition, every non-root internal node has an arc connecting the
    "starting points" of its children with the minimum and maximum angle:
        (arcx0, arcy0) - the starting location of the arc
        highest_child_clangle - the starting angle of the arc
        lowest_child_clangle - the ending angle of the arc
    """
    tree = Tree.from_tree(TreeNode.read(["((d:4,c:3)b:2,a:1)root:1;"]))
    tree.coords(100, 100)

    # Starting locations. Nodes 'a' and 'b' both start at the root, so
    # their starting coordinates should be identical.
    self.check_coords(
        tree, "xc0", "yc0",
        [
            (38.490018, 0.0),
            (-19.245009, 33.333333),
            (0.0, 0.0),
            (0.0, 0.0),
            (0.0, 0.0),
        ]
    )

    # Ending locations
    self.check_coords(
        tree, "xc1", "yc1",
        [
            (115.470054, 0.0),
            (-48.112522, 83.333333),
            (19.245009, 33.333333),
            (-9.622504, -16.666667),
            (0.0, 0.0),
        ]
    )

    b_node = tree.find("b")

    # Where b's arc starts
    arc_x, arc_y = -19.245009, 33.333333
    self.assertAlmostEqual(b_node.arcx0, arc_x, places=5)
    self.assertAlmostEqual(b_node.arcy0, arc_y, places=5)

    # The angles bounding b's arc
    highest_angle, lowest_angle = 2.0943951, 0.0
    self.assertAlmostEqual(b_node.highest_child_clangle, highest_angle)
    self.assertAlmostEqual(b_node.lowest_child_clangle, lowest_angle)
def test_match_inputs_no_tips_in_table(self):
    """A table sharing no features with the tree's tips raises an error.

    The error is expected with and without filter_missing_features=True.
    """
    tree = Tree.from_tree(self.tree)
    unmatched_table = self.table.copy()
    unmatched_table.index = range(len(self.table.index))
    expected_msg = (
        "No features in the feature table are present as tips in the tree."
    )
    # The second set of options checks that --p-filter-missing-features
    # can't override the error, since there are NO matching features at
    # all.
    for extra_options in ({}, {"filter_missing_features": True}):
        with self.assertRaisesRegex(
            tools.DataMatchingError, expected_msg
        ):
            tools.match_inputs(
                tree, unmatched_table, self.sample_metadata,
                **extra_options
            )
def test_match_inputs_no_shared_samples(self):
    """Sample metadata sharing no samples with the table raises an error.

    The error is expected with and without ignore_missing_samples=True.
    """
    tree = Tree.from_tree(self.tree)
    unmatched_sample_md = self.sample_metadata.copy()
    unmatched_sample_md.index = ["lol", "nothing", "here", "matches"]
    expected_msg = (
        "No samples in the feature table are present in the sample "
        "metadata."
    )
    # The second set of options checks that --p-ignore-missing-samples
    # can't override the error, since there are NO matching samples at
    # all.
    for extra_options in ({}, {"ignore_missing_samples": True}):
        with self.assertRaisesRegex(
            tools.DataMatchingError, expected_msg
        ):
            tools.match_inputs(
                tree, self.table, unmatched_sample_md, **extra_options
            )
def test_match_inputs_feature_metadata_root_metadata_allowed(self):
    """Feature metadata describing the root node is kept."""
    # Variant of self.tree in which the root is named ("i")
    named_root_tree = Tree.from_tree(
        TreeNode.read(['(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2)i:1;'])
    )
    feature_md = self.feature_metadata.copy()
    feature_md.index = ["a", "g", "i"]
    f_table, f_sample_metadata, t_fm, i_fm = tools.match_inputs(
        named_root_tree, self.table, self.sample_metadata, feature_md
    )
    # (Guard against accidentally breaking the table / sample metadata
    # matching)
    assert_frame_equal(f_table, self.table)
    assert_frame_equal(f_sample_metadata, self.sample_metadata)

    expected_split = split_taxonomy(feature_md)
    # The point of the test: a, g, and i are all in the tree, so all of
    # the feature metadata should be kept. "i" matters most here, since it
    # is the root.
    assert_frame_equal(t_fm, expected_split.loc[["a"]])
    assert_frame_equal(
        i_fm, expected_split.loc[["g", "i"]], check_like=True
    )
def test_match_inputs_feature_metadata_only_internal_node_metadata(self):
    """Feature metadata that only describes internal nodes is accepted."""
    # Variant of self.tree in which the root is named ("i")
    named_root_tree = Tree.from_tree(
        TreeNode.read(['(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2)i:1;'])
    )
    feature_md = self.feature_metadata.copy()
    feature_md.index = ["h", "g", "i"]
    f_table, f_sample_metadata, t_fm, i_fm = tools.match_inputs(
        named_root_tree, self.table, self.sample_metadata, feature_md
    )
    assert_frame_equal(f_table, self.table)
    assert_frame_equal(f_sample_metadata, self.sample_metadata)

    expected_split = split_taxonomy(feature_md)
    # 1) The tip metadata should have no rows
    self.assertEqual(len(t_fm.index), 0)
    # 2) The internal node metadata should be fully preserved
    assert_frame_equal(
        i_fm, expected_split.loc[feature_md.index], check_like=True
    )
    # 3) Both DataFrames should have identical columns
    for metadata in (t_fm, i_fm):
        self.assertListEqual(
            list(metadata.columns), self.exp_split_fm_cols
        )
def test_match_inputs_feature_metadata_nothing_dropped(self):
    """Tip and internal node names are both valid feature metadata IDs.

    (self.feature_metadata describes the three features "e", "h", and
    "a": e and a are tips of self.tree, and h is an internal node.)
    """
    tree = Tree.from_tree(self.tree)
    f_table, f_sample_metadata, tip_md, int_md = tools.match_inputs(
        tree, self.table, self.sample_metadata, self.feature_metadata
    )
    assert_frame_equal(f_table, self.table)
    assert_frame_equal(f_sample_metadata, self.sample_metadata)
    # Nothing should have been filtered. The output should differ from the
    # input feature metadata in only two ways: 1) it is split into two
    # DataFrames (tip metadata and internal node metadata), and 2) the
    # taxonomy column has been split up.
    assert_frame_equal(
        tip_md, self.split_tax_fm.loc[["e", "a"]], check_like=True
    )
    assert_frame_equal(int_md, self.split_tax_fm.loc[["h"]])
    # Tip and internal node metadata should share the same columns
    for metadata in (tip_md, int_md):
        self.assertListEqual(
            list(metadata.columns), self.exp_split_fm_cols
        )
def test_circular_layout_scaling_factor(self):
    """The circular layout's final scaling preserves branch lengths.

    In other words, the length of a node as drawn in circular layout
    space should be proportional to that node's branch length.
    """
    tree = Tree.from_tree(TreeNode.read(["((d:4,c:3)b:2,a:1)root:1;"]))
    tree.coords(100, 100)

    # Every node except the root (which is drawn as a point) should have
    # roughly the same ratio of drawn length to branch length.
    #
    # For instance, if d is drawn 1.5x longer than its branch length in
    # the tree above, then every other node should also be drawn roughly
    # 1.5x longer than its own branch length.
    reference_ratio = None
    for node in tree.preorder(include_self=False):
        drawn_length = sqrt(
            (node.xc1 - node.xc0)**2 + (node.yc1 - node.yc0)**2
        )
        ratio = drawn_length / node.length
        if reference_ratio is None:
            # The first node visited sets the ratio all others must match
            reference_ratio = ratio
            continue
        self.assertAlmostEqual(reference_ratio, ratio, places=5)
def test_from_tree_node_starts_with_EmpressNode(self):
    """A node name beginning with "EmpressNode" raises a ValueError."""
    reserved_name_tree = TreeNode.read(
        ['((a:1,b:3)c:2,EmpressNode1:5)e:2;']
    )
    expected_msg = 'Node names can\'t start with "EmpressNode"'
    with self.assertRaisesRegex(ValueError, expected_msg):
        Tree.from_tree(reserved_name_tree)
def test_from_tree_duplicate_tip_names(self):
    """Two tips with the same name ("i" here) raise a ValueError."""
    duplicate_tip_tree = TreeNode.read(['((i:1,a:3)b:2,i:5)r:2;'])
    expected_msg = "Tip names in the tree must be unique"
    with self.assertRaisesRegex(ValueError, expected_msg):
        Tree.from_tree(duplicate_tip_tree)
def test_from_tree_singlenode(self):
    """A tree made up of a single node raises a ValueError."""
    lone_node = TreeNode.read(['i:1;'])
    expected_msg = "must contain at least 2 nodes"
    with self.assertRaisesRegex(ValueError, expected_msg):
        Tree.from_tree(lone_node)
def test_rectangular_layout(self):
    """Check the rectangular layout coordinates computed for self.tree.

    Verifies each node's (xr, yr) position, as well as the
    lowest_child_yr / highest_child_yr attributes of internal nodes (and
    their absence on tips).
    """
    tree = Tree.from_tree(self.tree)
    tree.coords(500, 500)

    # How such a simple tree ends up with these coordinates:
    #
    # 1. Initial y-coordinates: tips get y=0, y=1, y=2, ... up to
    #    y=|tips|, and each internal node is placed at the average of its
    #    children's y-positions.
    #
    # 2. Initial x-coordinates: the root sits at x=0, and a child C of a
    #    parent P gets x = P.x + C.branch_length. (That's pseudocode, not
    #    real attribute names.)
    #
    # 3. Scaling relative to the maximum width and height. This tree has
    #    5 tips, so the maximum height is 4 (heights are 0-indexed), and
    #    the "farthest right" node is d (at x=5). y-positions are thus
    #    scaled by 500 / 4 = 125 and x-positions by 500 / 5 = 100. (The
    #    "500"s are simply the dimensions given to coords().)
    #
    # 4. Tree.layout_rectangular() is finished at this point, but coords()
    #    then makes all coordinates relative to the root's coordinates.
    #    The root's x=0 is subtracted from every x-coordinate (a no-op),
    #    and the root's y=(2.375*125)=296.875 is subtracted from every
    #    y-coordinate.
    #
    # TLDR: that's how a goes from (3, 0) on the first pass to
    # ((3 * 100) - 0, (0 * 125) - 296.875) = (300, -296.875) in the end.
    self.check_coords(
        tree, "xr", "yr",
        [
            (300, -296.875),  # a
            (400, -171.875),  # e
            (200, -234.375),  # f
            (300, -46.875),   # b
            (100, -140.625),  # g
            (300, 78.125),    # c
            (500, 203.125),   # d
            (200, 140.625),   # h
            (0.0, 0.0),       # i (root)
        ]
    )

    # Now verify lowest_child_yr and highest_child_yr by walking
    # tree.non_tips(), which (like check_coords()) uses a post-order
    # traversal.
    # (Each 2-tuple below is ordered as (lowest child y-coordinate,
    # highest child y-coordinate); the values follow fairly directly from
    # the coordinates above.)
    expected_child_y_ranges = [
        (-296.875, -171.875),  # f
        (-234.375, -46.875),   # g
        (78.125, 203.125),     # h
        (-140.625, 140.625),   # i
    ]
    for position, internal in enumerate(tree.non_tips()):
        lowest, highest = expected_child_y_ranges[position]
        self.assertTrue(hasattr(internal, "lowest_child_yr"))
        self.assertTrue(hasattr(internal, "highest_child_yr"))
        self.assertAlmostEqual(internal.lowest_child_yr, lowest, places=5)
        self.assertAlmostEqual(
            internal.highest_child_yr, highest, places=5
        )

    # Tips have no children, so they should *not* have these attributes.
    for tip in tree.tips():
        for attribute in ("lowest_child_yr", "highest_child_yr"):
            self.assertFalse(hasattr(tip, attribute))