def test_coords(self):
    """Compare the edge table from ``Tree.coords(500, 500)`` for
    ``self.tree2`` against hard-coded expected coordinates.
    """
    tree = Tree.from_tree(self.tree2)

    # One entry per expected edge:
    # (node, parent, is_tip, parent x, parent y, node x, node y)
    layout = [
        ('a', 'f', True, 141.35398602846797, 339.46141862722482,
         83.371774496551481, 292.50834951934343),
        ('e', 'f', True, 141.35398602846797, 339.46141862722482,
         16.20896388864297, 420.73154625569776),
        ('f', 'g', False, 215.86090210071345, 343.36616063979909,
         141.35398602846797, 339.46141862722482),
        ('b', 'g', True, 215.86090210071345, 343.36616063979909,
         254.48144795927647, 487.5),
        ('c', 'h', True, 403.57843531045097, 221.46096919708964,
         478.08535138269644, 225.36571120966394),
        ('d', 'h', True, 403.57843531045097, 221.46096919708964,
         483.79103611135702, 12.500000000000028),
        ('g', 'i', False, 278.43341317062595, 302.73109682556259,
         215.86090210071345, 343.36616063979909),
        ('h', 'i', False, 278.43341317062595, 302.73109682556259,
         403.57843531045097, 221.46096919708964),
    ]

    # Field order used while building; the frame is reordered afterwards.
    build_fields = [
        'Node_id', 'Parent_id', 'branch_color', 'branch_is_visible',
        'is_tip', 'node_color', 'node_is_visible', 'px', 'py', 'size',
        'width', 'x', 'y'
    ]
    per_node = {
        node: [node, parent, DEFAULT_COLOR, True, is_tip, DEFAULT_COLOR,
               True, px, py, 1, 1, x, y]
        for node, parent, is_tip, px, py, x, y in layout
    }

    # Keep the "fields as rows, then transpose" construction: it decides
    # the index (node names) and the dtypes that assert_frame_equal checks.
    edge_exp = pd.DataFrame(per_node, index=build_fields).T

    final_order = [
        'Node_id', 'is_tip', 'x', 'y', 'Parent_id', 'px', 'py',
        'node_color', 'branch_color', 'node_is_visible',
        'branch_is_visible', 'width', 'size'
    ]
    edge_exp = edge_exp[final_order]

    # coords() yields four values; only the edge table is checked here.
    edge_res, _second, _third, _fourth = tree.coords(500, 500)
    assert_frame_equal(edge_exp, edge_res)
def __init__(self, tree, metadata, clade_field, highlight_ids=None, port=8080):
    """ Model constructor.

    Converts the tree, computes its layout, and joins the layout table
    with the supplied metadata.

    Parameters
    ----------
    tree : skbio.TreeNode
        Tree data structure; converted with ``Tree.from_tree``.
    metadata : DataFrame-like
        Metadata for the features plotted on the tree. Its column names
        are stored in ``self.headers`` and it is merged with the layout
        table on the ``Node_id`` column.
    clade_field : str
        Name of the field within metadata that contains clade names.
    highlight_ids : list of str
        Nodes to highlight; handed to ``self.highlight_nodes``.
    port : int
        Port number (not used by this constructor).

    Notes
    -----
    The first column name should be renamed to Node_id
    """
    self.zoom_level = 1
    self.scale = 1

    # convert to empress tree and lay it out
    self.tree = Tree.from_tree(tree)
    tools.name_internal_nodes(self.tree)
    layout = self.tree.coords(DEFAULT_WIDTH, DEFAULT_HEIGHT)
    coords_table, self.centerX, self.centerY, self.scale = layout

    # read in main metadata
    self.headers = metadata.columns.values.tolist()
    merged = pd.merge(coords_table, metadata, how='outer', on="Node_id")

    # todo need to warn user that some entries in metadata do not have a
    # mapping to tree
    # Rows without an x coordinate came only from the metadata side of the
    # outer merge, so they are dropped.
    self.edge_metadata = merged[merged.x.notnull()]

    self.triangles = pd.DataFrame()
    self.clade_field = clade_field
    self.selected_tree = pd.DataFrame()
    self.selected_root = self.tree
    self.triData = {}
    self.colored_clades = {}

    # cached subtrees
    self.cached_subtrees = []
    self.cached_clades = []

    self.highlight_nodes(highlight_ids)
def _validate_and_match_data(self, ignore_missing_samples,
                             filter_missing_features,
                             filter_unobserved_features_from_phylogeny):
    """Wrap the tree, match it against the table and metadata, and
    optionally shear the tree down to the features left in the table.

    Sets ``self.tree``, ``self.table``, ``self.samples``, ``self.tip_md``,
    ``self.int_md`` and ``self._bp_tree``.
    """
    # Note that the feature_table we get from QIIME 2 (as an argument to
    # this function) is set up such that the index describes sample IDs and
    # the columns describe feature IDs. We transpose this table before
    # sending it to tools.match_inputs() and keep using the transposed
    # table for the rest of this visualizer.
    self.tree = Tree(self.tree)
    matched = match_inputs(
        self.tree, self.table.T, self.samples, self.features,
        ignore_missing_samples, filter_missing_features
    )
    self.table, self.samples, self.tip_md, self.int_md = matched

    # remove unobserved features from the phylogeny
    if filter_unobserved_features_from_phylogeny:
        observed = set(self.table.index)
        self.tree.bp_tree = self.tree.bp_tree.shear(observed)

    # extract balance parenthesis
    self._bp_tree = list(self.tree.B)

    fill_missing_node_names(self.tree)
def test_to_df(self):
    """Check ``Tree.to_df`` on a four-edge tree whose nodes all have their
    ``x2``/``y2`` attributes set to zero.
    """
    tree = Tree.from_tree(TreeNode.read(['((a,b)c,d)r;']))
    tree.assign_ids()
    for node in tree.postorder():
        node.x2 = node.y2 = 0

    # (node, parent, is_tip) in the order the expected rows are listed
    edges = (
        ('d', 'r', True),
        ('c', 'r', False),
        ('b', 'c', True),
        ('a', 'c', True),
    )
    rows = [
        [name, parent, DEFAULT_COLOR, True, is_tip, DEFAULT_COLOR, True,
         0, 0, 1, 1, 0, 0, tree.find(name).id]
        for name, parent, is_tip in edges
    ]
    build_columns = [
        'Node_id', 'Parent_id', 'branch_color', 'branch_is_visible',
        'is_tip', 'node_color', 'node_is_visible', 'px', 'py', 'size',
        'width', 'x', 'y', 'unique_id'
    ]
    final_columns = [
        'Node_id', 'unique_id', 'is_tip', 'x', 'y', 'Parent_id', 'px',
        'py', 'node_color', 'branch_color', 'node_is_visible',
        'branch_is_visible', 'width', 'size'
    ]
    df_exp = pd.DataFrame(rows, columns=build_columns)[final_columns]
    df_exp = df_exp.set_index('Node_id')

    # to_df() rows are compared in reverse order against the expectation.
    df_res = tree.to_df().iloc[::-1, :].set_index('Node_id')

    assert_frame_equal(df_exp, df_res)
def __init__(self, tree, metadata, highlight_ids=None, coords_file=None, port=8080):
    """ Model constructor.

    Converts the tree, obtains node coordinates (computed, or loaded from
    ``coords_file``), and joins them with the supplied metadata.

    Parameters
    ----------
    tree : skbio.TreeNode
        Tree data structure; converted with ``Tree.from_tree``.
    metadata : DataFrame-like
        Metadata for the features plotted on the tree. Its column names
        are stored in ``self.headers`` and it is merged with the
        coordinate table on the ``Node_id`` column.
    highlight_ids : list of str
        Nodes to highlight; handed to ``self.highlight_nodes``.
    coords_file : optional
        When given, coordinates are loaded through ``self.tree.from_file``
        instead of being computed.
    port : int
        Port number (not used by this constructor).

    Notes
    -----
    The first column name should be renamed to Node_id
    """
    self.TIP_LIMIT = 100
    self.zoom_level = 1
    self.scale = 1

    # convert to empress tree
    print('converting tree TreeNode to Tree')
    self.tree = Tree.from_tree(tree)
    tools.name_internal_nodes(self.tree)

    if coords_file is not None:
        print('extracting tree coords from file')
        self.tree.from_file(coords_file)
        coords_table = self.tree.to_df()
    else:
        print('calculating tree coords')
        self.tree.tip_count_per_subclade()
        coords_table = self.tree.coords(DEFAULT_WIDTH, DEFAULT_HEIGHT)

    # read in main metadata
    self.headers = metadata.columns.values.tolist()
    merged = pd.merge(coords_table, metadata, how='outer', on="Node_id")

    # todo need to warn user that some entries in metadata do not have a
    # mapping to tree
    # Rows without an x coordinate came only from the metadata side of the
    # outer merge, so they are dropped.
    placed = merged[merged.x.notnull()]

    # Index the table by node id while keeping Node_id as a column.
    placed['index'] = placed['Node_id']
    self.edge_metadata = placed.set_index('index')
    print(metadata)

    self.triangles = pd.DataFrame()
    self.selected_tree = pd.DataFrame()
    self.selected_root = self.tree
    self.triData = {}
    self.colored_clades = {}

    # cached subtrees
    self.cached_subtrees = []
    self.cached_clades = []

    # Automatic collapsing (default_auto_collapse) is currently disabled.

    print('highlight_ids')
    self.highlight_nodes(highlight_ids)
    self.__clade_level()
def test_coords_random_tree(self):
    """Compare the edge table from ``Tree.coords(500, 500)`` for
    ``self.tree1`` against hard-coded expected coordinates.

    Each expected row holds 13 values in the order given by ``columns``
    below. There is no per-row ``unique_id`` value, so that column is not
    part of the expected frame.
    NOTE(review): confirm the index/column layout against what
    ``Tree.coords`` currently returns.
    """
    t = Tree.from_tree(self.tree1)
    data = [
        ['7', 'y2', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         79.070722542332845, 129.00083943597397, 1, 1,
         50.679561936771449, 55.039337408460526],
        ['8', 'y2', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         79.070722542332845, 129.00083943597397, 1, 1,
         12.628310993232901, 85.85263286563449],
        ['4', 'y6', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         74.068217341096869, 368.43664502236788, 1, 1,
         12.499999999999979, 418.29360437746811],
        ['6', 'y6', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         74.068217341096869, 368.43664502236788, 1, 1,
         53.563668631852295, 444.9606625915394],
        ['9', 'y7', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         117.21642391143635, 301.99423347326797, 1, 1,
         38.10150433604548, 306.1404707163706],
        ['y6', 'y7', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         117.21642391143635, 301.99423347326797, 1, 1,
         74.068217341096869, 368.43664502236788],
        ['0', 'y11', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         408.3850804246091, 240.10442497874831, 1, 1,
         474.82749197370902, 283.25263154908782],
        ['3', 'y11', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         408.3850804246091, 240.10442497874831, 1, 1,
         487.5, 235.95818773564568],
        ['2', 'y14', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         375.00926942577706, 153.15746472040379, 1, 1,
         436.57748676687396, 103.30050536530359],
        ['5', 'y14', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         375.00926942577706, 153.15746472040379, 1, 1,
         395.51381813502167, 76.633447151232261],
        ['y11', 'y15', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         331.86106285543758, 219.59987626950374, 1, 1,
         408.3850804246091, 240.10442497874831],
        ['y14', 'y15', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         331.86106285543758, 219.59987626950374, 1, 1,
         375.00926942577706, 153.15746472040379],
        ['1', 'y16', DEFAULT_COLOR, True, True, DEFAULT_COLOR, True,
         257.89956082792412, 247.99103687506513, 1, 1,
         286.29072143348549, 321.95253890257857],
        ['y15', 'y16', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         257.89956082792412, 247.99103687506513, 1, 1,
         331.86106285543758, 219.59987626950374],
        ['y7', 'y17', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         178.78464125253325, 252.13727411816777, 1, 1,
         117.21642391143635, 301.99423347326797],
        ['y16', 'y17', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         178.78464125253325, 252.13727411816777, 1, 1,
         257.89956082792412, 247.99103687506513],
        ['y2', 'y18', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         128.92768189743305, 190.56905677707087, 1, 1,
         79.070722542332845, 129.00083943597397],
        ['y17', 'y18', DEFAULT_COLOR, True, False, DEFAULT_COLOR, True,
         128.92768189743305, 190.56905677707087, 1, 1,
         178.78464125253325, 252.13727411816777],
    ]

    # Names for the 13 positional values of each row above. The previous
    # placeholder `[your, list, of, columns]` referenced undefined names.
    columns = [
        'Node_id', 'Parent_id', 'branch_color', 'branch_is_visible',
        'is_tip', 'node_color', 'node_is_visible', 'px', 'py', 'size',
        'width', 'x', 'y'
    ]
    edge_exp = pd.DataFrame(data, columns=columns)

    # drop=False keeps 'Node_id' available as a column; without it the
    # column selection below raises KeyError.
    edge_exp.set_index('Node_id', drop=False, inplace=True)
    edge_exp = edge_exp[[
        'Node_id', 'is_tip', 'x', 'y', 'Parent_id', 'px', 'py',
        'node_color', 'branch_color', 'node_is_visible',
        'branch_is_visible', 'width', 'size'
    ]]

    (edge_res, _, _, _) = t.coords(500, 500)
    assert_frame_equal(edge_exp, edge_res)
def test_from_tree_random_tree(self):
    """``Tree.from_tree`` applied to ``self.tree1`` yields a ``Tree``."""
    converted = Tree.from_tree(self.tree1)
    self.assertEqual(converted.__class__, Tree)
def test_rescale(self):
    """``rescale(500, 500)`` on ``self.tree2`` matches the stored value
    to five decimal places.
    """
    expected_scale = 74.609165340334656
    tree = Tree.from_tree(self.tree2)
    observed_scale = tree.rescale(500, 500)
    self.assertAlmostEqual(observed_scale, expected_scale, places=5)
def test_rescale_random_tree(self):
    """``rescale(500, 500)`` on ``self.tree1`` matches the stored value
    to five decimal places.
    """
    expected_scale = 79.223492618646006
    tree = Tree.from_tree(self.tree1)
    observed_scale = tree.rescale(500, 500)
    self.assertAlmostEqual(observed_scale, expected_scale, places=5)
class Empress():
    def __init__(self, tree, table, sample_metadata,
                 feature_metadata=None, ordination=None,
                 ignore_missing_samples=False, filter_missing_features=False,
                 resource_path=None,
                 filter_unobserved_features_from_phylogeny=True):
        """Visualize a phylogenetic tree

        Use this object to interactively display a phylogenetic tree using
        the Empress GUI.

        Parameters
        ----------
        tree: bp.Tree
            The phylogenetic tree to visualize.
        table: pd.DataFrame
            The matrix to visualize paired with the phylogenetic tree.
        sample_metadata: pd.DataFrame
            DataFrame object with the metadata associated to the samples in
            the ``ordination`` object, should have an index set and it should
            match the identifiers in the ``ordination`` object.
        feature_metadata: pd.DataFrame, optional
            DataFrame object with the metadata associated to the names of
            tips and/or internal nodes in the ``tree`` object, should have an
            index set and it should match at least one of these nodes' names.
        ordination: skbio.OrdinationResults, optional
            Object containing the computed values for an ordination method in
            scikit-bio. Currently supports skbio.stats.ordination.PCoA and
            skbio.stats.ordination.RDA results.
        ignore_missing_samples: bool, optional (default False)
            If True, pads missing samples (i.e. samples in the table but not
            the metadata) with placeholder metadata. If False, raises a
            DataMatchingError if any such samples exist. (Note that in either
            case, samples in the metadata but not in the table are filtered
            out; and if no samples are shared between the table and metadata,
            a DataMatchingError is raised regardless.) This is analogous to the
            ignore_missing_samples flag in Emperor.
        filter_missing_features: bool, optional (default False)
            If True, filters features from the table that aren't present as
            tips in the tree. If False, raises a DataMatchingError if any such
            features exist. (Note that in either case, features in the tree but
            not in the table are preserved.)
        resource_path: str, optional
            Load the resources from a user-specified remote location. If set to
            None resources are loaded from the current directory.
        filter_unobserved_features_from_phylogeny: bool, optional
            If True, filters features from the phylogeny that aren't present as
            features in feature table. Otherwise, the phylogeny is not
            filtered.

        Attributes
        ----------
        tree:
            Phylogenetic tree.
        table:
            Contingency matrix for the phylogeny.
        samples:
            Sample metadata.
        features:
            Feature metadata.
        ordination:
            Ordination matrix to visualize simultaneously with the tree.
        base_url:
            Base path to the remote resources.
        """
        self.tree = tree
        self.table = table
        # Copies keep later matching/filtering from touching the caller's
        # DataFrames.
        self.samples = sample_metadata.copy()

        if feature_metadata is not None:
            self.features = feature_metadata.copy()
        else:
            self.features = None

        self.ordination = ordination

        self.base_url = resource_path
        if self.base_url is None:
            self.base_url = './'

        self._validate_and_match_data(
            ignore_missing_samples,
            filter_missing_features,
            filter_unobserved_features_from_phylogeny)

        if self.ordination is not None:
            # Note that tip-level metadata is the only "feature metadata" we
            # send to Emperor, because internal nodes in the tree should not
            # correspond to features in the table (and thus to arrows in a
            # biplot).
            self._emperor = Emperor(
                self.ordination, mapping_file=self.samples,
                feature_mapping_file=self.tip_md,
                ignore_missing_samples=ignore_missing_samples,
                remote='./emperor-resources')
        else:
            self._emperor = None

    def _validate_and_match_data(self, ignore_missing_samples,
                                 filter_missing_features,
                                 filter_unobserved_features_from_phylogeny):
        """Wrap the tree, match it against the table and metadata, and
        optionally shear the tree down to the features left in the table.

        Sets ``self.tree``, ``self.table``, ``self.samples``,
        ``self.tip_md``, ``self.int_md`` and ``self._bp_tree``.
        """
        # Note that the feature_table we get from QIIME 2 (as an argument to
        # this function) is set up such that the index describes sample IDs and
        # the columns describe feature IDs. We transpose this table before
        # sending it to tools.match_inputs() and keep using the transposed
        # table for the rest of this visualizer.
        self.tree = Tree(self.tree)
        self.table, self.samples, self.tip_md, self.int_md = match_inputs(
            self.tree, self.table.T, self.samples, self.features,
            ignore_missing_samples, filter_missing_features)

        # remove unobserved features from the phylogeny
        if filter_unobserved_features_from_phylogeny:
            self.tree.bp_tree = self.tree.bp_tree.shear(set(self.table.index))

        # extract balance parenthesis
        self._bp_tree = list(self.tree.B)

        fill_missing_node_names(self.tree)

    def copy_support_files(self, target=None):
        """Copies the support files to a target directory

        If an ordination is included Emperor's support files will also be
        copied over (in a directory named emperor-resources).

        Parameters
        ----------
        target : str
            The path where resources should be copied to. By default it copies
            the files to ``self.base_url``.
        """
        if target is None:
            target = self.base_url

        # copy the required resources
        copytree(SUPPORT_FILES, os.path.join(target, 'support_files'))

        if self._emperor is not None:
            self._emperor.copy_support_files(
                os.path.join(target, 'emperor-resources'))

    def __str__(self):
        return self.make_empress()

    def make_empress(self):
        """Build an empress plot

        Returns
        -------
        str
            Formatted empress plot.

        Notes
        -----
        Once you generate the plot (and write it to a HTML file in a given
        directory) you will need to copy the support files (the JS/CSS/etc.
        code needed to view the visualization) to the same directory by calling
        the ``copy_support_files`` method.

        See Also
        --------
        empress.core.Empress.copy_support_files
        """
        main_template = self._get_template()

        # _to_dict computes the layout coordinates and puts all of the data
        # into a dictionary for consumption by the template
        data = self._to_dict()

        plot = main_template.render(data)

        return plot

    def _to_dict(self):
        """Convert processed data into a dictionary

        Returns
        -------
        dict
            A dictionary describing the plots contained in the ordination
            object and the sample + feature metadata.
        """
        # Compute coordinates resulting from layout algorithm(s)
        # TODO: figure out implications of screen size
        layout_to_coordsuffix, default_layout = self.tree.coords(4020, 4020)

        tree_data = {}
        names_to_keys = {}
        for node_idx in self.tree.postorder(include_self=True):
            node_name = self.tree.name(node_idx)
            node_data = {
                'name': node_name,
                'color': [0.75, 0.75, 0.75],
                'sampVal': 1,
                'visible': True,
                'single_samp': False
            }
            # Add coordinate data from all layouts for this node
            for layoutsuffix in layout_to_coordsuffix.values():
                xcoord = "x" + layoutsuffix
                ycoord = "y" + layoutsuffix
                node_data[xcoord] = getattr(self.tree, xcoord)[node_idx]
                node_data[ycoord] = getattr(self.tree, ycoord)[node_idx]
            # Hack: it isn't mentioned above, but we need start pos info for
            # circular layout. The start pos for the other layouts is the
            # parent xy coordinates so we only need to specify the start
            # for circular layout.
            node_data["xc0"] = self.tree.xc0[node_idx]
            node_data["yc0"] = self.tree.yc0[node_idx]

            # Also add vertical bar coordinate info for the rectangular layout,
            # and start point & arc coordinate info for the circular layout
            if not self.tree.isleaf(node_idx):
                node_data["highestchildyr"] = \
                    self.tree.highest_child_yr[node_idx]
                node_data["lowestchildyr"] = \
                    self.tree.lowest_child_yr[node_idx]
                node_data["arcx0"] = self.tree.arcx0[node_idx]
                node_data["arcy0"] = self.tree.arcy0[node_idx]
                node_data["arcstartangle"] = \
                    self.tree.highest_child_clangle[node_idx]
                node_data["arcendangle"] = \
                    self.tree.lowest_child_clangle[node_idx]

            tree_data[node_idx] = node_data

            # Several nodes may share a name, so each name maps to a list.
            names_to_keys.setdefault(node_name, []).append(node_idx)

        names = [
            self.tree.name(node_idx)
            for node_idx in self.tree.preorder(include_self=True)
        ]

        # Convert sample metadata to a JSON-esque format
        sample_data = self.samples.to_dict(orient='index')

        # Convert feature metadata, similarly to how we handle sample metadata.
        # If the user passed in feature metadata, self.features won't be None.
        # (We don't actually use any data from self.features at this point in
        # the program since it hasn't had taxonomy splitting / matching / etc.
        # done.)
        if self.features is not None:
            # If we're in this block, we know that self.tip_md and self.int_md
            # are both DataFrames. They have identical columns, so we can just
            # use self.tip_md.columns when setting feature_metadata_columns.
            # (We don't use self.features.columns because stuff like taxonomy
            # splitting will have changed the columns from what they initially
            # were in some cases.)
            feature_metadata_columns = list(self.tip_md.columns)
            # Calling .to_dict() on an empty DataFrame just gives you {}, so
            # this is safe even if there is no tip or internal node metadata.
            # (...At least one of these DFs should be populated, though, since
            # none of the feature IDs matching up would have caused an error.)
            tip_md_json = self.tip_md.to_dict(orient='index')
            int_md_json = self.int_md.to_dict(orient='index')
        else:
            feature_metadata_columns = []
            tip_md_json = {}
            int_md_json = {}

        # TODO: Empress is currently storing all metadata as strings. This is
        # memory intensive and won't scale well. We should convert all numeric
        # data/compress metadata.

        # This is used in biom-table. Currently this is only used to ignore
        # null data (i.e. NaN and "unknown") and also determines sorting order.
        # The original intent is to signal what columns are
        # discrete/continuous.
        # type of sample metadata (n - number, o - object)
        sample_data_type = {
            k: 'n' if pd.api.types.is_numeric_dtype(v) else 'o'
            for k, v in self.samples.dtypes.to_dict().items()
        }

        # For every column of the table, list the index labels whose value
        # in that column is greater than zero.
        # DataFrame.items() is used because iteritems() was removed in
        # pandas 2.0; both iterate (column label, column Series) pairs.
        obs_data = {}
        feature_table = (self.table > 0)
        for _, series in feature_table.items():
            obs_data[series.name] = series[series].index.tolist()

        data_to_render = {
            'base_url': './support_files',
            'tree': self._bp_tree,
            'tree_data': tree_data,
            'names_to_keys': names_to_keys,
            'sample_data': sample_data,
            'sample_data_type': sample_data_type,
            'tip_metadata': tip_md_json,
            'int_metadata': int_md_json,
            'feature_metadata_columns': feature_metadata_columns,
            'obs_data': obs_data,
            'names': names,
            'layout_to_coordsuffix': layout_to_coordsuffix,
            'default_layout': default_layout,
            'emperor_div': '',
            'emperor_require_logic': '',
            'emperor_style': '',
            'emperor_base_dependencies': '',
            'emperor_classes': ''
        }

        if self._emperor is not None:
            data_to_render.update(self._scavenge_emperor())

        return data_to_render

    def _get_template(self, standalone=False):
        """Get the jinja template object

        Parameters
        ----------
        standalone: bool, optional
            Whether or not the generated plot will load resources locally
            (``True``), or from a specified URL (``False``). This argument
            is not currently read by the method body.

        Returns
        -------
        jinja2.Template
            Template where the plot is created.
        """

        # based on: http://stackoverflow.com/a/6196098
        env = Environment(loader=FileSystemLoader(TEMPLATES))
        return env.get_template('empress-template.html')

    def _scavenge_emperor(self):
        """Render Emperor and extract the HTML pieces the template needs.

        Returns
        -------
        dict
            The ``emperor_*`` entries merged into the template data.
        """
        # can't make this 50vw because one of the plot containers has some
        # padding that makes the divs stack on top of each other
        self._emperor.width = '48vw'
        self._emperor.height = '100vh; float: right'

        # make the background white so it matches Empress
        self._emperor.set_background_color('white')
        self._emperor.set_axes(color='black')

        html = self._emperor.make_emperor(standalone=True)
        html = html.split('\n')

        # The following line references will be replaced with API calls to
        # the Emperor object, however those are not implemented yet
        emperor_base_dependencies = html[6]

        # line 14 is where the CSS includes start, but it is surrounded by
        # unnecessary tags so we strip those out
        style = '\n'.join([
            line.strip().replace("'", '').replace(',', '')
            for line in html[14:20]
        ])

        # main divs for emperor
        emperor_div = '\n'.join(html[39:44])

        # main js script for emperor
        emperor_require_logic = '\n'.join(html[45:-3])

        # once everything is loaded replace the callback tag for custom JS
        with open(SELECTION_CALLBACK_PATH) as f:
            selection_callback = f.read()
        emperor_require_logic = emperor_require_logic.replace(
            '/*__select_callback__*/', selection_callback)

        emperor_data = {
            'emperor_div': emperor_div,
            'emperor_require_logic': emperor_require_logic,
            'emperor_style': style,
            'emperor_base_dependencies': emperor_base_dependencies,
            'emperor_classes': 'combined-plot-container'
        }

        return emperor_data