def load_json(fp):
    """Build a taxonomy tree from a bacteria-count JSON document.

    The text returned by ``clean_json(fp)`` is parsed as JSON and every entry
    of its ``'ubiome_bacteriacounts'`` list is turned into a ``PhyloTree``
    node. Nodes are indexed by the row's ``'taxon'`` value and linked to the
    node whose key equals their ``parent`` attribute, when such a node exists.

    Args:
        fp: Passed straight through to ``clean_json``; its exact type is
            decided by that helper.

    Returns:
        The node stored under the smallest ``'taxon'`` key. It additionally
        carries an ``alpha`` attribute computed by ``alpha_function`` over
        all normalised counts.

    Side effects:
        Each row is passed to ``normalise_row`` and a one-line summary is
        printed to stdout.
    """
    payload = json.loads(clean_json(fp))

    nodes_by_taxon = {}
    norm_counts = []
    for record in payload['ubiome_bacteriacounts']:
        # normalise_row runs before 'count_norm' is read, so it is presumably
        # what provides that key -- TODO confirm against the helper.
        normalise_row(record)
        norm_counts.append(record['count_norm'])

        node = PhyloTree()
        node.name = record['tax_name']
        # Every key of the row becomes an attribute of the node.
        node.add_features(**record)
        nodes_by_taxon[record['taxon']] = node

    # The entry with the smallest taxon key is treated as the root.
    root = nodes_by_taxon[min(nodes_by_taxon)]
    root_count = root.count_norm
    root.alpha = alpha_function(norm_counts)

    for node in nodes_by_taxon.values():
        # Percentage of this node's count relative to the root's count.
        share = float(node.count_norm) / root_count * 100
        node.add_feature('count_pct', share)

        # `parent` is presumably the parent's taxon id copied from the row by
        # add_features -- verify against the input schema.
        parent_node = nodes_by_taxon.get(node.parent)
        if parent_node is not None:
            parent_node.add_child(node)

    # NOTE(review): the message says "tree depth" but prints len(root);
    # confirm that len() of a PhyloTree is the intended figure.
    summary = 'loaded {} into tree depth {} diversity {:.2f}'.format(
        len(nodes_by_taxon), len(root), root.alpha)
    print(summary)
    return root
def build_tree(self, sample, rank_limit='None'):
    """Assemble a ``PhyloTree`` from the taxonomy paths of a sample's taxids.

    For every taxid returned by ``self.get_all_tax_ids(sample)`` whose rank
    position does not exceed that of ``rank_limit``, one node is created per
    non-empty id on its tax path (nodes are shared between paths), and
    consecutive nodes on the path are linked parent -> child. Nodes whose
    rank is ``'superkingdom'`` are attached under a synthetic root.

    Args:
        sample: Forwarded to ``self.get_all_tax_ids``.
        rank_limit: Key into ``self.rank_position``; taxids whose own rank is
            positioned after it are skipped. The default is the *string*
            ``'None'``.

    Returns:
        The synthetic root node (name ``'root'``, taxid ``'0'``), or, when
        that root has exactly one child, that child detached from the root.
    """
    # Gets taxids of sample. Gets all taxids if sample is None.
    sample_taxids = set(self.get_all_tax_ids(sample))

    lineage_nodes = {}   # taxid -> ordered nodes along its tax path
    nodes_by_id = {}     # node id -> the single shared node for that id
    top_level = []       # superkingdom nodes, in order of first appearance

    for taxid in sample_taxids:
        lineage = lineage_nodes[taxid] = []
        taxpath = self.get_taxpath(taxid)
        taxid_rank = self.get_rank(taxid)
        if not (self.rank_position[taxid_rank]
                <= self.rank_position[rank_limit]):
            continue

        for node_id in taxpath:
            if node_id == '':
                continue

            if node_id in nodes_by_id:
                node = nodes_by_id[node_id]
            else:
                node = PhyloTree()
                nodes_by_id[node_id] = node
                node.name = str(node_id)
                node.taxid = node_id
                node_rank = self.get_rank(node_id)
                node.add_feature("rank", node_rank)
                node.add_feature("sci_name", self.get_name(node_id))
                if node_rank == 'superkingdom':
                    top_level.append(node)
            lineage.append(node)

    # Link each node on a path to its predecessor, unless already linked.
    for lineage in lineage_nodes.values():
        for parent, child in zip(lineage, lineage[1:]):
            if parent and child not in parent.children:
                parent.add_child(child)

    root = PhyloTree()
    root.name = 'root'
    root.taxid = '0'
    root.add_feature("rank", "root")
    root.add_feature("sci_name", "root")
    for child in top_level:
        root.add_child(child)

    # A root with a single child adds nothing: hand back that child alone.
    if len(root.children) == 1:
        return root.children[0].detach()
    return root