def testBasicEst(self):
    """
    Fit a pure-birth model to every reference tree, once from the tree
    itself and once from its internal node ages, and check both fits
    against the expected birth rate and log-likelihood.
    """
    # One (birth rate, log-likelihood) pair per tree, in the same order as
    # the trees appear in the reference file.
    expected_results = (
        (0.02879745490817826186758, -59.41355682054444287132355),
        (0.03074708092192806122012, -57.38280732060526645454956),
        (0.02539588437187430269848, -63.31025321526630023072357),
        (0.02261951969802362960582, -66.89924384677527768872096),
        (0.02804607815688910446572, -60.23314120509648716961237),
        (0.02748663302756114423797, -60.85775993426526042640035),
        (0.02816256618562208019485, -60.10465085978295007862471),
        (0.03592126646048716259729, -52.56123967307649991198559),
        (0.02905144990609926855529, -59.14133401672411594063306),
        (0.02703739196351075124714, -61.36860953277779628933786),
        (0.01981322730236481297061, -71.00561162515919022553135),
    )
    reference_path = pathmap.tree_source_path("pythonidae.reference-trees.newick")
    trees = dendropy.TreeList.get_from_path(reference_path, "newick")
    self.assertEqual(len(trees), len(expected_results))
    for tree, (expected_rate, expected_lnl) in zip(trees, expected_results):
        # Same model, two entry points: the tree, then its node ages.
        fit_from_tree = birthdeath.fit_pure_birth_model(
            tree=tree,
            ultrametricity_precision=1e-5,
        )
        node_ages = tree.internal_node_ages(ultrametricity_precision=1e-5)
        fit_from_ages = birthdeath.fit_pure_birth_model(internal_node_ages=node_ages)
        for fit in (fit_from_tree, fit_from_ages):
            self.assertAlmostEqual(fit["birth_rate"], expected_rate, places=5)
            self.assertAlmostEqual(fit["log_likelihood"], expected_lnl, places=5)
def summarize_trees(self, trees, trees_outf=None, params=None, summaries=None):
    """
    Post-process ``trees`` and attach summary statistics to each one.

    Every retained tree receives a ``stats`` mapping (unset keys read as
    "NA") with its size, length, edge count, estimated pure-birth rate and
    the mean patristic distances among its disturbed-habitat and
    interior-habitat nodes.

    Parameters:
        trees: collection of trees; first passed through
            ``self.tree_postprocessor.process_trees``. The result is
            indexed with ``trees[0]``, so it must not be empty.
        trees_outf: optional output stream; when given, the retained trees
            are written to it in NEXUS format.
        params: optional mapping; when given, a copy is stored on every
            retained tree as ``tree.params``.
        summaries: optional list; when given, one mapping per retained
            tree (its stats, updated with the tree's params if it has any)
            is appended to it.

    Returns:
        A ``(trees, stats_fields)`` tuple: the processed trees with the
        rejected ones removed, and the set of all stats keys recorded.
    """
    trees = self.tree_postprocessor.process_trees(trees)
    stats_fields = set()
    # crucial assumption here is all trees from same landscape wrt to
    # number of islands and habitats
    representative_taxon = trees[0].taxon_namespace[0]
    num_islands = len(representative_taxon.island_code)
    num_habitats = len(representative_taxon.habitat_code)
    # Iterate over a snapshot: rejected trees are removed from `trees`.
    for tree in list(trees):
        num_tips = 0
        total_length = 0.0
        total_edges = 0
        nodes_by_island = collections.defaultdict(list)
        nodes_by_habitat = collections.defaultdict(list)
        disturbed_habitat_nodes = []
        interior_habitat_nodes = []
        for nd in tree:
            # Nodes carrying neither a taxon nor a label are ignored.
            if nd.taxon is None and nd.label is None:
                continue
            if nd.label is not None:
                self.tree_postprocessor.decode_labeled_item_biogeography(nd)
            # NOTE(review): `num_tips` is incremented for every counted
            # node, not only for leaves -- confirm this is intended.
            total_edges += 1
            num_tips += 1
            total_length += nd.edge.length
            if nd.is_leaf():
                # A "1" at position k of a code string marks occupancy of
                # island/habitat k.
                for island_idx, occupied in enumerate(nd.taxon.island_code):
                    if occupied == "1":
                        nodes_by_island[island_idx].append(nd)
                for habitat_idx, occupied in enumerate(nd.taxon.habitat_code):
                    if occupied == "1":
                        nodes_by_habitat[habitat_idx].append(nd)
                        # Habitat 0 is the disturbed habitat; every other
                        # habitat counts as interior (recorded once per node).
                        if habitat_idx == 0:
                            disturbed_habitat_nodes.append(nd)
                        elif nd not in interior_habitat_nodes:
                            interior_habitat_nodes.append(nd)
        if len(nodes_by_island) < num_islands and self.drop_trees_not_occupying_all_islands:
            trees.remove(tree)
            continue
        if len(nodes_by_habitat) < num_habitats and self.drop_trees_not_occupying_all_habitats:
            trees.remove(tree)
            continue
        pdm = treemeasure.PatristicDistanceMatrix(tree=tree)
        tree.stats = collections.defaultdict(lambda: "NA")
        if params is not None:
            tree.params = params.copy()
        tree.stats["size"] = num_tips
        tree.stats["length"] = total_length
        tree.stats["edges"] = total_edges
        tree.stats["est.birth.rate"] = birthdeath.fit_pure_birth_model(tree=tree)["birth_rate"]
        weighted_disturbed, unweighted_disturbed = self.get_mean_patristic_distance(pdm, disturbed_habitat_nodes)
        weighted_interior, unweighted_interior = self.get_mean_patristic_distance(pdm, interior_habitat_nodes)
        tree.stats["weighted.disturbed.habitat.pd"] = weighted_disturbed
        tree.stats["unweighted.disturbed.habitat.pd"] = unweighted_disturbed
        tree.stats["weighted.interior.habitat.pd"] = weighted_interior
        tree.stats["unweighted.interior.habitat.pd"] = unweighted_interior
        try:
            tree.stats["weighted.disturbed.to.interior.habitat.pd"] = weighted_disturbed / weighted_interior
            tree.stats["unweighted.disturbed.to.interior.habitat.pd"] = unweighted_disturbed / unweighted_interior
        except (ZeroDivisionError, TypeError):
            # A zero or non-numeric mean distance makes the ratio undefined.
            tree.stats["weighted.disturbed.to.interior.habitat.pd"] = "NA"
            tree.stats["unweighted.disturbed.to.interior.habitat.pd"] = "NA"
        # The return value is not used here; presumably the calculator
        # records its results on `tree.stats` -- confirm against `rcalc`.
        self.rcalc.calc_ecological_stats(
            tree=tree,
            patristic_distance_matrix=pdm,
            total_tree_length=total_length,
            total_tree_edges=total_edges,
            nodes_by_island=nodes_by_island,
            nodes_by_habitat=nodes_by_habitat,
            disturbed_habitat_nodes=disturbed_habitat_nodes,
            interior_habitat_nodes=interior_habitat_nodes,
        )
        stats_fields.update(tree.stats.keys())
        if summaries is not None:
            summary = tree.stats.copy()
            # `tree.params` is only assigned above when `params` is given,
            # so it may be absent; do not fail the summary in that case.
            tree_params = getattr(tree, "params", None)
            if tree_params is not None:
                summary.update(tree_params)
            summaries.append(summary)
    if trees_outf is not None:
        try:
            trees.write_to_stream(trees_outf, "nexus")
        except AttributeError:
            # Containers without `write_to_stream` (e.g. a plain list) are
            # written through the fallback writer.
            self.write_nexus(trees, trees_outf)
    return trees, stats_fields
def summarize_trees(self, trees, trees_outf=None, params=None, summaries=None):
    """
    Post-process ``trees`` and attach summary statistics to each one.

    Every retained tree receives a ``stats`` mapping (unset keys read as
    "NA") with its size, length, edge count, estimated pure-birth rate and
    the mean patristic distances among its disturbed-habitat and
    interior-habitat nodes.

    Parameters:
        trees: collection of trees; first passed through
            ``self.tree_postprocessor.process_trees``. The result is
            indexed with ``trees[0]``, so it must not be empty.
        trees_outf: optional output stream; when given, the retained trees
            are written to it in NEXUS format.
        params: optional mapping; when given, a copy is stored on every
            retained tree as ``tree.params``.
        summaries: optional list; when given, one mapping per retained
            tree (its stats, updated with the tree's params if it has any)
            is appended to it.

    Returns:
        A ``(trees, stats_fields)`` tuple: the processed trees with the
        rejected ones removed, and the set of all stats keys recorded.
    """
    trees = self.tree_postprocessor.process_trees(trees)
    stats_fields = set()
    # crucial assumption here is all trees from same landscape wrt to
    # number of islands and habitats
    representative_taxon = trees[0].taxon_namespace[0]
    num_islands = len(representative_taxon.island_code)
    num_habitats = len(representative_taxon.habitat_code)
    # Iterate over a snapshot: rejected trees are removed from `trees`.
    for tree in list(trees):
        num_tips = 0
        total_length = 0.0
        total_edges = 0
        nodes_by_island = collections.defaultdict(list)
        nodes_by_habitat = collections.defaultdict(list)
        disturbed_habitat_nodes = []
        interior_habitat_nodes = []
        for nd in tree:
            # Nodes carrying neither a taxon nor a label are ignored.
            if nd.taxon is None and nd.label is None:
                continue
            if nd.label is not None:
                self.tree_postprocessor.decode_labeled_item_biogeography(nd)
            # NOTE(review): `num_tips` is incremented for every counted
            # node, not only for leaves -- confirm this is intended.
            total_edges += 1
            num_tips += 1
            total_length += nd.edge.length
            if nd.is_leaf():
                # A "1" at position k of a code string marks occupancy of
                # island/habitat k.
                for island_idx, occupied in enumerate(nd.taxon.island_code):
                    if occupied == "1":
                        nodes_by_island[island_idx].append(nd)
                for habitat_idx, occupied in enumerate(nd.taxon.habitat_code):
                    if occupied == "1":
                        nodes_by_habitat[habitat_idx].append(nd)
                        # Habitat 0 is the disturbed habitat; every other
                        # habitat counts as interior (recorded once per node).
                        if habitat_idx == 0:
                            disturbed_habitat_nodes.append(nd)
                        elif nd not in interior_habitat_nodes:
                            interior_habitat_nodes.append(nd)
        if len(nodes_by_island) < num_islands and self.drop_trees_not_occupying_all_islands:
            trees.remove(tree)
            continue
        if len(nodes_by_habitat) < num_habitats and self.drop_trees_not_occupying_all_habitats:
            trees.remove(tree)
            continue
        pdm = treemeasure.PatristicDistanceMatrix(tree=tree)
        tree.stats = collections.defaultdict(lambda: "NA")
        if params is not None:
            tree.params = params.copy()
        tree.stats["size"] = num_tips
        tree.stats["length"] = total_length
        tree.stats["edges"] = total_edges
        tree.stats["est.birth.rate"] = birthdeath.fit_pure_birth_model(tree=tree)["birth_rate"]
        weighted_disturbed, unweighted_disturbed = self.get_mean_patristic_distance(pdm, disturbed_habitat_nodes)
        weighted_interior, unweighted_interior = self.get_mean_patristic_distance(pdm, interior_habitat_nodes)
        tree.stats["weighted.disturbed.habitat.pd"] = weighted_disturbed
        tree.stats["unweighted.disturbed.habitat.pd"] = unweighted_disturbed
        tree.stats["weighted.interior.habitat.pd"] = weighted_interior
        tree.stats["unweighted.interior.habitat.pd"] = unweighted_interior
        try:
            tree.stats["weighted.disturbed.to.interior.habitat.pd"] = weighted_disturbed / weighted_interior
            tree.stats["unweighted.disturbed.to.interior.habitat.pd"] = unweighted_disturbed / unweighted_interior
        except (ZeroDivisionError, TypeError):
            # A zero or non-numeric mean distance makes the ratio undefined.
            tree.stats["weighted.disturbed.to.interior.habitat.pd"] = "NA"
            tree.stats["unweighted.disturbed.to.interior.habitat.pd"] = "NA"
        # The return value is not used here; presumably the calculator
        # records its results on `tree.stats` -- confirm against `rcalc`.
        self.rcalc.calc_ecological_stats(
            tree=tree,
            patristic_distance_matrix=pdm,
            total_tree_length=total_length,
            total_tree_edges=total_edges,
            nodes_by_island=nodes_by_island,
            nodes_by_habitat=nodes_by_habitat,
            disturbed_habitat_nodes=disturbed_habitat_nodes,
            interior_habitat_nodes=interior_habitat_nodes,
        )
        stats_fields.update(tree.stats.keys())
        if summaries is not None:
            summary = tree.stats.copy()
            # `tree.params` is only assigned above when `params` is given,
            # so it may be absent; do not fail the summary in that case.
            tree_params = getattr(tree, "params", None)
            if tree_params is not None:
                summary.update(tree_params)
            summaries.append(summary)
    if trees_outf is not None:
        try:
            trees.write_to_stream(trees_outf, "nexus")
        except AttributeError:
            # Containers without `write_to_stream` (e.g. a plain list) are
            # written through the fallback writer.
            self.write_nexus(trees, trees_outf)
    return trees, stats_fields