def test_fast_unifrac(self):
    """fast_unifrac should run on the whole tree for each valid envs input."""
    # Smoke test only: the numeric results are not checked here because the
    # detailed correctness tests live with the fast_tree module.
    for envs in (self.env_counts,
                 self.missing_env_counts,
                 self.extra_tip_counts):
        fast_unifrac(self.t, envs)

    # Envs stored in self.wrong_tip_counts must be rejected.
    self.assertRaises(ValueError, fast_unifrac, self.t,
                      self.wrong_tip_counts)
def unifrac_distance_matrix(table, sample_ids, otu_ids, tree):
    """Compute the weighted UniFrac distances between all samples.

    Parameters
    ----------
    table : np.array
        Contingency table
        samples = rows
        observations = columns
    sample_ids : list, str
        List of sample ids (used as the row labels of `table`)
    otu_ids : list, str
        List of otu ids (used as the column labels of `table`)
    tree : object
        Tree handed unchanged to `fast_unifrac`.
        NOTE(review): earlier documentation called this a newick string;
        confirm which form `fast_unifrac` expects.

    Returns
    -------
    pd.DataFrame :
        Unifrac distance matrix, with rows and columns labelled by the
        names `fast_unifrac` returns alongside the matrix.
    """
    counts = pd.DataFrame(table, index=sample_ids, columns=otu_ids)
    # to_dict() yields {otu: {sample: count}}, the mapping given to
    # fast_unifrac as its envs argument.
    unifrac_out = fast_unifrac(tree, counts.to_dict(), weighted=True)
    matrix = unifrac_out['distance_matrix'][0]
    labels = unifrac_out['distance_matrix'][1]
    return pd.DataFrame(matrix, index=labels, columns=labels)
def timing(tree_size, num_trees, num_samples):
    """Benchmark EMDUnifrac (with and without flow) against FastUnifrac.

    For each of `num_trees` random trees with `tree_size` leaves,
    `num_samples` simulated data sets are generated and each of the three
    weighted UniFrac implementations is timed on them.

    Returns
    -------
    tuple of float
        Mean wall-clock time of (EMDUnifrac without flow,
        EMDUnifrac with flow, FastUnifrac), in that order.
    """
    fast_elapsed = []
    emd_elapsed = []
    emd_flow_elapsed = []

    for _ in range(num_trees):
        random_tree = Tree()
        random_tree.populate(tree_size, random_branches=True)
        tree_str = random_tree.write(format=1)
        # The same newick string feeds both implementations.
        tr = DndParser(tree_str, UniFracTreeNode)
        (T, l, nodes_in_order) = EMDU.parse_tree(tree_str)

        for _ in range(num_samples):
            # FastUnifrac can only take weight on leaf nodes
            envs = EMDU.simulate_data(random_tree.get_leaf_names())
            (envs_prob_dict, samples) = EMDU.parse_envs(envs, nodes_in_order)
            P = envs_prob_dict[samples[0]]
            Q = envs_prob_dict[samples[1]]

            # EMDUnifrac with flow
            started = timeit.default_timer()
            EMDU.EMDUnifrac_weighted_flow(T, l, nodes_in_order, P, Q)
            finished = timeit.default_timer()
            emd_flow_elapsed.append(finished - started)

            # EMDUnifrac no flow
            started = timeit.default_timer()
            EMDU.EMDUnifrac_weighted(T, l, nodes_in_order, P, Q)
            finished = timeit.default_timer()
            emd_elapsed.append(finished - started)

            # FastUnifrac weighted
            started = timeit.default_timer()
            fast_unifrac(tr, envs, weighted=True,
                         modes=set(['distance_matrix']))
            finished = timeit.default_timer()
            fast_elapsed.append(finished - started)

    emd_mean = np.array(emd_elapsed).mean()
    emd_flow_mean = np.array(emd_flow_elapsed).mean()
    fast_mean = np.array(fast_elapsed).mean()
    return (emd_mean, emd_flow_mean, fast_mean)
def unifrac_distance_rows(data, samples_arg=None, otus_arg=None,
                          tree_arg=None, sample_filter=None,
                          otu_filter=None):
    """Return the UniFrac distance matrix for the rows of `data`.

    `samples_arg`, `otus_arg` and `tree_arg` may each be None (the matching
    get_default_* function is used), a callable (it is called with no
    arguments) or the value itself. `sample_filter` and `otu_filter`
    default to empty lists.

    A previously stored matrix for the same data and filters is returned
    as-is when one exists; otherwise the matrix is computed with
    fast_unifrac, post-processed, stored, and returned.
    """
    DEBUG("Starting unifrac_distance_rows...")

    sample_filter = [] if sample_filter is None else sample_filter
    otu_filter = [] if otu_filter is None else otu_filter

    # The cogent import is done lazily with warnings silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from cogent.maths.unifrac.fast_unifrac import fast_unifrac

    def _resolve(arg, make_default):
        # None -> default, callable -> its result, anything else -> itself.
        if arg is None:
            return make_default()
        if callable(arg):
            return arg()
        return arg

    samples = _resolve(samples_arg, get_default_samples)
    otus = _resolve(otus_arg, get_default_otus)
    # The default tree depends on the OTUs resolved just above.
    tree = _resolve(tree_arg, lambda: get_default_tree(otus))

    cached = __get_precalculated_unifrac_file_if_exists_for_data(
        data, sample_filter, otu_filter)
    if cached is not None:
        DEBUG("Found previously calculated Unifrac data")
        return cached

    DEBUG("Preparing data dictionary...")
    data_dict = __unifrac_prepare_dictionary_from_matrix_rows(
        data, samples, otus, sample_filter, otu_filter)

    DEBUG("Running fast_unifrac...")
    unifrac = fast_unifrac(tree, data_dict, weighted=WEIGHTED_UNIFRAC)
    DEBUG("Unifrac results: {0}".format(unifrac))

    DEBUG("Reordering results...")
    mat = __reorder_unifrac_distance_matrix_by_original_samples(
        unifrac['distance_matrix'], samples, sample_filter, otu_filter)

    DEBUG("Setting distances for filtered items to large values...")
    filter_indices = []
    for position, sample in enumerate(samples):
        if sample in sample_filter:
            filter_indices.append(position)
    mat = __increase_distance_for_filtered_samples(mat, filter_indices)

    DEBUG("Fixing NaN/inf values...")
    mat = np.nan_to_num(mat)

    if SQUARE_UNIFRAC_DISTANCE:
        # Element-wise square of the distances.
        mat = np.multiply(mat, mat)

    __save_calculated_unifrac_file_and_hash_for_data(
        data, sample_filter, otu_filter, mat)
    DEBUG("Finished calculating Samples distance matrix.")
    return mat
def calculate_unifrac(abund, sample_names, taxa_tree):
    """
    computes the unifrac distance between samples twice: first unweighted,
    then weighted
    @param abund: the abundance matrix
    @param sample_names: the sample names
    @param taxa_tree: the tree of data
    @return: (unweighted matrix, row names), (weighted matrix, row names)
    @rtype: tuple
    """
    unifrac_dict = _create_unifrac_dict(abund, sample_names, taxa_tree)
    tree = dendropy_to_cogent(taxa_tree)

    def _matrix_and_rows(use_weights):
        # One fast_unifrac run restricted to the distance-matrix mode.
        outcome = fast_unifrac(tree, unifrac_dict,
                               modes={UNIFRAC_DIST_MATRIX},
                               is_symmetric=True, weighted=use_weights)
        entry = outcome[UNIFRAC_DIST_MATRIX]
        return (entry[0], entry[1])

    unweighted_pair = _matrix_and_rows(False)
    weighted_pair = _matrix_and_rows(True)
    return unweighted_pair, weighted_pair
def test_make_unifrac_metric(self):
    """the unweighted unifrac metric wrapper should agree with fast_unifrac"""
    tree = parse_newick(self.l19_treestr, PhyloNode)

    # Result through the metric wrapper.
    metric_fn = make_unifrac_metric(False, unifrac, True)
    res = metric_fn(self.l19_data, self.l19_taxon_names, tree,
                    self.l19_sample_names)

    # Same computation done directly, then put in sample order.
    envs = make_envs_dict(self.l19_data, self.l19_sample_names,
                          self.l19_taxon_names)
    direct = fast_unifrac(tree, envs, modes=["distance_matrix"])
    unifrac_mat, unifrac_names = direct["distance_matrix"]
    expected = _reorder_unifrac_res([unifrac_mat, unifrac_names],
                                    self.l19_sample_names)

    self.assertFloatEqual(res, expected)
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertNotEqual(res[0, 1], 1.0)
def unifrac_pycogent(self):
    """Step 3 with Pycogent.

    Reads the newick tree at `self.fasttree_tree`, runs fast_unifrac on it
    with `self.tax.otu_table.to_dict()` as the environments, and writes the
    resulting distance matrix to `self.distances_csv` as a tab-separated
    table.
    """
    # Read the tree inside a context manager so the handle is always closed
    # (the bare open(...).read() left that to the garbage collector).
    with open(self.fasttree_tree, 'r') as handle:
        tree_newick = handle.read()
    from cogent.parse.tree import DndParser
    from cogent.maths.unifrac.fast_tree import UniFracTreeNode
    tree = DndParser(tree_newick, UniFracTreeNode)
    from cogent.maths.unifrac.fast_unifrac import fast_unifrac
    distances = fast_unifrac(tree, self.tax.otu_table.to_dict())
    # Make a dataframe #
    names = distances['distance_matrix'][1]
    df = pandas.DataFrame(distances['distance_matrix'][0],
                          index=names, columns=names)
    df.to_csv(self.distances_csv, sep='\t', float_format='%.5g')
def result(data, taxon_names, tree, sample_names, **kwargs):
    """Run fast_unifrac and return only the distance matrix, reordered.

    The matrix is reordered via _reorder_unifrac_res using `sample_names`.
    `weighted`, `metric` and `is_symmetric` are taken from the enclosing
    scope; extra keyword arguments are forwarded to fast_unifrac.

    sample_names: list of unique strings
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    unifrac_res = fast_unifrac(tree, envs, weighted=weighted, metric=metric,
                               is_symmetric=is_symmetric,
                               modes=["distance_matrix"], **kwargs)
    matrix_and_names = unifrac_res["distance_matrix"]
    return _reorder_unifrac_res(matrix_and_names, sample_names)
def result(data, taxon_names, tree, sample_names, **kwargs):
    """Thin wrapper around fast_unifrac that yields just the matrix.

    Only the 'distance_matrix' mode is requested; its result is passed
    through _reorder_unifrac_res together with `sample_names`. The
    `weighted`, `metric` and `is_symmetric` settings come from the
    enclosing scope, and any extra keyword arguments go to fast_unifrac.

    sample_names: list of unique strings
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    raw = fast_unifrac(
        tree,
        envs,
        weighted=weighted,
        metric=metric,
        is_symmetric=is_symmetric,
        modes=["distance_matrix"],
        **kwargs
    )
    return _reorder_unifrac_res(raw['distance_matrix'], sample_names)
def test_unifrac_explicit(self):
    """unifrac should reproduce the hand-calculated distance matrix.

    environment M contains only tips not in tree, tip j is in no envs
    values were calculated by hand
    """
    env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    env_counts = count_envs(env_str.splitlines())

    # NOTE(review): the fractions below rely on true division being in
    # effect for this module -- confirm.
    expected = (array([[0, 10/16, 8/13],
                       [10/16, 0, 8/17],
                       [8/13, 8/17, 0]]),
                ['A', 'B', 'C'])

    # note c,j is len 0 node
    #           /-------- /-a
    # ---------|          \-b
    #          |          /-------- /-c
    #           \--------|          \-j
    #                     \-------- /-d
    #                               \-e
    t1 = DndParser('((a:1,b:2):4,((c:3, j:17),(d:1,e:1):2):3)',
                   UniFracTreeNode)
    self.assertFloatEqual(fast_unifrac(t1, env_counts)['distance_matrix'],
                          expected)

    # changing tree topology relative to c,j tips shouldn't change
    # anything
    t2 = DndParser('((a:1,b:2):4,((c:2, j:16):1,(d:1,e:1):2):3)',
                   UniFracTreeNode)
    self.assertFloatEqual(fast_unifrac(t2, env_counts)['distance_matrix'],
                          expected)
def test_fast_unifrac_one_sample2(self):
    """fu one sam should match whole weighted unifrac result, for env 'B'"""
    # Row for env 'B' taken from the full weighted matrix, with its entries
    # put in sorted-env-name order.
    full = fast_unifrac(self.t, self.env_counts, weighted=True)
    dmtx, env_order = full['distance_matrix']
    row_b = dmtx[env_order.index('B')]
    expected = row_b[argsort(env_order)]

    # The one-sample vector for 'B', sorted the same way.
    one_sam_dvec, one_sam_env_order = fast_unifrac_one_sample(
        'B', self.t, self.env_counts, weighted=True)
    observed = one_sam_dvec[argsort(one_sam_env_order)]

    self.assertFloatEqual(observed, expected)
def test_make_unifrac_metric(self):
    """unweighted unifrac metric: wrapper output equals reordered fast_unifrac"""
    data = self.l19_data
    taxa = self.l19_taxon_names
    samples = self.l19_sample_names
    tree = parse_newick(self.l19_treestr, PhyloNode)

    unif = make_unifrac_metric(False, unifrac, True)
    res = unif(data, taxa, tree, samples)

    envs = make_envs_dict(data, samples, taxa)
    full = fast_unifrac(tree, envs, modes=['distance_matrix'])
    unifrac_mat, unifrac_names = full['distance_matrix']

    self.assertFloatEqual(
        res,
        _reorder_unifrac_res([unifrac_mat, unifrac_names], samples))
    # Spot checks on individual cells of the wrapper's matrix.
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertNotEqual(res[0, 1], 1.0)
def unifrac2(sample1, sample2, tree, repetitions=1, subsampleSize='auto'): distances = [] if subsampleSize == 'auto': subsampleSize = int(min(sample1.size, sample2.size)*.8) # 80% of the smaller sample for i in range(repetitions): subsample1 = sample1.subsample(subsampleSize) subsample2 = sample2.subsample(subsampleSize) allOtus = set(subsample1.keys()).union(subsample2.keys()) envs = dict([(otu, makeOTUdict(otu, subsample1, subsample2, sample1, sample2)) for otu in allOtus]) #pdb.set_trace() res = fast_unifrac(tree, envs, weighted=True) try: distances.append(res['distance_matrix'][0][0,1]) except: pdb.set_trace() print subsampleSize, np.array(distances).mean() return np.array(distances)
def test_fast_unifrac_one_sample3(self):
    """fu one sam should match missing env unifrac result, for env 'C'"""
    envs = self.missing_env_counts

    # Row for env 'C' of the full unweighted matrix, in sorted-env order.
    full = fast_unifrac(self.t, envs, weighted=False)
    dmtx, env_order = full['distance_matrix']
    expected = dmtx[env_order.index('C')][argsort(env_order)]

    # One-sample vector for 'C', sorted the same way.
    one_sam_dvec, one_sam_env_order = fast_unifrac_one_sample(
        'C', self.t, envs, weighted=False)
    observed = one_sam_dvec[argsort(one_sam_env_order)]
    self.assertFloatEqual(observed, expected)

    # and should raise valueerror when 'B'
    self.assertRaises(ValueError, fast_unifrac_one_sample, 'B', self.t,
                      envs, weighted=False)
def test_fast_unifrac_one_sample3(self):
    """one-sample unifrac for env 'C' should equal the full-matrix row.

    Uses the missing-env counts, unweighted. Asking for env 'B' with the
    same counts must raise ValueError.
    """
    # first get full unifrac matrix
    whole = fast_unifrac(self.t, self.missing_env_counts, weighted=False)
    dmtx, env_order = whole['distance_matrix']
    c_index = env_order.index('C')
    dmtx_vec = dmtx[c_index]
    dmtx_vec = dmtx_vec[argsort(env_order)]

    # then get one sample unifrac vector
    single = fast_unifrac_one_sample('C', self.t, self.missing_env_counts,
                                     weighted=False)
    one_sam_dvec, one_sam_env_order = single
    one_sam_dvec = one_sam_dvec[argsort(one_sam_env_order)]

    self.assertFloatEqual(one_sam_dvec, dmtx_vec)
    self.assertRaises(ValueError, fast_unifrac_one_sample, 'B', self.t,
                      self.missing_env_counts, weighted=False)
def unifrac(p1, p2, sample_ids, otu_ids, tree):
    """ Computes the weighted UniFrac distance between two urns

    Parameters
    ----------
    p1 : np.array
        Urn 1
    p2 : np.array
        Urn 2
    sample_ids : list, str
        Labels for the two urns (row labels of the two-row table)
    otu_ids : list, str
        Labels for the entries of each urn (column labels of the table)
    tree : object
        Tree handed unchanged to `fast_unifrac`

    Returns
    -------
    float :
        The off-diagonal entry (second row, first column) of the
        Unifrac distance matrix, i.e. the distance between the two urns
    """
    # The table has to exist before it is converted; the previous ordering
    # read `df` before assigning it and failed on every call.
    df = pd.DataFrame([p1, p2], index=sample_ids, columns=otu_ids)
    env = df.to_dict()
    res = fast_unifrac(tree, env, weighted=True)
    dist_mat = pd.DataFrame(res['distance_matrix'][0],
                            index=res['distance_matrix'][1],
                            columns=res['distance_matrix'][1])
    # Purely positional lookup: `.ix` no longer exists in current pandas and
    # was label-based whenever the labels happened to be integers.
    return dist_mat.iloc[1, 0]
def unifrac_upgma(table, sample_ids, otu_ids, tree):
    """ Clusters samples using the 'cluster_envs' mode of fast_unifrac

    Parameters
    ----------
    table : np.array
        Contingency table
        samples = rows
        observations = columns
    sample_ids : list, str
        List of sample ids
    otu_ids : list, str
        List of otu ids
    tree : object
        Tree handed unchanged to `fast_unifrac`.
        NOTE(review): earlier documentation called this a newick string;
        confirm which form `fast_unifrac` expects.

    Returns
    -------
    skbio.TreeNode :
        Tree representation of clustering
    """
    # Build the table from the `table` argument; the body used to read an
    # undefined name (`mat`) here.
    df = pd.DataFrame(table, index=sample_ids, columns=otu_ids)
    env = df.to_dict()
    res = fast_unifrac(tree, env, weighted=True, modes=['cluster_envs'])
    # The clustering result is serialised with str() and re-read as a
    # TreeNode.
    return TreeNode.read(StringIO(str(res['cluster_envs'])))
def test_unifrac_make_subtree(self):
    """unifrac result should not depend on make_subtree

    environment M contains only tips not in tree, tip j, k is in no envs
    one clade is missing entirely
    values were calculated by hand
    we also test that we still have a valid tree at the end
    """
    base_newick = '((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)'
    # note c,j is len 0 node
    #           /-------- /-a
    # ---------|          \-b
    #          |          /-------- /-c
    #           \--------|          \mt------ /-j
    #                    |                    \-k
    #                     \-------- /-d
    #                               \-e
    t1 = DndParser(base_newick, UniFracTreeNode)

    env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    env_counts = count_envs(env_str.splitlines())

    # NOTE(review): the fractions below rely on true division being in
    # effect for this module -- confirm.
    expected = (array([[0, 10/16, 8/13],
                       [10/16, 0, 8/17],
                       [8/13, 8/17, 0]]),
                ['A', 'B', 'C'])

    for subtree_flag in (False, True):
        observed = fast_unifrac(t1, env_counts, make_subtree=subtree_flag)
        self.assertFloatEqual(observed['distance_matrix'], expected)

    # changing tree topology relative to c,j tips shouldn't change anything
    t2 = DndParser(
        '((a:1,b:2):4,((c:2, (j:1,k:2)mt:17):1,(d:1,e:1):2):3)',
        UniFracTreeNode)
    for subtree_flag in (False, True):
        observed = fast_unifrac(t2, env_counts, make_subtree=subtree_flag)
        self.assertFloatEqual(observed['distance_matrix'], expected)

    # ensure we haven't meaningfully changed the tree
    # by passing it to unifrac: compare t1 with a fresh parse.
    t3 = DndParser(base_newick, UniFracTreeNode)
    t1_tips = sorted([tip.Name for tip in t1.tips()])
    t3_tips = sorted([tip.Name for tip in t3.tips()])
    self.assertEqual(t1_tips, t3_tips)

    fresh_dist = t3.getNodeMatchingName('j').distance(
        t3.getNodeMatchingName('b'))
    used_dist = t1.getNodeMatchingName('j').distance(
        t1.getNodeMatchingName('b'))
    self.assertFloatEqual(used_dist, fresh_dist)
def table2dict(lines):
    '''Turn a tab-separated OTU table into {otu: {sample: count}}.

    `lines` is an iterator of text lines. The first line is the header; its
    first field is ignored and the rest are the sample names. In every
    other line the first field is the OTU name (passed through
    string.translate with the table `tr`) and the remaining fields are
    integer counts.
    '''
    header_fields = next(lines).rstrip().split("\t")
    samples = header_fields[1:]

    dat = {}
    for row in lines:
        cells = row.rstrip().split("\t")
        otu = string.translate(cells[0], tr)
        counts = [int(cell) for cell in cells[1:]]
        dat[otu] = dict(zip(samples, counts))
    return dat


with open('tree.newick') as f:
    raw_tree = f.read()
tree = DndParser(raw_tree, UniFracTreeNode)

with open('../../../../data/rdp_g.counts') as f:
    envs = table2dict(f)

# write the weighted and unweighted tables
for weighted, fn in ((True, 'unifrac-w.dat'), (False, 'unifrac-uw.dat')):
    res = fast_unifrac(tree, envs, weighted=weighted)
    matrix, samples = res['distance_matrix']
    df = pd.DataFrame(data=matrix, index=samples, columns=samples)
    df.to_csv(fn, sep='\t', index=False)
(envs_prob_dict, samples) = EMDU.parse_envs(envs, nodes_in_order) P = envs_prob_dict[samples[0]] Q = envs_prob_dict[samples[1]] #EMDUnifrac with flow t0 = timeit.default_timer() (Z, Flow, diffab) = EMDU.EMDUnifrac_weighted_flow(T, l, nodes_in_order, P, Q) t1 = timeit.default_timer() EMDUnifrac_flow_times.append(t1-t0) #EMDUnifrac no flow t0 = timeit.default_timer() (Z, diffab) = EMDU.EMDUnifrac_weighted(T, l, nodes_in_order, P, Q) t1 = timeit.default_timer() EMDUnifrac_times.append(t1-t0) #FastUnifrac_ weighted t0 = timeit.default_timer() res = fast_unifrac(tr, envs, weighted=True, modes=set(['distance_matrix'])) t1 = timeit.default_timer() FastUnifrac_times.append(t1-t0) i = i+1 #Save means mean_EMDUnifrac_times[tree_sizes.index(tree_size)] = np.array(EMDUnifrac_times).mean() mean_EMDUnifrac_flow_times[tree_sizes.index(tree_size)] = np.array(EMDUnifrac_flow_times).mean() mean_FastUnifrac_times[tree_sizes.index(tree_size)] = np.array(FastUnifrac_times).mean() # Export all mean times np.savetxt('EMDU_mean_times.txt', mean_EMDUnifrac_times, delimiter=',') np.savetxt('EMDU_flow_mean_times.txt', mean_EMDUnifrac_flow_times, delimiter=',') np.savetxt('FastUnifrac__mean_times.txt', mean_FastUnifrac_times, delimiter=',')
def unifrac_recursive_test(ref_tree, tree, sample_names, taxon_names, data,
                           permutations=1000):
    """Run UniFrac clustering at every node of `tree` and score each result.

    For each node visited by tree.traverse(self_before=True,
    self_after=False), unweighted UniFrac clustering ('cluster_envs' mode)
    is run on that node. The clustered tree is compared with `ref_tree`
    through tip-to-tip distances and subsets, and with `permutations`
    copies of `ref_tree` whose tip names have been shuffled. The idea is
    that a node whose clustering matches the reference tree marks a group
    whose evolution mirrored that of the reference tree.

    ref_tree: reference tree that the clustering is supposed to match.
    tree: the tree on which UniFrac is performed recursively.
    sample_names, taxon_names, data: given to make_envs_dict (with `data`
        transposed) to build the UniFrac envs; the env names should match
        the taxon labels of ref_tree.

    Returns (results_dict, acc_dict). Only nodes whose clustered tree has
    more than two tips are recorded:
        results_dict: 'p_vals' (fraction of permuted trees whose subset
            score is not above the real one), 's_tips', 'h_tips',
            's_nodes', 'h_nodes'
        acc_dict: 'lengths', 'dists', 'sets', 'dist_below'

    Note from Jon: this code tests each node against a set of label-permuted
    host trees and returns some additional information about each node. It
    doesn't appear to give sensible results, not sure why: almost every
    node yields either zero or the full number of permutations, i.e. every
    permutation is either a better or a worse match than the true tree.
    """
    UNIFRAC_CLUST_ENVS = "cluster_envs"

    lengths = []
    dists = []
    sets = []
    s_nodes = []
    h_nodes = []
    dist_below = []
    sets_below = []
    h_tips = []
    s_tips = []

    # Build the permuted host trees: copies of ref_tree with tip names
    # reassigned. random_names is shuffled in place, so each permutation
    # starts from the previous one.
    permuted_trees = []
    host_names = ref_tree.getTipNames()
    random_names = ref_tree.getTipNames()
    for _ in range(permutations):
        shuffle(random_names)
        relabelled = ref_tree.copy()
        relabelled.reassignNames(dict(zip(host_names, random_names)))
        permuted_trees.append(relabelled)
    n_permuted = float(len(permuted_trees))

    interaction = data.clip(0, 1)

    # Parse OTU table data into Unifrac-compatible envs
    envs = make_envs_dict(data.T, sample_names, taxon_names)

    for node in tree.traverse(self_before=True, self_after=False):
        try:
            result = fast_unifrac(node, envs, weighted=False,
                                  modes=set([UNIFRAC_CLUST_ENVS]))
            curr_tree = result[UNIFRAC_CLUST_ENVS]
        except (ValueError, AttributeError):
            # ValueError: hit a single node?
            # AttributeError: hit a zero branch length
            continue
        if curr_tree is None:
            # hit single node?
            continue

        # All values are computed before anything is appended so that a
        # failure part-way through cannot leave the result lists with
        # different lengths.
        try:
            l = len(curr_tree.tips())
            d = curr_tree.compareByTipDistances(ref_tree)
            s = curr_tree.compareBySubsets(ref_tree, True)

            # NOTE(review): the reference comparison above passes True as
            # the second argument to compareBySubsets while the permuted
            # comparisons below do not -- confirm this is intended.
            d_b = 0.0
            s_b = 0.0
            for rand_tree in permuted_trees:
                if d >= curr_tree.compareByTipDistances(rand_tree):
                    d_b += 1
                if s >= curr_tree.compareBySubsets(rand_tree):
                    s_b += 1
            d_b = d_b / n_permuted
            s_b = s_b / n_permuted

            # Positions (in taxon_names) of the OTUs below this node.
            otu_subset = node.getTipNames()
            s_vec = [idx for idx in range(len(taxon_names))
                     if taxon_names[idx] in otu_subset]
            s_tips_tmp = len(s_vec)

            # Keep only those rows of the presence/absence matrix, then
            # count the columns with a non-zero sum.
            i_s_slice = interaction[numpy.ix_(s_vec)]
            h_tips_tmp = 0
            for col in range(i_s_slice.shape[1]):
                if i_s_slice[:, col].sum():
                    h_tips_tmp += 1

            if l > 2:
                lengths.append(l)
                dists.append(d)
                sets.append(s)
                s_nodes.append(node)
                h_nodes.append(curr_tree)
                dist_below.append(d_b)
                sets_below.append(s_b)
                h_tips.append(h_tips_tmp)
                s_tips.append(s_tips_tmp)
        except ValueError:
            # no common taxa
            continue

    results_dict = {
        'p_vals': sets_below,
        's_tips': s_tips,
        'h_tips': h_tips,
        's_nodes': s_nodes,
        'h_nodes': h_nodes,
    }
    acc_dict = {
        'lengths': lengths,
        'dists': dists,
        'sets': sets,
        'dist_below': dist_below,
    }
    return (results_dict, acc_dict)
#EMDUnifrac with flow t0 = timeit.default_timer() (Z, Flow, diffab) = EMDU.EMDUnifrac_weighted_flow(T, l, nodes_in_order, P, Q) t1 = timeit.default_timer() EMDUnifrac_flow_times.append(t1 - t0) #EMDUnifrac no flow t0 = timeit.default_timer() (Z, diffab) = EMDU.EMDUnifrac_weighted(T, l, nodes_in_order, P, Q) t1 = timeit.default_timer() EMDUnifrac_times.append(t1 - t0) #FastUnifrac_ weighted t0 = timeit.default_timer() res = fast_unifrac(tr, envs, weighted=True, modes=set(['distance_matrix'])) t1 = timeit.default_timer() FastUnifrac_times.append(t1 - t0) i = i + 1 #Save means mean_EMDUnifrac_times[tree_sizes.index(tree_size)] = np.array( EMDUnifrac_times).mean() mean_EMDUnifrac_flow_times[tree_sizes.index(tree_size)] = np.array( EMDUnifrac_flow_times).mean() mean_FastUnifrac_times[tree_sizes.index(tree_size)] = np.array( FastUnifrac_times).mean() # Export all mean times np.savetxt('EMDU_mean_times.txt', mean_EMDUnifrac_times, delimiter=',') np.savetxt('EMDU_flow_mean_times.txt',