def test_comparing_same_matrices(self):
    # A matrix compared against itself must yield a statistic of 1 for
    # every supported correlation method.
    for method in self.methods:
        for dm in (self.minx, self.miny):
            statistic = mantel(dm, dm, method=method)[0]
            self.assertAlmostEqual(statistic, 1)
def test_two_sided(self):
    np.random.seed(0)
    options = {'method': 'spearman', 'alternative': 'two-sided'}

    # Identical matrices: the statistic is checked for exact equality.
    r, p, n = mantel(self.minx, self.minx, **options)
    self.assertEqual(r, 1)
    self.assertAlmostEqual(p, 0.328)
    self.assertEqual(n, 3)

    # Remaining comparisons, in the same order so the seeded random stream
    # is consumed identically: (other matrix, expected r, expected p).
    for other, exp_r, exp_p in ((self.miny, 0.5, 1.0),
                                (self.minz, -1, 0.322)):
        r, p, n = mantel(self.minx, other, **options)
        self.assertAlmostEqual(r, exp_r)
        self.assertAlmostEqual(p, exp_p)
        self.assertEqual(n, 3)
def test_invalid_distance_matrix(self):
    # Each case: (expected exception, first input, second input).
    cases = (
        # Single asymmetric, non-hollow distance matrix.
        (DissimilarityMatrixError, [[1, 2], [3, 4]], [[0, 0], [0, 0]]),
        # Two asymmetric distance matrices.
        (DistanceMatrixError, [[0, 2], [3, 0]], [[0, 1], [0, 0]]),
    )
    for expected_error, x, y in cases:
        with self.assertRaises(expected_error):
            mantel(x, y)
def test_one_sided_greater(self):
    np.random.seed(0)

    # x vs. y: check each component of the result individually.
    r, p, n = mantel(self.minx, self.miny, alternative='greater')
    self.assertAlmostEqual(r, self.exp_x_vs_y)
    self.assertAlmostEqual(p, 0.324)
    self.assertEqual(n, 3)

    # x vs. itself: checked through the shared helper.
    identical = mantel(self.minx, self.minx, alternative='greater')
    self.assert_mantel_almost_equal(identical, [1, 0.172, 3])
def test_one_sided_greater(self):
    np.random.seed(0)
    # (second matrix, expected statistic, expected p-value); order matters
    # because both calls draw from the same seeded random stream.
    expectations = ((self.miny, self.exp_x_vs_y, 0.324),
                    (self.minx, 1, 0.172))
    for other, exp_r, exp_p in expectations:
        result = mantel(self.minx, other, alternative='greater')
        self.assertAlmostEqual(result[0], exp_r)
        self.assertAlmostEqual(result[1], exp_p)
def get_pairwise_diversity_data(pre_bioms, post_bioms, trim_lengths):
    """For each pre-post pair, gets the pairwise distance matrix of each
    sequence set and does a mantel test between pre and post pairwise
    distance matrices using both jaccard and bray-curtis metrics

    Parameters
    ----------
    pre_bioms: array_like of biom.Table
        pre-trimmed Artifacts in descending trim length order.
        Should be in same order as post_bioms
    post_bioms: array_like of biom.Table
        post-trimmed Artifacts in descending trim length order.
        Should be in same order as pre_bioms
    trim_lengths: array_like
        Trim lengths in descending order, should correspond to other
        arguments

    Returns
    -------
    Pandas dataframe that holds results for each pre-post mantel test.
    It has two rows per pre-post pair (jaccard first, then braycurtis) and
    the columns "trim_length", "dist_type", "r", "pval", "nsamples" and
    "r_sq" (the square of "r").

    Raises
    ------
    ValueError
        If the three arguments do not all have the same length.

    Notes
    -----
    Calls ``np.seterr(all="raise")``, which changes numpy's floating-point
    error handling globally and is not restored on exit.
    """
    print("enter get_pairwise_diversity")
    np.seterr(all="raise")
    if not (len(pre_bioms) == len(post_bioms) == len(trim_lengths)):
        raise ValueError("Length of 3 arguments lists should be same\n"
                         "pre: {}, post: {}, lengths: {}"
                         .format(len(pre_bioms),
                                 len(post_bioms),
                                 len(trim_lengths)))

    cols = ["trim_length", "dist_type", "r", "pval", "nsamples"]
    p_div = pd.DataFrame(index=range(2 * len(pre_bioms)), columns=cols)

    for i in range(len(pre_bioms)):
        pre_biom = pre_bioms[i]
        post_biom = post_bioms[i]

        # Every pair owns two consecutive rows. Deriving both indices from
        # ``i`` guarantees the Bray-Curtis row is never overwritten by the
        # next pair's Jaccard row (the previous counter-based indexing only
        # advanced once per pair).
        jaccard_row = 2 * i
        braycurtis_row = jaccard_row + 1

        # pairwise distance matrices
        pre_d_j = get_pairwise_dist_mat(pre_biom, "jaccard")
        post_d_j = get_pairwise_dist_mat(post_biom, "jaccard")
        r, p, nsamp = mantel(pre_d_j, post_d_j)
        p_div.iloc[jaccard_row] = [trim_lengths[i], "jaccard", r, p, nsamp]

        pre_d_bc = get_pairwise_dist_mat(pre_biom, "braycurtis")
        post_d_bc = get_pairwise_dist_mat(post_biom, "braycurtis")
        print("pre_d_bc, i: {}".format(i))
        print(str(pre_d_bc))
        print("post_d_bc")
        print(str(post_d_bc))
        r, p, nsamp = mantel(pre_d_bc, post_d_bc)
        print("r: {}, p: {}".format(str(r), str(p)))
        p_div.iloc[braycurtis_row] = [trim_lengths[i], "braycurtis",
                                      r, p, nsamp]

    p_div["r_sq"] = p_div["r"]**2
    print("exit get_pairwise_diversity")
    return p_div
def test_no_variation_spearman(self):
    # With one or both inputs lacking variation, the expected statistic and
    # p-value are NaN.
    exp = (np.nan, np.nan, 3)
    input_pairs = (
        (self.miny, self.no_variation),
        (self.no_variation, self.miny),
        (self.no_variation, self.no_variation),
    )
    for alt in self.alternatives:
        for x, y in input_pairs:
            obs = mantel(x, y, method="spearman", alternative=alt)
            npt.assert_equal(obs, exp)
def test_no_side_effects(self):
    # Convert both inputs to float arrays and snapshot them before the call.
    inputs = [np.asarray(dm, dtype='float') for dm in (self.minx, self.miny)]
    snapshots = [np.copy(dm) for dm in inputs]

    mantel(inputs[0], inputs[1])

    # Make sure we haven't modified the input.
    for current, snapshot in zip(inputs, snapshots):
        npt.assert_equal(current, snapshot)
def test_zero_permutations(self):
    # With permutations=0 the statistic is still computed but the p-value
    # is expected to be NaN.
    for alt in self.alternatives:
        for method, exp in (("pearson", self.exp_x_vs_y),
                            ("spearman", 0.5)):
            # swapping order of matrices should give same result
            for first, second in ((self.minx, self.miny),
                                  (self.miny, self.minx)):
                obs = mantel(first, second, permutations=0,
                             method=method, alternative=alt)
                self.assertAlmostEqual(obs[0], exp)
                npt.assert_equal(obs[1], np.nan)
                self.assertEqual(obs[2], 3)
def test_no_variation_spearman(self):
    nan_result = (np.nan, np.nan, 3)
    flat, varied = self.no_variation, self.miny
    for alt in self.alternatives:
        # One input flat, the other flat, then both flat.
        for x, y in ((varied, flat), (flat, varied), (flat, flat)):
            result = mantel(x, y, method='spearman', alternative=alt)
            npt.assert_equal(result, nan_result)
def test_vegan_example(self):
    np.random.seed(0)
    # (extra keyword arguments, expected statistic, expected p-value);
    # the default method (no extra arguments) is the "pearson" case.
    runs = (
        ({}, 0.3047454, 0.002),                      # pearson
        ({"method": "spearman"}, 0.283791, 0.003),   # spearman
    )
    for extra, exp_r, exp_p in runs:
        r, p, n = mantel(self.veg_dm_vegan, self.env_dm_vegan,
                         alternative="greater", **extra)
        self.assertAlmostEqual(r, exp_r)
        self.assertAlmostEqual(p, exp_p)
        self.assertEqual(n, 24)
def test_distance_matrix_instances_with_reordering_and_nonmatching(self):
    reordered_x = self.minx_dm_extra.filter(['1', '0', 'foo', '2'])
    reordered_y = self.miny_dm.filter(['0', '2', '1'])

    # strict=True should disallow IDs that aren't found in both matrices
    with self.assertRaises(ValueError):
        mantel(reordered_x, reordered_y, alternative='less', strict=True)

    np.random.seed(0)

    # strict=False should ignore IDs that aren't found in both matrices
    lenient = mantel(reordered_x, reordered_y, alternative='less',
                     strict=False)
    self.assert_mantel_almost_equal(lenient, [self.exp_x_vs_y, 0.843, 3])
def test_no_variation_pearson(self):
    # Output doesn't match vegan::mantel with method='pearson'. Consider
    # revising output and this test depending on outcome of
    # https://github.com/scipy/scipy/issues/3728

    # test one or both inputs having no variation in their distances:
    # (first input, second input, expected result)
    cases = (
        (self.miny, self.no_variation, (0.0, 1.0, 3)),
        (self.no_variation, self.miny, (0.0, 1.0, 3)),
        (self.no_variation, self.no_variation, (1.0, 1.0, 3)),
    )
    for alt in self.alternatives:
        for x, y, exp in cases:
            obs = mantel(x, y, method="pearson", alternative=alt)
            npt.assert_equal(obs, exp)
def test_vegan_example(self):
    np.random.seed(0)
    veg_dm, env_dm = self.veg_dm_vegan, self.env_dm_vegan

    # pearson
    pearson_result = mantel(veg_dm, env_dm, alternative='greater')
    self.assert_mantel_almost_equal(pearson_result, [0.3047454, 0.002, 24])

    # spearman
    spearman_result = mantel(veg_dm, env_dm, alternative='greater',
                             method='spearman')
    self.assert_mantel_almost_equal(spearman_result, [0.283791, 0.003, 24])
def compare_clusters(args):
    """Compare a reference distance matrix against a set of other matrices.

    For every file in ``args['all']`` a Mantel test against the reference
    matrix is run, and the cophenetic distances of the average-linkage
    clusterings are correlated with Pearson's r. One tab-separated row per
    input file is written to ``args['output']``.

    Parameters
    ----------
    args : mapping
        Must provide the keys ``'ref'`` (path of the reference matrix
        table), ``'all'`` (iterable of paths of the tables to compare) and
        ``'output'`` (path of the report file to write). Each table is read
        as tab-separated with the first column as the index.

    Notes
    -----
    If the comparison of one file raises, the error is printed and all
    four result fields of that row are written as ``"Failed"``; processing
    then continues with the next file. ``check_symmetry`` runs outside that
    guard, so whatever it raises propagates to the caller.
    """
    # ``.values`` replaces ``DataFrame.as_matrix()``, which no longer exists
    # in current pandas releases; both yield the underlying ndarray.
    ref_df = pd.read_table(args['ref'], sep='\t', skipinitialspace=True,
                           index_col=0).values
    check_symmetry(ref_df)
    linkage_ref = linkage(ref_df, 'average')
    _, coph_dists_ref = cophenet(linkage_ref, pdist(ref_df))

    # The context manager closes the report even if an input file cannot be
    # read or fails the symmetry check.
    with open(args['output'], "w") as outfile:
        # NOTE(review): "Manter_P-value" looks like a typo for "Mantel", but
        # the header is kept byte-for-byte since consumers may depend on it.
        outfile.write("Tree_cluster\tMantel_Correlation_Coefficient\t"
                      "Manter_P-value\tCophenetic_Pearson\t"
                      "Cophenetic_P-value\n")
        for path in args['all']:
            fst_df = pd.read_table(path, sep='\t', skipinitialspace=True,
                                   index_col=0).values
            check_symmetry(fst_df)
            try:
                mantel_coeff, p_value_mantel, _ = mantel(ref_df, fst_df)
                linkage_fst = linkage(fst_df, 'average')
                _, coph_dists_fst = cophenet(linkage_fst, pdist(fst_df))
                cophenetic_pearson, p_value_cophenetic = pearsonr(
                    coph_dists_ref, coph_dists_fst)
            except Exception as e:
                # Deliberate best-effort: report the failure and keep going.
                print("Error : %s" % str(e))
                mantel_coeff = "Failed"
                # Previously misspelled, which left the Mantel p-value
                # column at 0.0 instead of "Failed".
                p_value_mantel = "Failed"
                cophenetic_pearson = "Failed"
                p_value_cophenetic = "Failed"
            outfile.write("\t".join([path,
                                     str(mantel_coeff),
                                     str(p_value_mantel),
                                     str(cophenetic_pearson),
                                     str(p_value_cophenetic)]) + "\n")
def test_distance_matrix_instances_as_input(self):
    # Matrices with all matching IDs in the same order.
    np.random.seed(0)
    result = mantel(self.minx_dm, self.miny_dm, alternative='less')
    expected = [self.exp_x_vs_y, 0.843, 3]
    self.assert_mantel_almost_equal(result, expected)
def compare_multiple(pose_data, method='distance',
                     figure_type='aligned_figures'):
    """
    For multi-dancer videos: Get the mean and standard deviation of
    inter-pose similarities for each frame.

    Every unordered pair of figures in ``frame[figure_type]`` is compared.
    With ``method='distance'`` the similarity is the Mantel statistic of the
    two pose matrices; any other value of ``method`` takes the Laplacian
    branch, where the similarity is one minus the absolute sum of the
    element-wise difference of the two dense Laplacian matrices. A pair
    whose matrix is ``None`` contributes NaN, which the NaN-aware mean and
    standard deviation skip.

    Returns ``[frame_means, frame_stdevs]``, two lists with one entry per
    frame.
    """
    use_distance = method == 'distance'
    # Only the matrix builder differs between the two methods.
    build_matrix = get_pose_matrix if use_distance else get_laplacian_matrix

    frame_means = []
    frame_stdevs = []
    for f, frame in enumerate(pose_data):
        print("Processing frame", f, "of", len(pose_data))
        figures = frame[figure_type]
        frame_similarities = []
        for i, _ in enumerate(figures):
            for j, _ in enumerate(figures):
                # Visit each unordered pair exactly once.
                if i >= j:
                    continue
                mi = build_matrix(frame, i)
                mj = build_matrix(frame, j)
                if mi is None or mj is None:
                    similarity = np.nan
                elif use_distance:
                    similarity = mantel(mi, mj)[0]
                else:
                    difference = np.subtract(mi.todense(), mj.todense())
                    similarity = 1 - abs(difference.sum())
                frame_similarities.append(similarity)
        frame_means.append(np.nanmean(frame_similarities))
        frame_stdevs.append(np.nanstd(frame_similarities))
    return [frame_means, frame_stdevs]
def test_distance_matrix_instances_with_reordering_and_nonmatching(self):
    x_ids = ['1', '0', 'foo', '2']
    y_ids = ['0', '2', '1']
    x = self.minx_dm_extra.filter(x_ids)
    y = self.miny_dm.filter(y_ids)

    # strict=True should disallow IDs that aren't found in both matrices
    with self.assertRaises(ValueError):
        mantel(x, y, alternative='less', strict=True)

    np.random.seed(0)

    # strict=False should ignore IDs that aren't found in both matrices
    r, p, n = mantel(x, y, alternative='less', strict=False)
    self.assertAlmostEqual(r, self.exp_x_vs_y)
    self.assertAlmostEqual(p, 0.843)
    self.assertEqual(n, 3)
def test_vegan_example(self):
    np.random.seed(0)
    veg_dm, env_dm = self.veg_dm_vegan, self.env_dm_vegan

    # pearson
    r, p, n = mantel(veg_dm, env_dm, alternative='greater')
    self.assertAlmostEqual(r, 0.3047454)
    self.assertAlmostEqual(p, 0.002)
    self.assertEqual(n, 24)

    # spearman
    r, p, n = mantel(veg_dm, env_dm, alternative='greater',
                     method='spearman')
    self.assertAlmostEqual(r, 0.283791)
    self.assertAlmostEqual(p, 0.003)
    self.assertEqual(n, 24)
def calc_dist_mat_corr(sim_mat, distance_dict):
    """Print the Mantel coefficient of each feature distance matrix.

    The similarity matrix is first converted with ``create_symm_dist_mat``;
    the result is then compared against ``distance_dict[feature_type]`` for
    every feature type in ``FEAT_LIST``. Nothing is returned.
    """
    symm_dist_mat = create_symm_dist_mat(sim_mat)
    for feature_type in FEAT_LIST:
        feature_dist_mat = distance_dict[feature_type]
        # Only the coefficient is reported; p-value and sample count are
        # discarded.
        coeff, _, _ = mantel(symm_dist_mat, feature_dist_mat)
        print("Feature type: %s Coeff: %.3f" % (feature_type, coeff))
def test_zero_permutations(self):
    for alt in self.alternatives:
        for method, exp in (('pearson', self.exp_x_vs_y),
                            ('spearman', 0.5)):
            options = {'permutations': 0, 'method': method,
                       'alternative': alt}
            forward = mantel(self.minx, self.miny, **options)
            self.assertAlmostEqual(forward[0], exp)
            npt.assert_equal(forward[1], np.nan)
            self.assertEqual(forward[2], 3)

            # swapping order of matrices should give same result
            swapped = mantel(self.miny, self.minx, **options)
            self.assertAlmostEqual(swapped[0], exp)
            npt.assert_equal(swapped[1], np.nan)
            self.assertEqual(swapped[2], 3)
def test_hommola_vs_mantel(self):
    # Both tests are run with permutations=0 because the two methods use
    # different permutation strategies, so permuted p-values would not be
    # comparable.
    mantel_options = {"method": "pearson", "permutations": 0,
                      "alternative": "greater"}
    r_mantel, p_mantel, _ = mantel(self.hdist, self.pdist, **mantel_options)
    r_hommola, p_hommola, _ = hommola_cospeciation(
        self.hdist, self.pdist, self.interact_1to1, permutations=0)

    self.assertAlmostEqual(r_hommola, r_mantel)
    npt.assert_equal(p_hommola, p_mantel)
def test_two_sided(self):
    np.random.seed(0)

    def run(other):
        # All three comparisons share the same method and alternative.
        return mantel(self.minx, other, method="spearman",
                      alternative="two-sided")

    same = run(self.minx)
    self.assertEqual(same[0], 1)
    self.assertAlmostEqual(same[1], 0.328)
    self.assertEqual(same[2], 3)

    vs_y = run(self.miny)
    self.assertAlmostEqual(vs_y[0], 0.5)
    self.assertAlmostEqual(vs_y[1], 1.0)
    self.assertEqual(vs_y[2], 3)

    vs_z = run(self.minz)
    self.assertAlmostEqual(vs_z[0], -1)
    self.assertAlmostEqual(vs_z[1], 0.322)
    self.assertEqual(vs_z[2], 3)
def calc_corr(sim_mat1, sim_mat2):
    """Print two correlations between a pair of similarity matrices.

    First the Pearson correlation of the non-diagonal entries of both
    matrices, then the Mantel statistic of the matrices after conversion
    with ``create_symm_dist_mat``. Nothing is returned.
    """
    entries1 = get_non_diagonal_entries(sim_mat1)
    entries2 = get_non_diagonal_entries(sim_mat2)
    non_symm_corr = pearsonr(entries1, entries2)[0]

    dist_mat1 = create_symm_dist_mat(sim_mat1)
    dist_mat2 = create_symm_dist_mat(sim_mat2)
    symm_corr = mantel(dist_mat1, dist_mat2)[0]

    print("Correlation between non-diagonal entries: %.3f" % non_symm_corr)
    print("Mantel correlation: %.3f" % symm_corr)
def test_one_sided_less(self):
    # no need to seed here as permuted test statistics will all be less
    # than or equal to the observed test statistic (1.0)
    for method in self.methods:
        identical = mantel(self.minx, self.minx, method=method,
                           alternative='less')
        self.assertEqual(identical, (1, 1))

    np.random.seed(0)

    # (second matrix, expected statistic, expected p-value), evaluated in
    # order so the seeded random stream is consumed as before.
    for other, exp_r, exp_p in ((self.miny, self.exp_x_vs_y, 0.843),
                                (self.minz, self.exp_x_vs_z, 0.172)):
        result = mantel(self.minx, other, alternative='less')
        self.assertAlmostEqual(result[0], exp_r)
        self.assertAlmostEqual(result[1], exp_p)
def test_one_sided_less(self):
    # no need to seed here as permuted test statistics will all be less
    # than or equal to the observed test statistic (1.0)
    for method in self.methods:
        identical = mantel(self.minx, self.minx, method=method,
                           alternative='less')
        npt.assert_almost_equal(identical, (1, 1, 3))

    np.random.seed(0)

    vs_y = mantel(self.minx, self.miny, alternative='less')
    self.assert_mantel_almost_equal(vs_y, [self.exp_x_vs_y, 0.843, 3])

    vs_z = mantel(self.minx, self.minz, alternative='less')
    self.assert_mantel_almost_equal(vs_z, [self.exp_x_vs_z, 0.172, 3])
def compare_tip_to_tip_distances(tree_fh1, tree_fh2, method="pearson"):
    """Run a Mantel test on the tip-to-tip distances of two trees.

    Both inputs are read with ``TreeNode.read``; the tip-to-tip distance
    matrices are compared using ``mantel`` with ``strict=False`` and the
    given correlation ``method``. Returns whatever ``mantel`` returns.
    """
    # Read both trees before computing any distances.
    trees = [TreeNode.read(fh) for fh in (tree_fh1, tree_fh2)]
    first_dm, second_dm = [tree.tip_tip_distances() for tree in trees]
    return mantel(first_dm, second_dm, strict=False, method=method)
def test_statistic_same_across_alternatives_and_permutations(self):
    # Varying permutations and alternative hypotheses shouldn't affect the
    # computed test statistics.
    permutation_counts = (0, 99, 999)
    for n in permutation_counts:
        for alt in self.alternatives:
            for method, exp in (("pearson", self.exp_x_vs_y),
                                ("spearman", 0.5)):
                result = mantel(self.minx, self.miny, method=method,
                                permutations=n, alternative=alt)
                self.assertAlmostEqual(result[0], exp)
def diversity_analysis(wu_dm_list, bc_dm_list):
    """Correlate the first weighted-UniFrac and Bray-Curtis matrices.

    Runs a Mantel test on ``wu_dm_list[0]`` and ``bc_dm_list[0]``, prints
    the coefficient and p-value, then runs PCoA on ``wu_dm_list[0]``. The
    ordination result is not used or returned by the code shown here.
    """
    from skbio.stats.distance import mantel

    wu_dm = wu_dm_list[0]
    bc_dm = bc_dm_list[0]

    # do the UniFrac and Bray-Curtis distances correlate?
    r, p_value, _ = mantel(wu_dm, bc_dm)
    print("Mantel Correlation COEF=", r)
    print("At significance of 0.05, the p-value for the correlation is = ",
          p_value)

    # next perform principal coordinate analysis (PCoA) on the weighted
    # UniFrac distance matrix:
    from skbio.stats.ordination import pcoa
    wu_pc = pcoa(wu_dm)
def test_no_variation_pearson(self):
    # Output doesn't match vegan::mantel with method='pearson'. Consider
    # revising output and this test depending on outcome of
    # https://github.com/scipy/scipy/issues/3728
    flat, varied = self.no_variation, self.miny
    one_flat = (0.0, 1.0, 3)
    both_flat = (1.0, 1.0, 3)
    for alt in self.alternatives:
        # test one or both inputs having no variation in their
        # distances
        for x, y, exp in ((varied, flat, one_flat),
                          (flat, varied, one_flat),
                          (flat, flat, both_flat)):
            result = mantel(x, y, method='pearson', alternative=alt)
            npt.assert_equal(result, exp)
def find_nearest_pose(pose_matrix, cluster_averages):
    """Return the label whose cluster average best matches ``pose_matrix``.

    The Mantel statistic between ``pose_matrix`` and every entry of
    ``cluster_averages`` is computed; the label with the largest statistic
    wins. Only statistics strictly greater than 0 qualify, so ``-1`` is
    returned when no entry correlates positively (or when
    ``cluster_averages`` is empty).
    """
    best_label, best_corr = -1, 0
    for label in cluster_averages:
        candidate = cluster_averages[label]
        corr = mantel(pose_matrix, candidate)[0]
        # Written as a negated ">" so a NaN statistic is skipped as well.
        if not corr > best_corr:
            continue
        best_label, best_corr = label, corr
    return best_label
def test_distance_matrix_instances_as_input(self):
    # Matrices with all matching IDs in the same order.
    np.random.seed(0)
    r, p, n = mantel(self.minx_dm, self.miny_dm, alternative='less')
    self.assertAlmostEqual(r, self.exp_x_vs_y)
    self.assertAlmostEqual(p, 0.843)
    self.assertEqual(n, 3)
def test_one_sided_less(self):
    # no need to seed here as permuted test statistics will all be less
    # than or equal to the observed test statistic (1.0)
    for method in self.methods:
        identical = mantel(self.minx, self.minx, method=method,
                           alternative='less')
        self.assertEqual(identical, (1, 1, 3))

    np.random.seed(0)

    # (second matrix, expected statistic, expected p-value), evaluated in
    # order so the seeded random stream is consumed as before.
    for other, exp_r, exp_p in ((self.miny, self.exp_x_vs_y, 0.843),
                                (self.minz, self.exp_x_vs_z, 0.172)):
        r, p, n = mantel(self.minx, other, alternative='less')
        self.assertAlmostEqual(r, exp_r)
        self.assertAlmostEqual(p, exp_p)
        self.assertEqual(n, 3)
def test_statistic_same_across_alternatives_and_permutations(self):
    # Varying permutations and alternative hypotheses shouldn't affect the
    # computed test statistics.
    for n in (0, 99, 999):
        for alt in self.alternatives:
            for method, exp in (('pearson', self.exp_x_vs_y),
                                ('spearman', 0.5)):
                options = {'method': method, 'permutations': n,
                           'alternative': alt}
                statistic = mantel(self.minx, self.miny, **options)[0]
                self.assertAlmostEqual(statistic, exp)
def test_distance_matrix_instances_with_lookup(self):
    # Give the two matrices disjoint IDs that the lookup maps onto a
    # shared set of IDs.
    self.minx_dm.ids = ("a", "b", "c")
    self.miny_dm.ids = ("d", "e", "f")
    lookup = {
        "a": "A", "b": "B", "c": "C",
        "d": "A", "e": "B", "f": "C",
    }

    np.random.seed(0)

    r, p, n = mantel(self.minx_dm, self.miny_dm, alternative="less",
                     lookup=lookup)
    self.assertAlmostEqual(r, self.exp_x_vs_y)
    self.assertAlmostEqual(p, 0.843)
    self.assertEqual(n, 3)
def test_two_sided(self):
    np.random.seed(0)
    # (second matrix, expected [statistic, p-value, n]); the order is kept
    # so the seeded random stream is consumed identically.
    cases = (
        (self.minx, [1.0, 0.328, 3]),
        (self.miny, [0.5, 1.0, 3]),
        (self.minz, [-1, 0.322, 3]),
    )
    for other, expected in cases:
        result = mantel(self.minx, other, method='spearman',
                        alternative='two-sided')
        self.assert_mantel_almost_equal(result, expected)
def test_invalid_input(self):
    # Invalid keyword arguments, each expected to raise ValueError.
    bad_options = (
        {'method': 'brofist'},           # invalid correlation method
        {'permutations': -1},            # invalid permutations
        {'alternative': 'no cog yay'},   # invalid alternative
    )
    for options in bad_options:
        with self.assertRaises(ValueError):
            mantel([[1]], [[1]], **options)

    # Invalid matrix pairs, each expected to raise ValueError.
    bad_matrices = (
        (self.minx, [[0, 2], [2, 0]]),          # mismatched shape
        ([[0, 3], [3, 0]], [[0, 2], [2, 0]]),   # too small dms
    )
    for x, y in bad_matrices:
        with self.assertRaises(ValueError):
            mantel(x, y)
def test_distance_matrix_instances_as_input(self):
    # IDs shouldn't matter -- the function should only care about the
    # matrix data
    default_ids_dm = DistanceMatrix(self.minx)
    custom_ids_dm = DistanceMatrix(self.miny, ['no', 'cog', 'yay'])

    np.random.seed(0)

    result = mantel(default_ids_dm, custom_ids_dm, alternative='less')
    for position, expected in enumerate((self.exp_x_vs_y, 0.843)):
        self.assertAlmostEqual(result[position], expected)
def test_no_variation_pearson(self):
    nan_result = (np.nan, np.nan, 3)
    # test one or both inputs having no variation in their distances
    input_pairs = (
        (self.miny, self.no_variation),
        (self.no_variation, self.miny),
        (self.no_variation, self.no_variation),
    )
    for alt in self.alternatives:
        for x, y in input_pairs:
            result = mantel(x, y, method='pearson', alternative=alt)
            npt.assert_equal(result, nan_result)
def test_distance_matrix_instances_with_lookup(self):
    # Disjoint IDs on the two matrices, reconciled through the lookup.
    self.minx_dm.ids = ('a', 'b', 'c')
    self.miny_dm.ids = ('d', 'e', 'f')
    id_lookup = {
        'a': 'A', 'b': 'B', 'c': 'C',
        'd': 'A', 'e': 'B', 'f': 'C',
    }

    np.random.seed(0)

    result = mantel(self.minx_dm, self.miny_dm, alternative='less',
                    lookup=id_lookup)
    self.assert_mantel_almost_equal(result, [self.exp_x_vs_y, 0.843, 3])
def test_distance_matrix_instances_with_lookup(self):
    self.minx_dm.ids = ('a', 'b', 'c')
    self.miny_dm.ids = ('d', 'e', 'f')
    # Maps the IDs of both matrices onto one shared set of IDs.
    lookup = {'a': 'A', 'b': 'B', 'c': 'C',
              'd': 'A', 'e': 'B', 'f': 'C'}

    np.random.seed(0)

    result = mantel(self.minx_dm, self.miny_dm, alternative='less',
                    lookup=lookup)
    statistic, p_value, n = result
    self.assertAlmostEqual(statistic, self.exp_x_vs_y)
    self.assertAlmostEqual(p_value, 0.843)
    self.assertEqual(n, 3)
def test_invalid_input(self):
    # Invalid keyword arguments, each expected to raise ValueError.
    bad_options = (
        {"method": "brofist"},           # invalid correlation method
        {"permutations": -1},            # invalid permutations
        {"alternative": "no cog yay"},   # invalid alternative
    )
    for options in bad_options:
        with self.assertRaises(ValueError):
            mantel([[1]], [[1]], **options)

    # too small dms
    with self.assertRaises(ValueError):
        mantel([[0, 3], [3, 0]], [[0, 2], [2, 0]])
def test_invalid_input(self):
    def assert_rejected(x, y, **options):
        # Every invalid input in this test must raise ValueError.
        with self.assertRaises(ValueError):
            mantel(x, y, **options)

    # invalid correlation method
    assert_rejected([[1]], [[1]], method='brofist')

    # invalid permutations
    assert_rejected([[1]], [[1]], permutations=-1)

    # invalid alternative
    assert_rejected([[1]], [[1]], alternative='no cog yay')

    # too small dms
    assert_rejected([[0, 3], [3, 0]], [[0, 2], [2, 0]])
def test_hommola_vs_mantel(self):
    # No permutations are requested from either test because the two
    # methods use different permutation strategies, so permuted p-values
    # would not be comparable.
    host_dist, par_dist = self.hdist, self.pdist

    r_mantel, p_mantel, _ = mantel(host_dist, par_dist, method='pearson',
                                   permutations=0, alternative='greater')
    r_hommola, p_hommola, _ = hommola_cospeciation(
        host_dist, par_dist, self.interact_1to1, permutations=0)

    self.assertAlmostEqual(r_hommola, r_mantel)
    npt.assert_equal(p_hommola, p_mantel)
def compare_trees(tree_fp1, tree_fp2, method):
    """Report the Mantel correlation of two trees' tip-to-tip distances.

    Both trees are read with ``TreeNode.read``; their tip-to-tip distance
    matrices are compared using ``mantel`` with ``strict=False`` and the
    given correlation ``method``. The coefficient, p-value and number of
    overlapping tips are echoed through ``click``. Nothing is returned.
    """
    # Read both trees before computing any distances.
    trees = [TreeNode.read(fp) for fp in (tree_fp1, tree_fp2)]
    first_dm, second_dm = [tree.tip_tip_distances() for tree in trees]

    coeff, p_value, n = mantel(first_dm, second_dm, strict=False,
                               method=method)

    report = (
        ("Correlation coefficient: %f", coeff),
        ("P-value: %f", p_value),
        ("Number of overlapping tips: %d", n),
    )
    for template, value in report:
        click.echo(template % value)
def compute_mantel(result_tables, taxonomy_level=6, random_trials=999):
    """ Compute mantel r and p-values for a set of results

    result_tables: 2d list of tables to be compared, where the data in the
        inner list is:
        [dataset_id, reference_database_id, method_id,
         parameter_combination_id, table_fp]
    taxonomy_level: level to compute results
    random_trials : number of Monte Carlo trials to run in Mantel test
        (passed to ``mantel`` as ``permutations``)

    Returns a pandas DataFrame with one row per entry of ``result_tables``
    and the columns "Dataset", "Reference", "Method", "Parameters",
    "Mantel r" and "Mantel p".

    Raises ValueError if a BIOM table cannot be parsed.
    """
    collapse_by_taxonomy = get_taxonomy_collapser(taxonomy_level)
    results = []

    for (dataset_id, reference_id, method_id, params,
         actual_table_fp) in result_tables:
        # load the table and collapse it at the specified taxonomic level
        try:
            full_table = load_table(actual_table_fp)
        except ValueError:
            raise ValueError("Couldn't parse BIOM table: %s"
                             % actual_table_fp)
        collapsed_table = full_table.collapse(collapse_by_taxonomy,
                                              axis='observation',
                                              min_group_size=1)

        # Compute Bray-Curtis distances between samples in the full table
        # and in the collapsed table, and compare them with Mantel.
        # This is way too compute-intensive because we're computing the
        # actual dm everytime, which doesn't need to happen.
        collapsed_dm = distance_matrix_from_table(collapsed_table)
        full_dm = distance_matrix_from_table(full_table)

        # ``random_trials`` used to be ignored; forward it so the documented
        # parameter actually controls the permutation count. Only the first
        # two return values are kept, so this works whether ``mantel``
        # returns (r, p) or (r, p, n).
        mantel_r, p = mantel(collapsed_dm, full_dm,
                             permutations=random_trials)[:2]

        results.append((dataset_id, reference_id, method_id, params,
                        mantel_r, p))

    return pd.DataFrame(results, columns=["Dataset", "Reference", "Method",
                                          "Parameters", "Mantel r",
                                          "Mantel p"])
def run_mantel_test(method, fps, distmats, num_perms, tail_type, comment,
                    control_dm_fp=None, control_dm=None,
                    sample_id_map=None):
    """Runs a Mantel test on all pairs of distance matrices.

    Returns a string suitable for writing out to a file containing the
    results of the test.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        method - which Mantel test to run (either 'mantel' or
            'partial_mantel')
        fps - list of filepaths of the distance matrices
        distmats - list of tuples containing dm labels and dm data (i.e. the
            output of parse_distmat)
        num_perms - the number of permutations to use to calculate the
            p-value(s)
        tail_type - the type of tail test to use when calculating the
            p-value(s). Can be 'two-sided', 'greater', or 'less'. Only applies
            when method is mantel
        comment - comment string to add to the beginning of the results string
        control_dm_fp - filepath of the control distance matrix. Only applies
            when method is partial_mantel (it is required then)
        control_dm - tuple containing control distance matrix labels and matrix
            data. Only applies when method is partial_mantel (it is required
            then)
        sample_id_map - dict mapping sample IDs (i.e. what is expected by
            make_compatible_distance_matrices)

    Raises ValueError if fps and distmats differ in length, if method is not
    one of the two supported values, or if method is 'partial_mantel' and the
    control matrix or its filepath is missing.
    """
    if len(fps) != len(distmats):
        raise ValueError("Must provide the same number of filepaths as there "
                         "are distance matrices.")
    if comment is None:
        comment = ''
    result = comment

    if method == 'mantel':
        result += 'DM1\tDM2\tNumber of entries\tMantel r statistic\t' + \
                  'p-value\tNumber of permutations\tTail type\n'
    elif method == 'partial_mantel':
        if not control_dm_fp or not control_dm:
            raise ValueError("You must provide a control matrix filepath and "
                             "control matrix when running the partial Mantel "
                             "test.")
        result += 'DM1\tDM2\tCDM\tNumber of entries\t' + \
            'Mantel r statistic\tp-value\tNumber of permutations\t' +\
            'Tail type\n'
    else:
        raise ValueError("Invalid method '%s'. Must be either 'mantel' or "
                         "'partial_mantel'." % method)

    # Materialize the pairs once: zip() returns a non-sliceable iterator on
    # Python 3, and the list is reused by both loops.
    labeled_dms = list(zip(fps, distmats))

    # Loop over all pairs of dms.
    for i, (fp1, (orig_labels1, orig_data1)) in enumerate(labeled_dms):
        for fp2, (orig_labels2, orig_data2) in labeled_dms[i + 1:]:
            # Make the current pair of distance matrices compatible by only
            # keeping samples that match between them, and ordering them by
            # the same sample IDs. The results are bound to fresh names so
            # the first matrix is not narrowed further for each later pair
            # it takes part in.
            (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                make_compatible_distance_matrices(
                    (orig_labels1, orig_data1), (orig_labels2, orig_data2),
                    lookup=sample_id_map)
            if method == 'partial_mantel':
                # We need to intersect three sets (three matrices).
                (dm1_labels, dm1_data), (cdm_labels, cdm_data) = \
                    make_compatible_distance_matrices(
                        (dm1_labels, dm1_data), control_dm,
                        lookup=sample_id_map)
                (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                    make_compatible_distance_matrices(
                        (dm1_labels, dm1_data), (dm2_labels, dm2_data),
                        lookup=sample_id_map)
                if len(dm1_labels) < 3:
                    result += '%s\t%s\t%s\t%d\tToo few samples\n' % (
                        fp1, fp2, control_dm_fp, len(dm1_labels))
                    continue
            elif len(dm1_labels) < 3:
                result += '%s\t%s\t%d\tToo few samples\n' % (
                    fp1, fp2, len(dm1_labels))
                continue

            dm1 = DistanceMatrix(dm1_data, dm1_labels)
            dm2 = DistanceMatrix(dm2_data, dm2_labels)

            if method == 'mantel':
                corr_coeff, p_value, n = mantel(dm1, dm2, method='pearson',
                                                permutations=num_perms,
                                                alternative=tail_type,
                                                strict=True)
                p_str = p_value_to_str(p_value, num_perms)
                result += "%s\t%s\t%d\t%.5f\t%s\t%d\t%s\n" % (
                    fp1, fp2, n, corr_coeff, p_str, num_perms, tail_type)
            elif method == 'partial_mantel':
                cdm = DistanceMatrix(cdm_data, cdm_labels)
                results = PartialMantel(dm1, dm2, cdm)(num_perms)
                p_str = p_value_to_str(results['mantel_p'], num_perms)
                result += "%s\t%s\t%s\t%d\t%.5f\t%s\t%d\t%s\n" % (
                    fp1, fp2, control_dm_fp, len(dm1_labels),
                    results['mantel_r'], p_str, num_perms, 'greater')
    return result
def test_negative_correlation(self):
    # x and z are expected to correlate negatively under both methods.
    expected_by_method = (('pearson', self.exp_x_vs_z), ('spearman', -1))
    for method, exp in expected_by_method:
        result = mantel(self.minx, self.minz, method=method)
        self.assertAlmostEqual(result[0], exp)