def test_permutation():
    '''Check the permutation helpers on two correlated normal samples.

    Draws 1000 bivariate-normal observations (means 2 and 6), then verifies
    that the two-sample, one-sample and correlation permutation tests return
    statistics inside the expected ranges with p < .001. Finally checks that
    the two-tailed p-value from ``_calc_pvalue`` matches the sum of the upper
    and lower one-tailed p-values.
    '''
    draws = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x, y = draws[:, 0], draws[:, 1]

    def check_mean(result):
        # The mean difference must lie strictly between -6 and -2 and be
        # significant at p < .001.
        mean_diff, p_value = result['mean'], result['p']
        assert (mean_diff < -2) & (mean_diff > -6) & (p_value < .001)

    check_mean(two_sample_permutation(x, y, tail=1))
    check_mean(one_sample_permutation(x - y, tail=1))

    # Same correlation bounds for every metric; kendall is run two-tailed.
    for metric, tail in (('pearson', 1), ('spearman', 1), ('kendall', 2)):
        result = correlation_permutation(x, y, metric=metric, tail=tail)
        r_value, p_value = result['correlation'], result['p']
        assert (r_value > .4) & (r_value < .85) & (p_value < .001)

    # NOTE: the invalid-argument checks below are currently disabled.
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall',tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork',tail=3)

    null_dist = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=null_dist, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=null_dist, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=null_dist, stat=-1.96, tail=1)
    np.testing.assert_almost_equal(two_sided, upper_p + lower_p)
def stats_label_distance(self, labels=None, n_permute=5000, n_jobs=-1):
    ''' Calculate permutation tests on within and between label distance.

    For every unique label, the distances among the items sharing that label
    (upper triangle of the within-label sub-matrix) are compared with the
    distances from those items to every item carrying a different label,
    using ``two_sample_permutation``.

    Args:
        labels (np.array): label for each row/column of the distance
            matrix; when None, ``self.labels`` is used (default=None)
        n_permute (int): number of permutations to run (default=5000)
        n_jobs (int): forwarded to ``two_sample_permutation`` (default=-1)

    Returns:
        dict: maps ``str(label)`` to the ``two_sample_permutation`` result
            for that label's within vs. between distances

    Raises:
        ValueError: if this object is not a single matrix, or if the labels
            do not have one entry per row of the distance matrix

    '''

    if not self.is_single_matrix:
        raise ValueError('This function only works on single adjacency '
                         'matrices.')

    distance = pd.DataFrame(self.squareform())

    # Fall back to the stored labels only when the caller supplied none.
    if labels is None:
        labels = deepcopy(self.labels)
    # An ndarray makes `labels == value` an element-wise mask even when a
    # plain list was passed in.
    labels = np.asarray(labels)
    if labels.ndim != 1 or len(labels) != distance.shape[0]:
        raise ValueError('Labels must be same length as distance matrix')

    values = distance.values
    stats = dict()
    for label in np.unique(labels):
        in_group = labels == label

        # Within group: each unordered pair once (upper triangle, no diagonal).
        block = values[np.ix_(in_group, in_group)]
        within = block[np.triu_indices(block.shape[0], k=1)]

        # Between group: every member against every non-member.
        between = values[np.ix_(in_group, ~in_group)].flatten()

        stats[str(label)] = two_sample_permutation(
            pd.Series(within, name='Distance'),
            pd.Series(between, name='Distance'),
            n_permute=n_permute, n_jobs=n_jobs)
    return stats
def test_permutation():
    '''Check the permutation helpers, matrix and jackknife variants included.

    Runs the two-sample, one-sample and correlation permutation tests on
    1000 correlated bivariate-normal draws, checks that the two-tailed
    p-value from ``_calc_pvalue`` matches the sum of both one-tailed
    p-values, and then exercises ``matrix_permutation`` and
    ``jackknife_permutation`` on distance-style matrices.
    '''
    draws = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x, y = draws[:, 0], draws[:, 1]

    def check_mean(result):
        # The mean difference must lie strictly between -6 and -2 and be
        # significant at p < .001.
        mean_diff, p_value = result['mean'], result['p']
        assert (mean_diff < -2) & (mean_diff > -6) & (p_value < .001)

    def check_correlation(result):
        # The correlation must lie strictly between .4 and .85 with p < .001.
        r_value, p_value = result['correlation'], result['p']
        assert (r_value > .4) & (r_value < .85) & (p_value < .001)

    check_mean(two_sample_permutation(x, y, tail=1, n_permute=1000))
    check_mean(one_sample_permutation(x - y, tail=1, n_permute=1000))

    # kendall is run two-tailed; the other metrics one-tailed.
    for metric, tail in (('pearson', 1), ('spearman', 1), ('kendall', 2)):
        check_correlation(
            correlation_permutation(x, y, metric=metric, tail=tail))

    # NOTE: the invalid-argument checks below are currently disabled.
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall',tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork',tail=3)

    null_dist = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=null_dist, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=null_dist, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=null_dist, stat=-1.96, tail=1)
    np.testing.assert_almost_equal(two_sided, upper_p + lower_p)

    # Test matrix_permutation
    # 190 values fill the condensed form that squareform expands to 20 x 20.
    draws = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 190)
    x = squareform(draws[:, 0])
    y = squareform(draws[:, 1])
    check_correlation(matrix_permutation(x, y, n_permute=1000))

    # Test jackknife_permutation
    means = [5, 10, 15, 25, 35, 45]
    covariance = [
        [1, .2, .5, .7, .8, .9],
        [.2, 1, .4, .1, .1, .1],
        [.5, .4, 1, .1, .1, .1],
        [.7, .1, .1, 1, .3, .6],
        [.8, .1, .1, .3, 1, .5],
        [.9, .1, .1, .6, .5, 1],
    ]
    draws = np.random.multivariate_normal(means, covariance, 200)
    draws = draws + np.random.randn(draws.shape[0], draws.shape[1]) * .5

    # Correlation distance between the six variables, one matrix per half.
    first_half = pairwise_distances(draws[0:100, :].T, metric='correlation')
    second_half = pairwise_distances(draws[100:, :].T, metric='correlation')

    result = jackknife_permutation(first_half, second_half)
    print(result)
    r_value, p_value = result['correlation'], result['p']
    assert (r_value >= .4) & (r_value <= .99) & (p_value <= .05)
def test_permutation():
    '''Check the permutation helpers with their default arguments.

    Uses 100 correlated bivariate-normal draws (means 2 and 6). The mean
    difference must fall between -6 and -2, and the correlation between .4
    and .85, each with p < .001.
    '''
    draws = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 100)
    x, y = draws[:, 0], draws[:, 1]

    def check_mean(result):
        # Range first, then significance, then echo the result for the log.
        mean_diff = result['mean']
        assert (mean_diff < -2) & (mean_diff > -6)
        assert result['p'] < .001
        print(result)

    def check_correlation(result):
        r_value = result['correlation']
        assert (r_value > .4) & (r_value < .85)
        assert result['p'] < .001

    check_mean(two_sample_permutation(x, y))
    check_mean(one_sample_permutation(x - y))

    check_correlation(correlation_permutation(x, y))
    check_correlation(correlation_permutation(x, y, metric='kendall'))
def test_permutation():
    """Check the permutation helpers across methods and metrics.

    Runs the two-sample and one-sample tests on 1000 correlated
    bivariate-normal draws, then ``correlation_permutation`` for every
    combination of permutation method and correlation metric. Also checks
    that the two-tailed p-value from ``_calc_pvalue`` matches the sum of both
    one-tailed p-values to 3 decimals, and finishes with
    ``matrix_permutation``.
    """
    draws = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 1000)
    x, y = draws[:, 0], draws[:, 1]

    def check_mean(result):
        # The mean difference must lie strictly between -6 and -2 and be
        # significant at p < 0.001.
        mean_diff, p_value = result["mean"], result["p"]
        assert (mean_diff < -2) & (mean_diff > -6) & (p_value < 0.001)

    check_mean(two_sample_permutation(x, y, tail=1, n_permute=1000))
    check_mean(one_sample_permutation(x - y, tail=1, n_permute=1000))

    # Methods vary slowest, metrics fastest.
    combos = [
        (method, metric)
        for method in ["permute", "circle_shift", "phase_randomize"]
        for metric in ["spearman", "kendall", "pearson"]
    ]
    for method, metric in combos:
        result = correlation_permutation(
            x, y, metric=metric, method=method, n_permute=500, tail=1)
        r_value, p_value = result["correlation"], result["p"]
        assert (r_value > 0.4) & (r_value < 0.85) & (p_value < 0.05)

    # NOTE: the invalid-argument checks below are currently disabled.
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='kendall',tail=3)
    # with pytest.raises(ValueError):
    #     correlation_permutation(x, y, metric='doesntwork',tail=3)

    null_dist = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=null_dist, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=null_dist, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=null_dist, stat=-1.96, tail=1)
    np.testing.assert_almost_equal(two_sided, upper_p + lower_p, decimal=3)

    # Test matrix_permutation
    # 190 values fill the condensed form that squareform expands to 20 x 20.
    draws = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 190)
    x = squareform(draws[:, 0])
    y = squareform(draws[:, 1])
    result = matrix_permutation(x, y, n_permute=1000)
    r_value, p_value = result["correlation"], result["p"]
    assert (r_value > 0.4) & (r_value < 0.85) & (p_value < 0.001)
def plot_between_label_distance(
    distance,
    labels,
    ax=None,
    permutation_test=True,
    n_permute=5000,
    fontsize=18,
    **kwargs,
):
    """Create a heatmap indicating average between label distance

    Args:
        distance: (pandas dataframe) square brain_distance matrix
        labels: (array-like) group label for each row/column of distance
        ax: axis to plot (default=None, a new figure and axis are created)
        permutation_test: (boolean) if True, compare each group's
            within-group distances against its distances to every group
            with two_sample_permutation and plot the mean differences;
            otherwise plot the average distances
        n_permute: (int) number of samples for permutation test
        fontsize: (int) size of font for plot (currently unused)
        **kwargs: forwarded to the first sns.heatmap call

    Returns:
        out: pandas dataframe of pairwise distance between conditions
        within_dist_out: average pairwise distance matrix
        mn_dist_out: (only if permutation_test=True) average difference in
            distance between conditions
        p_dist_out: (only if permutation_test=True) p-value for difference
            in distance between conditions

    Raises:
        ValueError: if distance is not a square pandas dataframe or labels
            does not have one entry per row of distance

    """

    if not isinstance(distance, pd.DataFrame):
        raise ValueError("distance must be a pandas dataframe")

    if distance.shape[0] != distance.shape[1]:
        raise ValueError("distance must be square.")

    # Keep the per-row labels intact; the unique values are only the groups
    # to iterate over. Masks built from the unique values would no longer
    # line up with the rows and columns of distance.
    labels = np.asarray(labels)
    if len(labels) != distance.shape[0]:
        raise ValueError("Labels must be same length as distance matrix")
    groups = np.unique(labels)

    columns = ["Distance", "Group", "Comparison"]
    frames = []
    for i in groups:
        rows = labels == i
        for j in groups:
            block = distance.loc[rows, labels == j].values
            if i == j:
                # Within group: each unordered pair once, no diagonal.
                # Testing i == j rather than "block is square" keeps two
                # different groups of equal size from being treated as one.
                values = block[np.triu_indices(block.shape[0], k=1)]
            else:
                values = block.flatten()
            if values.size:
                frames.append(
                    pd.DataFrame({
                        "Distance": values,
                        "Group": i,
                        "Comparison": j
                    }))
    out = pd.concat(frames) if frames else pd.DataFrame(columns=columns)

    group_ids = out["Group"].unique()
    comparison_ids = out["Comparison"].unique()

    def _zero_matrix():
        # Group-by-group frame that the loops below fill cell by cell.
        return pd.DataFrame(
            np.zeros((len(group_ids), len(group_ids))),
            columns=group_ids,
            index=group_ids,
        )

    within_dist_out = _zero_matrix()
    for i in group_ids:
        for j in comparison_ids:
            within_dist_out.loc[i, j] = out.loc[(out["Group"] == i) & (
                out["Comparison"] == j)]["Distance"].mean()

    # A caller-supplied axis is drawn on directly; no extra figure is opened.
    if ax is None:
        _, ax = plt.subplots(1)

    if permutation_test:
        mn_dist_out = _zero_matrix()
        p_dist_out = _zero_matrix()
        for i in group_ids:
            # Reference sample: the group's own within-group distances.
            tmp1 = out.loc[(out["Group"] == i) & (out["Comparison"] == i),
                           "Distance"]
            for j in comparison_ids:
                tmp2 = out.loc[(out["Group"] == i) & (out["Comparison"] == j),
                               "Distance"]
                s = two_sample_permutation(tmp1, tmp2, n_permute=n_permute)
                mn_dist_out.loc[i, j] = s["mean"]
                p_dist_out.loc[i, j] = s["p"]
        sns.heatmap(mn_dist_out, ax=ax, square=True, **kwargs)
        # Second pass annotates and outlines only the cells with p <= 0.05.
        sns.heatmap(
            mn_dist_out,
            mask=p_dist_out > 0.05,
            square=True,
            linewidth=2,
            annot=True,
            ax=ax,
            cbar=False,
        )
        return (out, within_dist_out, mn_dist_out, p_dist_out)
    else:
        sns.heatmap(within_dist_out, ax=ax, square=True, **kwargs)
        return (out, within_dist_out)
def plot_mean_label_distance(
    distance,
    labels,
    ax=None,
    permutation_test=False,
    n_permute=5000,
    fontsize=18,
    **kwargs,
):
    """Create a violin plot indicating within and between label distance.

    Args:
        distance: pandas dataframe of distance
        labels: (pandas series or array-like) labels indicating columns and
            rows to group
        ax: matplotlib axis to plot on
        permutation_test: (bool) indicates whether to run permutation test
            or not
        n_permute: (int) number of permutations to run
        fontsize: (int) fontsize for plot labels
        **kwargs: forwarded to sns.violinplot

    Returns:
        f: the object returned by sns.violinplot
        stats: (only if permutation_test=True) dict mapping str(label) to
            the two_sample_permutation result for within vs. between
            distances

    Raises:
        ValueError: if distance is not a square pandas dataframe or labels
            does not have one entry per row of distance

    """

    if not isinstance(distance, pd.DataFrame):
        raise ValueError("distance must be a pandas dataframe")

    if distance.shape[0] != distance.shape[1]:
        raise ValueError("distance must be square.")

    if len(labels) != distance.shape[0]:
        raise ValueError("Labels must be same length as distance matrix")

    # A Series is used unchanged. Lists and other sequences become an
    # ndarray so `labels == value` yields an element-wise mask.
    if not isinstance(labels, pd.Series):
        labels = np.asarray(labels)
    # pd.unique keeps order of first appearance, as Series.unique() does.
    groups = pd.unique(labels)

    columns = ["Distance", "Group", "Type"]
    frames = []
    for i in groups:
        same = labels == i
        # Within group: each unordered pair once, no diagonal.
        within = distance.loc[same, same].values[np.triu_indices(sum(same),
                                                                 k=1)]
        # Between group: every member against every non-member.
        between = distance.loc[same, labels != i].values.flatten()
        for kind, values in (("Within", within), ("Between", between)):
            if len(values):
                frames.append(
                    pd.DataFrame({
                        "Distance": values,
                        "Group": i,
                        "Type": kind
                    }))
    out = pd.concat(frames) if frames else pd.DataFrame(columns=columns)

    f = sns.violinplot(
        x="Group",
        y="Distance",
        hue="Type",
        data=out,
        split=True,
        inner="quartile",
        palette={
            "Within": "lightskyblue",
            "Between": "red"
        },
        ax=ax,
        **kwargs,
    )
    f.set_ylabel("Average Distance", fontsize=fontsize)
    f.set_title("Average Group Distance", fontsize=fontsize)
    if permutation_test:
        stats = dict()
        for i in groups:
            # Within vs. between distances for this group
            tmp1 = out.loc[(out["Group"] == i) & (out["Type"] == "Within"),
                           "Distance"]
            tmp2 = out.loc[(out["Group"] == i) & (out["Type"] == "Between"),
                           "Distance"]
            stats[str(i)] = two_sample_permutation(tmp1,
                                                   tmp2,
                                                   n_permute=n_permute)
        return (f, stats)
    else:
        return f