Python two_sample_permutation示例，nltools.stats.two_sample_permutation Python示例

示例#1

0

显示文件

def test_permutation():
    """Smoke-test the permutation helpers on random bivariate normal data.

    Draws 1000 paired samples and checks the two mean-difference tests, the
    three correlation metrics, and that the two-tailed p-value returned by
    ``_calc_pvalue`` equals the sum of the two one-tailed p-values.
    """
    samples = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x, y = samples[:, 0], samples[:, 1]

    # Both mean-difference tests must report a mean inside (-6, -2) with
    # p < .001.
    result = two_sample_permutation(x, y, tail=1)
    assert (result['mean'] > -6) & (result['mean'] < -2)
    assert result['p'] < .001
    result = one_sample_permutation(x - y, tail=1)
    assert (result['mean'] > -6) & (result['mean'] < -2)
    assert result['p'] < .001

    # Every metric is held to the same bounds; only kendall runs two-tailed.
    for metric, tail in (('pearson', 1), ('spearman', 1), ('kendall', 2)):
        result = correlation_permutation(x, y, metric=metric, tail=tail)
        assert (result['correlation'] > .4) & (result['correlation'] < .85)
        assert result['p'] < .001

    # with pytest.raises(ValueError):
    # 	correlation_permutation(x, y, metric='kendall',tail=3)
    # with pytest.raises(ValueError):
    # 	correlation_permutation(x, y, metric='doesntwork',tail=3)

    # The two-tailed p-value must match the sum of both one-tailed p-values.
    null_values = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=null_values, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=null_values, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=null_values, stat=-1.96, tail=1)
    np.testing.assert_almost_equal(two_sided, upper_p + lower_p)

示例#2

0

显示文件

    def stats_label_distance(self, labels=None, n_permute=5000, n_jobs=-1):
        ''' Calculate permutation tests on within and between label distance.

            Args:
                labels (np.array):  one label per row of the distance matrix
                        (default=None uses self.labels)
                n_permute (int): number of permutations to run (default=5000)
                n_jobs (int): passed through to two_sample_permutation
                        (default=-1)

            Returns:
                dict:  maps str(label) to the two_sample_permutation result
                        comparing that label's within-label distances to its
                        between-label distances

            Raises:
                ValueError: if this is not a single adjacency matrix, or if
                        the supplied labels do not match the matrix size

        '''

        if not self.is_single_matrix:
            raise ValueError('This function only works on single adjacency '
                             'matrices.')

        distance = pd.DataFrame(self.squareform())

        # Fall back to the stored labels only when none are supplied; labels
        # given by the caller are validated instead of being overwritten.
        if labels is None:
            labels = deepcopy(self.labels)
        elif len(labels) != distance.shape[0]:
            raise ValueError('Labels must be same length as distance matrix')
        # The element-wise masks below need an array, not a plain list.
        labels = np.asarray(labels)

        groups = np.unique(labels)
        frames = []
        for i in groups:
            members = labels == i
            # Upper triangle (k=1) skips the diagonal and mirrored pairs.
            within_values = distance.loc[members, members].values[
                np.triu_indices(int(members.sum()), k=1)]
            between_values = distance.loc[members,
                                          labels != i].values.flatten()
            frames.append(pd.DataFrame({'Distance': within_values,
                                        'Group': i,
                                        'Type': 'Within'}))
            frames.append(pd.DataFrame({'Distance': between_values,
                                        'Group': i,
                                        'Type': 'Between'}))
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        if frames:
            out = pd.concat(frames)
        else:
            out = pd.DataFrame(columns=['Distance', 'Group', 'Type'])

        stats = dict()
        for i in groups:
            in_group = out['Group'] == i
            # Within vs. between distances for this label
            tmp1 = out.loc[in_group & (out['Type'] == 'Within'), 'Distance']
            tmp2 = out.loc[in_group & (out['Type'] == 'Between'), 'Distance']
            stats[str(i)] = two_sample_permutation(tmp1,
                                                   tmp2,
                                                   n_permute=n_permute,
                                                   n_jobs=n_jobs)
        return stats

示例#3

0

显示文件

文件： test_stats.py 项目： danieljwilson/nltools

def test_permutation():
    """Exercise the permutation helpers end to end on random data.

    Covers two_sample_permutation, one_sample_permutation,
    correlation_permutation, _calc_pvalue, matrix_permutation and
    jackknife_permutation, asserting loose bounds on each returned statistic.
    """

    def check_mean(result):
        # Reported mean must lie in (-6, -2) and be significant.
        assert (result['mean'] > -6) & (result['mean'] < -2)
        assert result['p'] < .001

    def check_correlation(result):
        # Reported correlation must lie in (.4, .85) and be significant.
        assert (result['correlation'] > .4) & (result['correlation'] < .85)
        assert result['p'] < .001

    samples = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000)
    x, y = samples[:, 0], samples[:, 1]

    check_mean(two_sample_permutation(x, y, tail=1, n_permute=1000))
    check_mean(one_sample_permutation(x - y, tail=1, n_permute=1000))

    # Only kendall is run two-tailed.
    for metric, tail in (('pearson', 1), ('spearman', 1), ('kendall', 2)):
        check_correlation(
            correlation_permutation(x, y, metric=metric, tail=tail))

    # with pytest.raises(ValueError):
    # 	correlation_permutation(x, y, metric='kendall',tail=3)
    # with pytest.raises(ValueError):
    # 	correlation_permutation(x, y, metric='doesntwork',tail=3)

    # The two-tailed p-value must match the sum of both one-tailed p-values.
    null_values = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=null_values, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=null_values, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=null_values, stat=-1.96, tail=1)
    np.testing.assert_almost_equal(two_sided, upper_p + lower_p)

    # Test matrix_permutation
    samples = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 190)
    x = squareform(samples[:, 0])
    y = squareform(samples[:, 1])
    check_correlation(matrix_permutation(x, y, n_permute=1000))

    # Test jackknife_permutation
    means = [5, 10, 15, 25, 35, 45]
    covariance = [[1, .2, .5, .7, .8, .9],
                  [.2, 1, .4, .1, .1, .1],
                  [.5, .4, 1, .1, .1, .1],
                  [.7, .1, .1, 1, .3, .6],
                  [.8, .1, .1, .3, 1, .5],
                  [.9, .1, .1, .6, .5, 1]]
    samples = np.random.multivariate_normal(means, covariance, 200)
    samples = samples + np.random.randn(*samples.shape) * .5
    # One distance matrix per half of the observations.
    data1 = pairwise_distances(samples[:100].T, metric='correlation')
    data2 = pairwise_distances(samples[100:].T, metric='correlation')

    result = jackknife_permutation(data1, data2)
    print(result)
    assert (result['correlation'] >= .4) & (result['correlation'] <= .99)
    assert result['p'] <= .05

示例#4

0

显示文件

文件： test_stats.py 项目： jeroenvanbaar/nltools

def test_permutation():
    """Check the permutation helpers with their default arguments.

    Uses 100 paired samples and asserts loose bounds on the reported mean
    difference, correlation and p-value.
    """
    samples = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 100)
    x, y = samples[:, 0], samples[:, 1]

    # Mean-difference tests: mean must fall in (-6, -2) with p < .001.
    for run_test in (lambda: two_sample_permutation(x, y),
                     lambda: one_sample_permutation(x - y)):
        result = run_test()
        assert (result['mean'] > -6) & (result['mean'] < -2)
        assert result['p'] < .001
        print(result)

    # Correlation tests: default metric first, then kendall.
    for extra in ({}, {'metric': 'kendall'}):
        result = correlation_permutation(x, y, **extra)
        assert (result['correlation'] > .4) & (result['correlation'] < .85)
        assert result['p'] < .001

示例#5

0

显示文件

文件： test_stats.py 项目： paxtonfitzpatrick/nltools

def test_permutation():
    """Exercise the permutation helpers across every method/metric pair.

    Covers two_sample_permutation, one_sample_permutation,
    correlation_permutation (all method and metric combinations),
    _calc_pvalue and matrix_permutation.
    """

    def correlation_in_bounds(result):
        # Shared bounds for every correlation-based statistic in this test.
        return (result["correlation"] > 0.4) & (result["correlation"] < 0.85)

    samples = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]],
                                            1000)
    x, y = samples[:, 0], samples[:, 1]

    result = two_sample_permutation(x, y, tail=1, n_permute=1000)
    assert (result["mean"] > -6) & (result["mean"] < -2)
    assert result["p"] < 0.001
    result = one_sample_permutation(x - y, tail=1, n_permute=1000)
    assert (result["mean"] > -6) & (result["mean"] < -2)
    assert result["p"] < 0.001

    # Methods vary slowest so the calls run in the same order as a nested
    # method/metric loop.
    combos = [(method, metric)
              for method in ["permute", "circle_shift", "phase_randomize"]
              for metric in ["spearman", "kendall", "pearson"]]
    for method, metric in combos:
        result = correlation_permutation(x,
                                         y,
                                         metric=metric,
                                         method=method,
                                         n_permute=500,
                                         tail=1)
        assert correlation_in_bounds(result)
        assert result["p"] < 0.05

    # with pytest.raises(ValueError):
    # 	correlation_permutation(x, y, metric='kendall',tail=3)
    # with pytest.raises(ValueError):
    # 	correlation_permutation(x, y, metric='doesntwork',tail=3)

    # The two-tailed p-value must match the sum of both one-tailed p-values
    # to three decimals.
    null_values = np.random.normal(0, 1, 10000)
    two_sided = _calc_pvalue(all_p=null_values, stat=1.96, tail=2)
    upper_p = _calc_pvalue(all_p=null_values, stat=1.96, tail=1)
    lower_p = _calc_pvalue(all_p=null_values, stat=-1.96, tail=1)
    np.testing.assert_almost_equal(two_sided, upper_p + lower_p, decimal=3)

    # Test matrix_permutation
    samples = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 190)
    x = squareform(samples[:, 0])
    y = squareform(samples[:, 1])
    result = matrix_permutation(x, y, n_permute=1000)
    assert correlation_in_bounds(result)
    assert result["p"] < 0.001

示例#6

0

显示文件

文件： plotting.py 项目： paxtonfitzpatrick/nltools

def plot_between_label_distance(
    distance,
    labels,
    ax=None,
    permutation_test=True,
    n_permute=5000,
    fontsize=18,
    **kwargs,
):
    """Create a heatmap indicating average between label distance.

    Args:
        distance: (pandas dataframe) square distance matrix
        labels: group label for each row/column of ``distance``
        ax: axis to plot on; a new figure and axis are created when None
        permutation_test: (boolean) whether to run two_sample_permutation
            for every pair of labels and plot the resulting differences
        n_permute: (int) number of samples for permutation test
        fontsize: (int) size of font for plot; accepted but not used here
        **kwargs: forwarded to the first ``sns.heatmap`` call

    Returns:
        out: pandas dataframe of pairwise distance between conditions
        within_dist_out: average pairwise distance matrix
        mn_dist_out: (only if permutation_test=True) average difference in
            distance between conditions
        p_dist_out: (only if permutation_test=True) p-value for difference
            in distance between conditions

    Raises:
        ValueError: if ``labels`` does not have one entry per row of
            ``distance``.
    """

    # Keep the full-length label vector for masking; the unique values are
    # only the groups to iterate over. Masking with the unique values would
    # not line up with the rows/columns of ``distance``.
    labels = np.asarray(labels).ravel()
    if len(labels) != distance.shape[0]:
        raise ValueError("Labels must be same length as distance matrix")
    groups = np.unique(labels)

    frames = []
    for i in groups:
        rows = labels == i
        for j in groups:
            if i == j:
                # Within-group block: upper triangle (k=1) skips the
                # diagonal and mirrored pairs.
                values = distance.loc[rows, rows].values[np.triu_indices(
                    int(rows.sum()), k=1)]
            else:
                values = distance.loc[rows, labels == j].values.flatten()
            frames.append(
                pd.DataFrame({
                    "Distance": values,
                    "Group": i,
                    "Comparison": j
                }))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    if frames:
        out = pd.concat(frames)
    else:
        out = pd.DataFrame(columns=["Distance", "Group", "Comparison"])

    group_ids = out["Group"].unique()
    comparison_ids = out["Comparison"].unique()

    within_dist_out = pd.DataFrame(
        np.zeros((len(group_ids), len(group_ids))),
        columns=group_ids,
        index=group_ids,
    )
    for i in group_ids:
        in_group = out["Group"] == i
        for j in comparison_ids:
            within_dist_out.loc[i, j] = out.loc[
                in_group & (out["Comparison"] == j), "Distance"].mean()

    # Only create a figure when the caller did not supply an axis; opening
    # an extra empty figure alongside a supplied axis serves no purpose.
    if ax is None:
        _, ax = plt.subplots(1)

    if permutation_test:
        mn_dist_out = pd.DataFrame(
            np.zeros((len(group_ids), len(group_ids))),
            columns=group_ids,
            index=group_ids,
        )
        p_dist_out = pd.DataFrame(
            np.zeros((len(group_ids), len(group_ids))),
            columns=group_ids,
            index=group_ids,
        )
        for i in group_ids:
            in_group = out["Group"] == i
            # Within-group distances are the reference for every comparison.
            tmp1 = out.loc[in_group & (out["Comparison"] == i), "Distance"]
            for j in comparison_ids:
                tmp2 = out.loc[in_group & (out["Comparison"] == j),
                               "Distance"]
                s = two_sample_permutation(tmp1, tmp2, n_permute=n_permute)
                mn_dist_out.loc[i, j] = s["mean"]
                p_dist_out.loc[i, j] = s["p"]
        sns.heatmap(mn_dist_out, ax=ax, square=True, **kwargs)
        # Second pass draws annotations only on cells with p <= 0.05.
        sns.heatmap(
            mn_dist_out,
            mask=p_dist_out > 0.05,
            square=True,
            linewidth=2,
            annot=True,
            ax=ax,
            cbar=False,
        )
        return (out, within_dist_out, mn_dist_out, p_dist_out)
    else:
        sns.heatmap(within_dist_out, ax=ax, square=True, **kwargs)
        return (out, within_dist_out)

示例#7

0

显示文件

文件： plotting.py 项目： paxtonfitzpatrick/nltools

def plot_mean_label_distance(
    distance,
    labels,
    ax=None,
    permutation_test=False,
    n_permute=5000,
    fontsize=18,
    **kwargs,
):
    """Create a violin plot indicating within and between label distance.

    Args:
        distance:  pandas dataframe of distance (must be square)
        labels: labels indicating columns and rows to group; a pandas
            Series, or any array-like with one entry per row
        ax: matplotlib axis to plot on
        permutation_test: (bool) indicates whether to run permutation test
            or not
        n_permute: (int) number of permutations to run
        fontsize: (int) fontsize for plot labels
        **kwargs: forwarded to ``sns.violinplot``
    Returns:
        f: the object returned by ``sns.violinplot``
        stats: (only if permutation_test=True) dict mapping str(label) to
            the two_sample_permutation result for within vs. between
            distances

    Raises:
        ValueError: if ``distance`` is not a square pandas dataframe or
            ``labels`` does not match its length.

    """

    if not isinstance(distance, pd.DataFrame):
        raise ValueError("distance must be a pandas dataframe")

    if distance.shape[0] != distance.shape[1]:
        raise ValueError("distance must be square.")

    if len(labels) != distance.shape[0]:
        raise ValueError("Labels must be same length as distance matrix")

    # A Series is used as-is; other array-likes are converted so that
    # element-wise comparison yields a boolean mask.
    if not isinstance(labels, pd.Series):
        labels = np.asarray(labels)
    groups = pd.unique(labels)

    frames = []
    for i in groups:
        members = labels == i
        # Upper triangle (k=1) skips the diagonal and mirrored pairs.
        within_values = distance.loc[members, members].values[
            np.triu_indices(int(members.sum()), k=1)]
        between_values = distance.loc[members, labels != i].values.flatten()
        frames.append(
            pd.DataFrame({
                "Distance": within_values,
                "Group": i,
                "Type": "Within"
            }))
        frames.append(
            pd.DataFrame({
                "Distance": between_values,
                "Group": i,
                "Type": "Between"
            }))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    if frames:
        out = pd.concat(frames)
    else:
        out = pd.DataFrame(columns=["Distance", "Group", "Type"])

    f = sns.violinplot(
        x="Group",
        y="Distance",
        hue="Type",
        data=out,
        split=True,
        inner="quartile",
        palette={
            "Within": "lightskyblue",
            "Between": "red"
        },
        ax=ax,
        **kwargs,
    )
    f.set_ylabel("Average Distance", fontsize=fontsize)
    f.set_title("Average Group Distance", fontsize=fontsize)
    if permutation_test:
        stats = dict()
        for i in groups:
            in_group = out["Group"] == i
            # Within vs. between distances for this label
            tmp1 = out.loc[in_group & (out["Type"] == "Within"), "Distance"]
            tmp2 = out.loc[in_group & (out["Type"] == "Between"), "Distance"]
            stats[str(i)] = two_sample_permutation(tmp1,
                                                   tmp2,
                                                   n_permute=n_permute)
        return (f, stats)
    else:
        return f