Example #1
import numpy as np
from numpy.testing import assert_array_equal

from mne.stats import combine_adjacency, permutation_cluster_1samp_test


def test_adjacency_equiv(shape):
    """Test adjacency equivalence for lattice adjacency."""
    from sklearn.feature_extraction import grid_to_graph
    # sklearn requires at least two dimensions
    sk_shape = shape if len(shape) > 1 else (shape + (1, ))
    conn_sk = grid_to_graph(*sk_shape).toarray()
    conn = combine_adjacency(*shape)
    want_shape = (np.prod(shape), ) * 2
    assert conn.shape == conn_sk.shape == want_shape
    assert (conn.data == 1.).all()
    conn = conn.toarray()
    # We end up with some duplicate entries that sum to 2's and 3's when
    # densified; eventually we might want to keep these as 1's, but casting
    # with .astype(bool) is easy enough (and matches sklearn output), so
    # leave it for now.
    assert np.isin(conn, [0, 1, 2, 3]).all()
    assert conn.shape == conn_sk.shape
    assert_array_equal(conn, conn_sk)
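
The duplicate-summing behavior the comment above relies on is standard SciPy
COO semantics, nothing MNE-specific; a minimal standalone sketch:

import numpy as np
from scipy import sparse

# two explicit entries stored at the same (row, col) coordinate
coo = sparse.coo_matrix((np.ones(2), ([0, 0], [1, 1])), shape=(2, 2))
assert (coo.data == 1.).all()     # each stored entry is still 1
assert coo.toarray()[0, 1] == 2.  # duplicates are summed when densified
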
def test_output_equiv(shape, out_type, adjacency):
    """Test equivalence of output types."""
    rng = np.random.RandomState(0)
    n_subjects = 10
    data = rng.randn(n_subjects, *shape)
    data -= data.mean(axis=0, keepdims=True)
    data[:, 2:4] += 2
    data[:, 6:9] += 2
    want_mask = np.zeros(shape, int)
    want_mask[2:4] = 1
    want_mask[6:9] = 2
    if adjacency is not None:
        assert adjacency == 'sparse'
        adjacency = combine_adjacency(*shape)
    clusters = permutation_cluster_1samp_test(X=data,
                                              n_permutations=1,
                                              adjacency=adjacency,
                                              out_type=out_type)[1]
    got_mask = np.zeros_like(want_mask)
    for n, clu in enumerate(clusters, 1):
        if out_type == 'mask':
            if len(shape) == 1 and adjacency is None:
                assert isinstance(clu, tuple)
                assert len(clu) == 1
                assert isinstance(clu[0], slice)
            else:
                assert isinstance(clu, np.ndarray)
                assert clu.dtype == bool
                assert clu.shape == shape
            got_mask[clu] = n
        else:
            assert isinstance(clu, tuple)
            for c in clu:
                assert isinstance(c, np.ndarray)
                assert c.dtype.kind == 'i'
            assert out_type == 'indices'
            got_mask[np.ix_(*clu)] = n
    assert_array_equal(got_mask, want_mask)
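
For reference, the two cluster output types exercised above are
interconvertible with plain NumPy; a minimal sketch (the arrays here are
illustrative, not taken from the test):

import numpy as np

mask = np.zeros(10, dtype=bool)   # 'mask' form of a 1-D cluster
mask[2:4] = True
indices = np.nonzero(mask)        # 'indices' form: tuple of index arrays
assert list(indices[0]) == [2, 3]
back = np.zeros(10, dtype=bool)   # and back to the 'mask' form
back[indices] = True
assert np.array_equal(back, mask)
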
Example #3
import numpy as np

from mne.channels import find_ch_adjacency
from mne.stats import combine_adjacency, permutation_cluster_1samp_test


def permutation_correlation(diff_data, info, n_permutations, p_value):
    sensor_adjacency, ch_names = find_ch_adjacency(info, "eeg")
    adjacency = combine_adjacency(sensor_adjacency, diff_data.shape[2],
                                  diff_data.shape[3])

    T_obs, clusters, cluster_p_values, H0 = permutation_cluster_1samp_test(
        diff_data,
        n_permutations=n_permutations,
        threshold=None,
        tail=0,
        adjacency=adjacency,
        out_type="mask",
        verbose=True,
    )

    # Create new stats image with only significant clusters for plotting
    T_obs_plot = np.nan * np.ones_like(T_obs)
    for c, p_val in zip(clusters, cluster_p_values):
        if p_val <= p_value:
            print(f"Significant cluster with p-value {p_val}")
            T_obs_plot[c] = T_obs[c]

    return T_obs_plot, cluster_p_values[cluster_p_values <= p_value]
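
A quick sizing check for the kind of adjacency built above; the chain-shaped
sensor adjacency and the dimensions are stand-ins for illustration, not
values from the original data:

import numpy as np
from scipy import sparse
from mne.stats import combine_adjacency

n_channels, n_freqs, n_times = 4, 3, 5
# stand-in sensor adjacency: a chain of 4 sensors, with self-connections
chain = np.eye(n_channels, k=1) + np.eye(n_channels, k=-1) + np.eye(n_channels)
sensor_adjacency = sparse.coo_matrix(chain.astype(int))
adjacency = combine_adjacency(sensor_adjacency, n_freqs, n_times)
# one row/column per (channel, freq, time) point of a single observation
assert adjacency.shape == (n_channels * n_freqs * n_times,) * 2
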
Example #4
def test_cluster_permutation_with_adjacency(numba_conditional):
    """Test cluster level permutations with adjacency matrix."""
    from sklearn.feature_extraction.image import grid_to_graph
    condition1_1d, condition2_1d, condition1_2d, condition2_2d = \
        _get_conditions()

    n_pts = condition1_1d.shape[1]
    # we don't care about p-values in any of these, so do fewer permutations
    args = dict(seed=None,
                max_step=1,
                exclude=None,
                out_type='mask',
                step_down_p=0,
                t_power=1,
                threshold=1.67,
                check_disjoint=False,
                n_permutations=50)

    did_warn = False
    for X1d, X2d, func, spatio_temporal_func in \
            [(condition1_1d, condition1_2d,
              permutation_cluster_1samp_test,
              spatio_temporal_cluster_1samp_test),
             ([condition1_1d, condition2_1d],
              [condition1_2d, condition2_2d],
              permutation_cluster_test,
              spatio_temporal_cluster_test)]:
        out = func(X1d, **args)
        adjacency = grid_to_graph(1, n_pts)
        out_adjacency = func(X1d, adjacency=adjacency, **args)
        assert_array_equal(out[0], out_adjacency[0])
        for a, b in zip(out_adjacency[1], out[1]):
            assert_array_equal(out[0][a], out[0][b])
            assert np.all(a[b])

        # test spatio-temporal w/o time adjacency (repeat spatial pattern)
        adjacency_2 = sparse.coo_matrix(
            linalg.block_diag(adjacency.asfptype().todense(),
                              adjacency.asfptype().todense()))
        # nesting here is time then space:
        adjacency_2a = combine_adjacency(np.eye(2), adjacency)
        assert_array_equal(adjacency_2.toarray().astype(bool),
                           adjacency_2a.toarray().astype(bool))

        if isinstance(X1d, list):
            X1d_2 = [np.concatenate((x, x), axis=1) for x in X1d]
        else:
            X1d_2 = np.concatenate((X1d, X1d), axis=1)

        out_adjacency_2 = func(X1d_2, adjacency=adjacency_2, **args)
        # make sure we were operating on the same values
        split = len(out[0])
        assert_array_equal(out[0], out_adjacency_2[0][:split])
        assert_array_equal(out[0], out_adjacency_2[0][split:])

        # make sure we really got 2x the number of original clusters
        n_clust_orig = len(out[1])
        assert len(out_adjacency_2[1]) == 2 * n_clust_orig

        # Make sure that we got the old ones back
        data_1 = {np.sum(out[0][b[:n_pts]]) for b in out[1]}
        data_2 = {np.sum(out_adjacency_2[0][a]) for a in out_adjacency_2[1]}
        assert len(data_1.intersection(data_2)) == len(data_1)

        # now use the other algorithm
        if isinstance(X1d, list):
            X1d_3 = [np.reshape(x, (-1, 2, n_pts)) for x in X1d_2]
        else:
            X1d_3 = np.reshape(X1d_2, (-1, 2, n_pts))

        out_adjacency_3 = spatio_temporal_func(X1d_3,
                                               n_permutations=50,
                                               adjacency=adjacency,
                                               max_step=0,
                                               threshold=1.67,
                                               check_disjoint=True)
        # make sure we were operating on the same values
        split = len(out[0])
        assert_array_equal(out[0], out_adjacency_3[0][0])
        assert_array_equal(out[0], out_adjacency_3[0][1])

        # make sure we really got 2x the number of original clusters
        assert len(out_adjacency_3[1]) == 2 * n_clust_orig

        # Make sure that we got the old ones back
        data_1 = {np.sum(out[0][b[:n_pts]]) for b in out[1]}
        data_2 = {
            np.sum(out_adjacency_3[0][a[0], a[1]])
            for a in out_adjacency_3[1]
        }
        assert len(data_1.intersection(data_2)) == len(data_1)

        # test new versus old method
        out_adjacency_4 = spatio_temporal_func(X1d_3,
                                               n_permutations=50,
                                               adjacency=adjacency,
                                               max_step=2,
                                               threshold=1.67)
        out_adjacency_5 = spatio_temporal_func(X1d_3,
                                               n_permutations=50,
                                               adjacency=adjacency,
                                               max_step=1,
                                               threshold=1.67)

        # clusters could be in a different order; the stat map (element 0 of
        # the output) does not depend on max_step, so index into
        # out_adjacency_4[0] for both runs
        sums_4 = [np.sum(out_adjacency_4[0][a]) for a in out_adjacency_4[1]]
        sums_5 = [np.sum(out_adjacency_4[0][a]) for a in out_adjacency_5[1]]
        sums_4 = np.sort(sums_4)
        sums_5 = np.sort(sums_5)
        assert_array_almost_equal(sums_4, sums_5)

        if not _force_serial:
            pytest.raises(ValueError,
                          spatio_temporal_func,
                          X1d_3,
                          n_permutations=1,
                          adjacency=adjacency,
                          max_step=1,
                          threshold=1.67,
                          n_jobs=-1000)

        # not enough TFCE params
        with pytest.raises(KeyError, match='threshold, if dict, must have'):
            spatio_temporal_func(X1d_3,
                                 adjacency=adjacency,
                                 threshold=dict(me='hello'))

        # too extreme a start threshold
        with pytest.warns(None) as w:
            spatio_temporal_func(X1d_3,
                                 adjacency=adjacency,
                                 threshold=dict(start=10, step=1))
        if not did_warn:
            assert len(w) == 1
            did_warn = True

        with pytest.raises(ValueError, match='threshold.*<= 0 for tail == -1'):
            spatio_temporal_func(X1d_3,
                                 adjacency=adjacency,
                                 tail=-1,
                                 threshold=dict(start=1, step=-1))
        with pytest.warns(RuntimeWarning, match='threshold.* is more extreme'):
            spatio_temporal_func(X1d_3,
                                 adjacency=adjacency,
                                 tail=1,
                                 threshold=dict(start=100, step=1))
        bad_con = adjacency.todense()
        with pytest.raises(ValueError, match='must be a SciPy sparse matrix'):
            spatio_temporal_func(X1d_3,
                                 n_permutations=50,
                                 adjacency=bad_con,
                                 max_step=1,
                                 threshold=1.67)
        bad_con = adjacency.tocsr()[:-1, :-1].tocoo()
        with pytest.raises(ValueError, match='adjacency.*the correct size'):
            spatio_temporal_func(X1d_3,
                                 n_permutations=50,
                                 adjacency=bad_con,
                                 max_step=1,
                                 threshold=1.67)
        with pytest.raises(TypeError, match='must be a'):
            spatio_temporal_func(X1d_3, adjacency=adjacency, threshold=[])
        with pytest.raises(ValueError, match='Invalid value for the \'tail\''):
            with pytest.warns(None):  # sometimes ignoring tail
                spatio_temporal_func(X1d_3, adjacency=adjacency, tail=2)

        # make sure it actually found a significant point
        out_adjacency_6 = spatio_temporal_func(X1d_3,
                                               n_permutations=50,
                                               adjacency=adjacency,
                                               max_step=1,
                                               threshold=dict(start=1, step=1))
        assert np.min(out_adjacency_6[2]) < 0.05

        with pytest.raises(ValueError, match='not compatible'):
            with pytest.warns(RuntimeWarning, match='No clusters'):
                spatio_temporal_func(X1d_3,
                                     n_permutations=50,
                                     adjacency=adjacency,
                                     threshold=1e-3,
                                     stat_fun=lambda *x: f_oneway(*x)[:-1],
                                     buffer_size=None)
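
The block-diagonal identity asserted earlier in the loop (combining an
identity "time" factor with a spatial adjacency repeats the spatial pattern
with no cross-time edges) can also be checked in isolation; a minimal sketch:

import numpy as np
from scipy import sparse, linalg
from mne.stats import combine_adjacency

spatial = sparse.coo_matrix(np.array([[0, 1], [1, 0]]))  # two connected points
combined = combine_adjacency(np.eye(2), spatial).toarray().astype(bool)
block = linalg.block_diag(spatial.toarray(), spatial.toarray()).astype(bool)
# the identity time factor contributes self-connections on the diagonal,
# so compare only the off-diagonal structure
np.fill_diagonal(combined, False)
assert np.array_equal(combined, block)
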
# transpose again to (epochs, frequencies, times, channels)
X = [np.transpose(x, (0, 2, 3, 1)) for x in epochs_power]

# %%
# Remember the note on the adjacency matrix from above: For 3D data, as here,
# we must use :func:`mne.stats.combine_adjacency` to extend the
# sensor-based adjacency to incorporate the time-frequency plane as well.
#
# Here, each integer input is converted into a lattice adjacency and
# combined with the sensor adjacency matrix, so that data points at similar
# times, at similar frequencies, and at nearby sensor locations are
# clustered together.

# our data at each observation is of shape frequencies × times × channels
tfr_adjacency = combine_adjacency(
    len(freqs), len(this_tfr.times), adjacency)
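
# A quick sanity check (sketch, assuming ``adjacency`` above is the square
# sensor adjacency matrix): the combined adjacency has one node per
# (freq, time, channel) point of a single observation
expected_n = len(freqs) * len(this_tfr.times) * adjacency.shape[0]
assert tfr_adjacency.shape == (expected_n, expected_n)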

# %%
# Now we can run the cluster permutation test, but first we have to set a
# cluster-forming threshold. This example decimates in time and uses few
# frequencies, so we need to increase the threshold from the default value
# in order to get well-separated clusters (i.e., so that the algorithm
# doesn't just find one large cluster). For a more principled way of setting
# this parameter, threshold-free cluster enhancement may be used.
# See :ref:`disc-stats` for a discussion.

# This time we don't calculate a threshold based on the F distribution;
# we might as well select an arbitrary threshold for cluster forming.
tfr_threshold = 15.0
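
# For a TFCE alternative, the threshold would be passed as a dict rather
# than a float (the start/step values below are placeholders, not tuned
# for this data):
# tfr_threshold = dict(start=0, step=0.2)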

# run cluster based permutation analysis