Example #1
def _prepare_clustering(data, adjacency, cluster_fun, backend, min_adj_ch=0):
    '''Prepare clustering - perform checks and create necessary variables.'''
    # FIXME - some of these lines should be put in _get_cluster_fun
    if cluster_fun is None and backend == 'auto':
        if data.ndim < 3:
            backend = 'auto' if has_numba() else 'mne'

    if data.ndim < 3 and min_adj_ch > 0:
        if backend not in ['auto', 'numba']:
            raise ValueError('for 2d data, ``min_adj_ch`` filtering is '
                             'supported only by the numba backend.')

    # mne_reshape_clusters=True,
    if backend == 'mne':
        # prepare mne clustering, maybe put this in a separate function?
        if min_adj_ch > 0:
            raise ValueError('mne backend does not support ``min_adj_ch`` '
                             'filtering')

        try:
            from mne.stats.cluster_level import _setup_connectivity
            argname = 'connectivity'
        except ImportError:
            from mne.stats.cluster_level import (_setup_adjacency
                                                 as _setup_connectivity)
            argname = 'adjacency'

        if adjacency is not None and isinstance(adjacency, np.ndarray):
            if not sparse.issparse(adjacency):
                adjacency = sparse.coo_matrix(adjacency)
            if adjacency.ndim == 2:
                adjacency = _setup_connectivity(adjacency, np.prod(data.shape),
                                                data.shape[0])

        return _find_clusters_mne, adjacency, argname
    else:
        if cluster_fun is None:
            cluster_fun = _get_cluster_fun(data, adjacency=adjacency,
                                           min_adj_ch=min_adj_ch,
                                           backend=backend)
        return _find_clusters_borsar, adjacency, cluster_fun
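A minimal usage sketch for the helper above; the toy statistic map, the dense
adjacency matrix, and the shapes are illustrative assumptions, not part of the
module this function comes from:

import numpy as np

# toy 2d statistic map: time points x channels (adjacency dimension last)
stat_map = np.random.randn(100, 32)

# dense channel adjacency (each channel neighbours the next); the helper
# converts dense arrays to sparse and runs them through _setup_connectivity
adjacency = np.eye(32, k=1) + np.eye(32, k=-1)

find_fun, adjacency, argname = _prepare_clustering(
    stat_map, adjacency, cluster_fun=None, backend='mne')
# ``find_fun`` is now the mne-based clustering routine and ``argname`` tells
# the caller whether that routine expects ``connectivity`` or ``adjacency``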
Example #2
import mne
import numpy as np
import pandas as pd
from mne.stats.cluster_level import _setup_connectivity

# (``opt``, ``subjs`` and ``proc_dir`` are defined earlier in the script
# this excerpt comes from)
spacing = "ico4"
conds = ["audio", "visselten", "visual"]
wavs = ["4000Hz", "4000cheby", "7000Hz", "4000fftf"]
band = opt.band
indep_var = "Angenehm"
n_freqs = 1
n_srcs = 5124
n_subjs = len(subjs)
perm_n = opt.perm

# setup connectivity
fs_src = mne.read_source_spaces("{}{}_{}-src.fif".format(proc_dir, "fsaverage",
                                                         spacing))
cnx = mne.spatial_src_connectivity(fs_src)
del fs_src
connectivity = _setup_connectivity(cnx, n_srcs, n_freqs)
exclude = np.load("{}fsaverage_{}_exclude.npy".format(proc_dir, spacing))
include = np.ones(cnx.shape[0], dtype="bool")
include[exclude] = 0

# threshold for clustering
threshold = dict(start=0, step=0.2)
#random_state = 42
random = np.random.RandomState()

df_laut = pd.read_pickle("/scratch/jeffhanna/ATT_dat/behave/laut")
df_ang = pd.read_pickle("/scratch/jeffhanna/ATT_dat/behave/ang")

predictor_vars = ["Laut","Subj","Block","Wav"]
dm_laut = df_laut.copy()[predictor_vars]
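The prepared ``connectivity`` and ``include`` mask would then feed the
clustering step. A hedged sketch of that call, assuming a statistic map
``stat`` over all sources and the private ``_find_clusters`` signature from
the same MNE versions this script targets:

from mne.stats.cluster_level import _find_clusters

# hypothetical statistic map (e.g., regression t values per source)
stat = np.random.randn(n_srcs * n_freqs)

# TFCE clustering with the ``threshold`` dict defined above; ``include``
# masks out the excluded fsaverage vertices
clusters, cluster_stats = _find_clusters(stat, threshold, tail=0,
                                         connectivity=connectivity,
                                         include=include)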
Example #3
import numpy as np
from mne.channels import find_ch_connectivity
from mne.stats.cluster_level import _setup_connectivity, _find_clusters

# set random state for replication
random_state = 42
random = np.random.RandomState(random_state)

# number of random samples
boot = 2000

# placeholders for bootstrap samples
cluster_H0 = np.zeros(boot)
f_H0 = np.zeros(boot)

# setup connectivity (``betas``, ``epochs_info`` and ``n_times`` are
# defined earlier in the script this excerpt comes from)
n_tests = betas.shape[1]
connectivity, ch_names = find_ch_connectivity(epochs_info, ch_type='eeg')
connectivity = _setup_connectivity(connectivity, n_tests, n_times)

# threshold for clustering
threshold = 100.

# run bootstrap for regression coefficients
for i in range(boot):
    # extract random subjects from overall sample
    resampled_subjects = random.choice(range(betas.shape[0]),
                                       betas.shape[0],
                                       replace=True)
    # resampled betas
    resampled_betas = betas[resampled_subjects, :]

    # compute standard error of bootstrap sample
    se = resampled_betas.std(axis=0) / np.sqrt(resampled_betas.shape[0])
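    # --- plausible continuation of the loop body (an assumption; the
    # --- original script is truncated here): convert the bootstrap betas
    # --- to t / F values and collect the largest cluster statistic
    t_vals = resampled_betas.mean(axis=0) / se
    f_vals = t_vals ** 2
    f_H0[i] = f_vals.max()

    clusters, cluster_stats = _find_clusters(f_vals, threshold=threshold,
                                             tail=1,
                                             connectivity=connectivity)
    if len(cluster_stats) > 0:
        cluster_H0[i] = cluster_stats.max()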
Example #4
def cluster_based_regression(data, preds, conn, n_permutations=1000,
                             progressbar=True):
    # data has to have observations as 1st dim and channels/vert as last dim
    import numpy as np
    from mypy.stats import compute_regression_t
    from mne.stats.cluster_level import (_setup_connectivity, _find_clusters,
                                         _cluster_indices_to_mask)

    # TODO - move this piece of code to utils
    #        maybe a simple ProgressBar class?
    #      - then support tqdm pbar as input
    if progressbar:
        if progressbar != 'text':
            from tqdm import tqdm_notebook
            pbar = tqdm_notebook(total=n_permutations)
        else:
            from tqdm import tqdm
            pbar = tqdm(total=n_permutations)

    n_obs, n_times, n_channels = data.shape
    connectivity = _setup_connectivity(conn, n_channels * n_times, n_times)

    pos_dist = np.zeros(n_permutations)
    neg_dist = np.zeros(n_permutations)
    perm_preds = preds.copy()

    # regression on non-permuted data
    t_values = compute_regression_t(data, preds)
    clusters, cluster_stats = _find_clusters(t_values.ravel(), threshold=2.,
                                             tail=0, connectivity=connectivity)
    clusters = _cluster_indices_to_mask(clusters, n_channels * n_times)
    clusters = [clst.reshape((n_times, n_channels)) for clst in clusters]

    if not clusters:
        print('No clusters found, permutations are not performed.')
        return t_values, clusters, cluster_stats
    else:
        msg = 'Found {} clusters, computing permutations.'
        print(msg.format(len(clusters)))

    # compute permutations
    for perm in range(n_permutations):
        perm_inds = np.random.permutation(n_obs)
        this_perm = perm_preds[perm_inds]
        perm_tvals = compute_regression_t(data, this_perm)
        _, perm_cluster_stats = _find_clusters(
            perm_tvals.ravel(), threshold=2., tail=0, connectivity=connectivity)

        # if any clusters were found - add max statistic
        if perm_cluster_stats.shape[0] > 0:
            max_val = perm_cluster_stats.max()
            min_val = perm_cluster_stats.min()

            if max_val > 0:
                pos_dist[perm] = max_val
            if min_val < 0:
                neg_dist[perm] = min_val

        if progressbar:
            pbar.update(1)

    # compute permutation probability
    cluster_p = np.array([(pos_dist > cluster_stat).mean() if cluster_stat > 0
                          else (neg_dist < cluster_stat).mean()
                          for cluster_stat in cluster_stats])
    cluster_p *= 2  # because we use a two-tailed test
    cluster_p[cluster_p > 1.] = 1.  # probability has to be <= 1.

    # sort clusters by p value
    cluster_order = np.argsort(cluster_p)
    cluster_p = cluster_p[cluster_order]
    clusters = [clusters[i] for i in cluster_order]

    return t_values, clusters, cluster_p
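A hedged usage sketch for the function above; the synthetic data, the toy
channel adjacency, and passing the predictor as a single column are all
illustrative assumptions:

import numpy as np
from scipy import sparse

n_obs, n_times, n_channels = 30, 50, 20
data = np.random.randn(n_obs, n_times, n_channels)
preds = np.random.randn(n_obs, 1)

# toy channel adjacency: each channel neighbours the next one
conn = sparse.coo_matrix(np.eye(n_channels, k=1) + np.eye(n_channels, k=-1))

t_values, clusters, cluster_p = cluster_based_regression(
    data, preds, conn, n_permutations=500, progressbar='text')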
Example #5
import numpy as np
from mne.parallel import check_n_jobs
from mne.stats import cluster_level
from mne.utils import logger


def _permutation_cluster_test_AT(X, threshold, tail, n_permutations,
                                 connectivity, n_jobs, seed, max_step, exclude,
                                 step_down_p, t_power, out_type,
                                 check_disjoint, buffer_size):
    """Aux function.

    Note: X is required to be a list. Depending on the length of X,
    either a one-sample t-test or an F test / multi-sample permutation
    scheme is elicited.
    """
    n_jobs = check_n_jobs(n_jobs)
    if out_type not in ['mask', 'indices']:
        raise ValueError('out_type must be either \'mask\' or \'indices\'')
    if not isinstance(threshold, dict) and (tail < 0 and threshold > 0
                                            or tail > 0 and threshold < 0
                                            or tail == 0 and threshold < 0):
        raise ValueError(
            'incompatible tail and threshold signs, got %s and %s' %
            (tail, threshold))

    # check dimensions for each group in X (a list at this stage).
    X = [x[:, np.newaxis] if x.ndim == 1 else x for x in X]
    n_times = X[0].shape[0]

    sample_shape = X[0].shape[1:]
    for x in X:
        if x.shape[1:] != sample_shape:
            raise ValueError('All samples must have the same size')

#    # flatten the last dimensions in case the data is high dimensional
#    X = [np.reshape(x, (x.shape[0], -1)) for x in X]
    n_tests = X[0].shape[1]

    if connectivity is not None and connectivity is not False:
        connectivity = cluster_level._setup_connectivity(
            connectivity, n_tests, n_times)

    if (exclude is not None) and not exclude.size == n_tests:
        raise ValueError('exclude must be the same shape as X[0]')

    # determine if connectivity itself can be separated into disjoint sets
    if check_disjoint and (connectivity is not None
                           and connectivity is not False):
        partitions = cluster_level._get_partitions_from_connectivity(
            connectivity, n_times)
    else:
        partitions = None
    max_clu_lens = np.zeros(n_permutations)
    for i in range(n_permutations):
        # logger.info('Running initial clustering')
        include = None
        out = cluster_level._find_clusters(X[i][0],
                                           threshold,
                                           tail,
                                           connectivity,
                                           max_step=max_step,
                                           include=include,
                                           partitions=partitions,
                                           t_power=t_power,
                                           show_info=True)
        clusters, cluster_stats = out

        logger.info('Found %d clusters' % len(clusters))

        # convert clusters to old format
        if connectivity is not None and connectivity is not False:
            # our algorithms output lists of indices by default
            if out_type == 'mask':
                clusters = cluster_level._cluster_indices_to_mask(
                    clusters, n_tests)
        else:
            # ndimage outputs slices or boolean masks by default
            if out_type == 'indices':
                clusters = cluster_level._cluster_mask_to_indices(clusters)

        # The clusters should have the same shape as the samples
        clusters = cluster_level._reshape_clusters(clusters, sample_shape)
        max_clu_len = 0
        for clu in clusters:
            max_clu_len = max(max_clu_len, len(clu[0]))
        logger.info('Max cluster length %d' % max_clu_len)
        max_clu_lens[i] = max_clu_len
    return max_clu_lens, clusters
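The returned null distribution of maximum cluster lengths can be turned into
a cluster-extent cutoff; a small sketch (the 95th-percentile criterion is an
illustrative choice, not something the function above prescribes):

import numpy as np

# max_clu_lens holds one maximum cluster length per permutation, so the
# 95th percentile gives a p < .05 cluster-extent threshold
extent_thresh = np.percentile(max_clu_lens, 95)
print('clusters longer than %d samples are significant at p < .05'
      % extent_thresh)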
Example #6
def cluster_based_regression(data,
                             preds,
                             adjacency=None,
                             n_permutations=1000,
                             stat_threshold=None,
                             alpha_threshold=0.05,
                             cluster_pred=None,
                             backend='auto',
                             progressbar=True,
                             return_distribution=False,
                             stat_fun=None):
    '''Compute cluster-based permutation test with regression as the
    statistical function.

    Parameters
    ----------
    data : numpy array
        N-dimensional numpy array with data to predict with regression. The
        first dimension has to correspond to observations. If ``adjacency``
        is given, the last dimension has to correspond to the adjacency
        space (for example channels or vertices).
    preds : numpy array
        Predictors array of shape ``(n_observations, n_predictors)`` to use in
        regression.
    adjacency : numpy array, optional
        Adjacency matrix for the last ``data`` dimension. If ``None`` (default)
        lattice/grid adjacency is used.
    n_permutations : int
        Number of permutations to perform to obtain a Monte Carlo estimate
        of the null hypothesis distribution. More permutations result in
        more accurate p values. Default is 1000.
    stat_threshold : float | None
        Cluster inclusion threshold in t value. Only data points exceeding this
        value of the t statistic (either ``t value > stat_threshold`` or
        ``t value < -stat_threshold``) form clusters. Default is ``None``,
        which means that cluster inclusion threshold is set according to
        ``alpha_threshold``. If both ``stat_threshold`` and ``alpha_threshold``
        are set, ``stat_threshold`` takes priority.
    alpha_threshold : float | None
        Cluster inclusion threshold as a critical p value. Only data points
        where the p value of the predictor effect is lower than the critical
        value form clusters. Default is 0.05.
    cluster_pred : int
        Specify which predictor to use in clustering. Must be an integer: a
        zero-based index into the t values matrix returned by
        ``compute_regression_t``. Use values greater than zero: index zero
        refers to the intercept, which should be tested using a different
        permutation scheme than the one used here.
    backend : str
        Clustering backend. Defaults to ``'auto'``; ``'numba'`` can also be
        chosen explicitly and should be faster for 3d clustering, but
        requires the numba package.
    progressbar : bool
        Whether to report the progress of permutations using a progress bar.
        The default is ``True`` which uses tqdm progress bar.
    return_distribution : bool
        Whether to return the permutation distribution as an additional,
        fourth output argument.
    stat_fun : None | callable
        Function to compute regression. The function should take two arguments:
        data (data to predict) and preds (predictors to use) and return a
        matrix of regression parameters.

    Returns
    -------
    t_values : numpy array
        Statistical map of t values for the effect of the predictor of
        interest.
    clusters : list of numpy arrays
        List of boolean numpy arrays. Consecutive arrays correspond to boolean
        cluster masks.
    cluster_p : numpy array
        Numpy array of cluster-level p values.
    distributions : dict
        Dictionary of positive null distribution (``distributions['pos']``) and
        negative null distribution (``distributions['neg']``). Returned only if
        ``return_distribution`` was set to ``True``.
    '''
    # data has to have observations as 1st dim and channels/vert as last dim
    # FIXME: add checks for input types
    preds = _handle_preds(preds)

    if stat_threshold is None:
        from scipy.stats import t
        df = data.shape[0] - 2  # in future: preds.shape[1]
        stat_threshold = t.ppf(1 - alpha_threshold / 2, df)

    if stat_fun is None:
        stat_fun = compute_regression_t

    use_3d_clustering = data.ndim > 3 and adjacency is not None

    n_obs = data.shape[0]
    if adjacency is not None and not use_3d_clustering:
        try:
            from mne.stats.cluster_level import _setup_connectivity
        except ImportError:
            from mne.stats.cluster_level import (_setup_adjacency as
                                                 _setup_connectivity)
        adjacency = _setup_connectivity(adjacency, np.prod(data.shape[1:]),
                                        data.shape[1])

    pos_dist = np.zeros(n_permutations)
    neg_dist = np.zeros(n_permutations)
    perm_preds = preds.copy()

    if cluster_pred is None:
        cluster_pred = 1

    # regression on non-permuted data
    t_values = stat_fun(data, preds)[cluster_pred]

    if use_3d_clustering:
        # use 3d clustering
        cluster_fun = _get_cluster_fun(t_values,
                                       adjacency=adjacency,
                                       backend=backend)
        # we need to transpose dimensions for 3d clustering: the adjacency
        # (last) dimension is swapped with the first non-observation dimension
        # FIXME/TODO - this could be eliminated by creating a single unified
        #              clustering function / API
        data_dims = np.arange(data.ndim)
        data_dims[1], data_dims[-1] = data_dims[-1], data_dims[1]
        data = data.transpose(data_dims)
        t_values = t_values.transpose(data_dims[1:] - 1)
    else:
        backend = 'mne'
        cluster_fun = None

    clusters, cluster_stats = find_clusters(t_values,
                                            stat_threshold,
                                            adjacency=adjacency,
                                            cluster_fun=cluster_fun,
                                            backend=backend)

    if use_3d_clustering:
        t_values = t_values.transpose(data_dims[1:] - 1)

    if not clusters:
        print('No clusters found, permutations are not performed.')
        return t_values, clusters, cluster_stats
    else:
        msg = 'Found {} clusters, computing permutations.'
        print(msg.format(len(clusters)))

    # TODO - move progressbar code from DiamSar!
    #      - then support tqdm pbar as input
    if progressbar:
        from tqdm import tqdm
        pbar = tqdm(total=n_permutations)

    # compute permutations
    for perm in range(n_permutations):
        # permute predictors
        perm_inds = np.random.permutation(n_obs)
        perm_preds[:, cluster_pred] = preds[perm_inds, cluster_pred]
        perm_tvals = stat_fun(data, perm_preds)[cluster_pred]

        # cluster
        _, perm_cluster_stats = find_clusters(perm_tvals,
                                              stat_threshold,
                                              adjacency=adjacency,
                                              cluster_fun=cluster_fun,
                                              backend=backend,
                                              mne_reshape_clusters=False)

        # if any clusters were found - add max statistic
        if len(perm_cluster_stats) > 0:
            max_val = perm_cluster_stats.max()
            min_val = perm_cluster_stats.min()

            if max_val > 0:
                pos_dist[perm] = max_val
            if min_val < 0:
                neg_dist[perm] = min_val

        if progressbar:
            pbar.update(1)

    # compute permutation probability
    cluster_p = np.array([
        (pos_dist > cluster_stat).mean() if cluster_stat > 0 else
        (neg_dist < cluster_stat).mean() for cluster_stat in cluster_stats
    ])
    cluster_p *= 2  # because we use a two-tailed test
    cluster_p[cluster_p > 1.] = 1.  # probability has to be <= 1.

    # sort clusters by p value
    cluster_order = np.argsort(cluster_p)
    cluster_p = cluster_p[cluster_order]
    clusters = [clusters[i] for i in cluster_order]

    if use_3d_clustering:
        clusters = [clst.transpose(data_dims[1:] - 1) for clst in clusters]

    out = t_values, clusters, cluster_p
    if return_distribution:
        distribution = dict(pos=pos_dist, neg=neg_dist)
        out += (distribution, )

    return out
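Finally, a hedged usage sketch for this refactored version; the shapes, the
sparse toy adjacency, and treating ``preds`` as a single-predictor column
(with the intercept handled internally) are assumptions:

import numpy as np
from scipy import sparse

n_obs, n_times, n_channels = 25, 40, 16
data = np.random.randn(n_obs, n_times, n_channels)
preds = np.random.randn(n_obs, 1)

# toy channel adjacency as a sparse matrix, as _setup_connectivity expects
adjacency = sparse.coo_matrix(np.eye(n_channels, k=1) +
                              np.eye(n_channels, k=-1))

t_values, clusters, cluster_p, dist = cluster_based_regression(
    data, preds, adjacency=adjacency, n_permutations=500,
    alpha_threshold=0.05, return_distribution=True)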