Example #1
    def __init__(self, Y, X, norm=None, sub=None, ds=None,
                 contours={.05: (.8, .2, .0), .01: (1., .6, .0), .001: (1., 1., .0)},
                 tp=.1, samples=1000, replacement=False,
                 tstart=None, tstop=None, close_time=0, pmax=1):
        """

        Y : ndvar
            Dependent variable.
        X : continuous | None
            The continuous predictor variable.
        norm : None | categorial
            Categories in which to normalize (z-score) X.

        """
        Y = asndvar(Y, sub=sub, ds=ds)
        X = asvar(X, sub=sub, ds=ds)

        self.name = name = "%s corr %s" % (Y.name, X.name)

        # calculate threshold
        # http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#Inference
        self.n = n = len(X)
        df = n - 2
        tt = scipy.stats.distributions.t.isf(tp, df)
        tr = tt / np.sqrt(df + tt ** 2)
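        # tr is the correlation coefficient whose t statistic equals tt,
        # obtained by inverting t = r * sqrt(df) / sqrt(1 - r**2).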

        cs = _cs.Colorspace(cmap=_cs.cm_xpolar, vmax=1, vmin=-1)
        cdist = cluster_dist(Y, N=samples, t_upper=tr, t_lower=-tr,
                             tstart=tstart, tstop=tstop, close_time=close_time,
                             unit='r', pmax=pmax, name=name, cs=cs)

        # Normalization is done before the permutation because we are
        # interested in the variance associated with each subject for the
        # z-scoring.
        Y = Y.copy()
        Y.x = Y.x.reshape((n, -1))
        if norm is not None:
            for cell in norm.cells:
                idx = (norm == cell)
                Y.x[idx] = scipy.stats.mstats.zscore(Y.x[idx])

        x = X.x.reshape((n, -1))
        m_x = np.mean(x)
        if np.isnan(m_x):
            raise ValueError("np.mean(x) is nan")
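        # Store the mean-centered predictor for the correlation computation.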
        self.x = x - m_x

        for _, Yrs in _resample(Y, replacement=replacement, samples=samples):
            r = self._corr(Yrs)
            cdist.add_perm(r)

        r = self._corr(Y)
        cdist.add_original(r)

        self.r_map = cdist.P
        self.all = [[self.r_map] + cdist.clusters]
        self.clusters = cdist
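
The method above calls a self._corr helper that is not included in the snippet. A minimal sketch of such a helper, under the assumption that self.x is the centered predictor with shape (n, 1) and that the argument's .x array has been flattened to shape (n, n_tests) as done in __init__:

    def _corr(self, Y):
        # Hypothetical sketch, not the library's actual implementation:
        # Pearson r between the centered predictor self.x (n, 1) and each
        # column of Y.x (n, n_tests).
        y = Y.x - Y.x.mean(axis=0)
        num = np.sum(self.x * y, axis=0)
        den = np.sqrt(np.sum(self.x ** 2) * np.sum(y ** 2, axis=0))
        return num / den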
Example #2
    def __init__(self, Y, X, t=.1, samples=1000, replacement=False,
                 tstart=None, tstop=None, close_time=0,
                 pmax=1, sub=None, ds=None,
                 ):
        """

        Arguments
        ---------

        Y : ndvar
            Measurements (dependent variable)

        X : categorial
            Model

        t : scalar
            Threshold (uncorrected p-value) to use for finding clusters

        samples : int
            Number of samples to estimate parameter distributions

        replacement : bool
            Whether random samples should be drawn with replacement or
            without.

        tstart, tstop : None | scalar
            Time window for clusters.
            **None**: use the whole epoch;
            **scalar**: use only a part of the epoch

            .. Note:: implementation is not optimal: F-values are still
                computed but ignored.

        close_time : scalar
            Close gaps in clusters that are smaller than this interval. Assumes
            that Y is a uniform time series.

        pmax : scalar <= 1
            Maximum cluster p-value for keeping a cluster.

        sub : index
            Apply the analysis to a subset of the cases in Y and X.

        ds : dataset
            Dataset from which to retrieve the data if Y and X are specified
            as names.


        .. FIXME:: connectivity for >2 dimensional data. Currently, adjacent
            samples are connected.

        """
        Y = self.Y = asndvar(Y, sub=sub, ds=ds)
        X = self.X = asmodel(X, sub=sub, ds=ds)
        lm = _glm.lm_fitter(X)

        # get F-thresholds from p-threshold
        tF = {}
        if lm.full_model:
            for e, effects_d in lm.E_MS.items():
                if effects_d:
                    df_d = sum(ed.df for ed in effects_d)
                    tF[e] = scipy.stats.distributions.f.isf(t, e.df, df_d)
        else:
            df_d = X.df_error
            tF = {e: scipy.stats.distributions.f.isf(t, e.df, df_d) for e in X.effects}

        # Estimate statistic distributions from permuted Ys
        kwargs = dict(tstart=tstart, tstop=tstop, close_time=close_time, unit='F')
        dists = {e: cluster_dist(Y, samples, tF[e], name=e.name, **kwargs) for e in tF}
        self.cluster_dists = dists
        for _, Yrs in _resample(Y, replacement=replacement, samples=samples):
            for e, F in lm.map(Yrs.x, p=False):
                dists[e].add_perm(F)

        # Find clusters in the actual data
        test0 = lm.map(Y.x, p=False)
        self.effects = []
        self.clusters = {}
        self.F_maps = {}
        for e, F in test0:
            self.effects.append(e)
            dist = dists[e]
            dist.add_original(F)
            self.clusters[e] = dist
            self.F_maps[e] = dist.P

        self.name = "ANOVA Permutation Cluster Test"
        self.tF = tF

        self.all = [[self.F_maps[e]] + self.clusters[e].clusters
                    for e in self.X.effects if e in self.F_maps]
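
A hypothetical usage sketch for the class above, assuming it is available as cluster_anova and that ds is a dataset containing an ndvar named 'meg' and the factors 'condition' and 'subject' (all of these names are assumptions, not part of the snippet):

    # Hypothetical names: cluster_anova, ds, 'meg', 'condition', 'subject'.
    res = cluster_anova(ds['meg'], ds['condition'] * ds['subject'],
                        t=.05, samples=1000, tstart=0.1, tstop=0.5)
    # Inspect the per-effect F thresholds computed from the p-threshold t.
    for e in res.effects:
        print(e.name, res.tF[e])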