Пример #1
0
    def __init__(self, Y, X, match=None, sub=None, match_func=np.mean, ds=None):
        """
        Divide Y into cells defined by X.

        Parameters
        ----------

        Y : var, ndvar
            dependent measurement
        X : categorial | None
            factor or interaction defining the cells. If X is None, all of
            Y is stored as a single cell with the key ``None``.
        match :
            factor on which cases are matched (i.e. subject for a repeated
            measures comparisons). If several data points with the same
            case fall into one cell of X, they are combined using
            match_func. If match is not None, celltable.groups contains the
            {Xcell -> [match values of data points], ...} mapping corres-
            ponding to self.data
        sub : bool array
            Bool array of length N specifying which cases to include
        match_func : callable
            see match
        ds : dataset
            If a dataset is specified, input items (Y / X / match / sub) can
            be str instead of data-objects, in which case they will be
            retrieved from the dataset.


        Attributes
        ----------

        X, Y, sub, match :
            the (converted and sub-indexed) input arguments
        cells :
            the cells of X (``[None]`` if X is None)
        data : dict
            {cell -> data of Y in that cell}
        data_indexes : dict
            {cell -> boolean index of the cases in that cell}
        groups : dict
            {cell -> match values}; only set if match is specified
        within : dict
            {(cell1, cell2) -> whether both cells contain the same match
            values}; only set if match is specified and X is not None
        all_within : bool
            whether all entries in ``within`` are true (False without
            match); not set if X is None


        Examples
        --------

        Split a repeated-measure variable Y into cells defined by the
        interaction of A and B::

            >>> c = celltable(Y, A % B, match=subject)

        """
        # resolve names in the dataset
        if isinstance(Y, basestring):
            Y = ds.eval(Y)
        if isinstance(X, basestring):
            X = ds.eval(X)
        if isinstance(match, basestring):
            match = ds[match]
        if isinstance(sub, basestring):
            sub = ds.eval(sub)

        if _data.iscategorial(Y) or _data.isndvar(Y):
            if sub is not None:
                Y = Y[sub]
        else:
            Y = _data.asvar(Y, sub)

        if X is not None:
            X = _data.ascategorial(X, sub)

        # Compare with None instead of relying on the truth value of the
        # data object: truth-testing an array-like either raises or depends
        # on its length.
        has_match = match is not None
        if has_match:
            match = _data.asfactor(match, sub)
            assert len(match) == len(Y)
            self.groups = {}

        # save args
        self.X = X
        self.Y = Y
        self.sub = sub
        self.match = match

        # extract cells and cell data
        self.data = {}
        self.data_indexes = {}
        if X is None:
            self.data[None] = Y
            self.data_indexes[None] = np.ones(len(Y), dtype=bool)
            self.cells = [None]
            return

        self.cells = X.cells

        for cell in self.cells:
            self.data_indexes[cell] = cell_index = X == cell
            newdata = Y[cell_index]
            if has_match:
                group = match[cell_index]
                values = group.cells

                if len(values) < len(group):
                    # some match values occur more than once in this cell:
                    # combine their data points with match_func
                    newdata = newdata.compress(group, func=match_func)
                    group = _data.factor(values, name=group.name)
                else:
                    # Every match value occurs exactly once: bring the cases
                    # into the order of ``values``. group_ids[i][j] is true
                    # where case j has value i; weighting by the case index
                    # and summing over the cases (axis 1) yields the
                    # position of value i. (Summing over axis 0 would just
                    # reproduce 0..n-1 and leave the data unsorted.)
                    group_ids = [group == v for v in values]
                    sort_arg = np.sum(group_ids * np.arange(len(values)), axis=1)
                    newdata = newdata[sort_arg]
                    group = group[sort_arg]

                self.groups[cell] = group

            self.data[cell] = newdata

        if has_match:
            # determine which pairs of cells contain values for the same
            # cases of the match variable
            self.within = {}
            for cell1 in self.cells:
                for cell2 in self.cells:
                    if cell1 == cell2:
                        continue
                    v = self.groups[cell1] == self.groups[cell2]
                    # the comparison can yield a plain False instead of an
                    # element-wise result; only reduce element-wise results
                    if v is not False:
                        v = all(v)
                    self.within[cell1, cell2] = v
                    self.within[cell2, cell1] = v
            # list() so that np.all sees the values, not a dict view object
            self.all_within = np.all(list(self.within.values()))
        else:
            self.all_within = False
Пример #2
0
    def __init__(self, Y, X, match=None, sub=None, match_func=np.mean):
        """
        divides Y into cells defined by X

        Y       dependent measurement
        X       factor or interaction
        match   factor on which cases are matched (i.e. subject for a repeated
                measures comparisons). If several data points with the same
                case fall into one cell of X, they are combined using
                match_func. If match is not None, celltable.groups contains the
                {Xcell -> [match values of data points], ...} mapping corres-
                ponding to self.data
        sub     Bool Array of length N specifying which cases to include
        match_func:  see match

        Attributes set by the constructor:

        X, Y, sub, match   the (converted and sub-indexed) arguments
        cells, indexes     X.cells and its sorted keys
        data               {cell -> data array of Y in that cell}
        data_indexes       {cell -> index of the cases in that cell}
        groups             {cell -> match values} (only with match)
        within             {(cell1, cell2) -> whether both cells contain the
                           same match values}, or False without match
        all_within         whether all entries of within are true (False
                           without match)

        e.g.
        >>> c = S.celltable(Y, A%B, match=subject)

        """
        if _data.isfactor(Y):
            if sub is not None:
                Y = Y[sub]
        else:
            Y = _data.asvar(Y, sub)

        X = _data.ascategorial(X, sub)
        assert X.N == Y.N

        # Compare with None instead of relying on the truth value of the
        # data object: truth-testing an array-like either raises or depends
        # on its length.
        has_match = match is not None
        if has_match:
            match = _data.asfactor(match, sub)
            assert match.N == Y.N
            self.groups = {}

        # save args
        self.X = X
        self.Y = Y
        self.sub = sub
        self.match = match

        # extract cells and cell data
        self.data = {}
        self.data_indexes = {}
        self.cells = X.cells
        self.indexes = sorted(X.cells.keys())
        for cell in self.indexes:
            # separate name so that the ``sub`` argument is not shadowed
            cell_index = X == cell
            self.data_indexes[cell] = cell_index
            newdata = Y.x[cell_index]
            if has_match:
                # get match ids
                group = match.x[cell_index]
                occurring_ids = np.unique(group)

                if len(occurring_ids) < len(group):
                    # some ids occur more than once in this cell: combine
                    # their data points with match_func (np.unique returns
                    # the ids sorted, so the result is ordered by id)
                    newdata = np.array([match_func(newdata[group == ID])
                                        for ID in occurring_ids])
                    group = occurring_ids
                else:
                    # every id occurs once: just order the cases by id
                    sort_arg = np.argsort(group)
                    group = group[sort_arg]
                    newdata = newdata[sort_arg]

                self.groups[cell] = group
            self.data[cell] = newdata

        if has_match:
            # determine which pairs of cells contain values for the same
            # cases of the match variable
            self.within = {}
            for cell1 in self.indexes:
                for cell2 in self.indexes:
                    if cell1 == cell2:
                        self.within[cell1, cell2] = True
                    else:
                        v = self.groups[cell1] == self.groups[cell2]
                        # the comparison can yield a plain False instead of
                        # an element-wise result; only reduce the latter
                        if v is not False:
                            v = all(v)
                        self.within[cell1, cell2] = v
                        self.within[cell2, cell1] = v
            # list() so that np.all sees the values, not a dict view object
            self.all_within = np.all(list(self.within.values()))
        else:
            self.within = self.all_within = False