def __init__(self, Y, X, match=None, sub=None, match_func=np.mean, ds=None):
    """Divide Y into cells defined by X.

    Parameters
    ----------
    Y : var, ndvar
        dependent measurement
    X : categorial
        factor or interaction; if None, all of Y is stored as a single
        cell under the key ``None``
    match :
        factor on which cases are matched (i.e. subject for a repeated
        measures comparison). If several data points with the same
        case fall into one cell of X, they are combined using
        match_func. If match is not None, celltable.groups contains the
        {Xcell -> [match values of data points], ...} mapping
        corresponding to self.data
    sub : bool array
        Bool array of length N specifying which cases to include
    match_func : callable
        see match
    ds : dataset
        If a dataset is specified, input items (Y / X / match / sub) can
        be str instead of data-objects, in which case they will be
        retrieved from the dataset.

    Examples
    --------
    Split a repeated-measure variable Y into cells defined by the
    interaction of A and B::

        >>> c = celltable(Y, A % B, match=subject)

    """
    # resolve string arguments through the dataset
    if isinstance(Y, basestring):
        Y = ds.eval(Y)
    if isinstance(X, basestring):
        X = ds.eval(X)
    if isinstance(match, basestring):
        match = ds[match]
    if isinstance(sub, basestring):
        sub = ds.eval(sub)

    if _data.iscategorial(Y) or _data.isndvar(Y):
        if sub is not None:
            Y = Y[sub]
    else:
        Y = _data.asvar(Y, sub)

    if X is not None:
        X = _data.ascategorial(X, sub)

    # Test against None explicitly instead of truth-testing the data
    # object (presumably ambiguous for array-like input -- TODO confirm
    # which input types _data.asfactor accepts).
    has_match = match is not None
    if has_match:
        match = _data.asfactor(match, sub)
        assert len(match) == len(Y)
        self.groups = {}

    # save args
    self.X = X
    self.Y = Y
    self.sub = sub
    self.match = match

    # extract cells and cell data
    self.data = {}
    self.data_indexes = {}
    if X is None:
        # no model: everything ends up in one cell
        self.data[None] = Y
        self.data_indexes[None] = np.ones(len(Y), dtype=bool)
        self.cells = [None]
        return

    self.cells = X.cells
    for cell in self.cells:
        self.data_indexes[cell] = cell_index = X == cell
        newdata = Y[cell_index]
        if has_match:
            group = match[cell_index]
            values = group.cells

            if len(values) < len(group):
                # some match value occurs more than once in this cell:
                # collapse the repeated cases with match_func
                newdata = newdata.compress(group, func=match_func)
                group = _data.factor(values, name=group.name)
            else:
                # Bring the cases into the order of ``values``.
                # Assuming ``group == v`` is a 1-d boolean array, the
                # stacked masks have shape (n_values, n_cases) and the
                # arange broadcasts along the case axis. Summing over
                # the case axis (axis=1) yields, for each value, the
                # position of its occurrence; summing over axis=0 would
                # always give arange(n_cases), i.e. no sorting at all.
                group_ids = [group == v for v in values]
                case_positions = np.arange(len(values))
                sort_arg = np.sum(group_ids * case_positions, axis=1)
                newdata = newdata[sort_arg]
                group = group[sort_arg]

            self.groups[cell] = group

        self.data[cell] = newdata

    if has_match:
        # determine for which pairs of cells the data are matched, i.e.
        # both cells contain the same match values in the same order
        self.within = {}
        for cell1 in self.cells:
            for cell2 in self.cells:
                if cell1 == cell2:
                    continue
                v = self.groups[cell1] == self.groups[cell2]
                # the comparison can evaluate to a plain False instead
                # of an element-wise result; only reduce the latter
                if v is not False:
                    v = all(v)
                self.within[cell1, cell2] = v
                self.within[cell2, cell1] = v
        # list() so that np.all always receives a real sequence
        self.all_within = np.all(list(self.within.values()))
    else:
        self.all_within = False
def __init__(self, Y, X, match=None, sub=None, match_func=np.mean):
    """Divide Y into cells defined by X.

    Parameters
    ----------
    Y :
        dependent measurement
    X :
        factor or interaction
    match :
        factor on which cases are matched (i.e. subject for a repeated
        measures comparison). If several data points with the same
        case fall into one cell of X, they are combined using
        match_func. If match is given, celltable.groups contains the
        {Xcell -> [match values of data points], ...} mapping
        corresponding to self.data
    sub :
        Bool array of length N specifying which cases to include
    match_func :
        see match

    Examples
    --------
    >>> c = S.celltable(Y, A%B, match=subject)

    """
    # factors are only indexed; anything else is converted to a var
    if not _data.isfactor(Y):
        Y = _data.asvar(Y, sub)
    elif sub is not None:
        Y = Y[sub]

    X = _data.ascategorial(X, sub)
    assert X.N == Y.N

    if match:
        match = _data.asfactor(match, sub)
        assert match.N == Y.N
        self.groups = {}

    # keep the (converted) arguments
    self.X = X
    self.Y = Y
    self.sub = sub
    self.match = match

    # per-cell containers
    self.data = {}
    self.data_indexes = {}
    self.cells = X.cells
    self.indexes = sorted(X.cells.keys())

    for cell in self.indexes:
        cell_mask = X == cell
        self.data_indexes[cell] = cell_mask
        cell_data = Y.x[cell_mask]

        if match:
            ids = match.x[cell_mask]
            unique_ids = np.unique(ids)

            if len(unique_ids) < len(ids):
                # repeated ids within the cell: aggregate per id
                aggregated = [match_func(cell_data[ids == ID])
                              for ID in unique_ids]
                cell_data = np.array(aggregated)
                ids = unique_ids
            else:
                # every id occurs once: order the cases by id
                order = np.argsort(ids)
                ids = ids[order]
                cell_data = cell_data[order]

            self.groups[cell] = ids

        self.data[cell] = cell_data

    if match:
        # for every pair of cells, record whether both hold the same
        # match ids in the same order
        self.within = {}
        for cell1 in self.indexes:
            for cell2 in self.indexes:
                if cell1 == cell2:
                    self.within[cell1, cell2] = True
                    continue
                same = self.groups[cell1] == self.groups[cell2]
                if same is not False:
                    same = all(same)
                self.within[cell1, cell2] = same
                self.within[cell2, cell1] = same
        self.all_within = np.all(self.within.values())
    else:
        self.within = False
        self.all_within = False