def __str__(self):
    """Return a plain-text summary of the names stored on this object.

    The summary lists, in alphabetical order, the names of all entries in
    ``self._factors`` for which ``_vsl.isvar`` is true, followed by the
    names of those for which ``_vsl.isfactor`` is true.  When the instance
    has a ``_stats`` attribute, the names of its values are appended under
    a ``SEGMENTS`` heading, in the mapping's own iteration order (they are
    not sorted).
    """
    entries = list(self._factors.values())
    factor_names = sorted(e.name for e in entries if _vsl.isfactor(e))
    var_names = sorted(e.name for e in entries if _vsl.isvar(e))

    summary = '\n'.join([
        'Variables:',
        ', '.join(var_names),
        'Factors:',
        ', '.join(factor_names),
    ])

    # _stats is optional; only report segments when it has been set
    if hasattr(self, '_stats'):
        segment_names = ', '.join(s.name for s in self._stats.values())
        summary = summary + '\n\nSEGMENTS:\n' + segment_names
    return summary
def correlations(Y, Xs, cat=None, levels=[.05, .01, .001], diff=None, sub=None,
                 pmax=None, nan=True):
    """
    Build a table with the correlations between ``Y`` and each entry of ``Xs``.

    :arg var Y: first variable; its ``name`` appears in the table title
    :arg Xs: second variable, or a list of variables
    :arg cat: show correlations separately for different groups in the
        data: one row is requested for every cell in ``cat.cells``, using
        ``cat == cell`` as selection. Must pass ``iscategorial``.
    :arg list levels: significance levels to mark (converted to an array
        and forwarded to ``_corr_to_table``)
    :arg diff: not implemented; anything but ``None`` raises
        ``NotImplementedError``
    :arg sub: use only a subset of the data (applied to ``Y``, to every
        entry of ``Xs``, and to ``cat`` if it is a model or factor)
    :arg pmax: (None) don't show correlations with p>pmax; forwarded to
        ``_corr_to_table``. If set, a closing note ``all other p>pmax`` is
        added to the table.
    :arg nan: ``True``: display correlation which yield NAN; ``False``:
        hide NANs but mention occurrence in summary; ``None``: don't
        mention NANs. Forwarded to ``_corr_to_table``; this function itself
        only adds the NAN count to the closing note when ``nan is False``.

    :rtype: Table
    """
    levels = np.array(levels)

    # accept a single variable as well as a list of variables
    if isvar(Xs):
        Xs = [Xs]

    # restrict all inputs to the requested subset
    if sub is not None:
        Y = Y[sub]
        Xs = [X[sub] for X in Xs]
        if ismodel(cat) or isfactor(cat):
            cat = cat[sub]

    if diff is not None:
        raise NotImplementedError

    # table header; an extra 'Category' column is needed when grouping
    if cat is None:
        headings = ('Variable', 'r', 'p', 'n')
    else:
        assert iscategorial(cat)
        headings = ('Variable', 'Category', 'r', 'p', 'n')
    table = fmtxt.Table('l' * len(headings))
    table.cells(*headings)
    table.midrule()
    table.title("Correlations with %s" % (Y.name))

    # counter kept on the table object; read back below for the footer
    # (presumably incremented by _corr_to_table -- not visible here)
    table._my_nan_count = 0

    if cat is None:
        for X in Xs:
            _corr_to_table(table, Y, X, cat, levels, pmax=pmax, nan=nan)
    else:
        for X in Xs:
            show_name = True
            for cell in cat.cells:
                rows_before = len(table)
                in_cell = (cat == cell)
                _corr_to_table(table, Y, X, in_cell, levels, pmax=pmax,
                               nan=nan, printXname=show_name,
                               label=cell_label(cell))
                # once the table has grown for this X, stop asking for
                # its name on the following rows
                if len(table) > rows_before:
                    show_name = False

    # closing note summarising what was left out
    p_text = '' if pmax is None else 'all other p>{p}'.format(p=pmax)
    if nan is False and table._my_nan_count > 0:
        nan_text = '%s NANs' % table._my_nan_count
    else:
        nan_text = ''
    notes = [note for note in (p_text, nan_text) if note]
    if notes:
        table.cell("(%s)" % ', '.join(notes))
    return table
def data(Y, X=None, match=None, cov=[], sub=None, fmt=None, labels=True, showcase=True): """ return a textab.table (printed as tsv table by default) parameters ---------- Y: variable to display (can be model with several dependents) X: categories defining cells (factorial model) match: factor to match values on and return repeated-measures table cov: covariate to report (WARNING: only works with match, where each value on the matching variable corresponds with one value in the covariate) sub: boolean array specifying which values to include (generate e.g. with 'sub=T==[1,2]') fmt: Format string labels: display labels for nominal variables (otherwise display codes) """ if hasattr(Y, '_items'): # dataframe Y = Y._items Y = _data.asmodel(Y) if _data.isfactor(cov) or _data.isvar(cov): cov = [cov] data = [] names_yname = [] # names including Yi.name for matched table headers ynames = [] # names of Yi for independent measures table headers within_list = [] for Yi in Y.effects: _data, datalabels, names, _within = _data._split_Y(Yi, X, match=match, sub=sub, datalabels=match) data += _data names_yname += ['({c})'.format(c=n) for n in names] ynames.append(Yi.name) within_list.append(_within) within = within_list[0] assert all([w==within for w in within_list]) # table n_dependents = len(Y.effects) n_cells = int(len(data) / n_dependents) if within: n, k = len(data[0]), len(data) table = textab.Table('l' * (k + showcase + len(cov))) # header line 1 if showcase: table.cell(match.name) case_labels = datalabels[0] assert all([np.all(case_labels==l) for l in datalabels[1:]]) for i in range(n_dependents): for name in names: table.cell(name.replace(' ','_')) for c in cov: table.cell(c.name) # header line 2 if n_dependents > 1: if showcase: table.cell() for name in ynames: [table.cell('(%s)'%name) for i in range(n_cells)] for c in cov: table.cell() # body table.midrule() for i in range(n): case = case_labels[i] if showcase: table.cell(case) for j in range(k): table.cell(data[j][i], 
fmt=fmt) # covariates indexes = match==case for c in cov: # test it's all the same values case_cov = c[indexes] if len(np.unique(case_cov.x)) != 1: msg = 'covariate for case "%s" has several values'%case raise ValueError(msg) # get value first_i = np.nonzero(indexes)[0][0] cov_value = c[first_i] if _data.isfactor(c) and labels: cov_value = c.cells[cov_value] table.cell(cov_value, fmt=fmt) else: table = textab.Table('l'*(1 + n_dependents)) table.cell(X.name) [table.cell(y) for y in ynames] table.midrule() # data is now sorted: (cell_i within dependent_i) # sort data as (X-cell, dependent_i) data_sorted = [] for i_cell in range(n_cells): data_sorted.append([data[i_dep*n_cells + i_cell] for i_dep in \ range(n_dependents)]) # table for name, cell_data in zip(names, data_sorted): for i in range(len(cell_data[0])): table.cell(name) for dep_data in cell_data: v = dep_data[i] table.cell(v, fmt=fmt) return table