Exemplo n.º 1
0
def frequencies(Y, X, sub=None, title="{Yname} Frequencies"):
    """
    Build a table counting how often each category of Y occurs in each cell
    defined by X.

    Y: vector whose frequencies are of interest
    X: model defining cells for which frequencies are displayed
    sub: forwarded to the celltable as its ``sub`` argument
    title: format string for the table title; ``{Yname}`` is replaced with
        the capitalized name of Y. A false value leaves the table untitled.

    Returns the assembled textab.Table.
    """
    Y = _data.asfactor(Y)
    X = _data.asfactor(X)
    ct = _structure.celltable(Y, X, sub=sub)

    category_ids = ct.Y.cells.keys()
    n_categories = len(category_ids)

    # one leading column for the X cell label plus one column per Y category
    table = textab.Table('l' * (n_categories + 1))

    # first header row: the name of Y centered above its category columns
    table.cell()
    table.cell(Y.name, width=n_categories, just='c')
    table.midrule(span=(2, n_categories + 1))

    # second header row: the name of X followed by the Y category labels
    table.cell(X.name)
    for category in category_ids:
        table.cell(ct.Y.cells[category])
    table.midrule()

    # one row per cell: the cell label, then the count for each Y category
    for index in ct.indexes:
        table.cell(ct.cells[index])
        cell_data = ct.data[index]
        counts = [np.sum(category == cell_data) for category in category_ids]
        for count in counts:
            table.cell(count)

    if title:
        table.title(title.format(Yname=Y.name.capitalize()))

    return table
Exemplo n.º 2
0
    def __init__(self, Y, X, match=None, sub=None,
                 samples=1000, replacement=True,
                 title="Bootstrapped Pairwise Tests"):
        """
        Compute pairwise related-measures t statistics between the cells of
        X, for the observed Y and for resampled versions of Y.

        Y: dependent variable (converted with ``asvar(Y, sub)``)
        X: factor defining the groups (converted with ``asfactor(X, sub)``)
        match: factor defining the units matched across groups; it is
            indexed with ``sub`` when ``sub`` is given. A false value is not
            supported and raises NotImplementedError.
        sub: passed to asvar/asfactor and used to index ``match``
        samples: number of resamples requested from ``_resample``
        replacement: forwarded to ``_resample``
        title: stored as ``self.title``
        """
        Y = asvar(Y, sub)
        X = asfactor(X, sub)
        assert len(Y) == len(X), "dataset length mismatch"

        if match:
            if sub is not None:
                match = match[sub]
            assert len(match) == len(Y), "dataset length mismatch"

        # row 0 holds the observed data, rows 1.. hold the resampled data
        n_rows = samples + 1
        resampled = np.empty((n_rows, len(Y)))
        resampled[0] = Y.x
        resamples = _resample(Y, unit=match, samples=samples,
                              replacement=replacement)
        for k, Y_k in resamples:
            resampled[k + 1] = Y_k.x
        self.resampled = resampled

        cells = X.cells
        n_groups = len(cells)

        # only the matched (related measures) case is implemented
        if not match:
            raise NotImplementedError

        # T: indexes to transform Y.x to an [X%match, value]-array; if there
        # are several values per X%match cell they are averaged below
        match_cells = match.cells
        group_size = len(match_cells)
        n_cells = n_groups * group_size
        T = None
        cell_pairs = itertools.product(cells, match_cells)
        for row, (x_cell, m_cell) in enumerate(cell_pairs):
            hits = np.where((X == x_cell) * (match == m_cell))[0]
            if T is None:
                # the first cell determines the number of values per cell
                T = np.empty((n_cells, len(hits)), dtype=int)
            T[row, :] = hits

        if T.shape[1] == 1:
            ordered = resampled[:, T[:, 0]]
        else:
            ordered = resampled[:, T].mean(axis=2)
        self.ordered = ordered

        # t-tests: one column per pair of groups, one row per (re)sample
        pairs = list(itertools.combinations(range(n_groups), 2))
        t = np.empty((n_rows, len(pairs)))
        # column indexes into ``ordered`` belonging to each group
        groups = [np.arange(g * group_size, (g + 1) * group_size)
                  for g in range(n_groups)]
        root_n = np.sqrt(group_size)
        comp_names = []
        for col, (g1, g2) in enumerate(pairs):
            diffs = ordered[:, groups[g1]] - ordered[:, groups[g2]]
            t[:, col] = (np.mean(diffs, axis=1) * root_n
                         / np.std(diffs, axis=1, ddof=1))
            comp_names.append(' - '.join((cells[g1], cells[g2])))

        # NOTE: only the differences of the last comparison are retained
        self.diffs = diffs
        # largest absolute t across comparisons for each resample
        self.t_resampled = np.max(np.abs(t[1:]), axis=1)
        t_observed = t[0]
        self.t = t_observed

        self._Y = Y
        self._X = X
        self._group_names = cells
        self._group_data = np.array([ordered[0, g] for g in groups])
        self._group_size = group_size
        self._df = group_size - 1
        self._match = match
        self._n_samples = samples
        self._replacement = replacement
        self._comp_names = comp_names
        self._p_parametric = self.test_param(t_observed)
        self._p_boot = self.test_boot(t_observed)
        self.title = title