def test_variability():
    """Check stats.variability() on the Loftus & Masson (1994) dataset."""
    data = datasets.get_loftus_masson_1994()
    recalled = data['n_recalled'].x.astype(np.float64)
    exposure = data['exposure'].as_factor()
    subject = data['subject']

    # reference values computed directly with scipy
    expected_sem = scipy.stats.sem(recalled, 0, 1)
    expected_ci = expected_sem * scipy.stats.t.isf(0.05 / 2., len(recalled) - 1)

    # invalid error specifications must be rejected
    for bad_spec in ('1mile', 'ci7ci'):
        assert_raises(ValueError, stats.variability, recalled, 0, 0, bad_spec, 0)

    # standard error, plain and scaled
    assert_almost_equal(
        stats.variability(recalled, None, None, 'sem', False), expected_sem)
    assert_almost_equal(
        stats.variability(recalled, None, None, '2sem', False),
        2 * expected_sem)

    # within-subject standard error; expected to be the same with and
    # without pooling
    residuals = stats.residuals(recalled[:, None], subject)
    expected_within = scipy.stats.sem(residuals, 0, len(subject.cells))
    for pool in (True, False):
        assert_almost_equal(
            stats.variability(recalled, None, subject, 'sem', pool),
            expected_within)

    # one data point per match cell
    n_single = subject.df + 1
    assert_raises(ValueError, stats.variability, recalled[:n_single], None,
                  subject[:n_single], 'sem', True)

    # per-cell standard error
    expected_by_cell = np.array([
        scipy.stats.sem(recalled[exposure == cell], 0, 1)
        for cell in exposure.cells
    ])
    by_cell = stats.variability(recalled, exposure, None, 'sem', False)
    assert_allclose(by_cell, expected_by_cell)
    # pooled variant: only checked to run without raising
    stats.variability(recalled, exposure, None, 'sem', True)

    # confidence intervals
    assert_almost_equal(
        stats.variability(recalled, None, None, '95%ci', False), expected_ci)
    assert_almost_equal(
        stats.variability(recalled, exposure, None, '95%ci', True),
        3.86, 2)  # L&M: 3.85
    assert_almost_equal(
        stats.variability(recalled, exposure, subject, '95%ci', True),
        0.52, 2)

    # passing the cells in reversed order must reverse the result
    default_order = stats.variability(recalled, exposure, None, '95%ci', False)
    reversed_order = stats.variability(recalled, exposure, None, '95%ci',
                                       False, exposure.cells[::-1])
    assert_equal(default_order[::-1], reversed_order)
def _plt_barplot(ax, ct, error, pool_error, hatch, colors, bottom, top=None,
                 origin=None, left=None, width=.5, c='#0099FF', edgec=None,
                 ec='k', test=True, par=True, trend="'", corr='Hochberg',
                 test_markers=True):
    """Plot the cell means of a Celltable as bars and return the axes limits.

    Parameters
    ----------
    ax : mpl Axes
        Axes to which to plot.
    ct : Celltable
        Data to plot.
    error : str
        Variability description (e.g., "95%ci").
    pool_error : bool
        Pool the errors for the estimate of variability.
    hatch : True | sequence
        Hatch applied to each bar in turn; ``True`` selects
        ``defaults['hatch']``. A false value disables hatching.
    colors : True | dict | sequence
        Face color for each bar in turn. ``True`` selects a color list from
        ``defaults`` (depending on ``defaults['mono']``); a dict is indexed
        by the cells of ``ct``. A false value leaves the bars in color ``c``.
    bottom : scalar
        Lower y-limit, unless the error bars extend further down.
    top : None | scalar
        Upper y-limit; with ``None`` the value derived from the plot is used.
    origin : None | scalar
        Baseline from which bars are drawn (default ``max(0, bottom)``).
    left : None | sequence of scalar
        Left edge of each bar (default ``np.arange(k) - width / 2``).
    width : scalar
        Bar width.
    c, edgec, ec
        Bar color, bar edge color and error bar color handed to ``ax.bar``.
    test : bool | None | scalar
        ``True``: mark pairwise tests (replaced by a test against 0 when
        ``ct.X`` is None); ``False``/``None``: no test; scalar: draw a
        horizontal line at that value and mark one-sample tests against it.
    par, trend, corr, test_markers
        Forwarded to the functions that mark the test results.

    Returns
    -------
    lim : tuple
        ``(x0, x1, y0, y1)`` axes limits.
    """
    # resolve style arguments
    if hatch is True:
        hatch = defaults['hatch']
    if colors is True:
        palette = 'cm' if defaults['mono'] else 'c'
        colors = defaults[palette]['colors']
    elif isinstance(colors, dict):
        colors = [colors[cell] for cell in ct.cells]

    # bar positions and heights
    n_cells = len(ct.cells)
    if left is None:
        left = np.arange(n_cells) - width / 2
    height = np.array(ct.get_statistic(np.mean))
    if origin is None:
        origin = max(0, bottom)

    # error bars; the match variable is only used when there is a model
    error_match = None if ct.X is None else ct.match
    y_error = stats.variability(ct.Y.x, ct.X, error_match, error, pool_error,
                                ct.cells)

    # vertical extent of the data
    plot_max = np.max(height + y_error)
    plot_min = np.min(height - y_error)
    y_bottom = min(bottom, plot_min - (plot_max - plot_min) * .05)

    # draw the bars
    bars = ax.bar(left, height - origin, width, bottom=origin, align='edge',
                  color=c, edgecolor=edgec, ecolor=ec, yerr=y_error)
    if hatch:
        for bar, pattern in zip(bars, hatch):
            bar.set_hatch(pattern)
    if colors:
        for bar, facecolor in zip(bars, colors):
            bar.set_facecolor(facecolor)

    # statistical tests
    if test is True and ct.X is None:
        test = 0.
    y_unit = (plot_max - y_bottom) / 15
    if test is None or test is False:
        y_top = plot_max + y_unit
    elif test is True:
        y_top = _mark_plot_pairwise(ax, ct, par, plot_max, y_unit, corr,
                                    trend, test_markers, top=top)
    else:
        ax.axhline(test, color='black')
        y_top = _mark_plot_1sample(ax, ct, par, plot_max, y_unit, test, corr,
                                   trend)

    if top is None:
        top = y_top

    # x0, x1, y0, y1
    x_min = min(left) - .5 * width
    x_max = max(left) + 1.5 * width
    return (x_min, x_max, y_bottom, top)
def test_sem_and_variability():
    """Check stats.variability() standard errors and smoke-test its CIs."""
    data = datasets.get_loftus_masson_1994()
    recalled = data['n_recalled'].x
    exposure = data['exposure'].as_factor()
    subject = data['subject']

    # invalid error specifications must be rejected
    for bad_spec in ('1mile', 'ci7ci'):
        assert_raises(ValueError, stats.variability, recalled, 0, 0, bad_spec, 0)

    # standard error, plain and scaled
    expected_sem = scipy.stats.sem(recalled, 0, 1)
    plain = stats.variability(recalled, None, None, 'sem', False)
    assert_almost_equal(plain, expected_sem)
    doubled = stats.variability(recalled, None, None, '2sem', False)
    assert_almost_equal(doubled, 2 * expected_sem)

    # within-subject standard error; expected to be the same with and
    # without pooling
    residuals = stats.residuals(recalled[:, None], subject)
    expected_within = scipy.stats.sem(residuals, 0, len(subject.cells))
    for pool in (True, False):
        assert_almost_equal(
            stats.variability(recalled, None, subject, 'sem', pool),
            expected_within)

    # one data point per match cell
    n_single = subject.df + 1
    assert_raises(ValueError, stats.variability, recalled[:n_single], None,
                  subject[:n_single], 'sem', True)

    # per-cell standard error
    expected_by_cell = np.array([
        scipy.stats.sem(recalled[exposure == cell], 0, 1)
        for cell in exposure.cells
    ])
    by_cell = stats.variability(recalled, exposure, None, 'sem', False)
    assert_allclose(by_cell, expected_by_cell)
    # pooled variant: only checked to run without raising
    stats.variability(recalled, exposure, None, 'sem', True)

    # confidence intervals: only checked to run without raising
    stats.variability(recalled, None, None, '95%ci', False)
    stats.variability(recalled, exposure, None, '95%ci', True)
    stats.variability(recalled, exposure, subject, '95%ci', True)
def test_variability():
    """Check stats.variability() on the Loftus & Masson (1994) dataset."""
    ds = datasets.get_loftus_masson_1994()
    y = ds['n_recalled'].x.astype(np.float64)
    x = ds['exposure'].as_factor()
    match = ds['subject']

    # reference values computed directly with scipy
    sem = scipy.stats.sem(y, 0, 1)
    ci = sem * scipy.stats.t.isf(0.05 / 2., len(y) - 1)

    # invalid spec
    for bad_spec in ('1mile', 'ci7ci'):
        with pytest.raises(ValueError):
            stats.variability(y, 0, 0, bad_spec, 0)

    # standard error
    # Compared with a tolerance: exact ``==`` on floats depends on both
    # sides using the same sequence of floating point operations.
    assert stats.variability(y, None, None, 'sem', False) == pytest.approx(sem)
    assert stats.variability(y, None, None, '2sem', False) == pytest.approx(2 * sem)

    # within subject standard-error; expected to be the same with and
    # without pooling
    residuals = stats.residuals(y[:, None], match)
    target = scipy.stats.sem(residuals, 0, len(match.cells))[0]
    assert stats.variability(y, None, match, 'sem', True) == pytest.approx(target)
    assert stats.variability(y, None, match, 'sem', False) == pytest.approx(target)

    # one data point per match cell
    n = match.df + 1
    with pytest.raises(ValueError):
        stats.variability(y[:n], None, match[:n], 'sem', True)

    # per-cell standard error
    target = np.array(
        [scipy.stats.sem(y[x == cell], 0, 1) for cell in x.cells])
    es = stats.variability(y, x, None, 'sem', False)
    assert_allclose(es, target)
    # pooled variant: only checked to run without raising
    stats.variability(y, x, None, 'sem', True)

    # confidence intervals
    assert stats.variability(y, None, None, '95%ci', False) == pytest.approx(ci)
    assert stats.variability(y, x, None, '95%ci', True) == pytest.approx(3.86, abs=1e-2)  # L&M: 3.85
    assert stats.variability(y, x, match, '95%ci', True) == pytest.approx(0.52, abs=1e-2)

    # passing the cells in reversed order must reverse the result
    assert_equal(
        stats.variability(y, x, None, '95%ci', False)[::-1],
        stats.variability(y, x, None, '95%ci', False, x.cells[::-1]))
def _plt_barplot(ax, ct, error, pool_error, hatch, colors, bottom, top=None,
                 origin=None, left=None, width=.5, c='#0099FF', edgec=None,
                 ec='k', test=True, par=True, trend="'", corr='Hochberg',
                 test_markers=True):
    """Draw a barplot to axes ax for Celltable ct.

    Parameters
    ----------
    ax : mpl Axes
        Axes to which to plot
    ct : Celltable
        Data to plot.
    error : str
        Variability description (e.g., "95%ci").
    pool_error : bool
        Pool the errors for the estimate of variability.
    hatch : True | sequence
        Hatch applied to each bar in turn; ``True`` selects
        ``defaults['hatch']``. A false value disables hatching.
    colors : True | dict | sequence
        Face color for each bar in turn. ``True`` selects a color list from
        ``defaults`` (depending on ``defaults['mono']``); a dict is indexed
        by the cells of ``ct``. A false value leaves the bars in color ``c``.
    bottom : scalar
        Lower y-limit, unless the error bars extend further down.
    top : None | scalar
        Upper y-limit; with ``None`` the value derived from the plot is used.
    origin : None | scalar
        Baseline from which bars are drawn (default ``max(0, bottom)``).
    left : None | sequence of scalar
        Left edge of each bar (default ``np.arange(k) - width / 2``).
    width : scalar
        Bar width.
    c, edgec, ec
        Bar color, bar edge color and error bar color handed to ``ax.bar``.
    test : bool | None | scalar
        ``True``: mark pairwise tests (replaced by a test against 0 when
        ``ct.X`` is None); ``False``/``None``: no test; scalar: draw a
        horizontal line at that value and mark one-sample tests against it.
    par, trend, corr, test_markers
        Forwarded to the functions that mark the test results.

    Returns
    -------
    lim : tuple
        ``(x0, x1, y0, y1)`` axes limits.
    """
    # kwargs
    if hatch is True:
        hatch = defaults['hatch']

    if colors is True:
        if defaults['mono']:
            colors = defaults['cm']['colors']
        else:
            colors = defaults['c']['colors']
    elif isinstance(colors, dict):
        colors = [colors[cell] for cell in ct.cells]

    # data means
    k = len(ct.cells)
    if left is None:
        left = np.arange(k) - width / 2
    height = np.array(ct.get_statistic(np.mean))

    # origin
    if origin is None:
        origin = max(0, bottom)

    # error bars
    if ct.X is None:
        error_match = None
    else:
        error_match = ct.match
    # Request the errors in the order of ct.cells so that they line up with
    # ``height``, which is also computed per cell of ct.
    y_error = stats.variability(ct.Y.x, ct.X, error_match, error, pool_error,
                                ct.cells)

    # fig spacing
    plot_max = np.max(height + y_error)
    plot_min = np.min(height - y_error)
    plot_span = plot_max - plot_min
    y_bottom = min(bottom, plot_min - plot_span * .05)

    # main BARPLOT
    # ``left`` holds left-edge coordinates (and the returned limits assume
    # so); matplotlib >= 2.0 centers bars on x by default, hence the explicit
    # align='edge'.
    bars = ax.bar(left, height - origin, width, bottom=origin, align='edge',
                  color=c, edgecolor=edgec, ecolor=ec, yerr=y_error)

    # hatch
    if hatch:
        for bar, h in zip(bars, hatch):
            bar.set_hatch(h)
    if colors:
        for bar, facecolor in zip(bars, colors):
            bar.set_facecolor(facecolor)

    # pairwise tests
    if ct.X is None and test is True:
        # without a model there are no pairs to compare: test against 0
        test = 0.
    y_unit = (plot_max - y_bottom) / 15
    if test is True:
        y_top = _mark_plot_pairwise(ax, ct, par, plot_max, y_unit, corr,
                                    trend, test_markers, top=top)
    elif (test is False) or (test is None):
        y_top = plot_max + y_unit
    else:
        ax.axhline(test, color='black')
        y_top = _mark_plot_1sample(ax, ct, par, plot_max, y_unit, test, corr,
                                   trend)

    if top is None:
        top = y_top

    # x0, x1, y0, y1
    lim = (min(left) - .5 * width, max(left) + 1.5 * width, y_bottom, top)
    return lim
def test_sem_and_variability():
    """Check stats.variability() standard errors, CIs and cell ordering."""
    data = datasets.get_loftus_masson_1994()
    recalled = data['n_recalled'].x.astype(np.float64)
    exposure = data['exposure'].as_factor()
    subject = data['subject']

    # invalid error specifications must be rejected
    for bad_spec in ('1mile', 'ci7ci'):
        assert_raises(ValueError, stats.variability, recalled, 0, 0, bad_spec, 0)

    # standard error, plain and scaled
    expected_sem = scipy.stats.sem(recalled, 0, 1)
    plain = stats.variability(recalled, None, None, 'sem', False)
    assert_almost_equal(plain, expected_sem)
    doubled = stats.variability(recalled, None, None, '2sem', False)
    assert_almost_equal(doubled, 2 * expected_sem)

    # within-subject standard error; expected to be the same with and
    # without pooling
    residuals = stats.residuals(recalled[:, None], subject)
    expected_within = scipy.stats.sem(residuals, 0, len(subject.cells))
    for pool in (True, False):
        assert_almost_equal(
            stats.variability(recalled, None, subject, 'sem', pool),
            expected_within)

    # one data point per match cell
    n_single = subject.df + 1
    assert_raises(ValueError, stats.variability, recalled[:n_single], None,
                  subject[:n_single], 'sem', True)

    # per-cell standard error
    expected_by_cell = np.array([
        scipy.stats.sem(recalled[exposure == cell], 0, 1)
        for cell in exposure.cells
    ])
    by_cell = stats.variability(recalled, exposure, None, 'sem', False)
    assert_allclose(by_cell, expected_by_cell)
    # pooled variant: only checked to run without raising
    stats.variability(recalled, exposure, None, 'sem', True)

    # confidence intervals: only checked to run without raising
    stats.variability(recalled, None, None, '95%ci', False)
    stats.variability(recalled, exposure, None, '95%ci', True)
    stats.variability(recalled, exposure, subject, '95%ci', True)

    # passing the cells in reversed order must reverse the result
    default_order = stats.variability(recalled, exposure, None, '95%ci', False)
    reversed_order = stats.variability(recalled, exposure, None, '95%ci',
                                       False, exposure.cells[::-1])
    assert_equal(default_order[::-1], reversed_order)
def __init__(self, ax, ct, error, pool_error, hatch, colors, bottom,
             top=None, origin=None, left=None, width=.5, c='#0099FF',
             edgec=None, ec='k', test=True, par=True, trend="'",
             corr='Hochberg', test_markers=True, xticks=None,
             xtick_delim=None):
    """Draw the cell means of ``ct`` as bars on ``ax`` and store the limits.

    The plot limits are stored as ``self.left``, ``self.right``,
    ``self.bottom`` and ``self.top`` before ``_plt_uv_base.__init__`` is
    called with ``ax``, ``ct``, ``xticks`` and ``xtick_delim``.
    """
    # resolve style arguments
    if hatch is True:
        hatch = defaults['hatch']
    if colors is True:
        palette = 'cm' if defaults['mono'] else 'c'
        colors = defaults[palette]['colors']
    elif isinstance(colors, dict):
        colors = [colors[cell] for cell in ct.cells]

    # bar positions and heights
    n_cells = len(ct.cells)
    if left is None:
        left = np.arange(n_cells) - width / 2
    height = np.array(ct.get_statistic(np.mean))
    if origin is None:
        origin = max(0, bottom)

    # error bars; the match variable is only used when there is a model
    error_match = None if ct.x is None else ct.match
    y_error = stats.variability(ct.y.x, ct.x, error_match, error, pool_error,
                                ct.cells)

    # vertical extent of the data
    plot_max = np.max(height + y_error)
    plot_min = np.min(height - y_error)
    y_bottom = min(bottom, plot_min - (plot_max - plot_min) * .05)

    # draw the bars
    bars = ax.bar(left, height - origin, width, bottom=origin, align='edge',
                  color=c, edgecolor=edgec, ecolor=ec, yerr=y_error)
    if hatch:
        for bar, pattern in zip(bars, hatch):
            bar.set_hatch(pattern)
    if colors:
        for bar, facecolor in zip(bars, colors):
            bar.set_facecolor(facecolor)

    # statistical tests
    if test is True and ct.x is None:
        test = 0.
    y_unit = (plot_max - y_bottom) / 15
    if test is None or test is False:
        y_top = plot_max + y_unit
    elif test is True:
        y_top = _mark_plot_pairwise(ax, ct, par, plot_max, y_unit, corr,
                                    trend, test_markers, top=top)
    else:
        ax.axhline(test, color='black')
        y_top = _mark_plot_1sample(ax, ct, par, plot_max, y_unit, test, corr,
                                   trend)

    # plot limits
    self.left = min(left) - .5 * width
    self.right = max(left) + 1.5 * width
    self.bottom = y_bottom
    if top is None:
        self.top = y_top
    else:
        self.top = top
    _plt_uv_base.__init__(self, ax, ct, xticks, xtick_delim)