示例#1
0
def test_confint(testdata):
    """Check ``confidence_interval`` against directly computed intervals.

    ``confidence_interval`` is called with ``'A'`` as the control label. Its
    ``'B'`` entry must equal the t-based interval for ``kpi1`` and the z-based
    interval for ``kpi2``, both obtained from a ``CompareMeans`` of group
    ``'B'`` against group ``'A'``.
    """
    actual = confidence_interval(testdata, control_label='A')

    def compare(kpi):
        # Group 'B' is passed first and group 'A' second.
        return CompareMeans(DescrStatsW(testdata[kpi]['B']),
                            DescrStatsW(testdata[kpi]['A']))

    kpi1_comparison = compare('kpi1')
    kpi2_comparison = compare('kpi2')
    kpi1_interval = kpi1_comparison.tconfint_diff()
    kpi2_interval = kpi2_comparison.zconfint_diff()

    assert actual['B']['kpi1'] == kpi1_interval
    assert actual['B']['kpi2'] == kpi2_interval
示例#2
0
def mean_diff_confint_ind(sample1, sample2, alpha=0.05):
    """Confidence interval for the difference in means of two independent samples.

    Parameters
    ----------
    sample1 : array_like
        First sample.
    sample2 : array_like
        Second sample.
    alpha : float in (0, 1)
        Significance level; the confidence level of the interval is
        ``1 - alpha``.

    Returns
    -------
    lower, upper : floats
        Lower and upper bounds of the confidence interval.
    """
    comparison = CompareMeans(
        DescrStatsW(sample1),
        DescrStatsW(sample2),
    )
    interval = comparison.tconfint_diff(alpha=alpha)
    return interval
示例#3
0
    def test_ttest_2sample(self):
        """Check the weighted two-sample t-test and its confidence intervals.

        The weighted ``ttest_ind`` function and ``CompareMeans.ttest_ind`` are
        compared with ``stats.ttest_ind`` evaluated on ``self.x1r`` and
        ``self.x2r``. The unequal-variance t-test and both variants of
        ``tconfint_diff`` must then agree regardless of the ``ddof`` values
        given to ``DescrStatsW``.
        """
        data1, data2 = self.x1, self.x2
        raw1, raw2 = self.x1r, self.x2r
        wts1, wts2 = self.w1, self.w2

        def compare(ddof1, ddof2):
            # Pair the two weighted samples using the requested ddof values.
            return CompareMeans(DescrStatsW(data1, weights=wts1, ddof=ddof1),
                                DescrStatsW(data2, weights=wts2, ddof=ddof2))

        # Note: stats.ttest_ind handles 2d/nd arguments
        reference = stats.ttest_ind(raw1, raw2)
        assert_almost_equal(ttest_ind(data1, data2, weights=(wts1, wts2))[:2],
                            reference, 14)

        # The t-test result must not depend on the user-supplied ddof.
        for ddof1, ddof2 in ((0, 1), (1, 2)):
            assert_almost_equal(compare(ddof1, ddof2).ttest_ind()[:2],
                                reference, 14)

        variants = [compare(0, 0), compare(0, 1), compare(1, 2)]

        def results_for(method_name, usevar):
            # Evaluate every variant before any comparison is made.
            return [getattr(cm, method_name)(usevar=usevar) for cm in variants]

        def assert_matches_first(results):
            # The ddof=(0, 0) variant serves as the baseline.
            baseline = results[0]
            for other in results[1:]:
                assert_almost_equal(other, baseline, 14)

        assert_matches_first(results_for('ttest_ind', 'unequal'))

        # The confidence intervals must not depend on the user ddof either.
        assert_matches_first(results_for('tconfint_diff', 'pooled'))
        assert_matches_first(results_for('tconfint_diff', 'unequal'))
    def test_ttest_2sample(self):
        """Verify two-sample t-test results and their independence from ddof.

        First, the weighted ``ttest_ind`` function and the default
        ``CompareMeans.ttest_ind`` are checked against ``stats.ttest_ind``
        computed from ``self.x1r`` and ``self.x2r``. Then three ``CompareMeans``
        instances that differ only in ``ddof`` are required to give matching
        unequal-variance t-tests and matching pooled/unequal confidence
        intervals.
        """
        x1, x2, x1r, x2r, w1, w2 = (self.x1, self.x2,
                                    self.x1r, self.x2r,
                                    self.w1, self.w2)

        # Note: stats.ttest_ind handles 2d/nd arguments
        expected_sp = stats.ttest_ind(x1r, x2r)
        weighted = ttest_ind(x1, x2, weights=(w1, w2))
        assert_almost_equal(weighted[:2], expected_sp, 14)

        # check correct ttest independent of user ddof
        for d1, d2 in ((0, 1), (1, 2)):
            stats1 = DescrStatsW(x1, weights=w1, ddof=d1)
            stats2 = DescrStatsW(x2, weights=w2, ddof=d2)
            pooled_result = CompareMeans(stats1, stats2).ttest_ind()
            assert_almost_equal(pooled_result[:2], expected_sp, 14)

        cm_base, cm_alt1, cm_alt2 = [
            CompareMeans(DescrStatsW(x1, weights=w1, ddof=d1),
                         DescrStatsW(x2, weights=w2, ddof=d2))
            for d1, d2 in ((0, 0), (0, 1), (1, 2))
        ]

        # Each evaluator is applied to all three instances; the results of the
        # ddof-modified ones must match those of the ddof=(0, 0) instance.
        evaluators = (
            lambda cm: cm.ttest_ind(usevar='unequal'),
            # check confint independent of user ddof
            lambda cm: cm.tconfint_diff(usevar='pooled'),
            lambda cm: cm.tconfint_diff(usevar='unequal'),
        )
        for evaluate in evaluators:
            base_result = evaluate(cm_base)
            alt1_result = evaluate(cm_alt1)
            alt2_result = evaluate(cm_alt2)
            assert_almost_equal(alt1_result, base_result, 14)
            assert_almost_equal(alt2_result, base_result, 14)
示例#5
0
# Use 'larger' for a right-sided (upper) one-tailed test.
# NOTE(review): X, Y, alt and a are not defined in this fragment; they are
# presumably set earlier in the script -- confirm.

d = DescrStatsW(np.array(X) - np.array(Y))  # paired samples
d.ttest_mean(alternative=alt)[1]  # p-value
#> 0.0006415571512322235

d.tconfint_mean(alpha=a, alternative=alt)  # confidence interval
#> (-3.9955246743198867, -1.3644753256801117)

c = CompareMeans(DescrStatsW(X), DescrStatsW(Y))  # unpaired samples

ve = 'pooled'  # Assume equal variances (the default); use 'unequal' otherwise.
c.ttest_ind(alternative=alt, usevar=ve)[1]  # p-value
#> 0.000978530937238609

c.tconfint_diff(alpha=a, alternative=alt, usevar=ve)  # confidence interval
#> (-4.170905570517185, -1.1890944294828283)

### 4.4.4 Test of independence (chi-squared test)

import pandas as pd
my_url = ('https://raw.githubusercontent.com/taroyabuki'
          '/fromzero/master/data/smoker.csv')
my_data = pd.read_csv(my_url)

my_data.head()
#>   alive smoker
#> 0   Yes     No
#> 1   Yes     No
#> 2   Yes     No
#> 3   Yes     No
示例#6
0
 def fn(control, test):
     """Return a confidence interval for the mean difference of two groups.

     ``test`` is passed to ``CompareMeans`` as the first sample and
     ``control`` as the second. The z-based interval is used when
     ``_is_proportion(control, test)`` is truthy, the t-based one otherwise.
     """
     comparison = CompareMeans(DescrStatsW(test), DescrStatsW(control))
     # Select the interval method first, then call it once.
     interval_method = (comparison.zconfint_diff
                        if _is_proportion(control, test)
                        else comparison.tconfint_diff)
     return interval_method()
示例#7
0
def mean_diff_confint_ind(sample1, sample2):
    """Return the t-based confidence interval for the difference in means.

    Both samples are wrapped in ``DescrStatsW`` and compared with
    ``CompareMeans``; ``tconfint_diff`` is called with its default arguments.
    """
    first_stats = DescrStatsW(sample1)
    second_stats = DescrStatsW(sample2)
    comparison = CompareMeans(first_stats, second_stats)
    return comparison.tconfint_diff()