Пример #1
0
    def test_weightstats_3(self):
        """Compare weighted 2-d statistics with statistics of repeated data.

        The ``asrepeats()`` output of each ``DescrStatsW`` instance is fed
        to the numpy/scipy reference functions, and those results are
        compared with what ``DescrStatsW`` and ``CompareMeans`` report.
        """
        stats_w1 = DescrStatsW(self.x1_2d, weights=self.w1)
        stats_w2 = DescrStatsW(self.x2_2d, weights=self.w2)
        rep1 = stats_w1.asrepeats()
        rep2 = stats_w2.asrepeats()

        # Column-wise moments of the second sample.
        assert_almost_equal(rep2.mean(0), stats_w2.mean, decimal=14)
        assert_almost_equal(rep2.var(0), stats_w2.var, decimal=14)
        assert_almost_equal(rep2.std(0), stats_w2.std, decimal=14)
        assert_almost_equal(np.cov(rep2.T, bias=1), stats_w2.cov, decimal=14)
        assert_almost_equal(np.corrcoef(rep2.T), stats_w2.corrcoef,
                            decimal=14)

        # One-sample t-test of the first sample against the value 3; only
        # the first two of the three returned items are compared with scipy.
        tstat, pvalue, _ = stats_w1.ttest_mean(3)
        assert_almost_equal([tstat, pvalue], stats.ttest_1samp(rep1, 3),
                            decimal=11)

        # Two-sample t-test: CompareMeans versus scipy on the repeated data.
        res_statsmodels = CompareMeans(stats_w1, stats_w2).ttest_ind()
        res_scipy = stats.ttest_ind(rep1, rep2)
        assert_almost_equal(res_statsmodels[:2], res_scipy, decimal=14)
Пример #2
0
    def test_weightstats_3(self):
        """Check 2-d ``DescrStatsW`` results against the expanded samples.

        Each weighted sample is expanded with ``asrepeats()``; numpy and
        scipy computations on the expanded arrays serve as the reference
        for the weighted mean/var/std/cov/corrcoef and for the t-tests.
        """
        data1, data2 = self.x1_2d, self.x2_2d
        weights1, weights2 = self.w1, self.w2

        weighted1 = DescrStatsW(data1, weights=weights1)
        weighted2 = DescrStatsW(data2, weights=weights2)
        expanded1 = weighted1.asrepeats()
        expanded2 = weighted2.asrepeats()

        precision = 14
        assert_almost_equal(expanded2.mean(0), weighted2.mean, precision)
        assert_almost_equal(expanded2.var(0), weighted2.var, precision)
        assert_almost_equal(expanded2.std(0), weighted2.std, precision)
        assert_almost_equal(np.cov(expanded2.T, bias=1), weighted2.cov,
                            precision)
        assert_almost_equal(np.corrcoef(expanded2.T), weighted2.corrcoef,
                            precision)

        # ttest_mean returns three items; the third is not checked here.
        t_value, p_value, _unused = weighted1.ttest_mean(3)
        reference_1samp = stats.ttest_1samp(expanded1, 3)
        assert_almost_equal([t_value, p_value], reference_1samp, 11)

        comparison = CompareMeans(weighted1, weighted2)
        weighted_result = comparison.ttest_ind()
        reference_2samp = stats.ttest_ind(expanded1, expanded2)
        assert_almost_equal(weighted_result[:2], reference_2samp, precision)
Пример #3
0
def test_confint(testdata):
    """Check ``confidence_interval`` against direct ``CompareMeans`` calls.

    With ``'A'`` as the control label, the result for variant ``'B'`` must
    equal ``tconfint_diff()`` for ``kpi1`` and ``zconfint_diff()`` for
    ``kpi2``, each computed as B compared with A.
    """
    result = confidence_interval(testdata, control_label='A')

    def b_versus_a(kpi):
        # Variant 'B' is the first sample, control 'A' the second.
        return CompareMeans(DescrStatsW(testdata[kpi]['B']),
                            DescrStatsW(testdata[kpi]['A']))

    expected_kpi1 = b_versus_a('kpi1').tconfint_diff()
    expected_kpi2 = b_versus_a('kpi2').zconfint_diff()

    assert result['B']['kpi1'] == expected_kpi1
    assert result['B']['kpi2'] == expected_kpi2
Пример #4
0
    def test_comparemeans_convenient_interface(self):
        """Smoke-test ``CompareMeans.summary`` and check ``from_data``.

        ``summary`` must return a ``SimpleTable`` for every combination of
        ``use_t`` and ``usevar``, and an instance built with ``from_data``
        must produce the same summary text as one built from two
        ``DescrStatsW`` objects.
        """
        sample1, sample2 = self.x1_2d, self.x2_2d
        cm_explicit = CompareMeans(DescrStatsW(sample1), DescrStatsW(sample2))

        # smoke test for summary
        from statsmodels.iolib.table import SimpleTable
        option_grid = [(use_t, usevar)
                       for use_t in (True, False)
                       for usevar in ('pooled', 'unequal')]
        for use_t, usevar in option_grid:
            table = cm_explicit.summary(use_t=use_t, usevar=usevar)
            assert_(isinstance(table, SimpleTable))

        # test for from_data method
        cm_from_data = CompareMeans.from_data(sample1, sample2)
        assert_(str(cm_explicit.summary()) == str(cm_from_data.summary()))
Пример #5
0
 def setup_class(cls):
     """Attach two fixed samples and their comparison objects to the class.

     Sets ``x1``/``x2`` (raw arrays), ``d1``/``d2`` (``DescrStatsW``
     wrappers) and ``cm`` (``CompareMeans`` of ``d1`` and ``d2``).
     """
     first_sample = [7.8, 6.6, 6.5, 7.4, 7.3, 7.0, 6.4, 7.1, 6.7, 7.6, 6.8]
     second_sample = [4.5, 5.4, 6.1, 6.1, 5.4, 5.0, 4.1, 5.5]

     cls.x1 = np.array(first_sample)
     cls.x2 = np.array(second_sample)
     cls.d1 = DescrStatsW(cls.x1)
     cls.d2 = DescrStatsW(cls.x2)
     cls.cm = CompareMeans(cls.d1, cls.d2)
Пример #6
0
def test_ztest_ztost():
    """Cross-check weightstats z-tests against the proportion functions.

    A binary sample expressed as values ``[0, 1]`` with frequency weights
    is tested with ``DescrStatsW``/``CompareMeans``/``ztest`` and compared
    with ``proportions_ztest``, ``proportions_ztost`` and
    ``proportions_chisquare`` on the corresponding counts.
    """
    # compare weightstats with separately tested proportion ztest ztost
    import statsmodels.stats.proportion as smprop

    values = [0, 1]
    freq1 = [5, 15]

    prop_ztest = smprop.proportions_ztest(15, 20., value=0.5)
    d1 = DescrStatsW(values, freq1)
    # Only approximate agreement is required with the unscaled weights.
    assert_allclose(d1.ztest_mean(0.5), prop_ztest, rtol=0.03, atol=0.003)

    # With the weights scaled by 21/20 the results must agree to 12 decimals.
    d1_scaled = DescrStatsW(values, np.array(freq1) * 21. / 20)
    assert_almost_equal(d1_scaled.ztest_mean(0.5), prop_ztest, decimal=12)

    tost_weighted = d1_scaled.ztost_mean(0.4, 0.6)
    tost_prop = smprop.proportions_ztost(15, 20., 0.4, 0.6)
    assert_almost_equal(tost_weighted[0], tost_prop[0], decimal=12)

    # Second sample: 10 zeros and 10 ones.
    d2 = DescrStatsW([0, 1], [10, 10])
    z_on_repeats = ztest(d1.asrepeats(), d2.asrepeats())
    chisq = smprop.proportions_chisquare(np.asarray([15, 10]),
                                         np.asarray([20., 20]))
    # TODO: check whether this difference is expected, see test_proportion
    assert_allclose(z_on_repeats[1], chisq[1], rtol=0.03)

    z_compare_means = CompareMeans(d1, d2).ztest_ind()
    assert_allclose(z_compare_means[1], chisq[1], rtol=0.03)
    assert_almost_equal(z_compare_means, z_on_repeats, decimal=12)
Пример #7
0
    def test_comparemeans_convenient_interface(self):
        """Exercise ``summary`` options and the ``from_data`` constructor.

        Every ``use_t``/``usevar`` combination must yield a ``SimpleTable``;
        ``CompareMeans.from_data`` must give the same default summary string
        as the instance built from explicit ``DescrStatsW`` objects.
        """
        x1_2d = self.x1_2d
        x2_2d = self.x2_2d
        descr1 = DescrStatsW(x1_2d)
        descr2 = DescrStatsW(x2_2d)
        reference_cm = CompareMeans(descr1, descr2)

        # smoke test for summary
        from statsmodels.iolib.table import SimpleTable
        for t_flag in (True, False):
            for variance_mode in ('pooled', 'unequal'):
                summary_table = reference_cm.summary(use_t=t_flag,
                                                     usevar=variance_mode)
                assert_(isinstance(summary_table, SimpleTable))

        # test for from_data method
        convenience_cm = CompareMeans.from_data(x1_2d, x2_2d)
        reference_text = str(reference_cm.summary())
        convenience_text = str(convenience_cm.summary())
        assert_(reference_text == convenience_text)
Пример #8
0
def testcomparison(df, smp1_cols, smp2_cols, test='ttest'):
    if len(smp1_cols) == 0 or len(smp2_cols) == 0:
        logging.warning("data not exist for comparison")
    else:
        col_stat = 'stat %s' % test
        col_pval = 'pval %s' % test
        df.loc[:, col_stat] = np.nan
        df.loc[:, col_pval] = np.nan
        for i in df.index:
            X = DescrStatsW(df.loc[i, smp1_cols].as_matrix())
            Y = DescrStatsW(df.loc[i, smp2_cols].as_matrix())
            if test == 'ttest':
                df.loc[i, col_stat], df.loc[i, col_pval], tmp = CompareMeans(
                    X, Y).ttest_ind()
            if test == 'ztest':
                df.loc[i, col_stat], df.loc[i, col_pval] = CompareMeans(
                    X, Y).ztest_ind()
        return df
Пример #9
0
def mean_diff_confint_ind(sample1, sample2, alpha=0.05):
    """Confidence interval for the difference in means of two independent samples.

    Parameters
    ----------
    sample1 : array_like
        First sample.
    sample2 : array_like
        Second sample.
    alpha : float in (0, 1)
        Significance level; the confidence level of the interval is
        ``1 - alpha``.

    Returns
    -------
    lower, upper : floats
        Lower and upper bounds of the confidence interval.
    """
    first = DescrStatsW(sample1)
    second = DescrStatsW(sample2)
    return CompareMeans(first, second).tconfint_diff(alpha=alpha)
    def test_means(self, population_A, population_B, sample_size_A,
                   sample_size_B, variable, SEED):
        """Return the p-value of a z-test comparing two sampled populations.

        A sample of ``sample_size_A`` / ``sample_size_B`` rows is drawn from
        each population with ``random_state=SEED``, the column ``variable``
        is selected, and ``ztest_ind`` is run with ``alternative='larger'``
        and ``value=0``. Only the p-value is returned.
        """
        def draw(population, size):
            # Seeded draw of `size` rows, restricted to the chosen variable.
            return population.sample(n=size, random_state=SEED)[variable]

        # One sample per population.
        drawn_a = draw(population_A, sample_size_A)
        drawn_b = draw(population_B, sample_size_B)

        # Wrap both samples and set up the comparison.
        comparison = CompareMeans(DescrStatsW(drawn_a), DescrStatsW(drawn_b))

        # The z statistic itself is not needed by the caller.
        _, p_value = comparison.ztest_ind(alternative='larger', value=0)
        return p_value
Пример #11
0
def mean_method(df, target_col):
    """Estimate the treatment effect as a difference of group means.

    Rows with ``in_delta == 1`` are split by ``treatment`` (0 or 1). The
    effect is the mean of ``target_col`` in the treated group minus the
    mean in the untreated group. The effect, the t confidence interval of
    the difference and the two-sided t-test p-value are printed.

    Returns
    -------
    The difference ``mean(treated) - mean(untreated)`` of ``target_col``.
    """
    in_delta = df['in_delta'] == 1
    untreated = df.loc[(df['treatment'] == 0) & in_delta, target_col]
    treated = df.loc[(df['treatment'] == 1) & in_delta, target_col]

    mean_0 = untreated.mean()
    mean_1 = treated.mean()

    print(f"{'=' * 10}Mean Method Results{'=' * 10}")
    effect = mean_1 - mean_0
    print(f"Treatment effect on {target_col} is {mean_1 - mean_0}")

    # Treated group is the first sample, untreated group the second.
    cm = CompareMeans.from_data(treated.values, untreated.values)
    result = cm.ttest_ind(alternative='two-sided')
    print(f"Two-sided T test: CI={np.round(cm.tconfint_diff(), 3)} pvalue={round(result[1], 3)}")

    return effect
Пример #12
0
def mean_diff_confint_ind(sample1, sample2):
    """Return ``tconfint_diff()`` for two samples compared as independent.

    Both samples are wrapped in ``DescrStatsW`` and compared with
    ``CompareMeans``; ``tconfint_diff`` is called with its default
    arguments.
    """
    descr1 = DescrStatsW(sample1)
    descr2 = DescrStatsW(sample2)
    comparison = CompareMeans(descr1, descr2)
    return comparison.tconfint_diff()
Пример #13
0
    def test_ttest_2sample(self):
        """Two-sample t-test and confidence interval must not depend on ddof.

        The weighted ``ttest_ind`` is compared with ``scipy.stats.ttest_ind``
        on the repeated data, and ``CompareMeans`` results are required to
        be identical for different user-supplied ``ddof`` values.
        """
        x1, x2 = self.x1, self.x2
        x1r, x2r = self.x1r, self.x2r
        w1, w2 = self.w1, self.w2

        def compare(ddof1, ddof2):
            # CompareMeans of the weighted samples with the given ddof pair.
            return CompareMeans(DescrStatsW(x1, weights=w1, ddof=ddof1),
                                DescrStatsW(x2, weights=w2, ddof=ddof2))

        # Note: stats.ttest_ind handles 2d/nd arguments
        res_sp = stats.ttest_ind(x1r, x2r)
        weighted = ttest_ind(x1, x2, weights=(w1, w2))
        assert_almost_equal(weighted[:2], res_sp, 14)

        # check correct ttest independent of user ddof
        assert_almost_equal(compare(0, 1).ttest_ind()[:2], res_sp, 14)
        assert_almost_equal(compare(1, 2).ttest_ind()[:2], res_sp, 14)

        cm0 = compare(0, 0)
        cm1 = compare(0, 1)
        cm2 = compare(1, 2)

        def assert_same_for_all_ddof(method_name, usevar):
            # Results of cm1 and cm2 must match the ddof=0 baseline cm0.
            baseline = getattr(cm0, method_name)(usevar=usevar)
            with_ddof_01 = getattr(cm1, method_name)(usevar=usevar)
            with_ddof_12 = getattr(cm2, method_name)(usevar=usevar)
            assert_almost_equal(with_ddof_01, baseline, 14)
            assert_almost_equal(with_ddof_12, baseline, 14)

        assert_same_for_all_ddof('ttest_ind', 'unequal')

        # check confint independent of user ddof
        assert_same_for_all_ddof('tconfint_diff', 'pooled')
        assert_same_for_all_ddof('tconfint_diff', 'unequal')
Пример #14
0
 def fn(control, test):
     """Confidence interval for the difference of means, test vs. control.

     Uses ``zconfint_diff`` when ``_is_proportion(control, test)`` is true
     and ``tconfint_diff`` otherwise. ``test`` is the first sample and
     ``control`` the second.
     """
     c_means = CompareMeans(DescrStatsW(test), DescrStatsW(control))
     use_z = _is_proportion(control, test)
     interval = c_means.zconfint_diff if use_z else c_means.tconfint_diff
     return interval()
Пример #15
0
    def test_ttest_2sample(self):
        """Check weighted two-sample t-test results and their ddof invariance.

        scipy's ``ttest_ind`` on the repeated samples is the reference for
        the weighted ``ttest_ind``; ``CompareMeans`` t-tests and confidence
        intervals must give the same numbers whatever ``ddof`` the
        ``DescrStatsW`` inputs were created with.
        """
        x1, x2 = self.x1, self.x2
        x1r, x2r = self.x1r, self.x2r
        w1, w2 = self.w1, self.w2
        decimal = 14

        # Note: stats.ttest_ind handles 2d/nd arguments
        res_sp = stats.ttest_ind(x1r, x2r)
        assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
                            res_sp, decimal)

        # check correct ttest independent of user ddof
        for ddof1, ddof2 in ((0, 1), (1, 2)):
            cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=ddof1),
                              DescrStatsW(x2, weights=w2, ddof=ddof2))
            assert_almost_equal(cm.ttest_ind()[:2], res_sp, decimal)

        # Baseline (ddof 0/0) followed by two other ddof combinations.
        cms = [CompareMeans(DescrStatsW(x1, weights=w1, ddof=ddof1),
                            DescrStatsW(x2, weights=w2, ddof=ddof2))
               for ddof1, ddof2 in ((0, 0), (0, 1), (1, 2))]

        results = [cm.ttest_ind(usevar='unequal') for cm in cms]
        assert_almost_equal(results[1], results[0], decimal)
        assert_almost_equal(results[2], results[0], decimal)

        # check confint independent of user ddof
        for usevar in ('pooled', 'unequal'):
            results = [cm.tconfint_diff(usevar=usevar) for cm in cms]
            assert_almost_equal(results[1], results[0], decimal)
            assert_almost_equal(results[2], results[0], decimal)
    info = max(values, key=itemgetter(2))
    a = info[0] - 1
    b = info[1] - 1
    x = range(1, 10)
    n = a + 1
    l = "User:"******"User:"******"Pearson:", scipy.stats.pearsonr(R[a], R[b]))

    print(CompareMeans(DescrStatsW(R[a]), DescrStatsW(R[b])).summary())

    results = sm.OLS(R[b], R[a]).fit()
    print(results.summary())

    for i in range(0, 9):
        if R[a][i] == 0:
            if R[b][i] >= 3:
                print("product", i + 1, "recommended to user", a + 1)
    for i in range(0, 9):
        if R[b][i] == 0:
            if R[a][i] >= 3:
                print("product", i + 1, "recommended to user", b + 1)
Пример #17
0
# p-value criterion

# One-tailed test
# Reject H_0 if the p-value \leq \alpha

from statsmodels.stats.weightstats import DescrStatsW, CompareMeans

# NOTE(review): `setosa` and `virginica` are not defined in this excerpt;
# they must be provided by earlier code.
test_setosa = DescrStatsW(setosa)
test_virginica = DescrStatsW(virginica)
# Variant A: comparison object obtained through DescrStatsW.get_compare.
test_A = test_setosa.get_compare(test_virginica)
z, p_valor = test_A.ztest_ind(alternative='larger', value=0)

print('O valor de z é ', z)
print('O p-valor é ', p_valor)

# Variant B: the same z-test call, with CompareMeans constructed directly.
test_B = CompareMeans(test_setosa, test_virginica)
z, p_valor = test_B.ztest_ind(alternative='larger', value=0)

print('O valor de z é ', z)
print('O p-valor é ', p_valor)

# Bare expression: its value is only shown in an interactive session or
# notebook; in a plain script the result is discarded.
# NOTE(review): `significancia` is not defined in this excerpt.
p_valor <= significancia

# We accept the null hypothesis.

# Testes não Paramétricos
"""
Teste do Qui-Quadrado (\chi^2)
 
 Também conhecido como teste de adequação ao ajustamento, seu nome se 
 deve ao fato de utilizar uma variável estatística padronizada, representada pela 
Пример #18
0
    32.6, 33.3, 32.2
]

a = 0.05  # significance level (default) = 1 - confidence coefficient
alt = 'two-sided'  # two-sided test (default)
# use 'smaller' for a left-tailed test
# use 'larger' for a right-tailed test

d = DescrStatsW(np.array(X) - np.array(Y))  # paired-samples case
d.ttest_mean(alternative=alt)[1]  # p-value
#> 0.0006415571512322235

d.tconfint_mean(alpha=a, alternative=alt)  # confidence interval
#> (-3.9955246743198867, -1.3644753256801117)

c = CompareMeans(DescrStatsW(X), DescrStatsW(Y))  # unpaired-samples case

ve = 'pooled'  # assume equal variances (default); use 'unequal' otherwise
c.ttest_ind(alternative=alt, usevar=ve)[1]  # p-value
#> 0.000978530937238609

c.tconfint_diff(alpha=a, alternative=alt, usevar=ve)  # confidence interval
#> (-4.170905570517185, -1.1890944294828283)

### 4.4.4 Test of independence (chi-squared test)

import pandas as pd
my_url = ('https://raw.githubusercontent.com/taroyabuki'
          '/fromzero/master/data/smoker.csv')
my_data = pd.read_csv(my_url)