Пример #1
0
    def test_levene(self):
        """Compare scale (variance homogeneity) tests with reference numbers.

        ``smo.test_scale_oneway`` is run on ``self.data`` with the ``'abs'``
        transform and ``method='equal'`` for three choices of ``center``
        (median, trimmed, mean).  The expected values are the ones quoted
        in the comments from the lawstat, car and onewaytests packages.
        ``scipy.stats.bartlett`` is checked against the onewaytests
        Bartlett numbers at the end.
        """
        samples = self.data
        # keyword options that are identical in all test_scale_oneway calls
        shared = dict(method='equal', transform='abs', trim_frac_mean=0.2)

        # --- center = median
        # lawstat: Test Statistic = 1.0866123063642, p-value = 0.3471072204516
        expected_stat, expected_pval = 1.0866123063642, 0.3471072204516
        res_median = smo.test_scale_oneway(samples, center='median', **shared)
        assert_allclose(res_median.pvalue, expected_pval, rtol=1e-13)
        assert_allclose(res_median.statistic, expected_stat, rtol=1e-13)

        # --- center = trimmed mean
        # library car
        # > lt = leveneTest(y ~ g, df3, center=mean, trim=0.2)
        expected_stat, expected_pval = 1.10732113109744, 0.340359251994645
        expected_df = [2, 40]
        res_trimmed = smo.test_scale_oneway(samples, center='trimmed',
                                            **shared)
        assert_allclose(res_trimmed.pvalue, expected_pval, rtol=1e-13)
        assert_allclose(res_trimmed.statistic, expected_stat, rtol=1e-13)
        assert_allclose(res_trimmed.df, expected_df)

        # --- center = mean
        # library(onewaytests), test uses mean as center
        # > st = homog.test(y ~ g, df3)
        # method = "Levene's Homogeneity Test"
        expected_stat, expected_pval = 1.07894485177512, 0.349641166869223
        expected_df = [2, 40]  # reported as "parameter" in the reference
        res_mean = smo.test_scale_oneway(samples, center='mean', **shared)
        assert_allclose(res_mean.pvalue, expected_pval, rtol=1e-13)
        assert_allclose(res_mean.statistic, expected_stat, rtol=1e-13)
        assert_allclose(res_mean.df, expected_df)

        # --- Bartlett, which is available in scipy.stats
        # > st = homog.test(y ~ g, df3, method = "Bartlett")
        # method = "Bartlett's Homogeneity Test"
        # The reference also has parameter = 2, but scipy's bartlett
        # does not return df, so only statistic and p-value are compared.
        from scipy import stats
        expected_stat, expected_pval = 3.01982414477323, 0.220929402900495
        bartlett_stat, bartlett_pval = stats.bartlett(*samples)
        assert_allclose(bartlett_pval, expected_pval, rtol=1e-13)
        assert_allclose(bartlett_stat, expected_stat, rtol=1e-13)
# Fit _MultivariateOLS from a formula with the two outcome columns on the
# left-hand side and the grouping column on the right, then run mv_test().
mod = _MultivariateOLS.from_formula('人均地区生产总值+公共财政支出 ~ 省份', data)
result = mod.fit(method='svd')
result.mv_test()

# 3. Multivariate statistics - test of covariance matrices
temp_data = []
temp_name = []
temp_nobs = []
for name, group in data[['省份', '人均地区生产总值', '公共财政支出']].groupby(['省份']):
    # one 2x2 covariance matrix per group (variables in rows after .T)
    temp_data.append(np.cov(np.asarray(group[['人均地区生产总值', '公共财政支出']].T)))
    temp_name.append(name)
    # Take the group size from the data so the observation counts always
    # match the covariance matrices instead of assuming 5 rows per group.
    temp_nobs.append(len(group))

# statistic_base is Box's M statistic
# pvalue is the p-value given in the book
test_cov_oneway(temp_data, temp_nobs)

# 4. Analysis of error variance
temp_data = []
temp_name = []
for name, group in data[['省份', '人均地区生产总值', '公共财政支出']].groupby(['省份']):
    # deviations of each group's values from its own group mean
    temp_data.append(np.array((group['人均地区生产总值'] - group['人均地区生产总值'].mean())))
temp_data = np.array(temp_data)
res0 = smo.test_scale_oneway(temp_data,
                             method='equal',
                             center='mean',
                             transform='abs',
                             trim_frac_mean=0.2)

# Bare expression: the summary string is built but not printed or stored.
# The labels now match the values: df_num -> df1, df_denom -> df2.
'statistic:%s,df1:%s,df2:%s,pvalue:%s' % (res0.statistic, res0.df_num,
                                           res0.df_denom, res0.pvalue)
Пример #3
0
    def test_options(self):
        """Regression-test option combinations of ``smo.test_scale_oneway``.

        The expected numbers were recorded from an initial run; many of
        these options might not be implemented in other packages, so there
        is no external reference.  The last case runs the scale test with
        the identity transform and zero center and compares it with
        ``anova_oneway`` on the same data.
        """
        samples = self.data
        tight = dict(rtol=1e-13)  # tolerance used for statistic and p-value

        # --- method='unequal', median center, absolute deviations
        expected = dict(statistic=1.0173464626246675,
                        pvalue=0.3763806150460239,
                        df=(2.0, 24.40374758005409))
        out = smo.test_scale_oneway(samples, method='unequal',
                                    center='median', transform='abs',
                                    trim_frac_mean=0.2)
        assert_allclose(out.pvalue, expected['pvalue'], **tight)
        assert_allclose(out.statistic, expected['statistic'], **tight)
        assert_allclose(out.df, expected['df'])

        # --- method='bf': this result also carries pvalue2 / df2
        expected = dict(statistic=1.0329722145270606,
                        pvalue=0.3622778213868562,
                        df=(1.83153791573948, 30.6733640949525),
                        pvalue2=0.3679999679787619,
                        df2=(2, 30.6733640949525))
        out = smo.test_scale_oneway(samples, method='bf',
                                    center='median', transform='abs',
                                    trim_frac_mean=0.2)
        assert_allclose(out.pvalue, expected['pvalue'], **tight)
        assert_allclose(out.statistic, expected['statistic'], **tight)
        assert_allclose(out.df, expected['df'])
        assert_allclose(out.pvalue2, expected['pvalue2'], **tight)
        assert_allclose(out.df2, expected['df2'])

        # --- method='equal', mean center, squared deviations
        expected = dict(statistic=1.7252431333701745,
                        pvalue=0.19112038168209514,
                        df=(2.0, 40.0))
        out = smo.test_scale_oneway(samples, method='equal',
                                    center='mean', transform='square',
                                    trim_frac_mean=0.2)
        assert_allclose(out.pvalue, expected['pvalue'], **tight)
        assert_allclose(out.statistic, expected['statistic'], **tight)
        # exact comparison for df in this case
        assert_equal(out.df, expected['df'])

        # --- user-supplied callable as transform
        def log_of_square(x):
            return np.log(x * x)

        expected = dict(statistic=0.4129696057329463,
                        pvalue=0.6644711582864451,
                        df=(2.0, 40.0))
        out = smo.test_scale_oneway(samples, method='equal',
                                    center='mean', transform=log_of_square,
                                    trim_frac_mean=0.2)
        assert_allclose(out.pvalue, expected['pvalue'], **tight)
        assert_allclose(out.statistic, expected['statistic'], **tight)
        assert_allclose(out.df, expected['df'])

        # --- compare no transform with standard anova
        out = smo.test_scale_oneway(samples, method='unequal',
                                    center=0, transform='identity',
                                    trim_frac_mean=0.2)
        anova = anova_oneway(self.data, use_var="unequal")

        assert_allclose(out.pvalue, anova.pvalue, **tight)
        assert_allclose(out.statistic, anova.statistic, **tight)
        assert_allclose(out.df, anova.df)