def test_independent_variable_singular():
    """Fitting must raise ``ValueError`` when a regressor is duplicated.

    A copy of the ``Drug`` column is stored under the name ``dup`` and the
    model is specified as ``Drug * dup``; calling ``fit`` on that model is
    expected to fail with ``ValueError``.
    """
    formula = "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup"
    singular_data = data.copy()
    singular_data["dup"] = singular_data["Drug"]

    first_model = _MultivariateOLS.from_formula(formula, singular_data)
    assert_raises(ValueError, first_model.fit)

    # The identical check is repeated on a second, freshly built model.
    second_model = _MultivariateOLS.from_formula(formula, singular_data)
    assert_raises(ValueError, second_model.fit)
def test_independent_variable_singular():
    # ``dup`` is an exact copy of ``Drug``; a model specified as
    # ``Drug * dup`` is expected to make ``fit`` raise ValueError.
    frame = data.copy()
    frame['dup'] = frame['Drug']
    spec = 'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup'

    # Two independently constructed models are checked, one after the other.
    for _attempt in range(2):
        model = _MultivariateOLS.from_formula(spec, frame)
        assert_raises(ValueError, model.fit)
def test_from_formula_vs_no_formula():
    """Check that explicit design matrices reproduce the formula-based tests.

    The reference results come from a model built with ``from_formula`` and
    tested with the default hypotheses. The same design matrices are then
    generated with ``patsy.dmatrices`` and passed to ``_MultivariateOLS``
    directly, once as DataFrames and once as plain numpy arrays. For the
    ``Intercept`` and ``Drug`` terms an explicit contrast matrix ``L`` is
    supplied, and the resulting ``stat`` table must agree with the reference
    to six decimals.
    """
    formula = "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted"
    reference = _MultivariateOLS.from_formula(formula, data).fit(method="svd").mv_test()

    endog, exog = patsy.dmatrices(formula, data, return_type="dataframe")

    # (term name, contrast matrix L); each row of L selects exog columns
    # by position.
    contrasts = [
        ("Intercept", np.array([[1, 0, 0, 0, 0, 0]])),
        ("Drug", np.array([[0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0]])),
    ]
    for term, L in contrasts:
        # DataFrame input first, then the equivalent numpy array input.
        for y, x in ((endog, exog), (endog.values, exog.values)):
            fitted = _MultivariateOLS(y, x).fit(method="svd")
            tested = fitted.mv_test(hypotheses=[[term, L, None]])
            assert_array_almost_equal(
                tested[term]["stat"].values,
                reference[term]["stat"].values,
                decimal=6,
            )
def test_from_formula_vs_no_formula():
    # Models built from explicit endog/exog matrices (DataFrame or numpy
    # array) must give the same ``stat`` tables as the model built with
    # ``from_formula`` when the matching contrast matrix L is supplied.
    formula = 'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted'
    mod = _MultivariateOLS.from_formula(formula, data)
    r0 = mod.fit(method='svd').mv_test()

    endog, exog = patsy.dmatrices(formula, data, return_type="dataframe")

    def check(term, contrast, y, x):
        # Fit on the given matrices and compare the named hypothesis with
        # the formula-based reference result.
        res = _MultivariateOLS(y, x).fit(method='svd')
        r1 = res.mv_test(hypotheses=[[term, contrast, None]])
        assert_array_almost_equal(r1[term]['stat'].values,
                                  r0[term]['stat'].values, decimal=6)

    L = np.array([[1, 0, 0, 0, 0, 0]])
    check('Intercept', L, endog, exog)                # DataFrame input
    check('Intercept', L, endog.values, exog.values)  # Numpy array input

    L = np.array([[0, 1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0]])
    check('Drug', L, endog, exog)                     # DataFrame input
    check('Drug', L, endog.values, exog.values)       # Numpy array input
def test_L_M_matrices_1D_array():
    """``mv_test`` is expected to raise ``ValueError`` for 1-D ``L`` or ``M``."""
    fitted = _MultivariateOLS.from_formula(
        "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted", data
    ).fit(method="svd")

    # Case 1: the contrast matrix L itself is passed as a flat vector.
    flat_contrast = np.array([1, 0, 0, 0, 0, 0])
    assert_raises(ValueError, fitted.mv_test, hypotheses=[["Drug", flat_contrast, None]])

    # Case 2: L is a 2-D single-row matrix, but the transform M is flat.
    row_contrast = np.array([[1, 0, 0, 0, 0, 0]])
    flat_transform = np.array([1, 0, 0, 0, 0, 0])
    assert_raises(
        ValueError,
        fitted.mv_test,
        hypotheses=[["Drug", row_contrast, flat_transform]],
    )
def test_L_M_matrices_1D_array():
    # Hypotheses whose L or M is given as a flat vector instead of a 2-D
    # matrix are expected to make ``mv_test`` raise ValueError.
    model = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    res = model.fit(method='svd')

    bad_hypotheses = (
        # 1-D L, no M
        ['Drug', np.array([1, 0, 0, 0, 0, 0]), None],
        # 2-D L, 1-D M
        ['Drug', np.array([[1, 0, 0, 0, 0, 0]]), np.array([1, 0, 0, 0, 0, 0])],
    )
    for hypothesis in bad_hypotheses:
        assert_raises(ValueError, res.mv_test, hypotheses=[hypothesis])
def test_exog_1D_array():
    # The right-hand side has no intercept and ``Depleted`` as its only
    # term; the default ``mv_test`` table for that term is compared with
    # stored reference values to four decimals.
    expected = [
        [0.0019, 8.0000, 20.0000, 55.0013, 0.0000],
        [1.8112, 8.0000, 22.0000, 26.3796, 0.0000],
        [97.8858, 8.0000, 12.1818, 117.1133, 0.0000],
        [93.2742, 4.0000, 11.0000, 256.5041, 0.0000],
    ]

    model = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ 0 + Depleted',
        data)
    tests = model.fit(method='svd').mv_test()

    observed = tests['Depleted']['stat'].values
    assert_array_almost_equal(observed, expected, decimal=4)
def test_exog_1D_array():
    """Compare ``mv_test`` output for an intercept-free, single-term model.

    The formula's right-hand side is ``0 + Depleted``. The ``stat`` table
    reported for ``Depleted`` must match the stored reference values to four
    decimals.
    """
    formula = "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ 0 + Depleted"
    fitted = _MultivariateOLS.from_formula(formula, data).fit(method="svd")
    depleted_stat = fitted.mv_test()["Depleted"]["stat"]

    reference = [
        [0.0019, 8.0000, 20.0000, 55.0013, 0.0000],
        [1.8112, 8.0000, 22.0000, 26.3796, 0.0000],
        [97.8858, 8.0000, 12.1818, 117.1133, 0.0000],
        [93.2742, 4.0000, 11.0000, 256.5041, 0.0000],
    ]
    assert_array_almost_equal(depleted_stat.values, reference, decimal=4)
def test_specify_L_M_by_string():
    # L and M may be given as lists of names instead of numeric matrices.
    # First the statistics of a name-based Intercept hypothesis are checked,
    # then the numeric L and M matrices built from a list of names.
    fitted = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data).fit()

    # --- L given as a list with a single name, M left as None ---
    intercept_res = fitted.mv_test(
        hypotheses=[['Intercept', ['Intercept'], None]])
    expected_stat = [
        [2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
        [9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
        [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
        [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
    ]
    assert_array_almost_equal(intercept_res['Intercept']['stat'].values,
                              expected_stat, decimal=6)

    # --- both L and M given as lists of names ---
    l_names = ['Intercept', 'Drug[T.Trimethaphan]', 'Drug[T.placebo]']
    m_names = ['Histamine1', 'Histamine3', 'Histamine5']
    named_res = fitted.mv_test(hypotheses=[['a', l_names, m_names]])

    expected_l = [[1, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0]]
    assert_array_almost_equal(named_res['a']['contrast_L'], expected_l,
                              decimal=10)

    # The stored transform is compared after transposition.
    expected_m_transposed = [[0, 1, 0, 0],
                             [0, 0, 1, 0],
                             [0, 0, 0, 1]]
    assert_array_almost_equal(named_res['a']['transform_M'].T,
                              expected_m_transposed, decimal=10)
def test_specify_L_M_by_string():
    """Hypotheses can name the ``L`` and ``M`` entries instead of passing matrices.

    Two things are verified: the ``stat`` table of an ``Intercept``
    hypothesis whose ``L`` is a list of names, and the ``contrast_L`` /
    ``transform_M`` matrices reported for a hypothesis whose ``L`` and ``M``
    are both lists of names.
    """
    formula = "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted"
    results = _MultivariateOLS.from_formula(formula, data).fit()

    intercept_test = results.mv_test(hypotheses=[["Intercept", ["Intercept"], None]])
    reference_stat = [
        [2.68607660e-02, 4, 6, 5.43435304e01, 7.59585610e-05],
        [9.73139234e-01, 4, 6, 5.43435304e01, 7.59585610e-05],
        [3.62290202e01, 4, 6, 5.43435304e01, 7.59585610e-05],
        [3.62290202e01, 4, 6, 5.43435304e01, 7.59585610e-05],
    ]
    assert_array_almost_equal(intercept_test["Intercept"]["stat"].values, reference_stat, decimal=6)

    exog_labels = ["Intercept", "Drug[T.Trimethaphan]", "Drug[T.placebo]"]
    endog_labels = ["Histamine1", "Histamine3", "Histamine5"]
    labelled = results.mv_test(hypotheses=[["a", exog_labels, endog_labels]])["a"]

    reference_contrast = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
    ]
    assert_array_almost_equal(labelled["contrast_L"], reference_contrast, decimal=10)

    # ``transform_M`` is transposed before the comparison.
    reference_transform_t = [
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ]
    assert_array_almost_equal(labelled["transform_M"].T, reference_transform_t, decimal=10)
def compare_r_output_dogs_data(method):
    """Compare the default ``mv_test`` tables with reference output from R.

    Tests a within-subject effect interacting with two between-subject
    effects. The reference values come from the R ``car`` library,
    ``Anova(, type=3)``.

    Note: the test statistics Pillai, Wilks, Hotelling-Lawley and Roy are
    the same as the R output, but the approximate F and degrees of freedom
    can be different, because this implementation is based on the SAS
    formulas [1].

    .. [1] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm

    Parameters
    ----------
    method
        Passed through unchanged as ``fit(method=method)``.
    """
    formula = "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted"
    tests = _MultivariateOLS.from_formula(formula, data).fit(method=method).mv_test()

    # Expected ``stat`` table per model term, checked in insertion order.
    expected_tables = {
        "Intercept": [
            [2.68607660e-02, 4, 6, 5.43435304e01, 7.59585610e-05],
            [9.73139234e-01, 4, 6, 5.43435304e01, 7.59585610e-05],
            [3.62290202e01, 4, 6, 5.43435304e01, 7.59585610e-05],
            [3.62290202e01, 4, 6, 5.43435304e01, 7.59585610e-05],
        ],
        "Drug": [
            [8.39646619e-02, 8, 1.20000000e01, 3.67658068e00, 2.12614444e-02],
            [1.18605382e00, 8, 1.40000000e01, 2.55003861e00, 6.01270701e-02],
            [7.69391362e00, 8, 6.63157895e00, 5.50814270e00, 2.07392260e-02],
            [7.25036952e00, 4, 7.00000000e00, 1.26881467e01, 2.52669877e-03],
        ],
        "Depleted": [
            [0.32048892, 4.0, 6.0, 3.18034906, 0.10002373],
            [0.67951108, 4.0, 6.0, 3.18034906, 0.10002373],
            [2.12023271, 4.0, 6.0, 3.18034906, 0.10002373],
            [2.12023271, 4.0, 6.0, 3.18034906, 0.10002373],
        ],
        "Drug:Depleted": [
            [0.15234366, 8.0, 12.0, 2.34307678, 0.08894239],
            [1.13013353, 8.0, 14.0, 2.27360606, 0.08553213],
            [3.70989596, 8.0, 6.63157895, 2.65594824, 0.11370285],
            [3.1145597, 4.0, 7.0, 5.45047947, 0.02582767],
        ],
    }
    for term, expected in expected_tables.items():
        assert_array_almost_equal(tests[term]["stat"].values, expected, decimal=6)
def test_affine_hypothesis():
    """Test a hypothesis that carries a constant matrix ``C``.

    Reference values were obtained with R ``car`` ``linearHypothesis``.

    Note: the test statistics Pillai, Wilks, Hotelling-Lawley and Roy are
    the same as the R output, but the approximate F and degrees of freedom
    can be different, because this implementation is based on SAS formulas.
    """
    formula = "Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted"
    fitted = _MultivariateOLS.from_formula(formula, data).fit(method="svd")

    # Hypothesis is passed as a 4-tuple (name, L, M, C); M is left as None.
    contrast = np.array([[0, 1.2, 1.1, 1.3, 1.5, 1.4], [0, 3.2, 2.1, 3.3, 5.5, 4.4]])
    constant = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
    outcome = fitted.mv_test(hypotheses=[("test1", contrast, None, constant)])

    reference = [
        [0.0269, 8.0000, 12.0000, 7.6441, 0.0010],
        [1.4277, 8.0000, 14.0000, 4.3657, 0.0080],
        [19.2678, 8.0000, 6.6316, 13.7940, 0.0016],
        [18.3470, 4.0000, 7.0000, 32.1072, 0.0001],
    ]
    assert_array_almost_equal(outcome["test1"]["stat"].values, reference, decimal=4)

    # Only checks that the summary can be produced with all three optional
    # sections switched on; the returned summary is not inspected.
    outcome.summary(show_contrast_L=True, show_transform_M=True, show_constant_C=True)
def test_affine_hypothesis():
    # Affine hypothesis (L, M plus a constant matrix C), compared with
    # R car linearHypothesis.
    # Note: the test statistics Pillai, Wilks, Hotelling-Lawley and Roy are
    # the same as the R output, but the approximate F and degrees of freedom
    # can differ because this implementation follows the SAS formulas.
    model = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    res = model.fit(method='svd')

    hypothesis = (
        'test1',
        np.array([[0, 1.2, 1.1, 1.3, 1.5, 1.4],
                  [0, 3.2, 2.1, 3.3, 5.5, 4.4]]),   # L
        None,                                       # M
        np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8]]),                   # C
    )
    tested = res.mv_test(hypotheses=[hypothesis])

    expected = [[0.0269, 8.0000, 12.0000, 7.6441, 0.0010],
                [1.4277, 8.0000, 14.0000, 4.3657, 0.0080],
                [19.2678, 8.0000, 6.6316, 13.7940, 0.0016],
                [18.3470, 4.0000, 7.0000, 32.1072, 0.0001]]
    assert_array_almost_equal(tested['test1']['stat'].values, expected,
                              decimal=4)

    # The summary is rendered with every optional section enabled; its
    # return value is not checked.
    tested.summary(show_contrast_L=True, show_transform_M=True,
                   show_constant_C=True)
def compare_r_output_dogs_data(method):
    '''
    Check the default ``mv_test`` tables against R reference output.

    Testing within-subject effect interact with 2 between-subject effect.
    Compares with R car library Anova(, type=3) output.

    Note: The test statistics Pillai, Wilks, Hotelling-Lawley
          and Roy are the same as R output but the approximate F and degrees
          of freedom can be different. This is due to the fact that this
          implementation is based on SAS formula [1]

    .. [1] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm

    ``method`` is passed through unchanged to ``fit(method=method)``.
    '''
    model = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data)
    tables = model.fit(method=method).mv_test()

    def check(term, expected):
        # Compare the ``stat`` table of one model term to six decimals.
        assert_array_almost_equal(tables[term]['stat'].values, expected,
                                  decimal=6)

    check('Intercept',
          [[2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
           [9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
           [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
           [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05]])
    check('Drug',
          [[8.39646619e-02, 8, 1.20000000e+01, 3.67658068e+00, 2.12614444e-02],
           [1.18605382e+00, 8, 1.40000000e+01, 2.55003861e+00, 6.01270701e-02],
           [7.69391362e+00, 8, 6.63157895e+00, 5.50814270e+00, 2.07392260e-02],
           [7.25036952e+00, 4, 7.00000000e+00, 1.26881467e+01, 2.52669877e-03]])
    check('Depleted',
          [[0.32048892, 4., 6., 3.18034906, 0.10002373],
           [0.67951108, 4., 6., 3.18034906, 0.10002373],
           [2.12023271, 4., 6., 3.18034906, 0.10002373],
           [2.12023271, 4., 6., 3.18034906, 0.10002373]])
    check('Drug:Depleted',
          [[0.15234366, 8., 12., 2.34307678, 0.08894239],
           [1.13013353, 8., 14., 2.27360606, 0.08553213],
           [3.70989596, 8., 6.63157895, 2.65594824, 0.11370285],
           [3.1145597, 4., 7., 5.45047947, 0.02582767]])
from scipy import stats
from statsmodels.stats.multivariate import test_cov_oneway
from statsmodels.multivariate.multivariate_ols import _MultivariateOLS
import statsmodels.stats.oneway as smo

# Load the example data set (hard-coded local path).
data = pd.read_csv(r"D:\书籍资料整理\多元统计分析\例2-1.csv")

# 1. Normality test (Shapiro-Wilk) for every column except the two
#    identifier columns '省份' (province) and '城市' (city).
# ``DataFrame.items`` replaces ``iteritems``, which pandas 2.0 removed.
for key, value in data.items():
    if key in ['省份', '城市']:
        continue
    # Run the test once per column and print the line; the original built
    # this string but discarded it, and ran the test twice.
    statistic, pvalue = stats.shapiro(value)
    print(key + ':statistic %s,pvalue:%s' % (statistic, pvalue))
# Only per-capita regional GDP ('人均地区生产总值') and public fiscal
# expenditure ('公共财政支出') satisfy normality.
# The K-S test is not suitable for small samples; see "Statistics basics -
# hypothesis testing".

# 2. The multivariate test table shows that the province affects Y.
mod = _MultivariateOLS.from_formula('人均地区生产总值+公共财政支出 ~ 省份', data)
result = mod.fit(method='svd')
print(result.mv_test())

# 3. Multivariate statistics - test of equality of covariance matrices.
temp_data = []
temp_name = []
for name, group in data[['省份', '人均地区生产总值', '公共财政支出']].groupby(['省份']):
    # One covariance matrix of the two response variables per province.
    temp_data.append(np.cov(np.asarray(group[['人均地区生产总值', '公共财政支出']].T)))
    temp_name.append(name)
# statistic_base is Box's M statistic.
# pvalue is the p-value given in the book.
# NOTE(review): [5, 5, 5] presumably encodes three provinces with five
# observations each - confirm against the data file.
print(test_cov_oneway(temp_data, [5, 5, 5]))

# 4. Error variance analysis