def test_f_high(self):
    """f_high should match reference values from R.

    Each key is the argument tuple passed to ``f_high`` (unpacked
    positionally); each value is the expected result. The table covers
    both integer and non-integer arguments.
    """
    expected = {
        (1, 1, 0): 1,
        (1, 1, 1): 0.5,
        (1, 1, 20): 0.1400487,
        (1, 1, 1000000): 0.0006366196,
        (1, 10, 0): 1,
        (1, 10, 5): 0.0493322,
        (1, 10, 20): 0.001193467,
        (10, 1, 0): 1,
        (10, 10, 14.7): 0.0001062585,
        # test non-integer degrees of freedom
        (13.7, 11.9, 3.8): 0.01340347,
        # used following series to track down a bug after a failed test
        # case
        (28, 29, 2): 0.03424088,
        (28, 29, 10): 1.053019e-08,
        (28, 29, 20): 1.628245e-12,
        (28, 29, 300): 5.038791e-29,
        (28, 35, 1): 0.4946777,
        (28, 37, 1): 0.4934486,
        (28, 38, 1): 0.4928721,
        (28, 38.001, 1): 0.4928716,
        (28, 38.5, 1): 0.4925927,
        (28, 39, 1): 0.492319,
        # (28, 39, 10) and (28, 39, 50) were each listed twice with
        # identical values; a dict literal keeps only one entry per key,
        # so the repeats have been dropped.
        (28, 39, 10): 1.431901e-10,
        (28, 39, 20): 1.432014e-15,
        (28, 39, 30): 1.059964e-18,
        (28, 39, 50): 8.846678e-23,
        (28, 39, 300): 1.226935e-37,
        (28, 39, 304.7): 9.08154e-38,
        (28.4, 39.2, 304.7): 5.573927e-38,
        (1032, 2050, 0): 1,
        (1032, 2050, 4.15): 1.23535e-165,
        (1032, 2050, 0.5): 1,
        (1032, 2050, 0.1): 1,
    }
    # Sort so the cases are checked in a deterministic order.
    # NOTE(review): atol=10e-7 is 1e-6, so for the very small expected
    # values above the absolute tolerance dominates the comparison --
    # confirm this looseness is intended.
    for key, value in sorted(expected.items()):
        np.testing.assert_allclose(f_high(*key), value, atol=10e-7)
def ANOVA_one_way(a):
    """Perform a one-way analysis of variance.

    The F value is the between-group mean square (variance of the group
    means, weighted by group size) divided by the within-group mean
    square (pooled within-group variance).

    Parameters
    ----------
    a : iterable of array-like
        One entry per category; each entry holds the observed values for
        that category. Plain lists, tuples and numpy arrays are all
        accepted: every category is converted with ``np.asarray`` before
        use. The analysis is meant for 2 or more categories.

    Returns
    -------
    tuple
        ``(F, f_high(dfn, dfd, F))`` where ``dfn`` is the number of
        groups minus one and ``dfd`` is the number of observations minus
        the number of groups. ``(nan, nan)`` is returned when the
        within-group mean square is exactly zero, because F is undefined
        in that case.
    """
    # Materialise once: this lets ``a`` be any iterable (it is walked
    # twice below) and guarantees .var()/.mean() exist on every group.
    groups = [np.asarray(group) for group in a]
    num_groups = len(groups)
    num_cases = sum(len(group) for group in groups)

    # Within-group mean square (denominator). var(ddof=1) * (n - 1)
    # recovers each group's sum of squared deviations from its own mean.
    dfd = num_cases - num_groups
    within_ss = [group.var(ddof=1) * (len(group) - 1) for group in groups]
    within_groups = np.sum(within_ss) / dfd
    if within_groups == 0.:
        # Dividing by a zero within-group mean square would blow up.
        return np.nan, np.nan

    # Between-group mean square (numerator).
    all_vals = []
    for group in groups:
        all_vals.extend(group)
    grand_mean = np.array(all_vals).mean()

    between_groups = 0
    for group in groups:
        diff = group.mean() - grand_mean
        between_groups += diff * diff * len(group)
    dfn = num_groups - 1
    between_groups = between_groups / dfn

    F = between_groups / within_groups
    return F, f_high(dfn, dfd, F)