Пример #1
0
 def test_pairwise_tukey(self):
     """Check ``pairwise_tukey`` p-values on the 'anova' dataset.

     The 'p-tukey' column, rounded to three decimals, must agree with the
     hard-coded reference values within an absolute tolerance of 0.05.
     """
     reference = [0.074, 0.435, 0.415, 0.004, 0.789, 0.037]
     data = read_dataset('anova')
     result = pairwise_tukey(data=data, dv='Pain threshold',
                             between='Hair color')
     observed = np.round(result['p-tukey'].to_numpy(), 3)
     # Keep the reference as the first argument: np.allclose scales its
     # relative tolerance by the second one.
     assert np.allclose(reference, observed, atol=0.05)
Пример #2
0
    def test_pairwise_tukey(self):
        """Validate ``pairwise_tukey`` against reference results.

        Three scenarios are exercised in turn:

        1. the 'anova' (hair color) dataset, called as a function;
        2. the Palmer Penguins dataset, where the between factor is
           unbalanced, called as a DataFrame method;
        3. a balanced subset of the penguins whose between factor is stored
           as a categorical.

        P-values differ slightly from the reference software because a
        different algorithm is used for the studentized range approximation,
        so they are compared either with a loose tolerance or only through
        their significance at the 0.05 level.
        """
        def significance(table):
            # 'Yes' where the Tukey p-value is below 0.05, 'No' otherwise.
            return np.where(table['p-tukey'] < 0.05, 'Yes', 'No')

        # --- Hair color dataset -------------------------------------------
        # Originally compared with the R package `userfriendlyscience`
        # (removed from CRAN as of Feb 2021).
        # JASP:     [0.0741, 0.4356, 0.4147, 0.0037, 0.7893, 0.0366]
        # Pingouin: [0.0742, 0.4369, 0.4160, 0.0037, 0.7697, 0.0367]
        reference_pvals = [0.074, 0.435, 0.415, 0.004, 0.789, 0.037]
        hair = read_dataset('anova')
        hair_stats = pairwise_tukey(data=hair, dv='Pain threshold',
                                    between='Hair color')
        observed_pvals = np.round(hair_stats['p-tukey'].to_numpy(), 3)
        assert np.allclose(reference_pvals, observed_pvals, atol=0.05)

        # --- Palmer Penguins, unbalanced Species factor (vs. JASP) --------
        penguins = read_dataset("penguins")
        unbalanced = penguins.pairwise_tukey(dv="body_mass_g",
                                             between="species").round(4)
        expected_unbalanced = (
            ('A', ["Adelie", "Adelie", "Chinstrap"]),
            ('B', ["Chinstrap", "Gentoo", "Gentoo"]),
            ('diff', [-32.426, -1375.354, -1342.928]),
            # Tukey-Kramer: the standard error varies between pairs
            ('se', [67.5117, 56.1480, 69.8569]),
            ('T', [-0.4803, -24.4952, -19.2240]),
        )
        for column, expected in expected_unbalanced:
            assert np.array_equal(unbalanced[column], expected)
        # P-values JASP:     [0.8807, 0.0000, 0.0000]
        # P-values Pingouin: [0.8694, 0.0010, 0.0010]
        assert np.array_equal(significance(unbalanced), ['No', 'Yes', 'Yes'])

        # --- Palmer Penguins, balanced groups ------------------------------
        # First 20 rows of each species; the between column is additionally
        # converted to a categorical to make the input less trivial.
        subset = penguins.groupby('species').head(20).copy()
        subset['species'] = subset['species'].astype('category')
        balanced = subset.pairwise_tukey(dv="body_mass_g",
                                         between="species").round(4)
        expected_balanced = (
            ('A', ["Adelie", "Adelie", "Chinstrap"]),
            ('B', ["Chinstrap", "Gentoo", "Gentoo"]),
            ('diff', [-142.5, -1457.5, -1315.]),
            # Tukey HSD: one common standard error for every pair
            ('se', [142.9475, 142.9475, 142.9475]),
            ('T', [-0.9969, -10.1961, -9.1992]),
        )
        for column, expected in expected_balanced:
            assert np.array_equal(balanced[column], expected)
        # P-values JASP:     [0.5818, 0.0000, 0.0000]
        # P-values Pingouin: [0.5766, 0.0010, 0.0010]
        assert np.array_equal(significance(balanced), ['No', 'Yes', 'Yes'])