Пример #1
0
    def test_match_distribution(self):
        """Some really basic testing for match_distribution
        """
        ds = datasets['uni2medium']  # large to get stable stats
        data = ds.samples[:, ds.a.bogus_features[0]]
        # choose bogus feature, which
        # should have close to normal distribution

        # Lets test ad-hoc rv_semifrozen
        floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
        self.assertTrue(floc[0] == 0)

        fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
        self.assertTrue(fscale[1] == 1)

        flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
        self.assertTrue(flocscale[1] == 1 and flocscale[0] == 0)

        full = scipy.stats.norm.fit(data)
        for res in [floc, fscale, flocscale, full]:
            self.assertTrue(len(res) == 2)

        data_mean = np.mean(data)
        for loc in [None, data_mean]:
            for test in ['p-roc', 'kstest']:
                # some really basic testing
                matched = match_distribution(data=data,
                                             distributions=[
                                                 'scipy',
                                                 ('norm', {
                                                     'name': 'norm_fixed',
                                                     'loc': 0.2,
                                                     'scale': 0.3
                                                 })
                                             ],
                                             test=test,
                                             loc=loc,
                                             p=0.05)
                # at least norm should be in there
                names = [m[2] for m in matched]
                if test == 'p-roc':
                    if cfg.getboolean('tests', 'labile', default='yes'):
                        # we can guarantee that only for norm_fixed
                        self.assertTrue('norm' in names)
                        self.assertTrue('norm_fixed' in names)
                        inorm = names.index('norm_fixed')
                        # and it should be at least in the first
                        # 30 best matching ;-)
                        self.assertTrue(inorm <= 30)
                    # Test plotting only once
                    if loc is None and externals.exists("pylab plottable"):
                        import pylab as pl
                        from mvpa2.clfs.stats import plot_distribution_matches
                        fig = pl.figure()
                        plot_distribution_matches(data,
                                                  matched,
                                                  legend=1,
                                                  nbest=5)
                        #pl.show()
                        pl.close(fig)
Пример #2
0
    def test_match_distribution(self):
        """Some really basic testing for match_distribution
        """
        ds = datasets['uni2medium']      # large to get stable stats
        data = ds.samples[:, ds.a.bogus_features[0]]
        # choose bogus feature, which
        # should have close to normal distribution

        # Lets test ad-hoc rv_semifrozen
        floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
        self.assertTrue(floc[0] == 0)

        fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
        self.assertTrue(fscale[1] == 1)

        flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
        self.assertTrue(flocscale[1] == 1 and flocscale[0] == 0)

        full = scipy.stats.norm.fit(data)
        for res in [floc, fscale, flocscale, full]:
            self.assertTrue(len(res) == 2)

        data_mean = np.mean(data)
        for loc in [None, data_mean]:
            for test in ['p-roc', 'kstest']:
                # some really basic testing
                matched = match_distribution(
                    data=data,
                    distributions = ['scipy',
                                     ('norm',
                                      {'name': 'norm_fixed',
                                       'loc': 0.2,
                                       'scale': 0.3})],
                    test=test, loc=loc, p=0.05)
                # at least norm should be in there
                names = [m[2] for m in matched]
                if test == 'p-roc':
                    if cfg.getboolean('tests', 'labile', default='yes'):
                        # we can guarantee that only for norm_fixed
                        self.assertTrue('norm' in names)
                        self.assertTrue('norm_fixed' in names)
                        inorm = names.index('norm_fixed')
                        # and it should be at least in the first
                        # 30 best matching ;-)
                        self.assertTrue(inorm <= 30)
                    # Test plotting only once
                    if loc is None and externals.exists("pylab plottable"):
                        import pylab as pl
                        from mvpa2.clfs.stats import plot_distribution_matches
                        fig = pl.figure()
                        plot_distribution_matches(data, matched, legend=1, nbest=5)
                        #pl.show()
                        pl.close(fig)