def test_match_distribution(self):
    """Run basic sanity checks on rv_semifrozen and match_distribution.

    First fits a normal distribution to one bogus feature of the
    'uni2medium' dataset with loc and/or scale frozen, checking that the
    frozen values come back unchanged and that every fit yields two
    parameters.  Then calls match_distribution for each combination of
    loc (None or the data mean) and test ('p-roc' or 'kstest').  For
    'p-roc' it checks -- when labile tests are enabled -- that 'norm' and
    'norm_fixed' are among the matches, and exercises the plotting helper
    once if pylab is usable.
    """
    # Large dataset to get stable stats.
    ds = datasets['uni2medium']
    # Pick a bogus feature, which should be close to normally distributed.
    data = ds.samples[:, ds.a.bogus_features[0]]

    # --- ad-hoc test of rv_semifrozen ---------------------------------
    # Only loc frozen: the first fitted parameter must be the frozen loc.
    loc_frozen = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
    self.assertTrue(loc_frozen[0] == 0)

    # Only scale frozen: the second fitted parameter must be the scale.
    scale_frozen = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
    self.assertTrue(scale_frozen[1] == 1)

    # Both frozen: both parameters must be returned as given.
    both_frozen = rv_semifrozen(
        scipy.stats.norm, loc=0, scale=1.0).fit(data)
    self.assertTrue(both_frozen[1] == 1 and both_frozen[0] == 0)

    # Unconstrained fit for comparison; every result has two parameters.
    unfrozen = scipy.stats.norm.fit(data)
    for params in (loc_frozen, scale_frozen, both_frozen, unfrozen):
        self.assertTrue(len(params) == 2)

    # --- match_distribution -------------------------------------------
    data_mean = np.mean(data)
    for loc in (None, data_mean):
        for test in ('p-roc', 'kstest'):
            # Some really basic testing: all of scipy's distributions
            # plus a normal one with fixed parameters, named 'norm_fixed'.
            matched = match_distribution(
                data=data,
                distributions=[
                    'scipy',
                    ('norm',
                     {'name': 'norm_fixed', 'loc': 0.2, 'scale': 0.3}),
                ],
                test=test,
                loc=loc,
                p=0.05)
            # Third item of every match is used as the distribution name.
            names = [entry[2] for entry in matched]

            # Nothing else is verified for tests other than 'p-roc'.
            if test != 'p-roc':
                continue

            if cfg.getboolean('tests', 'labile', default='yes'):
                # At least norm should be in there, although that can be
                # guaranteed only for norm_fixed.
                self.assertTrue('norm' in names)
                self.assertTrue('norm_fixed' in names)
                # ... and it should be at least in the first
                # 30 best matching ;-)
                fixed_rank = names.index('norm_fixed')
                self.assertTrue(fixed_rank <= 30)

            # Test plotting only once.
            if loc is None and externals.exists("pylab plottable"):
                import pylab as pl
                from mvpa2.clfs.stats import plot_distribution_matches
                figure = pl.figure()
                plot_distribution_matches(data, matched, legend=1, nbest=5)
                # The figure is closed right away without being shown.
                pl.close(figure)
def test_match_distribution(self):
    """Some really basic testing for rv_semifrozen and match_distribution.

    Part one: fit scipy.stats.norm to a single bogus feature of the
    'uni2medium' dataset through rv_semifrozen with loc, scale, or both
    held fixed, and make sure the fixed values are reported back and each
    fit (including a plain scipy fit) has exactly two parameters.

    Part two: for loc in (None, mean of the data) and test in
    ('p-roc', 'kstest'), call match_distribution on the same data.  Only
    the 'p-roc' results are inspected: if the 'tests'/'labile'
    configuration flag is on, 'norm' and 'norm_fixed' must be among the
    matched names and 'norm_fixed' must sit at index 30 or lower.
    Plotting of the matches is exercised once, when loc is None and
    pylab is reported as plottable.
    """
    ds = datasets['uni2medium']  # large to get stable stats
    # Choose a bogus feature, which should have a close to normal
    # distribution.
    data = ds.samples[:, ds.a.bogus_features[0]]

    # Let's test ad-hoc rv_semifrozen.
    fit_loc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
    self.assertTrue(fit_loc[0] == 0)

    fit_scale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
    self.assertTrue(fit_scale[1] == 1)

    fit_loc_scale = rv_semifrozen(scipy.stats.norm,
                                  loc=0, scale=1.0).fit(data)
    self.assertTrue(fit_loc_scale[1] == 1 and fit_loc_scale[0] == 0)

    fit_full = scipy.stats.norm.fit(data)
    all_fits = [fit_loc, fit_scale, fit_loc_scale, fit_full]
    for fit_result in all_fits:
        self.assertTrue(len(fit_result) == 2)

    data_mean = np.mean(data)
    for loc in [None, data_mean]:
        for test in ['p-roc', 'kstest']:
            # A fresh specification is built on every iteration: every
            # scipy distribution plus a fixed-parameter normal one.
            fixed_norm_args = {'name': 'norm_fixed',
                               'loc': 0.2,
                               'scale': 0.3}
            candidates = ['scipy', ('norm', fixed_norm_args)]
            matched = match_distribution(data=data,
                                         distributions=candidates,
                                         test=test,
                                         loc=loc,
                                         p=0.05)
            names = [item[2] for item in matched]

            if test == 'p-roc':
                labile = cfg.getboolean('tests', 'labile', default='yes')
                if labile:
                    # At least norm should be in there; that can be
                    # guaranteed only for norm_fixed.
                    self.assertTrue('norm' in names)
                    self.assertTrue('norm_fixed' in names)
                    inorm = names.index('norm_fixed')
                    # It should be at least in the first 30 best
                    # matching ;-)
                    self.assertTrue(inorm <= 30)

                # Test plotting only once: restricted to the loc=None
                # pass, and only if pylab can actually plot.
                if loc is None:
                    if externals.exists("pylab plottable"):
                        import pylab as pl
                        from mvpa2.clfs.stats import \
                            plot_distribution_matches
                        fig = pl.figure()
                        plot_distribution_matches(
                            data, matched, legend=1, nbest=5)
                        # No pl.show() here; just release the figure.
                        pl.close(fig)