def test_offset(self):
    """Offset normalization shifts each spectrum so its minimum becomes zero."""
    table = Orange.data.Table([[2, 1, 2, 2, 3]])
    # Without limits, the global minimum (1) is subtracted.
    shifted = Normalize(method=Normalize.Offset)(table)
    np.testing.assert_equal(shifted.X, table.X - 1)
    # Limits spanning the whole range must give the identical result.
    shifted = Normalize(method=Normalize.Offset, limits=True, lower=0, upper=4)(table)
    np.testing.assert_equal(shifted.X, table.X - 1)
def test_minmax(self):
    """MinMax normalization scales each spectrum by its maximum value."""
    table = Orange.data.Table([[2, 1, 2, 2, 3]])
    # Without limits, the maximum (3) is the divisor.
    scaled = Normalize(method=Normalize.MinMax)(table)
    np.testing.assert_equal(scaled.X, table.X / 3)
    # Limits spanning the whole range must give the identical result.
    scaled = Normalize(method=Normalize.MinMax, limits=True, lower=0, upper=4)(table)
    np.testing.assert_equal(scaled.X, table.X / 3)
def test_area_norm(self):
    """Area normalization divides each spectrum by its integral over the limits."""
    table = Orange.data.Table([[2, 1, 2, 2, 3]])
    # PeakMax integration: divide by the maximum inside [0, 4], i.e. 3.
    norm = Normalize(method=Normalize.Area, int_method=Integrate.PeakMax, lower=0, upper=4)(table)
    np.testing.assert_equal(norm.X, table.X / 3)
    # Simple (trapezoid) integration over [0, 4] gives 7.5 for this spectrum.
    norm = Normalize(method=Normalize.Area, int_method=Integrate.Simple, lower=0, upper=4)(table)
    np.testing.assert_equal(norm.X, table.X / 7.5)
    # Normalizing over [0, 2] and re-integrating that range must yield exactly 1.
    norm = Normalize(method=Normalize.Area, int_method=Integrate.Simple, lower=0, upper=2)(table)
    reintegrated = Integrate(methods=Integrate.Simple, limits=[[0, 2]])(norm)
    np.testing.assert_equal(reintegrated.X, np.ones_like(reintegrated.X))
def test_vector_norm(self):
    """Vector normalization divides each row by its Euclidean (L2) norm."""
    table = Orange.data.Table([[2, 1, 2, 2, 3]])
    normalized = Normalize(method=Normalize.Vector)(table)
    expected = table.X / np.sqrt((table.X * table.X).sum(axis=1))
    np.testing.assert_equal(normalized.X, expected)
    # lower/upper are ignored by vector normalization — results must not change.
    for low, high in ((0, 4), (0, 2)):
        normalized = Normalize(method=Normalize.Vector, lower=low, upper=high)(table)
        np.testing.assert_equal(normalized.X, expected)
def test_attribute_norm(self):
    """Attribute normalization divides each row by the value of a meta attribute."""
    table = Orange.data.Table([[2, 1, 2, 2, 3]], metas=[[2]])
    # With no attribute selected the data is returned unchanged.
    normalized = Normalize(method=Normalize.Attribute)(table)
    np.testing.assert_equal(normalized.X, table.X)
    # The meta value (2) is the divisor; lower/upper limits must not matter.
    divisor_attr = table.domain.metas[0]
    for extra in ({}, {"lower": 0, "upper": 4}, {"lower": 2, "upper": 4}):
        normalized = Normalize(method=Normalize.Attribute, attr=divisor_attr, **extra)(table)
        np.testing.assert_equal(normalized.X, table.X / 2)
def test_vector_norm_nan_correction(self):
    """Vector normalization should correct for unknown (NaN) values.

    Even though some values are unknown, the remaining values should be
    normalized to the same results.
    """
    table = Orange.data.Table([[2, 2, 2, 2]])
    result = Normalize(method=Normalize.Vector)(table)
    self.assertAlmostEqual(result.X[0, 0], 0.5)
    # An unknown in between that can be interpolated does not change results.
    table.X[0, 2] = float("nan")
    result = Normalize(method=Normalize.Vector)(table)
    self.assertAlmostEqual(result.X[0, 0], 0.5)
    self.assertTrue(np.isnan(result.X[0, 2]))
    # Unknowns at the edges do not get interpolated, so the effective
    # vector is [2, 2] and the first value becomes 2 / sqrt(8) = sqrt(2)/2.
    table.X[0, 3] = float("nan")
    result = Normalize(method=Normalize.Vector)(table)
    self.assertAlmostEqual(result.X[0, 0], 2 ** 0.5 / 2)
    self.assertTrue(np.all(np.isnan(result.X[0, 2:])))
def test_normalization_vector():
    """Time Vector normalization on several data sets, with and without NaNs.

    Prints elapsed times for the plain pass and for the pass where a single
    unknown forces the interpolation code path, then checks that only the
    injected NaN survives normalization.
    """
    datasets = ["collagen", dust(), spectra20nea(), "peach_juice.dpt"]
    for fn in datasets:
        print(fn)
        data = Table(fn)
        preprocessor = Normalize(method=Normalize.Vector)
        print(data.X.shape)
        start = time.time()
        result = preprocessor(data)
        print("no interpolate", time.time() - start)
        # Inject one unknown to exercise the NaN-interpolation path.
        data[0, 2] = np.nan
        start = time.time()
        result = preprocessor(data)
        print("with interpolate", time.time() - start)
        # Only the injected NaN may remain in the normalized output.
        assert (np.all(np.argwhere(np.isnan(result.X)) == [[0, 2]]))
Interpolate(np.linspace(1000, 1700, 100)), SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2), Cut(lowlim=1000, highlim=1800), GaussianSmoothing(sd=3.), Absorbance(), Transmittance(), Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]), Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]), Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]), Integrate(methods=Integrate.PeakMax, limits=[[1100, 1200]]), Integrate(methods=Integrate.PeakBaseline, limits=[[1100, 1200]]), Integrate(methods=Integrate.PeakAt, limits=[[1100]]), Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]), Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]]), RubberbandBaseline(), Normalize(method=Normalize.Vector), Normalize(method=Normalize.Area, int_method=Integrate.PeakMax, lower=0, upper=10000), ] # Preprocessors that use groups of input samples to infer # internal parameters. PREPROCESSORS_GROUPS_OF_SAMPLES = [ PCADenoising(components=2), ] PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES def shuffle_attr(data): natts = list(data.domain.attributes) random.Random(0).shuffle(natts)
Integrate, Interpolate, Cut, SavitzkyGolayFiltering, \ GaussianSmoothing, PCADenoising, RubberbandBaseline, \ Normalize # Preprocessors that work per sample and should return the same # result for a sample independent of the other samples PREPROCESSORS_INDEPENDENT_SAMPLES = [ Interpolate(np.linspace(1000, 1800, 100)), SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2), Cut(lowlim=1000, highlim=1800), GaussianSmoothing(sd=3.), Absorbance(), Transmittance(), Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]), RubberbandBaseline(), Normalize(method=Normalize.Vector), ] # Preprocessors that use groups of input samples to infer # internal parameters. PREPROCESSORS_GROUPS_OF_SAMPLES = [ PCADenoising(components=2), ] PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES class TestTransmittance(unittest.TestCase): def test_domain_conversion(self): """Test whether a domain can be used for conversion.""" data = Orange.data.Table("collagen.csv")