示例#1
0
 def test_offset(self):
     """Offset normalization subtracts the row minimum; limits do not change it."""
     table = Orange.data.Table([[2, 1, 2, 2, 3]])
     expected = table.X - 1
     out = Normalize(method=Normalize.Offset)(table)
     np.testing.assert_equal(out.X, expected)
     # supplying explicit limits must leave the offset result unchanged
     limited = Normalize(method=Normalize.Offset, limits=True, lower=0,
                         upper=4)(table)
     np.testing.assert_equal(limited.X, expected)
示例#2
0
 def test_minmax(self):
     """MinMax normalization scales each row by its maximum; limits do not change it."""
     table = Orange.data.Table([[2, 1, 2, 2, 3]])
     expected = table.X / 3
     out = Normalize(method=Normalize.MinMax)(table)
     np.testing.assert_equal(out.X, expected)
     # supplying explicit limits must leave the min-max result unchanged
     limited = Normalize(method=Normalize.MinMax, limits=True, lower=0,
                         upper=4)(table)
     np.testing.assert_equal(limited.X, expected)
示例#3
0
 def test_area_norm(self):
     """Area normalization divides each row by the chosen integral over [lower, upper]."""
     table = Orange.data.Table([[2, 1, 2, 2, 3]])
     out = Normalize(method=Normalize.Area, int_method=Integrate.PeakMax, lower=0, upper=4)(table)
     np.testing.assert_equal(out.X, table.X / 3)
     out = Normalize(method=Normalize.Area, int_method=Integrate.Simple, lower=0, upper=4)(table)
     np.testing.assert_equal(out.X, table.X / 7.5)
     # after normalizing by the simple integral over [0, 2],
     # integrating that same range must yield exactly 1 everywhere
     out = Normalize(method=Normalize.Area, int_method=Integrate.Simple, lower=0, upper=2)(table)
     integrated = Integrate(methods=Integrate.Simple, limits=[[0, 2]])(out)
     np.testing.assert_equal(integrated.X, np.ones_like(integrated.X))
示例#4
0
 def test_vector_norm(self):
     """Vector normalization rescales each row to unit Euclidean norm; limits are ignored."""
     table = Orange.data.Table([[2, 1, 2, 2, 3]])
     expected = table.X / np.sqrt((table.X * table.X).sum(axis=1))
     # with no limits, and with two different limit ranges, the result is identical
     for bounds in ({}, {"lower": 0, "upper": 4}, {"lower": 0, "upper": 2}):
         out = Normalize(method=Normalize.Vector, **bounds)(table)
         np.testing.assert_equal(out.X, expected)
示例#5
0
 def test_attribute_norm(self):
     """Attribute normalization divides each row by its value of a chosen (meta) attribute."""
     table = Orange.data.Table([[2, 1, 2, 2, 3]], metas=[[2]])
     # without an attribute the preprocessor leaves the data untouched
     out = Normalize(method=Normalize.Attribute)(table)
     np.testing.assert_equal(out.X, table.X)
     divisor = table.domain.metas[0]
     expected = table.X / 2
     out = Normalize(method=Normalize.Attribute, attr=divisor)(table)
     np.testing.assert_equal(out.X, expected)
     # lower/upper bounds must not influence attribute-based normalization
     for lo, hi in ((0, 4), (2, 4)):
         out = Normalize(method=Normalize.Attribute, attr=divisor,
                 lower=lo, upper=hi)(table)
         np.testing.assert_equal(out.X, expected)
示例#6
0
 def test_vector_norm_nan_correction(self):
     """Known values normalize to the same result even when some points are NaN."""
     table = Orange.data.Table([[2, 2, 2, 2]])
     out = Normalize(method=Normalize.Vector)(table)
     self.assertAlmostEqual(out.X[0, 0], 0.5)
     # an interior NaN can be interpolated, so the norm — and thus the
     # normalized known values — stay the same; the NaN itself remains
     table.X[0, 2] = float("nan")
     out = Normalize(method=Normalize.Vector)(table)
     self.assertAlmostEqual(out.X[0, 0], 0.5)
     self.assertTrue(np.isnan(out.X[0, 2]))
     # a NaN at the edge cannot be interpolated, so the norm is computed
     # from the remaining points only (two 2s -> norm 2*sqrt(2))
     table.X[0, 3] = float("nan")
     out = Normalize(method=Normalize.Vector)(table)
     self.assertAlmostEqual(out.X[0, 0], 2 ** 0.5 / 2)
     self.assertTrue(np.all(np.isnan(out.X[0, 2:])))
def test_normalization_vector():
    """Timing/smoke check of vector normalization on several data sets.

    Prints timings with and without the NaN-interpolation path and asserts
    that the only NaN in the output is the one injected at position (0, 2).
    """
    filenames = ["collagen", dust(), spectra20nea(), "peach_juice.dpt"]
    for fn in filenames:
        print(fn)
        data = Table(fn)
        normalize = Normalize(method=Normalize.Vector)
        print(data.X.shape)
        start = time.time()
        result = normalize(data)
        print("no interpolate", time.time() - start)
        # inject a single NaN to exercise the interpolation code path
        data[0, 2] = np.nan
        start = time.time()
        result = normalize(data)
        print("with interpolate", time.time() - start)
        assert np.all(np.argwhere(np.isnan(result.X)) == [[0, 2]])
示例#8
0
    Interpolate(np.linspace(1000, 1700, 100)),
    SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2),
    Cut(lowlim=1000, highlim=1800),
    GaussianSmoothing(sd=3.),
    Absorbance(),
    Transmittance(),
    Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
    Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakMax, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakBaseline, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakAt, limits=[[1100]]),
    Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]),
    Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]]),
    RubberbandBaseline(),
    Normalize(method=Normalize.Vector),
    Normalize(method=Normalize.Area, int_method=Integrate.PeakMax, lower=0, upper=10000),
]

# Preprocessors that use groups of input samples to infer
# internal parameters.
PREPROCESSORS_GROUPS_OF_SAMPLES = [
    PCADenoising(components=2),
]

# Combined fixture: every preprocessor instance exercised by the tests below.
PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES


def shuffle_attr(data):
    natts = list(data.domain.attributes)
    random.Random(0).shuffle(natts)
示例#9
0
    Integrate, Interpolate, Cut, SavitzkyGolayFiltering, \
    GaussianSmoothing, PCADenoising, RubberbandBaseline, \
    Normalize

# Preprocessors that work per sample and should return the same
# result for a sample independent of the other samples
PREPROCESSORS_INDEPENDENT_SAMPLES = [
    Interpolate(np.linspace(1000, 1800, 100)),
    SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2),
    Cut(lowlim=1000, highlim=1800),
    GaussianSmoothing(sd=3.),
    Absorbance(),
    Transmittance(),
    # NOTE(review): the first limit pair [900, 100] has lower > upper —
    # possibly a deliberate reversed-bounds case, possibly a typo for
    # [900, 1000]; confirm against the Integrate implementation.
    Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
    RubberbandBaseline(),
    Normalize(method=Normalize.Vector),
]

# Preprocessors that use groups of input samples to infer
# internal parameters.
PREPROCESSORS_GROUPS_OF_SAMPLES = [
    PCADenoising(components=2),
]

# Combined fixture: every preprocessor instance exercised by the tests below.
PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES


class TestTransmittance(unittest.TestCase):
    def test_domain_conversion(self):
        """Test whether a domain can be used for conversion."""
        data = Orange.data.Table("collagen.csv")