示例#1
0
 def test_cut_both(self):
     """Cut with both limits keeps only wavenumbers inside the range."""
     data = self.collagen
     # an interval covering no wavenumbers removes every feature
     empty = Cut(lowlim=0, highlim=2)(data)
     self.assertFalse(getx(empty))
     # a valid interval keeps only wavenumbers within [lowlim, highlim]
     cut = Cut(lowlim=1000, highlim=1100)(data)
     xs = getx(cut)
     self.assertGreaterEqual(min(xs), 1000)
     self.assertLessEqual(max(xs), 1100)
    def test_slightly_different_domain(self):
        """ If test data has a slightly different domain then (with interpolation)
        we should obtain a similar classification score. """
        # rows full of unknowns make LogisticRegression undefined
        # we can obtain them, for example, with EMSC, if one of the badspectra
        # is a spectrum from the data
        learner = LogisticRegressionLearner(preprocessors=[_RemoveNaNRows()])

        for proc in PREPROCESSORS:
            if hasattr(proc, "skip_add_zeros"):
                continue
            # LR that can not handle unknown values
            train, test = separate_learn_test(self.collagen)
            train1 = proc(train)
            # baseline AUC with a matching domain
            aucorig = AUC(TestOnTestData(train1, test, [learner]))
            # break direct domain conversion and rename attributes
            test = destroy_atts_conversion(test)
            test = odd_attr(test)
            # a subset of points for training so that all test sets points
            # are within the train set points, which gives no unknowns
            train = Interpolate(points=getx(train)[1:-3])(train)  # interpolatable train
            train = proc(train)
            # explicit domain conversion test to catch exceptions that would
            # otherwise be silently handled in TestOnTestData
            _ = Orange.data.Table(train.domain, test)
            aucnow = AUC(TestOnTestData(train, test, [learner]))
            self.assertAlmostEqual(aucnow, aucorig, delta=0.02, msg="Preprocessor " + str(proc))
            test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
            _ = Orange.data.Table(train.domain, test)  # explicit call again
            aucnow = AUC(TestOnTestData(train, test, [learner]))
            # the difference should be slight
            self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
 def commit(self):
     """Compute the interpolated output for the current widget state and
     send it on the "interpolated_data" output.

     Clears and (re)raises the dxzero / too_many_points errors for the
     linear-space input mode.
     """
     out = None
     self.Error.dxzero.clear()
     self.Error.too_many_points.clear()
     if self.data:
         if self.input_radio == 0:
             # interpolate onto the data's own wavenumbers
             points = getx(self.data)
             out = Interpolate(points)(self.data)
         elif self.input_radio == 1:
             # user-defined linear space from xmin to xmax with step dx
             xs = getx(self.data)
             if not self.dx > 0:
                 self.Error.dxzero()
             else:
                 # fall back to the data's extremes when limits are unset
                 xmin = self.xmin if self.xmin is not None else np.min(xs)
                 xmax = self.xmax if self.xmax is not None else np.max(xs)
                 xmin, xmax = min(xmin, xmax), max(xmin, xmax)
                 reslength = abs(math.ceil((xmax - xmin)/self.dx))
                 if reslength < 10002:
                     points = np.arange(xmin, xmax, self.dx)
                     out = Interpolate(points)(self.data)
                 else:
                     # refuse to build an excessively long domain
                     self.Error.too_many_points(reslength)
         elif self.input_radio == 2 and self.data_points_interpolate is not None:
             # interpolate onto the points given by the reference input
             out = self.data_points_interpolate(self.data)
     self.Outputs.interpolated_data.send(out)
示例#4
0
 def test_roundtrip(self):
     """Saving to .xyz and reading back must preserve X, wavenumbers and metas."""
     original = Orange.data.Table("map_test.xyz")
     with named_file("", suffix=".xyz") as path:
         original.save(path)
         reloaded = Orange.data.Table(path)
         np.testing.assert_equal(original.X, reloaded.X)
         np.testing.assert_equal(getx(original), getx(reloaded))
         np.testing.assert_equal(original.metas, reloaded.metas)
示例#5
0
 def test_read(self):
     """Omnic .map files load with the expected values and coordinates."""
     data = Orange.data.Table("small_Omnic.map")
     self.assertAlmostEqual(data[1, 0], 4.01309, places=5)
     self.assertAlmostEqual(data[0, 0], 3.98295, places=5)
     xs = getx(data)
     self.assertEqual(min(xs), 1604.51001)
     self.assertEqual(max(xs), 1805.074097)
     self.assertEqual(data[0]["map_x"], 0)
     self.assertEqual(data[1]["map_y"], 0)
示例#6
0
 def test_read(self):
     """Hermes HDF5 files load with the expected values and coordinates."""
     data = Orange.data.Table("Hermes_HDF5/small_OK.hdf5")
     self.assertEqual(data[0, 0], 1000.1)
     self.assertEqual(data[1, 0], 2000.1)
     xs = getx(data)
     self.assertEqual(min(xs), 100.1)
     self.assertEqual(max(xs), 101.1)
     self.assertEqual(data[1]["map_x"], 2.1)
     self.assertEqual(data[1]["map_y"], 11.1)
示例#7
0
 def test_roundtrip(self):
     """A save/load cycle through .xyz preserves X, wavenumbers and metas."""
     before = Orange.data.Table("map_test.xyz")
     with named_file("", suffix=".xyz") as path:
         before.save(path)
         after = Orange.data.Table(path)
         np.testing.assert_equal(before.X, after.X)
         np.testing.assert_equal(getx(before), getx(after))
         np.testing.assert_equal(before.metas, after.metas)
示例#8
0
 def test_read(self):
     """.xyz map files load rows, coordinates and wavenumbers correctly."""
     data = Orange.data.Table("map_test.xyz")
     self.assertEqual(len(data), 16)
     self.assertEqual(data[1]["map_x"], 1)
     self.assertEqual(data[1]["map_y"], 7)
     self.assertEqual(data[1][1], 0.1243)
     self.assertEqual(data[2][2], 0.1242)
     xs = getx(data)
     self.assertEqual(min(xs), 1634.84)
     self.assertEqual(max(xs), 1641.69)
示例#9
0
    def transformed(self, X, wavenumbers):
        """EMSC-correct the spectra in X sampled at sorted `wavenumbers`.

        Per spectrum, fits a weighted least-squares model built from
        polynomial baseline terms, optional bad spectra and the reference
        spectrum, subtracts the interference terms and appends the fitted
        parameters to the corrected spectrum.
        """
        # wavenumbers have to be input as sorted
        # about 85% of time in __call__ function is spent in lstsq
        # compute average spectrum from the reference
        ref_X = np.atleast_2d(spectra_mean(self.reference.X))

        def interpolate_to_data(other_xs, other_data):
            # all input data needs to be interpolated (and NaNs removed)
            interpolated = interp1d_with_unknowns_numpy(other_xs, other_data, wavenumbers)
            # we know that X is not NaN. same handling of reference as of X
            interpolated, _ = nan_extend_edges_and_interpolate(wavenumbers, interpolated)
            return interpolated

        ref_X = interpolate_to_data(getx(self.reference), ref_X)

        if self.weights:
            # interpolate the weights to the data's wavenumbers
            wei_X = interp1d_with_unknowns_numpy(getx(self.weights), self.weights.X, wavenumbers)
            # set whichever weights are undefined (usually at edges) to zero
            wei_X[np.isnan(wei_X)] = 0
        else:
            wei_X = np.ones((1, len(wavenumbers)))

        # map the wavenumber axis onto [-1, 1] for stable polynomial terms
        N = wavenumbers.shape[0]
        m0 = - 2.0 / (wavenumbers[0] - wavenumbers[N - 1])
        c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])

        n_badspec = len(self.badspectra) if self.badspectra is not None else 0
        if self.badspectra:
            badspectra_X = interpolate_to_data(getx(self.badspectra), self.badspectra.X)

        M = []
        for x in range(0, self.order+1):
            M.append((m0 * (wavenumbers - c_coeff)) ** x)
        for y in range(0, n_badspec):
            M.append(badspectra_X[y])
        M.append(ref_X)  # always add reference spectrum to the model
        n_add_model = len(M)
        M = np.vstack(M).T  # M is for the correction, for par. estimation M_weighted is used

        M_weighted = M*wei_X.T

        newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
        for i, rawspectrum in enumerate(X):
            # weighted least squares needs both sides weighted: fit the
            # weighted model against the weighted spectrum. Previously
            # rawspectrumW was computed but never used, so the weighted
            # model terms were fitted to the unweighted spectrum.
            rawspectrumW = (rawspectrum*wei_X)[0]
            m = np.linalg.lstsq(M_weighted, rawspectrumW, rcond=-1)[0]
            corrected = rawspectrum

            for x in range(0, self.order+1+n_badspec):
                corrected = (corrected - (m[x] * M[:, x]))
            if self.scaling:
                corrected = corrected/m[self.order+1+n_badspec]
            corrected[np.isinf(corrected)] = np.nan  # fix values caused by zero weights
            corrected = np.hstack((corrected, m))  # append the model parameters
            newspectra[i] = corrected

        return newspectra
示例#10
0
    def transformed(self, X, wavenumbers):
        """EMSC-correct the spectra in X sampled at sorted `wavenumbers`.

        Per spectrum, fits a weighted least-squares model of polynomial
        baseline terms, optional bad spectra and the reference spectrum,
        subtracts the interference terms and appends the fitted parameters.
        """
        # about 85% of time in __call__ function is spent in lstsq
        # compute average spectrum from the reference
        ref_X = np.atleast_2d(spectra_mean(self.reference.X))

        def interpolate_to_data(other_xs, other_data):
            # all input data needs to be interpolated (and NaNs removed)
            interpolated = interp1d_with_unknowns_numpy(
                other_xs, other_data, wavenumbers)
            # we know that X is not NaN. same handling of reference as of X
            interpolated, _ = nan_extend_edges_and_interpolate(
                wavenumbers, interpolated)
            return interpolated

        ref_X = interpolate_to_data(getx(self.reference), ref_X)
        wei_X = weighted_wavenumbers(self.weights, wavenumbers)

        # map the wavenumber axis onto [-1, 1] for stable polynomial terms
        N = wavenumbers.shape[0]
        m0 = -2.0 / (wavenumbers[0] - wavenumbers[N - 1])
        c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])

        n_badspec = len(self.badspectra) if self.badspectra is not None else 0
        if self.badspectra:
            badspectra_X = interpolate_to_data(getx(self.badspectra),
                                               self.badspectra.X)

        M = []
        for x in range(0, self.order + 1):
            M.append((m0 * (wavenumbers - c_coeff))**x)
        for y in range(0, n_badspec):
            M.append(badspectra_X[y])
        M.append(ref_X)  # always add reference spectrum to the model
        n_add_model = len(M)
        M = np.vstack(
            M
        ).T  # M is for the correction, for par. estimation M_weighted is used

        M_weighted = M * wei_X.T

        newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
        for i, rawspectrum in enumerate(X):
            # weighted least squares needs both sides weighted: fit the
            # weighted model against the weighted spectrum. Previously
            # rawspectrumW was computed but never used, so the weighted
            # model terms were fitted to the unweighted spectrum.
            rawspectrumW = (rawspectrum * wei_X)[0]
            m = np.linalg.lstsq(M_weighted, rawspectrumW, rcond=-1)[0]
            corrected = rawspectrum

            for x in range(0, self.order + 1 + n_badspec):
                corrected = (corrected - (m[x] * M[:, x]))
            if self.scaling:
                corrected = corrected / m[self.order + 1 + n_badspec]
            corrected[np.isinf(
                corrected)] = np.nan  # fix values caused by zero weights
            corrected = np.hstack(
                (corrected, m))  # append the model parameters
            newspectra[i] = corrected

        return newspectra
示例#11
0
 def test_read(self):
     """Reading map_test.xyz yields the expected rows and wavenumbers."""
     table = Orange.data.Table("map_test.xyz")
     self.assertEqual(len(table), 16)
     self.assertEqual(table[1]["map_x"], 1)
     self.assertEqual(table[1]["map_y"], 7)
     self.assertEqual(table[1][1], 0.1243)
     self.assertEqual(table[2][2], 0.1242)
     wavenumbers = getx(table)
     self.assertEqual(min(wavenumbers), 1634.84)
     self.assertEqual(max(wavenumbers), 1641.69)
示例#12
0
 def set_data(self, data):
     """Store the input data, refresh limit placeholders, and recompute."""
     self.data = data
     points = getx(data) if data else []
     if len(points):
         # show the data's extremes as placeholder limits
         self.xmin_edit.setPlaceholderText(str(np.min(points)))
         self.xmax_edit.setPlaceholderText(str(np.max(points)))
     else:
         self.xmin_edit.setPlaceholderText("")
         self.xmax_edit.setPlaceholderText("")
     self.commit()
 def set_data(self, data):
     """Remember the input table, update placeholder limits, recompute."""
     self.data = data
     xs = getx(data) if data else []
     if not len(xs):
         self.xmin_edit.setPlaceholderText("")
         self.xmax_edit.setPlaceholderText("")
     else:
         # suggest the data's extremes as default limits
         self.xmin_edit.setPlaceholderText(str(np.min(xs)))
         self.xmax_edit.setPlaceholderText(str(np.max(xs)))
     self.commit()
 def test_autointerpolate(self):
     """The widget's output domain interpolates foreign data automatically."""
     self.send_signal("Data", self.collagen)
     out = self.get_output("Interpolated data")
     np.testing.assert_equal(getx(self.collagen), getx(out))
     # plain conversion into the original domain yields only unknowns
     plain = Orange.data.Table(self.collagen.domain, self.peach)
     self.assertTrue(np.isnan(plain.X).all())
     # the widget's output domain interpolates instead
     auto = Orange.data.Table(out.domain, self.peach)
     self.assertFalse(np.isnan(auto.X).all())
     np.testing.assert_equal(getx(self.collagen), getx(auto))
 def test_array_read(self):
     """Spectral-array PTIR files expose spectra and map coordinates."""
     reader = initialize_reader(PTIRFileReader,
                                "photothermal/Nodax_Spectral_Array.ptir")
     reader.data_signal = b'//ZI/*/DEMODS/0/R'
     data = reader.read()
     self.assertAlmostEqual(data[0][0], 0.21426094)
     self.assertAlmostEqual(data[1][0], 1.6351842)
     xs = getx(data)
     self.assertEqual(min(xs), 801.0)
     self.assertEqual(max(xs), 1797.0)
     self.assertAlmostEqual(data[0]["map_x"], 801.9500122070312)
     self.assertAlmostEqual(data[0]["map_y"], -500.1499938964844)
 def test_autointerpolate(self):
     """Output domain conversion should interpolate data from another domain."""
     self.send_signal("Data", self.collagen)
     interpolated = self.get_output("Interpolated data")
     np.testing.assert_equal(getx(self.collagen), getx(interpolated))
     # converting directly to the source domain produces all-NaN rows
     direct = Orange.data.Table(self.collagen.domain, self.peach)
     self.assertTrue(np.isnan(direct.X).all())
     # the interpolated output domain fills values by interpolation
     via_output = Orange.data.Table(interpolated.domain, self.peach)
     self.assertFalse(np.isnan(via_output.X).all())
     np.testing.assert_equal(getx(self.collagen), getx(via_output))
示例#17
0
 def test_add_limit(self):
     """First range click adds both edge limits; later clicks add the
     midpoint of the data extremes."""
     xs = getx(self.data)
     dmin, dmax = min(xs), max(xs)
     # first addition adds two limits
     self.editor.range_button.click()
     self.widget.apply()
     self.assertEqual(self.get_preprocessor().zero_points, [dmin, dmax])
     # the second addition adds one limit
     self.editor.range_button.click()
     self.widget.apply()
     self.assertEqual(self.get_preprocessor().zero_points,
                      [dmin, dmax, (dmin + dmax) / 2])
示例#18
0
 def test_envi_comparison(self):
     """Agilent readers must agree with the corresponding ENVI exports."""
     # single image
     img_agilent = Orange.data.Table("agilent/4_noimage_agg256.seq")
     img_envi = Orange.data.Table("agilent/4_noimage_agg256.hdr")
     np.testing.assert_equal(img_agilent.X, img_envi.X)
     # wavenumbers are rounded in .hdr files, so compare approximately
     np.testing.assert_allclose(getx(img_agilent), getx(img_envi))
     # mosaic
     mos_agilent = Orange.data.Table("agilent/5_mosaic_agg1024.dms")
     mos_envi = Orange.data.Table("agilent/5_mosaic_agg1024.hdr")
     np.testing.assert_equal(mos_agilent.X, mos_envi.X)
     np.testing.assert_allclose(getx(mos_agilent), getx(mos_envi))
 def test_interpolate_points(self):
     """Radio option 2 interpolates onto the "Points" input's wavenumbers
     and warns while that reference input is missing."""
     self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
     self.widget.controls.input_radio.buttons[2].click()
     self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
     self.send_signal("Data", self.peach)
     self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
     # providing the points reference clears the warning
     self.send_signal("Points", self.collagen)
     self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
     out = self.get_output("Interpolated data")
     np.testing.assert_equal(getx(self.collagen), getx(out))
     # removing the points input restores the warning
     self.send_signal("Points", None)
     self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
 def test_interpolate_points(self):
     """Interpolation to reference points warns while the "Points" input is
     absent and interpolates onto it once provided."""
     self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
     self.widget.controls.input_radio.buttons[2].click()
     self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
     self.send_signal("Data", self.peach)
     self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
     # providing the points reference clears the warning
     self.send_signal("Points", self.collagen)
     self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
     out = self.get_output("Interpolated data")
     np.testing.assert_equal(getx(self.collagen), getx(out))
     # removing the points input restores the warning
     self.send_signal("Points", None)
     self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
示例#21
0
 def test_envi_comparison(self):
     """Agilent .dat/.dmt readers must agree with their ENVI exports."""
     # single image
     image_native = Orange.data.Table("agilent/4_noimage_agg256.dat")
     image_envi = Orange.data.Table("agilent/4_noimage_agg256.hdr")
     np.testing.assert_equal(image_native.X, image_envi.X)
     # wavenumbers are rounded in .hdr files, so compare approximately
     np.testing.assert_allclose(getx(image_native), getx(image_envi))
     # mosaic
     mosaic_native = Orange.data.Table("agilent/5_mosaic_agg1024.dmt")
     mosaic_envi = Orange.data.Table("agilent/5_Mosaic_agg1024.hdr")
     np.testing.assert_equal(mosaic_native.X, mosaic_envi.X)
     np.testing.assert_allclose(getx(mosaic_native), getx(mosaic_envi))
示例#22
0
    def transformed(self, X, wavenumbers):
        """EMSC-correct the spectra in X sampled at `wavenumbers`.

        Per spectrum, fits a weighted least-squares model of polynomial
        baseline terms plus the reference spectrum, subtracts the baseline
        terms and appends the fitted parameters to the corrected spectrum.
        """
        # about 85% of time in __call__ function is spent in lstsq

        # compute average spectrum from the reference
        ref_X = np.atleast_2d(spectra_mean(self.reference.X))
        # interpolate reference to the data
        ref_X = interp1d_with_unknowns_numpy(getx(self.reference), ref_X,
                                             wavenumbers)
        # we know that X is not NaN. same handling of reference as of X
        ref_X, _ = nan_extend_edges_and_interpolate(wavenumbers, ref_X)

        if self.weights:
            # interpolate the weights to the data
            wei_X = interp1d_with_unknowns_numpy(getx(self.weights),
                                                 self.weights.X, wavenumbers)
            # set whichever weights are undefined (usually at edges) to zero
            wei_X[np.isnan(wei_X)] = 0
        else:
            wei_X = np.ones((1, len(wavenumbers)))

        # map the wavenumber axis onto [-1, 1] for stable polynomial terms
        N = wavenumbers.shape[0]
        m0 = -2.0 / (wavenumbers[0] - wavenumbers[N - 1])
        c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])
        M = []
        for x in range(0, self.order + 1):
            M.append((m0 * (wavenumbers - c_coeff))**x)
        M.append(ref_X)  # always add reference spectrum to the model
        n_add_model = len(M)
        M = np.vstack(
            M
        ).T  # M is needed below for the correction, for par estimation M_weighted is used

        M_weighted = M * wei_X.T

        newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
        for i, rawspectrum in enumerate(X):
            # weighted least squares needs both sides weighted: fit the
            # weighted model against the weighted spectrum. Previously
            # rawspectrumW was computed but never used. rcond=-1 keeps the
            # legacy lstsq cutoff and silences the FutureWarning raised
            # when rcond is omitted.
            rawspectrumW = (rawspectrum * wei_X)[0]
            m = np.linalg.lstsq(M_weighted, rawspectrumW, rcond=-1)[0]
            corrected = rawspectrum

            for x in range(0, self.order + 1):
                corrected = (corrected - (m[x] * M[:, x]))
            if self.scaling:
                corrected = corrected / m[self.order + 1]
            corrected[np.isinf(
                corrected
            )] = np.nan  # fix values which can be caused by zero weights
            corrected = np.hstack(
                (corrected, m))  # append the model parameters
            newspectra[i] = corrected

        return newspectra
示例#23
0
 def test_unordered_features(self):
     """Preprocessors must be invariant to feature (wavenumber) order."""
     for proc in PREPROCESSORS:
         data = preprocessor_data(proc)
         data_reversed = reverse_attr(data)
         data_shuffle = shuffle_attr(data)
         pdata = proc(data)
         # compare columns sorted by wavenumber so feature order is irrelevant
         X = pdata.X[:, np.argsort(getx(pdata))]
         pdata_reversed = proc(data_reversed)
         X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
         np.testing.assert_almost_equal(X, X_reversed, err_msg="Preprocessor " + str(proc))
         pdata_shuffle = proc(data_shuffle)
         X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
         np.testing.assert_almost_equal(X, X_shuffle, err_msg="Preprocessor " + str(proc))
 def test_hyperspectral_read(self):
     """Hyperspectral PTIR files expose spectra and map coordinates."""
     reader = initialize_reader(PTIRFileReader,
                                "photothermal/Hyper_Sample.ptir")
     reader.data_signal = b'//ZI/*/DEMODS/0/R'
     data = reader.read()
     self.assertEqual(len(data), 35)
     self.assertEqual(len(data.domain.attributes), 451)
     self.assertAlmostEqual(data[0][0], 0.0137912575)
     self.assertAlmostEqual(data[1][0], -0.08101661)
     xs = getx(data)
     self.assertEqual(min(xs), 900.0)
     self.assertEqual(max(xs), 1800.0)
     self.assertAlmostEqual(data[0]["map_x"], -4088.96337890625)
     self.assertAlmostEqual(data[0]["map_y"], -886.1981201171875)
示例#25
0
 def test_image_read(self):
     """Agilent single-image .dat loads pixels, coordinates and spectra."""
     data = Orange.data.Table("agilent/4_noimage_agg256.dat")
     self.assertEqual(len(data), 64)
     # Pixel sizes are 5.5 * 16 = 88.0 (binning to reduce test data)
     self.assertAlmostEqual(data[1]["map_x"] - data[0]["map_x"], 88.0)
     self.assertAlmostEqual(data[8]["map_y"] - data[7]["map_y"], 88.0)
     # Last pixel should start at (8 - 1) * 88.0 = 616.0
     self.assertAlmostEqual(data[-1]["map_x"], 616.0)
     self.assertAlmostEqual(data[-1]["map_y"], 616.0)
     self.assertAlmostEqual(data[1][1], 1.27181053)
     self.assertAlmostEqual(data[2][2], 1.27506005)
     xs = getx(data)
     self.assertEqual(min(xs), 1990.178226)
     self.assertEqual(max(xs), 2113.600132)
 def test_unordered_features(self):
     """Every preprocessor must give the same result regardless of the
     order of the input features (wavenumbers)."""
     data = self.collagen
     data_reversed = reverse_attr(data)
     data_shuffle = shuffle_attr(data)
     for proc in PREPROCESSORS:
         pdata = proc(data)
         # compare columns sorted by wavenumber so feature order is irrelevant
         X = pdata.X[:, np.argsort(getx(pdata))]
         pdata_reversed = proc(data_reversed)
         X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
         np.testing.assert_almost_equal(X, X_reversed, err_msg="Preprocessor " + str(proc))
         pdata_shuffle = proc(data_shuffle)
         X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
         np.testing.assert_almost_equal(X, X_shuffle, err_msg="Preprocessor " + str(proc))
示例#27
0
 def test_mosaic_read(self):
     """Agilent mosaic .dmt loads pixels, coordinates and spectra."""
     data = Orange.data.Table("agilent/5_mosaic_agg1024.dmt")
     self.assertEqual(len(data), 32)
     # Pixel sizes are 5.5 * 32 = 176.0 (binning to reduce test data)
     self.assertAlmostEqual(data[1]["map_x"] - data[0]["map_x"], 176.0)
     self.assertAlmostEqual(data[4]["map_y"] - data[3]["map_y"], 176.0)
     # Last pixel should start at (4 - 1) * 176.0 = 528.0
     self.assertAlmostEqual(data[-1]["map_x"], 528.0)
     # 1 x 2 mosiac, (8 - 1) * 176.0 = 1232.0
     self.assertAlmostEqual(data[-1]["map_y"], 1232.0)
     self.assertAlmostEqual(data[1][1], 1.14792180)
     self.assertAlmostEqual(data[2][2], 1.14063489)
     xs = getx(data)
     self.assertEqual(min(xs), 1990.178226)
     self.assertEqual(max(xs), 2113.600132)
示例#28
0
 def test_reference_preprocessed(self):
     """The reference seen by a later preprocessor must itself be processed
     by the preceding preprocessors (here: Cut trims its edges)."""
     data = SMALL_COLLAGEN
     self.send_signal("Data", data)
     self.send_signal("Reference", data)
     self.widget.add_preprocessor(pack_editor(CutEditor))
     self.widget.add_preprocessor(pack_editor(RememberDataEditor))
     self.widget.apply()
     processed = getx(RememberData.reference)
     original = getx(data)
     # cut by default cuts 10% of the data on both edges
     removed = set(original) - set(processed)
     self.assertGreater(len(removed), 0)
     # the cut may remove wavenumbers but never introduce new ones
     self.assertEqual(set(), set(processed) - set(original))
     self.assertFalse(self.widget.Warning.reference_compat.is_shown())
示例#29
0
 def test_image_read(self):
     """Agilent single-image .seq loads pixels, coordinates and spectra."""
     data = Orange.data.Table("agilent/4_noimage_agg256.seq")
     self.assertEqual(len(data), 64)
     # Pixel sizes are 5.5 * 16 = 88.0 (binning to reduce test data)
     self.assertAlmostEqual(data[1]["map_x"] - data[0]["map_x"], 88.0)
     self.assertAlmostEqual(data[8]["map_y"] - data[7]["map_y"], 88.0)
     # Last pixel should start at (8 - 1) * 88.0 = 616.0
     self.assertAlmostEqual(data[-1]["map_x"], 616.0)
     self.assertAlmostEqual(data[-1]["map_y"], 616.0)
     self.assertAlmostEqual(data[1][1], 1.27181053)
     self.assertAlmostEqual(data[2][2], 1.27506005)
     xs = getx(data)
     self.assertEqual(min(xs), 1990.178226)
     self.assertEqual(max(xs), 2113.600132)
 def test_predict_different_domain_interpolation(self):
     """An interpolable model predicts shifted/cut test data; AUC degrades
     monotonically as more of the spectrum is cut away."""
     train, test = separate_learn_test(self.collagen)
     aucorig = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     test = Interpolate(points=getx(test) - 1.)(test) # other test domain
     train = Interpolate(points=getx(train))(train)  # make train capable of interpolation
     aucshift = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     self.assertAlmostEqual(aucorig, aucshift, delta=0.01)  # shift can decrease AUC slightly
     # progressively narrower cuts of the (already shifted) test data
     test = Cut(1000, 1700)(test)
     auccut1 = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     test = Cut(1100, 1600)(test)
     auccut2 = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     test = Cut(1200, 1500)(test)
     auccut3 = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     # the more we cut the lower precision we get
     self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
示例#31
0
 def test_predict_different_domain_interpolation(self):
     """An interpolable model predicts shifted/cut test data; AUC degrades
     monotonically as more of the spectrum is cut away."""
     train, test = separate_learn_test(self.collagen)
     aucorig = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     test = Interpolate(points=getx(test) - 1.)(test) # other test domain
     train = Interpolate(points=getx(train))(train)  # make train capable of interpolation
     aucshift = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     self.assertAlmostEqual(aucorig, aucshift, delta=0.01)  # shift can decrease AUC slightly
     # progressively narrower cuts of the (already shifted) test data
     test = Cut(1000, 1700)(test)
     auccut1 = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     test = Cut(1100, 1600)(test)
     auccut2 = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     test = Cut(1200, 1500)(test)
     auccut3 = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     # the more we cut the lower precision we get
     self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
示例#32
0
def _transform_to_sorted_features(data):
    """Return (xs, sort index, already-sorted flag, X ordered by wavenumber)."""
    xs = getx(data)
    xsind = np.argsort(xs)
    mon = is_increasing(xsind)
    # avoid copying X when the features are already in increasing order
    X = data.X if mon else data.X[:, xsind]
    return xs, xsind, mon, X
示例#33
0
 def test_predict_savgol_another_interpolate(self):
     """Making a Savitzky-Golay model interpolable barely changes AUC."""
     train, test = separate_learn_test(self.collagen)
     train = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(train)
     auc_plain = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     train = Interpolate(points=getx(train))(train)
     auc_interp = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     self.assertAlmostEqual(auc_plain, auc_interp, delta=0.02)
示例#34
0
 def test_predict_samename_domain_interpolation(self):
     """With equal attribute names but broken conversion, an interpolable
     model must reproduce the original score exactly."""
     train, test = separate_learn_test(self.collagen)
     aucorig = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     test = destroy_atts_conversion(test)
     # make train capable of interpolation
     train = Interpolate(points=getx(train))(train)
     auc = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
     self.assertEqual(aucorig, auc)
示例#35
0
    def transformed(self, data):
        """Return the normalized X array for `data` according to `self.method`.

        Works on a copy; the input table itself is not modified. Returns a
        raw numpy array, not a Table.
        """
        if data.X.shape[0] == 0:
            # nothing to normalize
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            nan_num = nans.sum(axis=1, keepdims=True)
            ys = data.X
            if np.any(nan_num > 0):
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                ys = np.nan_to_num(ys)  # edge elements can still be zero
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if np.any(nan_num > 0):
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            # divide each row by its integral over [lower, upper]
            norm_data = Integrate(methods=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
        elif self.method == Normalize.Attribute:
            # divide each row by the value of a continuous attribute
            # (the unused local `nd` was removed)
            if self.attr in data.domain and isinstance(data.domain[self.attr], Orange.data.ContinuousVariable):
                ndom = Orange.data.Domain([data.domain[self.attr]])
                factors = data.transform(ndom)
                data.X /= factors.X
            else:  # invalid attribute for normalization
                data.X *= float("nan")
        return data.X
示例#36
0
def process_stack(data, xat, yat, upsample_factor=100, use_sobel=False, ref_frame_num=0):
    """Align a hyperspectral image stack by translation registration.

    Registers every frame against frame ``ref_frame_num`` (optionally on a
    Sobel-filtered image), crops to the region valid in all shifted frames
    and rebuilds a spectra table.

    Returns a tuple (shifts, aligned_table).
    """
    hypercube, lsx, lsy = get_hypercube(data, xat, yat)

    calculate_shift = RegisterTranslation(upsample_factor=upsample_factor)
    filterfn = sobel if use_sobel else lambda x: x
    shifts, aligned_stack = alignstack(hypercube.T,
                                       shiftfn=calculate_shift,
                                       ref_frame_num=ref_frame_num,
                                       filterfn=filterfn)

    # extreme shifts determine how much of the border must be cropped
    xmin, ymin = shifts[:, 0].min(), shifts[:, 1].min()
    xmax, ymax = shifts[:, 0].max(), shifts[:, 1].max()
    xmin, xmax = int(round(xmin)), int(round(xmax))
    ymin, ymax = int(round(ymin)), int(round(ymax))

    # keep only the region covered by every shifted frame
    shape = hypercube.shape
    slicex = slice(max(xmax, 0), min(shape[1], shape[1]+xmin))
    slicey = slice(max(ymax, 0), min(shape[0], shape[0]+ymin))
    cropped = np.array(aligned_stack).T[slicey, slicex]

    # transform numpy array back to Orange.data.Table
    return shifts, build_spec_table(*_spectra_from_image(cropped,
                                                         getx(data),
                                                         np.linspace(*lsx)[slicex],
                                                         np.linspace(*lsy)[slicey]))
def process_stack(data,
                  xat,
                  yat,
                  upsample_factor=100,
                  use_sobel=False,
                  ref_frame_num=0):
    """Align a hyperspectral image stack by translation registration.

    Registers every frame against frame ``ref_frame_num`` (optionally on a
    Sobel-filtered image), crops to the region valid in all shifted frames
    and rebuilds a spectra table.

    Raises NanInsideHypercube when the hypercube contains NaN values.
    Returns a tuple (shifts, aligned_table).
    """
    hypercube, lsx, lsy = get_hypercube(data, xat, yat)
    if bn.anynan(hypercube):
        raise NanInsideHypercube(True)

    calculate_shift = RegisterTranslation(upsample_factor=upsample_factor)
    filterfn = sobel if use_sobel else lambda x: x
    shifts, aligned_stack = alignstack(hypercube.T,
                                       shiftfn=calculate_shift,
                                       ref_frame_num=ref_frame_num,
                                       filterfn=filterfn)

    # extreme shifts determine how much of the border must be cropped
    xmin, ymin = shifts[:, 0].min(), shifts[:, 1].min()
    xmax, ymax = shifts[:, 0].max(), shifts[:, 1].max()
    xmin, xmax = int(round(xmin)), int(round(xmax))
    ymin, ymax = int(round(ymin)), int(round(ymax))

    # keep only the region covered by every shifted frame
    shape = hypercube.shape
    slicex = slice(max(xmax, 0), min(shape[1], shape[1] + xmin))
    slicey = slice(max(ymax, 0), min(shape[0], shape[0] + ymin))
    cropped = np.array(aligned_stack).T[slicey, slicex]

    # transform numpy array back to Orange.data.Table
    return shifts, build_spec_table(
        *_spectra_from_image(cropped, getx(data),
                             np.linspace(*lsx)[slicex],
                             np.linspace(*lsy)[slicey]))
示例#38
0
 def set_preview_data(self, data):
     """Initialize the limit fields from the data, unless already edited."""
     if self.user_changed:
         return
     xs = getx(data)
     if len(xs):
         self.set_value("Low limit", min(xs))
         self.set_value("High limit", max(xs))
         self.edited.emit()
示例#39
0
 def set_preview_data(self, data):
     """Seed the limit values from the preview data once; keep user edits."""
     if self.user_changed:
         return
     wavenumbers = getx(data)
     if len(wavenumbers):
         self.set_value("Low limit", min(wavenumbers))
         self.set_value("High limit", max(wavenumbers))
         self.edited.emit()
示例#40
0
 def test_mosaic_read(self):
     """Agilent mosaic .dms loads pixels, coordinates and spectra."""
     data = Orange.data.Table("agilent/5_mosaic_agg1024.dms")
     self.assertEqual(len(data), 32)
     # Pixel sizes are 5.5 * 32 = 176.0 (binning to reduce test data)
     self.assertAlmostEqual(data[1]["map_x"] - data[0]["map_x"], 176.0)
     self.assertAlmostEqual(data[4]["map_y"] - data[3]["map_y"], 176.0)
     # Last pixel should start at (4 - 1) * 176.0 = 528.0
     self.assertAlmostEqual(data[-1]["map_x"], 528.0)
     # 1 x 2 mosiac, (8 - 1) * 176.0 = 1232.0
     self.assertAlmostEqual(data[-1]["map_y"], 1232.0)
     self.assertAlmostEqual(data[1][1], 1.14792180)
     self.assertAlmostEqual(data[2][2], 1.14063489)
     xs = getx(data)
     self.assertEqual(min(xs), 1990.178226)
     self.assertEqual(max(xs), 2113.600132)
示例#41
0
    def transformed(self, data):
        """Return the normalized X array for `data` according to `self.method`.

        Works on a copy; the input table itself is not modified. Infinities
        produced by zero divisors are replaced via `replace_infs`. Returns
        a raw numpy array, not a Table.
        """
        if data.X.shape[0] == 0:
            # nothing to normalize
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            nan_num = nans.sum(axis=1, keepdims=True)
            ys = data.X
            if np.any(nan_num > 0):
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                ys = np.nan_to_num(ys)  # edge elements can still be zero
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if np.any(nan_num > 0):
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            # divide each row by its integral over [lower, upper]
            norm_data = Integrate(methods=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
            replace_infs(data.X)
        elif self.method == Normalize.Attribute:
            # divide each row by the value of a continuous attribute
            # (the unused local `nd` was removed)
            if self.attr in data.domain and isinstance(
                    data.domain[self.attr], Orange.data.ContinuousVariable):
                ndom = Orange.data.Domain([data.domain[self.attr]])
                factors = data.transform(ndom)
                data.X /= factors.X
                replace_infs(data.X)
            else:  # invalid attribute for normalization
                data.X *= float("nan")
        return data.X
 def test_unknown_elsewhere_different(self):
     """NaN-aware interpolation functions fill unknown values; the
     NaN-unaware one leaves them, while matching elsewhere."""
     data = Orange.data.Table("iris")
     with data.unlocked():
         # plant unknowns into two rows
         data.X[0, 1] = np.nan
         data.X[1, 1] = np.nan
         data.X[1, 2] = np.nan
     im = Interpolate(getx(data))
     im.interpfn = interp1d_with_unknowns_numpy
     interpolated = im(data)
     self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
     self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
     self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
     self.assertFalse(np.any(np.isnan(interpolated.X)))
     # the scipy NaN-aware variant must agree with the numpy one
     im.interpfn = interp1d_with_unknowns_scipy
     interpolated = im(data)
     self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
     self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
     self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
     self.assertFalse(np.any(np.isnan(interpolated.X)))
     save_X = interpolated.X
     # the NaN-unaware variant keeps the unknowns
     im.interpfn = interp1d_wo_unknowns_scipy
     interpolated = im(data)
     self.assertTrue(np.any(np.isnan(interpolated.X)))
     # parts without unknown should be the same
     np.testing.assert_almost_equal(data.X[2:], save_X[2:])
 def transformed(self, data):
     """Subtract the reference phase, wrapped to the principal branch."""
     if not len(data):
         return data
     ref_X = self.interpolate_extend_to(self.reference, getx(data))
     # phase difference computed on the unit circle to wrap correctly
     return replace_infs(np.angle(np.exp(data.X * 1j) / np.exp(ref_X * 1j)))
 def test_predict_savgol_another_interpolate(self):
     """Making a Savitzky-Golay model interpolable barely changes AUC."""
     train, test = separate_learn_test(self.collagen)
     train = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(train)
     auc_plain = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     train = Interpolate(points=getx(train))(train)
     auc_interp = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     self.assertAlmostEqual(auc_plain, auc_interp, delta=0.02)
示例#45
0
def transform_to_sorted_features(data):
    """Return (x values, sort order, already-increasing flag, X with
    columns ordered by increasing x)."""
    xs = getx(data)
    xsind = np.argsort(xs)
    mon = is_increasing(xsind)
    # skip the column shuffle (and the copy it makes) when already sorted
    X = data.X if mon else data.X[:, xsind]
    return xs, xsind, mon, X
    def __call__(self, data):
        """Transform ``data`` into a domain of EXAFS features.

        With no input attributes the feature list is empty but the class
        variables and metas are preserved.
        """
        if data.X.shape[1] > 0:
            # input data can be in any order, so sort the energies first
            energies = np.sort(getx(data))
            lo, hi = extra_exafs.get_idx_bounds(
                energies, self.edge, self.extra_from, self.extra_to)
            k_interp, _ = extra_exafs.get_K_points(
                energies, self.edge, lo, hi)

            # shared state for all per-feature compute_values
            common = _ExtractEXAFSCommon(self.edge, self.extra_from,
                                         self.extra_to, self.poly_deg,
                                         self.kweight, self.m, k_interp,
                                         data.domain)
            newattrs = [ContinuousVariable(
                            name=str(k),
                            compute_value=ExtractEXAFSFeature(i, common))
                        for i, k in enumerate(k_interp)]
        else:
            newattrs = []

        domain = Orange.data.Domain(newattrs, data.domain.class_vars,
                                    data.domain.metas)
        return data.transform(domain)
 def transformed(self, data):
     """Subtract ``amount`` times the interpolated reference spectrum."""
     # numpy does not like to divide shapes (0, b) by (a, b)
     if not len(data):
         return data
     ref_X = self.interpolate_extend_to(self.reference, getx(data))
     return data.X - self.amount * ref_X
示例#48
0
    def run_preview(data: Table, m_def, state: TaskState):
        """Fit the composite model built from ``m_def`` to every row.

        Returns ``(orig_data, data, model_result)`` where ``model_result``
        maps each row id to its fit result. Raises InterruptException when
        the task is cancelled.
        """
        def progress_interrupt(_: float):
            # bail out as soon as the GUI requests cancellation
            if state.is_interruption_requested():
                raise InterruptException

        # Protects against running the task in succession many times, as would
        # happen when adding a preprocessor (there, commit() is called twice).
        # Wait 500 ms before processing - if a new task is started meanwhile,
        # this one can be cancelled easily.
        for _ in range(10):
            time.sleep(0.050)
            progress_interrupt(0)

        orig_data = data

        model, parameters = create_composite_model(m_def)

        model_result = {}
        if data is not None and model is not None:
            # compute x only after the None check: getx(None) would crash
            # before the guard could take effect
            x = getx(data)
            for row in data:
                progress_interrupt(0)
                model_result[row.id] = model.fit(row.x, parameters, x=x)

        return orig_data, data, model_result
 def test_predict_samename_domain_interpolation(self):
     """Interpolation-capable train data yields an identical AUC even
     after the test attributes lose their direct conversion."""
     train, test = separate_learn_test(self.collagen)
     auc_before = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     test = destroy_atts_conversion(test)
     # make train capable of interpolation
     train = Interpolate(points=getx(train))(train)
     auc_after = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
     self.assertEqual(auc_before, auc_after)
 def test_unknown_middle(self):
     """A column of NaNs in the middle is reconstructed by interpolation."""
     data = Orange.data.Table("iris")
     # whole column in the middle should be interpolated
     with data.unlocked():
         data.X[:, 1] = np.nan
     out = Interpolate(getx(data))(data)
     self.assertFalse(np.any(np.isnan(out.X)))
 def test_unordered_features(self):
     """Each preprocessor must produce the same values regardless of the
     order of the input features."""
     data = self.collagen
     variants = [reverse_attr(data), shuffle_attr(data)]
     for proc in PREPROCESSORS:
         # TODO find out why there are small differences for certain preprocessors
         if isinstance(proc, (RubberbandBaseline, Normalize, PCADenoising)):
             def comparison(a, b):
                 np.testing.assert_almost_equal(a, b, decimal=5)
         else:
             comparison = np.testing.assert_equal
         reference = proc(data)
         X_ref = reference.X[:, np.argsort(getx(reference))]
         for variant in variants:
             processed = proc(variant)
             X_var = processed.X[:, np.argsort(getx(processed))]
             comparison(X_ref, X_var)
示例#52
0
 def __init__(self, target, kind="linear", handle_nans=True):
     """Prepare interpolation onto the domain of ``target``.

     All attributes of ``target`` must be continuous, otherwise
     NotAllContinuousException is raised.
     """
     if any(not isinstance(a, Orange.data.ContinuousVariable)
            for a in target.domain.attributes):
         raise NotAllContinuousException()
     self.target = target
     self.points = getx(self.target)
     self.kind = kind
     self.handle_nans = handle_nans
     self.interpfn = None
 def test_line_intersection(self):
     """intersect_curves flags exactly the expected spectra crossing a
     horizontal line at y = 1.15."""
     data = self.collagen
     x = getx(data)
     order = np.argsort(x)
     x_sorted = x[order]
     ys = data.X[:, order]
     hits = intersect_curves(x_sorted, ys,
                             np.array([0, 1.15]), np.array([3000, 1.15]))
     np.testing.assert_equal(np.flatnonzero(hits),
                             [191, 635, 638, 650, 712, 716, 717, 726])
示例#54
0
    def test_autointerpolate(self):
        """Conversion to a foreign domain interpolates automatically:
        out-of-range points become NaN and a roundtrip nearly restores
        the original data."""
        d1 = Orange.data.Table("peach_juice.dpt")
        d2 = Orange.data.Table("collagen.csv")
        d3 = Orange.data.Table(d1.domain, d2)
        d1x = getx(d1)
        d2x = getx(d2)

        # have the correct number of non-nan elements: only points inside
        # the collagen x-range are interpolatable
        inside = (d1x >= min(d2x)) & (d1x <= max(d2x))
        self.assertEqual(np.sum(inside), np.sum(~np.isnan(d3.X[0])))

        # check roundtrip back to the original domain
        atts = features_with_interpolation(d2x)
        dround = Orange.data.Table(Orange.data.Domain(atts, None), d3)
        # edges are unknown, the rest roughly the same
        np.testing.assert_allclose(dround.X[:, 1:-1], d2.X[:, 1:-1], rtol=0.011)
示例#55
0
 def interpolate_extend_to(self, interpolate, wavenumbers):
     """
     Interpolate data to given wavenumbers and extend the possibly
     nan-edges with the nearest values.
     """
     # interpolate reference to the given wavenumbers
     X = interp1d_with_unknowns_numpy(getx(interpolate),
                                      interpolate.X, wavenumbers)
     # edges outside the reference range come back as NaN; fill them
     # with the nearest known values
     X, _ = nan_extend_edges_and_interpolate(wavenumbers, X)
     return X
 def test_predict_different_domain(self):
     """Predicting on a shifted domain must either raise (recent Orange)
     or leave the classifier at chance level (Orange before 3.19)."""
     train, test = separate_learn_test(self.collagen)
     test = Interpolate(points=getx(test) - 1)(test)  # other test domain
     try:
         from Orange.data.table import DomainTransformationError
     except ImportError:  # until Orange 3.19
         aucdestroyed = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
         self.assertTrue(0.45 < aucdestroyed < 0.55)
     else:
         with self.assertRaises(DomainTransformationError):
             LogisticRegressionLearner()(train)(test)
示例#57
0
 def transformed(self, data):
     """Compute transmittance, either against a reference single-channel
     spectrum or from absorbance data."""
     if self.ref is not None:
         # Calculate from single-channel data
         ref_X = self.interpolate_extend_to(self.ref, getx(data))
         return data.X / ref_X
     # Calculate from absorbance data: T = 10 ** (-A), in place
     transd = data.X.copy()
     transd *= -1
     np.power(10, transd, transd)
     return transd
示例#58
0
 def transformed(self, data):
     """Compute absorbance, either against a reference single-channel
     spectrum or from transmittance data."""
     if self.ref is not None:
         # Calculate from single-channel data: A = log10(ref / data)
         absd = self.interpolate_extend_to(self.ref, getx(data)) / data.X
         np.log10(absd, absd)
         return absd
     # Calculate from transmittance data: A = -log10(T)
     absd = np.log10(data.X)
     absd *= -1
     return absd
 def test_unknown_elsewhere(self):
     """Scattered NaNs are filled by the default interpolation function."""
     data = Orange.data.Table("iris")
     # tables may be read-only; unlock before editing values in place
     # (consistent with the other tests that mutate data.X)
     with data.unlocked():
         data.X[0, 1] = np.nan
         data.X[1, 1] = np.nan
         data.X[1, 2] = np.nan
     im = Interpolate(getx(data))
     interpolated = im(data)
     # values reconstructed from neighbouring points
     self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
     self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
     self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
     self.assertFalse(np.any(np.isnan(interpolated.X)))
示例#60
0
 def __call__(self, data):
     """Keep the attributes whose x value lies within [lowlim, highlim]
     (or, with ``inverse``, those at or beyond the set limits); unset
     limits are treated as unbounded."""
     xs = getx(data)

     def within(v):
         return ((self.lowlim is None or self.lowlim <= v) and
                 (self.highlim is None or v <= self.highlim))

     def beyond(v):
         return ((self.lowlim is not None and v <= self.lowlim) or
                 (self.highlim is not None and self.highlim <= v))

     keep = beyond if self.inverse else within
     okattrs = [at for at, v in zip(data.domain.attributes, xs) if keep(v)]
     domain = Orange.data.Domain(okattrs, data.domain.class_vars,
                                 metas=data.domain.metas)
     return data.from_table(domain, data)