def test_cut_both(self):
    """Cut with both limits keeps only attributes whose x is inside them."""
    d = self.collagen
    # a range containing no data points yields an empty attribute set
    dcut = Cut(lowlim=0, highlim=2)(d)
    self.assertFalse(getx(dcut))
    # a populated range keeps only x values within [lowlim, highlim]
    dcut = Cut(lowlim=1000, highlim=1100)(d)
    self.assertGreaterEqual(min(getx(dcut)), 1000)
    self.assertLessEqual(max(getx(dcut)), 1100)
def test_slightly_different_domain(self): """ If test data has a slightly different domain then (with interpolation) we should obtain a similar classification score. """ # rows full of unknowns make LogisticRegression undefined # we can obtain them, for example, with EMSC, if one of the badspectra # is a spectrum from the data learner = LogisticRegressionLearner(preprocessors=[_RemoveNaNRows()]) for proc in PREPROCESSORS: if hasattr(proc, "skip_add_zeros"): continue # LR that can not handle unknown values train, test = separate_learn_test(self.collagen) train1 = proc(train) aucorig = AUC(TestOnTestData(train1, test, [learner])) test = destroy_atts_conversion(test) test = odd_attr(test) # a subset of points for training so that all test sets points # are within the train set points, which gives no unknowns train = Interpolate(points=getx(train)[1:-3])(train) # interpolatable train train = proc(train) # explicit domain conversion test to catch exceptions that would # otherwise be silently handled in TestOnTestData _ = Orange.data.Table(train.domain, test) aucnow = AUC(TestOnTestData(train, test, [learner])) self.assertAlmostEqual(aucnow, aucorig, delta=0.02, msg="Preprocessor " + str(proc)) test = Interpolate(points=getx(test) - 1.)(test) # also do a shift _ = Orange.data.Table(train.domain, test) # explicit call again aucnow = AUC(TestOnTestData(train, test, [learner])) # the difference should be slight self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
def commit(self):
    """Interpolate the input data per the selected mode and send the output.

    Modes (self.input_radio): 0 = resample onto the data's own x values,
    1 = linear space built from xmin/xmax/dx, 2 = points from a second input.
    Sends None when there is no data or the settings are invalid.
    """
    out = None
    self.Error.dxzero.clear()
    self.Error.too_many_points.clear()
    if self.data:
        if self.input_radio == 0:
            # identity resampling onto the data's existing x values
            points = getx(self.data)
            out = Interpolate(points)(self.data)
        elif self.input_radio == 1:
            xs = getx(self.data)
            if not self.dx > 0:
                # dx must be strictly positive to define a grid
                self.Error.dxzero()
            else:
                # fall back to the data's extremes when limits are unset
                xmin = self.xmin if self.xmin is not None else np.min(xs)
                xmax = self.xmax if self.xmax is not None else np.max(xs)
                xmin, xmax = min(xmin, xmax), max(xmin, xmax)
                reslength = abs(math.ceil((xmax - xmin)/self.dx))
                if reslength < 10002:  # cap output size to keep the widget responsive
                    points = np.arange(xmin, xmax, self.dx)
                    out = Interpolate(points)(self.data)
                else:
                    self.Error.too_many_points(reslength)
        elif self.input_radio == 2 and self.data_points_interpolate is not None:
            # interpolation defined by the reference-points input
            out = self.data_points_interpolate(self.data)
    self.Outputs.interpolated_data.send(out)
def test_roundtrip(self):
    """Saving to .xyz and reloading preserves X, x values and metas."""
    d1 = Orange.data.Table("map_test.xyz")
    with named_file("", suffix=".xyz") as fn:
        d1.save(fn)
        d2 = Orange.data.Table(fn)
        np.testing.assert_equal(d1.X, d2.X)
        np.testing.assert_equal(getx(d1), getx(d2))
        np.testing.assert_equal(d1.metas, d2.metas)
def test_read(self):
    """Omnic .map reader yields expected intensities, x range and coordinates."""
    d = Orange.data.Table("small_Omnic.map")
    self.assertAlmostEqual(d[1, 0], 4.01309, places=5)
    self.assertAlmostEqual(d[0, 0], 3.98295, places=5)
    self.assertEqual(min(getx(d)), 1604.51001)
    self.assertEqual(max(getx(d)), 1805.074097)
    self.assertEqual(d[0]["map_x"], 0)
    self.assertEqual(d[1]["map_y"], 0)
def test_read(self):
    """Hermes HDF5 reader yields expected intensities, x range and coordinates."""
    d = Orange.data.Table("Hermes_HDF5/small_OK.hdf5")
    self.assertEqual(d[0, 0], 1000.1)
    self.assertEqual(d[1, 0], 2000.1)
    self.assertEqual(min(getx(d)), 100.1)
    self.assertEqual(max(getx(d)), 101.1)
    self.assertEqual(d[1]["map_x"], 2.1)
    self.assertEqual(d[1]["map_y"], 11.1)
def test_roundtrip(self):
    """A save/load cycle through the .xyz format keeps the table intact."""
    original = Orange.data.Table("map_test.xyz")
    with named_file("", suffix=".xyz") as fn:
        original.save(fn)
        reloaded = Orange.data.Table(fn)
        np.testing.assert_equal(original.X, reloaded.X)
        np.testing.assert_equal(getx(original), getx(reloaded))
        np.testing.assert_equal(original.metas, reloaded.metas)
def test_read(self):
    """XYZ reader yields 16 spectra with expected coordinates and values."""
    d = Orange.data.Table("map_test.xyz")
    self.assertEqual(len(d), 16)
    self.assertEqual(d[1]["map_x"], 1)
    self.assertEqual(d[1]["map_y"], 7)
    self.assertEqual(d[1][1], 0.1243)
    self.assertEqual(d[2][2], 0.1242)
    self.assertEqual(min(getx(d)), 1634.84)
    self.assertEqual(max(getx(d)), 1641.69)
def transformed(self, X, wavenumbers):
    """Apply EMSC correction to spectra X sampled at `wavenumbers`.

    Builds a model of polynomial terms (up to self.order), optional bad
    spectra and the mean reference spectrum, fits it per spectrum with
    (weighted) least squares, and returns each corrected spectrum with the
    fitted model parameters appended as extra columns.
    """
    # wavenumber have to be input as sorted
    # about 85% of time in __call__ function is spent is lstsq
    # compute average spectrum from the reference
    ref_X = np.atleast_2d(spectra_mean(self.reference.X))

    def interpolate_to_data(other_xs, other_data):
        # all input data needs to be interpolated (and NaNs removed)
        interpolated = interp1d_with_unknowns_numpy(other_xs, other_data, wavenumbers)
        # we know that X is not NaN. same handling of reference as of X
        interpolated, _ = nan_extend_edges_and_interpolate(wavenumbers, interpolated)
        return interpolated

    ref_X = interpolate_to_data(getx(self.reference), ref_X)
    if self.weights:
        # interpolate reference to the data
        wei_X = interp1d_with_unknowns_numpy(getx(self.weights), self.weights.X, wavenumbers)
        # set whichever weights are undefined (usually at edges) to zero
        wei_X[np.isnan(wei_X)] = 0
    else:
        wei_X = np.ones((1, len(wavenumbers)))
    N = wavenumbers.shape[0]
    # scale wavenumbers to roughly [-1, 1] for the polynomial terms
    m0 = - 2.0 / (wavenumbers[0] - wavenumbers[N - 1])
    c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])
    n_badspec = len(self.badspectra) if self.badspectra is not None else 0
    if self.badspectra:
        badspectra_X = interpolate_to_data(getx(self.badspectra), self.badspectra.X)
    M = []
    for x in range(0, self.order+1):
        M.append((m0 * (wavenumbers - c_coeff)) ** x)
    for y in range(0, n_badspec):
        M.append(badspectra_X[y])
    M.append(ref_X)  # always add reference spectrum to the model
    n_add_model = len(M)
    M = np.vstack(M).T  # M is for the correction, for par. estimation M_weighted is used
    M_weighted = M*wei_X.T
    newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
    for i, rawspectrum in enumerate(X):
        rawspectrumW = (rawspectrum*wei_X)[0]
        # NOTE(review): rawspectrumW is computed but unused — the fit pairs the
        # weighted model with the unweighted spectrum; confirm this is intended.
        m = np.linalg.lstsq(M_weighted, rawspectrum, rcond=-1)[0]
        corrected = rawspectrum
        for x in range(0, self.order+1+n_badspec):
            corrected = (corrected - (m[x] * M[:, x]))
        if self.scaling:
            corrected = corrected/m[self.order+1+n_badspec]
        corrected[np.isinf(corrected)] = np.nan  # fix values caused by zero weights
        corrected = np.hstack((corrected, m))  # append the model parameters
        newspectra[i] = corrected
    return newspectra
def transformed(self, X, wavenumbers):
    """Apply EMSC correction to spectra X sampled at `wavenumbers`.

    Newer variant: weight computation is delegated to weighted_wavenumbers.
    Returns corrected spectra with fitted model parameters appended.
    """
    # wavenumber have to be input as sorted
    # about 85% of time in __call__ function is spent is lstsq
    # compute average spectrum from the reference
    ref_X = np.atleast_2d(spectra_mean(self.reference.X))

    def interpolate_to_data(other_xs, other_data):
        # all input data needs to be interpolated (and NaNs removed)
        interpolated = interp1d_with_unknowns_numpy(
            other_xs, other_data, wavenumbers)
        # we know that X is not NaN. same handling of reference as of X
        interpolated, _ = nan_extend_edges_and_interpolate(
            wavenumbers, interpolated)
        return interpolated

    ref_X = interpolate_to_data(getx(self.reference), ref_X)
    wei_X = weighted_wavenumbers(self.weights, wavenumbers)
    N = wavenumbers.shape[0]
    # scale wavenumbers to roughly [-1, 1] for the polynomial terms
    m0 = -2.0 / (wavenumbers[0] - wavenumbers[N - 1])
    c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])
    n_badspec = len(self.badspectra) if self.badspectra is not None else 0
    if self.badspectra:
        badspectra_X = interpolate_to_data(getx(self.badspectra),
                                           self.badspectra.X)
    M = []
    for x in range(0, self.order + 1):
        M.append((m0 * (wavenumbers - c_coeff))**x)
    for y in range(0, n_badspec):
        M.append(badspectra_X[y])
    M.append(ref_X)  # always add reference spectrum to the model
    n_add_model = len(M)
    # M is for the correction, for par. estimation M_weighted is used
    M = np.vstack(M).T
    M_weighted = M * wei_X.T
    newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
    for i, rawspectrum in enumerate(X):
        rawspectrumW = (rawspectrum * wei_X)[0]
        # NOTE(review): rawspectrumW is computed but unused — the fit pairs the
        # weighted model with the unweighted spectrum; confirm this is intended.
        m = np.linalg.lstsq(M_weighted, rawspectrum, rcond=-1)[0]
        corrected = rawspectrum
        for x in range(0, self.order + 1 + n_badspec):
            corrected = (corrected - (m[x] * M[:, x]))
        if self.scaling:
            corrected = corrected / m[self.order + 1 + n_badspec]
        corrected[np.isinf(corrected)] = np.nan  # fix values caused by zero weights
        corrected = np.hstack((corrected, m))  # append the model parameters
        newspectra[i] = corrected
    return newspectra
def test_read(self):
    """Reading map_test.xyz yields 16 spectra with known values."""
    table = Orange.data.Table("map_test.xyz")
    self.assertEqual(len(table), 16)
    # coordinates of the second spectrum
    self.assertEqual(table[1]["map_x"], 1)
    self.assertEqual(table[1]["map_y"], 7)
    # spot-check intensities
    self.assertEqual(table[1][1], 0.1243)
    self.assertEqual(table[2][2], 0.1242)
    # wavenumber range
    xs = getx(table)
    self.assertEqual(min(xs), 1634.84)
    self.assertEqual(max(xs), 1641.69)
def set_data(self, data):
    """Store the input data and update x-limit placeholders, then commit."""
    self.data = data
    if self.data and len(getx(data)):
        # show the data's x extremes as placeholder hints
        points = getx(data)
        self.xmin_edit.setPlaceholderText(str(np.min(points)))
        self.xmax_edit.setPlaceholderText(str(np.max(points)))
    else:
        self.xmin_edit.setPlaceholderText("")
        self.xmax_edit.setPlaceholderText("")
    self.commit()
def set_data(self, data):
    """Remember the input data, refresh the x-range placeholders and commit."""
    self.data = data
    points = getx(data) if self.data else None
    if points is not None and len(points):
        self.xmin_edit.setPlaceholderText(str(np.min(points)))
        self.xmax_edit.setPlaceholderText(str(np.max(points)))
    else:
        self.xmin_edit.setPlaceholderText("")
        self.xmax_edit.setPlaceholderText("")
    self.commit()
def test_autointerpolate(self):
    """Output domain interpolates on conversion; original domain does not."""
    self.send_signal("Data", self.collagen)
    out = self.get_output("Interpolated data")
    np.testing.assert_equal(getx(self.collagen), getx(out))
    # no auto-interpolation
    non_interp = Orange.data.Table(self.collagen.domain, self.peach)
    self.assertTrue(np.isnan(non_interp.X).all())
    # auto-interpolation
    auto_interp = Orange.data.Table(out.domain, self.peach)
    self.assertFalse(np.isnan(auto_interp.X).all())
    np.testing.assert_equal(getx(self.collagen), getx(auto_interp))
def test_array_read(self):
    """PTIR spectral-array reader yields expected values for the demod signal."""
    reader = initialize_reader(PTIRFileReader,
                               "photothermal/Nodax_Spectral_Array.ptir")
    # select the data channel before reading
    reader.data_signal = b'//ZI/*/DEMODS/0/R'
    d = reader.read()
    self.assertAlmostEqual(d[0][0], 0.21426094)
    self.assertAlmostEqual(d[1][0], 1.6351842)
    self.assertEqual(min(getx(d)), 801.0)
    self.assertEqual(max(getx(d)), 1797.0)
    self.assertAlmostEqual(d[0]["map_x"], 801.9500122070312)
    self.assertAlmostEqual(d[0]["map_y"], -500.1499938964844)
def test_autointerpolate(self):
    """Converting through the widget's output domain interpolates values."""
    self.send_signal("Data", self.collagen)
    out = self.get_output("Interpolated data")
    np.testing.assert_equal(getx(self.collagen), getx(out))
    # converting through the original domain leaves only unknowns
    without_interp = Orange.data.Table(self.collagen.domain, self.peach)
    self.assertTrue(np.isnan(without_interp.X).all())
    # converting through the interpolated domain fills in values
    with_interp = Orange.data.Table(out.domain, self.peach)
    self.assertFalse(np.isnan(with_interp.X).all())
    np.testing.assert_equal(getx(self.collagen), getx(with_interp))
def test_add_limit(self): dmin, dmax = min(getx(self.data)), max(getx(self.data)) # first addition adds two limits self.editor.range_button.click() self.widget.apply() p = self.get_preprocessor() self.assertEqual(p.zero_points, [dmin, dmax]) # the second addition adds one limit self.editor.range_button.click() self.widget.apply() p = self.get_preprocessor() self.assertEqual(p.zero_points, [dmin, dmax, (dmin + dmax) / 2])
def test_envi_comparison(self):
    """Agilent binary readers (.seq/.dms) agree with the ENVI .hdr readers."""
    # Image
    d1_a = Orange.data.Table("agilent/4_noimage_agg256.seq")
    d1_e = Orange.data.Table("agilent/4_noimage_agg256.hdr")
    np.testing.assert_equal(d1_a.X, d1_e.X)
    # Wavenumbers are rounded in .hdr files
    np.testing.assert_allclose(getx(d1_a), getx(d1_e))
    # Mosaic
    d2_a = Orange.data.Table("agilent/5_mosaic_agg1024.dms")
    d2_e = Orange.data.Table("agilent/5_mosaic_agg1024.hdr")
    np.testing.assert_equal(d2_a.X, d2_e.X)
    np.testing.assert_allclose(getx(d2_a), getx(d2_e))
def test_interpolate_points(self):
    """Points-input mode warns until reference points are connected."""
    self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
    # switch to "interpolate to points from input" mode
    self.widget.controls.input_radio.buttons[2].click()
    self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
    self.send_signal("Data", self.peach)
    self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
    self.send_signal("Points", self.collagen)
    self.assertFalse(self.widget.Warning.reference_data_missing.is_shown())
    out = self.get_output("Interpolated data")
    np.testing.assert_equal(getx(self.collagen), getx(out))
    # removing the points input restores the warning
    self.send_signal("Points", None)
    self.assertTrue(self.widget.Warning.reference_data_missing.is_shown())
def test_interpolate_points(self):
    """The "Points" input drives interpolation and its missing-data warning."""
    warning = self.widget.Warning.reference_data_missing
    self.assertFalse(warning.is_shown())
    self.widget.controls.input_radio.buttons[2].click()
    self.assertTrue(warning.is_shown())
    self.send_signal("Data", self.peach)
    self.assertTrue(warning.is_shown())
    self.send_signal("Points", self.collagen)
    self.assertFalse(warning.is_shown())
    out = self.get_output("Interpolated data")
    np.testing.assert_equal(getx(self.collagen), getx(out))
    self.send_signal("Points", None)
    self.assertTrue(warning.is_shown())
def test_envi_comparison(self):
    """Agilent binary readers (.dat/.dmt) agree with the ENVI .hdr readers."""
    # Image
    d1_a = Orange.data.Table("agilent/4_noimage_agg256.dat")
    d1_e = Orange.data.Table("agilent/4_noimage_agg256.hdr")
    np.testing.assert_equal(d1_a.X, d1_e.X)
    # Wavenumbers are rounded in .hdr files
    np.testing.assert_allclose(getx(d1_a), getx(d1_e))
    # Mosaic
    d2_a = Orange.data.Table("agilent/5_mosaic_agg1024.dmt")
    d2_e = Orange.data.Table("agilent/5_Mosaic_agg1024.hdr")
    np.testing.assert_equal(d2_a.X, d2_e.X)
    np.testing.assert_allclose(getx(d2_a), getx(d2_e))
def transformed(self, X, wavenumbers):
    """Apply EMSC correction (polynomial + reference model) to spectra X.

    Fits, per spectrum, a model of polynomial terms up to self.order plus the
    mean reference spectrum via least squares, subtracts the polynomial part,
    optionally rescales by the reference coefficient, and returns each
    corrected spectrum with the fitted parameters appended as extra columns.
    """
    # about 85% of time in __call__ function is spent is lstsq
    # compute average spectrum from the reference
    ref_X = np.atleast_2d(spectra_mean(self.reference.X))
    # interpolate reference to the data
    ref_X = interp1d_with_unknowns_numpy(getx(self.reference), ref_X, wavenumbers)
    # we know that X is not NaN. same handling of reference as of X
    ref_X, _ = nan_extend_edges_and_interpolate(wavenumbers, ref_X)
    if self.weights:
        # interpolate the weights to the data
        wei_X = interp1d_with_unknowns_numpy(getx(self.weights), self.weights.X,
                                             wavenumbers)
        # set whichever weights are undefined (usually at edges) to zero
        wei_X[np.isnan(wei_X)] = 0
    else:
        wei_X = np.ones((1, len(wavenumbers)))
    N = wavenumbers.shape[0]
    # scale wavenumbers to roughly [-1, 1] for the polynomial terms
    m0 = -2.0 / (wavenumbers[0] - wavenumbers[N - 1])
    c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])
    M = []
    for x in range(0, self.order + 1):
        M.append((m0 * (wavenumbers - c_coeff))**x)
    M.append(ref_X)  # always add reference spectrum to the model
    n_add_model = len(M)
    # M is needed below for the correction; for parameter estimation the
    # weighted variant M_weighted is used
    M = np.vstack(M).T
    M_weighted = M * wei_X.T
    newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
    for i, rawspectrum in enumerate(X):
        rawspectrumW = (rawspectrum * wei_X)[0]
        # NOTE(review): rawspectrumW is unused — the fit pairs the weighted
        # model with the unweighted spectrum; confirm this is intended.
        # rcond=-1 pins the pre-NumPy-1.14 cutoff behavior and silences the
        # FutureWarning; this matches the other EMSC variants in this file.
        m = np.linalg.lstsq(M_weighted, rawspectrum, rcond=-1)[0]
        corrected = rawspectrum
        for x in range(0, self.order + 1):
            corrected = (corrected - (m[x] * M[:, x]))
        if self.scaling:
            corrected = corrected / m[self.order + 1]
        corrected[np.isinf(corrected)] = np.nan  # fix values which can be caused by zero weights
        corrected = np.hstack((corrected, m))  # append the model parameters
        newspectra[i] = corrected
    return newspectra
def test_unordered_features(self):
    """Preprocessor results are invariant to attribute (x) ordering."""
    for proc in PREPROCESSORS:
        data = preprocessor_data(proc)
        data_reversed = reverse_attr(data)
        data_shuffle = shuffle_attr(data)
        # compare columns after sorting each result back by x
        pdata = proc(data)
        X = pdata.X[:, np.argsort(getx(pdata))]
        pdata_reversed = proc(data_reversed)
        X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
        np.testing.assert_almost_equal(X, X_reversed,
                                       err_msg="Preprocessor " + str(proc))
        pdata_shuffle = proc(data_shuffle)
        X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
        np.testing.assert_almost_equal(X, X_shuffle,
                                       err_msg="Preprocessor " + str(proc))
def test_hyperspectral_read(self):
    """PTIR hyperspectral reader yields expected shape, values and coordinates."""
    reader = initialize_reader(PTIRFileReader,
                               "photothermal/Hyper_Sample.ptir")
    # select the data channel before reading
    reader.data_signal = b'//ZI/*/DEMODS/0/R'
    d = reader.read()
    self.assertEqual(len(d), 35)
    self.assertEqual(len(d.domain.attributes), 451)
    self.assertAlmostEqual(d[0][0], 0.0137912575)
    self.assertAlmostEqual(d[1][0], -0.08101661)
    self.assertEqual(min(getx(d)), 900.0)
    self.assertEqual(max(getx(d)), 1800.0)
    self.assertAlmostEqual(d[0]["map_x"], -4088.96337890625)
    self.assertAlmostEqual(d[0]["map_y"], -886.1981201171875)
def test_image_read(self):
    """Agilent .dat image reader yields expected pixels, values and x range."""
    d = Orange.data.Table("agilent/4_noimage_agg256.dat")
    self.assertEqual(len(d), 64)
    # Pixel sizes are 5.5 * 16 = 88.0 (binning to reduce test data)
    self.assertAlmostEqual(d[1]["map_x"] - d[0]["map_x"], 88.0)
    self.assertAlmostEqual(d[8]["map_y"] - d[7]["map_y"], 88.0)
    # Last pixel should start at (8 - 1) * 88.0 = 616.0
    self.assertAlmostEqual(d[-1]["map_x"], 616.0)
    self.assertAlmostEqual(d[-1]["map_y"], 616.0)
    self.assertAlmostEqual(d[1][1], 1.27181053)
    self.assertAlmostEqual(d[2][2], 1.27506005)
    self.assertEqual(min(getx(d)), 1990.178226)
    self.assertEqual(max(getx(d)), 2113.600132)
def test_unordered_features(self):
    """Preprocessor results on collagen are invariant to attribute ordering."""
    data = self.collagen
    data_reversed = reverse_attr(data)
    data_shuffle = shuffle_attr(data)
    for proc in PREPROCESSORS:
        # compare columns after sorting each result back by x
        pdata = proc(data)
        X = pdata.X[:, np.argsort(getx(pdata))]
        pdata_reversed = proc(data_reversed)
        X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
        np.testing.assert_almost_equal(X, X_reversed,
                                       err_msg="Preprocessor " + str(proc))
        pdata_shuffle = proc(data_shuffle)
        X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
        np.testing.assert_almost_equal(X, X_shuffle,
                                       err_msg="Preprocessor " + str(proc))
def test_mosaic_read(self):
    """Agilent .dmt mosaic reader yields expected pixels, values and x range."""
    d = Orange.data.Table("agilent/5_mosaic_agg1024.dmt")
    self.assertEqual(len(d), 32)
    # Pixel sizes are 5.5 * 32 = 176.0 (binning to reduce test data)
    self.assertAlmostEqual(d[1]["map_x"] - d[0]["map_x"], 176.0)
    self.assertAlmostEqual(d[4]["map_y"] - d[3]["map_y"], 176.0)
    # Last pixel should start at (4 - 1) * 176.0 = 528.0
    self.assertAlmostEqual(d[-1]["map_x"], 528.0)
    # 1 x 2 mosiac, (8 - 1) * 176.0 = 1232.0
    self.assertAlmostEqual(d[-1]["map_y"], 1232.0)
    self.assertAlmostEqual(d[1][1], 1.14792180)
    self.assertAlmostEqual(d[2][2], 1.14063489)
    self.assertEqual(min(getx(d)), 1990.178226)
    self.assertEqual(max(getx(d)), 2113.600132)
def test_reference_preprocessed(self):
    """Reference data passes through earlier preprocessors before later ones."""
    data = SMALL_COLLAGEN
    self.send_signal("Data", data)
    self.send_signal("Reference", data)
    self.widget.add_preprocessor(pack_editor(CutEditor))
    self.widget.add_preprocessor(pack_editor(RememberDataEditor))
    self.widget.apply()
    processed = getx(RememberData.reference)
    original = getx(data)
    # cut by default cuts 10% of the data on both edges
    removed = set(original) - set(processed)
    self.assertGreater(len(removed), 0)
    # the processed x values are a subset of the original ones
    self.assertEqual(set(), set(processed) - set(original))
    self.assertFalse(self.widget.Warning.reference_compat.is_shown())
def test_image_read(self):
    """Agilent .seq image reader yields expected pixels, values and x range."""
    d = Orange.data.Table("agilent/4_noimage_agg256.seq")
    self.assertEqual(len(d), 64)
    # Pixel sizes are 5.5 * 16 = 88.0 (binning to reduce test data)
    self.assertAlmostEqual(d[1]["map_x"] - d[0]["map_x"], 88.0)
    self.assertAlmostEqual(d[8]["map_y"] - d[7]["map_y"], 88.0)
    # Last pixel should start at (8 - 1) * 88.0 = 616.0
    self.assertAlmostEqual(d[-1]["map_x"], 616.0)
    self.assertAlmostEqual(d[-1]["map_y"], 616.0)
    # spot-check intensities
    self.assertAlmostEqual(d[1][1], 1.27181053)
    self.assertAlmostEqual(d[2][2], 1.27506005)
    # wavenumber range
    wavenumbers = getx(d)
    self.assertEqual(min(wavenumbers), 1990.178226)
    self.assertEqual(max(wavenumbers), 2113.600132)
def test_predict_different_domain_interpolation(self):
    """Interpolatable train domain keeps AUC; cutting test data degrades it."""
    train, test = separate_learn_test(self.collagen)
    aucorig = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = Interpolate(points=getx(test) - 1.)(test)  # other test domain
    train = Interpolate(points=getx(train))(train)  # make train capable of interpolation
    aucshift = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    self.assertAlmostEqual(aucorig, aucshift, delta=0.01)  # shift can decrease AUC slightly
    test = Cut(1000, 1700)(test)
    auccut1 = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = Cut(1100, 1600)(test)
    auccut2 = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = Cut(1200, 1500)(test)
    auccut3 = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    # the more we cut the lower precision we get
    self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
def test_predict_different_domain_interpolation(self):
    """Same as the older variant, using the callable TestOnTestData() API."""
    train, test = separate_learn_test(self.collagen)
    aucorig = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = Interpolate(points=getx(test) - 1.)(test)  # other test domain
    train = Interpolate(points=getx(train))(train)  # make train capable of interpolation
    aucshift = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    self.assertAlmostEqual(aucorig, aucshift, delta=0.01)  # shift can decrease AUC slightly
    test = Cut(1000, 1700)(test)
    auccut1 = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = Cut(1100, 1600)(test)
    auccut2 = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = Cut(1200, 1500)(test)
    auccut3 = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    # the more we cut the lower precision we get
    self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
def _transform_to_sorted_features(data):
    """Return (xs, sort index, already-increasing flag, X sorted by x).

    X is returned untouched when the attributes are already in increasing
    x order; otherwise its columns are reordered by the sort index.
    """
    xs = getx(data)
    xsind = np.argsort(xs)
    mon = is_increasing(xsind)
    X = data.X
    X = X if mon else X[:, xsind]
    return xs, xsind, mon, X
def test_predict_savgol_another_interpolate(self):
    """Adding interpolation after Savitzky-Golay barely changes the AUC."""
    train, test = separate_learn_test(self.collagen)
    train = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(train)
    auc = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    train = Interpolate(points=getx(train))(train)
    aucai = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    self.assertAlmostEqual(auc, aucai, delta=0.02)
def test_predict_samename_domain_interpolation(self):
    """Interpolation restores exact AUC when only attribute identity changed."""
    train, test = separate_learn_test(self.collagen)
    aucorig = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    test = destroy_atts_conversion(test)
    train = Interpolate(points=getx(train))(train)  # make train capable of interpolation
    auc = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
    self.assertEqual(aucorig, auc)
def transformed(self, data):
    """Return X normalized per self.method (Vector, Area or Attribute)."""
    if data.X.shape[0] == 0:
        # nothing to normalize
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        # NOTE(review): a zero integral produces inf here; a newer variant of
        # this method replaces infs after this division — confirm desired.
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(data.domain[self.attr],
                                                   Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            nd = data.domain[self.attr]
        else:  # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def process_stack(data, xat, yat, upsample_factor=100, use_sobel=False, ref_frame_num=0):
    """Align a hyperspectral image stack by translation and crop the result.

    Returns (shifts, aligned table): per-frame (x, y) shifts and a new
    spectra table rebuilt from the aligned, cropped hypercube.
    NOTE(review): unlike a newer variant of this function, there is no guard
    against NaNs in the hypercube here — confirm inputs are NaN-free.
    """
    hypercube, lsx, lsy = get_hypercube(data, xat, yat)
    calculate_shift = RegisterTranslation(upsample_factor=upsample_factor)
    filterfn = sobel if use_sobel else lambda x: x
    shifts, aligned_stack = alignstack(hypercube.T,
                                       shiftfn=calculate_shift,
                                       ref_frame_num=ref_frame_num,
                                       filterfn=filterfn)
    # crop to the region covered by every shifted frame
    xmin, ymin = shifts[:, 0].min(), shifts[:, 1].min()
    xmax, ymax = shifts[:, 0].max(), shifts[:, 1].max()
    xmin, xmax = int(round(xmin)), int(round(xmax))
    ymin, ymax = int(round(ymin)), int(round(ymax))
    shape = hypercube.shape
    slicex = slice(max(xmax, 0), min(shape[1], shape[1]+xmin))
    slicey = slice(max(ymax, 0), min(shape[0], shape[0]+ymin))
    cropped = np.array(aligned_stack).T[slicey, slicex]
    # transform numpy array back to Orange.data.Table
    return shifts, build_spec_table(*_spectra_from_image(cropped, getx(data),
                                                         np.linspace(*lsx)[slicex],
                                                         np.linspace(*lsy)[slicey]))
def process_stack(data, xat, yat, upsample_factor=100, use_sobel=False, ref_frame_num=0):
    """Align a hyperspectral image stack by translation and crop the result.

    Raises NanInsideHypercube when the cube contains NaNs.
    Returns (shifts, aligned table): per-frame (x, y) shifts and a new
    spectra table rebuilt from the aligned, cropped hypercube.
    """
    hypercube, lsx, lsy = get_hypercube(data, xat, yat)
    if bn.anynan(hypercube):
        # alignment via FFT cross-correlation cannot handle NaNs
        raise NanInsideHypercube(True)
    calculate_shift = RegisterTranslation(upsample_factor=upsample_factor)
    filterfn = sobel if use_sobel else lambda x: x
    shifts, aligned_stack = alignstack(hypercube.T,
                                       shiftfn=calculate_shift,
                                       ref_frame_num=ref_frame_num,
                                       filterfn=filterfn)
    # crop to the region covered by every shifted frame
    xmin, ymin = shifts[:, 0].min(), shifts[:, 1].min()
    xmax, ymax = shifts[:, 0].max(), shifts[:, 1].max()
    xmin, xmax = int(round(xmin)), int(round(xmax))
    ymin, ymax = int(round(ymin)), int(round(ymax))
    shape = hypercube.shape
    slicex = slice(max(xmax, 0), min(shape[1], shape[1] + xmin))
    slicey = slice(max(ymax, 0), min(shape[0], shape[0] + ymin))
    cropped = np.array(aligned_stack).T[slicey, slicex]
    # transform numpy array back to Orange.data.Table
    return shifts, build_spec_table(
        *_spectra_from_image(cropped, getx(data),
                             np.linspace(*lsx)[slicex],
                             np.linspace(*lsy)[slicey]))
def set_preview_data(self, data):
    """Initialize the editor's limits from the preview data's x range.

    Does nothing once the user has edited the limits manually.
    """
    if not self.user_changed:
        x = getx(data)
        if len(x):
            self.set_value("Low limit", min(x))
            self.set_value("High limit", max(x))
            self.edited.emit()
def set_preview_data(self, data):
    """Seed the limit fields from the preview data unless the user edited them."""
    if self.user_changed:
        return
    xs = getx(data)
    if len(xs):
        self.set_value("Low limit", min(xs))
        self.set_value("High limit", max(xs))
        self.edited.emit()
def test_mosaic_read(self):
    """Agilent .dms mosaic reader yields expected pixels, values and x range."""
    d = Orange.data.Table("agilent/5_mosaic_agg1024.dms")
    self.assertEqual(len(d), 32)
    # Pixel sizes are 5.5 * 32 = 176.0 (binning to reduce test data)
    self.assertAlmostEqual(d[1]["map_x"] - d[0]["map_x"], 176.0)
    self.assertAlmostEqual(d[4]["map_y"] - d[3]["map_y"], 176.0)
    # Last pixel should start at (4 - 1) * 176.0 = 528.0
    self.assertAlmostEqual(d[-1]["map_x"], 528.0)
    # 1 x 2 mosiac, (8 - 1) * 176.0 = 1232.0
    self.assertAlmostEqual(d[-1]["map_y"], 1232.0)
    self.assertAlmostEqual(d[1][1], 1.14792180)
    self.assertAlmostEqual(d[2][2], 1.14063489)
    self.assertEqual(min(getx(d)), 1990.178226)
    self.assertEqual(max(getx(d)), 2113.600132)
def transformed(self, data):
    """Return X normalized per self.method; infs from division are replaced."""
    if data.X.shape[0] == 0:
        # nothing to normalize
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        # a zero integral yields inf; replace so downstream code sees nan
        replace_infs(data.X)
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            replace_infs(data.X)
            nd = data.domain[self.attr]
        else:  # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def test_unknown_elsewhere_different(self):
    """The two NaN-aware interpolation functions agree; the naive one does not."""
    data = Orange.data.Table("iris")
    with data.unlocked():
        data.X[0, 1] = np.nan
        data.X[1, 1] = np.nan
        data.X[1, 2] = np.nan
    im = Interpolate(getx(data))
    # numpy-based NaN-aware interpolation
    im.interpfn = interp1d_with_unknowns_numpy
    interpolated = im(data)
    self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
    self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
    self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
    self.assertFalse(np.any(np.isnan(interpolated.X)))
    # scipy-based NaN-aware interpolation gives the same values
    im.interpfn = interp1d_with_unknowns_scipy
    interpolated = im(data)
    self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
    self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
    self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
    self.assertFalse(np.any(np.isnan(interpolated.X)))
    save_X = interpolated.X
    # the variant ignoring unknowns propagates NaNs instead
    im.interpfn = interp1d_wo_unknowns_scipy
    interpolated = im(data)
    self.assertTrue(np.any(np.isnan(interpolated.X)))
    # parts without unknown should be the same
    np.testing.assert_almost_equal(data.X[2:], save_X[2:])
def transformed(self, data):
    """Phase difference between data and the interpolated reference, inf-safe."""
    if not len(data):
        # empty input passes through unchanged
        return data
    ref_X = self.interpolate_extend_to(self.reference, getx(data))
    phase = np.angle(np.exp(data.X * 1j) / np.exp(ref_X * 1j))
    return replace_infs(phase)
def test_predict_savgol_another_interpolate(self):
    """Adding interpolation after Savitzky-Golay barely changes the AUC."""
    train, test = separate_learn_test(self.collagen)
    train = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(train)
    auc = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    train = Interpolate(points=getx(train))(train)
    aucai = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    self.assertAlmostEqual(auc, aucai, delta=0.02)
def transform_to_sorted_features(data):
    """Return (xs, sort index, already-increasing flag, X sorted by x)."""
    xs = getx(data)
    order = np.argsort(xs)
    already_sorted = is_increasing(order)
    if already_sorted:
        X = data.X
    else:
        X = data.X[:, order]
    return xs, order, already_sorted, X
def __call__(self, data):
    """Build a domain of EXAFS k-space features and transform data into it."""
    if data.X.shape[1] > 0:
        # --- compute K
        energies = np.sort(getx(data))  # input data can be in any order
        start_idx, end_idx = extra_exafs.get_idx_bounds(
            energies, self.edge, self.extra_from, self.extra_to)
        k_interp, k_points = extra_exafs.get_K_points(
            energies, self.edge, start_idx, end_idx)
        # ----------
        # shared state for all per-feature compute_value callbacks
        common = _ExtractEXAFSCommon(self.edge, self.extra_from, self.extra_to,
                                     self.poly_deg, self.kweight, self.m,
                                     k_interp, data.domain)
        newattrs = [
            ContinuousVariable(name=str(var),
                               compute_value=ExtractEXAFSFeature(i, common))
            for i, var in enumerate(k_interp)
        ]
    else:
        newattrs = []
    domain = Orange.data.Domain(newattrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def transformed(self, data):
    """Subtract self.amount times the interpolated reference from the data."""
    # numpy does not like to divide shapes (0, b) by (a, b)
    if not len(data):
        return data
    ref_X = self.interpolate_extend_to(self.reference, getx(data))
    return data.X - self.amount * ref_X
def run_preview(data: Table, m_def, state: TaskState):
    """Fit the composite model defined by *m_def* to every row of *data*.

    Returns (orig_data, data, model_result) where model_result maps row ids
    to fit results. Raises InterruptException when the task is cancelled.
    """
    def progress_interrupt(_: float):
        # bail out as soon as the user cancels the task
        if state.is_interruption_requested():
            raise InterruptException

    # Protects against running the task in succession many times, as would
    # happen when adding a preprocessor (there, commit() is called twice).
    # Wait 500 ms before processing - if a new task is started meanwhile,
    # this one is easily cancelled.
    for _ in range(10):
        time.sleep(0.050)
        progress_interrupt(0)

    orig_data = data
    model, parameters = create_composite_model(m_def)

    model_result = {}
    if data is not None and model is not None:
        # getx only after the None-guard: previously getx(data) ran before
        # the check and would crash on a None input.
        x = getx(data)
        for row in data:
            progress_interrupt(0)
            model_result[row.id] = model.fit(row.x, parameters, x=x)
    return orig_data, data, model_result
def test_predict_samename_domain_interpolation(self):
    """Interpolation restores exact AUC when only attribute identity changed."""
    train, test = separate_learn_test(self.collagen)
    aucorig = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    test = destroy_atts_conversion(test)
    train = Interpolate(points=getx(train))(train)  # make train capable of interpolation
    auc = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
    self.assertEqual(aucorig, auc)
def test_unknown_middle(self): data = Orange.data.Table("iris") # whole column in the middle should be interpolated with data.unlocked(): data.X[:, 1] = np.nan interpolated = Interpolate(getx(data))(data) self.assertFalse(np.any(np.isnan(interpolated.X)))
def test_unordered_features(self):
    """Preprocessor results are invariant to attribute ordering (with tolerance)."""
    data = self.collagen
    data_reversed = reverse_attr(data)
    data_shuffle = shuffle_attr(data)
    for proc in PREPROCESSORS:
        comparison = np.testing.assert_equal
        # TODO find out why there are small differences for certain preprocessors
        if isinstance(proc, (RubberbandBaseline, Normalize, PCADenoising)):
            comparison = lambda x, y: np.testing.assert_almost_equal(x, y, decimal=5)
        # compare columns after sorting each result back by x
        pdata = proc(data)
        X = pdata.X[:, np.argsort(getx(pdata))]
        pdata_reversed = proc(data_reversed)
        X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
        comparison(X, X_reversed)
        pdata_shuffle = proc(data_shuffle)
        X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
        comparison(X, X_shuffle)
def __init__(self, target, kind="linear", handle_nans=True):
    """Interpolate onto the x values of *target*.

    :param target: table whose (all-continuous) attributes define the points
    :param kind: interpolation kind passed to the interpolation function
    :param handle_nans: whether NaNs in the data are interpolated over
    :raises NotAllContinuousException: if any target attribute is discrete
    """
    self.target = target
    if not all(isinstance(a, Orange.data.ContinuousVariable)
               for a in self.target.domain.attributes):
        raise NotAllContinuousException()
    self.points = getx(self.target)
    self.kind = kind
    self.handle_nans = handle_nans
    # interpolation function; None selects the default implementation
    self.interpfn = None
def test_line_intersection(self):
    """A horizontal line at y=1.15 crosses the known collagen spectra."""
    data = self.collagen
    x = getx(data)
    # curves must be evaluated on sorted x
    sort = np.argsort(x)
    x = x[sort]
    ys = data.X[:, sort]
    boola = intersect_curves(x, ys, np.array([0, 1.15]), np.array([3000, 1.15]))
    intc = np.flatnonzero(boola)
    np.testing.assert_equal(intc, [191, 635, 638, 650, 712, 716, 717, 726])
def test_autointerpolate(self):
    """Cross-domain conversion interpolates only within the source x range."""
    d1 = Orange.data.Table("peach_juice.dpt")
    d2 = Orange.data.Table("collagen.csv")
    d3 = Orange.data.Table(d1.domain, d2)
    d1x = getx(d1)
    d2x = getx(d2)
    # have the correct number of non-nan elements
    validx = np.where(d1x >= min(d2x), d1x, np.nan)
    validx = np.where(d1x <= max(d2x), validx, np.nan)
    self.assertEqual(np.sum(~np.isnan(validx)), np.sum(~np.isnan(d3.X[0])))
    # check roundtrip
    atts = features_with_interpolation(d2x)
    ndom = Orange.data.Domain(atts, None)
    dround = Orange.data.Table(ndom, d3)
    # edges are unknown, the rest roughly the same
    np.testing.assert_allclose(dround.X[:, 1:-1], d2.X[:, 1:-1], rtol=0.011)
def interpolate_extend_to(self, interpolate, wavenumbers): """ Interpolate data to given wavenumbers and extend the possibly nan-edges with the nearest values. """ # interpolate reference to the given wavenumbers X = interp1d_with_unknowns_numpy(getx(interpolate), interpolate.X, wavenumbers) # we know that X is not NaN. same handling of reference as of X X, _ = nan_extend_edges_and_interpolate(wavenumbers, X) return X
def test_predict_different_domain(self):
    """Prediction on an incompatible domain fails (or degenerates to chance)."""
    train, test = separate_learn_test(self.collagen)
    test = Interpolate(points=getx(test) - 1)(test)  # other test domain
    try:
        from Orange.data.table import DomainTransformationError
        with self.assertRaises(DomainTransformationError):
            LogisticRegressionLearner()(train)(test)
    except ImportError:  # until Orange 3.19
        aucdestroyed = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
        self.assertTrue(0.45 < aucdestroyed < 0.55)
def transformed(self, data):
    """Compute transmittance from absorbance, or from single-channel data."""
    if self.ref is None:
        # Calculate from absorbance data: T = 10 ** (-A)
        transd = data.X.copy()
        transd *= -1
        np.power(10, transd, transd)
    else:
        # Calculate from single-channel data against the reference
        ref_X = self.interpolate_extend_to(self.ref, getx(data))
        transd = data.X / ref_X
    return transd
def transformed(self, data):
    """Compute absorbance from transmittance, or from single-channel data."""
    if self.ref is None:
        # Calculate from transmittance data: A = -log10(T)
        absd = np.log10(data.X)
        absd *= -1
    else:
        # Calculate from single-channel data against the reference
        ref_X = self.interpolate_extend_to(self.ref, getx(data))
        absd = ref_X / data.X
        np.log10(absd, absd)
    return absd
def test_unknown_elsewhere(self):
    """Scattered NaNs are filled by the default interpolation function."""
    data = Orange.data.Table("iris")
    # unlock before mutating: Orange tables may be read-only, and the
    # sibling tests (test_unknown_elsewhere_different, test_unknown_middle)
    # already use this pattern
    with data.unlocked():
        data.X[0, 1] = np.nan
        data.X[1, 1] = np.nan
        data.X[1, 2] = np.nan
    im = Interpolate(getx(data))
    interpolated = im(data)
    self.assertAlmostEqual(interpolated.X[0, 1], 3.25)
    self.assertAlmostEqual(interpolated.X[1, 1], 3.333333333333334)
    self.assertAlmostEqual(interpolated.X[1, 2], 1.766666666666667)
    self.assertFalse(np.any(np.isnan(interpolated.X)))
def __call__(self, data):
    """Keep attributes whose x lies within [lowlim, highlim].

    With inverse=True, keep those outside the limits instead. Class
    variables and metas are preserved.
    """
    x = getx(data)
    if not self.inverse:
        okattrs = [at for at, v in zip(data.domain.attributes, x)
                   if (self.lowlim is None or self.lowlim <= v)
                   and (self.highlim is None or v <= self.highlim)]
    else:
        okattrs = [at for at, v in zip(data.domain.attributes, x)
                   if (self.lowlim is not None and v <= self.lowlim)
                   or (self.highlim is not None and self.highlim <= v)]
    domain = Orange.data.Domain(okattrs, data.domain.class_vars,
                                metas=data.domain.metas)
    return data.from_table(domain, data)