Пример #1
0
 def commit(self):
     """Interpolate the input data as configured and send the result.

     The ``dxzero`` and ``too_many_points`` errors are cleared first. The
     output then depends on ``self.input_radio``:

     * 0 -- interpolate onto the x values of the data itself,
     * 1 -- interpolate onto a grid with step ``self.dx`` between
       ``self.xmin`` and ``self.xmax`` (a missing limit falls back to the
       extent of the data's x values),
     * 2 -- interpolate onto ``self.data_points`` if they are set.

     ``None`` is sent when there is no data or no output could be built.
     """
     self.Error.dxzero.clear()
     self.Error.too_many_points.clear()
     result = None
     if self.data:
         if self.input_radio == 0:
             result = Interpolate(getx(self.data))(self.data)
         elif self.input_radio == 1:
             xs = getx(self.data)
             if self.dx > 0:
                 low = np.min(xs) if self.xmin is None else self.xmin
                 high = np.max(xs) if self.xmax is None else self.xmax
                 # the limits may be given in reversed order
                 low, high = min(low, high), max(low, high)
                 n_points = abs(math.ceil((high - low) / self.dx))
                 if n_points >= 10002:
                     self.Error.too_many_points(n_points)
                 else:
                     grid = np.arange(low, high, self.dx)
                     result = Interpolate(grid)(self.data)
             else:
                 # also reached when dx is not comparable as positive
                 self.Error.dxzero()
         elif self.input_radio == 2 and self.data_points is not None:
             result = Interpolate(self.data_points)(self.data)
     self.send("Interpolated data", result)
Пример #2
0
 def test_cut_both(self):
     """Cut with both limits keeps only the features within the interval."""
     d = self.collagen
     # this interval is expected to contain no x values, so no feature
     # should remain; the length is checked explicitly because the truth
     # value of a NumPy array is deprecated for empty arrays and ambiguous
     # (ValueError) for arrays with more than one element
     dcut = Cut(lowlim=0, highlim=2)(d)
     self.assertEqual(len(getx(dcut)), 0)
     # every remaining x value lies within the requested interval
     dcut = Cut(lowlim=1000, highlim=1100)(d)
     self.assertGreaterEqual(min(getx(dcut)), 1000)
     self.assertLessEqual(max(getx(dcut)), 1100)
Пример #3
0
 def test_slightly_different_domain(self):
     """Scores should stay similar on a slightly different test domain.

     For every preprocessor the AUC obtained on modified test data (which
     needs interpolation) is compared with the AUC on the original data.
     """
     # LR that can not handle unknown values
     lr = LogisticRegressionLearner(preprocessors=[])
     for preprocessor in PREPROCESSORS:
         train, test = separate_learn_test(self.collagen)
         reference = AUC(TestOnTestData(preprocessor(train), test, [lr]))
         test = odd_attr(destroy_atts_conversion(test))
         # train on a subset of points so that all test set points are
         # within the train set points, which gives no unknowns
         inner_points = getx(train)[1:-3]
         # make train capable of interpolation
         train = preprocessor(Interpolate(points=inner_points)(train))
         # explicit domain conversion to catch exceptions that would
         # otherwise be silently handled in TestOnTestData
         _ = Orange.data.Table(train.domain, test)
         current = AUC(TestOnTestData(train, test, [lr]))
         self.assertAlmostEqual(current, reference, delta=0.02)
         # additionally shift the test domain
         shifted_points = getx(test) - 1.
         test = Interpolate(points=shifted_points)(test)
         _ = Orange.data.Table(train.domain, test)  # explicit call again
         current = AUC(TestOnTestData(train, test, [lr]))
         # the difference should be slight
         self.assertAlmostEqual(current, reference, delta=0.05)
Пример #4
0
 def test_roundtrip(self):
     """Saving a table as .xyz and reading it back preserves its contents."""
     d1 = Orange.data.Table("map_test.xyz")
     # mkstemp returns an open OS-level file descriptor together with the
     # path; close it right away so that it is not leaked and the file can
     # be rewritten and removed while no handle is open
     fd, fn = tempfile.mkstemp(suffix=".xyz")
     os.close(fd)
     try:
         d1.save(fn)
         d2 = Orange.data.Table(fn)
         np.testing.assert_equal(d1.X, d2.X)
         np.testing.assert_equal(getx(d1), getx(d2))
         np.testing.assert_equal(d1.metas, d2.metas)
     finally:
         # remove the temporary file even if an assertion above fails
         os.remove(fn)
Пример #5
0
 def test_read(self):
     """Check size, map coordinates, values and x range of map_test.xyz."""
     table = Orange.data.Table("map_test.xyz")
     self.assertEqual(len(table), 16)
     second_row = table[1]
     self.assertEqual(second_row["map_x"], 1)
     self.assertEqual(second_row["map_y"], 7)
     self.assertEqual(second_row[1], 0.1243)
     self.assertEqual(table[2][2], 0.1242)
     xs = getx(table)
     self.assertEqual(min(xs), 1634.84)
     self.assertEqual(max(xs), 1641.69)
Пример #6
0
 def set_data(self, data):
     """Store the input data, refresh the limit placeholders and commit.

     The placeholders of the xmin and xmax edits show the smallest and
     largest x value of the data; they are emptied when there is no data
     or the data has no x values.
     """
     self.data = data
     low_text = high_text = ""
     if self.data and len(getx(data)):
         xs = getx(data)
         low_text = str(np.min(xs))
         high_text = str(np.max(xs))
     self.xmin_edit.setPlaceholderText(low_text)
     self.xmax_edit.setPlaceholderText(high_text)
     self.commit()
Пример #7
0
 def test_autointerpolate(self):
     """The domain of the widget output interpolates other data."""
     self.send_signal("Data", self.collagen)
     interpolated = self.get_output("Interpolated data")
     collagen_x = getx(self.collagen)
     np.testing.assert_equal(collagen_x, getx(interpolated))
     # no auto-interpolation: converting to the input domain gives only NaNs
     plain = Orange.data.Table(self.collagen.domain, self.peach)
     self.assertTrue(np.isnan(plain.X).all())
     # auto-interpolation: converting to the output domain gives values
     converted = Orange.data.Table(interpolated.domain, self.peach)
     self.assertFalse(np.isnan(converted.X).all())
     np.testing.assert_equal(collagen_x, getx(converted))
Пример #8
0
 def test_interpolate_points(self):
     """The missing-reference warning follows the "Points" input."""
     missing = self.widget.Warning.reference_data_missing
     self.assertFalse(missing.is_shown())
     # select the third input option; no reference points are connected
     self.widget.controls.input_radio.buttons[2].click()
     self.assertTrue(missing.is_shown())
     # data alone does not remove the warning
     self.send_signal("Data", self.peach)
     self.assertTrue(missing.is_shown())
     self.send_signal("Points", self.collagen)
     self.assertFalse(missing.is_shown())
     result = self.get_output("Interpolated data")
     np.testing.assert_equal(getx(self.collagen), getx(result))
     # removing the points brings the warning back
     self.send_signal("Points", None)
     self.assertTrue(missing.is_shown())
Пример #9
0
 def set_data(self, data):
     """Replace the current data and prepare the sorted x values.

     Previous data and colour attributes are cleared first. With ``None``
     nothing else happens. Otherwise the colour attribute choices are
     rebuilt, ``rescale_next`` is set when there was no data before or the
     domain changed, a selection that does not fit the new data is
     dropped, and the x values are stored sorted together with their
     sorting indices.
     """
     self.clear_data()
     self.attrs[:] = []
     if data is None:
         return
     colorable = [var for var in chain(data.domain, data.domain.metas)
                  if isinstance(var, str) or var.is_discrete]
     self.attrs[:] = ["(Same color)"] + colorable
     self.color_attr = 0
     self.rescale_next = not self.data or not data.domain == self.data.domain
     self.data = data
     # reset selection if dataset sizes do not match
     n_rows = len(self.data)
     if self.selected_indices and \
             (max(self.selected_indices) >= n_rows or self.data_size != n_rows):
         self.selected_indices.clear()
     self.data_size = n_rows
     # get and sort input data
     xs = getx(self.data)
     order = np.argsort(xs)
     self.data_x = xs[order]
     self.data_xsind = order
     # refresh subset indices according to the current subset
     self._set_subset_indices()
Пример #10
0
 def set_data(self, data):
     """Replace the current data and prepare the sorted x values.

     The feature colour model receives the domain of the new data (or
     ``None``). ``feature_color`` is reset to the first entry of the model
     (``None`` for an empty model) only if there was a previous domain and
     it differs from the new one. For non-``None`` data, ``rescale_next``
     is set when there was no data before or the domain changed, a
     selection that does not fit the new data is dropped, and the x values
     are stored sorted together with their sorting indices.
     """
     previous_domain = self.data.domain if self.data else None
     self.clear_data()
     new_domain = None if data is None else data.domain
     self.feature_color_model.set_domain(new_domain)
     # do not reset feature_color when the domain stays the same
     if previous_domain and new_domain != previous_domain:
         if self.feature_color_model:
             self.feature_color = self.feature_color_model[0]
         else:
             self.feature_color = None
     if data is None:
         return
     self.rescale_next = not self.data or not data.domain == self.data.domain
     self.data = data
     # reset selection if dataset sizes do not match
     n_rows = len(self.data)
     if self.selected_indices and \
             (max(self.selected_indices) >= n_rows or self.data_size != n_rows):
         self.selected_indices.clear()
     self.data_size = n_rows
     # get and sort input data
     xs = getx(self.data)
     order = np.argsort(xs)
     self.data_x = xs[order]
     self.data_xsind = order
     # refresh subset indices according to the current subset
     self._set_subset_indices()
Пример #11
0
 def set_data(self, data, rescale="auto"):
     """Replace the shown data and redraw the view.

     The graph, the data and the colour attributes are cleared and the pen
     colours are set in any case. For non-``None`` data the colour
     attribute choices are rebuilt, a selection that does not fit the new
     data is dropped, x values and the matching columns of ``data.X`` are
     stored sorted by x, and the view is updated.

     ``rescale`` set to ``"auto"`` means: rescale when there was no data
     before or the domain changed. The plot is auto-ranged only if the
     resulting value equals ``True``.
     """
     self.clear_graph()
     self.clear_data()
     self.attrs[:] = []
     has_data = data is not None
     if has_data:
         candidates = chain(data.domain, data.domain.metas)
         self.attrs[:] = ["(Same color)"] + [
             var for var in candidates
             if isinstance(var, str) or var.is_discrete]
         self.color_attr = 0
     self.set_pen_colors()
     if not has_data:
         return
     if rescale == "auto":
         rescale = not self.data or not data.domain == self.data.domain
     self.data = data
     # reset selection if dataset sizes do not match
     n_rows = len(self.data)
     if self.selected_indices and \
             (max(self.selected_indices) >= n_rows or self.data_size != n_rows):
         self.selected_indices.clear()
     self.data_size = n_rows
     # get and sort input data
     xs = getx(self.data)
     order = np.argsort(xs)
     self.data_x = xs[order]
     self.data_ys = data.X[:, order]
     self.update_view()
     if rescale == True:
         self.plot.vb.autoRange()
Пример #12
0
    def __call__(self, data):
        """Return the normalized ``X`` of ``data``.

        The data is converted to ``self.domain`` if its domain differs and
        copied before it is changed. ``self.method`` selects the kind of
        normalization:

        * ``Normalize.Vector`` -- rows are scaled with ``sknormalize``
          (l2 norm); NaNs are interpolated only for computing the norm
          and are restored afterwards,
        * ``Normalize.Area`` -- rows are divided by the output of
          ``Integrate`` between ``self.lower`` and ``self.upper``,
        * ``Normalize.Attribute`` -- rows are divided by the values of the
          meta attribute ``self.attr`` (nothing is done if it is unset).

        Data without rows is returned unchanged.
        """
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)

        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        method = self.method
        if method == Normalize.Vector:
            nan_mask = np.isnan(data.X)
            nans_per_row = nan_mask.sum(axis=1, keepdims=True)
            has_nans = np.any(nans_per_row > 0)
            values = data.X
            if has_nans:
                # interpolate nan elements for normalization
                xs = getx(data)
                values = interp1d_with_unknowns_numpy(xs, values, xs)
                # edge elements can not be interpolated; treat them as zero
                values = np.nan_to_num(values)
            data.X = sknormalize(values, norm='l2', axis=1, copy=False)
            if has_nans:
                # keep nans where they were
                data.X[nan_mask] = float("nan")
        elif method == Normalize.Area:
            integrator = Integrate(method=self.int_method,
                                   limits=[[self.lower, self.upper]])
            areas = integrator(data)
            data.X /= areas.X
        elif method == Normalize.Attribute:
            # attr normalization applies to entire spectrum, regardless of
            # limits
            attr_is_set = self.attr not in (None, "None", "")
            if attr_is_set:
                # meta indices are negative and start at -1
                meta_column = -1 - data.domain.index(self.attr)
                divisors = data.metas[:, meta_column].astype(float)
                data.X /= divisors[:, None]
        return data.X
Пример #13
0
 def __call__(self, data):
     """Compute a convex-hull based baseline for every row of ``data``.

     The data is converted to ``self.domain`` if its domain differs. For
     each row a convex hull of the (x, value) points without NaNs is
     built; depending on ``self.peak_dir`` (0 or 1) one part of the hull
     is kept and interpolated linearly over all x values to form the
     baseline. If the hull can not be built (``QhullError``), a zero
     baseline is used.

     Returns an array shaped like ``data.X`` holding the rows with the
     baseline subtracted when ``self.sub == 0`` and the baselines
     themselves otherwise.
     """
     if data.domain != self.domain:
         data = data.from_table(self.domain, data)
     x = getx(data)
     newd = np.zeros_like(data.X)
     for rowi, row in enumerate(data.X):
         # remove NaNs which ConvexHull can not handle
         source = np.column_stack((x, row))
         source = source[~np.isnan(source).any(axis=1)]
         try:
             v = ConvexHull(source).vertices
         except QhullError:
             # FIXME notify user
             baseline = np.zeros_like(row)
         else:
             # rotate the hull vertices so that the kept part starts at
             # one extreme vertex index and ends at the other one
             if self.peak_dir == 0:
                 v = np.roll(v, -v.argmax())
                 v = v[:v.argmin() + 1]
             elif self.peak_dir == 1:
                 v = np.roll(v, -v.argmin())
                 v = v[:v.argmax() + 1]
             # If there are NaN values at the edges of data then convex hull
             # does not include the endpoints. Because the same values are also
             # NaN in the current row, we can fill them with NaN (bounds_error
             # achieves this).
             baseline = interp1d(source[v, 0],
                                 source[v, 1],
                                 bounds_error=False)(x)
         # The result is stored after the try statement and not in a
         # finally clause: with an unexpected exception a finally clause
         # would use the baseline of the previous row, or fail on an
         # unbound name and hide the original error.
         if self.sub == 0:
             newd[rowi] = row - baseline
         else:
             newd[rowi] = baseline
     return newd
Пример #14
0
def _transform_to_sorted_features(data):
    """Return x values, their sorting indices and ``X`` ordered by x.

    Returns a tuple ``(xs, xsind, mon, X)``: the x values of ``data``, the
    indices that sort them, whether these indices are already increasing
    (as reported by ``is_increasing``), and ``data.X`` itself if so or
    ``data.X`` with columns reordered by the sorting indices otherwise.
    """
    xs = getx(data)
    order = np.argsort(xs)
    already_sorted = is_increasing(order)
    if already_sorted:
        ordered_X = data.X
    else:
        ordered_X = data.X[:, order]
    return xs, order, already_sorted, ordered_X
Пример #15
0
 def set_preview_data(self, data):
     """Initialize both limits from the x range of the preview data.

     Nothing is done once the user has changed the values or when the data
     has no x values; otherwise "Low limit" and "High limit" are set to
     the smallest and largest x value and ``edited`` is emitted.
     """
     if self.user_changed:
         return
     xs = getx(data)
     if not len(xs):
         return
     self.set_value("Low limit", min(xs))
     self.set_value("High limit", max(xs))
     self.edited.emit()
Пример #16
0
 def test_predict_savgol_another_interpolate(self):
     """Interpolating already filtered train data keeps the AUC close."""
     train, test = separate_learn_test(self.collagen)
     savgol = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)
     filtered = savgol(train)
     auc_filtered = AUC(
         TestOnTestData(filtered, test, [LogisticRegressionLearner()]))
     # interpolate the filtered data onto its own x values
     reinterpolated = Interpolate(points=getx(filtered))(filtered)
     auc_reinterpolated = AUC(
         TestOnTestData(reinterpolated, test, [LogisticRegressionLearner()]))
     self.assertAlmostEqual(auc_filtered, auc_reinterpolated, delta=0.02)
Пример #17
0
 def test_unordered_features(self):
     """Preprocessors give the same result for any order of features.

     The output for reversed and shuffled features, with columns sorted by
     x, is compared with the sorted output for the original data.
     """
     def almost_equal(x, y):
         return np.testing.assert_almost_equal(x, y, decimal=5)

     def sorted_X(table):
         # columns of X ordered by increasing x value
         return table.X[:, np.argsort(getx(table))]

     original = self.collagen
     reordered = [reverse_attr(original), shuffle_attr(original)]
     for proc in PREPROCESSORS:
         # TODO find out why there are small differences for certain preprocessors
         if isinstance(proc, (RubberbandBaseline, Normalize, PCADenoising)):
             compare = almost_equal
         else:
             compare = np.testing.assert_equal
         expected = sorted_X(proc(original))
         for variant in reordered:
             compare(expected, sorted_X(proc(variant)))
Пример #18
0
 def test_predict_samename_domain_interpolation(self):
     """AUC stays the same when test attributes lose their conversion."""
     train, test = separate_learn_test(self.collagen)
     auc_before = AUC(
         TestOnTestData(train, test, [LogisticRegressionLearner()]))
     broken_test = destroy_atts_conversion(test)
     # make train capable of interpolation
     interpolating_train = Interpolate(points=getx(train))(train)
     auc_after = AUC(TestOnTestData(interpolating_train, broken_test,
                                    [LogisticRegressionLearner()]))
     self.assertEqual(auc_before, auc_after)
Пример #19
0
    def show_data(self):
        """Integrate the data within the chosen limits and show it as an image.

        The image is cleared first. If there is data, the values of
        ``self.attr_x`` and ``self.attr_y`` position every row on a
        regular grid, the rows are integrated between the limits of the
        parent widget (defaulting to just outside of the x range of the
        data) and the first integral of every row becomes the value of
        its pixel. Pixels without data stay NaN.
        """
        self.img.clear()
        if self.data:
            xat = self.data.domain[self.attr_x]
            yat = self.data.domain[self.attr_y]

            ndom = Orange.data.Domain([xat, yat])
            datam = Orange.data.Table(ndom, self.data)
            coorx = datam.X[:, 0]
            coory = datam.X[:, 1]
            # used below as (first value, last value, number of values)
            lsx = values_to_linspace(coorx)
            lsy = values_to_linspace(coory)

            l1, l2 = self.parent.lowlim, self.parent.highlim

            gx = getx(self.data)

            if l1 is None:
                l1 = min(gx) - 1
            if l2 is None:
                l2 = max(gx) + 1

            # the limits may be given in reversed order
            l1, l2 = min(l1, l2), max(l1, l2)

            imethod = self.parent.integration_methods[
                self.parent.integration_method]
            datai = Integrate(method=imethod, limits=[[l1, l2]])(self.data)

            # markings are drawn for the first selected curve, if any
            di = {}
            if self.parent.curveplot.selected_indices:
                ind = list(self.parent.curveplot.selected_indices)[0]
                di = datai.domain.attributes[0].compute_value.draw_info(
                    self.data[ind:ind + 1])
            self.refresh_markings(di)

            d = datai.X[:, 0]

            # set data
            imdata = np.ones((lsy[2], lsx[2])) * float("nan")
            xindex = index_values(coorx, lsx)
            yindex = index_values(coory, lsy)
            imdata[yindex, xindex] = d

            levels = get_levels(imdata)
            self.update_color_schema()

            self.img.setImage(imdata, levels=levels)

            def half_pixel(ls):
                # With a single position along an axis there is no spacing
                # to derive the pixel size from, and the general formula
                # would divide by zero; use a pixel of unit size instead.
                if ls[2] == 1:
                    return 0.5
                return (ls[1] - ls[0]) / (2 * (ls[2] - 1))

            # shift centres of the pixels so that the axes are useful
            shiftx = half_pixel(lsx)
            shifty = half_pixel(lsy)
            left = lsx[0] - shiftx
            bottom = lsy[0] - shifty
            width = (lsx[1] - lsx[0]) + 2 * shiftx
            height = (lsy[1] - lsy[0]) + 2 * shifty
            self.img.setRect(QRectF(left, bottom, width, height))
Пример #20
0
 def test_line_intersection(self):
     """A horizontal line at 1.15 intersects the expected collagen curves."""
     table = self.collagen
     xs = getx(table)
     order = np.argsort(xs)
     line_start = np.array([0, 1.15])
     line_end = np.array([3000, 1.15])
     hits = intersect_curves(xs[order], table.X[:, order],
                             line_start, line_end)
     # indices of the curves that are reported as intersected
     hit_rows = np.flatnonzero(hits)
     np.testing.assert_equal(hit_rows,
                             [191, 635, 638, 650, 712, 716, 717, 726])
Пример #21
0
    def test_autointerpolate(self):
        """Converting between domains of different files interpolates."""
        peach = Orange.data.Table("peach_juice.dpt")
        collagen = Orange.data.Table("collagen.csv")
        converted = Orange.data.Table(peach.domain, collagen)
        peach_x = getx(peach)
        collagen_x = getx(collagen)

        # have the correct number of non-nan elements: only peach x values
        # within the range of collagen x values can be computed
        inside = (peach_x >= min(collagen_x)) & (peach_x <= max(collagen_x))
        self.assertEqual(np.sum(inside),
                         np.sum(~np.isnan(converted.X[0])))

        # check roundtrip
        back_domain = Orange.data.Domain(
            features_with_interpolation(collagen_x), None)
        roundtrip = Orange.data.Table(back_domain, converted)
        # edges are unknown, the rest roughly the same
        np.testing.assert_allclose(roundtrip.X[:, 1:-1],
                                   collagen.X[:, 1:-1], rtol=0.011)
def test_time():
    """Print timings of interpolation with and without NaN handling.

    For every data set one value is set to NaN; interpolation onto the
    data's own x values is then timed without NaN handling and with NaN
    handling using the numpy and the scipy based implementation. Finally
    the last result is checked to contain no NaNs.
    """
    fns = ["collagen", dust(), spectra20nea(), "peach_juice.dpt"]
    for fn in fns:
        print(fn)
        data = Table(fn)
        print(data.X.shape)
        data[0, 2] = np.nan
        t = time.time()
        Interpolate(getx(data), handle_nans=False)(data)
        print("no nan", time.time() - t)
        t = time.time()
        intp = Interpolate(getx(data), handle_nans=True)
        intp.interpfn = interp1d_with_unknowns_numpy
        interpolated = intp(data)
        print("nan handling with numpy", time.time() - t)
        # restart the clock so that the scipy timing does not include the
        # time already spent in the numpy run
        t = time.time()
        intp.interpfn = interp1d_with_unknowns_scipy
        interpolated = intp(data)
        print("nan handling with scipy", time.time() - t)
        assert (not np.any(np.isnan(interpolated.X)))
 def test_unknown_elsewhere(self):
     """Unknown values inside rows are filled in by the interpolation."""
     table = Orange.data.Table("iris")
     # (row, column, value expected after interpolation)
     cases = ((0, 1, 3.25),
              (1, 1, 3.333333333333334),
              (1, 2, 1.766666666666667))
     for row, col, _ in cases:
         table.X[row, col] = np.nan
     interpolated = Interpolate(getx(table))(table)
     for row, col, expected in cases:
         self.assertAlmostEqual(interpolated.X[row, col], expected)
     self.assertFalse(np.any(np.isnan(interpolated.X)))
Пример #24
0
 def test_slightly_different_domain(self):
     """Scores should stay similar on a slightly different test domain.

     For every preprocessor the AUC obtained on modified test data (which
     needs interpolation) is compared with the AUC on the original data.
     """
     def auc_of(train_data, test_data):
         # a fresh learner is used for every evaluation
         return AUC(TestOnTestData(train_data, test_data,
                                   [LogisticRegressionLearner()]))

     for preprocessor in PREPROCESSORS:
         train, test = separate_learn_test(self.collagen)
         reference = auc_of(preprocessor(train), test)
         test = odd_attr(destroy_atts_conversion(test))
         # make train capable of interpolation
         train = preprocessor(Interpolate(points=getx(train))(train))
         self.assertAlmostEqual(auc_of(train, test), reference, delta=0.02)
         # additionally shift the test domain
         test = Interpolate(points=getx(test) - 1.)(test)
         # the difference should be slight
         self.assertAlmostEqual(auc_of(train, test), reference, delta=0.05)
Пример #25
0
 def __call__(self, data):
     """Return ``data`` limited to the attributes selected by the limits.

     Without ``self.inverse`` an attribute is kept when its x value is
     within ``[self.lowlim, self.highlim]``, where a limit set to ``None``
     does not restrict. With ``self.inverse`` an attribute is kept when
     its x value is at or below a given ``lowlim`` or at or above a given
     ``highlim``. Class variables and metas are preserved.
     """
     low, high = self.lowlim, self.highlim

     def inside(v):
         return (low is None or low <= v) and (high is None or v <= high)

     def outside(v):
         return (low is not None and v <= low) or \
                (high is not None and high <= v)

     xs = getx(data)
     keep = outside if self.inverse else inside
     okattrs = [at for at, v in zip(data.domain.attributes, xs) if keep(v)]
     domain = Orange.data.Domain(okattrs, data.domain.class_vars,
                                 metas=data.domain.metas)
     return data.from_table(domain, data)
Пример #26
0
 def test_predict_different_domain_interpolation(self):
     """Prediction works on shifted and progressively cut test domains."""
     def auc_of(train_data, test_data):
         # a fresh learner is used for every evaluation
         return AUC(TestOnTestData(train_data, test_data,
                                   [LogisticRegressionLearner()]))

     train, test = separate_learn_test(self.collagen)
     aucs = [auc_of(train, test)]
     # other test domain
     test = Interpolate(points=getx(test) - 1.)(test)
     # make train capable of interpolation
     train = Interpolate(points=getx(train))(train)
     # shift can decrease AUC slightly
     self.assertAlmostEqual(aucs[0], auc_of(train, test), delta=0.01)
     # cut the already cut test data to narrower and narrower intervals
     for low, high in ((1000, 1700), (1100, 1600), (1200, 1500)):
         test = Cut(low, high)(test)
         aucs.append(auc_of(train, test))
     # the more we cut the lower precision we get
     self.assertTrue(aucs[0] > aucs[1] > aucs[2] > aucs[3])
Пример #27
0
 def test_interpolate_interval(self):
     """Interval interpolation: output grid, errors and reversed limits."""
     widget = self.widget

     def commit_with_dx(text):
         widget.controls.dx.setText(text)
         widget.commit()

     widget.controls.input_radio.buttons[1].click()
     self.send_signal("Data", self.peach)
     first = self.get_output("Interpolated data")
     np.testing.assert_almost_equal(
         np.arange(499.53234, 4000.1161, 10), getx(first))
     # a zero step is an error
     commit_with_dx("0")
     self.assertTrue(widget.Error.dxzero.is_shown())
     # a tiny step would produce too many points
     commit_with_dx("0.001")
     self.assertTrue(widget.Error.too_many_points.is_shown())
     # a valid step clears both errors
     commit_with_dx("10")
     self.assertFalse(widget.Error.dxzero.is_shown())
     self.assertFalse(widget.Error.too_many_points.is_shown())
     # limits in reversed order give the same output
     widget.controls.xmin.setText("4000.1161")
     widget.controls.xmax.setText("499.53234")
     widget.commit()
     second = self.get_output("Interpolated data")
     np.testing.assert_almost_equal(getx(second), getx(first))
     # no data, no output
     self.send_signal("Data", None)
     self.assertTrue(self.get_output("Interpolated data") is None)
Пример #28
0
 def test_cut_single_inverse(self):
     """Inverse cut with one limit keeps the part outside of that limit."""
     d = self.collagen
     all_x = getx(d)
     # only a low limit: x values up to the limit remain
     kept = getx(Cut(lowlim=1000, inverse=True)(d))
     self.assertLessEqual(max(kept), 1000)
     self.assertEqual(min(kept), min(all_x))
     # only a high limit: x values from the limit on remain
     kept = getx(Cut(highlim=1000, inverse=True)(d))
     self.assertGreaterEqual(min(kept), 1000)
     self.assertEqual(max(kept), max(all_x))
Пример #29
0
 def __call__(self, data):
     """Interpolate the rows of ``data`` at ``self.points``.

     Returns an array with one row per data instance and one column per
     point. Points outside of the x range of the data get NaN, and so does
     everything when the data has no x values.
     """
     # convert to data domain if any conversion is possible,
     # otherwise we use the interpolator directly to make domains compatible
     convertible = self.domain and data.domain != self.domain \
         and any(at.compute_value for at in self.domain.attributes)
     if convertible:
         data = data.from_table(self.domain, data)
     xs = getx(data)
     if len(xs) == 0:
         return np.full((len(data), len(self.points)), np.nan)
     interpolator = interp1d(xs, data.X, fill_value=np.nan,
                             bounds_error=False, kind=self.kind)
     return interpolator(self.points)
Пример #30
0
 def test_cut_both_inverse(self):
     """Inverse cut with both limits removes only the inner interval."""
     d = self.collagen
     all_x = getx(d)
     # cutting out of x interval - need all
     untouched = Cut(lowlim=0, highlim=2, inverse=True)(d)
     np.testing.assert_equal(getx(untouched), all_x)
     # cutting in the middle - edges are the same
     kept_x = getx(Cut(lowlim=1000, highlim=1100, inverse=True)(d))
     self.assertEqual(min(kept_x), min(all_x))
     self.assertEqual(max(kept_x), max(all_x))
     self.assertLess(len(kept_x), len(all_x))
     # nothing remains strictly between the two limits
     np.testing.assert_equal(np.where(kept_x < 1100), np.where(kept_x < 1000))
     np.testing.assert_equal(np.where(kept_x > 1100), np.where(kept_x > 1000))