示例#1
0
    def commit(self):
        """Compute distances for the current data and send them downstream.

        Clears warning/error slot 1, picks the metric selected by
        ``self.metric_idx``, runs ``distance._preprocess`` when the data has
        discrete features or missing values, and sends the resulting
        distances on the "Distances" channel. ``None`` is sent when there is
        no data, no continuous feature, or the data is empty.
        """
        # Reset the messages left over from a previous run.
        self.warning(1)
        self.error(1)

        data = distances = None
        if self.data is not None:
            metric = _METRICS[self.metric_idx][1]
            attributes = self.data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.error(1, "No continuous features")
                data = None
            elif (any(a.is_discrete for a in attributes) or
                  numpy.any(numpy.isnan(self.data.X))):
                # NOTE(review): _preprocess presumably drops discrete
                # features and imputes missing values -- confirm.
                data = distance._preprocess(self.data)
                if len(attributes) > len(data.domain.attributes):
                    self.warning(1, "Ignoring discrete features")
            else:
                data = self.data

        if data is not None:
            shape = (len(data), len(data.domain.attributes))
            # numpy.prod replaces the numpy.product alias, which was
            # deprecated in NumPy 1.25 and removed in NumPy 2.0.
            if numpy.prod(shape) == 0:
                self.error(1, "Empty data (shape == {})".format(shape))
            else:
                distances = metric(data, data, 1 - self.axis)

        self.send("Distances", distances)
示例#2
0
        def checks(metric, data):
            """Validate `data` for `metric` and return it ready for use.

            Problems are reported through ``self.Error`` / ``self.Warning``
            of the enclosing widget. Returns the (possibly preprocessed)
            data, or ``None`` when `data` is ``None`` or cannot be used.
            """
            if data is None:
                return None

            if issparse(data.X) and not metric.supports_sparse:
                self.Error.dense_metric_sparse_data()
                return None

            attributes = data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.Error.no_continuous_features()
                return None

            needs_preprocessing = False
            # Inspect the table being validated (the `data` argument), like
            # every other check here, instead of reaching for self.data.
            if any(a.is_discrete for a in attributes):
                self.Warning.ignoring_discrete()
                needs_preprocessing = True

            if not issparse(data.X) and bn.anynan(data.X):
                self.Warning.imputing_data()
                needs_preprocessing = True

            if needs_preprocessing:
                # removes discrete features and imputes data
                data = distance._preprocess(data)

            if not data.X.size:
                self.Error.empty_data()
                return None

            return data
示例#3
0
    def commit(self):
        """Compute distances for the current data and send them downstream.

        Clears warning/error slot 1, takes the metric from
        ``self.available_metrics[self.metric_idx]`` (fitting it first when it
        is a Mahalanobis distance), runs ``distance._preprocess`` when the
        data has discrete features or dense data contains NaNs, and sends the
        result on the "Distances" channel. ``None`` is sent when there is no
        data, no continuous feature, or the data is empty.
        """
        # Reset the messages left over from a previous run.
        self.warning(1)
        self.error(1)

        data = distances = None
        if self.data is not None:
            metric = self.available_metrics[self.metric_idx]
            # NOTE(review): the metric is fitted on the raw self.data, before
            # the feature checks and preprocessing below -- confirm intended.
            if isinstance(metric, distance.MahalanobisDistance):
                metric.fit(self.data, axis=1 - self.axis)

            attributes = self.data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.error(1, "No continuous features")
                data = None
            elif any(a.is_discrete for a in attributes) or \
                    (not issparse(self.data.X)
                     and numpy.any(numpy.isnan(self.data.X))):
                data = distance._preprocess(self.data)
                if len(attributes) > len(data.domain.attributes):
                    self.warning(1, "Ignoring discrete features")
            else:
                data = self.data

        if data is not None:
            shape = (len(data), len(data.domain.attributes))
            # numpy.prod replaces the numpy.product alias, which was
            # deprecated in NumPy 1.25 and removed in NumPy 2.0.
            if numpy.prod(shape) == 0:
                self.error(1, "Empty data (shape == {})".format(shape))
            else:
                distances = metric(data, data, 1 - self.axis, impute=True)

        self.send("Distances", distances)
示例#4
0
    def commit(self):
        """Compute distances for the current data and send them downstream.

        Picks the metric selected by ``self.metric_idx``, clears the widget
        messages, fits the metric when it is a Mahalanobis distance, runs
        ``distance._preprocess`` when the data has discrete features or dense
        data contains NaNs, and sends the result on the "Distances" channel.
        ``None`` is sent when nothing can be computed.
        """
        metric = METRICS[self.metric_idx]
        distances = None
        data = self.data
        # NOTE(review): sparse input combined with a metric that lacks sparse
        # support is dropped here without any message -- confirm intended.
        if data is not None and issparse(data.X) and \
                not metric.supports_sparse:
            data = None
        self.clear_messages()

        if data is not None:
            # NOTE(review): the metric is fitted on the raw self.data, before
            # the feature checks and preprocessing below -- confirm intended.
            if isinstance(metric, distance.MahalanobisDistance):
                metric.fit(self.data, axis=1 - self.axis)

            attributes = self.data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.Error.no_continuous_features()
                data = None
            elif any(a.is_discrete for a in attributes) or \
                    (not issparse(self.data.X)
                     and numpy.any(numpy.isnan(self.data.X))):
                data = distance._preprocess(self.data)
                if len(attributes) > len(data.domain.attributes):
                    self.Warning.ignoring_discrete()
            else:
                data = self.data

        if data is not None:
            shape = (len(data), len(data.domain.attributes))
            # numpy.prod replaces the numpy.product alias, which was
            # deprecated in NumPy 1.25 and removed in NumPy 2.0.
            if numpy.prod(shape) == 0:
                self.Error.empty_data(shape)
            else:
                distances = metric(data, data, 1 - self.axis, impute=True)

        self.send("Distances", distances)
示例#5
0
    def commit(self):
        """Compute distances for the current data and send them downstream.

        Clears warning/error slot 1, picks the metric selected by
        ``self.metric_idx``, runs ``distance._preprocess`` when the data has
        discrete features or missing values, and sends the resulting
        distances on the "Distances" channel. ``None`` is sent when there is
        no data, no continuous feature, or the data is empty.
        """
        # Reset the messages left over from a previous run.
        self.warning(1)
        self.error(1)

        data = distances = None
        if self.data is not None:
            metric = _METRICS[self.metric_idx][1]
            attributes = self.data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.error(1, "No continuous features")
                data = None
            elif (any(a.is_discrete for a in attributes)
                  or numpy.any(numpy.isnan(self.data.X))):
                # NOTE(review): _preprocess presumably drops discrete
                # features and imputes missing values -- confirm.
                data = distance._preprocess(self.data)
                if len(attributes) > len(data.domain.attributes):
                    self.warning(1, "Ignoring discrete features")
            else:
                data = self.data

        if data is not None:
            shape = (len(data), len(data.domain.attributes))
            # numpy.prod replaces the numpy.product alias, which was
            # deprecated in NumPy 1.25 and removed in NumPy 2.0.
            if numpy.prod(shape) == 0:
                self.error(1, "Empty data (shape == {})".format(shape))
            else:
                distances = metric(data, data, 1 - self.axis, impute=True)

        self.send("Distances", distances)
示例#6
0
    def commit(self):
        """Compute distances for the current data and send them downstream.

        Picks the metric selected by ``self.metric_idx``, clears the widget
        messages, fits the metric when it is a Mahalanobis distance, runs
        ``distance._preprocess`` when the data has discrete features or dense
        data contains NaNs, and sends the result on the "Distances" channel.
        ``None`` is sent when nothing can be computed.
        """
        metric = METRICS[self.metric_idx]
        distances = None
        data = self.data
        # NOTE(review): sparse input combined with a metric that lacks sparse
        # support is dropped here without any message -- confirm intended.
        if data is not None and issparse(data.X) and \
                not metric.supports_sparse:
            data = None
        self.clear_messages()

        if data is not None:
            # NOTE(review): the metric is fitted on the raw self.data, before
            # the feature checks and preprocessing below -- confirm intended.
            if isinstance(metric, distance.MahalanobisDistance):
                metric.fit(self.data, axis=1 - self.axis)

            attributes = self.data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.Error.no_continuous_features()
                data = None
            elif any(a.is_discrete for a in attributes) or \
                    (not issparse(self.data.X)
                     and numpy.any(numpy.isnan(self.data.X))):
                data = distance._preprocess(self.data)
                if len(attributes) > len(data.domain.attributes):
                    self.Warning.ignoring_discrete()
            else:
                data = self.data

        if data is not None:
            shape = (len(data), len(data.domain.attributes))
            # numpy.prod replaces the numpy.product alias, which was
            # deprecated in NumPy 1.25 and removed in NumPy 2.0.
            if numpy.prod(shape) == 0:
                self.Error.empty_data(shape)
            else:
                distances = metric(data, data, 1 - self.axis, impute=True)

        self.send("Distances", distances)
示例#7
0
    def commit(self):
        """Compute distances for the current data and send them downstream.

        Clears warning/error slot 1, picks the metric selected by
        ``self.metric_idx``, fits it when it is a Mahalanobis distance, runs
        ``distance._preprocess`` when the data has discrete features or dense
        data contains NaNs, and sends the result on the "Distances" channel.
        ``None`` is sent when nothing can be computed.
        """
        # Reset the messages left over from a previous run.
        self.warning(1)
        self.error(1)
        metric = METRICS[self.metric_idx]
        distances = None
        data = self.data
        # NOTE(review): sparse input combined with a metric that lacks sparse
        # support is dropped here without any message -- confirm intended.
        if data is not None and issparse(data.X) and \
                not metric.supports_sparse:
            data = None

        if data is not None:
            # NOTE(review): the metric is fitted on the raw self.data, before
            # the feature checks and preprocessing below -- confirm intended.
            if isinstance(metric, distance.MahalanobisDistance):
                metric.fit(self.data, axis=1 - self.axis)

            attributes = self.data.domain.attributes
            if not any(a.is_continuous for a in attributes):
                self.error(1, "No continuous features")
                data = None
            elif any(a.is_discrete for a in attributes) or \
                    (not issparse(self.data.X)
                     and numpy.any(numpy.isnan(self.data.X))):
                data = distance._preprocess(self.data)
                if len(attributes) > len(data.domain.attributes):
                    self.warning(1, "Ignoring discrete features")
            else:
                data = self.data

        if data is not None:
            shape = (len(data), len(data.domain.attributes))
            # numpy.prod replaces the numpy.product alias, which was
            # deprecated in NumPy 1.25 and removed in NumPy 2.0.
            if numpy.prod(shape) == 0:
                self.error(1, "Empty data (shape == {})".format(shape))
            else:
                distances = metric(data, data, 1 - self.axis, impute=True)

        self.send("Distances", distances)
示例#8
0
 def test_preprocess_multiclass(self):
     """_preprocess keeps Y and class_vars and only continuous attributes."""
     original = Table('test5.tab')
     processed = _preprocess(original)

     np.testing.assert_equal(processed.Y, original.Y)

     kept_names = [attr.name for attr in processed.domain.attributes]
     continuous_names = [attr.name
                         for attr in original.domain.attributes
                         if attr.is_continuous]
     self.assertEqual(kept_names, continuous_names)
     self.assertEqual(processed.domain.class_vars,
                      original.domain.class_vars)
示例#9
0
 def test_preprocess_multiclass(self):
     """Check that _preprocess leaves the class columns of test5 intact.

     Also verifies that the remaining attributes are exactly the continuous
     attributes of the input, in the same order.
     """
     source = self.test5
     result = _preprocess(source)
     np.testing.assert_equal(result.Y, source.Y)

     expected_names = [var.name for var in source.domain.attributes
                       if var.is_continuous]
     actual_names = [var.name for var in result.domain.attributes]
     self.assertEqual(actual_names, expected_names)

     self.assertEqual(result.domain.class_vars, source.domain.class_vars)
示例#10
0
 def test_preprocess(self):
     """_preprocess drops the discrete attribute and keeps everything else.

     Builds a two-row table with one continuous and one discrete attribute,
     a class variable and a string meta, then compares X, Y, metas and the
     domain parts of the preprocessed table against the input.
     """
     attributes = [ContinuousVariable("c"),
                   DiscreteVariable("d", values=['a', 'b'])]
     class_vars = [DiscreteVariable("cls", values=['e', 'f'])]
     metas = [StringVariable("m")]
     rows = [[1, 'a', 'e', 'm1'], [2, 'b', 'f', 'm2']]
     table = Table(Domain(attributes, class_vars, metas), rows)

     new_table = _preprocess(table)

     # Only the first (continuous) column is expected to remain in X.
     np.testing.assert_equal(new_table.X, table.X[:, 0].reshape(2, 1))
     np.testing.assert_equal(new_table.Y, table.Y)
     np.testing.assert_equal(new_table.metas, table.metas)

     kept_names = [a.name for a in new_table.domain.attributes]
     continuous_names = [a.name for a in table.domain.attributes
                         if a.is_continuous]
     self.assertEqual(kept_names, continuous_names)
     self.assertEqual(new_table.domain.class_vars, table.domain.class_vars)
     self.assertEqual(new_table.domain.metas, table.domain.metas)
示例#11
0
    def commit(self):
        """Preprocess the data, compute distances and send them downstream.

        Clears warning/error slot 1, then (when data is present) runs
        ``distance._preprocess`` and the metric selected by
        ``self.metric_idx``. ``None`` is sent on the "Distances" channel when
        there is no data or no attribute is left after preprocessing.
        """
        self.warning(1)
        self.error(1)

        result = None
        if self.data is not None:
            metric = _METRICS[self.metric_idx][1]
            prepared = distance._preprocess(self.data)
            kept = len(prepared.domain.attributes)
            if kept == 0:
                self.error(1, "No continuous features")
            else:
                # Fewer attributes than before means some were dropped.
                if len(self.data.domain.attributes) > kept:
                    self.warning(1, "Ignoring categoric features")
                result = metric(prepared, prepared, 1 - self.axis)

        self.send("Distances", result)
示例#12
0
 def test_preprocess(self):
     """Verify _preprocess on a small hand-built table.

     The discrete attribute "d" must be removed from X and the domain,
     while Y, metas, class_vars and the meta variables stay unchanged.
     """
     continuous = ContinuousVariable("c")
     discrete = DiscreteVariable("d", values=['a', 'b'])
     target = DiscreteVariable("cls", values=['e', 'f'])
     meta = StringVariable("m")
     domain = Domain([continuous, discrete], [target], [meta])
     original = Table(domain, [[1, 'a', 'e', 'm1'],
                               [2, 'b', 'f', 'm2']])

     processed = _preprocess(original)

     expected_x = original.X[:, 0].reshape(2, 1)
     np.testing.assert_equal(processed.X, expected_x)
     np.testing.assert_equal(processed.Y, original.Y)
     np.testing.assert_equal(processed.metas, original.metas)

     self.assertEqual(
         [var.name for var in processed.domain.attributes],
         [var.name for var in original.domain.attributes
          if var.is_continuous])
     self.assertEqual(processed.domain.class_vars,
                      original.domain.class_vars)
     self.assertEqual(processed.domain.metas, original.domain.metas)
示例#13
0
    def commit(self):
        """Send distances computed from the preprocessed data.

        Warning/error slot 1 is cleared first. Without data, or when
        ``distance._preprocess`` leaves no attributes, ``None`` is sent on
        the "Distances" channel; otherwise the metric selected by
        ``self.metric_idx`` is applied to the preprocessed data.
        """
        self.warning(1)
        self.error(1)

        if self.data is None:
            self.send("Distances", None)
            return

        metric = _METRICS[self.metric_idx][1]
        data = distance._preprocess(self.data)
        n_kept = len(data.domain.attributes)

        if not n_kept:
            self.error(1, "No continuous features")
            self.send("Distances", None)
            return

        n_dropped = len(self.data.domain.attributes) - n_kept
        if n_dropped > 0:
            self.warning(1, "Ignoring categoric features")

        self.send("Distances", metric(data, data, 1 - self.axis))
示例#14
0
    def compute_distances(self, metric, data):
        """Validate and preprocess `data`, then compute distances with `metric`.

        Widget messages are cleared first; problems are reported through
        ``self.Error`` / ``self.Warning``.

        Returns the result of ``metric(data, data, 1 - self.axis,
        impute=True)``, or ``None`` when `data` is ``None``, fails a check,
        or fitting a Mahalanobis metric raises ``ValueError`` or
        ``MemoryError``.
        """
        self.clear_messages()

        if data is None:
            return None

        if issparse(data.X) and not metric.supports_sparse:
            self.Error.dense_metric_sparse_data()
            return None

        attributes = data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.Error.no_continuous_features()
            return None

        needs_preprocessing = False
        # Inspect the table being processed (the `data` argument), like every
        # other check here, instead of reaching for self.data.
        if any(a.is_discrete for a in attributes):
            self.Warning.ignoring_discrete()
            needs_preprocessing = True

        if not issparse(data.X) and bn.anynan(data.X):
            self.Warning.imputing_data()
            needs_preprocessing = True

        if needs_preprocessing:
            # removes discrete features and imputes data
            data = distance._preprocess(data)

        if not data.X.size:
            self.Error.empty_data()
            return None

        if isinstance(metric, distance.MahalanobisDistance):
            # Mahalanobis distance has to be trained before it can be used
            # to compute distances
            try:
                metric.fit(data, axis=1 - self.axis)
            except (ValueError, MemoryError) as e:
                self.Error.mahalanobis_error(e)
                return None

        return metric(data, data, 1 - self.axis, impute=True)
示例#15
0
    def compute_distances(self, metric, data):
        """Validate and preprocess `data`, then compute distances with `metric`.

        Widget messages are cleared first; problems are reported through
        ``self.Error`` / ``self.Warning``.

        Returns the result of ``metric(data, data, 1 - self.axis,
        impute=True)``, or ``None`` when `data` is ``None`` or fails one of
        the checks.
        """
        self.clear_messages()

        if data is None:
            return None

        if issparse(data.X) and not metric.supports_sparse:
            self.Error.dense_metric_sparse_data()
            return None

        attributes = data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.Error.no_continuous_features()
            return None

        needs_preprocessing = False
        # Inspect the table being processed (the `data` argument), like every
        # other check here, instead of reaching for self.data.
        if any(a.is_discrete for a in attributes):
            self.Warning.ignoring_discrete()
            needs_preprocessing = True

        if not issparse(data.X) and bn.anynan(data.X):
            self.Warning.imputing_data()
            needs_preprocessing = True

        if needs_preprocessing:
            # removes discrete features and imputes data
            data = distance._preprocess(data)

        if not data.X.size:
            self.Error.empty_data(data.X.shape)
            return None

        if isinstance(metric, distance.MahalanobisDistance):
            # n and m are the two dimensions of X, swapped when
            # self.axis == 1; the metric is refused unless n > m.
            n, m = data.X.shape
            if self.axis == 1:
                n, m = m, n
            if n <= m:
                self.Error.too_few_observations()
                return None

            # Mahalanobis distance has to be trained before it can be used
            # to compute distances
            metric.fit(data, axis=1 - self.axis)

        return metric(data, data, 1 - self.axis, impute=True)
示例#16
0
 def test_preprocess(self):
     """_preprocess removes the discrete attribute, nothing else changes.

     Uses a two-row table with attributes "c" (continuous) and "d"
     (discrete), class variable "cls" and string meta "m".
     """
     domain = Domain(
         [ContinuousVariable("c"), DiscreteVariable("d", values=["a", "b"])],
         [DiscreteVariable("cls", values=["e", "f"])],
         [StringVariable("m")],
     )
     rows = [
         [1, "a", "e", "m1"],
         [2, "b", "f", "m2"],
     ]
     table = Table(domain, rows)
     new_table = _preprocess(table)

     # Array contents: X keeps only its first column; Y and metas match.
     first_column = table.X[:, 0].reshape(2, 1)
     np.testing.assert_equal(new_table.X, first_column)
     np.testing.assert_equal(new_table.Y, table.Y)
     np.testing.assert_equal(new_table.metas, table.metas)

     # Domain: continuous attributes only; class_vars and metas unchanged.
     new_names = [a.name for a in new_table.domain.attributes]
     continuous_names = [
         a.name for a in table.domain.attributes if a.is_continuous
     ]
     self.assertEqual(new_names, continuous_names)
     self.assertEqual(new_table.domain.class_vars, table.domain.class_vars)
     self.assertEqual(new_table.domain.metas, table.domain.metas)
示例#17
0
 def test_preprocess_impute(self):
     """The X of the preprocessed test5 table must not contain NaN."""
     imputed = _preprocess(self.test5)
     nan_mask = np.isnan(imputed.X)
     self.assertFalse(nan_mask.any())
示例#18
0
 def test_preprocess_impute(self):
     """After _preprocess, X of 'test5.tab' must be free of NaN values."""
     processed = _preprocess(Table('test5.tab'))
     has_nan = np.isnan(processed.X).any()
     self.assertFalse(has_nan)
示例#19
0
 def test_preprocess_impute(self):
     """Check that _preprocess leaves no NaN in X for 'test5.tab'."""
     source = Table('test5.tab')
     result_x = _preprocess(source).X
     self.assertFalse(np.isnan(result_x).any())
示例#20
0
 def test_preprocess_impute(self):
     """No NaN may remain in X once test5 went through _preprocess."""
     processed_x = _preprocess(self.test5).X
     any_nan = np.isnan(processed_x).any()
     self.assertFalse(any_nan)
    def paint(self, painter, *args):
        """Draw ``self._line`` on `painter` using this item's pen.

        Extra positional arguments are accepted and ignored. The painter
        state is saved before drawing and restored afterwards.
        """
        line_missing = self._line is None
        if line_missing:
            # NOTE(review): boundingRect() is presumably called for its side
            # effect of populating self._line -- confirm.
            self.boundingRect()

        painter.save()
        pen = self.pen()
        painter.setPen(pen)
        painter.drawLine(self._line)
        painter.restore()


def clusters_at_height(root, height):
    """Return the clusters obtained by cutting the tree under `root` at `height`.

    Nodes are visited in ``preorder(root)`` order. The first node on each
    branch whose ``value.height`` is below `height` is collected, and all
    nodes yielded by ``preorder`` for it are skipped from then on.
    """
    covered = set()
    selected = []
    for node in preorder(root):
        if node not in covered and node.value.height < height:
            selected.append(node)
            # Mark the whole subtree so that nested clusters are not added.
            covered.update(preorder(node))
    return selected


if __name__ == "__main__":  # pragma: no cover
    from Orange import distance

    # Manual preview: run the widget on Euclidean distances computed from
    # the preprocessed "iris" table.
    iris = Orange.data.Table("iris")
    prepared = distance._preprocess(iris)
    dist_matrix = distance.Euclidean(prepared)
    WidgetPreview(OWHierarchicalClustering).run(dist_matrix)