def commit(self):
    """Compute distances for ``self.data`` and send them on "Distances".

    Message slot 1 (warning and error) is cleared first.  The input is
    rejected when it has no continuous attribute; it is passed through
    ``distance._preprocess`` when it has discrete attributes or NaN values
    in ``X``.  ``None`` is sent when there is no usable data.
    """
    self.warning(1)
    self.error(1)

    data = distances = None
    if self.data is not None:
        metric = _METRICS[self.metric_idx][1]
        attributes = self.data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.error(1, "No continuous features")
        elif (any(a.is_discrete for a in attributes)
              or numpy.any(numpy.isnan(self.data.X))):
            data = distance._preprocess(self.data)
            # Fewer attributes after preprocessing means some were dropped.
            if len(attributes) > len(data.domain.attributes):
                self.warning(1, "Ignoring discrete features")
        else:
            data = self.data

    if data is not None:
        shape = (len(data), len(data.domain.attributes))
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported name for the same reduction.
        if numpy.prod(shape) == 0:
            self.error(1, "Empty data (shape == {})".format(shape))
        else:
            distances = metric(data, data, 1 - self.axis)

    self.send("Distances", distances)
def checks(metric, data):
    """Validate `data` for `metric`; return usable data or ``None``.

    Problems are reported through ``self.Error`` / ``self.Warning``.
    NOTE(review): ``self`` is not a parameter; presumably this function is
    nested inside a widget method and closes over ``self`` -- confirm.

    Returns ``None`` when `data` is ``None``, is sparse while the metric
    does not support sparse input, has no continuous attribute, or has an
    empty ``X``.  Otherwise returns `data`, preprocessed if needed.
    """
    if data is None:
        return None

    if issparse(data.X) and not metric.supports_sparse:
        self.Error.dense_metric_sparse_data()
        return None

    attributes = data.domain.attributes
    if not any(a.is_continuous for a in attributes):
        self.Error.no_continuous_features()
        return None

    needs_preprocessing = False
    # Inspect the table being validated (the argument), like every other
    # check here, rather than whatever ``self.data`` currently holds.
    if any(a.is_discrete for a in attributes):
        self.Warning.ignoring_discrete()
        needs_preprocessing = True

    if not issparse(data.X) and bn.anynan(data.X):
        self.Warning.imputing_data()
        needs_preprocessing = True

    if needs_preprocessing:
        # removes discrete features and imputes data
        data = distance._preprocess(data)

    if not data.X.size:
        self.Error.empty_data()
        return None

    return data
def commit(self):
    """Compute distances for ``self.data`` and send them on "Distances".

    Message slot 1 (warning and error) is cleared first.  The metric is
    taken from ``self.available_metrics``; a Mahalanobis metric is fitted
    before use.  ``None`` is sent when there is no usable data.
    """
    self.warning(1)
    self.error(1)

    data = distances = None
    if self.data is not None:
        metric = self.available_metrics[self.metric_idx]
        if isinstance(metric, distance.MahalanobisDistance):
            # NOTE(review): the fit uses the raw input, before the feature
            # checks and preprocessing below -- confirm this is intended.
            metric.fit(self.data, axis=1 - self.axis)

        attributes = self.data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.error(1, "No continuous features")
        elif any(a.is_discrete for a in attributes) or (
                not issparse(self.data.X)
                and numpy.any(numpy.isnan(self.data.X))):
            data = distance._preprocess(self.data)
            # Fewer attributes after preprocessing means some were dropped.
            if len(attributes) > len(data.domain.attributes):
                self.warning(1, "Ignoring discrete features")
        else:
            data = self.data

    if data is not None:
        shape = (len(data), len(data.domain.attributes))
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported name for the same reduction.
        if numpy.prod(shape) == 0:
            self.error(1, "Empty data (shape == {})".format(shape))
        else:
            distances = metric(data, data, 1 - self.axis, impute=True)

    self.send("Distances", distances)
def commit(self):
    """Compute distances for ``self.data`` and send them on "Distances".

    Sparse input is dropped when the selected metric does not declare
    ``supports_sparse``.  All widget messages are cleared, then problems
    are reported through ``self.Error`` / ``self.Warning``.  ``None`` is
    sent when there is no usable data.
    """
    metric = METRICS[self.metric_idx]
    distances = None
    data = self.data
    if data is not None and issparse(data.X) and not metric.supports_sparse:
        data = None

    self.clear_messages()

    if data is not None:
        if isinstance(metric, distance.MahalanobisDistance):
            # NOTE(review): the fit uses the raw input, before the feature
            # checks and preprocessing below -- confirm this is intended.
            metric.fit(self.data, axis=1 - self.axis)

        attributes = self.data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.Error.no_continuous_features()
            data = None
        elif any(a.is_discrete for a in attributes) or (
                not issparse(self.data.X)
                and numpy.any(numpy.isnan(self.data.X))):
            data = distance._preprocess(self.data)
            # Fewer attributes after preprocessing means some were dropped.
            if len(attributes) > len(data.domain.attributes):
                self.Warning.ignoring_discrete()
        else:
            data = self.data

    if data is not None:
        shape = (len(data), len(data.domain.attributes))
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported name for the same reduction.
        if numpy.prod(shape) == 0:
            self.Error.empty_data(shape)
        else:
            distances = metric(data, data, 1 - self.axis, impute=True)

    self.send("Distances", distances)
def commit(self):
    """Compute distances for ``self.data`` and send them on "Distances".

    Message slot 1 (warning and error) is cleared first.  The input is
    rejected when it has no continuous attribute; it is passed through
    ``distance._preprocess`` when it has discrete attributes or NaN values
    in ``X``.  The metric is called with ``impute=True``.  ``None`` is sent
    when there is no usable data.
    """
    self.warning(1)
    self.error(1)

    data = distances = None
    if self.data is not None:
        metric = _METRICS[self.metric_idx][1]
        attributes = self.data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.error(1, "No continuous features")
        elif (any(a.is_discrete for a in attributes)
              or numpy.any(numpy.isnan(self.data.X))):
            data = distance._preprocess(self.data)
            # Fewer attributes after preprocessing means some were dropped.
            if len(attributes) > len(data.domain.attributes):
                self.warning(1, "Ignoring discrete features")
        else:
            data = self.data

    if data is not None:
        shape = (len(data), len(data.domain.attributes))
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported name for the same reduction.
        if numpy.prod(shape) == 0:
            self.error(1, "Empty data (shape == {})".format(shape))
        else:
            distances = metric(data, data, 1 - self.axis, impute=True)

    self.send("Distances", distances)
def commit(self):
    """Compute distances for ``self.data`` and send them on "Distances".

    Sparse input is dropped when the selected metric does not declare
    ``supports_sparse``.  All widget messages are cleared, then problems
    are reported through ``self.Error`` / ``self.Warning``.  ``None`` is
    sent when there is no usable data.
    """
    metric = METRICS[self.metric_idx]
    distances = None
    data = self.data
    if data is not None and issparse(data.X) and not metric.supports_sparse:
        data = None

    self.clear_messages()

    if data is not None:
        if isinstance(metric, distance.MahalanobisDistance):
            # NOTE(review): the fit uses the raw input, before the feature
            # checks and preprocessing below -- confirm this is intended.
            metric.fit(self.data, axis=1 - self.axis)

        attributes = self.data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.Error.no_continuous_features()
            data = None
        elif any(a.is_discrete for a in attributes) or (
                not issparse(self.data.X)
                and numpy.any(numpy.isnan(self.data.X))):
            data = distance._preprocess(self.data)
            # Fewer attributes after preprocessing means some were dropped.
            if len(attributes) > len(data.domain.attributes):
                self.Warning.ignoring_discrete()
        else:
            data = self.data

    if data is not None:
        shape = (len(data), len(data.domain.attributes))
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported name for the same reduction.
        if numpy.prod(shape) == 0:
            self.Error.empty_data(shape)
        else:
            distances = metric(data, data, 1 - self.axis, impute=True)

    self.send("Distances", distances)
def commit(self):
    """Compute distances for ``self.data`` and send them on "Distances".

    Message slot 1 (warning and error) is cleared first.  Sparse input is
    dropped when the selected metric does not declare ``supports_sparse``.
    ``None`` is sent when there is no usable data.
    """
    self.warning(1)
    self.error(1)

    metric = METRICS[self.metric_idx]
    distances = None
    data = self.data
    if data is not None and issparse(data.X) and not metric.supports_sparse:
        data = None

    if data is not None:
        if isinstance(metric, distance.MahalanobisDistance):
            # NOTE(review): the fit uses the raw input, before the feature
            # checks and preprocessing below -- confirm this is intended.
            metric.fit(self.data, axis=1 - self.axis)

        attributes = self.data.domain.attributes
        if not any(a.is_continuous for a in attributes):
            self.error(1, "No continuous features")
            data = None
        elif any(a.is_discrete for a in attributes) or (
                not issparse(self.data.X)
                and numpy.any(numpy.isnan(self.data.X))):
            data = distance._preprocess(self.data)
            # Fewer attributes after preprocessing means some were dropped.
            if len(attributes) > len(data.domain.attributes):
                self.warning(1, "Ignoring discrete features")
        else:
            data = self.data

    if data is not None:
        shape = (len(data), len(data.domain.attributes))
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported name for the same reduction.
        if numpy.prod(shape) == 0:
            self.error(1, "Empty data (shape == {})".format(shape))
        else:
            distances = metric(data, data, 1 - self.axis, impute=True)

    self.send("Distances", distances)
def test_preprocess_multiclass(self):
    """Check `_preprocess` on the table loaded from 'test5.tab'.

    Expects Y and the class variables to be unchanged, and the remaining
    attribute names to be exactly the continuous attributes of the input.
    """
    source = Table('test5.tab')
    result = _preprocess(source)

    np.testing.assert_equal(result.Y, source.Y)

    kept_names = [var.name for var in result.domain.attributes]
    continuous_names = [var.name
                        for var in source.domain.attributes
                        if var.is_continuous]
    self.assertEqual(kept_names, continuous_names)

    self.assertEqual(result.domain.class_vars, source.domain.class_vars)
def test_preprocess_multiclass(self):
    """Check `_preprocess` on the ``self.test5`` fixture table.

    Expects Y and the class variables to be unchanged, and the remaining
    attribute names to be exactly the continuous attributes of the input.
    """
    source = self.test5
    result = _preprocess(source)

    np.testing.assert_equal(result.Y, source.Y)

    kept_names = [var.name for var in result.domain.attributes]
    continuous_names = [var.name
                        for var in source.domain.attributes
                        if var.is_continuous]
    self.assertEqual(kept_names, continuous_names)

    self.assertEqual(result.domain.class_vars, source.domain.class_vars)
def test_preprocess(self):
    """Check `_preprocess` on a small table with mixed attribute types.

    The input has one continuous and one discrete attribute, a discrete
    class and a string meta.  The result is expected to hold only the
    first column in X, with Y, metas, class variables and meta variables
    equal to the input's.
    """
    features = [ContinuousVariable("c"),
                DiscreteVariable("d", values=['a', 'b'])]
    targets = [DiscreteVariable("cls", values=['e', 'f'])]
    meta_vars = [StringVariable("m")]
    domain = Domain(features, targets, meta_vars)
    rows = [[1, 'a', 'e', 'm1'],
            [2, 'b', 'f', 'm2']]
    source = Table(domain, rows)

    result = _preprocess(source)

    # Array contents
    first_column = source.X[:, 0].reshape(2, 1)
    np.testing.assert_equal(result.X, first_column)
    np.testing.assert_equal(result.Y, source.Y)
    np.testing.assert_equal(result.metas, source.metas)

    # Domain contents
    kept_names = [var.name for var in result.domain.attributes]
    continuous_names = [var.name
                        for var in source.domain.attributes
                        if var.is_continuous]
    self.assertEqual(kept_names, continuous_names)
    self.assertEqual(result.domain.class_vars, source.domain.class_vars)
    self.assertEqual(result.domain.metas, source.domain.metas)
def commit(self):
    """Preprocess ``self.data``, compute distances and send them out.

    Message slot 1 (warning and error) is cleared first.  An error is
    raised in the widget when preprocessing leaves no attributes; a
    warning when it leaves fewer than the input had.  The result, or
    ``None``, goes to the "Distances" output.
    """
    self.warning(1)
    self.error(1)

    result = None
    source = self.data
    if source is not None:
        compute = _METRICS[self.metric_idx][1]
        prepared = distance._preprocess(source)
        n_kept = len(prepared.domain.attributes)

        if not n_kept:
            self.error(1, "No continuous features")
        elif len(source.domain.attributes) > n_kept:
            self.warning(1, "Ignoring categoric features")

        if n_kept:
            result = compute(prepared, prepared, 1 - self.axis)

    self.send("Distances", result)
def test_preprocess(self):
    """Check `_preprocess` on a two-row table with mixed attribute types.

    The domain has a continuous and a discrete attribute, a discrete class
    and a string meta.  After preprocessing, X is expected to equal the
    input's first column, and Y, metas, class variables and meta variables
    are expected to match the input.
    """
    continuous = ContinuousVariable("c")
    discrete = DiscreteVariable("d", values=['a', 'b'])
    class_var = DiscreteVariable("cls", values=['e', 'f'])
    meta_var = StringVariable("m")
    domain = Domain([continuous, discrete], [class_var], [meta_var])
    before = Table(domain, [[1, 'a', 'e', 'm1'], [2, 'b', 'f', 'm2']])

    after = _preprocess(before)

    np.testing.assert_equal(after.X, before.X[:, 0].reshape(2, 1))
    np.testing.assert_equal(after.Y, before.Y)
    np.testing.assert_equal(after.metas, before.metas)

    names_after = [attr.name for attr in after.domain.attributes]
    names_expected = [attr.name
                      for attr in before.domain.attributes
                      if attr.is_continuous]
    self.assertEqual(names_after, names_expected)
    self.assertEqual(after.domain.class_vars, before.domain.class_vars)
    self.assertEqual(after.domain.metas, before.domain.metas)
def commit(self):
    """Preprocess ``self.data``, compute distances and send them out.

    Message slot 1 (warning and error) is cleared first.  With no input,
    ``None`` is sent.  Otherwise the input goes through
    ``distance._preprocess``; if no attributes remain an error is shown,
    and if some were dropped a warning is shown.
    """
    self.warning(1)
    self.error(1)

    if self.data is None:
        self.send("Distances", None)
        return

    metric = _METRICS[self.metric_idx][1]
    data = distance._preprocess(self.data)
    n_after = len(data.domain.attributes)
    distances = None

    if n_after == 0:
        self.error(1, "No continuous features")
    else:
        if len(self.data.domain.attributes) - n_after > 0:
            self.warning(1, "Ignoring categoric features")
        distances = metric(data, data, 1 - self.axis)

    self.send("Distances", distances)
def compute_distances(self, metric, data):
    """Validate `data` and return the distances computed by `metric`.

    All widget messages are cleared first; problems are reported through
    ``self.Error`` / ``self.Warning``.

    Returns ``None`` when `data` is ``None``, is sparse while the metric
    does not support sparse input, has no continuous attribute, has an
    empty ``X``, or when fitting a Mahalanobis metric raises
    ``ValueError`` / ``MemoryError``.  Otherwise returns
    ``metric(data, data, 1 - self.axis, impute=True)``.
    """
    self.clear_messages()

    if data is None:
        return None

    if issparse(data.X) and not metric.supports_sparse:
        self.Error.dense_metric_sparse_data()
        return None

    attributes = data.domain.attributes
    if not any(a.is_continuous for a in attributes):
        self.Error.no_continuous_features()
        return None

    needs_preprocessing = False
    # Inspect the table being processed (the argument), like every other
    # check here, rather than whatever ``self.data`` currently holds.
    if any(a.is_discrete for a in attributes):
        self.Warning.ignoring_discrete()
        needs_preprocessing = True

    if not issparse(data.X) and bn.anynan(data.X):
        self.Warning.imputing_data()
        needs_preprocessing = True

    if needs_preprocessing:
        # removes discrete features and imputes data
        data = distance._preprocess(data)

    if not data.X.size:
        self.Error.empty_data()
        return None

    if isinstance(metric, distance.MahalanobisDistance):
        # Mahalanobis distance has to be trained before it can be used
        # to compute distances
        try:
            metric.fit(data, axis=1 - self.axis)
        except (ValueError, MemoryError) as e:
            self.Error.mahalanobis_error(e)
            return None

    return metric(data, data, 1 - self.axis, impute=True)
def compute_distances(self, metric, data):
    """Validate `data` and return the distances computed by `metric`.

    All widget messages are cleared first; problems are reported through
    ``self.Error`` / ``self.Warning``.

    Returns ``None`` when `data` is ``None``, is sparse while the metric
    does not support sparse input, has no continuous attribute, has an
    empty ``X``, or (Mahalanobis only) when the first dimension of ``X``,
    after swapping for ``self.axis == 1``, does not exceed the second.
    Otherwise returns ``metric(data, data, 1 - self.axis, impute=True)``.
    """
    self.clear_messages()

    if data is None:
        return None

    if issparse(data.X) and not metric.supports_sparse:
        self.Error.dense_metric_sparse_data()
        return None

    attributes = data.domain.attributes
    if not any(a.is_continuous for a in attributes):
        self.Error.no_continuous_features()
        return None

    needs_preprocessing = False
    # Inspect the table being processed (the argument), like every other
    # check here, rather than whatever ``self.data`` currently holds.
    if any(a.is_discrete for a in attributes):
        self.Warning.ignoring_discrete()
        needs_preprocessing = True

    if not issparse(data.X) and bn.anynan(data.X):
        self.Warning.imputing_data()
        needs_preprocessing = True

    if needs_preprocessing:
        # removes discrete features and imputes data
        data = distance._preprocess(data)

    if not data.X.size:
        self.Error.empty_data(data.X.shape)
        return None

    if isinstance(metric, distance.MahalanobisDistance):
        n, m = data.X.shape
        if self.axis == 1:
            n, m = m, n
        if n <= m:
            self.Error.too_few_observations()
            return None
        # Mahalanobis distance has to be trained before it can be used
        # to compute distances
        metric.fit(data, axis=1 - self.axis)

    return metric(data, data, 1 - self.axis, impute=True)
def test_preprocess(self):
    """Check what `_preprocess` keeps from a small mixed-type table.

    Builds a two-row table with a continuous and a discrete attribute, a
    discrete class and a string meta, then expects the preprocessed X to
    equal the input's first column while Y, metas, class variables and
    meta variables equal the input's.
    """
    attribute_vars = [
        ContinuousVariable("c"),
        DiscreteVariable("d", values=["a", "b"]),
    ]
    class_vars = [DiscreteVariable("cls", values=["e", "f"])]
    meta_vars = [StringVariable("m")]
    original = Table(
        Domain(attribute_vars, class_vars, meta_vars),
        [[1, "a", "e", "m1"], [2, "b", "f", "m2"]],
    )

    processed = _preprocess(original)

    expected_x = original.X[:, 0].reshape(2, 1)
    np.testing.assert_equal(processed.X, expected_x)
    np.testing.assert_equal(processed.Y, original.Y)
    np.testing.assert_equal(processed.metas, original.metas)

    processed_names = [v.name for v in processed.domain.attributes]
    expected_names = [
        v.name for v in original.domain.attributes if v.is_continuous
    ]
    self.assertEqual(processed_names, expected_names)
    self.assertEqual(processed.domain.class_vars, original.domain.class_vars)
    self.assertEqual(processed.domain.metas, original.domain.metas)
def test_preprocess_impute(self):
    """Check that `_preprocess` of ``self.test5`` leaves no NaN in X."""
    processed = _preprocess(self.test5)
    nan_mask = np.isnan(processed.X)
    self.assertFalse(nan_mask.any())
def test_preprocess_impute(self):
    """Check that `_preprocess` of the 'test5.tab' table leaves no NaN in X."""
    source = Table('test5.tab')
    processed = _preprocess(source)
    nan_mask = np.isnan(processed.X)
    self.assertFalse(nan_mask.any())
def paint(self, painter, *args):
    """Draw ``self._line`` on `painter` using this item's pen.

    When ``self._line`` is still ``None``, ``self.boundingRect()`` is
    called first (presumably it computes the line -- confirm).  The
    painter state is saved before and restored after drawing.
    """
    line_missing = self._line is None
    if line_missing:
        self.boundingRect()

    painter.save()
    pen = self.pen()
    painter.setPen(pen)
    painter.drawLine(self._line)
    painter.restore()


def clusters_at_height(root, height):
    """Return a list of clusters by cutting the clustering at `height`.

    Nodes are visited in ``preorder(root)`` order.  A node is selected when
    its ``value.height`` is below `height` and it was not already covered
    by an earlier selected node.
    """
    covered = set()
    selected = []
    for node in preorder(root):
        if node not in covered and node.value.height < height:
            selected.append(node)
            # Everything under a selected node is skipped later on.
            covered.update(preorder(node))
    return selected


if __name__ == "__main__":  # pragma: no cover
    from Orange import distance

    data = Orange.data.Table("iris")
    matrix = distance.Euclidean(distance._preprocess(data))
    preview = WidgetPreview(OWHierarchicalClustering)
    preview.run(matrix)