def test_remove_non_binary(self):
    """Check which variables `remove_nonbinary_features` keeps and where.

    The domain mixes two-valued discrete, three-valued discrete and
    continuous attributes, a three-valued class variable, and metas of
    several types.  The function is called once with default arguments
    and once with ``to_metas=True``; the expected attributes and class
    variable are the same for both calls, only the expected metas differ.
    """
    # Discrete variables a, b, c with two values each.
    bin_a, bin_b, bin_c = [
        DiscreteVariable(name, values=tuple("12")) for name in "abc"]
    # Discrete variables d, e, f with three values each.
    tri_d, tri_e, tri_f = [
        DiscreteVariable(name, values=tuple("123")) for name in "def"]
    cont_x, cont_y = [ContinuousVariable(name) for name in "xy"]
    text = StringVariable("t")

    attributes = [tri_d, bin_a, bin_b, cont_x]
    metas = [cont_y, tri_f, text, bin_c]
    data = Table.from_domain(Domain(attributes, tri_e, metas), 5)

    # (keyword arguments for the call, metas expected in the result)
    cases = (
        ({}, (cont_y, tri_f, text, bin_c)),
        ({"to_metas": True},
         (cont_y, tri_f, text, bin_c, tri_d, cont_x)),
    )
    for kwargs, expected_metas in cases:
        reduced = distance.remove_nonbinary_features(data, **kwargs)
        reduced_domain = reduced.domain
        self.assertEqual(reduced_domain.attributes, (bin_a, bin_b))
        self.assertEqual(reduced_domain.class_var, tri_e)
        self.assertEqual(reduced_domain.metas, expected_metas)
def _fix_nonbinary(): nonlocal data if metric is distance.Jaccard and not issparse(data.X): nbinary = sum(a.is_discrete and len(a.values) == 2 for a in data.domain.attributes) if not nbinary: self.Error.no_binary_features() return False elif nbinary < len(data.domain.attributes): self.Warning.ignoring_nonbinary() data = distance.remove_nonbinary_features(data) return True