def test_eq(self):
    # Filters built from identical arguments compare equal; changing the
    # upper bound makes them compare unequal.
    reference = FilterContinuous(1, FilterContinuous.Between, 1, 2)
    twin = FilterContinuous(1, FilterContinuous.Between, 1, 2)
    wider = FilterContinuous(1, FilterContinuous.Between, 1, 3)

    self.assertEqual(reference, twin)
    self.assertNotEqual(reference, wider)

    # `==` has to agree with comparing the instances' attribute dicts.
    for candidate in (twin, wider):
        self.assertEqual(
            reference.__dict__ == candidate.__dict__,
            reference == candidate,
        )
def _gather_conditions(self):
    """Translate the current selection into a list of data filters.

    Each item of ``self.selection`` yields at most one filter:

    * discrete ``self.attribute``: a ``FilterDiscrete`` on the item's
      ``value``, or a negated ``IsDefined`` when ``value`` is falsy;
    * otherwise: a ``FilterContinuous.Between`` on the item's ``low`` and
      ``high``.

    When the item carries a truthy ``group_value``, the filter is combined
    (conjunction) with a ``FilterDiscrete`` on ``self.group_var``.

    Returns:
        list: the filters, in selection order. Items referring to values
        that are not among the variable's ``values`` are skipped.
    """
    attr = self.attribute
    group_attr = self.group_var
    conditions = []
    for data_range in self.selection:
        if attr.is_discrete:
            # If some value was removed from the data (in case settings are
            # loaded from a scheme), do not include the corresponding
            # filter; this is appropriate since data with such value does
            # not exist anyway
            if not data_range.value:
                condition = IsDefined([attr], negate=True)
            elif data_range.value not in attr.values:
                continue
            else:
                condition = FilterDiscrete(attr, [data_range.value])
        else:
            condition = FilterContinuous(
                attr, FilterContinuous.Between,
                data_range.low, data_range.high)

        group_value = data_range.group_value
        # NOTE(review): the original nested an `if not group_value` branch
        # (building a negated IsDefined on the group variable) inside this
        # truthiness check, where it could never execute. It is dropped here
        # without changing behaviour: a falsy group value adds no group
        # filter. If missing group values are meant to be selectable, this
        # guard probably needs to test for the presence of a group variable
        # instead -- confirm against the callers that build `self.selection`.
        if group_value:
            if group_value not in group_attr.values:
                # Same reasoning as above: the group value no longer exists.
                continue
            grp_filter = FilterDiscrete(group_attr, [group_value])
            condition = Values([condition, grp_filter], conjunction=True)
        conditions.append(condition)
    return conditions
def test_values(self):
    # Nest `Values` filters: a negated disjunction of two continuous
    # filters, then that result combined with a discrete filter.
    variables = self.iris.domain.variables
    first_below_5 = FilterContinuous(variables[0], FilterContinuous.Less, 5)
    second_above_3 = FilterContinuous(variables[1], FilterContinuous.Greater, 3)
    class_is_2 = FilterDiscrete(variables[4], [2])

    neither = Values(
        [first_below_5, second_above_3], conjunction=False, negate=True)
    neither_and_class = Values([neither, class_is_2])

    rows_neither = neither(self.iris)
    rows_combined = neither_and_class(self.iris)

    # Adding the class filter must remove some rows.
    self.assertGreater(len(rows_neither), len(rows_combined))

    # Every surviving row fails both continuous conditions and has class 2.
    self.assertTrue((rows_combined.X[:, 0] >= 5).all())
    self.assertTrue((rows_combined.X[:, 1] <= 3).all())
    self.assertTrue((rows_combined.Y == 2).all())

    # The row count matches the same selection computed directly with numpy.
    either_holds = (self.iris.X[:, 0] < 5) | (self.iris.X[:, 1] > 3)
    expected_mask = ~either_holds & (self.iris.Y == 2)
    self.assertEqual(len(rows_combined), expected_mask.sum())
def __init__(self, dist, attr, group_val_index=None, group_var=None):
    """Compute summary statistics and selection filters for a distribution.

    Args:
        dist: two-row array; row 0 holds the values and row 1 their
            frequencies (row 1 is used as the weights for the mean).
            The first and last values are taken as minimum and maximum, so
            the values are presumably sorted ascending -- confirm with the
            code that builds the distribution.
        attr: variable passed to the ``FilterContinuous`` spanning
            ``q25``..``q75``.
        group_val_index: optional group value; when not ``None`` a
            ``FilterDiscrete`` on `group_var` is appended to the conditions.
        group_var: variable used for that group filter.

    When the total frequency is zero only ``dist`` and ``n`` are set.
    """
    self.dist = dist
    self.n = n = np.sum(dist[1])
    if n == 0:
        return
    self.a_min = float(dist[0, 0])
    self.a_max = float(dist[0, -1])
    self.mean = float(np.sum(dist[0] * dist[1]) / n)
    self.var = float(np.sum(dist[1] * (dist[0] - self.mean) ** 2) / n)
    self.dev = math.sqrt(self.var)

    # Walk the cumulative frequency and record the value at which each of
    # the 25 %, 50 % and 75 % thresholds is reached.
    thresholds = [n / 4, n / 2, n / 4 * 3]
    quartiles = []
    cumulative = 0
    for i, freq in enumerate(dist[1]):
        cumulative += freq
        # A single value can carry enough weight to cross several
        # thresholds at once, so keep consuming thresholds for it. Advancing
        # only one threshold per value (as this code used to) assigns the
        # remaining quartiles to later values or leaves them undetermined.
        while (len(quartiles) < 3
               and cumulative >= thresholds[len(quartiles)]):
            if (cumulative == thresholds[len(quartiles)]
                    and i + 1 < dist.shape[1]):
                # The threshold falls exactly between two values: use the
                # midpoint of this value and the next one.
                quartiles.append(float((dist[0, i] + dist[0, i + 1]) / 2))
            else:
                quartiles.append(float(dist[0, i]))
        if len(quartiles) == 3:
            break

    if len(quartiles) == 3:
        self.q25, self.median, self.q75 = quartiles
    else:
        # Defensive fallback kept from the previous implementation for
        # input where the thresholds are never reached.
        self.q25 = self.q75 = None
        self.median = quartiles[1] if len(quartiles) == 2 else None

    self.conditions = [
        FilterContinuous(attr, FilterContinuous.Between, self.q25, self.q75)
    ]
    if group_val_index is not None:
        self.conditions.append(FilterDiscrete(group_var, [group_val_index]))
def test_reprs(self):
    # repr() of each filter must evaluate back to an object with the same
    # repr. Construction order is kept as-is.
    undefined = IsDefined(negate=True)
    has_class = HasClass()
    random_rows = Random()
    discrete_any = FilterDiscrete(self.attr_disc, None)
    not_same_value = SameValue(self.attr_disc, self.value_disc, negate=True)
    less_than_5 = FilterContinuous(self.vs[0], FilterContinuous.Less, 5)
    greater_than_3 = FilterContinuous(self.vs[1], FilterContinuous.Greater, 3)
    combined = Values(
        [less_than_5, greater_than_3], conjunction=False, negate=True)
    value_filter = ValueFilter(self.attr_disc)
    string_equal = FilterString(
        "name", FilterString.Equal, "Aardvark", case_sensitive=False)
    string_list = FilterStringList("name", ["Aardvark"], case_sensitive=False)
    regex = FilterRegex("name", "^c...$")

    # `greater_than_3` is only exercised as a member of `combined`.
    filters = [
        undefined, has_class, random_rows, discrete_any, not_same_value,
        less_than_5, combined, value_filter, string_equal, string_list,
        regex,
    ]
    for original in filters:
        text = repr(original)
        # eval() is applied only to reprs produced by this test itself.
        rebuilt = eval(text)
        self.assertEqual(repr(rebuilt), text)
def test_min(self):
    # `min` and `ref` expose the same bound: writing one is visible
    # through the other.
    between = FilterContinuous(1, FilterContinuous.Between, 1, 2)
    self.assertEqual(between.min, 1)
    self.assertEqual(between.max, 2)
    self.assertEqual(between.ref, 1)

    between.ref = 0
    self.assertEqual(between.min, 0)

    between.min = -1
    self.assertEqual(between.ref, -1)

    # Unknown keyword arguments are rejected, with or without `min`.
    with self.assertRaises(TypeError):
        FilterContinuous(1, FilterContinuous.Equal, 0, c=12)
    with self.assertRaises(TypeError):
        FilterContinuous(1, FilterContinuous.Equal, 0, min=5, c=12)

    # The bounds may also be given through the `min`/`max` keywords.
    by_keyword = FilterContinuous(1, FilterContinuous.Between, min=1, max=2)
    self.assertEqual(by_keyword.ref, 1)
def test_min(self):
    # Check that `min` and `ref` read and write the same lower bound, and
    # that unexpected keyword arguments raise TypeError.
    flt = FilterContinuous(1, FilterContinuous.Between, 1, 2)
    for attribute, expected in (("min", 1), ("max", 2), ("ref", 1)):
        self.assertEqual(getattr(flt, attribute), expected)

    # Writing through one name is observable through the other.
    flt.ref = 0
    self.assertEqual(flt.min, 0)
    flt.min = -1
    self.assertEqual(flt.ref, -1)

    for bad_kwargs in ({"c": 12}, {"min": 5, "c": 12}):
        self.assertRaises(
            TypeError,
            FilterContinuous, 1, FilterContinuous.Equal, 0, **bad_kwargs)

    # Keyword form of the bounds.
    flt = FilterContinuous(1, FilterContinuous.Between, min=1, max=2)
    self.assertEqual(flt.ref, 1)
def get_filtered_data(self):
    """Return the rows of ``self.data`` that fall into the selected cells.

    Each ``(i, j)`` pair in ``self.selection`` becomes one ``Values`` filter
    that matches the row feature against ``pivot_table.X[i, 0]`` and the
    column feature against ``j``. The per-cell filters are then combined
    with ``conjunction=False`` and applied to the data.

    Returns:
        The filtered data, or ``None`` when the data, the selection or the
        pivot table is falsy.
    """
    if not (self.data and self.selection and self.pivot.pivot_table):
        return None

    cell_filters = []
    for row, column_value in self.selection:
        targets = (
            (self.row_feature, self.pivot.pivot_table.X[row, 0]),
            (self.col_feature, column_value),
        )
        parts = []
        for variable, value in targets:
            if isinstance(variable, DiscreteVariable):
                parts.append(FilterDiscrete(variable, [value]))
            elif isinstance(variable, ContinuousVariable):
                parts.append(
                    FilterContinuous(variable, FilterContinuous.Equal, value))
            # Variables of any other type contribute no filter.
        cell_filters.append(Values(parts))

    return Values(cell_filters, conjunction=False)(self.data)
def __init__(self, dist, attr, group_val_index=None, group_var=None):
    """Derive summary statistics and selection filters from a distribution.

    Row 0 of `dist` is read as the values and row 1 as their frequencies.
    When the frequencies sum to zero only ``dist`` and ``n`` are set.
    Otherwise the minimum, maximum, mean, variance, deviation and quartiles
    are stored, together with ``conditions``: a ``FilterContinuous`` between
    ``q25`` and ``q75`` plus, when `group_val_index` is not ``None``, a
    ``FilterDiscrete`` on `group_var`.
    """
    self.dist = dist
    total = np.sum(dist[1])
    self.n = total
    if total == 0:
        return

    self.a_min = float(dist[0, 0])
    self.a_max = float(dist[0, -1])

    mean = float(np.sum(dist[0] * dist[1]) / total)
    self.mean = mean
    variance = float(np.sum(dist[1] * (dist[0] - mean) ** 2) / total)
    self.var = variance
    self.dev = math.sqrt(variance)

    values, frequencies = np.asarray(dist)
    lower, median, upper = _quantiles(values, frequencies, [0.25, 0.5, 0.75])
    self.median = median

    # A quartile that coincides with the median is stored as None; the
    # plotting code leaves out quartiles that are None.
    if lower == median:
        self.q25 = None
    else:
        self.q25 = lower
    if upper == median:
        self.q75 = None
    else:
        self.q75 = upper

    quartile_filter = FilterContinuous(
        attr, FilterContinuous.Between, self.q25, self.q75)
    self.conditions = [quartile_filter]
    if group_val_index is not None:
        group_filter = FilterDiscrete(group_var, [group_val_index])
        self.conditions.append(group_filter)
def bench_adult_filter_pre_pandas(self):
    # Benchmark body: apply a combined continuous + discrete filter to the
    # adult data set. The filtered result is discarded.
    domain = self.adult.domain
    older_than_30 = FilterContinuous(
        domain["age"], FilterContinuous.Greater, 30)
    first_workclass = FilterDiscrete(domain["workclass"], [0])
    both = Values([older_than_30, first_workclass])
    both(self.adult)
def test_str(self):
    # str() of a filter whose column is given by index or by name.
    for column, expected in ((1, "feature(1) = 1"), ("foo", "foo = 1")):
        flt = FilterContinuous(column, FilterContinuous.Equal, 1)
        self.assertEqual(str(flt), expected)

    # str() for every operator, using a variable from the test domain.
    flt = FilterContinuous(self.domain[0], FilterContinuous.Equal, 1, 2)
    self.assertEqual(str(flt), "a = 1")

    expected_by_operator = [
        (flt.NotEqual, "a ≠ 1"),
        (flt.Less, "a < 1"),
        (flt.LessEqual, "a ≤ 1"),
        (flt.Greater, "a > 1"),
        (flt.GreaterEqual, "a ≥ 1"),
        (flt.Between, "1 ≤ a ≤ 2"),
        (flt.Outside, "not 1 ≤ a ≤ 2"),
        (flt.IsDefined, "a is defined"),
        # An unknown operator code must not raise.
        (-1, "invalid operator"),
    ]
    for operator, expected in expected_by_operator:
        flt.oper = operator
        self.assertEqual(str(flt), expected)