def test_pivot(self):
    # Build a Pivot over the fixture table with all of Pivot.Functions and
    # the first three domain variables, then compare its pivot_table with
    # a hand-written expectation.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions,
                  source_domain[0], source_domain[1], source_domain[2])
    actual = pivot.pivot_table

    aggregate_var = Dv("Aggregate",
                       ["Count", "Count defined", "Sum", "Mean", "Min",
                        "Max", "Mode", "Median", "Var"])
    expected_vars = (aggregate_var, Cv("c"), Cv("d"), Cv("e"))
    expected_x = np.array([
        [0, 0, 2, 1, 0],
        [0, 1, 2, 1, 0],
        [0, 2, 5, 2, 0],
        [0, 3, 2.5, 2, nan],
        [0, 4, 1, 2, nan],
        [0, 5, 4, 2, nan],
        [0, 6, 1, 2, nan],
        [0, 7, 2.5, 2, nan],
        [0, 8, 2.25, 0, nan],
        [1, 0, 2, 1, 1],
        [1, 1, 2, 0, 1],
        [1, 2, 11, 0, 7],
        [1, 3, 5.5, nan, 7],
        [1, 4, 5, nan, 7],
        [1, 5, 6, nan, 7],
        [1, 6, 5, nan, 7],
        [1, 7, 5.5, nan, 7],
        [1, 8, 0.25, nan, 0],
    ])
    # The expected domain starts with the first variable of the source
    # domain, followed by the aggregate column and one column per value.
    expected = Table(Domain(source_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(actual, expected)
def test_pivot_no_col_var(self):
    # Same as the full pivot check, but None is passed in the position
    # between the first and third domain variables.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions,
                  source_domain[0], None, source_domain[2])
    actual = pivot.pivot_table

    aggregate_var = Dv("Aggregate",
                       ["Count", "Count defined", "Sum", "Mean", "Min",
                        "Max", "Mode", "Median", "Var"])
    expected_vars = (aggregate_var, Cv("a"), Cv("b"))
    expected_x = np.array([
        [0, 0, 4, 0],
        [0, 1, 4, 0],
        [0, 2, 10, 0],
        [0, 3, 2.5, nan],
        [0, 4, 1, nan],
        [0, 5, 4, nan],
        [0, 6, 1, nan],
        [0, 7, 2.5, nan],
        [0, 8, 1.25, nan],
        [1, 0, 0, 4],
        [1, 1, 0, 3],
        [1, 2, 0, 18],
        [1, 3, nan, 6],
        [1, 4, nan, 5],
        [1, 5, nan, 7],
        [1, 6, nan, 5],
        [1, 7, nan, 6],
        [1, 8, nan, 2 / 3],
    ])
    expected = Table(Domain(source_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(actual, expected)
def test_pivot_no_val_var(self):
    # Only the first two domain variables are handed to Pivot; the
    # expected table then carries a single "Count" aggregate.
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions,
                  source_domain[0], source_domain[1])
    actual = pivot.pivot_table

    expected_vars = (Dv("Aggregate", ["Count"]), Cv("c"), Cv("d"), Cv("e"))
    expected_x = np.array([
        [0, 0, 2, 1, 0],
        [1, 0, 2, 1, 1],
    ])
    expected = Table(Domain(source_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(actual, expected)
def setUpClass(cls):
    # Create the two fixture tables stored on the class: ``table`` and
    # ``table1``.
    nan = np.nan

    # ``table``: two discrete variables and one continuous variable, with
    # missing values scattered across all three columns.
    simple_domain = Domain([
        Dv("d1", ("a", "b")),
        Dv("d2", ("c", "d", "e")),
        Cv("c1"),
    ])
    simple_x = np.array([
        [0, 0, 1],
        [0, 1, 2],
        [0, nan, 3],
        [0, 0, 4],
        [1, 0, 5],
        [1, 0, 6],
        [1, 1, nan],
        [1, 2, 7],
        [nan, 0, 8],
    ])
    cls.table = Table(simple_domain, simple_x)

    # ``table1``: five variables in the first Domain argument, "cls" as
    # the second, and two ``sv`` variables as the third.
    rich_domain = Domain(
        [Cv("c0"), Dv("d1", ("a", "b")), Cv("c1"),
         Dv("d2", ("a", "b")), Cv("c2")],
        Dv("cls", ("a", "b")),
        [sv("m1"), sv("m2")],
    )
    rich_x = np.array([
        [nan, 0, 1, 0, 2],
        [nan, 1, 2, nan, 3],
        [nan, 0, 3, 1, nan],
    ])
    rich_metas = np.array(
        [["aa", "dd"], ["bb", "ee"], ["cc", ""]], dtype=object)
    cls.table1 = Table(rich_domain, rich_x, np.array([0, 0, 1]), rich_metas)
def test_pivot_time_val_var(self):
    # Pivot a small table whose third variable is a ``Tv`` with
    # have_date=1, checking the output column types for several
    # combinations of aggregation functions.
    nan = np.nan
    domain = Domain([
        Dv("d1", ("a", "b")),
        Dv("d2", ("c", "d")),
        Tv("t1", have_date=1),
    ])
    data = np.array([[0, 1, 1e9], [0, 0, 1e8], [1, 0, 2e8], [1, 1, nan]])
    table = Table(domain, data)

    # Min: expected value columns are Tv.
    pivot = Pivot(table, [Pivot.Min], domain[0], domain[1], domain[2])
    expected_vars = (domain[0], Dv("Aggregate", ["Min"]),
                     Tv("c", have_date=1), Tv("d", have_date=1))
    expected_x = np.array([[0, 0, 1e8, 1e9],
                           [1, 0, 2e8, nan]])
    self.assert_table_equal(pivot.pivot_table,
                            Table(Domain(expected_vars), expected_x))

    # Min, Max: expected value columns are still Tv.
    pivot = Pivot(table, [Pivot.Min, Pivot.Max],
                  domain[0], domain[1], domain[2])
    expected_vars = (domain[0], Dv("Aggregate", ["Min", "Max"]),
                     Tv("c", have_date=1), Tv("d", have_date=1))
    expected_x = np.array([[0, 0, 1e8, 1e9],
                           [0, 1, 1e8, 1e9],
                           [1, 0, 2e8, nan],
                           [1, 1, 2e8, nan]])
    self.assert_table_equal(pivot.pivot_table,
                            Table(Domain(expected_vars), expected_x))

    # Count defined, Sum: expected value columns are Cv.
    pivot = Pivot(table, [Pivot.Count_defined, Pivot.Sum],
                  domain[0], domain[1], domain[2])
    expected_vars = (domain[0], Dv("Aggregate", ["Count defined", "Sum"]),
                     Cv("c"), Cv("d"))
    expected_x = np.array([[0, 0, 1, 1],
                           [0, 1, 1e8, 1e9],
                           [1, 0, 1, 0],
                           [1, 1, 2e8, 0]])
    self.assert_table_equal(pivot.pivot_table,
                            Table(Domain(expected_vars), expected_x))

    # Count defined, Max: expected value columns are Dv whose values mix
    # number-like and date-like strings.
    pivot = Pivot(table, [Pivot.Count_defined, Pivot.Max],
                  domain[0], domain[1], domain[2])
    expected_vars = (domain[0], Dv("Aggregate", ["Count defined", "Max"]),
                     Dv("c", ["1.0", "1973-03-03", "1976-05-03"]),
                     Dv("d", ["0.0", "1.0", "2001-09-09"]))
    expected_x = np.array([[0, 0, 0, 1],
                           [0, 1, 1, 2],
                           [1, 0, 0, 0],
                           [1, 1, 2, nan]])
    self.assert_table_equal(pivot.pivot_table,
                            Table(Domain(expected_vars), expected_x))
def test_pivot_total(self):
    # Check the three "total" tables exposed by Pivot (pivot_total_h,
    # pivot_total_v and pivot_total) for the Count and Sum functions.
    source_domain = self.table.domain
    functions = [Pivot.Functions.Count, Pivot.Functions.Sum]
    pivot = Pivot(self.table, functions,
                  source_domain[0], source_domain[1], source_domain[2])

    # pivot_total_h
    horizontal_vars = (Dv(source_domain[0].name, ["Total"]),
                       Dv("Aggregate", ["Count", "Sum"]),
                       Cv("c"), Cv("d"), Cv("e"))
    horizontal_x = np.array([[0, 0, 4, 2, 1],
                             [0, 1, 16, 2, 7]])
    expected = Table(Domain(horizontal_vars), horizontal_x)
    self.assert_table_equal(pivot.pivot_total_h, expected)

    # pivot_total_v
    expected = Table(Domain((Cv("Total"),)),
                     np.array([[3], [7], [4], [18]]))
    self.assert_table_equal(pivot.pivot_total_v, expected)

    # pivot_total
    expected = Table(Domain((Cv("Total"),)), np.array([[7], [25]]))
    self.assert_table_equal(pivot.pivot_total, expected)
def test_pivot_no_col_var(self):
    # Pivot with None passed between the first and third domain
    # variables; the expected "Aggregate" values use the localized
    # (Chinese) labels.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions,
                  source_domain[0], None, source_domain[2])
    actual = pivot.pivot_table

    aggregate_var = Dv("Aggregate",
                       ["数目", "非缺失数目", "总和", "平均值", "最小值",
                        "最大值", "众数", "中位数", "方差"])
    expected_vars = (aggregate_var, Cv("a"), Cv("b"))
    expected_x = np.array([
        [0, 0, 4, 0],
        [0, 1, 4, 0],
        [0, 2, 10, 0],
        [0, 3, 2.5, nan],
        [0, 4, 1, nan],
        [0, 5, 4, nan],
        [0, 6, 1, nan],
        [0, 7, 2.5, nan],
        [0, 8, 1.25, nan],
        [1, 0, 0, 4],
        [1, 1, 0, 3],
        [1, 2, 0, 18],
        [1, 3, nan, 6],
        [1, 4, nan, 5],
        [1, 5, nan, 7],
        [1, 6, nan, 5],
        [1, 7, nan, 6],
        [1, 8, nan, 2 / 3],
    ])
    expected = Table(Domain(source_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(actual, expected)
def test_pivot(self):
    # Pivot the fixture table with all of Pivot.Functions and the first
    # three domain variables; the expected "Aggregate" values use the
    # localized (Chinese) labels.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions,
                  source_domain[0], source_domain[1], source_domain[2])
    actual = pivot.pivot_table

    aggregate_var = Dv("Aggregate",
                       ["数目", "非缺失数目", "总和", "平均值", "最小值",
                        "最大值", "众数", "中位数", "方差"])
    expected_vars = (aggregate_var, Cv("c"), Cv("d"), Cv("e"))
    expected_x = np.array([
        [0, 0, 2, 1, 0],
        [0, 1, 2, 1, 0],
        [0, 2, 5, 2, 0],
        [0, 3, 2.5, 2, nan],
        [0, 4, 1, 2, nan],
        [0, 5, 4, 2, nan],
        [0, 6, 1, 2, nan],
        [0, 7, 2.5, 2, nan],
        [0, 8, 2.25, 0, nan],
        [1, 0, 2, 1, 1],
        [1, 1, 2, 0, 1],
        [1, 2, 11, 0, 7],
        [1, 3, 5.5, nan, 7],
        [1, 4, 5, nan, 7],
        [1, 5, 6, nan, 7],
        [1, 6, 5, nan, 7],
        [1, 7, 5.5, nan, 7],
        [1, 8, 0.25, nan, 0],
    ])
    expected = Table(Domain(source_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(actual, expected)
def test_group_table(self):
    # Compare Pivot.group_table, built from the first two domain
    # variables, with a hand-written table using localized (Chinese)
    # column names.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions,
                  source_domain[0], source_domain[1])
    actual = pivot.group_table

    expected_vars = (
        Cv("(数目)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["c", "d", "e"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
    )
    expected_x = np.array([
        [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
        [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
        [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
    ])
    # The first two source variables lead the expected domain.
    expected = Table(Domain(source_domain[:2] + expected_vars), expected_x)
    self.assert_table_equal(actual, expected)
def test_group_table_1(self):
    # Build a Pivot on table1 with its second variable and compare
    # group_table with the expected columns for every variable of
    # table1 (including "cls", "m1" and "m2").
    nan = np.nan
    row_var = self.table1.domain.variables[1]
    expected_domain = Domain([
        row_var,
        Cv("(count)"),
        Cv("c0 (count defined)"), Cv("c0 (sum)"), Cv("c0 (mean)"),
        Cv("c0 (min)"), Cv("c0 (max)"), Cv("c0 (mode)"),
        Cv("c0 (median)"), Cv("c0 (var)"),
        Cv("d1 (count defined)"), Dv("d1 (majority)", ["a", "b"]),
        Cv("c1 (count defined)"), Cv("c1 (sum)"), Cv("c1 (mean)"),
        Cv("c1 (min)"), Cv("c1 (max)"), Cv("c1 (mode)"),
        Cv("c1 (median)"), Cv("c1 (var)"),
        Cv("d2 (count defined)"), Dv("d2 (majority)", ["a", "b"]),
        Cv("c2 (count defined)"), Cv("c2 (sum)"), Cv("c2 (mean)"),
        Cv("c2 (min)"), Cv("c2 (max)"), Cv("c2 (mode)"),
        Cv("c2 (median)"), Cv("c2 (var)"),
        Cv("cls (count defined)"), Dv("cls (majority)", ["a", "b"]),
        Cv("m1 (count defined)"), Cv("m2 (count defined)"),
    ])
    expected_x = np.array([
        [0, 2, 0, 0, nan, nan, nan, nan, nan, nan,
         2, 0, 2, 4, 2, 1, 3, 1, 2, 1,
         2, 0, 1, 2, 2, 2, 2, 2, 2, 0,
         2, 0, 2, 1],
        [1, 1, 0, 0, nan, nan, nan, nan, nan, nan,
         1, 1, 1, 2, 2, 2, 2, 2, 2, 0,
         0, nan, 1, 3, 3, 3, 3, 3, 3, 0,
         1, 0, 1, 1],
    ])

    pivot = Pivot(self.table1, Pivot.Functions, row_var)
    actual = pivot.group_table
    self.assert_table_equal(actual, Table(expected_domain, expected_x))
def test_group_table_update(self):
    # Start with Count and Sum only, widen to all of Pivot.Functions via
    # update_group_table, then narrow back and check that the group table
    # equals the one captured initially.
    nan = np.nan
    source_domain = self.table.domain
    expected_vars = (
        Cv("(count)"),
        Cv("d1 (count defined)"), Dv("d1 (majority)", ["a", "b"]),
        Cv("d2 (count defined)"), Dv("d2 (majority)", ["c", "d", "e"]),
        Cv("c1 (count defined)"), Cv("c1 (sum)"), Cv("c1 (mean)"),
        Cv("c1 (min)"), Cv("c1 (max)"), Cv("c1 (mode)"),
        Cv("c1 (median)"), Cv("c1 (var)"),
    )
    expected_x = np.array([
        [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
        [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
        [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
    ])
    full_expected = Table(Domain(source_domain[:2] + expected_vars),
                          expected_x)

    reduced_functions = [Pivot.Functions.Count, Pivot.Functions.Sum]
    pivot = Pivot(self.table, reduced_functions,
                  source_domain[0], source_domain[1])
    initial_group_table = pivot.group_table

    pivot.update_group_table(Pivot.Functions)
    self.assert_table_equal(pivot.group_table, full_expected)

    pivot.update_group_table(reduced_functions)
    self.assert_table_equal(initial_group_table, pivot.group_table)
def test_group_table_no_col_var_metas(self):
    # First part: constructing Pivot(self.table1, var) for each meta of
    # table1 must raise TypeError.
    for meta_var in self.table1.domain.metas:
        with self.assertRaises(TypeError):
            Pivot(self.table1, meta_var)

    # Second part: move the last two attributes into metas and build the
    # Pivot with the last variable of the transformed domain.
    nan = np.nan
    original_domain = Domain([
        Dv("d1", ("a", "b")), Cv("c1"), Dv("d2", ("a", "b")), Cv("c2"),
    ])
    data = np.array([[0, 1, 0, 2], [1, 2, nan, 3], [0, 3, 1, nan]])
    meta_domain = Domain(original_domain.attributes[:2],
                         metas=original_domain.attributes[2:])
    table = Table(original_domain, data).transform(meta_domain)

    pivot = Pivot(table, Pivot.Functions, table.domain[-1])
    actual = pivot.group_table

    expected_vars = (
        table.domain[-1],
        Cv("(count)"),
        Cv("d1 (count defined)"), Dv("d1 (majority)", ["a", "b"]),
        Cv("c1 (count defined)"), Cv("c1 (sum)"), Cv("c1 (mean)"),
        Cv("c1 (min)"), Cv("c1 (max)"), Cv("c1 (mode)"),
        Cv("c1 (median)"), Cv("c1 (var)"),
        Cv("d2 (count defined)"), Dv("d2 (majority)", ["a", "b"]),
        Cv("c2 (count defined)"), Cv("c2 (sum)"), Cv("c2 (mean)"),
        Cv("c2 (min)"), Cv("c2 (max)"), Cv("c2 (mode)"),
        Cv("c2 (median)"), Cv("c2 (var)"),
    )
    expected_x = np.array([
        [0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
         1, 0, 1, 2, 2, 2, 2, 2, 2, 0],
        [1, 1, 1, 0, 1, 3, 3, 3, 3, 3, 3, 0,
         1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
    ], dtype=float)
    self.assert_table_equal(actual, Table(Domain(expected_vars), expected_x))
def test_group_table_no_col_var(self):
    # Build the group table from the first domain variable alone, then
    # verify that passing the same variable twice gives the same table.
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions, source_domain[0])
    single_var_table = pivot.group_table

    expected_vars = (
        Cv("(count)"),
        Cv("d1 (count defined)"), Dv("d1 (majority)", ["a", "b"]),
        Cv("d2 (count defined)"), Dv("d2 (majority)", ["c", "d", "e"]),
        Cv("c1 (count defined)"), Cv("c1 (sum)"), Cv("c1 (mean)"),
        Cv("c1 (min)"), Cv("c1 (max)"), Cv("c1 (mode)"),
        Cv("c1 (median)"), Cv("c1 (var)"),
    )
    # The original code rebinds ``domain`` to this new Domain and keeps
    # indexing it afterwards; a separate name is used here for clarity.
    result_domain = Domain(source_domain[:1] + expected_vars)
    expected_x = np.array([
        [0, 4, 4, 0, 3, 0, 4, 10, 2.5, 1, 4, 1, 2.5, 1.25],
        [1, 4, 4, 1, 4, 0, 3, 18, 6, 5, 7, 5, 6, 2 / 3],
    ], dtype=float)
    expected = Table(Domain(result_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(single_var_table, expected)

    pivot = Pivot(self.table, Pivot.Functions,
                  result_domain[0], result_domain[0])
    same_vars_table = pivot.group_table
    self.assert_table_equal(single_var_table, same_vars_table)
def test_group_table_use_cached(self, count_func, sum_func):
    # After the Pivot is created with Count and Sum, later calls to
    # update_group_table must not invoke the two supplied mocks again.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, [Pivot.Count, Pivot.Sum],
                  source_domain[0], source_domain[1])
    initial_group_table = pivot.group_table

    # Forget any calls recorded so far.
    count_func.reset_mock()
    sum_func.reset_mock()

    pivot.update_group_table(Pivot.Functions)
    count_func.assert_not_called()
    sum_func.assert_not_called()

    expected_vars = (
        Cv("(count)"),
        Cv("d1 (count defined)"), Dv("d1 (majority)", ["a", "b"]),
        Cv("d2 (count defined)"), Dv("d2 (majority)", ["c", "d", "e"]),
        Cv("c1 (count defined)"), Cv("c1 (sum)"), Cv("c1 (mean)"),
        Cv("c1 (min)"), Cv("c1 (max)"), Cv("c1 (mode)"),
        Cv("c1 (median)"), Cv("c1 (var)"),
    )
    expected_x = np.array([
        [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
        [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
        [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
    ])
    updated_group_table = pivot.group_table
    expected = Table(Domain(source_domain[:2] + expected_vars), expected_x)
    self.assert_table_equal(updated_group_table, expected)

    pivot.update_group_table([Pivot.Count, Pivot.Sum])
    count_func.assert_not_called()
    sum_func.assert_not_called()
    self.assert_table_equal(pivot.group_table, initial_group_table)
def test_group_table_use_cached(self, count_func, sum_func):
    # After the Pivot is created with Count and Sum, later calls to
    # update_group_table must not invoke the two supplied mocks again.
    # Expected column names use the localized (Chinese) labels.
    nan = np.nan
    source_domain = self.table.domain
    pivot = Pivot(self.table, [Pivot.Count, Pivot.Sum],
                  source_domain[0], source_domain[1])
    initial_group_table = pivot.group_table

    # Forget any calls recorded so far.
    count_func.reset_mock()
    sum_func.reset_mock()

    pivot.update_group_table(Pivot.Functions)
    count_func.assert_not_called()
    sum_func.assert_not_called()

    expected_vars = (
        Cv("(数目)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["c", "d", "e"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
    )
    expected_x = np.array([
        [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
        [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
        [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
    ])
    updated_group_table = pivot.group_table
    expected = Table(Domain(source_domain[:2] + expected_vars), expected_x)
    self.assert_table_equal(updated_group_table, expected)

    pivot.update_group_table([Pivot.Count, Pivot.Sum])
    count_func.assert_not_called()
    sum_func.assert_not_called()
    self.assert_table_equal(pivot.group_table, initial_group_table)
def test_group_table_no_col_var(self):
    # Build the group table from the first domain variable alone, then
    # verify that passing the same variable twice gives the same table.
    # Expected column names use the localized (Chinese) labels.
    source_domain = self.table.domain
    pivot = Pivot(self.table, Pivot.Functions, source_domain[0])
    single_var_table = pivot.group_table

    expected_vars = (
        Cv("(数目)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["c", "d", "e"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
    )
    # The original code rebinds ``domain`` to this new Domain and keeps
    # indexing it afterwards; a separate name is used here for clarity.
    result_domain = Domain(source_domain[:1] + expected_vars)
    expected_x = np.array([
        [0, 4, 4, 0, 3, 0, 4, 10, 2.5, 1, 4, 1, 2.5, 1.25],
        [1, 4, 4, 1, 4, 0, 3, 18, 6, 5, 7, 5, 6, 2 / 3],
    ], dtype=float)
    expected = Table(Domain(result_domain[:1] + expected_vars), expected_x)
    self.assert_table_equal(single_var_table, expected)

    pivot = Pivot(self.table, Pivot.Functions,
                  result_domain[0], result_domain[0])
    same_vars_table = pivot.group_table
    self.assert_table_equal(single_var_table, same_vars_table)
def test_group_table_no_col_var_metas(self):
    # First part: constructing Pivot(self.table1, var) for each meta of
    # table1 must raise TypeError.
    for meta_var in self.table1.domain.metas:
        with self.assertRaises(TypeError):
            Pivot(self.table1, meta_var)

    # Second part: move the last two attributes into metas and build the
    # Pivot with the last variable of the transformed domain. Expected
    # column names use the localized (Chinese) labels.
    nan = np.nan
    original_domain = Domain([
        Dv("d1", ("a", "b")), Cv("c1"), Dv("d2", ("a", "b")), Cv("c2"),
    ])
    data = np.array([[0, 1, 0, 2], [1, 2, nan, 3], [0, 3, 1, nan]])
    meta_domain = Domain(original_domain.attributes[:2],
                         metas=original_domain.attributes[2:])
    table = Table(original_domain, data).transform(meta_domain)

    pivot = Pivot(table, Pivot.Functions, table.domain[-1])
    actual = pivot.group_table

    expected_vars = (
        table.domain[-1],
        Cv("(数目)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["a", "b"]),
        Cv("c2 (非缺失数目)"), Cv("c2 (总和)"), Cv("c2 (平均值)"),
        Cv("c2 (最小值)"), Cv("c2 (最大值)"), Cv("c2 (众数)"),
        Cv("c2 (中位数)"), Cv("c2 (方差)"),
    )
    expected_x = np.array([
        [0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
         1, 0, 1, 2, 2, 2, 2, 2, 2, 0],
        [1, 1, 1, 0, 1, 3, 3, 3, 3, 3, 3, 0,
         1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
    ], dtype=float)
    self.assert_table_equal(actual, Table(Domain(expected_vars), expected_x))
def test_group_table_metas(self):
    # Move the last two attributes into metas, copy the table, cast its
    # metas array to object dtype, and check the resulting group table.
    # Expected column names use the localized (Chinese) labels.
    nan = np.nan
    original_domain = Domain([
        Dv("d1", ("a", "b")), Cv("c1"), Dv("d2", ("a", "b")), Cv("c2"),
    ])
    data = np.array([[0, 1, 0, 2], [1, 2, nan, 3], [0, 3, 1, nan]])
    meta_domain = Domain(original_domain.attributes[:2],
                         metas=original_domain.attributes[2:])
    table = Table(original_domain, data).transform(meta_domain).copy()
    # The metas array is replaced inside the table's ``unlocked`` context.
    with table.unlocked():
        table.metas = table.metas.astype(object)

    pivot = Pivot(table, Pivot.Functions, table.domain[-1])
    actual = pivot.group_table

    expected_vars = (
        table.domain[-1],
        Cv("(数目)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["a", "b"]),
        Cv("c2 (非缺失数目)"), Cv("c2 (总和)"), Cv("c2 (平均值)"),
        Cv("c2 (最小值)"), Cv("c2 (最大值)"), Cv("c2 (众数)"),
        Cv("c2 (中位数)"), Cv("c2 (方差)"),
    )
    expected_x = np.array([
        [0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
         1, 0, 1, 2, 2, 2, 2, 2, 2, 0],
        [1, 1, 1, 0, 1, 3, 3, 3, 3, 3, 3, 0,
         1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
    ], dtype=float)
    self.assert_table_equal(actual, Table(Domain(expected_vars), expected_x))
def test_group_table_metas(self):
    # Move the last two attributes into metas (transform called with
    # copy=True), cast the metas array to object dtype, and check the
    # resulting group table.
    nan = np.nan
    original_domain = Domain([
        Dv("d1", ("a", "b")), Cv("c1"), Dv("d2", ("a", "b")), Cv("c2"),
    ])
    data = np.array([[0, 1, 0, 2], [1, 2, nan, 3], [0, 3, 1, nan]])
    meta_domain = Domain(original_domain.attributes[:2],
                         metas=original_domain.attributes[2:])
    table = Table(original_domain, data).transform(meta_domain, copy=True)
    # The metas array is replaced inside the table's ``unlocked`` context.
    with table.unlocked():
        table.metas = table.metas.astype(object)

    pivot = Pivot(table, Pivot.Functions, table.domain[-1])
    actual = pivot.group_table

    expected_vars = (
        table.domain[-1],
        Cv("(count)"),
        Cv("d1 (count defined)"), Dv("d1 (majority)", ["a", "b"]),
        Cv("c1 (count defined)"), Cv("c1 (sum)"), Cv("c1 (mean)"),
        Cv("c1 (min)"), Cv("c1 (max)"), Cv("c1 (mode)"),
        Cv("c1 (median)"), Cv("c1 (var)"),
        Cv("d2 (count defined)"), Dv("d2 (majority)", ["a", "b"]),
        Cv("c2 (count defined)"), Cv("c2 (sum)"), Cv("c2 (mean)"),
        Cv("c2 (min)"), Cv("c2 (max)"), Cv("c2 (mode)"),
        Cv("c2 (median)"), Cv("c2 (var)"),
    )
    expected_x = np.array([
        [0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
         1, 0, 1, 2, 2, 2, 2, 2, 2, 0],
        [1, 1, 1, 0, 1, 3, 3, 3, 3, 3, 3, 0,
         1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
    ], dtype=float)
    self.assert_table_equal(actual, Table(Domain(expected_vars), expected_x))
def test_group_table_1(self):
    # Build a Pivot on table1 with its second variable and compare
    # group_table with the expected columns for every variable of
    # table1 (including "cls", "m1" and "m2"). Expected column names use
    # the localized (Chinese) labels.
    nan = np.nan
    row_var = self.table1.domain.variables[1]
    expected_domain = Domain([
        row_var,
        Cv("(数目)"),
        Cv("c0 (非缺失数目)"), Cv("c0 (总和)"), Cv("c0 (平均值)"),
        Cv("c0 (最小值)"), Cv("c0 (最大值)"), Cv("c0 (众数)"),
        Cv("c0 (中位数)"), Cv("c0 (方差)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["a", "b"]),
        Cv("c2 (非缺失数目)"), Cv("c2 (总和)"), Cv("c2 (平均值)"),
        Cv("c2 (最小值)"), Cv("c2 (最大值)"), Cv("c2 (众数)"),
        Cv("c2 (中位数)"), Cv("c2 (方差)"),
        Cv("cls (非缺失数目)"), Dv("cls (最常见)", ["a", "b"]),
        Cv("m1 (非缺失数目)"), Cv("m2 (非缺失数目)"),
    ])
    expected_x = np.array([
        [0, 2, 0, 0, nan, nan, nan, nan, nan, nan,
         2, 0, 2, 4, 2, 1, 3, 1, 2, 1,
         2, 0, 1, 2, 2, 2, 2, 2, 2, 0,
         2, 0, 2, 1],
        [1, 1, 0, 0, nan, nan, nan, nan, nan, nan,
         1, 1, 1, 2, 2, 2, 2, 2, 2, 0,
         0, nan, 1, 3, 3, 3, 3, 3, 3, 0,
         1, 0, 1, 1],
    ])

    pivot = Pivot(self.table1, Pivot.Functions, row_var)
    actual = pivot.group_table
    self.assert_table_equal(actual, Table(expected_domain, expected_x))
def test_group_table_update(self):
    # Start with Count and Sum only, widen to all of Pivot.Functions via
    # update_group_table, then narrow back and check that the group table
    # equals the one captured initially. Expected column names use the
    # localized (Chinese) labels.
    nan = np.nan
    source_domain = self.table.domain
    expected_vars = (
        Cv("(数目)"),
        Cv("d1 (非缺失数目)"), Dv("d1 (最常见)", ["a", "b"]),
        Cv("d2 (非缺失数目)"), Dv("d2 (最常见)", ["c", "d", "e"]),
        Cv("c1 (非缺失数目)"), Cv("c1 (总和)"), Cv("c1 (平均值)"),
        Cv("c1 (最小值)"), Cv("c1 (最大值)"), Cv("c1 (众数)"),
        Cv("c1 (中位数)"), Cv("c1 (方差)"),
    )
    expected_x = np.array([
        [0, 0, 2, 2, 0, 2, 0, 2, 5, 2.5, 1, 4, 1, 2.5, 2.25],
        [0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
        [0, 2, 0, 0, nan, 0, nan, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 0, 2, 2, 1, 2, 0, 2, 11, 5.5, 5, 6, 5, 5.5, 0.25],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, nan, nan, nan, nan, nan, nan],
        [1, 2, 1, 1, 1, 1, 2, 1, 7, 7, 7, 7, 7, 7, 0],
    ])
    full_expected = Table(Domain(source_domain[:2] + expected_vars),
                          expected_x)

    reduced_functions = [Pivot.Functions.Count, Pivot.Functions.Sum]
    pivot = Pivot(self.table, reduced_functions,
                  source_domain[0], source_domain[1])
    initial_group_table = pivot.group_table

    pivot.update_group_table(Pivot.Functions)
    self.assert_table_equal(pivot.group_table, full_expected)

    pivot.update_group_table(reduced_functions)
    self.assert_table_equal(initial_group_table, pivot.group_table)