def aggregate_nd(nd):
    """Aggregate `nd` identical integer columns and verify the outcome.

    Every column holds the values ``0 .. div - 1`` repeated cyclically over
    ``nrows`` rows. The checks expect ``div`` exemplars, each one carrying
    ``nrows // div`` members, and an unmodified input frame.
    """
    nrows = 1000
    div = 50
    column = [i % div for i in range(nrows)]
    expected_ltypes = (ltype.int,) * nd + (ltype.int,)
    expected_exemplars = [list(range(div))] * nd + [[nrows // div] * div]

    source = dt.Frame([column] * nd)
    snapshot = dt.Frame(source)
    exemplars, members = aggregate(
        source,
        min_rows=0,
        nd_max_bins=div,
        seed=1,
        progress_fn=report_progress,
    )

    raw_ids = members.to_list()[0]
    ordered = exemplars.sort("C0")
    row_order = frame_column_rowindex(ordered, 0).to_list()
    # Replace every exemplar id by its position in the row index of the
    # frame sorted on "C0".
    remapped_ids = [row_order.index(eid) for eid in raw_ids]

    frame_integrity_check(members)
    assert members.shape == (nrows, 1)
    assert members.ltypes == (ltype.int,)
    assert remapped_ids == column

    frame_integrity_check(exemplars)
    assert exemplars.shape == (div, nd + 1)
    assert exemplars.ltypes == expected_ltypes
    assert ordered.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_mixed_random():
    """Aggregate an unsorted (integer, string) frame that contains NAs."""
    nx_bins = 6
    source = dt.Frame([
        [1, 3, 0, None, None, 6, 6, None, 1, 2, 4],
        [None, "blue", "indigo", "abc", "def", "red",
         "violet", "ghi", "yellow", "violet", "red"],
    ])
    snapshot = dt.Frame(source)
    expected_members = [[0, 2, 3, 1, 1, 5, 7, 1, 8, 6, 4]]
    expected_exemplars = [
        [1, None, 3, 0, 4, 6, 2, 6, 1],
        [None, 'abc', 'blue', 'indigo', 'red', 'red',
         'violet', 'violet', 'yellow'],
        [1, 3, 1, 1, 1, 1, 1, 1, 1],
    ]

    exemplars, members = aggregate(source, min_rows=0, nx_bins=nx_bins,
                                   progress_fn=report_progress)

    frame_integrity_check(members)
    assert members.shape == (11, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (9, 3)
    assert exemplars.ltypes == (ltype.int, ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_categorical_sampling():
    """Aggregate two string columns with ``nx_bins=2`` and ``ny_bins=2``.

    Only the shapes and logical types of the outputs are verified; the
    concrete exemplar ids and exemplar rows are not pinned by this test.
    """
    colors = ["blue", None, "indigo", "red", "violet",
              "yellow", "violet", "green", None, None]
    labels = ["Monday", "abc", "Monday", "Wednesday", "Saturday",
              "Thursday", "Friday", "Wednesday", "def", "ghi"]
    source = dt.Frame([colors, labels])
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, nx_bins=2, ny_bins=2,
                                   min_rows=0, seed=1)

    frame_integrity_check(members)
    assert members.shape == (10, 1)
    assert members.ltypes == (ltype.int,)

    frame_integrity_check(exemplars)
    assert exemplars.shape == (4, 3)
    assert exemplars.ltypes == (ltype.str, ltype.str, ltype.int)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_categorical_sorted():
    """Aggregate two string columns whose rows are all distinct pairs.

    Each of the ten rows is expected to become its own exemplar with a
    member count of one.
    """
    source = dt.Frame([
        [None, None, "abc", "blue", "green",
         "indigo", "orange", "red", "violet", "yellow"],
        [None, "abc", None, "Friday", "Monday",
         "Saturday", "Sunday", "Thursday", "Tuesday", "Wednesday"],
    ])
    snapshot = dt.Frame(source)
    expected_members = [[0, 2, 1, 3, 4, 5, 6, 7, 8, 9]]
    expected_exemplars = [
        [None, "abc", None, "blue", "green",
         "indigo", "orange", "red", "violet", "yellow"],
        [None, None, "abc", "Friday", "Monday",
         "Saturday", "Sunday", "Thursday", "Tuesday", "Wednesday"],
        [1] * 10,
    ]

    exemplars, members = aggregate(source, min_rows=0)

    frame_integrity_check(members)
    assert members.shape == (10, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (10, 3)
    assert exemplars.ltypes == (ltype.str, ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_mixed_sorted():
    """Aggregate a sorted (integer, string) frame with ``nx_bins=7``.

    Each of the ten rows is expected to become its own exemplar with a
    member count of one.
    """
    nx_bins = 7
    source = dt.Frame([
        [None, None, 0, 0, 1, 2, 3, 4, 5, 6],
        [None, "a", None, "blue", "green",
         "indigo", "orange", "red", "violet", "yellow"],
    ])
    snapshot = dt.Frame(source)
    expected_members = [[0, 2, 1, 3, 4, 5, 6, 7, 8, 9]]
    expected_exemplars = [
        [None, 0, None, 0, 1, 2, 3, 4, 5, 6],
        [None, None, "a", "blue", "green",
         "indigo", "orange", "red", "violet", "yellow"],
        [1] * 10,
    ]

    exemplars, members = aggregate(source, min_rows=0, nx_bins=nx_bins,
                                   progress_fn=report_progress)

    frame_integrity_check(members)
    assert members.shape == (10, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (10, 3)
    assert exemplars.ltypes == (ltype.int, ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_3d_real():
    """Aggregate three float columns with ``nd_max_bins=3``.

    Exemplar ids are remapped to positions in the exemplar frame sorted on
    "C0" before being compared, and the exemplar values are compared on that
    sorted frame as well.
    """
    source = dt.Frame([
        [0.95, 0.50, 0.55, 0.10, 0.90, 0.50, 0.90, 0.50, 0.90, 1.00],
        [1.00, 0.55, 0.45, 0.05, 0.95, 0.45, 0.90, 0.40, 1.00, 0.90],
        [0.90, 0.50, 0.55, 0.00, 1.00, 0.50, 0.95, 0.45, 0.95, 0.95],
    ])
    snapshot = dt.Frame(source)
    expected_ids = [2, 1, 1, 0, 2, 1, 2, 1, 2, 2]
    expected_exemplars = [
        [0.10, 0.50, 0.95],
        [0.05, 0.55, 1.00],
        [0.00, 0.50, 0.90],
        [1, 4, 5],
    ]

    exemplars, members = aggregate(source, min_rows=0, nd_max_bins=3)

    raw_ids = members.to_list()[0]
    ordered = exemplars.sort("C0")
    row_order = frame_column_rowindex(ordered, 0).to_list()
    # Replace every exemplar id by its position in the row index of the
    # frame sorted on "C0".
    remapped_ids = [row_order.index(eid) for eid in raw_ids]

    frame_integrity_check(members)
    assert members.shape == (10, 1)
    assert members.ltypes == (ltype.int,)
    assert remapped_ids == expected_ids

    frame_integrity_check(exemplars)
    assert exemplars.shape == (3, 4)
    assert exemplars.ltypes == (ltype.real, ltype.real,
                                ltype.real, ltype.int)
    assert ordered.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_3d_fixed_small_radius():
    """Aggregate a 3-column frame with ``fixed_radius=0.1``.

    The expected exemplar frame is the input itself with a
    ``members_count`` column of ones appended, and row ``i`` is expected to
    get exemplar id ``i``.
    """
    frame = dt.Frame([range(10)] * 3)
    exemplars, members = aggregate(frame, min_rows=0, nd_max_bins=1,
                                   fixed_radius=0.1)

    unit_counts = dt.Frame([1] * 10 / dt.stype.int32,
                           names=["members_count"])
    expected_exemplars = cbind(frame, unit_counts)
    expected_members = dt.Frame(range(10), names=["exemplar_id"])

    assert_equals(exemplars, expected_exemplars)
    assert_equals(members, expected_members)
def test_aggregate_nd_none(ncols):
    """Aggregate an `ncols`-column frame made entirely of missing values.

    The expected outcome is a single all-NA exemplar that owns every row.
    """
    nrows = 10000
    frame = dt.Frame([[None] * nrows] * ncols)
    exemplars, members = aggregate(frame)

    for checked in (frame, exemplars, members):
        frame_integrity_check(checked)

    expected_exemplars = [[None]] * ncols + [[nrows]]
    expected_members = [[0] * nrows]
    assert exemplars.to_list() == expected_exemplars
    assert members.to_list() == expected_members
def test_aggregate_3d_fixed_big_radius():
    """Aggregate a 3-column frame with ``fixed_radius=10``.

    The expected outcome is one exemplar (the all-zero row) that owns all
    ten rows.
    """
    frame = dt.Frame([range(10)] * 3)
    exemplars, members = aggregate(frame, min_rows=0, nd_max_bins=10,
                                   fixed_radius=10)

    expected_exemplars = dt.Frame(
        [[0], [0], [0], [10]],
        stypes=[dt.stype.int32] * 4,
        names=["C0", "C1", "C2", "members_count"],
    )
    expected_members = dt.Frame([0] * 10 / dt.stype.int32,
                                names=["exemplar_id"])

    assert_equals(exemplars, expected_exemplars)
    assert_equals(members, expected_members)
def test_aggregate_1d_na():
    """Aggregate a single all-NA column of 53 rows with ``n_bins=1``.

    The expected outcome is one NA exemplar that owns all 53 rows.
    """
    source = dt.Frame([None] * 53)
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, min_rows=0, n_bins=1)

    frame_integrity_check(members)
    frame_integrity_check(exemplars)

    expected_members = dt.Frame([0] * 53, names=["exemplar_id"],
                                stypes=[dt.stype.int32])
    assert_equals(members, expected_members)

    expected_exemplars = dt.Frame(C0=[None], members_count=[53])
    assert_equals(exemplars, expected_exemplars)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def aggregate_nd(nd):
    """Aggregate `nd` identical integer columns and verify the outcome.

    Every column holds the values ``0 .. div - 1`` repeated cyclically over
    ``nrows`` rows. Besides the aggregation result, the progress reports
    delivered through ``dt.options.progress`` are validated: each report
    individually, plus the first and the last two reports.
    """
    nrows = 1000
    div = 50
    column = [i % div for i in range(nrows)]
    expected_ltypes = (ltype.int,) * nd + (ltype.int,)
    expected_exemplars = [list(range(div))] * nd + [[nrows // div] * div]

    source = dt.Frame([column] * nd)
    snapshot = dt.Frame(source)
    reports = []

    def progress_fn(p):
        # Validate each report as it arrives, then keep it for later checks.
        assert 0 <= p.progress <= 1
        assert p.status in ("running", "finished", "cancelled", "error")
        assert p.message in ("", "Preparing", "Aggregating", "Sampling",
                             "Finalizing")
        reports.append(p)

    with dt.options.progress.context(callback=progress_fn, enabled=True,
                                     min_duration=0):
        exemplars, members = aggregate(source, min_rows=0, nd_max_bins=div,
                                       seed=1)

    first_report = reports[0]
    assert first_report.progress == 0
    assert first_report.status == "running"
    assert first_report.message == "Preparing"

    penultimate_report = reports[-2]
    assert penultimate_report.progress <= 1.0
    assert penultimate_report.status == "running"
    assert penultimate_report.message == "Finalizing"

    final_report = reports[-1]
    assert final_report.progress == 1.0
    assert final_report.status == "finished"
    assert final_report.message == ""

    raw_ids = members.to_list()[0]
    ordered = exemplars.sort("C0")
    row_order = frame_column_rowindex(ordered, 0).to_list()
    # Replace every exemplar id by its position in the row index of the
    # frame sorted on "C0".
    remapped_ids = [row_order.index(eid) for eid in raw_ids]

    frame_integrity_check(members)
    assert members.shape == (nrows, 1)
    assert members.ltypes == (ltype.int,)
    assert remapped_ids == column

    frame_integrity_check(exemplars)
    assert exemplars.shape == (div, nd + 1)
    assert exemplars.ltypes == expected_ltypes
    assert ordered.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_mixed_distinct_na():
    """Aggregate six distinct strings paired with an all-NA column.

    The expected outcome is a single exemplar ("a", NA) that owns all six
    rows.
    """
    source = dt.Frame([["a", "b", "c", "d", "e", "f"], [None] * 6])
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, min_rows=0, nx_bins=3, ny_bins=3)

    frame_integrity_check(members)
    frame_integrity_check(exemplars)

    expected_members = dt.Frame([0] * 6, names=["exemplar_id"],
                                stypes=[dt.stype.int32])
    assert_equals(members, expected_members)

    expected_exemplars = dt.Frame(C0=['a'], C1=[None], members_count=[6])
    assert_equals(exemplars, expected_exemplars)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_mixed_na():
    """Aggregate a constant string column paired with an all-NA column.

    The expected outcome is a single exemplar ("Hello world", NA) that owns
    all 53 rows.
    """
    source = dt.Frame([["Hello world"] * 53, [None] * 53])
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, min_rows=0)

    frame_integrity_check(members)
    frame_integrity_check(exemplars)

    expected_members = dt.Frame([0] * 53, names=["exemplar_id"],
                                stypes=[dt.stype.int32])
    assert_equals(members, expected_members)

    expected_exemplars = dt.Frame(C0=["Hello world"], C1=[None],
                                  members_count=[53])
    assert_equals(exemplars, expected_exemplars)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_nd(ncols):
    """Aggregate `ncols` identical integer columns and verify the outcome.

    Every column holds the values ``0 .. div - 1`` repeated cyclically over
    ``nrows`` rows. The progress reports delivered through
    ``dt.options.progress`` are checked for non-decreasing progress and for
    messages that follow the order of the known stages.
    """
    nrows = 1000
    div = 50
    column = [i % div for i in range(nrows)]
    expected_ltypes = (ltype.int,) * ncols + (ltype.int,)
    expected_exemplars = [list(range(div))] * ncols + [[nrows // div] * div]

    source = dt.Frame([column] * ncols)
    snapshot = dt.Frame(source)
    progress_reports = []

    def progress_fn(p):
        # Validate each report as it arrives, then keep it for later checks.
        assert 0 <= p.progress <= 1
        assert p.status in ("running", "finished", "cancelled", "error")
        progress_reports.append(p)

    with dt.options.progress.context(callback=progress_fn, enabled=True,
                                     min_duration=0):
        exemplars, members = aggregate(source, min_rows=0, nd_max_bins=div,
                                       seed=1)

    stage_order = ("", "Preparing", "Aggregating", "Sampling", "Finalizing")
    last_stage = 0
    previous = None
    for report in progress_reports:
        if previous is not None:
            assert report.progress >= previous.progress
        # `index` raises for a message that is not a known stage.
        stage = stage_order.index(report.message)
        assert last_stage <= stage or report.status == "finished"
        last_stage = stage
        previous = report

    final_report = progress_reports[-1]
    assert final_report.progress == 1.0
    assert final_report.status == "finished"
    assert final_report.message == "Finalizing"

    frame_integrity_check(members)
    assert members.shape == (nrows, 1)
    assert members.ltypes == (ltype.int,)
    # Each row must be assigned to the exemplar carrying the row's own value.
    for row, value in enumerate(column):
        assert value == exemplars[members[row, 0], 0]

    frame_integrity_check(exemplars)
    assert exemplars.shape == (div, ncols + 1)
    assert exemplars.ltypes == expected_ltypes
    assert exemplars.sort("C0").to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_continuous_real_sorted():
    """Aggregate a sorted float column with one NA using ``n_bins=3``.

    The expected result has four exemplars: one for the NA and three for
    the numeric values.
    """
    source = dt.Frame([0.0, None, 0.1, 0.2, 0.3, 0.4,
                       0.5, 0.6, 0.7, 0.8, 0.9])
    snapshot = dt.Frame(source)
    expected_members = [[1, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]]
    expected_exemplars = [[None, 0.0, 0.4, 0.7], [1, 4, 3, 3]]

    exemplars, members = aggregate(source, min_rows=0, n_bins=3)

    frame_integrity_check(members)
    assert members.shape == (11, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (4, 2)
    assert exemplars.ltypes == (ltype.real, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_continuous_integer_random():
    """Aggregate an unsorted integer column with NAs using ``n_bins=3``.

    The expected result has four exemplars: one for the NAs and three for
    the numeric values.
    """
    source = dt.Frame([None, 9, 8, None, 2, 3, 3, 0, 5, 5, 8, 1, None])
    snapshot = dt.Frame(source)
    expected_members = [[0, 3, 3, 0, 1, 1, 1, 1, 2, 2, 3, 1, 0]]
    expected_exemplars = [[None, 2, 5, 9], [3, 5, 2, 3]]

    exemplars, members = aggregate(source, min_rows=0, n_bins=3)

    frame_integrity_check(members)
    assert members.shape == (13, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (4, 2)
    assert exemplars.ltypes == (ltype.int, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_continuous_integer_sorted():
    """Aggregate a sorted integer column with two NAs using ``n_bins=3``.

    The expected result has four exemplars: one for the NAs and three for
    the numeric values.
    """
    source = dt.Frame([0, 1, None, 2, 3, 4, 5, 6, 7, None, 8, 9])
    snapshot = dt.Frame(source)
    expected_members = [[1, 1, 0, 1, 1, 2, 2, 2, 3, 0, 3, 3]]
    expected_exemplars = [[None, 0, 4, 7], [2, 4, 3, 3]]

    exemplars, members = aggregate(source, min_rows=0, n_bins=3)

    frame_integrity_check(members)
    assert members.shape == (12, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (4, 2)
    assert exemplars.ltypes == (ltype.int, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_continuous_integer_equal():
    """Aggregate a column of zeros and NAs using ``n_bins=2``.

    The expected result has two exemplars: the NA (2 members) and the zero
    (8 members).
    """
    source = dt.Frame([0, 0, None, 0, None, 0, 0, 0, 0, 0])
    snapshot = dt.Frame(source)
    expected_members = [[1, 1, 0, 1, 0, 1, 1, 1, 1, 1]]
    expected_exemplars = [[None, 0], [2, 8]]

    exemplars, members = aggregate(source, min_rows=0, n_bins=2)

    frame_integrity_check(members)
    assert members.shape == (10, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (2, 2)
    # NOTE(review): the value column is expected to be boolean here, not
    # integer -- confirm this matches the Frame type inference in use.
    assert exemplars.ltypes == (ltype.bool, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_view_0d_continuous_integer():
    """Aggregate a row slice of an integer frame with ``min_rows=100``.

    The slice has only six rows, and each of them is expected to become its
    own exemplar with a member count of one.
    """
    source = dt.Frame([0, 1, None, 2, None, 3, 3, 4, 4, None, 5])
    snapshot = dt.Frame(source)
    view = source[5:11, :]
    expected_members = [[1, 2, 3, 4, 0, 5]]
    expected_exemplars = [[None, 3, 3, 4, 4, 5], [1] * 6]

    exemplars, members = aggregate(view, min_rows=100, n_bins=10)

    frame_integrity_check(members)
    assert members.shape == (6, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (6, 2)
    assert exemplars.ltypes == (ltype.int, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The frame behind the view must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_continuous_integer_tiny():
    """Aggregate a one-row integer frame with ``n_bins=1``.

    The single row is expected to be the only exemplar, with one member.
    """
    source = dt.Frame([5])
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, min_rows=0, n_bins=1)

    frame_integrity_check(members)
    assert members.shape == (1, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == [[0]]

    frame_integrity_check(exemplars)
    assert exemplars.shape == (1, 2)
    assert exemplars.ltypes == (ltype.int, ltype.int)
    assert exemplars.to_list() == [[5], [1]]

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_empty():
    """Aggregate a frame built from an empty list with ``n_bins=1``.

    Both outputs are expected to be zero-row, single-column integer frames.
    """
    source = dt.Frame([])
    snapshot = dt.Frame(source)
    empty_column = [[]]

    exemplars, members = aggregate(source, min_rows=0, n_bins=1)

    frame_integrity_check(members)
    assert members.shape == (0, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == empty_column

    frame_integrity_check(exemplars)
    assert exemplars.shape == (0, 1)
    assert exemplars.ltypes == (ltype.int,)
    assert exemplars.to_list() == empty_column

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_na():
    """Aggregate two all-NA columns of 53 rows.

    The expected outcome is a single (NA, NA) exemplar that owns all 53
    rows.
    """
    source = dt.Frame([[None] * 53, [None] * 53])
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, min_rows=0)

    frame_integrity_check(members)
    frame_integrity_check(exemplars)

    expected_members = dt.Frame([0] * 53, names=["exemplar_id"],
                                stypes=[dt.stype.int32])
    assert_equals(members, expected_members)

    expected_exemplars = dt.Frame(
        [[None], [None], [53]],
        names=["C0", "C1", "members_count"],
        stypes=[dt.stype.bool8, dt.stype.bool8, dt.stype.int32],
    )
    assert_equals(exemplars, expected_exemplars)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_view_1d_categorical():
    """Aggregate rows 2..5 of a string column taken as a slice.

    The expected exemplars are the NA, "charlie" (2 members) and "delta".
    """
    source = dt.Frame(["alpha", "bravo", "delta", None,
                       "charlie", "charlie", "echo", None])
    snapshot = dt.Frame(source)
    view = source[2:6, :]
    expected_members = [[2, 0, 1, 1]]
    expected_exemplars = [[None, "charlie", "delta"], [1, 2, 1]]

    exemplars, members = aggregate(view, min_rows=0)

    frame_integrity_check(members)
    assert members.shape == (4, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (3, 2)
    assert exemplars.ltypes == (ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The frame behind the view must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_categorical_sampling():
    """Aggregate a string column with ``n_bins=3`` and a fixed seed.

    Only the shapes and logical types of the outputs are verified; the
    concrete exemplar ids and exemplar rows are not pinned by this test.
    """
    source = dt.Frame(["blue", "orange", "yellow", None, "green",
                       "blue", "indigo", None, "violet"])
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, n_bins=3, min_rows=0, seed=1)

    frame_integrity_check(members)
    assert members.shape == (9, 1)
    assert members.ltypes == (ltype.int,)

    frame_integrity_check(exemplars)
    assert exemplars.shape == (3, 2)
    assert exemplars.ltypes == (ltype.str, ltype.int)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_view_1d_continuous_float():
    """Aggregate rows 5..10 of a float column taken as a slice, ``n_bins=5``.

    The expected result has four exemplars: one for the NA and three for
    the numeric values.
    """
    source = dt.Frame([0.0, 1.1, None, 2.2, None, 3.1,
                       3.2, 4.1, 4.0, None, 5.1])
    snapshot = dt.Frame(source)
    view = source[5:11, :]
    expected_members = [[1, 1, 2, 2, 0, 3]]
    expected_exemplars = [[None, 3.1, 4.1, 5.1], [1, 2, 2, 1]]

    exemplars, members = aggregate(view, min_rows=0, n_bins=5)

    frame_integrity_check(members)
    assert members.shape == (6, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (4, 2)
    assert exemplars.ltypes == (ltype.real, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The frame behind the view must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_na_inf():
    """Aggregate a column alternating NA and ``math.inf`` with ``n_bins=1``.

    The expected outcome is a single NA exemplar that owns all 52 rows.
    """
    source = dt.Frame([None, math.inf] * 26)
    snapshot = dt.Frame(source)

    exemplars, members = aggregate(source, min_rows=0, n_bins=1)

    frame_integrity_check(members)
    frame_integrity_check(exemplars)

    expected_members = dt.Frame([0] * 52, names=["exemplar_id"],
                                stypes=[dt.stype.int32])
    assert_equals(members, expected_members)

    expected_exemplars = dt.Frame(
        [[None], [52]],
        names=["C0", "members_count"],
        stypes=[dt.stype.float64, dt.stype.int32],
    )
    assert_equals(exemplars, expected_exemplars)

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_0d_continuous_integer_random():
    """Aggregate a 13-row integer column with ``min_rows=500``.

    Each of the 13 rows is expected to become its own exemplar with a member
    count of one.
    """
    # `nrows < min_rows`, so this also checks that `n_bins` is ignored.
    n_bins = 3
    min_rows = 500
    source = dt.Frame([None, 9, 8, None, 2, 3, 3, 0, 5, 5, 8, 1, None])
    snapshot = dt.Frame(source)
    expected_members = [[0, 12, 10, 1, 5, 6, 7, 3, 8, 9, 11, 4, 2]]
    expected_exemplars = [
        [None, None, None, 0, 1, 2, 3, 3, 5, 5, 8, 8, 9],
        [1] * 13,
    ]

    exemplars, members = aggregate(source, min_rows=min_rows, n_bins=n_bins)

    frame_integrity_check(members)
    assert members.shape == (13, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (13, 2)
    assert exemplars.ltypes == (ltype.int, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_1d_categorical_unsorted():
    """Aggregate an unsorted string column with repeated values and NAs.

    The expected result has one exemplar per distinct value (NA included),
    listed with the NA first and the strings in alphabetical order.
    """
    source = dt.Frame(["blue", "orange", "yellow", None, "green",
                       "blue", "indigo", None, "violet"])
    snapshot = dt.Frame(source)
    expected_members = [[1, 4, 6, 0, 2, 1, 3, 0, 5]]
    expected_exemplars = [
        [None, "blue", "green", "indigo", "orange", "violet", "yellow"],
        [2, 2, 1, 1, 1, 1, 1],
    ]

    exemplars, members = aggregate(source, min_rows=0)

    frame_integrity_check(members)
    assert members.shape == (9, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (7, 2)
    assert exemplars.ltypes == (ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_view_2d_categorical():
    """Aggregate rows 2..5 of a two-column string frame taken as a slice.

    The expected result has three exemplars, one per distinct pair of
    values in the slice.
    """
    source = dt.Frame([
        ["alpha", None, "bravo", "charlie", "charlie", "bravo", "echo"],
        ["red", "green", "blue", None, "red", "blue", "orange"],
    ])
    snapshot = dt.Frame(source)
    view = source[2:6, :]
    expected_members = [[1, 0, 2, 1]]
    expected_exemplars = [
        ["charlie", "bravo", "charlie"],
        [None, "blue", "red"],
        [1, 2, 1],
    ]

    exemplars, members = aggregate(view, min_rows=0)

    frame_integrity_check(members)
    assert members.shape == (4, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (3, 3)
    assert exemplars.ltypes == (ltype.str, ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The frame behind the view must not be touched by the aggregation.
    assert_equals(source, snapshot)
def test_aggregate_2d_categorical_unsorted():
    """Aggregate an unsorted two-column string frame with one repeated pair.

    The expected result has six exemplars, one per distinct pair; the pair
    ("red", "Wednesday") occurs twice and therefore has two members.
    """
    source = dt.Frame([
        ["blue", "indigo", "red", "violet", "yellow", "violet", "red"],
        ["Monday", "Monday", "Wednesday", "Saturday",
         "Thursday", "Friday", "Wednesday"],
    ])
    snapshot = dt.Frame(source)
    expected_members = [[0, 1, 2, 4, 5, 3, 2]]
    expected_exemplars = [
        ['blue', 'indigo', 'red', 'violet', 'violet', 'yellow'],
        ['Monday', 'Monday', 'Wednesday', 'Friday', 'Saturday', 'Thursday'],
        [1, 1, 2, 1, 1, 1],
    ]

    exemplars, members = aggregate(source, min_rows=0)

    frame_integrity_check(members)
    assert members.shape == (7, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    frame_integrity_check(exemplars)
    assert exemplars.shape == (6, 3)
    assert exemplars.ltypes == (ltype.str, ltype.str, ltype.int)
    assert exemplars.to_list() == expected_exemplars

    # The input frame must not be touched by the aggregation.
    assert_equals(source, snapshot)