def test_rows(unittest):
    """Check row-level duplicate handling of ``DuplicateCheck``.

    Each scenario patches ``dtale.global_state.DATA`` with a fresh copy of
    the duplicates fixture, then compares the ``test()`` summary and the
    frame stored under the id returned by ``execute()`` with the expected
    values.
    """
    data_id, duplicates_type = "1", "rows"

    # Subset given as the plain string "foo": nothing is expected to go.
    store = {data_id: duplicates_data()}
    with ExitStack() as stack:
        stack.enter_context(mock.patch("dtale.global_state.DATA", store))
        check = DuplicateCheck(
            data_id, duplicates_type, {"keep": "first", "subset": "foo"}
        )
        unittest.assertEqual(
            check.test(), {"removed": 0, "total": 5, "remaining": 5}
        )
        pre_length = len(store[data_id])
        result_id = check.execute()
        assert pre_length == len(store[result_id])

    # Subset ["foO", "bar"]: two rows are expected to survive, and which
    # ones depends on whether the first or last occurrence is kept.
    for keep, surviving in (("first", [1, 4]), ("last", [3, 5])):
        store = {data_id: duplicates_data()}
        with ExitStack() as stack:
            stack.enter_context(mock.patch("dtale.global_state.DATA", store))
            check = DuplicateCheck(
                data_id, duplicates_type, {"keep": keep, "subset": ["foO", "bar"]}
            )
            unittest.assertEqual(
                check.test(), {"removed": 3, "total": 5, "remaining": 2}
            )
            result_id = check.execute()
            assert len(store[result_id]) == 2
            unittest.assertEqual(store[result_id]["Foo"].tolist(), surviving)

    # keep="none" would remove every row, so execute() must refuse.
    store = {data_id: duplicates_data()}
    with ExitStack() as stack:
        stack.enter_context(mock.patch("dtale.global_state.DATA", store))
        check = DuplicateCheck(
            data_id, duplicates_type, {"keep": "none", "subset": ["foO", "bar"]}
        )
        unittest.assertEqual(
            check.test(), {"removed": 5, "total": 5, "remaining": 0}
        )
        with pytest.raises(RemoveAllDataException):
            check.execute()
def test_columns(unittest):
    """Check column-level duplicate handling of ``DuplicateCheck``.

    Data is registered through ``build_data_inst`` and read back with
    ``global_state.get_data``. The first three scenarios differ only in the
    ``keep`` option; the last two cover the error paths.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "columns"

    # (keep option, expected test() report, expected remaining columns)
    scenarios = (
        ("first", {"Foo": ["foo"]}, ["Foo", "fOo", "foO", "bar"]),
        ("last", {"foo": ["Foo"]}, ["foo", "fOo", "foO", "bar"]),
        ("none", {"Foo": ["foo"]}, ["fOo", "foO", "bar"]),
    )
    for keep, expected_report, expected_columns in scenarios:
        frames = {data_id: duplicates_data()}
        build_data_inst(frames)
        check = DuplicateCheck(data_id, duplicates_type, {"keep": keep})
        unittest.assertEqual(check.test(), expected_report)
        result_id = check.execute()
        remaining = list(global_state.get_data(result_id).columns)
        unittest.assertEqual(remaining, expected_columns)

    # With the other columns dropped up front, keep="none" would remove
    # everything that is left, so execute() must raise.
    frames = {data_id: duplicates_data().drop(["fOo", "foO", "bar"], axis=1)}
    build_data_inst(frames)
    check = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
    with pytest.raises(RemoveAllDataException):
        check.execute()

    # Calling the checker's remove() directly on the non-duplicate fixture
    # must report that there is nothing to remove.
    frames = {data_id: non_duplicate_data()}
    build_data_inst(frames)
    check = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
    with pytest.raises(NoDuplicatesException):
        check.checker.remove(frames[data_id])
def test_columns(unittest):
    """Check column-level duplicate handling of ``DuplicateCheck``.

    Every scenario patches ``dtale.global_state.DATA`` with a fresh fixture
    and inspects the frame stored under the id returned by ``execute()``.

    Assertions use ``assertEqual``: the ``assertEquals`` spelling is a
    deprecated ``unittest.TestCase`` alias that no longer exists on
    Python 3.12+, and the other tests in this file already use
    ``assertEqual``.
    """
    data_id, duplicates_type = "1", "columns"

    # keep="first": the test expects "foo" to be reported as a duplicate of
    # "Foo", with "Foo" surviving.
    with ExitStack() as stack:
        data = {data_id: duplicates_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "first"})
        unittest.assertEqual(builder.test(), {"Foo": ["foo"]})
        new_data_id = builder.execute()
        unittest.assertEqual(
            list(data[new_data_id].columns), ["Foo", "fOo", "foO", "bar"]
        )

    # keep="last": the roles flip and "foo" survives.
    with ExitStack() as stack:
        data = {data_id: duplicates_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "last"})
        unittest.assertEqual(builder.test(), {"foo": ["Foo"]})
        new_data_id = builder.execute()
        unittest.assertEqual(
            list(data[new_data_id].columns), ["foo", "fOo", "foO", "bar"]
        )

    # keep="none": both "Foo" and "foo" are expected to be gone afterwards.
    with ExitStack() as stack:
        data = {data_id: duplicates_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        unittest.assertEqual(builder.test(), {"Foo": ["foo"]})
        new_data_id = builder.execute()
        unittest.assertEqual(list(data[new_data_id].columns), ["fOo", "foO", "bar"])

    # With the other columns dropped up front, keep="none" would remove
    # everything that is left, so execute() must raise.
    with ExitStack() as stack:
        data = {data_id: duplicates_data().drop(["fOo", "foO", "bar"], axis=1)}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        with pytest.raises(RemoveAllDataException):
            builder.execute()

    # Calling the checker's remove() directly on the non-duplicate fixture
    # must report that there is nothing to remove.
    with ExitStack() as stack:
        data = {data_id: non_duplicate_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        with pytest.raises(NoDuplicatesException):
            builder.checker.remove(data[data_id])
def test_rows(unittest):
    """Check row-level duplicate handling of ``DuplicateCheck``.

    Data is registered through ``build_data_inst`` and results are read back
    with ``global_state.get_data``. Each scenario compares the ``test()``
    summary and the outcome of ``execute()`` with the expected values.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "rows"

    # Subset given as the plain string "foo": nothing is expected to go.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)
    check = DuplicateCheck(
        data_id, duplicates_type, {"keep": "first", "subset": "foo"}
    )
    unittest.assertEqual(check.test(), {"removed": 0, "total": 5, "remaining": 5})
    pre_length = len(frames[data_id])
    result_id = check.execute()
    assert pre_length == len(global_state.get_data(result_id))

    # Subset ["foO", "bar"]: two rows are expected to survive, and which
    # ones depends on whether the first or last occurrence is kept.
    for keep, surviving in (("first", [1, 4]), ("last", [3, 5])):
        frames = {data_id: duplicates_data()}
        build_data_inst(frames)
        check = DuplicateCheck(
            data_id, duplicates_type, {"keep": keep, "subset": ["foO", "bar"]}
        )
        unittest.assertEqual(
            check.test(), {"removed": 3, "total": 5, "remaining": 2}
        )
        result_id = check.execute()
        assert len(global_state.get_data(result_id)) == 2
        unittest.assertEqual(
            global_state.get_data(result_id)["Foo"].tolist(), surviving
        )

    # keep="none" would remove every row, so execute() must refuse.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)
    check = DuplicateCheck(
        data_id, duplicates_type, {"keep": "none", "subset": ["foO", "bar"]}
    )
    unittest.assertEqual(check.test(), {"removed": 5, "total": 5, "remaining": 0})
    with pytest.raises(RemoveAllDataException):
        check.execute()
def test_show_duplicates(unittest):
    """Check the "show" mode of ``DuplicateCheck``.

    Each scenario patches ``dtale.global_state.DATA`` with a fresh fixture.
    The scenarios cover a grouping with nothing to show, a grouping that
    yields two duplicate groups, and the same grouping narrowed by a filter.
    """
    data_id, duplicates_type = "1", "show"

    # Grouping on "foo": test() is expected to be empty and execute() to
    # raise.
    store = {data_id: duplicates_data()}
    with ExitStack() as stack:
        stack.enter_context(mock.patch("dtale.global_state.DATA", store))
        check = DuplicateCheck(data_id, duplicates_type, {"group": ["foo"]})
        unittest.assertEqual(check.test(), {})
        with pytest.raises(NoDuplicatesToShowException):
            check.execute()

    # Grouping on ["foO", "bar"]: two groups are reported and all five rows
    # are stored under the new id.
    expected_groups = {
        "4, 5": {"count": 3, "filter": ["4", "5"]},
        "4, 6": {"count": 2, "filter": ["4", "6"]},
    }
    store = {data_id: duplicates_data()}
    with ExitStack() as stack:
        stack.enter_context(mock.patch("dtale.global_state.DATA", store))
        check = DuplicateCheck(
            data_id, duplicates_type, {"group": ["foO", "bar"]}
        )
        unittest.assertEqual(check.test(), expected_groups)
        result_id = check.execute()
        assert result_id == "2"
        unittest.assertEqual(store[result_id]["Foo"].tolist(), [1, 2, 3, 4, 5])

    # Same grouping with filter ["4", "5"]: only that group's rows are kept.
    store = {data_id: duplicates_data()}
    with ExitStack() as stack:
        stack.enter_context(mock.patch("dtale.global_state.DATA", store))
        check = DuplicateCheck(
            data_id,
            duplicates_type,
            {"group": ["foO", "bar"], "filter": ["4", "5"]},
        )
        result_id = check.execute()
        assert result_id == "2"
        unittest.assertEqual(store[result_id]["Foo"].tolist(), [1, 2, 3])
def test_show_duplicates(unittest):
    """Check the "show" mode of ``DuplicateCheck``.

    Data is registered through ``build_data_inst`` and results are read back
    with ``global_state.get_data``. The scenarios cover a grouping with
    nothing to show, a grouping that yields two duplicate groups, and the
    same grouping narrowed by a filter.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "show"

    # Grouping on "foo": test() is expected to be empty and execute() to
    # raise.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)
    check = DuplicateCheck(data_id, duplicates_type, {"group": ["foo"]})
    unittest.assertEqual(check.test(), {})
    with pytest.raises(NoDuplicatesToShowException):
        check.execute()

    # Grouping on ["foO", "bar"]: two groups are reported and all five rows
    # are stored under the new id, which is compared against the integer 2.
    expected_groups = {
        "4, 5": {"count": 3, "filter": ["4", "5"]},
        "4, 6": {"count": 2, "filter": ["4", "6"]},
    }
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)
    check = DuplicateCheck(data_id, duplicates_type, {"group": ["foO", "bar"]})
    unittest.assertEqual(check.test(), expected_groups)
    result_id = check.execute()
    assert result_id == 2
    shown = global_state.get_data(result_id)["Foo"].tolist()
    unittest.assertEqual(shown, [1, 2, 3, 4, 5])

    # Same grouping with filter ["4", "5"]: only that group's rows are kept.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)
    check = DuplicateCheck(
        data_id,
        duplicates_type,
        {"group": ["foO", "bar"], "filter": ["4", "5"]},
    )
    result_id = check.execute()
    shown = global_state.get_data(result_id)["Foo"].tolist()
    unittest.assertEqual(shown, [1, 2, 3])