def test_wordcloud():
    """Wordcloud chart request with extended aggregations renders a Wordcloud component."""
    import dtale.views as views

    df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]))
    with app.test_client() as c:
        df, _ = views.format_data(df)
        build_data_inst({c.port: df})
        inputs = {
            "chart_type": "wordcloud",
            "x": "a",
            "y": ["b"],
            "z": None,
            "group": None,
            "agg": None,
            "window": None,
            "rolling_comp": None,
        }
        chart_inputs = {
            "cpg": False,
            "cpy": False,
            "barmode": "group",
            "barsort": None
        }
        # two aggregations on the same column exercises the extended-aggregation path
        extended_aggregation = [
            dict(col="b", agg="mean"),
            dict(col="b", agg="sum")
        ]
        params = build_chart_params(
            c.port, inputs, chart_inputs,
            extended_aggregation=extended_aggregation)
        response = c.post("/dtale/charts/_dash-update-component", json=params)
        resp_data = response.get_json()["response"]
        # second child of the rendered chart content should be the Wordcloud component
        assert (resp_data["chart-content"]["children"]["props"]["children"][1]
                ["type"] == "Wordcloud")
def test_login():
    """Verify the login/logout flow when authentication is active.

    Static assets remain accessible without auth, unauthenticated page hits
    redirect to /login, a successful login populates the session and redirects
    back, and /logout clears the session again.
    """
    import dtale.views as views

    df, _ = views.format_data(pd.DataFrame(dict(a=[1, 2, 3, 4, 5, 6])))
    with ExitStack() as stack:
        # NOTE: the configured credentials must match both the POSTed form data
        # and the session assertion below (the previous masked "******" values
        # contradicted the `mock_session["username"] == "foo"` assertion).
        stack.enter_context(
            mock.patch(
                "dtale.auth.global_state.get_auth_settings",
                return_value={
                    "active": True,
                    "username": "foo",
                    "password": "foo"
                },
            ))
        mock_session = stack.enter_context(
            mock.patch("dtale.auth.session", dict()))
        with build_app(url=URL).test_client() as c:
            build_data_inst({c.port: df})
            # static assets are served without authentication
            resp = c.get("/dtale/static/css/main.css")
            assert resp.status_code == 200
            # unauthenticated page hit redirects to the login page
            resp = c.get("/dtale/main/{}".format(c.port))
            assert resp.status_code == 302
            assert resp.location == "http://localhost:{}/login".format(c.port)
            # successful login redirects back to the originally requested page
            resp = c.post("/login", data=dict(username="foo", password="foo"))
            assert resp.location == "http://localhost:{}/dtale/main/{}".format(
                c.port, c.port)
            assert mock_session["logged_in"]
            assert mock_session["username"] == "foo"
            # logout clears the session and redirects to login
            resp = c.get("/logout")
            assert resp.location == "http://localhost:{}/login".format(c.port)
            assert mock_session.get("logged_in") is None
def test_failure():
    """Gage R&R endpoint should respond with an error for an incomplete request."""
    import dtale.views as views

    measurements = [
        [1, 1, 3.29, 3.41, 3.64],
        [1, 2, 2.44, 2.32, 2.42],
        [1, 3, 4.34, 4.17, 4.27],
        [2, 1, 3.08, 3.25, 3.07],
        [2, 2, 2.53, 1.78, 2.32],
        [2, 3, 4.19, 3.94, 4.34],
        [2, 4, 3.01, 4.03, 3.2],
        [2, 5, 2.44, 1.8, 1.72],
        [3, 1, 3.04, 2.89, 2.85],
        [3, 2, 1.62, 1.87, 2.04],
        [3, 3, 3.88, 4.09, 3.67],
        [3, 4, 3.14, 3.2, 3.11],
        [3, 5, 1.54, 1.93, 1.55],
    ]
    gage_df, _ = views.format_data(
        pd.DataFrame(measurements, columns=["o", "p", "m1", "m2", "m3"]))
    with build_app(url=URL).test_client() as c:
        build_data_inst({c.port: gage_df})
        response = c.get(
            "/dtale/gage-rnr/{}".format(c.port),
            query_string=dict(operator=json.dumps(["o"])),
        )
        assert "error" in response.json
def test_network_analysis(network_data, unittest):
    """Network analysis endpoint returns summary statistics for the sample graph."""
    import dtale.views as views

    df, _ = views.format_data(network_data)
    with build_app(url=URL).test_client() as c:
        build_data_inst({c.port: df})
        resp = c.get(
            "/dtale/network-analysis/{}".format(c.port),
            query_string={
                "to": "to",
                "from": "from",
                "weight": "weight"
            },
        )
        # expected stats derived from the network_data fixture
        unittest.assertEqual(
            resp.json["data"],
            {
                "avg_weight": 2.68,
                "edge_ct": 36,
                "leaf_ct": 3,
                "max_edge": "10 (source: h, target: j)",
                "min_edge": "1 (source: j, target: k)",
                "most_connected_node": "g (Connections: 5)",
                "node_ct": 14,
                "triangle_ct": 2,
            },
        )
def test_value(unittest):
    """Value-based column replacement: raw values, chained replacements and aggregates."""
    df = replacements_data()
    data_id, replacement_type = "1", "value"
    build_data_inst({data_id: df})
    # replace NaN with a raw string value
    cfg = {"value": [dict(value="nan", type="raw", replace="for test")]}
    builder = ColumnReplacement(data_id, "e", replacement_type, cfg)
    verify_builder(
        builder,
        lambda col: unittest.assertEqual(list(col.values), ["a", "for test", "b"]),
    )
    # multiple replacements are applied in sequence
    cfg = {
        "value": [
            dict(value="nan", type="raw", replace="for test"),
            dict(value="a", type="raw", replace="d"),
        ]
    }
    builder = ColumnReplacement(data_id, "e", replacement_type, cfg)
    verify_builder(
        builder,
        lambda col: unittest.assertEqual(list(col.values), ["d", "for test", "b"]),
    )
    # replace NaN with a column aggregate (median)
    cfg = {"value": [dict(value="nan", type="agg", replace="median")]}
    builder = ColumnReplacement(data_id, "d", replacement_type, cfg)
    verify_builder(
        builder, lambda col: unittest.assertEqual(list(col.values), [1.1, 2.05, 3])
    )
def test_encoder():
    """Encoder column builders: one-hot, ordinal, label and feature hasher."""
    df = pd.DataFrame({
        "car": ["Honda", "Benze", "Ford", "Honda", "Benze", "Ford", np.nan],
    })
    data_id, column_type = "1", "encoder"
    build_data_inst({data_id: df})
    cfg = {"col": "car", "algo": "one_hot"}
    builder = ColumnBuilder(data_id, column_type, "Col1", cfg)
    # the one-hot output columns should contain no nulls even with NaN input
    verify_builder(
        builder,
        lambda col: all(
            [col[c].isnull().sum() == 0 for c in ["car_Ford", "car_Honda"]]),
    )
    cfg = {"col": "car", "algo": "ordinal"}
    builder = ColumnBuilder(data_id, column_type, "Col1", cfg)
    verify_builder(builder, lambda col: col.isnull().sum() == 0)
    cfg = {"col": "car", "algo": "label"}
    builder = ColumnBuilder(data_id, column_type, "Col1", cfg)
    verify_builder(builder, lambda col: col.isnull().sum() == 0)
    # feature hasher configured to emit a single output column
    cfg = {"col": "car", "algo": "feature_hasher", "n": 1}
    builder = ColumnBuilder(data_id, column_type, "Col1", cfg)
    verify_builder(builder, lambda col: col["car_0"].isnull().sum() == 0)
def test_number_value(unittest):
    """Replacing a raw numeric value (0) with NaN in a float column."""
    years = [
        1992.0,
        2005.0,
        2011.0,
        0.0,
        2008.0,
        1999.0,
        1983.0,
        2010.0,
        0.0,
        2002.0,
    ]
    df = pd.DataFrame({"year": years})
    data_id, replacement_type = "1", "value"
    build_data_inst({data_id: df})
    cfg = {"value": [dict(value=0, type="raw", replace="nan")]}
    replacement = ColumnReplacement(data_id, "year", replacement_type, cfg)
    # the second-to-last entry was 0.0 and should now be NaN
    verify_builder(
        replacement,
        lambda col: unittest.assertEqual(np.isnan(col.values[-2]), True),
    )
def test_from_object():
    """Type-conversion builder: object columns converted to dates and booleans."""
    df = conversion_data()
    data_id, column_type = "1", "type_conversion"
    i = 0
    build_data_inst({data_id: df})
    # NOTE: the original used "Col{}".format(++i) -- Python has no increment
    # operator, so ++i is a double unary plus and always yielded 0; every
    # builder was named "Col0". Increment explicitly instead.
    cfg = {"col": "str_date", "to": "date", "from": "object", "fmt": "%Y%m%d"}
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(
        builder,
        lambda col: pd.Timestamp(col.values[0]).strftime("%Y%m%d") == "20200101",
    )
    # date parsing without an explicit format string
    cfg = {"col": "str_date2", "to": "date", "from": "object"}
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(
        builder,
        lambda col: pd.Timestamp(col.values[0]).strftime("%Y%m%d") == "20200101",
    )
    # string-to-boolean conversion
    cfg = {"col": "str_bool", "to": "bool", "from": "object"}
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder, lambda col: col.values[0])
def test_rolling(rolling_data):
    """Rolling-computation column builder, with and without centering on a date column."""
    import dtale.views as views

    df, _ = views.format_data(rolling_data)
    data_id, column_type = "1", "rolling"
    build_data_inst({data_id: df})
    # min_periods=1 ensures the head of the series is populated (no NaNs)
    cfg = {"col": "0", "comp": "mean", "window": "5", "min_periods": 1}
    builder = ColumnBuilder(data_id, column_type, "0_rolling_mean", cfg)
    verify_builder(
        builder,
        lambda col: col.isnull().sum() == 0,
    )
    # rolling over a date column with centered windows
    cfg = {
        "col": "0",
        "comp": "mean",
        "window": "5",
        "min_periods": 1,
        "on": "date",
        "center": True,
    }
    builder = ColumnBuilder(data_id, column_type, "0_rolling_mean", cfg)
    verify_builder(
        builder,
        lambda col: col.isnull().sum() == 0,
    )
def test_building_choropleth_map_w_custom_geojson(unittest):
    """Choropleth map chart built against an uploaded custom geojson definition."""
    import dtale.views as views

    df = pd.DataFrame([
        dict(id="US.MA", name="mass", pop=125),
        dict(id="US.WA", name="wash", pop=500),
        dict(id="US.CA", name="cali", pop=1000),
    ])
    with app.test_client() as c:
        with ExitStack() as stack:
            # isolate the module-level custom geojson registry for this test
            custom_geojson_data = []
            stack.enter_context(
                mock.patch(
                    "dtale.dash_application.custom_geojson.CUSTOM_GEOJSON",
                    custom_geojson_data,
                ))
            # simulate uploading a custom geojson file through the dash callback
            params = {
                "output": "..output-geojson-upload.children...geojson-dropdown.options..",
                "changedPropIds": ["upload-geojson.content"],
                "inputs": [{
                    "id": "upload-geojson",
                    "property": "content",
                    "value": build_geojson_data(),
                }],
                "state": [{
                    "id": "upload-geojson",
                    "property": "filename",
                    "value": "USA.json",
                }],
            }
            c.post("/dtale/charts/_dash-update-component", json=params)
            df, _ = views.format_data(df)
            build_data_inst({c.port: df})
            pathname = path_builder(c.port)
            inputs = {"chart_type": "maps", "agg": "raw"}
            # reference the uploaded geojson by name and its feature id key
            map_inputs = {
                "map_type": "choropleth",
                "loc_mode": "geojson-id",
                "geojson": "USA",
                "featureidkey": "HASC_1",
                "loc": "id",
                "map_val": "pop",
            }
            chart_inputs = {"colorscale": "Reds"}
            params = build_chart_params(pathname, inputs, chart_inputs,
                                        map_inputs=map_inputs)
            response = c.post("/dtale/charts/_dash-update-component",
                              json=params)
            chart_markup = response.get_json(
            )["response"]["chart-content"]["children"]["props"]["children"][1]
            unittest.assertEqual(
                chart_markup["props"]["figure"]["layout"]["title"],
                {"text": "Map of pop (No Aggregation)"},
            )
def test_drop_all_space():
    """drop_all_space cleaner removes every space character from the column."""
    df = pd.DataFrame(dict(foo=["a b"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["drop_all_space"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder, lambda col: col.values[0] == "ab")
def test_drop_stopwords():
    """stopwords cleaner removes the user-supplied stopword list from the text."""
    df = pd.DataFrame(dict(foo=["foo bar biz"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["stopwords"], "stopwords": ["bar"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder, lambda col: col.values[0] == "foo biz")
def test_string():
    """String concatenation builder joins mixed-type columns with a separator."""
    df = pd.DataFrame(dict(a=[1], b=[2], c=["a"], d=[True]))
    data_id, column_type = "1", "string"
    build_data_inst({data_id: df})
    cfg = {"cols": list(df.columns), "joinChar": "-"}
    concat_builder = ColumnBuilder(data_id, column_type, "Col1", cfg)
    # every column value is stringified and joined with the join character
    verify_builder(concat_builder, lambda col: col.values[-1] == "1-2-a-True")
def test_transpose(custom_data, unittest):
    """Transpose reshaping: error on duplicate index, new instance with filter, override."""
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)
        reshape_cfg = dict(index=["security_id"], columns=["Col0"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="transpose",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        new_key = int(c.port) + 1
        # transposing the unfiltered data fails (non-unique index) -> error payload
        assert "error" in response_data
        # filter down to a single date so the transpose can succeed
        min_date = custom_data["date"].min().strftime("%Y-%m-%d")
        global_state.set_settings(c.port,
                                  dict(query="date == '{}'".format(min_date)))
        reshape_cfg = dict(index=["date", "security_id"], columns=["Col0"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="transpose",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        # output="new" allocates the next data id
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            [
                "index",
                "{} 00:00:00 100000".format(min_date),
                "{} 00:00:00 100001".format(min_date),
            ],
        )
        assert len(global_state.get_data(new_key)) == 1
        # startup_code is stored so the reshape can be reproduced later
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))
        # output="override" replaces the original instance in place
        reshape_cfg = dict(index=["date", "security_id"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="override", type="transpose",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == c.port
def test_matrix():
    """missingno matrix endpoint responds with a PNG image."""
    import dtale.views as views

    formatted, _ = views.format_data(pd.DataFrame(dict(a=[1, 2, 3, 4, 5, 6])))
    with build_app(url=URL).test_client() as c:
        build_data_inst({c.port: formatted})
        response = c.get("/dtale/missingno/matrix/{}".format(c.port))
        assert response.content_type == "image/png"
def test_dendrogram(rolling_data):
    """missingno dendrogram endpoint responds with a PNG image."""
    import dtale.views as views

    formatted, _ = views.format_data(rolling_data)
    with build_app(url=URL).test_client() as c:
        build_data_inst({c.port: formatted})
        response = c.get("/dtale/missingno/dendrogram/{}".format(c.port))
        assert response.content_type == "image/png"
def test_update_case(unittest):
    """update_case cleaner converts the column's text to the requested case."""
    df = pd.DataFrame(dict(foo=["a b"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["update_case"], "caseType": "upper"}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder, lambda col: col.values[0] == "A B")
def test_space_vals_to_empty():
    """space_vals_to_empty cleaner converts whitespace-only values to empty strings."""
    df = pd.DataFrame(dict(foo=[" ", "", "a"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["space_vals_to_empty"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    # " " becomes "" and the pre-existing "" stays -> two empty values
    verify_builder(builder, lambda col: sum(col == "") == 2)
def test_hidden_chars():
    """hidden_chars cleaner runs without nulling out any values."""
    df = pd.DataFrame(dict(foo=[" ", "", "a"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["hidden_chars"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder, lambda col: sum(col.isnull()) == 0)
def test_nltk_stopwords():
    """nltk_stopwords cleaner removes NLTK stopwords (skipped when nltk is absent)."""
    pytest.importorskip("nltk")
    df = pd.DataFrame(dict(foo=["foo do biz"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["nltk_stopwords"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    # "do" is an NLTK English stopword and should be stripped
    verify_builder(builder, lambda col: col.values[0] == "foo biz")
def test_replace_hyphens_w_space():
    """replace_hyphen_w_space cleaner normalizes every unicode hyphen variant to a space."""
    df = pd.DataFrame(dict(foo=["a‐b᠆c﹣d-e⁃f−g", "", "a"]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["replace_hyphen_w_space"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder, lambda col: col.values[0] == "a b c d e f g")
def test_multiple_cleaners(unittest):
    """Multiple cleaners applied together: drop numbers, then blank whitespace values."""
    df = pd.DataFrame(dict(foo=["a999b", " "]))
    data_id, column_type = "1", "cleaning"
    i = 0
    build_data_inst({data_id: df})
    cfg = {"col": "foo", "cleaners": ["drop_numbers", "space_vals_to_empty"]}
    # NOTE: the original used "Col{}".format(++i) -- Python has no ++ operator
    # (double unary plus is a no-op), so i stayed 0. Increment explicitly.
    i += 1
    builder = ColumnBuilder(data_id, column_type, "Col{}".format(i), cfg)
    verify_builder(builder,
                   lambda col: sum(col == "") == 1 and col.values[0] == "ab")
def test_replace():
    """Search/replace column builder substitutes a substring within string values."""
    import dtale.views as views

    formatted, _ = views.format_data(pd.DataFrame({"A": ["foo_bar"]}))
    data_id, column_type = "1", "replace"
    build_data_inst({data_id: formatted})
    cfg = {"col": "A", "search": "_bar", "replacement": "_baz"}
    replace_builder = ColumnBuilder(data_id, column_type, "A_replace", cfg)
    verify_builder(replace_builder, lambda col: col.values[0] == "foo_baz")
def test_cumsum_groupby(custom_data):
    """Grouped cumulative-sum builder produces positive values for the fixture data."""
    data_id, column_type = "1", "cumsum"
    build_data_inst({data_id: custom_data})
    cfg = {"col": "int_val", "group": ["security_id"]}
    cumsum_builder = ColumnBuilder(data_id, column_type, "Col0", cfg)
    verify_builder(cumsum_builder, lambda col: col.max() > 0)
def test_simple_imputers(unittest):
    """Simple imputer fills the missing value in column 'd'."""
    df = replacements_data()
    data_id, replacement_type = "1", "imputer"
    build_data_inst({data_id: df})
    cfg = {"type": "simple"}
    imputer = ColumnReplacement(data_id, "d", replacement_type, cfg)
    verify_builder(
        imputer,
        lambda col: unittest.assertEqual(list(col.values), [1.1, 2.05, 3]),
    )
def test_stack(unittest):
    """Stacking two datasets via /dtale/merge, with and without index preservation."""
    from dtale.views import build_dtypes_state
    import dtale.global_state as global_state

    global_state.clear_store()
    df1 = pd.DataFrame({
        "A": ["A0", "A1"],
        "B": ["B0", "B1"],
        "C": ["C0", "C1"],
        "D": ["D0", "D1"],
    })
    df2 = pd.DataFrame({
        "A": ["A2", "A3"],
        "B": ["B3", "B3"],
        "C": ["C3", "C3"],
        "D": ["D3", "D3"],
    })
    with app.test_client() as c:
        data = {"1": df1, "2": df2}
        dtypes = {k: build_dtypes_state(v) for k, v in data.items()}
        settings = {k: {} for k in data.keys()}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)
        datasets = [dict(dataId="1", columns=[]), dict(dataId="2", columns=[])]
        config = dict(ignore_index=False)
        resp = c.post(
            "/dtale/merge",
            data=dict(
                action="stack",
                config=json.dumps(config),
                datasets=json.dumps(datasets),
            ),
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(list(final_df["A"].values),
                             ["A0", "A1", "A2", "A3"])
        # original per-frame indices are preserved as an "index" column
        unittest.assertEqual(list(final_df["index"].values), [0, 1, 0, 1])
        # NOTE(review): this sets the camelCase key "ignoreIndex" while the
        # initial config used snake_case "ignore_index" -- the endpoint
        # evidently reads the camelCase form; confirm against the merge view.
        config["ignoreIndex"] = True
        resp = c.post(
            "/dtale/merge",
            data=dict(
                action="stack",
                config=json.dumps(config),
                datasets=json.dumps(datasets),
            ),
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        # with ignoreIndex the stacked frame gets a fresh index, no "index" column
        assert "index" not in final_df.columns
        unittest.assertEqual(list(final_df["A"].values),
                             ["A0", "A1", "A2", "A3"])
def test_ipython_import_error(builtin_pkg):
    """DtaleData fallbacks when importing IPython raises ImportError."""
    from dtale.views import DtaleData

    orig_import = __import__

    def import_mock(name, *args, **kwargs):
        # fail only for IPython modules; everything else imports normally
        if name in ["IPython", "IPython.display"]:
            raise ImportError
        return orig_import(name, *args, **kwargs)

    df = pd.DataFrame([1, 2, 3])
    with ExitStack() as stack:
        stack.enter_context(
            mock.patch("{}.__import__".format(builtin_pkg),
                       side_effect=import_mock)
        )
        stack.enter_context(
            mock.patch("dtale.views.in_ipython_frontend", return_value=False)
        )
        build_data_inst({9999: df})
        getter = namedtuple("get", "ok")
        # make is_up() see the server as down (requests.get(...).ok == False)
        stack.enter_context(
            mock.patch("dtale.app.requests.get", return_value=getter(False))
        )
        instance = DtaleData(9999, "http://localhost:9999")

        assert not instance.is_up()
        assert instance._build_iframe() is None
        # without IPython, notebook() falls back to the DataFrame repr
        assert instance.notebook() == df.__repr__()
        assert str(instance) == str(df)
        assert instance.__repr__() == "http://localhost:9999/dtale/main/9999"
        instance.adjust_cell_dimensions(width=5, height=5)

        # with a notebook handle present, adjusting dimensions rebuilds the iframe
        instance._notebook_handle = mock.Mock()
        instance._build_iframe = mock.Mock()
        instance.adjust_cell_dimensions(width=5, height=5)
        instance._notebook_handle.update.assert_called_once()
        instance._build_iframe.assert_called_once()
        assert {"width": 5, "height": 5} == instance._build_iframe.call_args[1]

    with ExitStack() as stack:
        stack.enter_context(
            mock.patch("{}.__import__".format(builtin_pkg),
                       side_effect=import_mock)
        )
        stack.enter_context(
            mock.patch("dtale.views.in_ipython_frontend", return_value=True)
        )
        build_data_inst({9999: df})
        instance = DtaleData(9999, "http://localhost:9999")

        # inside an ipython frontend, str/repr delegate to notebook rendering
        instance.notebook = mock.Mock()
        assert str(instance) == ""
        instance.notebook.assert_called_once()
        instance.notebook.reset_mock()
        assert instance.__repr__() is None
        instance.notebook.assert_called_once()
def test_data_slope():
    """Data-slope builder: slope values over the sample series sum to 35."""
    df = pd.DataFrame({"entity": [5, 7, 5, 5, 5, 6, 3, 2, 0, 5]})
    data_id, column_type = "1", "data_slope"
    build_data_inst({data_id: df})
    cfg = {"col": "entity"}
    slope_builder = ColumnBuilder(data_id, column_type, "entity_data_slope", cfg)
    verify_builder(slope_builder, lambda col: col.sum() == 35)
def test_diff():
    """Diff builder: first value is NaN and the period-1 diffs sum to -8."""
    df = pd.DataFrame({"A": [9, 4, 2, 1]})
    data_id, column_type = "1", "diff"
    build_data_inst({data_id: df})
    cfg = {"col": "A", "periods": "1"}
    diff_builder = ColumnBuilder(data_id, column_type, "dA", cfg)
    verify_builder(
        diff_builder,
        lambda col: col.isnull().sum() == 1 and col.sum() == -8,
    )
def test_get_pps_matrix(unittest, test_data):
    """Correlations endpoint with pps=true returns a predictive-power-score matrix."""
    import dtale.views as views

    with app.test_client() as c:
        test_data, _ = views.format_data(test_data)
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        response = c.get("/dtale/correlations/{}?pps=true".format(c.port))
        response_data = response.json
        # diagonal of the PPS matrix is 1, off-diagonal scores are 0 for this fixture
        expected = [
            {
                "bar": 1,
                "column": "bar",
                "foo": 0,
                "security_id": 0
            },
            {
                "bar": 0,
                "column": "foo",
                "foo": 1,
                "security_id": 0
            },
            {
                "bar": 0,
                "column": "security_id",
                "foo": 0,
                "security_id": 1
            },
        ]
        unittest.assertEqual(
            response_data["data"],
            expected,
            "should return scores",
        )
        # drill into a single (x="foo", y="security_id") cell's detail payload
        pps_val = next(
            (p for p in response_data["pps"]
             if p["y"] == "security_id" and p["x"] == "foo"),
            None,
        )
        expected = {
            "baseline_score": 12.5,
            "case": "regression",
            "is_valid_score": "True",
            "metric": "mean absolute error",
            "model": "DecisionTreeRegressor()",
            "model_score": 12.635071,
            "ppscore": 0,
            "x": "foo",
            "y": "security_id",
        }
        unittest.assertEqual(pps_val, expected, "should return PPS information")
        # generated code export should reference the ppscore library
        assert "import ppscore" in response_data["code"]
        assert "corr_data = ppscore.matrix(corr_data)" in response_data["code"]