def test_dont_convert_non_df_to_pandas():
    """
    Check that passing something that is not a dataframe (here an int and
    a plain string) to convert_to_pandas raises an ApiException.
    """
    # The return value and the ExceptionInfo are never inspected, so
    # neither is bound to a name.
    with pytest.raises(ApiException):
        convert_to_pandas(345)
    with pytest.raises(ApiException):
        convert_to_pandas("abc")
def test_execute_pd_concat():
    """
    Run a pd.concat snippet through execute_code with two row-list inputs
    bound to ``x`` and ``y``, and check the single output ``z``.
    """
    snippet = "z = pd.concat([x,y],join='outer', ignore_index=True, sort=True)"
    left_rows = [{"a": 1, "b": 2}, {"a": 2, "b": 3}]
    right_rows = [{"b": 4, "c": 2}, {"b": 5, "c": 7}]
    bindings = {"x": left_rows, "y": right_rows}
    no_files = {}
    cell_hash = "somehash"

    targets = find_assignments(snippet)["targets"]
    outcome = execute_code(no_files, snippet, bindings, targets, cell_hash)
    outputs = outcome["results"]

    # the snippet assigns exactly one variable
    assert len(outputs) == 1
    # the output is handed back as bytes and must be converted before use
    assert isinstance(outputs['z'], bytes)
    concatenated = convert_to_pandas(outputs['z'])
    # 4 rows * 3 columns
    assert concatenated.size == 12
def test_json_to_pandas_to_json():
    """
    Round-trip a list-of-dicts dataframe through convert_to_pandas and
    convert_from_pandas, and check the result equals the input.
    """
    original_rows = [
        {"Col1": 123, "Col2": "Abc"},
        {"Col1": 456, "Col2": "Def"},
    ]

    frame = convert_to_pandas(original_rows)
    assert isinstance(frame, pd.DataFrame)

    # convert_from_pandas output is parsed with json.loads before comparing
    serialized = convert_from_pandas(frame, max_size_json=1024)
    round_tripped = json.loads(serialized)
    assert original_rows == round_tripped
def test_convert_null_to_nan():
    """
    Check that a JSON null in a column that otherwise holds numbers shows
    up as NaN in the pandas DataFrame, and is None again after converting
    the DataFrame back to JSON.
    """
    json_string = '[{"a": 1, "b": 33},{"a": 2, "b": null}]'
    json_obj = json.loads(json_string)
    # json.loads maps JSON null to Python None; compare by identity
    assert json_obj[1]["b"] is None
    df = convert_to_pandas(json_obj)
    # inside the DataFrame the missing value should now be NaN
    assert np.isnan(df["b"][1])
    # but when we convert it back into json, we want it to be None
    new_json = json.loads(convert_from_pandas(df, max_size_json=1024))
    assert new_json[1]["b"] is None
def test_execute_simple_func():
    """
    Execute a snippet that imports numpy, defines a trivial helper
    function and uses both while filling a dataframe, then check the
    returned ``df`` output.
    """
    # Adjacent literals are concatenated into the single snippet string.
    snippet = (
        'import numpy\n'
        'def squareroot(x):\n'
        ' return numpy.sqrt(x)\n'
        '\n'
        'df= pd.DataFrame({"a":[numpy.sqrt(9),squareroot(16),13],"b":[14,15,16]})'
    )
    no_inputs = {}
    no_files = {}
    targets = find_assignments(snippet)["targets"]
    cell_hash = "somehash"

    outcome = execute_code(no_files, snippet, no_inputs, targets, cell_hash)
    outputs = outcome["results"]

    assert outputs
    assert isinstance(outputs, dict)
    assert "df" in outputs

    frame = convert_to_pandas(outputs["df"])
    assert isinstance(frame, pd.DataFrame)
    # column "a" starts with numpy.sqrt(9) and squareroot(16)
    assert frame["a"][0] == 3
    assert frame["a"][1] == 4