def test_dataframe_parquet_serializers(df):
    """Serializing to parquet bytes and deserializing back should round-trip."""
    round_tripped = server_utils.dataframe_from_parquet_bytes(
        server_utils.dataframe_into_parquet_bytes(df.copy())
    )
    # Columns, index, and values all survive the round trip.
    assert list(df.columns) == list(round_tripped.columns)
    assert list(df.index) == list(round_tripped.index)
    assert np.allclose(df.values, round_tripped.values)
def test_second_anomaly_prediction_endpoint_all_columns(
    second_base_route,
    sensors_str,
    influxdb,
    gordo_ml_server_client,
    sensors,
    resp_format,
):
    """POSTing with ``all_columns=yes`` should include the smoothed anomaly columns."""
    n_tags = len(sensors_str)
    payload = {
        "X": np.random.random(size=(10, n_tags)).tolist(),
        "y": np.random.random(size=(10, n_tags)).tolist(),
    }
    url = (
        f"{second_base_route}/anomaly/prediction"
        f"?all_columns=yes&format={resp_format}"
    )
    resp = gordo_ml_server_client.post(url, json=payload)
    assert resp.status_code == 200

    # Parse the body according to the requested response format.
    if resp_format in (None, "json"):
        assert "data" in resp.json
        frame = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        frame = server_utils.dataframe_from_parquet_bytes(resp.data)

    # Smoothed variants must be present when all columns were requested.
    for column in (
        "smooth-tag-anomaly-scaled",
        "smooth-tag-anomaly-unscaled",
        "smooth-total-anomaly-scaled",
        "smooth-total-anomaly-unscaled",
    ):
        assert column in frame
def test_anomaly_prediction_endpoint(
    base_route,
    sensors_str,
    influxdb,
    gordo_ml_server_client,
    data_size,
    sensors,
    resp_format,
):
    """
    POSTing to the anomaly endpoint returns the expected columns.

    Because the server can't know what the start/end dates are for POSTed
    data, the returned ``start``/``end`` columns are always NaN.
    """
    data_to_post = {
        "X": np.random.random(size=(data_size, len(sensors_str))).tolist(),
        "y": np.random.random(size=(data_size, len(sensors_str))).tolist(),
    }

    endpoint = f"{base_route}/anomaly/prediction"
    if resp_format is not None:
        endpoint += f"?format={resp_format}"

    resp = gordo_ml_server_client.post(endpoint, json=data_to_post)
    assert resp.status_code == 200

    # Parse the body according to the requested response format.
    if resp_format in (None, "json"):
        assert "data" in resp.json
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    # start/end columns exist but hold NaN: the server can't know those
    # dates for POSTed data.
    # NOTE: the previous `if data_to_post is not None:` guard was always
    # true (the dict is constructed unconditionally above), so the branch
    # asserting non-NaN dates was unreachable dead code and has been removed.
    assert "start" in data
    assert "end" in data
    assert np.all(data["start"].isna())
    assert np.all(data["end"].isna())

    # All anomaly output columns must be present.
    assert all(
        key in data
        for key in (
            "total-anomaly-scaled",
            "total-anomaly-unscaled",
            "tag-anomaly-scaled",
            "tag-anomaly-unscaled",
            "model-input",
            "model-output",
        )
    )
def test_prediction_endpoint_post_ok(
    base_route,
    sensors,
    sensors_str,
    gordo_ml_server_client,
    data_size,
    to_dict_arg,
    resp_format,
    send_as_parquet,
):
    """
    Test the expected successful data posts, by sending a variety of valid JSON
    formats of a dataframe, as well as parquet serializations.
    """
    data_to_post = np.random.random(size=(data_size, len(sensors))).tolist()
    if to_dict_arg is not None:
        # Re-shape the raw values into one of pandas' to_dict orientations.
        frame = pd.DataFrame(data_to_post, columns=sensors_str)
        data_to_post = frame.to_dict(to_dict_arg)

    endpoint = f"{base_route}/prediction"
    if resp_format is not None:
        endpoint += f"?format={resp_format}"

    # Send either a parquet file upload or a JSON body.
    if send_as_parquet:
        X = pd.DataFrame.from_dict(data_to_post)
        parquet_bytes = server_utils.dataframe_into_parquet_bytes(X)
        kwargs = dict(data={"X": (io.BytesIO(parquet_bytes), "X")})
    else:
        kwargs = dict(json={"X": data_to_post})

    resp = gordo_ml_server_client.post(endpoint, **kwargs)
    assert resp.status_code == 200

    if resp_format in (None, "json"):
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    # Expected column names
    for expected in ("model-output", "model-input"):
        assert expected in data
def dataframe_from_response(
    response: typing.Union[dict, bytes]
) -> pd.DataFrame:
    """
    Parse an ML-server response into a dataframe.

    Parameters
    ----------
    response: Union[dict, bytes]
        Either a parsed JSON body (a dict whose ``"data"`` key holds the
        predictions) or raw bytes expected to be loadable by
        :func:`server.utils.dataframe_from_parquet_bytes`.

    Returns
    -------
    pandas.DataFrame
    """
    if isinstance(response, dict):
        return server_utils.dataframe_from_dict(response["data"])
    return server_utils.dataframe_from_parquet_bytes(response)