Example #1
0
def test_dataframe_parquet_serializers(df):
    """Round-trip a dataframe through the parquet (de)serializers and
    verify columns, index, and values survive intact."""
    round_tripped = server_utils.dataframe_from_parquet_bytes(
        server_utils.dataframe_into_parquet_bytes(df.copy())
    )
    assert list(df.columns) == list(round_tripped.columns)
    assert list(df.index) == list(round_tripped.index)
    assert np.allclose(df.values, round_tripped.values)
Example #2
0
def test_second_anomaly_prediction_endpoint_all_columns(
    second_base_route,
    sensors_str,
    influxdb,
    gordo_ml_server_client,
    sensors,
    resp_format,
):
    """POST random data to the anomaly endpoint with ``all_columns=yes``
    and check that the smoothed anomaly columns are present in the
    response, for both JSON and parquet response formats."""
    n_rows, n_cols = 10, len(sensors_str)
    payload = {
        "X": np.random.random(size=(n_rows, n_cols)).tolist(),
        "y": np.random.random(size=(n_rows, n_cols)).tolist(),
    }

    url = (
        f"{second_base_route}/anomaly/prediction?all_columns=yes&format={resp_format}"
    )
    resp = gordo_ml_server_client.post(url, json=payload)
    assert resp.status_code == 200

    # Parse according to the requested response format.
    if resp_format not in (None, "json"):
        frame = server_utils.dataframe_from_parquet_bytes(resp.data)
    else:
        assert "data" in resp.json
        frame = server_utils.dataframe_from_dict(resp.json["data"])

    smoothed_columns = (
        "smooth-tag-anomaly-scaled",
        "smooth-tag-anomaly-unscaled",
        "smooth-total-anomaly-scaled",
        "smooth-total-anomaly-unscaled",
    )
    for column in smoothed_columns:
        assert column in frame
Example #3
0
def test_anomaly_prediction_endpoint(
    base_route,
    sensors_str,
    influxdb,
    gordo_ml_server_client,
    data_size,
    sensors,
    resp_format,
):
    """
    POST random data to the anomaly prediction endpoint and verify the
    response shape, for both JSON and parquet response formats.
    """

    data_to_post = {
        "X": np.random.random(size=(data_size, len(sensors_str))).tolist(),
        "y": np.random.random(size=(data_size, len(sensors_str))).tolist(),
    }

    endpoint = f"{base_route}/anomaly/prediction"
    if resp_format is not None:
        endpoint += f"?format={resp_format}"

    resp = gordo_ml_server_client.post(endpoint, json=data_to_post)

    assert resp.status_code == 200
    if resp_format in (None, "json"):
        assert "data" in resp.json
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    # POST responses return None (NaN) for start/end dates, because the
    # server can't know what those are.  Note: `data_to_post` is always a
    # dict built above, so the former `if data_to_post is not None` check
    # (and its unreachable `else` branch asserting non-null dates) was
    # dead code and has been removed.
    assert "start" in data
    assert "end" in data
    assert np.all(data["start"].isna())
    assert np.all(data["end"].isna())

    assert all(key in data for key in (
        "total-anomaly-scaled",
        "total-anomaly-unscaled",
        "tag-anomaly-scaled",
        "tag-anomaly-unscaled",
        "model-input",
        "model-output",
    ))
Example #4
0
def test_prediction_endpoint_post_ok(
    base_route,
    sensors,
    sensors_str,
    gordo_ml_server_client,
    data_size,
    to_dict_arg,
    resp_format,
    send_as_parquet,
):
    """
    Test the expected successful data posts, by sending a variety of valid
    JSON formats of a dataframe, as well as parquet serializations.
    """
    payload = np.random.random(size=(data_size, len(sensors))).tolist()

    # Optionally convert to one of pandas' dict orientations.
    if to_dict_arg is not None:
        payload = pd.DataFrame(payload, columns=sensors_str).to_dict(to_dict_arg)

    url = f"{base_route}/prediction"
    if resp_format is not None:
        url = f"{url}?format={resp_format}"

    if send_as_parquet:
        frame = pd.DataFrame.from_dict(payload)
        parquet_bytes = server_utils.dataframe_into_parquet_bytes(frame)
        post_kwargs = {"data": {"X": (io.BytesIO(parquet_bytes), "X")}}
    else:
        post_kwargs = {"json": {"X": payload}}

    resp = gordo_ml_server_client.post(url, **post_kwargs)
    assert resp.status_code == 200

    if resp_format not in (None, "json"):
        result = server_utils.dataframe_from_parquet_bytes(resp.data)
    else:
        result = server_utils.dataframe_from_dict(resp.json["data"])

    # Expected column names
    for column in ("model-output", "model-input"):
        assert column in result
Example #5
0
    def dataframe_from_response(
            response: typing.Union[dict, bytes]) -> pd.DataFrame:
        """
        The response from the server, parsed as either JSON / dict or raw bytes,
        of which would be expected to be loadable from :func:`server.utils.dataframe_from_parquet_bytes`

        Parameters
        ----------
        response: Union[dict, bytes]
            The parsed response from the ML server.

        Returns
        -------
        pandas.DataFrame
        """
        # dict responses carry the frame under the "data" key; anything
        # else is treated as raw parquet bytes.
        if isinstance(response, dict):
            return server_utils.dataframe_from_dict(response["data"])
        return server_utils.dataframe_from_parquet_bytes(response)