Example #1
def test_second_anomaly_prediction_endpoint_all_columns(
    second_base_route,
    sensors_str,
    influxdb,
    gordo_ml_server_client,
    sensors,
    resp_format,
):
    data_to_post = {
        "X": np.random.random(size=(10, len(sensors_str))).tolist(),
        "y": np.random.random(size=(10, len(sensors_str))).tolist(),
    }

    endpoint = (
        f"{second_base_route}/anomaly/prediction?all_columns=yes&format={resp_format}"
    )

    resp = gordo_ml_server_client.post(endpoint, json=data_to_post)

    assert resp.status_code == 200
    if resp_format in (None, "json"):
        assert "data" in resp.json
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    assert "smooth-tag-anomaly-scaled" in data
    assert "smooth-tag-anomaly-unscaled" in data
    assert "smooth-total-anomaly-scaled" in data
    assert "smooth-total-anomaly-unscaled" in data
Example #2
def test_ml_server_dataframe_to_dict_and_back(sensors_str, use_test_project_tags):
    """
    Tests the flow of the server creating a dataframe from the model's data, putting into
    a dict of string to df. lists of values, and the client being able to reconstruct it back
    to the original dataframe (less the second level names)
    """
    # Run test with test project tag names
    if use_test_project_tags:
        tags = sensors_str
    # Run test with random tag names
    else:
        tags = [string.ascii_uppercase[i] for i in range(len(sensors_str))]

    # Some synthetic data
    original_input = np.random.random((10, len(tags)))
    model_output = np.random.random((10, len(tags)))

    # Convert this data into a dataframe with multi index columns
    df = model_utils.make_base_dataframe(tags, original_input, model_output)

    # Server then converts this into a dict which maps top level names to lists
    serialized = server_utils.dataframe_to_dict(df)

    # Client reproduces this dataframe
    df_clone = server_utils.dataframe_from_dict(serialized)

    # Each subset of columns under the top-level names should be equal
    top_lvl_names = df.columns.get_level_values(0)
    for top_lvl_name in filter(lambda n: n not in ("start", "end"), top_lvl_names):
        assert np.allclose(df[top_lvl_name].values, df_clone[top_lvl_name].values)
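
For readers unfamiliar with gordo's helpers, the round trip above can be
approximated in plain pandas. This is a sketch of the idea only, not
server_utils' actual implementation:

import numpy as np
import pandas as pd

tags = ["tag-1", "tag-2"]
columns = pd.MultiIndex.from_product([("model-input", "model-output"), tags])
df = pd.DataFrame(np.random.random((10, 4)), columns=columns)

# Serialize each top-level group to plain lists of row values...
serialized = {
    top: df[top].values.tolist()
    for top in df.columns.get_level_values(0).unique()
}
# ...and rebuild; the second-level tag names degrade to positional integers.
clone = pd.concat(
    {top: pd.DataFrame(values) for top, values in serialized.items()}, axis=1
)
for top in serialized:
    assert np.allclose(df[top].values, clone[top].values)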
Example #3
def test_dataframe_to_from_dict(expect_multi_lvl: bool, data: dict):
    """
    Creating dataframes from various raw data structures should have determined behavior
    such as not creating MultiIndex columns with a dict of simple key to array mappings.
    """
    df = server_utils.dataframe_from_dict(data)
    if expect_multi_lvl:
        assert isinstance(df.columns, pd.MultiIndex)
    else:
        assert not isinstance(df.columns, pd.MultiIndex)
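
The distinction under test can be reproduced with plain pandas: a flat mapping
yields single-level columns, while a nested mapping is what justifies a
MultiIndex (again a sketch, not the helper's internal logic):

import pandas as pd

# A flat key-to-array mapping gives ordinary single-level columns...
flat = pd.DataFrame.from_dict({"tag-1": [1.0, 2.0], "tag-2": [3.0, 4.0]})
assert not isinstance(flat.columns, pd.MultiIndex)

# ...while nesting frames under top-level names produces MultiIndex columns.
nested = pd.concat({"model-input": flat}, axis=1)
assert isinstance(nested.columns, pd.MultiIndex)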
Example #4
def test_anomaly_prediction_endpoint(
    base_route,
    sensors_str,
    influxdb,
    gordo_ml_server_client,
    data_size,
    sensors,
    resp_format,
):
    """
    Anomaly GET and POST responses are the same
    """

    data_to_post = {
        "X": np.random.random(size=(data_size, len(sensors_str))).tolist(),
        "y": np.random.random(size=(data_size, len(sensors_str))).tolist(),
    }

    endpoint = f"{base_route}/anomaly/prediction"
    if resp_format is not None:
        endpoint += f"?format={resp_format}"

    resp = gordo_ml_server_client.post(endpoint, json=data_to_post)

    # From here, the response should be (pretty much) the same format from GET or POST
    assert resp.status_code == 200
    if resp_format in (None, "json"):
        assert "data" in resp.json
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    # The only difference between POST and GET is that POST returns None for
    # the start and end dates, because the server can't know what those are.
    assert "start" in data
    assert "end" in data
    # data_to_post is always set above, so only this first branch runs here;
    # the else branch would apply to a GET against stored data.
    if data_to_post is not None:
        assert np.all(data["start"].isna())
        assert np.all(data["end"].isna())
    else:
        assert not np.any(data["start"].isna())
        assert not np.any(data["end"].isna())

    assert all(key in data for key in (
        "total-anomaly-scaled",
        "total-anomaly-unscaled",
        "tag-anomaly-scaled",
        "tag-anomaly-unscaled",
        "model-input",
        "model-output",
    ))
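
Outside the test suite, the same POST could be issued with any HTTP client. The
host and route below are placeholders (the real URL depends on the deployment),
so treat this purely as a sketch:

import numpy as np
import requests

payload = {
    "X": np.random.random((10, 4)).tolist(),
    "y": np.random.random((10, 4)).tolist(),
}
# Placeholder URL; substitute your server's actual base route.
resp = requests.post(
    "http://localhost:5555/gordo/v0/my-project/my-model/anomaly/prediction",
    json=payload,
)
resp.raise_for_status()
data = resp.json()["data"]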
Example #5
def test_dataframe_from_to_dict(df):
    """
    Test (de)serializations back and forth between dataframe -> dict -> dataframe
    """
    index_was_datetimes: bool = isinstance(df.index, pd.DatetimeIndex)

    cloned = server_utils.dataframe_from_dict(
        server_utils.dataframe_to_dict(df))

    if index_was_datetimes:
        # Ensure the function hasn't mutated the index.
        assert isinstance(df.index, pd.DatetimeIndex)

    assert np.allclose(df.values, cloned.values)
    assert df.columns.tolist() == cloned.columns.tolist()
    assert df.index.tolist() == cloned.index.tolist()
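
The index_was_datetimes guard exists because a careless serializer could
convert the index in place. A minimal sketch of a non-mutating round trip
through ISO strings (not gordo's actual code):

import pandas as pd

df = pd.DataFrame(
    {"value": [1.0, 2.0, 3.0]},
    index=pd.date_range("2020-01-01", periods=3, freq="h"),
)

# Serialize to JSON-safe ISO strings without touching df.index itself.
payload = {ts.isoformat(): row.tolist() for ts, row in zip(df.index, df.values)}
clone = pd.DataFrame.from_dict(payload, orient="index", columns=df.columns)
clone.index = pd.to_datetime(clone.index)

assert isinstance(df.index, pd.DatetimeIndex)  # original left untouched
assert df.index.tolist() == clone.index.tolist()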
Example #6
def test_prediction_endpoint_post_ok(
    base_route,
    sensors,
    sensors_str,
    gordo_ml_server_client,
    data_size,
    to_dict_arg,
    resp_format,
    send_as_parquet,
):
    """
    Test the expected successful data posts, by sending a variety of valid
    JSON formats of a dataframe, as well as parquet serializations.
    """
    data_to_post = np.random.random(size=(data_size, len(sensors))).tolist()

    if to_dict_arg is not None:
        df = pd.DataFrame(data_to_post, columns=sensors_str)
        data_to_post = df.to_dict(to_dict_arg)

    endpoint = f"{base_route}/prediction"
    if resp_format is not None:
        endpoint += f"?format={resp_format}"

    if send_as_parquet:
        X = pd.DataFrame.from_dict(data_to_post)
        kwargs = dict(data={
            "X": (io.BytesIO(server_utils.dataframe_into_parquet_bytes(X)),
                  "X")
        })
    else:
        kwargs = dict(json={"X": data_to_post})

    resp = gordo_ml_server_client.post(endpoint, **kwargs)
    assert resp.status_code == 200

    if resp_format in (None, "json"):
        data = server_utils.dataframe_from_dict(resp.json["data"])
    else:
        data = server_utils.dataframe_from_parquet_bytes(resp.data)

    # Expected column names
    assert all(key in data for key in ("model-output", "model-input"))
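
server_utils.dataframe_into_parquet_bytes and dataframe_from_parquet_bytes
presumably wrap a byte-level parquet round trip; with the pyarrow engine
installed, the plain-pandas equivalent looks like this:

import io

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.random((5, 3)), columns=["a", "b", "c"])

# DataFrame -> parquet bytes -> DataFrame.
buf = io.BytesIO()
df.to_parquet(buf)
clone = pd.read_parquet(io.BytesIO(buf.getvalue()))

assert np.allclose(df.values, clone.values)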
Example #7
    def dataframe_from_response(
            response: typing.Union[dict, bytes]) -> pd.DataFrame:
        """
        The response from the server, parsed as either JSON / dict or raw bytes,
        of which would be expected to be loadable from :func:`server.utils.dataframe_from_parquet_bytes`

        Parameters
        ----------
        response: Union[dict, bytes]
            The parsed response from the ML server.

        Returns
        -------
        pandas.DataFrame
        """
        if isinstance(response, dict):
            predictions = server_utils.dataframe_from_dict(response["data"])
        else:
            predictions = server_utils.dataframe_from_parquet_bytes(response)
        return predictions
Example #8
def test_dataframe_from_dict_ordering(index):
    """
    We expect that from_dict should order based on the index, and will parse the index
    either as datetime or integers and sort in ascending order from there.
    """
    df = pd.DataFrame(np.random.random((10, 5)))
    df.index = index
    original = df.copy()

    # What we want
    if isinstance(original.index[0], str):
        # Parse as datetime or integers if index is string
        try:
            original.index = original.index.map(dateutil.parser.isoparse)
        except ValueError:
            original.index = original.index.map(int)
    original.sort_index(inplace=True)

    # What we get
    df_out = server_utils.dataframe_from_dict(
        server_utils.dataframe_to_dict(df))

    assert np.all(df_out.index == original.index)
    assert np.all(df_out.values == original.values)
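
The parse-then-sort expectation asserted above can be shown on its own; this
mirrors what the test demands of from_dict, not the helper's internals:

import dateutil.parser
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.random((3, 2)))
df.index = ["2020-01-03", "2020-01-01", "2020-01-02"]

# A string index is parsed as datetimes (falling back to integers), then sorted.
try:
    df.index = df.index.map(dateutil.parser.isoparse)
except ValueError:
    df.index = df.index.map(int)
df.sort_index(inplace=True)

assert df.index.is_monotonic_increasing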