Exemplo n.º 1
0
def predict(
    ctx: click.Context,
    start: datetime,
    end: datetime,
    data_provider: providers.GordoBaseDataProvider,
    output_dir: str,
    influx_uri: str,
    influx_api_key: str,
    influx_recreate_db: bool,
    forward_resampled_sensors: bool,
    ignore_unhealthy_targets: bool,
    n_retries: int,
    parquet: bool,
):
    """
    Run some predictions against the target

    Builds a ``Client`` from the positional/keyword arguments stored on
    ``ctx.obj`` (extended with the options given here), asks it for
    predictions between ``start`` and ``end``, prints every returned error
    message, optionally writes each prediction frame to disk and then
    terminates the process.

    Parameters
    ----------
    ctx
        Click context; ``ctx.obj["args"]`` and ``ctx.obj["kwargs"]`` are
        passed on to ``Client``. ``ctx.obj["kwargs"]`` is updated in place.
    start, end
        Bounds handed to ``client.predict``.
    data_provider
        Forwarded to ``Client`` as ``data_provider``.
    output_dir
        When not ``None``, each prediction frame is written to
        ``<output_dir>/<name>.csv.gz`` (gzip-compressed CSV).
    influx_uri, influx_api_key, influx_recreate_db
        When ``influx_uri`` is not ``None`` a ``ForwardPredictionsIntoInflux``
        built from these values is attached to the client as its
        ``prediction_forwarder``.
    forward_resampled_sensors, ignore_unhealthy_targets
        Forwarded to ``Client`` unchanged.
    n_retries
        Forwarded to both ``Client`` and the Influx forwarder.
    parquet
        Forwarded to ``Client`` as ``use_parquet``. It does not affect the
        files written here, which are always gzip-compressed CSV.

    Raises
    ------
    SystemExit
        Always; the exit code is 1 if any prediction carried an error
        message, otherwise 0.
    """
    ctx.obj["kwargs"].update(
        {
            "data_provider": data_provider,
            "forward_resampled_sensors": forward_resampled_sensors,
            "ignore_unhealthy_targets": ignore_unhealthy_targets,
            "n_retries": n_retries,
            "use_parquet": parquet,
        }
    )

    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])

    if influx_uri is not None:
        client.prediction_forwarder = ForwardPredictionsIntoInflux(
            destination_influx_uri=influx_uri,
            destination_influx_api_key=influx_api_key,
            destination_influx_recreate=influx_recreate_db,
            n_retries=n_retries,
        )

    # Fire off getting predictions. The result is traversed twice below (once
    # for the error summary, once for writing files), so materialise it: a
    # one-shot iterator would be exhausted by the first loop and the second
    # would silently write nothing.
    predictions = list(
        client.predict(start, end)
    )  # type: typing.List[typing.Tuple[str, pd.DataFrame, typing.List[str]]]

    # Loop over all error messages for each result and log them
    click.secho(
        f"\n{'-' * 20} Summary of failed predictions (if any) {'-' * 20}")
    exit_code = 0
    for (_name, _df, error_messages) in predictions:
        for err_msg in error_messages:
            # Any error message indicates we encountered at least one error
            exit_code = 1
            click.secho(err_msg, fg="red")

    # Shall we write the predictions out?
    if output_dir is not None:
        for (name, prediction_df, _err_msgs) in predictions:
            prediction_df.to_csv(
                os.path.join(output_dir, f"{name}.csv.gz"),
                compression="gzip",
            )

    # Report overall success/failure to the calling shell
    sys.exit(exit_code)
def test_client_predictions_diff_batch_sizes_and_toggle_data_provider(
    influxdb, watchman_service, use_data_provider: bool, batch_size: int
):
    """
    Exercise the prediction client end to end for a given ``batch_size``,
    with or without an explicit Influx data provider, and check that the
    predictions are forwarded into the destination Influx database.

    ``influxdb`` and ``watchman_service`` are not referenced in the body;
    presumably they are fixtures that bring the backing services up.
    """
    # Half-day window the predictions are requested for
    range_start = isoparse("2016-01-01T00:00:00+00:00")
    range_end = isoparse("2016-01-01T12:00:00+00:00")

    # Separate client, used only to inspect what ends up in influx
    verification_client = client_utils.influx_client_from_uri(tu.INFLUXDB_URI)

    # Selects whatever the prediction client has forwarded for the target
    model_output_query = f"""
    SELECT *
    FROM "model-output"
    WHERE("machine" =~ /^{tu.GORDO_SINGLE_TARGET}$/)
    """

    # Nothing has been predicted yet, so the destination must be empty
    preexisting = verification_client.query(model_output_query)
    assert len(preexisting) == 0

    # Only build a provider when the test case asks for one
    if use_data_provider:
        dataframe_client = client_utils.influx_client_from_uri(
            uri=tu.INFLUXDB_URI, dataframe_client=True
        )
        data_provider = providers.InfluxDataProvider(
            measurement=tu.INFLUXDB_MEASUREMENT,
            value_name="Value",
            client=dataframe_client,
        )
    else:
        data_provider = None

    forwarder = ForwardPredictionsIntoInflux(destination_influx_uri=tu.INFLUXDB_URI)
    prediction_client = Client(
        project=tu.GORDO_PROJECT,
        data_provider=data_provider,
        prediction_forwarder=forwarder,
        batch_size=batch_size,
    )

    # Exactly one endpoint is expected to be discovered, and it must be healthy
    endpoints = prediction_client.endpoints
    assert len(endpoints) == 1
    assert all(ep.healthy for ep in endpoints)

    # Run the predictions: one (name, frame, errors) tuple for the one endpoint
    results = prediction_client.predict(start=range_start, end=range_end)
    assert isinstance(results, list)
    assert len(results) == 1

    machine_name, prediction_df, error_messages = results[0]
    assert isinstance(machine_name, str)
    assert isinstance(prediction_df, pd.DataFrame)
    assert isinstance(error_messages, list)

    # The returned frame is expected to be indexed by timestamps
    assert isinstance(prediction_df.index, pd.core.indexes.datetimes.DatetimeIndex)

    # After predicting, the forwarder should have written rows to influx
    vals = verification_client.query(model_output_query)
    assert (
        len(vals) > 0
    ), f"Expected new values in 'predictions' measurement, but found {vals}"