Exemplo n.º 1
0
def test_model_prediction_differential(
        *, save_file: str = 'test_data_predictions.csv'):
    """
    Compare the current model's predictions against the previous
    model's saved predictions (a differential / regression test).

    Parameters
    ----------
    save_file : str
        Name of the CSV file (under the package ``tests`` directory)
        holding the previous model's saved predictions.
    """

    # Given
    # Load the saved previous model predictions
    previous_model_df = pd.read_csv(
        os.path.join(config.PACKAGE_ROOT, 'tests', save_file))
    previous_model_predictions = previous_model_df.predictions.values

    test_data = load_data(file_name=model_config.DATA_FILE)
    multiple_test_input = test_data[:20]

    # When
    current_result = make_predictions(input_data=multiple_test_input)
    current_model_predictions = current_result.get('predictions')
    _logger.info("New predictions were made on model version: {}".format(
        current_result.get('version')))

    # Then
    # diff the current model vs. the old model
    assert len(previous_model_predictions) == len(current_model_predictions)

    # Perform the differential test element-wise: comparing only the
    # sums (as a single isclose on the totals would) lets offsetting
    # per-row errors cancel out and slip through undetected.
    for previous, current in zip(previous_model_predictions,
                                 current_model_predictions):
        assert math.isclose(
            previous,
            current,
            rel_tol=model_config.ACCEPTABLE_MODEL_DIFFERENCE)
Exemplo n.º 2
0
def test_predictions():
    """The model should label the test records as int64 values 0 and 1."""
    records_json = load_data(config.TEST_FILE_NAME).to_json(orient='records')
    result = make_predictions(json.loads(records_json))

    assert result is not None
    preds = result.get('predictions')
    assert preds.dtype.type == np.int64
    assert (preds == np.array([0, 1])).all()
def capture_predictions() -> None:
    """Save the test data predictions to a CSV.

    Runs the current model on the first 20 rows of the data file and
    writes the resulting predictions into the package ``tests``
    directory, so differential tests can later compare against them.
    """

    save_file = 'test_data_predictions.csv'
    test_data = load_data(file_name=model_config.DATA_FILE)

    # Same slice the differential test feeds to the model.
    multiple_test_input = test_data[:20]

    predictions = make_predictions(input_data=multiple_test_input)

    # save predictions for the test dataset
    predictions_df = pd.DataFrame(predictions)

    # hack here to save the file to the regression model
    # package of the repo, not the installed package.
    # index=False keeps a spurious unnamed index column out of the CSV.
    predictions_df.to_csv(
        os.path.join(config.PACKAGE_ROOT, "tests", save_file),
        index=False)
Exemplo n.º 4
0
def train() -> None:
    """Main training function.

    Loads the data, splits it into train and validation sets,
    fits the pipeline on the training features, and persists it.
    """
    data = load_data(file_name=config.DATA_FILE)

    # Renamed from `train`/`val`: the original local shadowed this
    # function's own name. The validation split is kept so the train
    # portion stays identical (same test_size/random_state), even
    # though validation data is not consumed here.
    train_df, _val_df = train_test_split(data,
                                         test_size=config.VAL_SIZE,
                                         random_state=config.SEED)

    X_train = train_df[config.FEATURES]
    y_train = train_df[config.TARGET]

    # X_train already holds exactly config.FEATURES, so the previous
    # re-subscript (X_train[config.FEATURES]) was redundant.
    pipeline.pima_pipeline.fit(X_train, y_train)
    _logger.info('Saving model version: {}'.format(_version))
    save_pipeline(pipeline_to_persist=pipeline.pima_pipeline)
Exemplo n.º 5
0
def test_prediction_endpoint_returns_prediction(flask_test_client):
    # Given
    # Load the test data from the regression_model package.
    # This is important as it makes it harder for the test
    # data versions to get confused by not spreading it
    # across packages.
    payload = json.loads(
        load_data(file_name=model_config.TEST_FILE_NAME).to_json(
            orient='records'))

    # When
    response = flask_test_client.post('/v1/predict/classify', json=payload)

    # Then
    assert response.status_code == 200
    body = json.loads(response.data)
    assert (body['predictions'] == np.array([0, 1])).all()
    assert body['version'] == _version
Exemplo n.º 6
0
def test_prediction_endpoint_validation_200(flask_test_client):
    # Given
    # Load the test data from the deployment_pima package.
    # This is important as it makes it harder for the test
    # data versions to get confused by not spreading it
    # across packages.
    test_data = load_data(file_name=config.TEST_FILE_NAME)
    payload = json.loads(test_data.to_json(orient='records'))

    # When
    response = flask_test_client.post('/v1/predict/classify', json=payload)

    # Then
    assert response.status_code == 200
    body = json.loads(response.data)

    # Rows rejected by validation land in 'errors'; together with the
    # successful predictions they must account for every input row.
    n_predictions = len(body.get('predictions'))
    n_errors = len(body.get('errors'))
    assert n_predictions + n_errors == len(test_data)