def run_training() -> None:
    """Fit the status pipeline on the training split and persist it.

    Loads ``config.DATA_FILE``, splits features/target with
    ``test_size=0.4`` and ``random_state=42``, encodes the training target,
    fits ``status_pipeline`` and hands it to ``save_pipeline``.
    """
    dataset = load_data(filename=config.DATA_FILE)
    features = dataset[config.FEATURES]
    target = dataset[config.TARGET]

    # Only the training portion of the split is needed here.
    X_train, _, y_train, _ = train_test_split(
        features,
        target,
        test_size=0.4,
        random_state=42,
    )

    status_pipeline.fit(X_train, encode_target(y_train))
    save_pipeline(pipeline_to_persist=status_pipeline)
def test_make_single_prediction():
    """A one-row JSON payload must produce a result that is not None."""
    # Given: the first row of the test file, serialised as JSON records
    frame = load_data(filename='test.csv')
    first_row = frame[config.FEATURES][:1]
    payload = first_row.to_json(orient='records')

    # When
    subject = make_prediction(input_data=payload)

    # Then
    assert subject is not None
def test_make_multiple_predictions():
    """Predicting on the whole test file should give one prediction per row."""
    # Given
    test_data = load_data(filename='test.csv')
    original_data_length = len(test_data)
    multiple_test_json = test_data[config.FEATURES].to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    # Compare against the size of the data that was actually loaded instead of
    # a hard-coded row count, so the test does not break when test.csv changes.
    assert len(subject['predictions']) == original_data_length
Пример #4
0
def testing_prediction_endpoint_returns_prediction(flask_test_client):
    """POSTing the first ten feature rows to /predict returns ten predictions.

    Args:
        flask_test_client: test client used to issue the POST request.
    """
    # Given
    test_data = load_data(filename='test.csv')
    features = test_data[model_config.FEATURES][0:10]
    features_json = features.to_json(orient='records')

    # When
    response = flask_test_client.post('/predict', json=features_json)

    # Then
    # Check the status first: an error response may not carry a JSON body, and
    # decoding it up front would hide the real failure behind a parse error.
    assert response.status_code == 200
    response_json = json.loads(response.data)
    assert len(response_json['predictions']) == 10
Пример #5
0
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
import logging

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)

# The persisted pipeline is loaded once, when this module is first executed,
# and the resulting object is reused by make_prediction and the script block.
pipeline_file_name = f"{config.PIPELINE_SAVE_FILE}.pkl"
_status_pipe = load_pipeline(filename=pipeline_file_name)


def make_prediction(*, input_data):
    """Run the loaded status pipeline on JSON-encoded input.

    Args:
        input_data: JSON document to score. A literal JSON string (text
            starting with ``[`` or ``{``) is wrapped in a text buffer before
            parsing; any other value is handed to ``pd.read_json`` unchanged.

    Returns:
        dict: ``{'predictions': ...}`` holding whatever
        ``_status_pipe.predict`` returned for the parsed frame.
    """
    # Function-scope import so this block does not depend on the file header.
    from io import StringIO

    # Passing literal JSON text directly to read_json is deprecated since
    # pandas 2.1; a file-like wrapper is accepted by old and new versions alike.
    source = input_data
    if isinstance(input_data, str) and input_data.lstrip().startswith(('[', '{')):
        source = StringIO(input_data)

    data = pd.read_json(source)
    predictions = _status_pipe.predict(data)
    return {'predictions': predictions}


if __name__ == '__main__':
    # Script mode: score the loaded pipeline on the test portion of the data
    # file and print accuracy plus a classification report.
    dataset = load_data(filename=config.DATA_FILE)
    features = dataset[config.FEATURES]
    target = dataset[config.TARGET]

    # Only the test portion of the split is used for evaluation.
    _, X_test, _, y_test = train_test_split(
        features,
        target,
        test_size=0.4,
        random_state=42,
    )

    y_pred = _status_pipe.predict(X_test)
    y_true = encode_target(y_test)
    acc = accuracy_score(y_true, y_pred)

    print(f"test_accuracy: {acc}")
    print(classification_report(y_true, y_pred))