def test_xtrain_larger_than_xtest():
    """Check that 'xtrain.csv' holds more rows than 'xtest.csv'.

    Row counts are taken as the length of each loaded dataset's index.
    """
    # given: the size of the training split
    train_rows = len(load_dataset(file_name='xtrain.csv').index)

    # when: the size of the test split
    test_rows = len(load_dataset(file_name='xtest.csv').index)

    # then: both sizes exist and the training split is the larger one
    assert train_rows is not None
    assert test_rows is not None

    assert train_rows > test_rows
def test_make_single_prediction():
    """Predict on the first row of 'test.csv' and check the pinned result.

    The first entry under the 'prediction' key must be a float whose
    ceiling equals 112476.
    """
    # given: a one-row slice of the test dataset
    first_row = load_dataset(file_name='test.csv')[0:1]

    # when
    result = make_prediction(input_data=first_row)

    # then
    assert result is not None
    first_prediction = result.get('prediction')[0]
    assert isinstance(first_prediction, float)
    assert math.ceil(first_prediction) == 112476
def test_xtest_playerNname_dtype():
    """Check that the "playerName" column of 'xtest.csv' has dtype int64."""
    # given
    xtest = load_dataset(file_name='xtest.csv')

    # when
    expected = np.dtype(np.int64)
    actual = xtest["playerName"].dtypes

    # then
    assert actual == expected
def test_xtrain_column_length():
    """Check the column count of 'xtrain.csv' against cfg.FEATURE_LIST.

    Six columns are subtracted from the file's column count before the
    comparison.
    NOTE(review): presumably those six are non-feature columns -- confirm
    against the dataset.
    """
    # given
    xtrain = load_dataset(file_name='xtrain.csv')
    observed_feature_count = len(xtrain.columns) - 6

    # when
    expected_feature_count = len(cfg.FEATURE_LIST)

    # then
    assert observed_feature_count is not None
    assert observed_feature_count == expected_feature_count
def test_prediction_endpoint_returns_prediction(flask_test_client):
    """POST one test record to the regression endpoint and check the reply.

    Expects HTTP 200, a 'predictions' value whose ceiling is 112476, and a
    'version' value equal to ``_version``.
    """
    # given: the first record of the testing data, serialised as JSON records
    dataset = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    payload = dataset[0:1].to_json(orient='records')

    # when
    response = flask_test_client.post('/v1/predict/regression', json=payload)

    # then
    assert response.status_code == 200
    body = json.loads(response.data)
    predicted_value = body['predictions']
    reported_version = body['version']
    assert math.ceil(predicted_value) == 112476
    assert reported_version == _version
Пример #6
0
def test_prediction_endpoint_validation_200(flask_test_client):
    """POST the whole testing dataset and check every row is accounted for.

    Expects HTTP 200 and that the number of returned 'predictions' plus the
    number of returned 'errors' equals the number of rows sent.
    """
    # given
    dataset = load_dataset(file_name=config.TESTING_DATA_FILE)
    payload = dataset.to_json(orient='records')

    # when
    response = flask_test_client.post(
        '/v1/predict/regression',
        json=payload,
    )

    # then
    assert response.status_code == 200
    body = json.loads(response.data)

    prediction_count = len(body.get('predictions'))
    error_count = len(body.get('errors'))
    assert prediction_count + error_count == len(dataset)
def test_make_multiple_predictions():
    """Predict on all of 'test.csv' and check the number of predictions.

    Exactly 1451 predictions are expected, and that count must differ from
    the number of input rows.
    """
    # given
    dataset = load_dataset(file_name='test.csv')
    input_row_count = len(dataset)

    # when
    result = make_prediction(input_data=dataset)

    # then
    assert result is not None
    prediction_count = len(result.get('prediction'))
    assert prediction_count == 1451

    # the output is expected to be a different length than the input
    assert prediction_count != input_row_count
Пример #8
0
def test_make_single_prediction():
    """Predict on the first record of 'xtest.csv' and check the pinned result.

    The first entry under the 'predictions' key must be a float whose
    ceiling equals 16.
    """
    # given: the first record of the dataset, serialised as JSON records
    dataset = load_dataset(file_name='xtest.csv')
    first_record_json = dataset[0:1].to_json(orient='records')

    # when
    result = make_prediction(input_data=first_record_json)

    # then
    assert result is not None
    first_prediction = result.get('predictions')[0]
    assert isinstance(first_prediction, float)
    # 16 is the pinned expected value; it must be refreshed whenever the
    # model output changes.
    assert math.ceil(first_prediction) == 16
Пример #9
0
def test_make_multiple_predictions():
    """Predict on all of 'xtest.csv' and check one prediction per input row.

    Exactly 1034 predictions are expected, matching the input row count.
    """
    # given: the full dataset, serialised as JSON records
    dataset = load_dataset(file_name='xtest.csv')
    input_row_count = len(dataset)
    all_records_json = dataset.to_json(orient='records')

    # when
    result = make_prediction(input_data=all_records_json)

    # then
    assert result is not None
    prediction_count = len(result.get('predictions'))
    assert prediction_count == 1034  # white-box check against a known size

    # no rows are expected to be filtered out
    assert prediction_count == input_row_count
def run_training() -> None:
    """Fit ``pipeline.price_pipe`` on the training data and persist it.

    Loads ``config.TRAINING_DATA_FILE``, holds out 10% of the rows with a
    fixed seed, fits the pipeline on the natural log of the training
    target, and saves the fitted pipeline via ``save_pipeline``.
    """
    # Load the raw training data.
    dataset = load_dataset(file_name=config.TRAINING_DATA_FILE)
    features = dataset[config.FEATURES]
    target = dataset[config.TARGET]

    # Split into train/test; only the training part is used below.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.1,
        random_state=0,  # fixed seed for a reproducible split
    )

    # The model is fitted on the log-transformed target.
    log_y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], log_y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Пример #11
0
def run_training():
    """Fit ``pipeline.ffml_pipe`` on the training data and persist it.

    Loads ``cfg.TRAINING_DATA_FILE``, holds out 20% of the rows with a
    fixed seed, fits the pipeline on the untransformed training target,
    and saves the fitted pipeline via ``save_pipeline``. Progress is
    reported on stdout.
    """
    print('Training the model...')

    # Load the raw training data.
    dataset = load_dataset(file_name=cfg.TRAINING_DATA_FILE)
    features = dataset[cfg.FEATURE_LIST]
    target = dataset[cfg.TARGET]

    # Split into train/test; only the training part is used below.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=0,  # fixed seed for a reproducible split
    )

    # No target transformation is applied; if one is introduced for the
    # data, it belongs here.
    pipeline.ffml_pipe.fit(X_train[cfg.FEATURE_LIST], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.ffml_pipe)

    print('Model trained...')