Пример #1
0
def test_ignore_nan_values():
    """Check that nwrmsle is 0.0 for matching inputs that share a NaN entry."""
    # Estimate and actual are identical; the last element of each is NaN.
    predicted = np.array([1, 2, 3, np.nan])
    observed = np.array([1, 2, 3, np.nan])
    unit_weights = np.array([1, 1, 1, 1])

    assert evaluation.nwrmsle(predicted, observed, unit_weights) == 0.0
def main(model=Model.DECISION_TREE, seed=None):
    """Train the requested model, score it on the validation set and record the results.

    Args:
        model: Model selector handed to the tracker and to ``train_model``.
        seed: Value forwarded unchanged to ``train_model``.
    """
    raw_train, raw_validate = load_data()
    train, validate = encode(raw_train, raw_validate)

    with tracking.track() as track:
        track.set_model(model)
        # Keep the trained model under its own name instead of rebinding ``model``.
        fitted_model, params = train_model(train, model, seed)
        track.log_params(params)
        predictions = make_predictions(fitted_model, validate)

        print("Calculating metrics")
        nwrmsle_score = evaluation.nwrmsle(
            predictions,
            validate['unit_sales'].values,
            validate['perishable'].values,
        )
        r2 = metrics.r2_score(
            y_true=validate['unit_sales'].values,
            y_pred=predictions,
        )
        evaluation_metrics = {'nwrmsle': nwrmsle_score, 'r2_score': r2}
        track.log_metrics(evaluation_metrics)

        write_predictions_and_score(
            evaluation_metrics, fitted_model, raw_train.columns)

        summary = json.dumps(evaluation_metrics)
        print("Evaluation done with metrics {}.".format(summary))
Пример #3
0
def test_calculates_nwrmsle_for_perfect_match():
    """Check that nwrmsle is exactly 0.0 when the estimate equals the actual values."""
    predicted, observed = np.array([1, 2, 3]), np.array([1, 2, 3])
    unit_weights = np.array([1, 1, 1])

    score = evaluation.nwrmsle(predicted, observed, unit_weights)
    assert score == 0.0
Пример #4
0
def test_eliminate_negative_values():
    """Check that nwrmsle is 0.0 for matching inputs that contain a negative value."""
    # Estimate and actual are identical; the last element of each is -1.
    predicted = np.array([1, 2, 3, -1])
    observed = np.array([1, 2, 3, -1])
    unit_weights = np.array([1, 1, 1, 1])

    assert evaluation.nwrmsle(predicted, observed, unit_weights) == 0.0
Пример #5
0
def main():
    """Run the time-series pipeline and report its validation score from nwrmsle."""
    train, validate = load_data()

    print("Not predicting returns...")
    # Negative unit sales are set to zero, first in train and then in validate.
    for frame in (train, validate):
        frame.loc[frame['unit_sales'] < 0, 'unit_sales'] = 0

    # Only the first element of the returned pair is used here.
    validation_predictions, _ = get_predictions(validate, train)

    # Sort both sides by id so predictions and actual rows are passed in the same order.
    ordered_predictions = validation_predictions.sort_values(by=['id'])
    has_prediction = validate.id.isin(validation_predictions['id'])
    matching_rows = validate[has_prediction].sort_values(by=['id'])

    print("Calculating estimated error")
    validation_score = evaluation.nwrmsle(
        ordered_predictions['unit_sales'].values,
        matching_rows['unit_sales'].values,
        matching_rows['perishable'].values,
    )

    write_predictions_and_score(validation_score, 0, train.columns)

    message = "Times series analysis done with a validation score (error rate) of {}."
    print(message.format(validation_score))
Пример #6
0
def test_calculates_nwrmsle_for_perfect_match():
    """Check that an estimate identical to the actual values scores exactly 0.0."""
    predicted = np.array([1, 2, 3])
    observed = np.array([1, 2, 3])
    unit_weights = np.array([1, 1, 1])

    assert evaluation.nwrmsle(predicted, observed, unit_weights) == 0.0
Пример #7
0
def test_calculates_nwrmsle_for_imperfect_match():
    """Check nwrmsle against a hand-computed value for a non-matching estimate.

    Every estimate is 0, every actual value is 1 and all weights are equal;
    the by-hand result for that input is 0.69314718 (numerically ln 2).
    """
    estimate = np.array([0, 0, 0])
    actual = np.array([1, 1, 1])
    weights = np.array([1, 1, 1])
    calculated_nwrmsle = evaluation.nwrmsle(estimate, actual, weights)

    # Compare with ``==``: ``approx(a, b)`` would take ``b`` as the relative
    # tolerance and never compare the calculated value against anything.
    assert calculated_nwrmsle == approx(0.69314718)
Пример #8
0
def test_calculates_nwrmsle_for_imperfect_match():
    """Check nwrmsle against a hand-computed value for a non-matching estimate.

    Every estimate is 0, every actual value is 1 and all weights are equal;
    the by-hand result for that input is 0.69314718 (numerically ln 2).
    """
    estimate = np.array([0, 0, 0])
    actual = np.array([1, 1, 1])
    weights = np.array([1, 1, 1])
    calculated_nwrmsle = evaluation.nwrmsle(estimate, actual, weights)

    # Compare with ``==``: ``approx(a, b)`` would take ``b`` as the relative
    # tolerance and never compare the calculated value against anything.
    assert calculated_nwrmsle == approx(0.69314718)
def main(model=Model.RANDOM_FOREST):
    """Build the requested model, score it on the validation set and write the results.

    Args:
        model: Model selector forwarded to ``make_model``.
    """
    raw_train, raw_validate = load_data()
    train, validate = encode(raw_train, raw_validate)

    # Keep the built model under its own name instead of rebinding ``model``.
    fitted_model = make_model(train, model)
    predictions = make_predictions(fitted_model, validate)

    print("Calculating estimated error")
    validation_score = evaluation.nwrmsle(
        predictions,
        validate['unit_sales'].values,
        validate['perishable'].values,
    )

    write_predictions_and_score(validation_score, fitted_model, raw_train.columns)

    # NOTE(review): the message says "Decision tree" although the default
    # model is RANDOM_FOREST — confirm the intended wording.
    summary = "Decision tree analysis done with a validation score (error rate) of {}."
    print(summary.format(validation_score))
def main():
    """Run the time-series pipeline and report its validation score from nwrmsle."""
    train, validate = load_data()

    print("Not predicting returns...")
    # Negative unit sales are overwritten with zero in both frames.
    negative_train = train['unit_sales'] < 0
    train.loc[negative_train, 'unit_sales'] = 0
    negative_validate = validate['unit_sales'] < 0
    validate.loc[negative_validate, 'unit_sales'] = 0

    # Only the first element of the returned pair is used here.
    validation_predictions, _ = get_predictions(validate, train)

    # Sort both sides by id so predictions and actual rows are passed in the same order.
    preds_by_id = validation_predictions.sort_values(by=['id'])
    predicted_ids = validation_predictions['id']
    actual_by_id = validate[validate.id.isin(predicted_ids)].sort_values(by=['id'])

    print("Calculating estimated error")
    validation_score = evaluation.nwrmsle(
        preds_by_id['unit_sales'].values,
        actual_by_id['unit_sales'].values,
        actual_by_id['perishable'].values,
    )

    write_predictions_and_score(validation_score, 0, train.columns)

    report = "Times series analysis done with a validation score (error rate) of {}."
    print(report.format(validation_score))