Пример #1
0
def log_prediction_error_chart(regressor,
                               X_train,
                               X_test,
                               y_train,
                               y_test,
                               experiment=None):
    """Log prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Logging is best-effort: if building or logging the chart raises, the error
    is printed and the function returns normally. The matplotlib figure is
    closed whether or not logging succeeded.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Raises:
        AssertionError: If ``regressor`` does not pass ``is_regressor``.

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    # Stays None if plt.subplots() itself fails, so the cleanup below can tell
    # whether there is a figure to release.
    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Prediction Error')
    except Exception as e:
        # Deliberate best-effort: a failed chart must not break the caller.
        print('Did not log prediction error chart. Error: {}'.format(e))
    finally:
        # Close on the failure path too; otherwise the figure stays registered
        # with pyplot and leaks.
        if fig is not None:
            plt.close(fig)
Пример #2
0
def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test):
    """Create prediction error chart.

    Chart creation is best-effort: if anything raises while the chart is
    built, the error is printed and ``None`` is returned. The matplotlib
    figure is closed whether or not creation succeeded.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` if the chart could not be created.

    Raises:
        AssertionError: If ``regressor`` does not pass ``is_regressor``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['prediction_error'] = npt_utils.create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None
    # Stays None if plt.subplots() itself fails, so the cleanup below can tell
    # whether there is a figure to release.
    fig = None

    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        # Deliberate best-effort: a failed chart must not break the caller.
        print('Did not log prediction error chart. Error: {}'.format(e))
    finally:
        # Close on the failure path too; otherwise the figure stays registered
        # with pyplot and leaks.
        if fig is not None:
            plt.close(fig)

    return chart
Пример #3
0
    def test_prepredict_regressor(self):
        """
        Check that PrePredict can wrap precomputed regression predictions and
        drive a PredictionError visualizer
        """
        X, y = self.continuous.X, self.continuous.y

        # Produce the predictions ahead of time with a plain linear model
        fitted_model = LinearRegression().fit(X.train, y.train)
        y_pred = fitted_model.predict(X.test)

        # Wrap the stored predictions in the prepredict estimator
        estimator = PrePredict(y_pred, REGRESSOR)

        # fit() must hand back the estimator itself
        fit_result = estimator.fit(X.train, y.train)
        assert fit_result is estimator

        # predict() must return the very same stored predictions object
        predicted = estimator.predict(X.train)
        assert predicted is y_pred

        test_score = estimator.score(X.test, y.test)
        assert test_score == pytest.approx(0.9999983124154966, rel=1e-2)

        # The visualizer should run end to end on top of the pre-predictions
        viz = PredictionError(estimator)
        viz.fit(X.train, y.train)
        viz.score(X.test, y.test)
        viz.finalize()

        self.assert_images_similar(viz, tol=10.0)
Пример #4
0
def regression(fname="regression.png"):
    """
    Render the two-panel regression figure (prediction error on the left,
    residuals on the right) and save it under FIGURES as ``fname``
    """
    _, axes = plt.subplots(ncols=2, figsize=(18, 6))
    alphas = np.logspace(-10, 1, 300)
    data = load_concrete(split=True)

    # One (visualizer, estimator, axes) triple per panel, left to right:
    # prediction error with LassoCV, then residuals with RidgeCV
    panels = (
        (PredictionError, LassoCV, axes[0]),
        (ResidualsPlot, RidgeCV, axes[1]),
    )
    for visualizer_cls, estimator_cls, panel_ax in panels:
        oz = visualizer_cls(estimator_cls(alphas=alphas), ax=panel_ax)
        oz.fit(data.X.train, data.y.train)
        oz.score(data.X.test, data.y.test)
        oz.finalize()

    # Tighten the layout, then write the figure to disk
    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES, fname))
Пример #5
0
def regression(fname="regression.png"):
    """
    Render the two-panel regression figure (prediction error on the left,
    residuals on the right) and save it under FIGURES as ``fname``
    """
    _, axes = plt.subplots(ncols=2, figsize=(18, 6))
    alphas = np.logspace(-10, 1, 300)
    data = load_concrete(split=True)

    # One (visualizer, estimator, axes) triple per panel, left to right:
    # prediction error with LassoCV, then residuals with RidgeCV
    panels = (
        (PredictionError, LassoCV, axes[0]),
        (ResidualsPlot, RidgeCV, axes[1]),
    )
    for visualizer_cls, estimator_cls, panel_ax in panels:
        oz = visualizer_cls(estimator_cls(alphas=alphas), ax=panel_ax)
        oz.fit(data.X.train, data.y.train)
        oz.score(data.X.test, data.y.test)
        oz.finalize()

    # Tighten the layout, then write the figure to disk
    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES, fname))
Пример #6
0
Koefisien yang paling besar dari model adalah GrLivArea sebesar 0.3154, artinya harga rumah sensitif dengan kolom ini. Apabila
terjadi peningkatan terhadap nilai GrLivArea, harga rumah akan meningkat lebih tinggi dibandingkan apabila terjadi kenaikan pada feature yang lain dengan kenaikan yang sama.
Perhatikan juga terdapat feature dengan nilai koefisien yang negatif (ExterQual_TA dan ExterQual_Fa), artinya apabila feature ini meningkat maka harga rumah akan menjadi lebih turun.
'''
'''
#### 2. Residual Plot
'''
# Residuals plot for the linear model: fit the visualizer on the training
# split, score it on the test split, then finalize the drawing.
st.write('')
visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
visualizer_residual.finalize()

# NOTE(review): no figure is passed here, so this presumably renders the
# current global pyplot figure; confirm against the Streamlit version in use.
st.pyplot()
'''
Residual berdistribusi paling banyak pada nilai 0. Akan tetapi, masih terdapat nilai residual yang cukup tinggi. Hal ini menyebabkan distribusi dari residual tidak sepenuhnya normal, tetapi menjadi skew.
'''
'''
#### 3. Prediction Error
'''

# Prediction error plot for the linear model: fit the visualizer on the
# training split, score it on the test split, then finalize the drawing.
st.write('')
visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
visualizer_prediction_error.finalize()

# NOTE(review): no figure is passed here, so this presumably renders the
# current global pyplot figure; confirm against the Streamlit version in use.
st.pyplot()
'''
Antara garis best fit dengan garis identity tidak begitu jauh, sehingga dapat dikatakan bahwa model yang dibuat optimal.
'''
def evaluate_results_time_series(df,
                                 time_period_col,
                                 model,
                                 target,
                                 path_to_save_report,
                                 max_features=None,
                                 plot_since_period=0):
    """Score ``model`` period by period and write a PDF report of the plots.

    For every period after the smallest value in ``df[time_period_col]``, rows
    from earlier periods form the training split and rows from that period
    form the test split. The model is not fitted here; it is only asked to
    predict on the test split, and the error is computed with ``rmse``. For
    periods at or after ``plot_since_period`` a page with a prediction error
    plot and a residuals plot is appended to the PDF report.

    Args:
        df: DataFrame holding the features, the target and the period column.
        time_period_col: Name of the column with the period of each row. Its
            minimum and maximum are used as ``range`` bounds.
        model: Estimator exposing ``predict``; expected to be fitted already.
        target: Column label (or labels) of the regression target.
        path_to_save_report: Path of the PDF file to write.
        max_features: Unused; kept for backward compatibility.
        plot_since_period: First period for which plots are added to the PDF.

    Returns:
        Tuple ``(model, X_train, y_train, X_test, y_test, mean_error)``. The
        splits are those of the last evaluated period (``None`` when no period
        was evaluated) and ``mean_error`` is the list of per-period errors.
    """
    mean_error = []
    # Defined up front so the return statement is valid even when the period
    # range is empty (a single distinct period in the data).
    X_train = y_train = X_test = y_test = None

    periods = df[time_period_col]

    with PdfPages(path_to_save_report) as pdf:
        for period in range(periods.min() + 1, periods.max() + 1):

            # Split on the caller-supplied column rather than a hard-coded
            # ``time_period`` attribute.
            train = df[periods < period]
            test = df[periods == period]

            # Keyword form: a positional axis is not accepted by pandas 2.x.
            X_train = train.drop(columns=target)
            X_test = test.drop(columns=target)
            y_train, y_test = train[target], test[target]

            # The model is intentionally not refitted per period here.
            y_pred = model.predict(X_test)
            error = rmse(y_test, y_pred)

            mean_error.append(error)

            if period >= plot_since_period:

                fig = plt.figure(figsize=(22, 5))
                fig.suptitle(
                    'Period {} - Error {} - Train size: {} / Test size: {}'.
                    format(period, round(error, 5), len(y_train), len(y_test)),
                    fontsize=14)
                fig.subplots_adjust(top=0.85, wspace=0.1)

                ax1 = fig.add_subplot(1, 2, 1)

                visualizer = PredictionError(model, ax=ax1, line_color="red")
                visualizer.score(X_test, y_test)
                visualizer.finalize()

                ax2 = fig.add_subplot(1, 2, 2)
                visualizer = ResidualsPlot(model, ax=ax2)
                # NOTE(review): depending on the yellowbrick version this fit
                # may refit the wrapped model; confirm this is intended.
                visualizer.fit(X_train, y_train)
                visualizer.score(X_test, y_test)
                visualizer.finalize()

                pdf.savefig(fig)
                # Close this exact figure, not whichever one is current.
                plt.close(fig)

            _logger.info('Period %d - Error %.5f', period, error)

    if mean_error:
        _logger.info('Mean Error = %.5f', np.mean(mean_error))
    else:
        _logger.warning('No periods to evaluate; mean error is undefined')

    return model, X_train, y_train, X_test, y_test, mean_error