Python PredictionError.score примеры, yellowbrick.regressor.PredictionError.score Python примеры использования

Пример #1

0

Показать файл

def regression_visualization(model, X_train, X_test, y_train, y_test):
    """Draw a prediction-error plot for ``model`` and pass it to ``st.pyplot()``.

    The visualizer is fitted on the training split and scored on the test
    split. The current pyplot figure is then given a title and a legend
    before ``st.pyplot()`` is called.
    """
    pe_plot = PredictionError(model)
    pe_plot.fit(X_train, y_train)
    pe_plot.score(X_test, y_test)

    # Decorate whichever figure pyplot currently treats as active.
    plt.title('Score visualization')
    plt.legend()
    st.pyplot()

Пример #2

0

Показать файл

Файл: gallery.py Проект: tylerhuntington222/yellowbrick

def peplot():
    """Save the ``prediction_error`` image for a Lasso model.

    The data from ``load_concrete()`` is split 80/20; the visualizer is
    fitted on the training part, scored on the test part and handed to
    ``savefig``.
    """
    features, target = load_concrete()
    splits = tts(features, target, test_size=0.2)
    X_train, X_test, y_train, y_test = splits

    viz = PredictionError(Lasso(), ax=newfig())
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    savefig(viz, "prediction_error")

Пример #3

0

Показать файл

Файл: health_death_lib.py Проект: BanJeez/Health-Death

    def prediction_error_plot(lin_model,x_train, y_train, x_test, y_test):
        """Show a 16x12 prediction-error plot for ``lin_model``.

        The visualizer is fitted on the training data, scored on the test
        data and rendered on a dedicated single-axes figure.
        """
        figure = plt.figure(figsize=(16,12))
        axes = figure.add_subplot(111)
        pe_viz = PredictionError(lin_model, ax=axes)

        # Train on the training split, evaluate on the test split.
        pe_viz.fit(x_train, y_train)
        pe_viz.score(x_test, y_test)
        pe_viz.show()

Пример #4

0

Показать файл

def visualiza_erros(train_x,train_y,test_x,test_y):
    """Show a prediction-error plot, then a residuals plot, for linear regression.

    Each plot wraps its own fresh ``LinearRegression``, is fitted on the
    training data, scored on the test data and rendered with ``poof()``.
    """
    for plot_cls in (PredictionError, ResidualsPlot):
        plot = plot_cls(LinearRegression())
        plot.fit(train_x, train_y)
        plot.score(test_x, test_y)
        plot.poof()

Пример #5

0

Показать файл

Файл: sklearn.py Проект: oddeirikigland/neptune-contrib

def log_prediction_error_chart(regressor,
                               X_train,
                               X_test,
                               y_train,
                               y_test,
                               experiment=None):
    """Log prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Any exception raised while drawing or logging the chart is caught and
    reported with ``print``; the matplotlib figure is closed in either case.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        try:
            visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.finalize()
            exp.log_image('charts_sklearn', fig, image_name='Prediction Error')
        finally:
            # Close the figure even when plotting or logging fails; otherwise
            # every failed call leaves an open figure behind in pyplot.
            plt.close(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))

Пример #6

0

Показать файл

	def visualize_prediction_error(self, model_info):
		"""Fit and score a PredictionError visualizer from the entries of ``model_info``.

		``model_info`` must provide the keys ``'model'``, ``'X_train'``,
		``'X_test'``, ``'Y_train'`` and ``'Y_test'``. The visualizer is not
		rendered or returned here.
		"""
		wanted = ('model', 'X_train', 'X_test', 'Y_train', 'Y_test')
		# Look everything up before touching the visualizer, in the listed order.
		model, X_train, X_test, Y_train, Y_test = [model_info[key] for key in wanted]

		pe_viz = PredictionError(model)
		pe_viz.fit(X_train, Y_train)   # train on the training split
		pe_viz.score(X_test, Y_test)   # evaluate on the test split

Пример #7

0

Показать файл

def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    """Draw a residuals plot (left) and a prediction-error plot (right) for ``model``.

    Both visualizers are fitted on the training split and scored on the test
    split. The residuals plot is finalized and the prediction-error plot is
    rendered with ``poof()``.
    """
    _, (left_ax, right_ax) = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

    plt.sca(left_ax)
    residuals = ResidualsPlot(model, ax=left_ax)
    residuals.fit(X_train, y_train)
    residuals.score(X_test, y_test)

    plt.sca(right_ax)
    pred_error = PredictionError(model, ax=right_ax)
    pred_error.fit(X_train, y_train)
    pred_error.score(X_test, y_test)

    residuals.finalize()
    pred_error.poof()

Пример #8

0

Показать файл

Файл: __init__.py Проект: neptune-ai/neptune-sklearn

def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test):
    """Create prediction error chart.

    Any exception raised while drawing or converting the chart is caught and
    reported with ``print``; in that case ``None`` is returned. The matplotlib
    figure is closed in either case.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when the chart could not be created.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['prediction_error'] = npt_utils.create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None

    try:
        fig, ax = plt.subplots()
        try:
            visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            visualizer.finalize()
            chart = neptune.types.File.as_image(fig)
        finally:
            # Close the figure even when plotting or conversion fails;
            # otherwise every failed call leaves an open figure in pyplot.
            plt.close(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))

    return chart

Пример #9

0

Показать файл

def testFunc9(savepath='Results/bikeshare_Ridge_PredictionError.png'):
    '''
    Fit a Ridge model on the bikeshare data and write its
    prediction-error plot to ``savepath``.
    '''
    feature_cols = [
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]
    frame = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    features = frame[feature_cols]
    target = frame["riders"]

    # 70/30 train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3)

    viz = PredictionError(Ridge(alpha=3.181))
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.poof(outpath=savepath)

Пример #10

0

Показать файл

Файл: gallery.py Проект: subodhchhabra/PyCon2017

def perror(ax):
    """Return a LassoCV prediction-error visualizer drawn on ``ax``.

    The visualizer is fitted and scored on the train/test splits returned by
    ``load_data('concrete', ...)``. It is returned without being finalized or
    shown.
    """
    from sklearn.linear_model import LassoCV
    from yellowbrick.regressor import PredictionError

    feature_cols = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    X_train, X_test, y_train, y_test = load_data(
        'concrete', cols=feature_cols, target='strength', tts=True
    )

    viz = PredictionError(LassoCV(), ax=ax)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    return viz

Пример #11

0

Показать файл

Файл: ModelEvaluator.py Проект: qemtek/football_trading

 def prediction_error_plot(self) -> None:
     """Save a plot of actual targets against the values predicted by the model.

     The plot is written under ``self.plots_dir``. When ``LOCAL`` is falsy
     the file is also uploaded to ``S3_BUCKET_NAME``. The current pyplot
     figure is cleared at the end.
     """
     viz = PredictionError(self.trained_model)
     viz.fit(self.X_train, self.y_train)    # train on the training split
     viz.score(self.X_test, self.y_test)    # evaluate on the test split

     plot_path = f"{self.plots_dir}/prediction_error_plot_{self.model_id}.png"
     viz.show(outpath=plot_path)

     if not LOCAL:
         s3_key = f'plots/prediction_error_plot_{self.model_id}.png'
         upload_to_s3(plot_path, s3_key, bucket=S3_BUCKET_NAME)

     plt.clf()

Пример #12

0

Показать файл

Файл: regression_models.py Проект: J-Klass/nyc-property-prices

def lasso_regression(X_train, y_train, X_test, y_test, plot):
    """
    Fit a LassoCV model, print a summary of its coefficients and return
    evaluation metrics as a dict.

    When ``plot`` is truthy, also draw the sorted coefficients as a horizontal
    bar chart, followed by a prediction-error plot and a residuals plot.
    """
    # Fit the lasso regression (the printed alpha is the one LassoCV selected)
    lasso = LassoCV()
    lasso.fit(X_train, y_train)
    print("Best alpha using built-in LassoCV: %f" % lasso.alpha_)
    print("Best score using built-in LassoCV: %f" % lasso.score(X_train, y_train))

    weights = pd.Series(lasso.coef_, index=X_train.columns)
    n_kept = sum(weights != 0)
    n_dropped = sum(weights == 0)
    print(
        "Lasso picked " + str(n_kept)
        + " variables and eliminated the other " + str(n_dropped)
        + " variables"
    )

    # Coefficients in ascending order, used for the bar chart
    ranked = weights.sort_values()

    if plot:
        plt.rcParams["figure.figsize"] = (8.0, 10.0)
        ranked.plot(kind="barh")
        plt.title("Feature importance using Lasso Model")
        plt.show()

        # Prediction error first, then residuals; both are fitted on the
        # training data, scored on the test data and rendered.
        for viz_cls in (PredictionError, ResidualsPlot):
            viz = viz_cls(lasso, size=(1080, 720))
            viz.fit(X_train, y_train)
            viz.score(X_test, y_test)
            viz.show()

    # Test-set predictions feed the error metrics below
    test_pred = lasso.predict(X_test)

    metrics = {
        "name": "Lasso Regression",
        "R squared": lasso.score(X_test, y_test),
        "RMSE": rmse(y_test, test_pred),
        "R squared training": lasso.score(X_train, y_train),
        "MAE": mean_absolute_error(y_test, test_pred),
    }
    return metrics

Пример #13

0

Показать файл

    def test_prepredict_regressor(self):
        """
        PrePredict should serve stored predictions and work inside PredictionError
        """
        # Predictions computed ahead of time by a plain linear regression
        features, target = self.continuous.X, self.continuous.y
        linreg = LinearRegression().fit(features.train, target.train)
        stored_pred = linreg.predict(features.test)

        # The wrapper returns itself from fit and the stored array from predict
        prepredict = PrePredict(stored_pred, REGRESSOR)
        assert prepredict.fit(features.train, target.train) is prepredict
        assert prepredict.predict(features.train) is stored_pred
        expected_score = pytest.approx(0.9999983124154966, rel=1e-2)
        assert prepredict.score(features.test, target.test) == expected_score

        # The visualizer should run end to end on top of the stored predictions
        viz = PredictionError(prepredict)
        viz.fit(features.train, target.train)
        viz.score(features.test, target.test)
        viz.finalize()

        self.assert_images_similar(viz, tol=10.0)

Пример #14

0

Показать файл

def showError():
    """Show a prediction-error plot for a Lasso model on the concrete data.

    The data is split 80/20; the visualizer is fitted on the training part,
    scored on the test part and rendered with ``poof()``.
    """
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame.
    # ``as_matrix()`` was removed in pandas 1.0; ``to_numpy()`` replaces it.
    X = df[feature_names].to_numpy()
    y = df[target_name].to_numpy()

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show/poof the data

Пример #15

0

Показать файл

def regression(fname="regression.png"):
    """
    Save a two-panel figure for regression models: prediction error on the
    left, residuals on the right.
    """
    _, (left_ax, right_ax) = plt.subplots(ncols=2, figsize=(18, 6))
    alpha_grid = np.logspace(-10, 1, 300)
    concrete = load_concrete(split=True)

    # (visualizer class, estimator class, target axes), drawn left to right
    panels = (
        (PredictionError, LassoCV, left_ax),
        (ResidualsPlot, RidgeCV, right_ax),
    )
    for viz_cls, estimator_cls, panel_ax in panels:
        viz = viz_cls(estimator_cls(alphas=alpha_grid), ax=panel_ax)
        viz.fit(concrete.X.train, concrete.y.train)
        viz.score(concrete.X.test, concrete.y.test)
        viz.finalize()

    # Write the figure into the FIGURES directory
    out_path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(out_path)

Пример #16

0

Показать файл

Файл: figures.py Проект: DistrictDataLabs/yellowbrick

def regression(fname="regression.png"):
    """
    Save a side-by-side figure for regression models: LassoCV prediction
    error in the first panel, RidgeCV residuals in the second.
    """
    _, panels = plt.subplots(ncols=2, figsize=(18, 6))
    alpha_grid = np.logspace(-10, 1, 300)
    dataset = load_concrete(split=True)
    train_X, train_y = dataset.X.train, dataset.y.train
    test_X, test_y = dataset.X.test, dataset.y.test

    # First panel: prediction error
    pred_error = PredictionError(LassoCV(alphas=alpha_grid), ax=panels[0])
    pred_error.fit(train_X, train_y)
    pred_error.score(test_X, test_y)
    pred_error.finalize()

    # Second panel: residuals
    residuals = ResidualsPlot(RidgeCV(alphas=alpha_grid), ax=panels[1])
    residuals.fit(train_X, train_y)
    residuals.score(test_X, test_y)
    residuals.finalize()

    # Write the figure into the FIGURES directory
    target_path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(target_path)

Пример #17

0

Показать файл

Koefisien yang paling besar dari model adalah GrLivArea sebesar 0.3154, artinya harga rumah sensitif dengan kolom ini. Apabila
terjadi peningkatan terhadap nilai GrLivArea, harga rumah akan meningkat lebih tinggi dibandingkan apabila terjadi kenaikan pada feature yang lain dengan kenaikan yang sama.
Perhatikan juga terdapat feature dengan nilai koefisien yang negatif (ExterQual_TA dan ExterQual_Fa), artinya apabila feature ini meningkat maka harga rumah akan menjadi lebih turun.
'''
'''
#### 2. Residual Plot
'''
st.write('')
visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
visualizer_residual.finalize()

st.pyplot()
'''
Residual berdistribusi paling banyak pada nilai 0. Akan tetapi, masih terdapat nilai residual yang cukup tinggi. Hal ini menyebabkan distribusi dari residual tidak sepenuhnya normal, tetapi menjadi skew.
'''
'''
#### 3. Prediction Error
'''

st.write('')
visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
visualizer_prediction_error.finalize()

st.pyplot()
'''
Antara garis best fit dengan garis identity tidak begitu jauh, sehingga dapat dikatakan bahwa model yang dibuat optimal.
'''

Пример #18

0

Показать файл

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Script entry point: fit a Lasso model on the concrete data and save its
    # prediction-error plot to images/prediction_error.png.

    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame.
    # ``as_matrix()`` was removed in pandas 1.0; ``to_numpy()`` replaces it.
    X = df[feature_names].to_numpy()
    y = df[target_name].to_numpy()

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof(
        outpath="images/prediction_error.png")  # Draw/show/poof the data

Пример #19

0

Показать файл

Файл: peplot.py Проект: DistrictDataLabs/yellowbrick

import pandas as pd

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

from yellowbrick.regressor import PredictionError


if __name__ == '__main__':
    # Script entry point: fit a Lasso model on the concrete data set and
    # write its prediction-error plot to images/prediction_error.png.
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water',
        'splast', 'coarse', 'fine', 'age',
    ]
    target_name = 'strength'

    # Feature frame and target column
    X, y = df[feature_names], df[target_name]

    # Hold out 20% of the rows for scoring
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Wrap the linear model in the visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    # Train on the training split, evaluate on the held-out split
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    # Render the plot straight to disk
    g = visualizer.poof(outpath="images/prediction_error.png")

Пример #20

0

Показать файл

Файл: lasso_model_casual.py Проект: noahnewberger/Bikeshare-DC

# How does our model perform on the test data?
score_model(lasso)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(lasso)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
# Fit on the training split but score on the held-out test split, as the
# residuals plot above does; scoring on the training data would only show
# in-sample error.
from yellowbrick.regressor import PredictionError
prederr = PredictionError(lasso)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt

lasso.fit(Xtrain, ytrain)

yhat = lasso.predict(Xtest)
resid = ytest - yhat

data = pd.DataFrame({
    't': range(1,
               len(yhat) + 1),
    'ytest': ytest,

Пример #21

0

Показать файл

Файл: Predict Sales Revenue with multiple linear regression.py Проект: BhumikaRavichander/Predict-Sales-Revenue

x_train, x_test, y_train, y_test= train_test_split(x,y, random_state=1)

lm5 = LinearRegression().fit(x_train,y_train)
lm5_pred=lm5.predict(x_test)

print("RMSE = ", np.sqrt(mean_squared_error(y_test,lm5_pred)))
print("R^2 = ", r2_score(y_test,lm5_pred))


# In[30]:


from yellowbrick.regressor import PredictionError, ResidualsPlot

# Prediction-error plot for lm5: the object returned by fit() on the training
# split is kept as the visualizer, scored on the test split and then shown.
visualizer=PredictionError(lm5).fit(x_train, y_train)
visualizer.score(x_test, y_test)
visualizer.show()


# In[32]:


#TASK 7: INTERACTION EFFECT - SYNERGY

advert['interaction']= advert['TV'] * advert['radio']

x=advert[['TV', 'radio', 'interaction']]
y=advert.sales

x_train, x_test, y_train, y_test= train_test_split(x,y, random_state=1)

Пример #22

0

Показать файл

# Model building
# Lasso: fit on the training split, then predict for the scaled input row(s)
regressor = Lasso(alpha=0.005, random_state=0)
regressor.fit(X_train, y_train)
prediction_Lasso = regressor.predict(
    scaler.transform(np.array(values_topredict)))
# Random Forest Regressor: same fit/predict steps as for the Lasso model
regressor1 = RandomForestRegressor(n_estimators=300, random_state=0)
regressor1.fit(X_train, y_train)
prediction_RFR = regressor1.predict(
    scaler.transform(np.array(values_topredict)))

# Prediction-error plot for the Lasso model (fit on train, score on test)
visualiser = PredictionError(regressor)
visualiser.fit(X_train, y_train)
visualiser.score(X_test, y_test)
visualiser.poof()

# Prediction-error plot for the random forest model
visualiser1 = PredictionError(regressor1)
visualiser1.fit(X_train, y_train)
visualiser1.score(X_test, y_test)
visualiser1.poof()

# Random forest predictions on the test split
y_pred1 = regressor1.predict(X_test)

# Bar chart of absolute Lasso coefficients, largest first.
# NOTE(review): assumes the first 20 columns of df line up with the model's
# coefficients in order — confirm against how X_train was built.
importance = pd.Series(np.abs(regressor.coef_.ravel()))
importance.index = df.columns.values.tolist()[:20]
importance.sort_values(inplace=True, ascending=False)
importance.plot.bar()
plt.ylabel('Lasso Coefficients')
plt.title('Feature Importance')

Пример #23

0

Показать файл

def predict():
    """Build a feature vector from the submitted form, predict, and render the result.

    The feature vector is the one-hot encoding of the batting team, followed
    by the one-hot encoding of the bowling team, followed by ``overs``,
    ``runs``, ``wickets``, ``runs_in_prev_5`` and ``wickets_in_prev_5``.
    A team name that is not in the known list contributes no columns.

    Returns:
        The rendered ``prediction.html`` template for POST requests. For any
        other request method nothing is rendered and ``None`` is returned.
    """
    # Column order of the one-hot team encoding.
    teams = (
        'Chennai Super Kings',
        'Delhi Daredevils',
        'Kings XI Punjab',
        'Kolkata Knight Riders',
        'Mumbai Indians',
        'Rajasthan Royals',
        'Royal Challengers Bangalore',
        'Sunrisers Hyderabad',
    )

    def one_hot(team):
        # Unknown names yield an empty list rather than a row of zeros.
        if team not in teams:
            return []
        return [int(team == name) for name in teams]

    filename = request.form['name']
    # NOTE(review): ``filename`` comes straight from the submitted form and is
    # unpickled; unpickling data from an untrusted path can execute arbitrary
    # code. Restrict it to a fixed set of known model files.
    with open(filename, 'rb') as model_file:  # closes the handle after loading
        regressor = pickle.load(model_file)

    if request.method != 'POST':
        return None

    batting_features = one_hot(request.form['batting-team'])
    bowling_features = one_hot(request.form['bowling-team'])

    overs = float(request.form['overs'])
    runs = int(request.form['runs'])
    wickets = int(request.form['wickets'])
    runs_in_prev_5 = int(request.form['runs_in_prev_5'])
    wickets_in_prev_5 = int(request.form['wickets_in_prev_5'])

    temp_array = batting_features + bowling_features + [
        overs, runs, wickets, runs_in_prev_5, wickets_in_prev_5,
    ]

    # A single-row 2-D array, as passed to predict() below
    data = np.array([temp_array])
    my_prediction = int(regressor.predict(data)[0])

    # Prediction-error plot; X_train/X_test/y_train/y_test are not defined in
    # this function and must exist at module level.
    visualizer_pe = PredictionError(regressor)
    visualizer_pe.fit(X_train, y_train)
    visualizer_pe.score(X_test, y_test)
    vpe = visualizer_pe.poof()

    return render_template('prediction.html', lower_limit=my_prediction-10, upper_limit=my_prediction+5, vpe=vpe)

Пример #24

0

Показать файл

Файл: HaBaoAnh_19442001.py Проект: baoanhcr7/Bai_tap_LTPTDL

df.plot.scatter(x='H6', y='H5', c='DarkBlue')
#Bước 7: Phân tích hồi quy Linear Regression
train_df, test_df = train_test_split(df, test_size=0.2, random_state=1)

y_train = np.array(train_df.H5)
X_train = np.array(train_df.H6)
X_train = X_train.reshape(X_train.shape[0], 1)

y_test = np.array(test_df.H5)
X_test = np.array(test_df.H6)
X_test = X_test.reshape(X_test.shape[0], 1)

model1 = LinearRegression()
visualizer = PredictionError(model1)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
# Score on the held-out split: the plot is meant to evaluate the model on the
# test data, and scoring on the training data only shows in-sample error.
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()
#Bước 8 Đánh giá mô hình
print('Coefficients: ', model1.coef_)
print('Score_train: {}'.format(model1.score(X_train, y_train)))
print('Score_test: {}'.format(model1.score(X_test, y_test)))
# plot for residual error

## setting plot style
plt.style.use('fivethirtyeight')

## plotting residual errors in training data
plt.scatter(model1.predict(X_train),
            model1.predict(X_train) - y_train,
            color="green",
            s=10,

Пример #25

0

Показать файл

from sklearn.linear_model import Ridge
from yellowbrick.regressor import PredictionError
import bikeshare

# Prediction-error plot for a Ridge model (alpha=3.181): fit on the bikeshare
# training split, score on its test split, then render with poof().
visualizer = PredictionError(Ridge(alpha=3.181))
visualizer.fit(bikeshare.X_train, bikeshare.y_train)
visualizer.score(bikeshare.X_test, bikeshare.y_test)
visualizer.poof()

Пример #26

0

Показать файл

Файл: backend.py Проект: JackyP/punditkit

 def prediction_error_plot(self):
     """Score a PredictionError visualizer for ``self.pipe`` on the test split.

     ``fit`` is not called on the visualizer here. Returns whatever the
     visualizer's ``show()`` returns.
     """
     viz = PredictionError(self.pipe)
     viz.score(self.X_test, self.y_test)
     rendered = viz.show()
     return rendered

Пример #27

0

Показать файл

Файл: rf_model_total_rides.py Проект: noahnewberger/Bikeshare-DC

score_model(rf)
score_model(rf_random)
score_model(rf_best)

# What do our residuals look like?
# Residuals plot for rf_best: fit on the training split, score on the test
# split, then render with poof().
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(rf_best)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
# Same fit/score/poof sequence with the prediction-error visualizer.
from yellowbrick.regressor import PredictionError
prederr = PredictionError(rf_best)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt

rf_best.fit(Xtrain, ytrain)

yhat = rf_best.predict(Xtest)
error = ytest - yhat

data = pd.DataFrame({
    't': range(1,
               len(yhat) + 1),
    'ytest': ytest,

Пример #28

0

Показать файл

final_s_gbr = sum(acc_gbr) / len(acc_gbr)

acc_train_gbr = []
for i in range(0, len(y_pred_train_gbr)):
    acc_train_gbr.append(abs(y_pred_train_gbr[i] - Y_train[i]) / Y_train[i])
final_s_train_gbr = sum(acc_train_gbr) / len(acc_train_gbr)
final_acc_gbr = (1 - final_s_train_gbr) * 100
print("Accuracy of GradientBoostRegression is")
print(final_acc_gbr)
print("The mean absolute error of GradientBoost ")
mae_gbr = mean_absolute_error(Y_test, y_pred_gbr)
print(mae_gbr)
# NOTE(review): this Lasso instance is not the estimator passed to the
# visualizer below, which wraps ``modelgb`` — confirm whether it is needed.
model = Lasso()
# Prediction-error plot for ``modelgb``
visualizer1 = PredictionError(modelgb)
visualizer1.fit(X_train, Y_train)  # Fit the training data to the visualizer
visualizer1.score(X_test, Y_test)  # Evaluate the model on the test data
g = visualizer1.poof()

from sklearn.ensemble import RandomForestRegressor
rfregressor = RandomForestRegressor(n_estimators=100, random_state=0)
modelrfr = rfregressor.fit(X_train, Y_train)
y_pred_rfr = rfregressor.predict(X_test)
y_pred_train_rfr = rfregressor.predict(X_train)
y_pred_train_rfr = y_pred_train_rfr.tolist()

acc_rfr = []
for i in range(0, len(y_pred_rfr)):
    acc_rfr.append(abs(y_pred_rfr[i] - Y_test[i]) / Y_test[i])
final_s_rfr = sum(acc_rfr) / len(acc_rfr)

acc_train_rfr = []

Пример #29

0

Показать файл

advert.columns = columns
# advert.head()
# advert.info()
col = columns[1:]
# sns.pairplot(advert, x_vars=col, y_vars='线路价格（不含税）', height=14, aspect=0.7)
X = advert[col]
y = advert['线路总成本']
lm1 = LinearRegression()
lm1.fit(X, y)
lm1_predict = lm1.predict(X[col])
xtrain,xtest,ytrain,ytest = train_test_split(X,y,random_state=1)
# print("R^2:",r2_score(y,lm1_predict))
# 高因素影响 R^2: 0.9797304791768885
lm2 = LinearRegression().fit(xtrain,ytrain)
lm2_predict = lm2.predict(xtest)
print("RMSE2:",np.sqrt(mean_squared_error(ytest, lm2_predict)))
print("R^2  lm2:",r2_score(ytest,lm2_predict))
print(lm2.intercept_)
print(lm2.coef_)
# R^2: 0.9797304791768885
# RMSE: 535.8592414949177
# Use lm2, the model fitted on the training split only. lm1 was fitted on the
# full data set, so scoring it on xtest would evaluate it on rows it has
# already seen (or refit lm1 in place, changing the lm1 values printed below).
visualizer = PredictionError(lm2).fit(xtrain,ytrain)
visualizer.score(xtest,ytest)
visualizer.poof()
# sns.heatmap(advert.corr(),cmap="YlGnBu",annot=True)
# plt.show()
print("R^2  lm1:",r2_score(y,lm1_predict))
print(lm1.intercept_)
print(lm1.coef_)
# plt.show()

Пример #30

0

Показать файл

Файл: visualize.py Проект: diefergil/cookiecutter-data-science

def evaluate_results_time_series(df,
                                 time_period_col,
                                 model,
                                 target,
                                 path_to_save_report,
                                 max_features=None,
                                 plot_since_period=0):
    """Evaluate ``model`` period by period and write diagnostic plots to a PDF.

    For every period after the first one, the rows from earlier periods form
    the training split and the rows of that period form the test split. The
    model is NOT refitted here; it is only used to predict on the test split.
    The RMSE of each period is collected and logged.

    For periods at or after ``plot_since_period``, one PDF page is written
    with a prediction-error plot (left) and a residuals plot (right).

    Args:
        df: Data frame holding the features, the target and the period column.
        time_period_col: Name of the column with the integer period index.
        model: Estimator used for prediction and wrapped by the visualizers.
        target: Name of the target column.
        path_to_save_report: Path of the PDF report to write.
        max_features: Unused; kept so existing callers keep working.
        plot_since_period: First period for which plots are written.

    Returns:
        Tuple ``(model, X_train, y_train, X_test, y_test, mean_error)`` where
        the splits are those of the last evaluated period (``None`` when no
        period was evaluated) and ``mean_error`` is the list of per-period
        errors.
    """
    mean_error = []
    # Defined up front so the return statement works when the loop never runs.
    X_train = y_train = X_test = y_test = None

    with PdfPages(path_to_save_report) as pdf:
        # Use the column named by the caller rather than a hard-coded
        # ``df.time_period`` attribute.
        periods = df[time_period_col]
        for period in range(periods.min() + 1, periods.max() + 1):

            train = df[periods < period]
            test = df[periods == period]

            # ``columns=`` replaces the positional axis argument, which
            # pandas 2.0 no longer accepts.
            X_train, X_test = train.drop(columns=target), test.drop(columns=target)
            y_train, y_test = train[target], test[target]

            #model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            error = rmse(y_test, y_pred)

            mean_error.append(error)

            if period >= plot_since_period:

                fig = plt.figure(figsize=(22, 5))
                fig.suptitle(
                    'Period {} - Error {} - Train size: {} / Test size: {}'.
                    format(period, round(error, 5), len(y_train), len(y_test)),
                    fontsize=14)
                fig.subplots_adjust(top=0.85, wspace=0.1)

                ax1 = fig.add_subplot(1, 2, 1)

                visualizer = PredictionError(model, ax=ax1, line_color="red")
                visualizer.score(X_test, y_test)
                visualizer.finalize()

                ax2 = fig.add_subplot(1, 2, 2)
                visualizer = ResidualsPlot(model, ax=ax2)
                visualizer.fit(X_train, y_train)
                visualizer.score(X_test, y_test)
                visualizer.finalize()

                pdf.savefig(fig)
                plt.close(fig)

            # Logged for every period, whether or not it was plotted.
            _logger.info('Period %d - Error %.5f', period, error)

    _logger.info('Mean Error = %.5f', np.mean(mean_error))

    return model, X_train, y_train, X_test, y_test, mean_error

Пример #31

0

Показать файл

Файл: linear_regression.py Проект: eyelovedata/ophthalmologycitations

def scikit_learn_method(x,
                        y,
                        min_x,
                        max_x,
                        max_y,
                        ln_bool,
                        df=all_scopus,
                        test_size=0.2,
                        random_state=0):
    """Fit a one-feature linear regression and plot and report its quality.

    Steps, in order:
      1. optionally replace ``y`` by ``log(y)``;
      2. split into train and test sets and fit ``LinearRegression`` on the
         training part;
      3. show a scatter plot of all points with the fitted line and an
         equation / R^2 annotation ("With outliers");
      4. show a yellowbrick prediction-error plot for a ``Lasso`` model;
      5. show the same scatter restricted to ``[min_x, max_x]`` x
         ``[0, max_y]`` ("Without outliers");
      6. print the root mean squared error of the linear model on the test
         set.

    Args:
        x: Feature values; must support ``.values`` after splitting.
        y: Target values.
        min_x, max_x: x-axis limits of the second scatter plot.
        max_y: Upper y-axis limit of the second scatter plot.
        ln_bool: When truthy, the natural log of ``y`` is modelled.
        df: No longer used by the computation; kept so existing callers
            keep working.
        test_size: Passed to ``train_test_split``.
        random_state: Passed to ``train_test_split`` (a fixed value gives a
            reproducible split).

    Returns:
        ``None``.
    """
    # https://stackoverflow.com/questions/42988348/typeerror-cannot-convert-the-series-to-class-float
    if ln_bool:
        y = np.log(y)

    # Honour the caller's split settings instead of hard-coded 0.2 / 0.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state)

    # Reshape into a single feature column; y needs no reshape.
    # https://stackoverflow.com/questions/18691084/what-does-1-mean-in-numpy-reshape
    x_train = x_train.values.reshape(-1, 1)
    x_test = x_test.values.reshape(-1, 1)

    model_withOutliers = LinearRegression().fit(x_train, y_train)
    m, b = model_withOutliers.coef_[0], model_withOutliers.intercept_

    print('y-hat = %sx + %s' % (m, b))

    # https://stackoverflow.com/questions/41635448/how-can-i-draw-scatter-trend-line-on-matplot-python-pandas/41635626
    from sklearn.metrics import r2_score

    # ``\\;`` yields the same backslash-semicolon as before, without relying
    # on an invalid escape sequence.
    text = f"$y={m:0.3f}\\;x{b:+0.3f}$\n$R^2 = {r2_score(y, m * x + b):0.3f}$"

    def annotate_fit():
        # Write the fitted equation and R^2 onto the current axes.
        axes = plt.gca()
        axes.text(0.05,
                  0.95,
                  text,
                  transform=axes.transAxes,
                  fontsize=14,
                  verticalalignment='bottom')

    plt.scatter(x, y)  # with outliers
    plt.title('With outliers')
    plt.plot(x, m * x + b)
    # Annotate before show(): afterwards the text would land on a new figure.
    annotate_fit()
    plt.show()

    # https://www.scikit-yb.org/en/latest/api/regressor/peplot.html
    from sklearn.linear_model import Lasso
    from yellowbrick.regressor import PredictionError

    lasso_model = Lasso()
    visualizer = PredictionError(lasso_model)

    visualizer.fit(x_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(x_test, y_test)  # Evaluate the model on the test data
    visualizer.show()

    plt.xlim(min_x, max_x)  # without outliers
    plt.ylim(0, max_y)
    plt.title('Without outliers')

    plt.scatter(x, y)
    annotate_fit()
    plt.show()

    # Root mean squared error on the test set. The previous loop averaged the
    # absolute errors, i.e. it reported a mean absolute error under this label.
    y_pred_with_outliers = model_withOutliers.predict(x_test)
    residuals = np.asarray(y_test - y_pred_with_outliers, dtype=float)
    rms_value = float(np.sqrt(np.mean(residuals ** 2)))

    print('Root mean squared, with outliers:', rms_value)

Python PredictionError.score примеры использования