import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import ResidualsPlot


def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    _, ax = plt.subplots()
    visualizer = ResidualsPlot(model, ax=ax, **kwargs)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=outpath)
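# A minimal usage sketch for plot_residuals; the dataset and estimator below
# are placeholders (not part of the original snippet).
# from sklearn.linear_model import Ridge
# from yellowbrick.datasets import load_concrete
#
# X, y = load_concrete()
# plot_residuals(X, y, Ridge(), outpath="images/ridge_residuals.png")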
plt.show()

from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(xrm, y)
print(reg.score(xrm, y))

xx = np.linspace(min(xrm), max(xrm)).reshape(-1, 1)
plt.scatter(xrm, y, color="blue")
plt.plot(xx, reg.predict(xx), color="red", linewidth=3)
plt.ylabel("y: Value of house / 1000 USD")
plt.xlabel("x: Number of rooms")
plt.show()

from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(reg, hist=False)
visualizer.fit(xrm, y)
visualizer.score(xrm, y)
visualizer.poof()

# Use the multivariate data; split: 70% training, 30% testing
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
print("R^2 =", reg.score(x_train, y_train))  # training R^2
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

base = pd.read_csv('cars.csv')
base = base.drop(['Unnamed: 0'], axis=1)
X = base.iloc[:, 1].values
y = base.iloc[:, 0].values
correlacao = np.corrcoef(X, y)  # correlation coefficient
X = X.reshape(-1, 1)

modelo = LinearRegression()
modelo.fit(X, y)
modelo.intercept_
modelo.coef_

plt.scatter(X, y)
plt.plot(X, modelo.predict(X), color='red')

# distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22
modelo.predict([[22]])  # predict expects a 2D array
modelo._residues

visualizador = ResidualsPlot(modelo)
visualizador.fit(X, y)
visualizador.poof()
# ------------------------------ Linear regression ---------------------------- #
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X, y)  # training

# ------------------------------- Coefficients -------------------------------- #
# b0
regressor.intercept_
# b1
regressor.coef_

# ---------------------------------- Plot ------------------------------------- #
plt.scatter(X, y)  # relationship between x and y as points
plt.plot(X, regressor.predict(X), color='red')  # regression line
plt.title("Simple linear regression")
plt.xlabel("Age", color='red')
plt.ylabel("Cost", color='red')

# ----------------------- Prediction for a 40-year-old ------------------------ #
previsao1 = regressor.intercept_ + regressor.coef_ * 40  # manual calculation
previsao2 = regressor.predict(np.array(40).reshape(1, -1))

# ---------------------- Evaluating the regression's fit ---------------------- #
score = regressor.score(X, y)

# --------------------------- Residuals plot ---------------------------------- #
from yellowbrick.regressor import ResidualsPlot

visualizador = ResidualsPlot(regressor)
visualizador.fit(X, y)
visualizador.poof()
# Evaluate the results of the regression
mse_train = mean_squared_error(y_train, pred_train)
mse_val = mean_squared_error(y_val, pred_val)
print("MSE score on train dataset : %s" % mse_train)
print("MSE score on validation dataset : %s" % mse_val)

# This is the first score we obtain for our prediction using the Ridge
# regression. It can obviously be improved with more powerful models, but it
# already gives us a benchmark to beat from here on.

# In[25]:

# Instantiate the linear model and visualizer
visualizer = ResidualsPlot(clf)
visualizer.fit(x_train, y_train)   # Fit the training data to the model
visualizer.score(x_val, y_val)     # Evaluate the model on the validation data
visualizer.poof()

# # Tree regression
#
# In order to do things properly for this model, we will try to pick the
# parameters that fit best (a grid-search sketch follows below):
# - First the max depth
# - Second the min samples split
# - Third the min samples leaf
#
# In[26]:
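# A minimal sketch of the parameter search described above, assuming a
# DecisionTreeRegressor and the same x_train/y_train split; the grid values
# are illustrative assumptions, not the notebook's actual values.
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 5],
}
search = GridSearchCV(DecisionTreeRegressor(random_state=0), param_grid,
                      scoring="neg_mean_squared_error", cv=5)
search.fit(x_train, y_train)
print(search.best_params_)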
def getRegressor(regressorName):
    # NOTE: the opening of this factory (the LinearRegression and
    # KNeighborsRegressor branches) was missing from the snippet; those two
    # branches are reconstructed from the regressors list below.
    if regressorName == 'LinearRegression':
        return LinearRegression()
    if regressorName == 'KNeighborsRegressor':
        return KNeighborsRegressor()
    if regressorName == 'SVR':
        return SVR(kernel='rbf', gamma='scale', C=1.0, epsilon=0.01)
    if regressorName == 'MLPRegressor':
        return MLPRegressor(hidden_layer_sizes=(100, 200))
    if regressorName == 'DecisionTreeRegressor':
        return DecisionTreeRegressor()
    if regressorName == 'RandomForestRegressor':
        return RandomForestRegressor()
    if regressorName == 'GradientBoostingRegressor':
        return GradientBoostingRegressor()


# regressors = ['LinearRegression', 'KNeighborsRegressor', 'SVR', 'MLPRegressor',
#               'DecisionTreeRegressor', 'RandomForestRegressor', 'GradientBoostingRegressor']
regressors = ['LinearRegression']

for regressorName in regressors:
    for index in range(2):
        print('Running regressor ' + regressorName + ' on column ' + str(index))
        dataset = loadData()
        features, labels = getFeaturesAndLabels(dataset, index)
        x_train, x_test, y_train, y_test = Split(features, labels)
        my_title = regressorName + ' on column ' + str(index)
        visualizer = ResidualsPlot(getRegressor(regressorName), title=my_title)
        visualizer.fit(x_train, y_train.ravel())
        score = visualizer.score(x_test, y_test.ravel())
        visualizer.poof()
        print("Score: " + str(score))
lasso_lars = grid.best_estimator_
plt.scatter(range(X_poly.shape[1]), lasso_lars.coef_,
            c=np.sign(lasso_lars.coef_), cmap="bwr_r")

######## Yellowbrick
from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoLarsCV

### Find optimal alpha
lassolars_yb = AlphaSelection(LassoLarsCV())
lassolars_yb.fit(X, y)
lassolars_yb.poof()

### RVF plot
lasso_yb = ResidualsPlot(lasso_lars, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error
lasso_yb = PredictionError(lasso_lars)  # unlike ResidualsPlot, PredictionError takes no hist argument
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()
x_list = [1, 2, 3, 4, 5]
plt.xlabel("Number of folds")
plt.ylabel("Mean Absolute Error")
plt.plot(x_list, accNN, label="Neural Network")
plt.plot(x_list, accSVR, label="Support Vector Regression")
plt.plot(x_list, accRF, label="Random Forest")
plt.legend()
plt.show()

# # Residual Plot

# In[ ]:

# Reference: https://media.readthedocs.org/pdf/yellowbrick/stable/yellowbrick.pdf
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

ridge = LinearRegression()  # note: despite the name, this is ordinary least squares
visualizer = ResidualsPlot(ridge)

Ytrain = train['price'] / train['price'].max()
Xtrain = pd.DataFrame(train.drop(['price'], axis=1))
Ytest = test['price'] / test['price'].max()
Xtest = test.drop(['price'], axis=1)

visualizer.fit(Xtrain, Ytrain)   # Fit the training data to the model
visualizer.score(Xtest, Ytest)   # Evaluate the model on the test data
visualizer.poof()
modelo = LinearRegression()
modelo.fit(X_reshaped, y)  # trains the model

intercept = modelo.intercept_   # intercept of the linear model
inclinacao = modelo.coef_       # slope

plt.scatter(X_reshaped, y)  # plot the data points
plt.scatter(X_reshaped, modelo.predict(X_reshaped), color='red')  # passing the data and its
# predictions traces the line of best fit (the regression line)

"""
Exercise: for a distance of 22, what is the predicted speed?
"""
distancia = 22
# Method 1:
previsao_metodo_1 = modelo.intercept_ + modelo.coef_ * distancia
# Method 2 (predict expects a 2D array):
previsao_metodo_2 = modelo.predict([[distancia]])

# Residuals (the distance from each point in the dataset to the regression line)
residuais = modelo._residues  # computed by sklearn as a single total; it does not show
# the residual of each point. For those individual values we use the yellowbrick library

"""
Using the yellowbrick library
"""
visualizador = ResidualsPlot(modelo)
visualizador.fit(X_reshaped, y)
visualizador.poof()  # renders the plot
lr_log = LinearRegression()
lr_log.fit(X=X_train_log, y=y_train_log)
print(f"Train R2 is {lr_log.score(X=X_train_log, y=y_train_log)}")
print(f"Test R2 is {lr_log.score(X=X_test_log, y=y_test_log)}")

# There is a slight improvement (~2%) in both train and test R2 when using the log transform

# + [markdown] pycharm={"name": "#%% md\n"}
# ## Model Evaluation - Linear Regression
# ### The following section uses a residual plot from Yellowbrick to evaluate the
# linear-model assumptions -- random error with constant variance, normally
# distributed with mean 0 -- for the four initial models.

# + pycharm={"is_executing": false}
# Residual Plot for Huber LR with no log-transform
from yellowbrick.regressor import ResidualsPlot

rpv_hr = ResidualsPlot(hr)
rpv_hr.fit(X=X_train, y=y_train)
rpv_hr.score(X=X_test, y=y_test)
rpv_hr.poof()

# + pycharm={"is_executing": false}
# Residual Plot for plain LR with no log-transform
rpv_lr = ResidualsPlot(lr)
rpv_lr.fit(X=X_train, y=y_train)
rpv_lr.score(X=X_test, y=y_test)
rpv_lr.poof()

# + pycharm={"is_executing": false}
# Residual Plot for LR with log transform
rpv_lr_log = ResidualsPlot(lr_log)
rpv_lr_log.fit(X=X_train_log, y=y_train_log)
rpv_lr_log.score(X=X_test_log, y=y_test_log)
df

print('Mean Absolute Error:', metrics.mean_absolute_error(ytest, ypred))
print('Mean Squared Error:', metrics.mean_squared_error(ytest, ypred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(ytest, ypred)))
print('Median absolute error:', metrics.median_absolute_error(ytest, ypred))
r2 = metrics.r2_score(ytest, ypred)  # score() expects (X, y), so use r2_score on predictions


def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


mean_absolute_percentage_error(ytest, ypred)

from yellowbrick.regressor import ResidualsPlot

# residuals vs. predicted values
visualizer = ResidualsPlot(regressor)
visualizer.fit(Xtrain, ytrain)   # fit the training split before scoring (split names assumed)
visualizer.score(Xtest, ytest)   # Evaluate the model on the test data
visualizer.show()

sns.residplot(ytest, ypred)
np.mean(ytest - ypred)
sns.distplot(ytest - ypred)
import pandas as pd
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = pd.read_csv('../CSV/bikeshare.csv')
X = data[[
    "season", "month", "hour", "holiday", "weekday", "workingday",
    "weather", "temp", "feelslike", "humidity", "windspeed"
]]
y = data["riders"]

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
def residuals_plot(ax=None):
    data = load_concrete(return_dataset=True)
    X, y = data.to_pandas()
    viz = ResidualsPlot(Ridge(), ax=ax)
    return tts_plot(viz, X, y)
plt.ylabel('Total alcohol consumed (L)')
plt.plot(X, modelo_cerveja.predict(X), color='red')

# Manual calculation and model prediction of y, respectively
modelo_cerveja.intercept_ + modelo_cerveja.coef_ * 400
modelo_cerveja.predict([[400]])

'''
Since the study did not give a reference serving size in litres, we can make
assumptions from this model. For example, taking a 300 ml (0.3 L) glass of beer,
a person who drinks 400 glasses per year (depending on the country, obviously)
drinks 120 litres of beer, of which roughly 13.65 litres is pure alcohol
(about 11.37%); a quick check of this arithmetic follows below.
'''

# Residuals and their plot (the distance between the data points and the regression line)
modelo_cerveja._residues

visualizador_cerveja = ResidualsPlot(modelo_cerveja)
visualizador_cerveja.fit(X, y)
visualizador_cerveja.poof()

'''2) Linear regression of spirits vs. total alcohol consumed'''
A = bebida_mundo.iloc[:, 2].values
b = bebida_mundo.iloc[:, 4].values
correlacao_destilados = np.corrcoef(A, b)
A = A.reshape(-1, 1)

modelo_destilados = LinearRegression()
modelo_destilados.fit(A, b)
score_destilados = modelo_destilados.score(A, b)
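# Sanity check of the worked example in the docstring above; all inputs are
# the docstring's assumptions, not measured data.
glasses_per_year = 400
litres_per_glass = 0.3
beer_litres = glasses_per_year * litres_per_glass   # 120.0 L of beer per year
pure_alcohol_litres = 13.65                         # figure quoted above
print(pure_alcohol_litres / beer_litres * 100)      # ≈ 11.37 % alcohol by volume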
# In[30]:

TimeSeriesSplit(max_train_size=None, n_splits=5)

# In[37]:

from sklearn.linear_model import Ridge
from yellowbrick.regressor import ResidualsPlot

# Instantiate the linear model and visualizer
model = Ridge()
visualizer = ResidualsPlot(model, size=(1080, 720))

visualizer.fit(X_train, y_train)   # Fit the training data to the visualizer
visualizer.score(X_test, y_test)   # Evaluate the model on the test data
visualizer.show()                  # Draw the data

# In[38]:

from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge, LinearRegression

regressors = {
# intercept
modelo.intercept_

# model coefficient
modelo.coef_

# visualizing the data and the model
plt.scatter(x2, y)
plt.plot(x2, modelo.predict(x2), color='red')

### prediction
# for a distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22
# or
modelo.predict(np.array([22]).reshape(-1, 1))

# sklearn returns only a single total value
# residuals
modelo._residues

# to see all the residuals:
# pip install yellowbrick
# or use anaconda...
from yellowbrick.regressor import ResidualsPlot

visualizador = ResidualsPlot(modelo)
visualizador.fit(x2, y)
visualizador.show()  # show() renders the figure (the visualizer has no plot() method)
# LINEAR REGRESSION
treino_mod = dict()
teste_mod = dict()
pred = dict()
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])

for i, color in zip(range(n_clusters), colors):
    treino_mod[i] = var_teste2_op2[var_teste2_op2['pred'] == i].join(
        treino[target_reg])
    teste_mod[i] = base_teste_teste2_op2[base_teste_teste2_op2['pred'] ==
                                         i].join(teste[target_reg])
    X = treino_mod[i][var]
    y = treino_mod[i][target_reg]
    model = LinearRegression().fit(X, y)
    pred[i] = model.predict(teste_mod[i][var])

    plt.figure(figsize=[15, 5])  # a single figure call is enough
    plt.subplot(1, 2, 1)
    visualizer = ResidualsPlot(model, hist=False)
    visualizer.fit(X, y)
    visualizer.score(teste_mod[i][var], teste_mod[i][target_reg])

    plt.subplot(1, 2, 2)
    plt.scatter(pred[i], teste_mod[i][target_reg], color='darkorange')
    plt.title('Target x Predict')
    plt.xlabel('Predict')
    plt.ylabel('True value')
    visualizer.show()
""" @author: LucasLimaPinho """ import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression from yellowbrick.regressor import ResidualsPlot base = pd.read_csv('cars.csv') base = base.drop(['Unnamed: 0'], axis=1) #axis = 1 -> erase per collumns x = base.iloc[:, 1].values x = x.reshape(-1, 1) y = base.iloc[:, 0].values correlacao = np.corrcoef(x, y) model = LinearRegression() model.fit(x, y) model.intercept_ model.coef_ plt.scatter(x, y) plt.plot(x, model.predict(x), color="red") model.predict(22) model._residues visual = ResidualsPlot(model) visual.fit(x, y) visual.poof()
# %%
# Residuals Plot (Trying new things)
# The residuals plot shows how the model is injecting error; the bold
# horizontal line at residuals = 0 means no error, and any point above or below
# that line indicates the magnitude of error.
# (https://www.scikit-yb.org/en/latest/quickstart.html#installation)

# Load a regression dataset
X, y = load_concrete()

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)   # Fit the training data to the visualizer
visualizer.score(X_test, y_test)   # Evaluate the model on the test data
visualizer.show()                  # Finalize and render the figure

# Xenia: Saving my plots
fig = plt.gcf()  # grab the current figure so it can be resized and saved once
fig.set_size_inches(7, 5)
fig.savefig("6._Residuals_Plot.png")

# %%
# New Plots with Temperature & Precipitation
# Time series of flow values with the x axis range limited
fig, ax = plt.subplots()
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression

scaler = StandardScaler()
# neigh = KNeighborsRegressor(n_neighbors=5)
# regression_visualizers = [ResidualsPlot(neigh), PredictionError(neigh)]

features = [
    "longitude", "latitude", "peak_load", "off-grid",
    "avg_peak_winter", "avg_peak_spring", "avg_peak_summer", "avg_peak_autumn",
    "avg_base_winter", "avg_base_spring", "avg_base_summer", "avg_base_autumn"
]

case_name = "mg_sizing_dataset_with_loc"
df = pd.read_csv("results/" + case_name + ".csv", sep=";|,", engine="python",
                 index_col='index')

X = df[features]
scaler.fit(X)
X = scaler.transform(X)

targets = ["PV", "BAT", "RBAT", "INV", "GEN", "NPV"]
y = df[targets[0]]

model = LinearRegression()
visualizer_residuals = ResidualsPlot(model)
visualizer_residuals.fit(X, y)
visualizer_residuals.show()
base = base.drop(['Unnamed: 0'], axis=1)
X = base.iloc[:, 1].values  # converts to a numpy array
X = X.reshape(-1, 1)        # turns the column into a matrix
y = base.iloc[:, 0].values
correlacao = np.corrcoef(X.ravel(), y)  # computes the correlation (corrcoef expects 1-D)

modelo = LinearRegression()
modelo.fit(X, y)
modelo.intercept_  # the intercept
modelo.coef_       # the coefficient

plt.scatter(X, y)  # scatter plot of the data
plt.plot(X, modelo.predict(X), color='red')  # draws the regression line on the plot

# distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22  # manual prediction
modelo.predict([[22]])  # the model's prediction (predict expects a 2D array)
modelo._residues  # the distance from the data points to the regression line

visualizador = ResidualsPlot(
    modelo
)  # plots the residuals, showing the dispersion around zero
visualizador.fit(X, y)
visualizador.poof()  # renders the plot; the closer the points are to 0, the better the fit
viz = FeatureImportances(rf, ax=ax)
viz.fit(Xtrain, ytrain)
viz.poof(outpath="rf_featureimportances.png")

# Rank2D
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
rank = Rank2D(features=feature_cols, algorithm='pearson', ax=ax)
rank.fit(Xtrain, ytrain)
rank.transform(Xtrain)
rank.poof(outpath="rf_rank2d.png")

# Residuals Plot
fig = plt.figure()
ax = fig.add_subplot()
resplot = ResidualsPlot(rf, ax=ax)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
resplot.poof(outpath="rf_resplot.png")

# Actual vs Predicted
rf.fit(Xtrain, ytrain)
yhat = rf.predict(Xtest)
error = ytest - yhat
data = pd.DataFrame({
    't': test['date'],
    'ytest': ytest,
    'yhat': yhat,
    'error': error,
    'neg_error': np.negative(error),
    'dless': dless
def show_residuals(model, train_tup, test_tup):  # renamed from show_residusal (typo)
    resPlot = ResidualsPlot(model)
    resPlot.fit(*train_tup)
    resPlot.score(*test_tup)
    resPlot.show()
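# A hypothetical call to show_residuals; the dataset, estimator, and split
# names are placeholders, not part of the original snippet.
# from sklearn.linear_model import LinearRegression
# from sklearn.model_selection import train_test_split
# from yellowbrick.datasets import load_concrete
#
# X, y = load_concrete()
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# show_residuals(LinearRegression(), (X_train, y_train), (X_test, y_test))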
can only be used for at most 3 variables, so when there are more than 3
predictors we should rely on residual plots alone. It is therefore good
practice to always check residual plots. The most useful way to plot the
residuals is with your predicted values on the x-axis and your residuals on
the y-axis.
'''
'''
To detect nonlinearity, one can inspect plots of observed vs. predicted values
or residuals vs. predicted values. The desired outcome is that points are
symmetrically distributed around a diagonal line in the former plot, or around
a horizontal line in the latter one -- in both cases with roughly constant
variance. (A hand-rolled residuals-vs-predicted plot is sketched after this
snippet.)
'''
from yellowbrick.regressor import ResidualsPlot

# residuals vs. predicted values
visualizer = ResidualsPlot(regressor)
visualizer.fit(X_train, y_train)   # Fit the training data to the visualizer
visualizer.score(X_test, y_test)   # Evaluate the model on the test data
visualizer.show()

visualizer = ResidualsPlot(regressor, hist=False)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()
# Test R2 is the R2 of the model on the test data
'''
A common use of the residuals plot is to analyze the variance of the error of
the regressor. If the points are randomly dispersed around the horizontal
axis, a linear regression model is usually appropriate for the data;
otherwise, a non-linear model is more appropriate. In the case above, we see a
fairly random, uniform distribution of the residuals against the target in two
dimensions. This seems
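# A minimal hand-rolled version of the residuals-vs-predicted plot described
# above, assuming the same fitted regressor and X_test/y_test split.
import matplotlib.pyplot as plt

y_pred = regressor.predict(X_test)
residuals = y_test - y_pred  # observed minus predicted

plt.scatter(y_pred, residuals, alpha=0.5)
plt.axhline(0, color="black", linewidth=1)  # points should scatter evenly around this line
plt.xlabel("Predicted values")
plt.ylabel("Residuals")
plt.show()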
import numpy as np

corr = np.corrcoef(features, target)
features = features.reshape(-1, 1)

from sklearn.linear_model import LinearRegression

regression = LinearRegression()
regression.fit(features, target)
print(regression.intercept_, regression.coef_)

import matplotlib.pyplot as plt

plt.scatter(features, target)
plt.plot(features, regression.predict(features), color='red')
plt.title("Simple Linear Regression")
plt.xlabel("Age")
plt.ylabel("Cost")

prediction_1 = regression.predict([[40]])
prediction_2 = regression.intercept_ + regression.coef_ * 40

score = regression.score(features, target)

from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(regression)
visualizer.fit(features, target)
visualizer.poof()
2. Model Fitting
   * Fit on training data and predict on test data
   * Check residuals and prediction error graphs (yellowbrick)
   * Plot predicted values vs actuals (yhat, ytest)
   * Calculate and plot residuals (ytest - yhat), as sketched below
"""

# How do our models perform on the test data?
score_model(rf)
score_model(rf_random)
score_model(rf_best)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot

resplot = ResidualsPlot(rf_best)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError

prederr = PredictionError(rf_best)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt
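# A minimal sketch of the predicted-vs-actual comparison the comment above
# describes, assuming rf_best, Xtest, and ytest from the surrounding code;
# the exact figure layout is an assumption, not the original notebook's.
yhat = rf_best.predict(Xtest)
residuals = ytest - yhat

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
ax1.scatter(ytest, yhat, alpha=0.5)        # predicted vs actual
ax1.set_xlabel("Actual (ytest)")
ax1.set_ylabel("Predicted (yhat)")

ax2.scatter(yhat, residuals, alpha=0.5)    # residuals vs predicted
ax2.axhline(0, color="black", linewidth=1)
ax2.set_xlabel("Predicted (yhat)")
ax2.set_ylabel("Residual (ytest - yhat)")
plt.show()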
modelo = LinearRegression()  # instantiate a LinearRegression object
modelo.fit(X, Y)  # fits the x and y data; trains the model

modelo.intercept_  # where the regression line intercepts the Y axis
modelo.coef_       # the slope of the line

plt.scatter(X, Y)
plt.plot(X, modelo.predict(X), color="red")  # predict returns the values the
# algorithm predicted from the real X data

# predict the speed for a distance of 22 feet; there are two ways:
modelo.intercept_ + modelo.coef_ * 22
# or, more directly (predict expects a 2D array):
modelo.predict([[22]])

# residuals of the regression line:
modelo._residues

# another way to visualize them, via the Yellowbrick library
visualizador = ResidualsPlot(modelo)  # create a ResidualsPlot object
visualizador.fit(X, Y)
visualizador.poof()
params = clf.get_params(deep=True)

# evaluate on test
print("default (R^2) score:" + np.array2string(clf.score(X_test_selected, y_test)))
print("intercept: " + np.array2string(clf.intercept_))
print("coefficients: " + np.array2string(clf.coef_))

# %%
# predict using classifier
y_pred_B = clf.predict(X_all_selected)

# %%
# residual plots
visualizer = ResidualsPlot(clf)
visualizer.fit(X_train_selected, y_train)   # Fit the training data to the visualizer
visualizer.score(X_test_selected, y_test)   # Evaluate the model on the test data
visualizer.poof()                           # Draw/show/poof the data

# %%
# plot all results, including train & test
plt.figure(figsize=(16, 6))
# fig3, ax3 = plt.subplots(figsize=(16, 6))
sns.lineplot(X_all['DateFraction'], y_pred_B, color='b')
# ax3 = plt.twinx()
sns.lineplot(X_all['DateFraction'], y_all, color='g')
sns.lineplot(X_test['DateFraction'], X_all['InflationTrailing5yrFactor']**.2 - 1, color='r')
sns.lineplot(X_test['DateFraction'], X_all['RateGS10'] / 100, color='y')
    'Total', 'Precipitation', 'Date', 'Day', 'Brooklyn Bridge',
    'Manhattan Bridge', 'Queensboro Bridge', 'Williamsburg Bridge'
])
y_train = df_new['Total']

#%%
from sklearn import preprocessing
from sklearn.linear_model import Ridge

reg = Ridge(alpha=100)
reg.fit(x_train, y_train)

#%%
reg.coef_

#%%
from sklearn.metrics import r2_score, mean_squared_error

y_pred = reg.predict(x_train)
print(r2_score(y_train, y_pred))
print(mean_squared_error(y_train, y_pred))

#%%
import yellowbrick

res = y_train - y_pred

#%%
from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(reg)
visualizer.fit(x_train, y_train)    # fit before scoring so the visualizer has data to draw
visualizer.score(x_train, y_train)  # evaluated on the training data (no test split shown here)
visualizer.poof()                   # Draw/show/poof the data
# Manual calculation with the trained model for any y (x[beer_servings] = 400)
modelo1.intercept_ + modelo1.coef_ * 400

# The model's own prediction
modelo1.predict([[400]])

'''
Since the study did not give a reference serving size in litres, we can make
assumptions from this model. For example, taking a 300 ml (0.3 L) glass of beer,
a person who drinks 400 glasses per year (depending on the country, obviously)
drinks 120 litres of beer, of which roughly 13.65 litres is pure alcohol
(about 11.37%)
'''

# Residuals (the distance between the data points and the reference line)
modelo1._residues

# Plotting the residuals
visualizador1 = ResidualsPlot(modelo1)
visualizador1.fit(X, y)
visualizador1.poof()
# The closer the residuals are to zero, the better the model

'''2) Linear relationship between total alcohol consumed (in litres) and total
spirits consumed (in servings)
NOTE: spirits are all drinks produced by distillation (vodka, whisky, tequila,
rum, among others)
'''
A = bebida_mundo.iloc[:, 2].values  # spirit_servings
b = bebida_mundo.iloc[:, 4].values  # total_litres_of_alcohol
correlacao2 = np.corrcoef(A, b)
A = A.reshape(-1, 1)

modelo2 = LinearRegression()
modelo2.fit(A, b)
rmse = sqrt(mse)
print('Accuracy:', accuracy, '%.')
print('Root Mean square Error:', rmse)
print('Mean absolute Error:', mae)
print('R2:', r2)
# maekfold = results_rf.mean()
# print('Mean absolute Error kfold:', maekfold)

#%%
# plotting results
model = rf

visualizer = ResidualsPlot(model)
visualizer.fit(x_train, y_train)   # Fit the training data to the visualizer
visualizer.score(x_test, y_test)   # Evaluate the model on the test data
visualizer.poof()                  # Draw/show/poof the data

visualizer = PredictionError(model)
visualizer.fit(x_train, y_train)   # Fit the training data to the visualizer
visualizer.score(x_test, y_test)   # Evaluate the model on the test data
visualizer.poof()                  # Draw/show/poof the data

#%%
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm