def log_residuals_chart(regressor, X_train, X_test, y_train, y_test, experiment=None):
    """Log residuals chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    The chart is produced on a best-effort basis: any exception raised while
    building or logging it is caught and reported with ``print``; it is not
    re-raised. The matplotlib figure created for the chart is closed in every
    case, including when building or logging fails.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Raises:
        AssertionError: If ``is_regressor(regressor)`` is false.

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_residuals_chart(rfr, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    exp = _validate_experiment(experiment)

    # Stays None if plt.subplots() itself fails, so the cleanup below knows
    # there is nothing to close.
    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Residuals Plot')
    except Exception as e:
        print('Did not log residuals chart. Error: {}'.format(e))
    finally:
        # Close on the failure path too; otherwise every failed call leaves an
        # open figure registered with pyplot.
        if fig is not None:
            plt.close(fig)
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    """Draw a residuals plot and a prediction-error plot side by side.

    A single 1x2 figure (20x10) is created. The left axes receives a
    ``ResidualsPlot`` and the right axes a ``PredictionError`` visualizer.
    Each visualizer is fitted with the training data and scored with the
    test data. The residuals visualizer is then finalized and ``poof()`` is
    called on the prediction-error visualizer.

    Args:
        model: Estimator handed to both visualizers.
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training target.
        y_test: Test target.

    Returns:
        ``None``
    """
    def _fit_and_score(viz):
        # Both visualizers go through the same fit-on-train / score-on-test steps.
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        return viz

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    left_ax, right_ax = axes

    plt.sca(left_ax)
    residuals_viz = _fit_and_score(ResidualsPlot(model, ax=left_ax))

    plt.sca(right_ax)
    error_viz = _fit_and_score(PredictionError(model, ax=right_ax))

    residuals_viz.finalize()
    error_viz.poof()
def create_residuals_chart(regressor, X_train, X_test, y_train, y_test):
    """Create residuals chart.

    The chart is produced on a best-effort basis: any exception raised while
    building it is caught and reported with ``print``, and ``None`` is
    returned instead of a chart. The matplotlib figure created for the chart
    is closed in every case, including when building fails.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when the chart could not be created.

    Raises:
        AssertionError: If ``is_regressor(regressor)`` is false.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/residuals'] = npt_utils.create_residuals_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None
    # Stays None if plt.subplots() itself fails, so the cleanup below knows
    # there is nothing to close.
    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        print('Did not log residuals chart. Error: {}'.format(e))
    finally:
        # Close on the failure path too; otherwise every failed call leaves an
        # open figure registered with pyplot.
        if fig is not None:
            plt.close(fig)

    return chart
# NOTE(review): this chunk begins in the middle of a statement -- the call that
# the final ")" on the next line closes is opened before the visible text.
# The visible part selects the 'feature' and 'weight' columns from the
# dataframe returned by eli5's explain_weights_df for model_lr.
eli5.formatters.as_dataframe.explain_weights_df( estimator=model_lr, feature_names=feature_names)[['feature', 'weight']])

# Bare string expression (Indonesian) -- presumably rendered as page text by
# Streamlit; TODO confirm. English gloss: the largest coefficient is GrLivArea
# (0.3154), so the house price is sensitive to that column; features with
# negative coefficients (ExterQual_TA, ExterQual_Fa) lower the price as they
# increase.
''' Koefisien yang paling besar dari model adalah GrLivArea sebesar 0.3154, artinya harga rumah sensitif dengan kolom ini. Apabila terjadi peningkatan terhadap nilai GrLivArea, harga rumah akan meningkat lebih tinggi dibandingkan apabila terjadi kenaikan pada feature yang lain dengan kenaikan yang sama. Perhatikan juga terdapat feature dengan nilai koefisien yang negatif (ExterQual_TA dan ExterQual_Fa), artinya apabila feature ini meningkat maka harga rumah akan menjadi lebih turun. '''

# Section heading text for the residual plot.
''' #### 2. Residual Plot '''
st.write('')

# Residuals plot for model_lr: fitted with the training split, scored with the
# test split, then finalized.
visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
visualizer_residual.finalize()
# Called without a figure argument -- presumably picks up the current
# matplotlib figure; verify against the Streamlit version in use.
st.pyplot()

# Bare string expression (Indonesian). English gloss: residuals are mostly
# concentrated at 0, but some large residuals remain, so the distribution is
# skewed rather than fully normal.
''' Residual berdistribusi paling banyak pada nilai 0. Akan tetapi, masih terdapat nilai residual yang cukup tinggi. Hal ini menyebabkan distribusi dari residual tidak sepenuhnya normal, tetapi menjadi skew. '''

# Section heading text for the prediction-error plot.
''' #### 3. Prediction Error '''
st.write('')

# Prediction-error plot for model_lr, built with the same fit / score /
# finalize sequence as the residuals plot above.
# NOTE(review): no st.pyplot() call is visible after finalize() -- the chunk
# may simply end before it.
visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
visualizer_prediction_error.finalize()
def plot(dados, ativo_x, ativo_y, period=100, tipo='residuos', save=False):
    """Plot one of four charts for a pair of asset columns.

    ``coint_period`` is always called first (with ``model=True``); the chart
    is then drawn from the last ``period`` rows of ``dados``.

    Args:
        dados: DataFrame from which the ``ativo_x`` and ``ativo_y`` columns
            are read. A ``'Date'`` column is used as the x-axis for every
            chart except ``'regression'``.
        ativo_x: Label of the first asset column. Its last three characters
            are dropped when the output file name is built.
        ativo_y: Label of the second asset column. Its last three characters
            are dropped when the output file name is built.
        period: Number of trailing rows to use.
        tipo: Chart to draw. Accepted values are ``'residuos'``,
            ``'fechamento'``, ``'spread'`` and ``'regression'``, each also
            accepted with a capital first letter. Any other value prints the
            list of accepted options and draws nothing.
        save: When truthy the chart is written with ``plt.savefig`` to
            ``<Tipo>_<x>_x_<y>_<period>_periodos.png``; otherwise it is
            displayed with ``plt.show``.

    Returns:
        ``None``
    """
    import matplotlib.pyplot as plt

    modelo, _y_pred, residuos, media, _desvio = coint_period(
        dados, ativo_x, ativo_y, period=period, model=True)

    dados = dados.iloc[-period:, :]
    X = dados.loc[:, ativo_x].values[-period:]
    y = dados.loc[:, ativo_y].values[-period:]

    def _nome_arquivo(prefixo):
        # Output file name shared by every chart type.
        return '{}_{}_x_{}_{}_periodos.png'.format(
            prefixo, ativo_x[:-3], ativo_y[:-3], period)

    def _exibir_ou_salvar(nome):
        # Common tail of every chart: save to disk or show on screen.
        if save:
            plt.savefig(nome)
        else:
            plt.show()

    if tipo in ('residuos', 'Residuos'):
        residuos_padronizado = residuos / np.std(residuos)
        # Size the horizontal reference lines from the rows actually plotted,
        # so they match the x-axis even when dados has fewer than `period` rows.
        n = len(dados)
        plt.figure(figsize=(15, 6))
        plt.title('Série Temporal Resíduos Padronizada {} períodos'.format(period))
        plt.plot(dados['Date'], residuos_padronizado, color='blue', alpha=0.6,
                 label='Resíduo Padronizado')
        plt.plot(dados['Date'], np.repeat(media, n), color='black', linestyle='--')
        plt.plot(dados['Date'], np.repeat(2, n), color='red', linestyle=':',
                 label='Dois Desvios Padrões')
        plt.plot(dados['Date'], np.repeat(-2, n), color='red', linestyle=':')
        plt.legend(loc=0)
        _exibir_ou_salvar(_nome_arquivo('Resíduos'))

    elif tipo in ('fechamento', 'Fechamento'):
        plt.title('Preço de Fechamento {} períodos'.format(period))
        plt.plot(dados['Date'], X, color='blue', label=ativo_x)
        plt.plot(dados['Date'], y, color='red', label=ativo_y)
        plt.legend(loc=0)
        _exibir_ou_salvar(_nome_arquivo('Fechamento'))

    elif tipo in ('spread', 'Spread'):
        # The title used to be the output file name with the period appended
        # again ("..._periodos.png<period>"); use the same title pattern as
        # the other charts instead.
        plt.title('Spread {} períodos'.format(period))
        plt.plot(dados['Date'], X / y, color='blue', label='Spread')
        _exibir_ou_salvar(_nome_arquivo('Spread'))

    elif tipo in ('regression', 'Regression'):
        from yellowbrick.regressor import ResidualsPlot

        X = X.reshape(-1, 1)
        _fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(8, 12))
        # Top panel: scatter of the two series with the model's predictions.
        ax1.set_title('Regressão Linear dos preços')
        ax1.scatter(X, y, color='blue')
        ax1.plot(X, modelo.predict(X), color='red')
        # Bottom panel: Yellowbrick residuals plot for the same model and data.
        visualizador = ResidualsPlot(modelo, ax=ax2)
        visualizador.fit(X, y)
        visualizador.finalize()
        _exibir_ou_salvar(_nome_arquivo('Regression'))

    else:
        lista = ['residuos', 'fechamento', 'spread', 'regression']
        listaM = ['Residuos', 'Fechamento', 'Spread', 'Regression']
        print('Escolha entre as opções abaixo:')
        for minuscula, maiuscula in zip(lista, listaM):
            print(minuscula, ' ou ', maiuscula)