def norm_plot(self, x):
        '''Draw a Q-Q plot and a histogram side by side to visualize
        the normality of a variable.

        Parameters:
        ----------
        x : list or numpy.ndarray
            The variable to plot. It is flattened to one dimension for the
            Q-Q plot and passed unchanged to the histogram.

        Returns:
        -------
        ax1 : matplotlib.axes
            Axes holding the Q-Q plot of variable x
        ax2 : matplotlib.axes
            Axes holding the histogram of variable x

        Notes:
        -----
        The sample is standardized by ``ProbPlot(fit=True)`` before being
        compared with the 45-degree reference line.'''
        import numpy as np

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3))

        # Coerce first: a plain list (allowed by the docstring) has no
        # ``reshape`` method, so the original call failed on list input.
        sample = np.asarray(x).reshape(-1)
        qlt = sm.ProbPlot(sample, fit=True)
        qlt.qqplot(marker='o', color='coral', ax=ax1)
        # Draw the reference line on the axes we own instead of looking it
        # up again through the returned figure.
        sm.qqline(ax1, line='45', fmt='g--')

        ax2.hist(x, color='orange', alpha=.6)

        return ax1, ax2
Exemplo n.º 2
0
def plot_qq(resid, title='', ax=None, z=2.807, strftime='%Y-%m-%d'):
    """Draw a Q-Q plot of residuals and label the points beyond a threshold.

    Parameters
    ----------
    resid :
        Residuals to plot. Must provide ``sort_values()`` and an ``index``
        (it is used like a pandas Series).
    title : str
        Axes title; falls back to ``'Normal Q-Q'`` when empty.
    ax :
        Axes to draw on; the current pyplot axes are used when not given.
    z : float
        Points whose absolute standardized sample quantile exceeds this
        value are treated as outliers.
    strftime : str
        Format applied to each outlier's index label via ``.strftime``;
        when falsy the label is rendered with ``str()`` instead.

    Returns
    -------
    DataFrame
        One row per outlier, indexed by the outlier's label, with columns
        ``'residuals'`` and ``'standardized'``.
    """
    prob = ProbPlot(resid, fit=True)
    is_outlier = abs(prob.sample_quantiles) > z

    if not ax:
        ax = plt.gca()
    prob.qqplot(ax=ax, color='C0', alpha=.5)
    sm.qqline(ax=ax, line='45', fmt='r--', lw=1)

    # The mask is aligned with the sorted sample, so it is applied to the
    # index of the sorted residuals.
    outlier_labels = resid.sort_values().index[is_outlier]
    theoretical = prob.theoretical_quantiles[is_outlier]
    standardized = prob.sample_quantiles[is_outlier]

    for q_theory, q_sample, label in zip(theoretical, standardized,
                                         outlier_labels):
        if strftime:
            text = label.strftime(strftime)
        else:
            text = str(label)
        ax.annotate(text, xy=(q_theory, q_sample), c='m')

    ax.set_title(title or 'Normal Q-Q')
    ax.set_ylabel('Standardized residuals')

    columns = {
        'residuals': prob.sorted_data[is_outlier],
        'standardized': standardized,
    }
    return DataFrame(columns, index=outlier_labels)
Exemplo n.º 3
0
    # Finish the top-right panel: label the x axis, add a horizontal
    # reference line at y = 0 and apply the shared prettyplot() styling.
    axes[0, 1].set_xlabel('Fitted values', fontweight='bold')
    axes[0, 1].axhline(y=0, color='k')
    prettyplot(axes[0, 1])

    # (Subplot 3)
    # Q-Q plot: compare the quantiles of the residuals with theoretical
    # quantiles to look for departures from the reference distribution.
    # NOTE(review): the reference distribution passed below is stats.t,
    # while the panel title says 'Normal Q-Q Plot' -- confirm which is meant.

    pp = sm.ProbPlot(residuals, stats.t, fit=True)
    qq = pp.qqplot(marker='.',
                   markerfacecolor='gray',
                   markeredgecolor='gray',
                   alpha=0.5,
                   ax=axes[1, 0])
    # 45-degree reference line drawn on the same (bottom-left) axes.
    sm.qqline(axes[1, 0], line='45', fmt='k-')

    axes[1, 0].set_title('Normal Q-Q Plot')
    axes[1, 0].set_xlabel('Theoretical quantiles', fontweight='bold')
    axes[1, 0].set_ylabel('Sample quantiles', fontweight='bold')
    prettyplot(axes[1, 0])

    # (Subplot 4)
    # Look at residuals by year. See if there are any patterns or notable
    # departures. Residuals can be thought of as the errors: a zero residual
    # means a perfect prediction.
    # Some questions to ask: Was there a really bad year for the model? Are
    # there trends in the residuals over time (e.g. did the model perform
    # badly for recent years)?

    axes[1, 1].plot(residuals, color='k')
Exemplo n.º 4
0
Arquivo: MLR.py Projeto: iris3333/pre2
           s=9)
# Use the same range on both axes so the line drawn next runs corner to
# corner of the plotting area.
ax.set(xlim=(-4.5, -0.5), ylim=(-4.5, -0.5))
# Dashed red reference line from (xmin, ymin) to (xmax, ymax).
ax.plot(ax.get_xlim(), ax.get_ylim(), linewidth=1, color="Red", linestyle="--")
plt.xlabel("Actual Price", fontsize=10)
plt.ylabel("Predicted Price", fontsize=10)
# The title is the list of all column names of df except the last one.
plt.title(list(df.columns)[:-1], fontsize=11)
plt.show()

# Q-Q plot of the regression residuals against a normal distribution.
fig, ax = plt.subplots()
prob_plot = sm.ProbPlot(reg.resid, dist=stats.norm, fit=True)
marker_style = {
    "markerfacecolor": "Black",
    "markeredgecolor": "Black",
    "markersize": 2,
}
qq = prob_plot.qqplot(ax=ax, **marker_style)
# Fix the limits before adding the 45-degree line: qqline() derives the
# line's end points from the current axes limits.
ax.set_xlim(-6, 6)
ax.set_ylim(-6, 6)
sm.qqline(qq.axes[0], line="45", fmt="r--")
ax.set_xlabel("Theoretical Quantiles", fontsize=10)
ax.set_ylabel("Sample Quantiles", fontsize=10)
ax.set_title("Normality", fontsize=11)
plt.show()

# Residuals-versus-fitted scatter, used to eyeball homoscedasticity.
fig, ax = plt.subplots()
# `DataFrame.ix` was removed in pandas 1.0. `.iloc[:, :-1]` selects every
# column except the last one by position, which is what the slice is for.
ax.scatter(x=reg.predict(sm.add_constant(df.iloc[:, :-1])),
           y=reg.resid,
           c="Black",
           s=9)
ax.set(xlim=(-4.5, -0.5), ylim=(-1.5, 1.5))
# Dashed red zero line spanning the full x range.
ax.plot(ax.get_xlim(), (0, 0), linewidth=1, color="Red", linestyle="--")
plt.xlabel("Fitted Values", fontsize=10)
plt.ylabel("Residuals", fontsize=10)
plt.title("Homoscedasticity", fontsize=11)
Exemplo n.º 5
0
# In[16]:

# Regressors: the B_FREQ and P_BETA250 columns placed side by side.
x = pd.concat([df['B_FREQ'], df['P_BETA250']], axis=1)
# Response variable.
y = df['PROFIT']

# Add an intercept column, fit an ordinary least squares model and print
# the fit summary.
X = sm.add_constant(x)
model = sm.OLS(y, X)
result = model.fit()
print(result.summary())

# In[17]:

# Bare expression: shows the design matrix when run as a notebook cell and
# has no effect when run as a plain script.
X

# In[33]:

# Single axes: subplot(111) means 1 row, 1 column, first plot.
ax = plt.subplot(111)
plt.scatter(df['B_FREQ'], df['PROFIT'])
ax.set_xlabel('B_FREQ')
ax.set_ylabel('PROFIT')
# Add a regression ("r") line computed from x and y to the same axes.
# NOTE(review): x is the two-column frame built above, while the scatter
# uses only B_FREQ -- confirm that passing both columns is intended.
sm.qqline(ax, "r", x, y)
plt.show()

# In[ ]:

# In[ ]:

# In[ ]:
import os
from pathlib import Path

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Locate the price history relative to this script:
# <script dir>/Data/HistoricalStoxx50E.txt
current_directory = os.path.dirname(os.path.realpath(__file__))
folder_directory = Path(current_directory)
historical_data_path = os.path.join(folder_directory, 'Data',
                                    'HistoricalStoxx50E.txt')

# Semicolon-separated file read without a header row; the two columns are
# named explicitly.
historical_data = pd.read_csv(historical_data_path,
                              header=None,
                              names=["date", "price"],
                              sep=";")
no_dates = len(historical_data['date'])

# Row 0 is skipped, as in the original (presumably a header line that was
# read as data -- verify against the file). The numeric prices are taken
# into their own Series: assigning through `historical_data['price'][1:]`
# is chained assignment, which pandas does not guarantee to write back.
prices = historical_data['price'].iloc[1:].astype(float)
log_increments = np.diff(np.log(prices.to_numpy()))

# Standardize the log increments to zero mean and unit standard deviation.
mean_log_increments = log_increments.mean()
std_log_increments = log_increments.std()
log_increments_normalized = (log_increments -
                             mean_log_increments) / std_log_increments

# Create the figure and axes up front and hand the axes to qqplot():
# called without `ax`, qqplot() opens a figure of its own and the
# 20x10 figure would stay empty.
fig, ax = plt.subplots(figsize=(20, 10))
pp = sm.ProbPlot(log_increments_normalized, fit=True)
qq = pp.qqplot(marker='.',
               markerfacecolor='k',
               markeredgecolor='k',
               alpha=0.3,
               ax=ax)
sm.qqline(ax, line='45', fmt='k--')

plt.show()
Exemplo n.º 7
0
def validation_plot_bastos(X_test, Y_test, m_test, v_test):
    """
    Produces a 2x2 matrix plot of the leave-1-out-validation of the emulator according to Bastos and O’Hagan (2009).

    Each point corresponds to the training of the emulator on all points except one and then testing on exactly that point.
    Individual standardized errors are plotted against:
     - top left: emulator output
     - top right: input parameters
     - bottom left: QQ-plot of the individual standardized errors against a student-T distribution

    In addition, the bottom right plot shows the emulator against model output, with the error bars indicating the 95% confidence interval on the emulator predictions.
    Predictions for which the model result lies outside that interval are marked red. For this subplot the :func:`validation_plot` is used.
    The dashed lines in the top row plots are drawn at individual standardized errors of -2, 0 and 2; 2 is the threshold discussed in Bastos and O’Hagan (2009).

    Example usage and interpretation of this plot can be found in Proske et al. (2021)

    Source:
        Bastos and O'Hagan (2009): Diagnostics for Gaussian Process Emulators, Technometrics,
        51, 425-438. https://doi.org/10.1198/TECH.2009.08019
        Code for the lower-right plot adapted from the ESEm package validation_plot() (see above)

    Author:
        Ulrike Proske ([email protected])


    Parameters
    ----------
    X_test : array-like or pandas.DataFrame of shape (n_samples, n_features)
            Input data. For a DataFrame the column names are used as legend
            labels, otherwise the column position is used.
    Y_test : array-like of shape (n_samples,)
            Simulated output
    m_test : array-like, broadcast-compatible with Y_test
            Emulator output
    v_test : array-like of shape (n_samples,)
            Variance of emulator

    """

    import matplotlib.pyplot as plt
    import statsmodels.api as sm
    from scipy import stats

    # Namelist
    c_black = 'black'
    c_blue = '#1f78b4'
    c_green = '#33a02c'
    c_orange = '#ff7f00'
    c_purple = '#6a3d9a'
    colors = [c_blue, c_green, c_orange, c_purple]
    alpha = 0.75
    error_label = r'$({Y_{\mathrm{sim}} - Y_{\mathrm{emu}})}/{\sqrt{V}}$'

    # Start plotting
    _, axs = plt.subplots(nrows=2,
                          ncols=2,
                          figsize=(4.5, 4.5),
                          gridspec_kw={
                              'hspace': 0.35,
                              'wspace': 0.75
                          })
    errors_std = (Y_test - m_test) / np.sqrt(v_test)  # standardized errors

    # Top left: standardized errors against emulator output.
    axs[0, 0].scatter(m_test, errors_std, c=c_black, marker='.', alpha=alpha)
    axs[0, 0].set_xlabel(r'$Y_{\mathrm{emu}}$')
    axs[0, 0].set_ylabel(error_label)

    # Bottom left: customized QQ plot against a student-T distribution.
    qq_ax = axs[1, 0]
    pp = sm.ProbPlot(np.ravel(errors_std), stats.t, fit=True)
    pp.qqplot(marker='.',
              markerfacecolor='k',
              markeredgecolor='k',
              alpha=alpha,
              ax=qq_ax)
    # Remember the limits chosen for the data and restore them after the
    # reference line has been added. The line is drawn on the axes object
    # itself rather than looked up by position in the figure's axes list.
    xlim = qq_ax.get_xlim()
    ylim = qq_ax.get_ylim()
    sm.qqline(qq_ax, line='45', fmt='k--')
    qq_ax.set_xlim(xlim)
    qq_ax.set_ylim(ylim)
    qq_ax.set_ylabel('Standardized quantiles')

    # Top right: standardized errors against each input parameter.
    # np.asarray() accepts DataFrames and plain arrays alike; the previous
    # unconditional X_test.to_numpy() made the non-DataFrame case fail.
    x_values = np.asarray(X_test)
    is_frame = isinstance(X_test, pd.DataFrame)
    # Trailing axes needed to expand one parameter column to the shape of
    # the outputs.
    extra_axes = tuple(range(1, len(errors_std.shape)))
    for i in range(x_values.shape[1]):
        expanded_params = np.broadcast_to(
            np.expand_dims(x_values[:, i], axis=extra_axes),
            errors_std.shape)
        label = X_test.columns[i] if is_frame else str(i)
        axs[0, 1].scatter(expanded_params,
                          errors_std,
                          # Cycle the palette so more than four parameters
                          # do not raise an IndexError.
                          c=colors[i % len(colors)],
                          label=label,
                          marker='.',
                          alpha=alpha)

    axs[0, 1].legend()
    axs[0, 1].set_xlabel(r'$\eta_i$')
    axs[0, 1].set_ylabel(error_label)

    # add hlines
    for panel in (axs[0, 1], axs[0, 0]):
        for level in (-2, 2, 0):
            panel.axhline(y=level, c=c_black, linestyle='--')

    # Bottom right: emulator against model output.
    validation_plot(Y_test, m_test, v_test, ax=axs[1, 1])