Exemplo n.º 1
0
def plot_fit(res, exog_idx, exog_name="", y_true=None, ax=None, fontsize="small"):
    """Plot fit against one regressor.

    This creates one graph with the observed values (points), the fitted
    values (line) and a shaded band between the lower and upper bounds
    returned by ``wls_prediction_std``, all ordered by the chosen regressor.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    exog_name : str, optional
        name of the regressor used in the plot title.  If it is the empty
        string, "variable <exog_idx>" is used instead.
    y_true : array_like, optional
        If this is not None, then the array is added to the plot as a line
        labelled "true".  It is reordered with the same permutation as the
        observations.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    fontsize : str or int, optional
        font size passed on to ``ax.set_title``.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == "":
        exog_name = "variable %d" % exog_idx

    # maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    # Sort every series by the regressor so that the line plots and the
    # filled band are drawn from left to right.
    x1_argsort = np.argsort(x1)
    x1 = x1[x1_argsort]
    y = res.model.endog[x1_argsort]

    ax.plot(x1, y, "bo", label="observed")
    if y_true is not None:
        # y_true is array_like: convert first, a plain list cannot be
        # indexed with an index array.
        ax.plot(x1, np.asarray(y_true)[x1_argsort], "b-", label="true")

    # Only the interval bounds are plotted; the standard deviation that is
    # returned first is not needed here.
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], "k-", label="fitted")
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color="k")
    ax.set_title("fitted versus regressor %s" % exog_name, fontsize=fontsize)

    return fig
Exemplo n.º 2
0
def plot_fit(res, exog_idx, exog_name='', y_true=None, ax=None, fontsize='small'):
    """Plot fit against one regressor.

    This creates one graph with the observed values (points), the fitted
    values (line) and a shaded band between the lower and upper bounds
    returned by ``wls_prediction_std``, all ordered by the chosen regressor.

    Parameters
    ----------
    res : result instance
        result instance with fittedvalues, model.endog and model.exog as
        attributes
    exog_idx : int
        index of regressor in exog matrix
    exog_name : str, optional
        name of the regressor used in the plot title.  If it is the empty
        string, 'variable <exog_idx>' is used instead.
    y_true : array_like, optional
        If this is not None, then the array is added to the plot as a line
        labelled 'true'.  It is reordered with the same permutation as the
        observations.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    fontsize : str or int, optional
        font size passed on to ``ax.set_title``.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.

    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == '':
        exog_name = 'variable %d' % exog_idx

    #maybe add option for wendog, wexog
    x1 = res.model.exog[:, exog_idx]
    #one permutation, ordered by the regressor, is applied to all series so
    #that lines and the filled band are drawn from left to right
    x1_argsort = np.argsort(x1)
    x1 = x1[x1_argsort]
    y = res.model.endog[x1_argsort]

    ax.plot(x1, y, 'bo', label='observed')
    if y_true is not None:
        #y_true is array_like: a plain list cannot be indexed with an
        #index array, so convert it first
        y_true_sorted = np.asarray(y_true)[x1_argsort]
        ax.plot(x1, y_true_sorted, 'b-', label='true')

    #first return value (standard deviation) is not used in the plot
    _, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-', label='fitted')
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color='k')

    title = 'fitted versus regressor %s' % exog_name
    ax.set_title(title, fontsize=fontsize)

    return fig
Exemplo n.º 3
0
def summary_table(res, alpha=0.05):
    '''generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : result instance
       regression results; fittedvalues, resid, mse_resid, df_resid, nobs
       and model.endog are read, and the instance is passed on to
       Influence and wls_prediction_std
    alpha : float
       significance level for confidence interval; used for both the mean
       and the prediction interval and for the interval column headers

    Returns
    -------
    st : SimpleTable instance
       table with results that can be printed
    data : ndarray
       calculated measures and statistics for the table
    ss2 : list of strings
       column_names for table (Note: rows of table are observations)

    '''

    from copy import deepcopy

    from scipy import stats
    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from gwstatsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)
    hat_diag = infl.hat_matrix_diag

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(hat_diag * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    predict_mean_ci = np.column_stack([
                        res.fittedvalues - tppf * predict_mean_se,
                        res.fittedvalues + tppf * predict_mean_se])

    #confidence interval for predicted observation
    #alpha has to be passed on explicitly, otherwise the prediction interval
    #stays at the default level while the mean interval follows alpha
    _, predict_ci_low, predict_ci_upp = wls_prediction_std(res, alpha=alpha)

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - hat_diag))

    #one row per observation, one column per statistic
    data = np.column_stack([
                            np.arange(res.nobs) + 1,
                            res.model.endog,
                            res.fittedvalues,
                            predict_mean_se,
                            predict_mean_ci[:, 0],
                            predict_mean_ci[:, 1],
                            predict_ci_low,
                            predict_ci_upp,
                            res.resid,
                            resid_se,
                            infl.resid_studentized_internal,
                            infl.cooks_distance[0]
                            ])

    #interval headers show the level that was actually used;
    #for the default alpha=0.05 this is '95%'
    level = '%g%%' % (100 * (1 - alpha))
    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
           'Std Error\nMean Predict',
           'Mean ci\n%s low' % level, 'Mean ci\n%s upp' % level,
           'Predict ci\n%s low' % level, 'Predict ci\n%s upp' % level,
           'Residual', 'Std Error\nResidual', 'Student\nResidual',
           "Cook's\nD"]

    #copies, so that the shared default format dicts are not modified
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    #integer format for the observation number, floats for everything else
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    st = SimpleTable(data, headers=ss2, txt_fmt=fmt,
                       html_fmt=fmt_html)

    return st, data, ss2
Exemplo n.º 4
0
def summary_table(res, alpha=0.05):
    '''generate summary table of outlier and influence similar to SAS

    Parameters
    ----------
    res : result instance
       regression results; fittedvalues, resid, mse_resid, df_resid, nobs
       and model.endog are read, and the instance is passed on to
       Influence and wls_prediction_std
    alpha : float
       significance level for confidence interval; used for both the mean
       and the prediction interval and for the interval column headers

    Returns
    -------
    st : SimpleTable instance
       table with results that can be printed
    data : ndarray
       calculated measures and statistics for the table
    ss2 : list of strings
       column_names for table (Note: rows of table are observations)

    '''

    from copy import deepcopy

    from scipy import stats
    from gwstatsmodels.iolib.table import SimpleTable, default_html_fmt
    from gwstatsmodels.iolib.tableformatting import fmt_base
    from gwstatsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)
    leverage = infl.hat_matrix_diag

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(leverage * res.mse_resid)

    tppf = stats.t.isf(alpha / 2., res.df_resid)
    half_width = tppf * predict_mean_se
    predict_mean_ci = np.column_stack([
        res.fittedvalues - half_width,
        res.fittedvalues + half_width
    ])

    #confidence interval for predicted observation
    #alpha is forwarded so that the prediction interval uses the same level
    #as the mean interval instead of the callee's default
    _, predict_ci_low, predict_ci_upp = wls_prediction_std(res, alpha=alpha)

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - leverage))

    #one row per observation, one column per statistic
    data = np.column_stack([
        np.arange(res.nobs) + 1, res.model.endog, res.fittedvalues,
        predict_mean_se, predict_mean_ci[:, 0], predict_mean_ci[:, 1],
        predict_ci_low, predict_ci_upp, res.resid, resid_se,
        infl.resid_studentized_internal, infl.cooks_distance[0]
    ])

    #headers of the interval columns report the level implied by alpha;
    #with the default alpha=0.05 this gives '95%'
    level = '%g%%' % (100 * (1 - alpha))
    ss2 = [
        'Obs', 'Dep Var\nPopulation', 'Predicted\nValue',
        'Std Error\nMean Predict',
        'Mean ci\n%s low' % level, 'Mean ci\n%s upp' % level,
        'Predict ci\n%s low' % level, 'Predict ci\n%s upp' % level,
        'Residual', 'Std Error\nResidual', 'Student\nResidual', "Cook's\nD"
    ]

    #work on copies so the shared default format dicts stay untouched
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    #integer format for the observation number, floats for everything else
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    st = SimpleTable(data, headers=ss2, txt_fmt=fmt, html_fmt=fmt_html)

    return st, data, ss2
Exemplo n.º 5
0
    # estimate only linear function, misspecified because of non-linear terms
    exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)

    #    plt.figure()
    #    plt.plot(x1, y, 'o', x1, y_true, 'b-')

    res = sm.OLS(y, exog0).fit()
    # print res.params
    # print res.bse

    plot_old = 0  # True
    if plot_old:

        # current bug predict requires call to model.results
        # print res.model.predict
        prstd, iv_l, iv_u = wls_prediction_std(res)
        plt.plot(x1, res.fittedvalues, "r-o")
        plt.plot(x1, iv_u, "r--")
        plt.plot(x1, iv_l, "r--")
        plt.title("blue: true,   red: OLS")

        plt.figure()
        plt.plot(res.resid, "o")
        plt.title("Residuals")

        fig2 = plt.figure()
        ax = fig2.add_subplot(2, 1, 1)
        # namestr = ' for %s' % self.name if self.name else ''
        plt.plot(x1, res.resid, "o")
        ax.set_title("residuals versus exog")  # + namestr)
        ax = fig2.add_subplot(2, 1, 2)
Exemplo n.º 6
0
    exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)

#    plt.figure()
#    plt.plot(x1, y, 'o', x1, y_true, 'b-')

    res = sm.OLS(y, exog0).fit()
    #print res.params
    #print res.bse


    plot_old = 0 #True
    if plot_old:

        #current bug predict requires call to model.results
        #print res.model.predict
        prstd, iv_l, iv_u = wls_prediction_std(res)
        plt.plot(x1, res.fittedvalues, 'r-o')
        plt.plot(x1, iv_u, 'r--')
        plt.plot(x1, iv_l, 'r--')
        plt.title('blue: true,   red: OLS')

        plt.figure()
        plt.plot(res.resid, 'o')
        plt.title('Residuals')

        fig2 = plt.figure()
        ax = fig2.add_subplot(2,1,1)
        #namestr = ' for %s' % self.name if self.name else ''
        plt.plot(x1, res.resid, 'o')
        ax.set_title('residuals versus exog')# + namestr)
        ax = fig2.add_subplot(2,1,2)