Python qqplot示例，statsmodels.graphics.gofplots.qqplot Python示例

示例#1

0

显示文件

文件： qa_view.py 项目： Deel18/GamestonkTerminal

def display_qqplot(name: str, df: pd.DataFrame, target: str):
    """Show a Q-Q plot of one dataframe column against normal quantiles.

    Parameters
    ----------
    name : str
        Stock ticker (only used in the plot title)
    df : pd.DataFrame
        Dataframe holding the data
    target : str
        Column in ``df`` to look at
    """
    data = df[target]
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)

    # Statsmodels has a UserWarning for the marker kwarg -- which we don't use.
    # The filter is scoped to this call so that UserWarnings raised anywhere
    # else in the process are not silenced as a side effect.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        qqplot(data, stats.distributions.norm, fit=True, line="45", ax=ax)

    ax.set_title(f"Q-Q plot for {name} {target}")
    ax.set_ylabel("Sample quantiles")
    ax.set_xlabel("Theoretical quantiles")
    ax.grid(True)

    if gtff.USE_ION:
        plt.ion()
    fig.tight_layout(pad=1)
    plt.show()
    print("")

示例#2

0

显示文件

文件： ResearchStudyClass.py 项目： vaiblast/quant-research-env

    def _plotQQPlotOtherBars(self, saveDirectory='', showIt=False):
        """Save (and optionally show) a Q-Q plot of the returns of every asset.

        Iterates over ``self.ALTERNATIVE_BARS`` (asset name -> dataframe with a
        ``Returns`` column) and writes ``QQPlot_<asset>.png`` for each asset.

        Parameters
        ----------
        saveDirectory : str
            Directory the PNG files are written to. The empty default means
            the current working directory.
        showIt : bool
            When True, each figure is also displayed with ``plt.show()``.
        """
        # Local import so the method does not depend on the module's import block.
        import os

        # Plot the quantile plot of each asset in the portfolio:
        for eachAssetName, eachAssetDataFrame in self.ALTERNATIVE_BARS.items():

            logger.warning(f'[{self._plotQQPlotOtherBars.__name__}] - Looping for asset <{eachAssetName}>...')

            # Plot the QQplot (qqplot returns the figure it drew on):
            figure = qqplot(eachAssetDataFrame.Returns.values, line='s')

            # Add more variables:
            plt.grid(linestyle='dotted')
            plt.xlabel('Theoretical Quantiles', horizontalalignment='center', verticalalignment='center', fontsize=14, labelpad=20)
            plt.ylabel('Sample Quantiles', horizontalalignment='center', verticalalignment='center', fontsize=14, labelpad=20)
            plt.title(f'Asset: {eachAssetName} -- Quantile-Quantile (QQ) Plot')
            plt.subplots_adjust(left=0.09, bottom=0.20, right=0.94, top=0.90, wspace=0.2, hspace=0)

            # In PNG: os.path.join keeps an empty saveDirectory relative
            # instead of producing a path under the filesystem root.
            plt.savefig(os.path.join(saveDirectory, f'QQPlot_{eachAssetName}.png'))

            if showIt:
                # Show it:
                plt.show()
            else:
                # Nothing will display this figure, so release it rather than
                # letting one open figure accumulate per asset.
                plt.close(figure)

    ######################### PLOTS #########################

示例#3

0

显示文件

文件： code2.py 项目： asarantsev/StockMarketMichaelReyesFall2019

def normal(x):
    """Print normality diagnostics for ``x`` and display its Q-Q plot.

    Prints element ``[1]`` of the Shapiro-Wilk and Jarque-Bera results (their
    p-values), then draws a Q-Q plot with a standardized reference line.
    Always returns 0.
    """
    shapiro_p = stats.shapiro(x)[1]
    print('Shapiro-Wilk p =', shapiro_p)

    jarque_bera_p = stats.jarque_bera(x)[1]
    print('Jarque-Bera p =', jarque_bera_p)

    print('QQ plot')
    qqplot(x, line='s')
    pyplot.show()

    return 0

示例#4

0

显示文件

def test_distribution(dataframe, t=None):
    """Plot and test the standardized, NaN-free values of ``dataframe``.

    All values are flattened, NaNs are dropped, and the remainder is
    standardized with its own mean and standard deviation. A 15-bin histogram
    (with ``t`` as super-title) and a Q-Q plot are shown, then the p-value of
    ``normaltest`` is printed together with a verdict at alpha = 0.05.

    Side effect: the flattened, NaN-free values are stored in the module-level
    name ``arr``.
    """
    global arr

    def print_res(p, alpha):
        # Print the p-value followed by a one-line interpretation of it.
        print('p = ', p)
        if np.isnan(p):
            verdict = 'p is null'
        elif p < alpha:
            verdict = "The null hypothesis of normality can be rejected --> NOT NORMAL"
        else:
            verdict = "The null hypothesis of normality cannot be rejected --> LIKELY NORMAL"
        print(verdict)

    alpha = 0.05

    arr = dataframe.values.flatten()
    arr = arr[~np.isnan(arr)]
    centered = arr - np.mean(arr)
    corrected = centered / np.std(arr)

    plt.hist(corrected, bins=15)
    plt.suptitle(t)
    plt.show()

    qqplot(corrected)
    plt.show()

    # The test runs on the standardized values computed above.
    print("Raw Data:")
    _, p_value = normaltest(corrected)
    print_res(p_value, alpha)

示例#5

0

显示文件

def plot(data):
    """Plot ``data`` and print the outcome of three normality tests.

    Draws a 50-bin histogram, prints the sample mean and standard deviation,
    reports the Shapiro, D'Agostino (``normaltest``) and Anderson results,
    overlays the fitted normal pdf on the histogram bins, and finally shows a
    Q-Q plot with a standardized reference line.
    """
    # Convert once; every test and plot below works on the same array.
    values = np.array(data)

    _, bins, _ = plt.hist(values, 50)
    mu = np.mean(values)
    sigma = np.std(values)
    print("Mean: {}, std: {}".format(mu, sigma))

    # Shapiro test
    _, p = shapiro(values)
    if p > 0.05:
        print("Shapiro: Data is normally distributed")
    else:
        print("Shapiro: Data is NOT normally distributed, p-value: {}".format(p))

    # D'Agostino test
    _, p = normaltest(values)
    if p > 0.05:
        print("D'Agostino: Data is normally distributed")
    else:
        print("D'Agostino: Data is NOT normally distributed, p-value: {}".format(p))

    # Anderson test: one verdict per (significance level, critical value) pair.
    result = anderson(values)
    for sl, cv in zip(result.significance_level, result.critical_values):
        if result.statistic < cv:
            print('Anderson: %.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
        else:
            print('Anderson: %.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))

    plt.plot(bins, norm.pdf(bins, mu, sigma))
    qqplot(values, line='s')
    plt.show()

示例#6

0

显示文件

    def _plot_data(self):
        """Draw the diagnostic figures for ``self.data`` and return ``self``.

        One figure per keyword ("MA", "STD") plots every column whose name
        contains that keyword against ``DateTime_Stamp``; each of those is
        shown immediately. Then a histogram of ``Y_label`` and its Q-Q plot
        are drawn, followed by a figure with ``OPEN_Bid`` and every column
        whose name contains "Band".
        """
        for keyword in ("MA", "STD"):
            plt.figure()
            matching = filter(lambda name: str(keyword) in name,
                              list(self.data.columns))
            for name in matching:
                plt.plot(self.data['DateTime_Stamp'], self.data[name])

            plt.title(keyword)
            plt.xticks(rotation=90)
            plt.subplots_adjust(bottom=0.2)
            plt.show()

        # Distribution of the label: histogram with 0.05-wide bins plus Q-Q plot.
        plt.figure()
        label_bins = np.arange(0.00, 1.01, 0.05)
        plt.hist(self.data['Y_label'], bins=label_bins)
        qqplot(self.data['Y_label'], line='s')

        # Opening bid together with all "Band" columns.
        plt.figure()
        plt.plot(self.data['DateTime_Stamp'], self.data["OPEN_Bid"])
        band_columns = [name for name in self.data.columns if "Band" in name]
        for name in band_columns:
            plt.plot(self.data['DateTime_Stamp'], self.data[name])

        return self

示例#7

0

显示文件

文件： TimeSeriesFeatureEngineer.py 项目： XINZHANG-ops/OwnUtilities

 def qq_plot(self, column=None):
     """Show a Q-Q plot (regression reference line) of one column of ``self.data``.

     ``column`` selects the column; when it is falsy, ``self.data_column`` is
     used instead.
     """
     selected = column if column else self.data_column
     qqplot(self.data[selected], line='r')
     plt.show()

示例#8

0

显示文件

文件： simple_statistics.py 项目： stevao-andrade/ACL_defect_prediction

def qq_plot(data):
    """Show a Q-Q plot of ``data`` with a standardized reference line.

    ``data`` (e.g. a Python list) is converted to a numpy array before
    plotting.
    """
    qqplot(np.array(data), line='s')
    pyplot.show()

示例#9

0

显示文件

def plot(label):
    """Show a Q-Q plot of the first column of the CSV file named ``label``.

    The file is read from
    ``~/Dropbox/Fundamental Market Research/QQPlots/<label>.csv``.
    """
    QQdata = pd.read_csv(
        f"~/Dropbox/Fundamental Market Research/QQPlots/{label}.csv")
    # Take the first column directly. The previous element-by-element copy
    # iterated up to the total number of cells (rows * columns), which raises
    # IndexError as soon as the file has more than one column.
    newdata = QQdata.iloc[:, 0].values
    qqplot(newdata, line='s')
    pyplot.show()

示例#10

0

显示文件

文件： normaliti_test.py 项目： dungdl/python-intro

def qq_plot():
    """Show a Q-Q plot of 100 seeded random observations.

    The generator is seeded with 1, then 100 ``randn`` draws are scaled by 5
    and shifted by 50 before being plotted against a standardized line.
    """
    seed(1)  # fixed seed so the sample is reproducible
    sample = 5 * randn(100) + 50
    qqplot(sample, line='s')
    pyplot.show()

示例#11

0

显示文件

文件： forecasting.py 项目： SreerajHub/TimeSeriesForecasting

def AR_model(X, data_in, lag, i):
    """Fit an AR model to ``data_in``, save diagnostic figures and print statistics.

    Parameters
    ----------
    X : sequence
        Values handed to ``inverse_difference`` together with the model
        input/output (presumably the undifferenced series -- confirm against
        the caller).
    data_in : sequence
        Series the AR model is fitted on.
    lag : int
        Maximum lag passed to the fit.
    i : int or str
        Suffix appended to every saved figure's file name.

    Saved figures: "AR fit not scaled<i>", " AR Fit Final<i>", "residuals<i>",
    "QQ<i>" and "Hist<i>".
    """
    model = AR(data_in)
    results_AR = model.fit(maxlag=lag, disp=0)
    AR_data = results_AR.fittedvalues

    # The fitted values are compared with the tail of the input that has the
    # same length. The original hard-coded an offset of 3, which only lines up
    # when the fit drops exactly three leading observations.
    offset = len(data_in) - len(AR_data)
    act = data_in[offset:]
    rmse = np.sqrt((np.sum(np.square(AR_data - act))) / len(act))

    print("Parameters of Autoregressive Model AR(%d) are:" % lag)
    print(results_AR.params)

    plt.figure()
    plt.plot(act, color='blue', label='Actual Value')
    plt.plot(results_AR.fittedvalues, color='red', label="Predicted Value")
    plt.legend(loc='best')
    plt.xlabel("Time")
    plt.ylabel("Time series values")
    # Only this title was ever visible: the RMSE title set just before it in
    # the original was overwritten immediately.
    plt.title("AR Fit (not scaled)")
    plt.savefig("AR fit not scaled" + str(i))

    # Undo the differencing; `k` is used so the figure suffix `i` is not shadowed.
    n_fitted = len(AR_data)
    inverted_in = [
        inverse_difference(X[k], data_in[k]) for k in range(n_fitted)
    ]
    inverted_AR = [
        inverse_difference(X[k], AR_data[k]) for k in range(n_fitted)
    ]

    plt.figure()
    plt.plot(inverted_in, color='red', label="actual value")
    plt.plot(inverted_AR, color='blue', label="predicted value")
    plt.legend(loc='upper left')
    plt.title(
        "Comparison of predicted and actual values for Autoregression model, lag"
        + str(lag))
    plt.savefig(" AR Fit Final" + str(i))

    print("RMSE on the Data is:" + str(rmse))

    residuals = results_AR.resid
    plt.figure()
    plt.title("Residual Scatter Plot")
    plt.scatter(AR_data, residuals)
    plt.savefig("residuals" + str(i))

    plt.figure()
    qqplot(residuals)
    plt.title("Residual Q-Q Plot")
    plt.savefig("QQ" + str(i))

    plt.figure()
    plt.hist(residuals)
    plt.title("Residual Histogram")
    plt.savefig("Hist" + str(i))

    k2, p = stats.normaltest(residuals)
    print("Chi-Square Test : k2 = %.4f  p = %.4f" % (k2, p))
    print("two sided chi squared probability :" + str(p))

示例#12

0

显示文件

文件： views.py 项目： duchoan6814/PhanTichDuLieu1

def newDeath(request):
    """Render the statistics page for the ``new_deaths`` column.

    Builds a box plot, a KDE + histogram figure and a Q-Q plot of
    ``df['new_deaths']`` (each converted with ``renderMatplotlib``), collects
    descriptive, dispersion and shape statistics, and renders
    ``components/newDead.html`` with all of them under the ``data`` key.
    """
    plt.clf()

    boxPlot('new_deaths')
    uri = renderMatplotlib(plt)

    series = df['new_deaths']
    describe = series.describe()
    describes = {
        "count": describe['count'],
        "mean": describe['mean'],
        "std": describe['std'],
        "min": describe['min'],
        "haiNam": describe['25%'],
        "namMuoi": describe['50%'],
        "bayNam": describe['75%'],
        "max": describe['max'],
        "median": series.median(),
        "mode": series.mode()
    }

    # Dispersion measures.
    doPhanTan = {
        "IQR": interquartile_range('new_deaths'),
        "var": series.var(),
        "std": series.std(),
    }

    # Shape measures (key names are kept as the template expects them).
    mucDo = {
        "knewness": series.skew(),
        "kurtosis": series.kurtosis()
    }

    plt.clf()
    fig, ax = plt.subplots()
    series.plot.kde(ax=ax,
                    legend=False,
                    title='Histogram new_deaths')
    series.plot.hist(density=True, ax=ax, color='red')
    ax.set_ylabel('new_deaths')
    ax.grid(axis='y')
    ax.set_facecolor('#d8dcd0')
    hist = renderMatplotlib(plt)

    plt.clf()
    # Plot the column itself. The original plotted `randn(len(x))`, i.e. pure
    # random noise, so the chart said nothing about new_deaths. Missing values
    # are dropped because a Q-Q plot cannot rank NaNs.
    qqplot(series.dropna().values, line='s')
    plt.title('Biểu đồ phân phối chuẩn của new_deaths')
    kiemDinh = renderMatplotlib(plt)

    data = {
        "uri": uri,
        "describe": describes,
        "doPhanTan": doPhanTan,
        "mucDo": mucDo,
        "hist": hist,
        "kiemDinh": kiemDinh
    }
    return render(request, 'components/newDead.html', {"data": data})

示例#13

0

显示文件

def normality_test(ts):  # Completed
    """
    Performs a series of hypothesis tests about normality
    on the time series data distribution. Besides
    the result of the statistical tests, this also includes
    a quantile plot of the data (qqplot).

    The three tests are Shapiro-Wilk, ``normaltest`` (D'Agostino-Pearson,
    assuming this is ``scipy.stats.normaltest``) and Anderson-Darling.

    Note: these tests can still produce inconsistencies if the
    data set (size) is too small to detect non-normality.

    The series is converted with ``ts_to_list`` for the tests and converted
    back with ``list_to_ts`` before being returned.
    """
    ts = ts_to_list(ts)
    data = np.array(ts)
    # Shapiro-Wilk test: detects departures from normality.
    # Rejects the hypothesis of normality when the p-value is below 0.05,
    # i.e. not from a normal distribution.
    stat_sw, p_sw = shapiro(data)  # (1) Normality test
    # D'Agostino-Pearson: `normaltest` combines skewness and kurtosis into a
    # single statistic. It was previously labelled Kolmogorov-Smirnov, which
    # is a different test that this code never runs.
    # If p < .05 we can reject the null hypothesis of normality.
    stat_k2, p_k2 = normaltest(data)  # (2) Normality test
    # Anderson-Darling: rather than a p-value, we're given an array of
    # critical values at which the hypothesis can be rejected.
    stat_ad = anderson(data)  # (3) Normality test
    # Print results of all 3 tests
    print(f'\nShapiro-Wilk Statistic Test Result: {stat_sw:.3f}')
    print(f'P-value: {p_sw}: ', end='')
    # Check if (SW) from normal distribution or not.
    if p_sw < 0.05:
        print("Null Hypothesis Rejected. Not from normal distribution.\n")
    else:
        print("Accepted Null Hypothesis.\n")
    print(f"D'Agostino-Pearson Statistic Test Result: {stat_k2:.3f}")
    # Same "P-value: <p>: <verdict>" layout as the Shapiro-Wilk output above;
    # the separator was missing, gluing the verdict onto the number.
    print(f'P-value: {p_k2}: ', end='')
    # Check if (K2) from normal distribution or not.
    if p_k2 < 0.05:
        print("Null Hypothesis Rejected. Not from normal distribution.\n")
    else:
        print("Accepted Null Hypothesis. Can occurs if data set is too small.")
    print(f'Anderson-Darling Statistic Test Result: {stat_ad.statistic}')
    # Check if (AD) from normal distribution or not, one verdict per
    # (significance level, critical value) pair.
    for st, cv in zip(stat_ad.significance_level, stat_ad.critical_values):
        if stat_ad.statistic < cv:
            print(f'{st:.3f}: {cv:.3f}: Accepted. From normal distribution')
        else:
            print(f'{st:.3f}: {cv:.3f}: Rejected. Data not normal')
    # Plots a standardized line, scaled by the SD of the time series.
    qqplot(data, line='s')
    plt.show()

    ts = list_to_ts(ts)
    return ts

示例#14

0

显示文件

文件： FMRresiduals&multipleRegress.py 项目： asarantsev/StockMarketSpring2019REU

def residooMultipleRegression(Y, A, B1, B2, B3, B4, x1, x2, x3, x4):
    """Show a Q-Q plot of the residuals of a four-regressor linear model.

    Parameters
    ----------
    Y : sequence of float
        Observed response (the ChangeNominal/Real series).
    A : float
        Intercept of the fitted model.
    B1, B2, B3, B4 : float
        Coefficients of ``x1`` .. ``x4`` respectively.
    x1, x2, x3, x4 : sequence of float
        Regressor series.

    Only the first ``T - 1`` observations are used (``T`` is a module-level
    value), which is the range the original loop covered.
    """
    n = T - 1

    def _head(values):
        # First n observations as a float array.
        return numpy.asarray(values, dtype=float)[:n]

    # Fixes relative to the original loop:
    #  * the last term is B4 * x4 (it was B4 + x4),
    #  * the intercept A is part of the prediction (it was never used),
    #  * only computed residuals are plotted (the tail of an `empty_like`
    #    buffer used to be plotted uninitialised).
    predicted = (A
                 + B1 * _head(x1)
                 + B2 * _head(x2)
                 + B3 * _head(x3)
                 + B4 * _head(x4))
    residual = _head(Y) - predicted

    qqplot(residual, line='s')
    plt.show()

示例#15

0

显示文件

def QQ_plot(data):
    """Show a Q-Q plot of ``data`` with a standardized reference line."""
    # Imports are kept local to the function, as in the original; the unused
    # `seed` and `randn` imports were dropped.
    from statsmodels.graphics.gofplots import qqplot
    from matplotlib import pyplot

    qqplot(data, line='s')
    pyplot.show()

示例#16

0

显示文件

def spatial_QQ_plots(ALLdata, timestamps):
    """Draw a grid of Q-Q plots: one row per timestamp, one column per variable.

    INPUTS
        ALLdata - object whose ``WSdata`` attribute iterates over stations;
                  each station's ``data_binned.loc[ts]`` must provide the
                  'temp:', 'dir:', 'speed:' and 'solar:' entries
        timestamps - sequence of index values to look up in ``data_binned``

    For every (timestamp, variable) cell the values of all stations are
    plotted against a standardized line, and the Shapiro p-value is printed
    and used as the x-axis label.
    """
    labels = ['temp', 'direction', 'speed', 'solar']
    columns = ['temp:', 'dir:', 'speed:', 'solar:']

    num_times = len(timestamps)

    # squeeze=False keeps `axes` two-dimensional even for a single timestamp,
    # so axes[row, col] below always works.
    fig, axes = plt.subplots(num_times,
                             4,
                             figsize=(13, int(np.round(num_times * 13 / 4))),
                             dpi=80,
                             facecolor='w',
                             edgecolor='k',
                             squeeze=False)

    for row, ts in enumerate(timestamps):
        readings = [station.data_binned.loc[ts] for station in ALLdata.WSdata]

        for col, (label, column) in enumerate(zip(labels, columns)):
            # Plain 1-D array instead of an np.matrix column vector.
            vals = np.array(sorted(reading[column] for reading in readings))

            ax = axes[row, col]
            qqplot(vals, line='s', ax=ax)
            ax.set_ylabel('')

            _, p = sps.shapiro(vals)
            p_str = '{:.2f}'.format(p)
            print('timestamp:{}, var:{}, p: {}'.format(ts, label, p_str))
            ax.set_xlabel('Shap p:{}'.format(p_str))

            # Row labels on the first column, variable names on the first row.
            if col == 0:
                ax.set_ylabel(ts)
            if row == 0:
                ax.title.set_text(label)

    fig.tight_layout(pad=1.1)

示例#17

0

显示文件

 def test_qqplot_pltkwargs(self, close_figures):
     """Call ``qqplot`` on ``self.res`` with extra marker/alpha keyword arguments."""
     plot_kwargs = dict(
         marker="d",
         markerfacecolor="cornflowerblue",
         markeredgecolor="white",
         alpha=0.5,
     )
     qqplot(self.res, line="r", **plot_kwargs)

示例#18

0

显示文件

文件： covid_app_func.py 项目： WraySmith/COVID_learning

def explore_series(df, columns_explore, plot_lags=20):
    """Plot diagnostics and run the ADF test for selected columns of ``df``.

    For each column of ``df`` that is listed in ``columns_explore`` a column
    of five panels is drawn (time history, histogram, Q-Q plot, ACF, PACF)
    and the ``adfuller`` test is run.

    Parameters
    ----------
    df : pd.DataFrame
        Data to explore.
    columns_explore : collection of str
        Names of the columns to analyse.
    plot_lags : int
        Number of lags for the ACF/PACF plots.

    Returns
    -------
    (df_out, fig_out)
        ``df_out`` holds, per analysed column, the ADF statistic, p-value and
        the 1%/5%/10% critical values; ``fig_out`` is the figure.
    """
    # Figure setup
    fig_out = plt.figure(figsize=(22, 24))
    plot_cols = len(columns_explore)
    position = 0
    # create lists for output values from adfuller tests
    output_ADF = []
    output_pval = []
    output_crit1 = []
    output_crit5 = []
    output_crit10 = []
    output_labels = []

    def panel(row):
        # Axes for the given diagnostic row in the current column's slot.
        return plt.subplot2grid((5, plot_cols), (row, position))

    # loop through the columns of interest
    for column in df:
        if column not in columns_explore:
            continue

        series = df[column]
        # Missing values are removed once; the Q-Q plot now uses the same
        # cleaned data as the ACF/PACF plots and the ADF test (it used to
        # receive the raw column including NaNs).
        clean = series.dropna()

        # time history plot
        series.plot(ax=panel(0), title=column)
        # histogram
        series.hist(ax=panel(1))
        # qqplot to check normality
        qqplot(clean, line='r', ax=panel(2))
        # autocorrelation plot
        plot_acf(clean, lags=plot_lags, ax=panel(3))
        # partial autocorrelation plot
        plot_pacf(clean, lags=plot_lags, ax=panel(4))
        position += 1

        # run adfuller test and append results to lists
        result = adfuller(clean)
        output_ADF.append(result[0])
        output_pval.append(result[1])
        output_crit1.append(result[4]['1%'])
        output_crit5.append(result[4]['5%'])
        output_crit10.append(result[4]['10%'])
        output_labels.append(column)

    # create dataframe for the adfuller results
    df_out = pd.DataFrame(columns=output_labels,
                          index=[
                              'ADF_Statistic', 'p-value', 'Critical_1percent',
                              'Critical_5_percent', 'Critical_10_percent'
                          ])
    df_out.iloc[0] = output_ADF
    df_out.iloc[1] = output_pval
    df_out.iloc[2] = output_crit1
    df_out.iloc[3] = output_crit5
    df_out.iloc[4] = output_crit10

    return df_out, fig_out

示例#19

0

显示文件

def isGaussian(data):
    """Run visual and statistical normality checks on ``data`` and print the results.

    Shows a histogram and a Q-Q plot, then prints the outcome of the
    Shapiro-Wilk, D'Agostino K^2 and Anderson-Darling tests. The first two
    are interpreted at alpha = 0.05; the Anderson-Darling statistic is
    compared with each reported critical value.
    """
    alpha = 0.05

    # histogram plot --------------------------------------------------------------
    print("Histogram plot -------------------------------------")
    pyplot.hist(data)
    pyplot.show()

    # QQPlot ----------------------------------------------------------------------
    print("QQ plot -------------------------------------")
    from statsmodels.graphics.gofplots import qqplot
    qqplot(data, line='s')
    pyplot.show()

    # Shapiro-Wilk test -----------------------------------------------------------
    print("Shapiro-Wilk test -------------------------------------")
    from scipy.stats import shapiro
    stat, p = shapiro(data)
    print('Statistics=%.3f, p=%.3f' % (stat, p))
    if p > alpha:
        print('Sample looks Gaussian (fail to reject H0)')
    else:
        print('Sample does not look Gaussian (reject H0)')

    # D'Agostino's K^2 Test -------------------------------------------------------
    print("D'Agostino's K^2 test -------------------------------------")
    from scipy.stats import normaltest
    stat, p = normaltest(data)
    print('Statistics=%.3f, p=%.3f' % (stat, p))
    if p > alpha:
        print('Sample looks Gaussian (fail to reject H0)')
    else:
        print('Sample does not look Gaussian (reject H0)')

    # Anderson-Darling Test -------------------------------------------------------
    print("Anderson-Darling test -------------------------------------")
    from scipy.stats import anderson
    result = anderson(data)
    print('Statistic: %.3f' % result.statistic)
    # One verdict per (significance level, critical value) pair.
    for sl, cv in zip(result.significance_level, result.critical_values):
        if result.statistic < cv:
            print('%.3f: %.3f, data looks normal (fail to reject H0)' % (sl, cv))
        else:
            print('%.3f: %.3f, data does not look normal (reject H0)' % (sl, cv))

示例#20

0

显示文件

def qq(a, logvalue, lognext, interval, NCHANGES):
    """Show a Q-Q plot of the centralized regression values and test them for normality.

    ``regression`` is called with the given arguments; the first ``NCHANGES``
    values of ``V[k] - r * A[k]`` are plotted against a regression reference
    line.

    Returns
    -------
    (s1, s2)
        Elements 0 and 1 of the ``stats.shapiro`` result for those values.
    """
    V, A, r, sigma = regression(a, logvalue, lognext, interval, NCHANGES)
    centralized = numpy.array([V[k] - r * A[k] for k in range(NCHANGES)])

    # Run the test once and read both values from the same result.
    shapiro_result = stats.shapiro(centralized)
    s1 = shapiro_result[0]
    s2 = shapiro_result[1]

    qqplot(centralized, line='r')
    pyplot.show()
    return s1, s2

示例#21

0

显示文件

文件： residuals_api.py 项目： bobTheHands/GamestonkTerminal

def plot_qqplot(
    other_args: List[str],
    ticker: str,
    model_name: str,
    residuals: List[float],
):
    """Draw a Q-Q plot of model residuals against the normal distribution.

    Parameters
    ----------
    other_args : List[str]
        Command line arguments to be processed with argparse
    ticker : str
        Ticker of the stock (used in the title)
    model_name : str
        Name of the fitted model (used in the title)
    residuals : List[float]
        Residuals data

    Any exception raised while parsing or plotting is printed rather than
    propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="qqplot",
        description="""
            Qqplot time series against a standard normal curve
        """,
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if ns_parser:
            plt.figure(
                figsize=plot_autoscale(),
                dpi=PLOT_DPI,
                constrained_layout=True,
            )

            axis = plt.gca()
            qqplot(
                residuals,
                stats.distributions.norm,
                fit=True,
                line="45",
                ax=axis,
            )
            plt.title(f"Q-Q plot residuals from {model_name} on {ticker}")
            plt.ylabel("Sample quantiles")
            plt.xlabel("Theoretical quantiles")
            plt.grid(True)

            if gtff.USE_ION:
                plt.ion()

            plt.show()
            print("")

    except Exception as e:
        print(e, "\n")

    return

示例#22

0

显示文件

文件： qa_view.py 项目： sinyeeftw/GamestonkTerminal

def display_qqplot(
    name: str,
    df: pd.DataFrame,
    target: str,
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Show QQ plot for data against normal quantiles

    Parameters
    ----------
    name : str
        Stock ticker
    df : pd.DataFrame
        Dataframe
    target : str
        Column in data to look at
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list), by default None
    """
    data = df[target]

    # This plot has 1 axis
    if external_axes is None:
        _, ax = plt.subplots(
            figsize=plot_autoscale(),
            dpi=PLOT_DPI,
        )
    else:
        if len(external_axes) != 1:
            logger.error("Expected list of one axis item.")
            # "\n" was previously written as the literal text "/n".
            console.print("[red]Expected list of 1 axis items.\n[/red]")
            return
        (ax, ) = external_axes

    # Statsmodels has a UserWarning for marker kwarg-- which we don't use.
    # The filter is scoped to this call so UserWarnings raised elsewhere in
    # the process are not silenced as a side effect.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        qqplot(
            data,
            stats.distributions.norm,
            fit=True,
            line="45",
            color=theme.down_color,
            ax=ax,
        )
    # Second line on the axes is recoloured (presumably the reference line).
    ax.get_lines()[1].set_color(theme.up_color)

    ax.set_title(f"Q-Q plot for {name} {target}")
    ax.set_ylabel("Sample quantiles")
    ax.set_xlabel("Theoretical quantiles")

    theme.style_primary_axis(ax)

    if external_axes is None:
        theme.visualize_output()

示例#23

0

显示文件

文件： results.py 项目： BerenMillidge/Masters-Dissertation

    def qqplots(self, epoch, sub=None):
        """Draw one Q-Q plot per group for the given epoch.

        ``epoch`` must lie in ``[0, self.epochs]``. ``sub`` lists the group
        indices to plot; when it is falsy, all ``range(self.masks)`` groups
        are plotted. Each plot uses ``self.data[group, :, epoch]`` and is
        titled with the object's name, the group label and the epoch.
        """
        assert 0 <= epoch <= self.epochs, \
            'epoch must be between 0 and %.0f, got %.0f' % (self.epochs, epoch)

        groups = sub if sub else range(self.masks)

        for group in groups:
            sample = self.data[group, :, epoch]
            gofplots.qqplot(sample, fit=True, line='45')
            heading = "%s, %s, Epoch %.0f" % (self.name, self.labels[group], epoch)
            plt.suptitle(heading, fontsize=18)

示例#24

0

显示文件

文件： generic_plots.py 项目： GermanCM/machine_learning_concepts_checks

    def plot_QQ_plot(self, series_values):
        """Show a Q-Q plot of ``series_values`` with a standardized reference line.

        Any exception raised while importing or plotting propagates to the
        caller unchanged.
        """
        # The original wrapped this in `try/except Exception as exc: raise exc`,
        # which only re-raised the same exception; the unused `seed` and
        # `randn` imports were dropped as well.
        from statsmodels.graphics.gofplots import qqplot
        from matplotlib import pyplot

        qqplot(series_values, line='s')
        pyplot.show()

示例#25

0

显示文件

文件： time_series_useful_functions.py 项目： canada87/AJ_lib

def residuals_eval(y_true, y_pred):
    """Print summary statistics of the residuals and show four diagnostic plots.

    The residuals are ``y_true - y_pred``. Shown in order: line plot,
    histogram, kernel density plot and a Q-Q plot with a regression line.
    """
    y_res = y_true - y_pred
    df_res = DataFrame(y_res)
    print(df_res.describe())

    frame_plots = (
        df_res.plot,                      # line
        df_res.hist,                      # hist
        lambda: df_res.plot(kind='kde'),  # density plot
    )
    for draw in frame_plots:
        draw()
        pyplot.show()

    qqplot(numpy.array(y_res), line='r')
    pyplot.show()

示例#26

0

显示文件

文件： process_results.py 项目： cserpell/param_prob_forec

def residuals_charts(test_dataset, test_output):
    """Build residuals charts for one experiment.

    The prediction is the mean of ``test_output`` over axis 0; the residuals
    are prediction minus ``test_dataset.y_data``. Draws a 30-bin histogram,
    prints the ``normaltest`` result, draws a Q-Q plot followed by a y = x
    line over [-3.5, 3.5], and the ACF/PACF plots with 30 lags.
    """
    n_points = test_output.shape[1]
    prediction_series = test_output.mean(axis=0).reshape(n_points)

    targets = test_dataset.y_data
    residuals = prediction_series - targets.reshape(targets.shape[0])

    pd.Series(residuals).hist(bins=30)
    print(stats.normaltest(residuals))

    gofplots.qqplot(residuals)
    diagonal = np.linspace(-3.5, 3.5, 4)
    pyplot.plot(diagonal, diagonal)

    for correlation_plot in (tsaplots.plot_acf, tsaplots.plot_pacf):
        correlation_plot(residuals, lags=30)

示例#27

0

显示文件

 def residual_plot(self):
     '''
     Plot the residual and save it to the current directory.

     Left panel: counts of ``self._residual`` in 24 half-open bins of width
     0.5 centred on -5.75 .. 5.75, a scaled normal pdf fitted to the
     residual's mean and standard deviation, and a shaded band between the
     2.5% and 97.5% points of that normal on the [-3, 3] grid (log y-axis).
     Right panel: Q-Q plot of the residual with a standardized line.
     The figure is written to "residual_plots.png".
     '''
     import matplotlib.pyplot as plt
     from scipy.stats import norm
     from statsmodels.graphics.gofplots import qqplot

     residual = np.asarray(self._residual)
     n_obs = self._residual.shape[0]
     mean = np.mean(residual)
     std = np.std(residual)

     # set the size of the plot
     plt.figure(figsize=(16, 9))
     # plot the distribution
     ax = plt.subplot(121)

     # create bins and count the numbers: bin c covers [c - 0.25, c + 0.25).
     # Counting is vectorised per bin instead of looping over every residual.
     centers = np.arange(-5.75, 6, step=0.5)
     counts = np.array([
         np.count_nonzero((residual >= c - 0.25) & (residual < c + 0.25))
         for c in centers
     ])

     # create a normal distribution reference
     xx = np.linspace(-3, 3, 100)
     normal = norm.pdf(xx, mean, std)
     normalcdf = norm.cdf(xx, mean, std)
     # First grid index where the cdf reaches 2.5% / 97.5%. The cdf is
     # non-decreasing, so searchsorted finds the same indices as a scan, and
     # it yields len(xx) instead of leaving the name unbound when the
     # threshold is never reached on the grid.
     low = int(np.searchsorted(normalcdf, 0.025, side='left'))
     high = int(np.searchsorted(normalcdf, 0.975, side='left'))

     # plot the distribution
     plt.plot(centers, counts, 'o', label="residual")
     plt.plot(xx, normal * n_obs, '--', label="normal")
     plt.fill_between(xx[low:high],
                      0,
                      normal[low:high] * n_obs,
                      alpha=.3,
                      facecolor="grey",
                      label="95% normal")
     plt.xlim([-6, 6])
     ax.set_ylim(bottom=5)
     ax.legend()
     plt.title("Distribution of the residual")
     plt.yscale('log')

     # plot the QQ plot
     ax = plt.subplot(122)
     qqplot(self._residual, line='s', ax=ax)
     plt.xlim([-3.5, 3.5])
     plt.ylim([-5, 5])
     plt.title("residual QQ plot")
     plt.savefig("residual_plots.png")

示例#28

0

显示文件

文件： SARIMA_example.py 项目： Billy-Bat/F_Main

def plot_Model_Identify(DataSet, frequency=1, acf_lag=12, pacf_lag=12):
    """
    Show a 3x4 grid of identification plots for the first column of DataSet.

    DataSet : dataframe with the type of first column either int()
    or panda datetime
    frequency : int, Seasonal Component period (in time step)
    acf_lag, pacf_lag : int, number of lags for the ACF and PACF panels
    """
    # Organize plot
    fig, grid = plt.subplots(3, 4)

    # Observed data
    observed_ax = grid[0, 0]
    DataSet.plot(ax=observed_ax)
    observed_ax.set_title('Observed Value')
    observed_ax.set_xlabel("")

    # Every remaining panel works on the first column only.
    series = DataSet.iloc[:, 0]

    # Autocorrelation plot
    autocorrelation_plot(series, ax=grid[1, 0])
    grid[1, 0].set_title('Autocorrelation')

    # QQ plot
    qqplot(series, ax=grid[2, 0])
    grid[2, 0].set_title('Q-Q Plot')

    # Lag plot
    lag_ax = grid[0, 1]
    lag_plot(series, ax=lag_ax)
    lag_ax.set_title('Lag Plot')
    lag_ax.set_ylabel("")
    lag_ax.set_xlabel("")

    # ACF plot
    tsa.plot_acf(series, ax=grid[1, 1], lags=acf_lag, alpha=0.05)
    grid[1, 1].set_title('ACF')

    # PACF plot
    tsa.plot_pacf(series, ax=grid[2, 1], lags=pacf_lag, alpha=0.05)
    grid[2, 1].set_title('PACF')

    # Decomposition plots
    decomposition = sm.tsa.seasonal_decompose(series, freq=frequency)
    residual_part = decomposition.resid
    residual_part.plot(ax=grid[0, 2])
    residual_part.plot(ax=grid[0, 3], kind='kde')
    decomposition.seasonal.plot(ax=grid[1, 2])
    decomposition.trend.plot(ax=grid[2, 2])

    panel_titles = (
        ((0, 2), 'Residual'),
        ((1, 2), 'Seasonal'),
        ((2, 2), 'Trend'),
        ((0, 3), 'Residual Prob. Distrib'),
    )
    for cell, title in panel_titles:
        grid[cell].set_title(title)

    plt.show()

示例#29

0

显示文件

def correctLin(x, y):
    """Fit ``y ~ x`` by linear regression and inspect the residuals.

    Prints the ``stats.linregress`` result, shows a Q-Q plot of the residuals
    (regression reference line) and prints the Shapiro-Wilk and Jarque-Bera
    p-values of the residuals.

    Returns
    -------
    (residuals, s, i, stderr)
        Residual array, slope, intercept and the residual standard error
        ``sqrt(sum(residuals**2) / (n - 2))``.
    """
    n = numpy.size(x)
    r = stats.linregress(x, y)
    s = r.slope
    i = r.intercept
    print(r)

    # Vectorised residuals instead of an element-by-element Python loop.
    x_values = numpy.asarray(x, dtype=float)
    y_values = numpy.asarray(y, dtype=float)
    residuals = y_values - x_values * s - i

    # Divide the float sum of squares directly: `1 / (n - 2)` evaluated first
    # truncates to 0 wherever `/` on two ints is integer division.
    stderr = math.sqrt(numpy.dot(residuals, residuals) / (n - 2))

    qqplot(residuals, line='r')
    pyplot.show()
    print('Shapiro-Wilk p = ', stats.shapiro(residuals)[1])
    print('Jarque-Bera p = ', stats.jarque_bera(residuals)[1])
    return (residuals, s, i, stderr)

示例#30

0

显示文件

文件： FMR3.py 项目： asarantsev/StockMarketSpring2019REU

def simpleLin(x, y):
    """Fit ``y = k * x`` (no intercept) by least squares and inspect the residuals.

    Shows a Q-Q plot of the residuals (regression reference line) and a line
    plot of the residuals, then prints their Shapiro test result.

    Returns
    -------
    (k, stderr)
        Slope ``dot(x, y) / dot(x, x)`` and the standard deviation of the
        residuals.
    """
    # Work in floating point so integer inputs cannot trigger integer
    # division or integer overflow in the dot products. The unused
    # `n = numpy.size(x)` local was dropped.
    x = numpy.asarray(x, dtype=float)
    y = numpy.asarray(y, dtype=float)

    k = numpy.dot(x, y) / numpy.dot(x, x)
    residuals = y - k * x
    stderr = numpy.std(residuals)

    qqplot(residuals, line='r')
    pyplot.show()
    pyplot.plot(residuals)
    pyplot.show()
    print('normality', stats.shapiro(residuals))
    return (k, stderr)

示例#31

0

显示文件

文件： linregression.py 项目： hdemers/datascience

def plot_regress_analysis(model, influence=True, annotate=True):
    """Draw regression diagnostic panels for a fitted ``model`` on a 3x2 grid.

    Panels: residuals vs fitted and scale-location (each with a quadratic
    trend line), Q-Q plot of ``model.resid_pearson``, leverage vs squared
    residuals, and -- when ``influence`` is true -- an influence plot spanning
    the bottom row. ``annotate`` is forwarded to ``plot_leverage_resid2``.
    """
    layout = (3, 2)
    plt.figure(figsize=(15, 16))

    # Residuals vs Fitted
    resid_ax = plt.subplot2grid(layout, (0, 0))
    resid_ax.set_title("Residuals vs Fitted")
    resid_ax.set_xlabel('Fitted values')
    resid_ax.set_ylabel('Residuals')
    fitted = model.predict()
    residuals = model.resid
    resid_ax.plot(fitted, residuals, marker='.', linestyle='')

    # Model non-linearity with quadratic
    resid_trend = np.poly1d(np.polyfit(fitted, residuals, 2))
    max_fitted = np.max(fitted)
    # Unit-step grid over the fitted range, with the maximum appended.
    xs = np.append(np.arange(np.min(fitted), max_fitted), max_fitted)
    resid_ax.plot(xs, resid_trend(xs), linewidth=2.5)

    # Q-Q plot
    qq_ax = plt.subplot2grid(layout, (0, 1))
    qq_ax.set_title("Q-Q")
    qqplot(model.resid_pearson, dist="norm", line='r', ax=qq_ax)

    # Scale-Location
    scale_ax = plt.subplot2grid(layout, (1, 0))
    scale_ax.set_title("Scale-Location")
    scale_ax.set_xlabel('Fitted values')
    scale_ax.set_ylabel('$|$Normalized residuals$|^{1/2}$')
    std_residuals = np.sqrt(np.abs(model.resid_pearson))
    scale_ax.plot(fitted, std_residuals, linestyle='', marker='.')

    # Model non-linearity with quadratic
    scale_trend = np.poly1d(np.polyfit(fitted, std_residuals, 2))
    scale_ax.plot(xs, scale_trend(xs), linewidth=2.5)

    # Residuals vs Leverage
    leverage_ax = plt.subplot2grid(layout, (1, 1))
    plot_leverage_resid2(model, leverage_ax, annotate=annotate)

    # Influence plot
    if influence:
        influence_ax = plt.subplot2grid(layout, (2, 0), colspan=2)
        influence_ax = influence_plot(model, ax=influence_ax)

示例#32

0

显示文件

文件： dea.py 项目： jameszuccollo/pyDEA

    def env_corr(self, env_vars, coeff_plot=False, qq_plot=False):
        """
        Determine correlations with environmental/non-discretionary variables
        using an ordinary least squares regression of ``Efficiency`` on the
        environmental variables.

        Takes:
            env_vars: A pandas dataframe of environmental variables
            coeff_plot: when True, draw the regression coefficients with
                        seaborn's ``coefplot``
            qq_plot: when True, draw a Q-Q plot of the regression residuals

        Returns:
            corr_res: the fitted statsmodels results instance of the OLS
                      model (its summary is also printed).

        Note that there can be no spaces in the variables' names.
        """

        import matplotlib.pyplot as plt
        from statsmodels.regression.linear_model import OLS
        from statsmodels.graphics.gofplots import qqplot

        env_data = _to_dataframe(env_vars)
        corr_data = env_data.join(self['Efficiency'])
        # Built once and shared by the model and the coefficient plot.
        formula = "Efficiency ~ " + " + ".join(env_vars.columns)
        corr_mod = OLS.from_formula(formula, corr_data)
        corr_res = corr_mod.fit()

        #plot coeffs
        if coeff_plot:
            # Imported only when needed so the method keeps working with
            # seaborn releases that do not provide `coefplot`, as long as the
            # coefficient plot is not requested.
            from seaborn import coefplot
            coefplot(formula, data=corr_data)
            plt.xticks(rotation=45, ha='right')
            plt.title('Regression coefficients and standard errors')

        #plot qq of residuals
        if qq_plot:
            qqplot(corr_res.resid, line='s')
            plt.title('Distribution of residuals')

        print(corr_res.summary())

        return corr_res

示例#33

0

显示文件

文件： matching.py 项目： Heidi-/hack-university-data-science

def draw_figures():
    """Build a 4x2 figure of histograms (left) and normal Q-Q plots (right).

    Reads ``bdims.csv`` from the working directory, standardizes three
    measurements of the rows with ``sex == 0`` plus the age column of the
    whole data set, and draws one histogram per variable in the left column.
    Each variable's Q-Q plot goes in the right column on a *different* row
    from its histogram, and the Q-Q plots are only labelled A-D
    (presumably so the reader has to match them up -- confirm with the
    exercise this figure belongs to).

    Returns
    -------
    The matplotlib figure, resized to 12x12 inches with a tight layout.
    """
    bdims = pd.read_csv("bdims.csv")
    females = bdims[bdims["sex"] == 0]

    fig, plots = plt.subplots(4, 2)

    # One entry per histogram row:
    # (standardized sample, histogram bins, histogram title, Q-Q plot row)
    panels = [
        (standardize(females["bii.di"]), range(-4, 4),
         "Histogram of female biiliac diameter", 1),
        (standardize(females["elb.di"]), range(-3, 5),
         "Histogram of female elbow diameter", 2),
        # Age is taken from the full data set, not only the sex == 0 rows.
        (standardize(bdims["age"]), range(-2, 5),
         "Histogram of general age", 3),
        (standardize(females["che.de"]), range(-2, 6),
         "Histogram of female chest depth", 0),
    ]

    # Left column: histogram, title and axis labels for each variable.
    for row, (sample, bins, title, _) in enumerate(panels):
        hist_ax = plots[row][0]
        hist_ax.hist(sample, bins=bins)
        hist_ax.set_title(title)
        hist_ax.set_xlabel("standarized data")
        hist_ax.set_ylabel("frequency")

    # Right column: each sample's Q-Q plot on its assigned (shifted) row.
    for sample, _, _, qq_row in panels:
        qqplot(sample, ax=plots[qq_row][1], line="q")

    # Letter the right-hand plots top to bottom.
    for row, letter in enumerate("ABCD"):
        plots[row][1].set_title("Normal Q-Q Plot " + letter)

    fig.set_size_inches(12, 12)
    plt.tight_layout()

    return fig

示例#34

0

显示文件

文件： statistics.py 项目： olavvatne/CNN

        print (final_test_loss)


# NOTE: this fragment uses Python 2 print statements (`print`, `print folder`).
# For every folder, store the last break-even point of each run in pr[folder]
# and echo it to stdout.
for folder in folders:
    print
    print folder
    for d in data[folder]:
        breakeven_points = util.find_breakeven(d["curve"])
        pr[folder].append(breakeven_points[-1])
        print (breakeven_points[-1])

print ("Loss samples t test")
# Random samples from normal
# NOTE(review): the mean is taken from folders[0] but the standard deviation
# from folders[1] -- confirm this mix is intended.
s = np.random.normal(np.mean(lc[folders[0]]), np.std(lc[folders[1]]), 100)
# Shapiro-Wilk result for the synthetic sample, then its Q-Q plot against a
# fitted normal with a 45-degree reference line.
print ("Random samples", scipy.stats.shapiro(s))
fig = qqplot(s, scipy.stats.norm, fit=True, line="45")
plt.show()

# First folder figures
# Same Q-Q plot and Shapiro-Wilk output, now for the values in lc[folders[0]].
fig = qqplot(np.array(lc[folders[0]]), scipy.stats.norm, fit=True, line="45")
plt.show()
print (folders[0], scipy.stats.shapiro(np.array(lc[folders[0]])))

# Second folder figures
# Same Q-Q plot and Shapiro-Wilk output for the values in lc[folders[1]].
fig = qqplot(np.array(lc[folders[1]]), scipy.stats.norm, fit=True, line="45")
plt.show()
print (folders[1], scipy.stats.shapiro(np.array(lc[folders[1]])))

# Compare the two folders' lc values; perform_welchs_test is defined elsewhere
# (presumably Welch's unequal-variance t-test -- verify against its definition).
tstat, pval = perform_welchs_test(lc[folders[0]], lc[folders[1]])
print ("t-statistics = {}".format(tstat))
print ("p-value = {}".format(pval))

示例#35

0

显示文件

文件： CAPM.py 项目： GBelzoni/BigGits

# Interactive-style script fragment: `irf`, `results`, `rets_bm`, `plt` and
# `datetime` are defined earlier in the file, outside this excerpt.
# Plot the cumulative effects held by `irf` with orth=False.
irf.plot_cum_effects(orth=False)
plt.show()
# Call results.fevd(1), then print its summary and plot it.
fevd = results.fevd(1)
fevd.summary()
fevd.plot()
plt.show()
# Causality tests between the DJIA and SP500 series with kind='f', run with
# the arguments in both orders, followed by a normality test at signif=0.05.
# The return values are not stored.
results.test_causality('DJIA', ['SP500'],kind='f')
results.test_causality('SP500', ['DJIA'],kind='f')
results.test_normality(signif=0.05,verbose=False)

# Sum the residuals along axis 1 and plot the resulting series.
resids = results.resid.sum(axis=1)
resids.plot()
plt.show()
from statsmodels.graphics.gofplots import qqplot, qqline

# Q-Q plot of the summed residuals with a standardized reference line; the
# trailing comment lists the remaining qqplot parameters left at defaults.
qqplot(data=resids,line='s')#, dist, distargs, a, loc, scale, fit, line, ax)
plt.show()
from statsmodels.sandbox.tsa.garch import Garch
# Take the second column of rets_bm and pass it to the sandbox Garch class;
# the resulting object is not kept.
rets1 = rets_bm.iloc[:,1]
Garch(rets1)

#Getting GARCH working -using RPY2
import rpy2
# Bare expression: only displays the version in an interactive session.
rpy2.__version__

t =datetime(2013,1,5)
from pandas.tseries.offsets import MonthEnd, BusinessMonthBegin

# Bare expression: shifts t by two BusinessMonthBegin offsets and compares the
# result with 2013-01-01; the boolean is discarded when run as a script.
(t + 2*BusinessMonthBegin())> datetime(2013,1,1)

示例#36

0

显示文件

文件： eval.py 项目： gtzampanakis/downfoot

				ma['teams'][0],
				ma['teams'][1],
				ma['score'][0],
				ma['score'][1],
				fitted[mai],
		)
		mfile.write(towrite)
		mfile.write('\n')

# NOTE: Python 2 print statements. Prints the standard deviation of `resid`
# (defined earlier in the file) surrounded by blank lines.
print
print 'residual_std: %.10f' % (resid.std())
print

# Optional diagnostics, each switched on by a module-level flag.
if PLOT_RESIDUAL_QQ:
	import statsmodels.graphics.gofplots as sgg
	# Q-Q plot of the residuals; fit=True lets qqplot fit the distribution's
	# parameters to the data first.
	sgg.qqplot(resid, fit=True)

if PLOT_RESIDUAL_HIST:
	import pylab
	import scipy.stats as ss

	# Density-normalised histogram; `lefts` receives the bin edges returned by
	# np.histogram, and `centers` are the midpoints of consecutive edges.
	freqs, lefts = np.histogram(resid, bins = 'auto', density = True)
	centers = (lefts[:-1] + lefts[1:]) / 2
	pylab.bar(centers, freqs, width = centers[1] - centers[0])

	# Overlay the pdf of a normal distribution fitted to the residuals.
	empirical_dist = ss.norm(*(ss.norm.fit(resid)))
	pylab.plot(centers, [empirical_dist.pdf(x) for x in centers], 'g-', linewidth = 5)
	
if PLOT_SCATTER_TOT_EXP_RESIDUALS:
	import pylab
	# Scatter of residuals against tot_exps (defined earlier in the file).
	pylab.scatter(tot_exps, resid, marker = '.', s = 1)