示例#1
0
def prob_plots(x, y, y_hat, shape=(2, 2), figsize=(8, 8)):
    """Draw a grid of Q-Q plots, one panel per entry of ``y_hat``.

    Every panel overlays three curves: ``x`` (labelled 'original'), ``y``
    (labelled 'target') and the panel's own series from ``y_hat``
    (labelled 'corrected').  Grid cells that have no matching entry in
    ``y_hat`` are removed from the figure.

    Parameters
    ----------
    x, y : array-like
        Data sets drawn in every panel.
    y_hat : mapping
        Maps a panel title to the series drawn as 'corrected'.  Only
        ``.items()`` and ``.keys()`` are used.
    shape : tuple of int, optional
        ``(rows, columns)`` of the subplot grid.
    figsize : tuple of float, optional
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    The figure holding the grid.
    """
    # squeeze=False keeps `axes` two-dimensional for every grid shape, so
    # the row/column indexing below also works for 1xN, Nx1 and 1x1 grids.
    fig, axes = plt.subplots(*shape,
                             sharex=True,
                             sharey=True,
                             figsize=figsize,
                             squeeze=False)

    common_opts = dict(plottype='qq', problabel='', datalabel='')

    for ax, (label, series) in zip(axes.flat, y_hat.items()):
        curves = (('original', x), ('target', y), ('corrected', series))
        for curve_label, values in curves:
            # A fresh dict per call: no shared state mutated between calls.
            scatter_kws = dict(label=curve_label, marker=None, linestyle='-')
            probscale.probplot(values,
                               ax=ax,
                               scatter_kws=scatter_kws,
                               **common_opts)
        ax.set_title(label)
        ax.legend()

    # Axis labels only on the outer edge of the grid (axes are shared).
    for ax in axes[-1]:
        ax.set_xlabel('Standard Normal Quantiles')
    for ax in axes[:, 0]:
        ax.set_ylabel('Temperature [C]')

    # Drop the panels that received no data.
    for ax in axes.flat[len(y_hat.keys()):]:
        fig.delaxes(ax)
    fig.tight_layout()

    return fig
def get_group_probplot(df, gcol, tcols, output_path):
    """Plot one probability plot per column, with one point series per group.

    The distinct values of ``df[gcol]`` (sorted) define the groups and each
    group is given a random RGB colour.  For every column in ``tcols`` a new
    14x14 figure is created, the column's values are drawn per group on a
    log data scale with probabilities on the y-axis, and the figure is
    handed to ``save_current_figure(output_path, "probplot", col)``.
    """
    print("Get prob plot per group")
    groups = np.sort(df.loc[:, gcol].unique().tolist())

    # One random colour per group, reused across all columns.
    color_dict = {group: np.random.rand(3, ) for group in groups}

    for col in tcols:
        fig = plt.figure(figsize=(14, 14))

        for group in groups:
            group_values = df[df[gcol] == group].loc[:, col].values
            marker_style = dict(marker='.',
                                markersize=5,
                                alpha=0.6,
                                c=color_dict[group],
                                label=group)
            probscale.probplot(data=group_values,
                               ax=plt.gca(),
                               scatter_kws=marker_style,
                               plottype='prob',
                               probax='y',
                               datascale='log',
                               problabel='Cumulative Probability',
                               datalabel=group)

        # Enlarge the legend markers; the plotted markers are only size 5.
        legend = plt.legend(loc='best', prop={'size': 20}, title="Clusters")
        for entry in legend.legendHandles:
            entry._legmarker.set_markersize(40)
        plt.setp(legend.get_title(), fontsize='20')

        fig.suptitle("Probplot: " + col, fontsize=20)
        plt.xlabel('Ordered values', fontsize=16)
        plt.ylabel('Cumulative Probability', fontsize=16)

        # Plain decimal tick labels on the (log-scaled) data axis.
        plain_numbers = FuncFormatter(
            lambda value, _pos: '{:.16g}'.format(value))
        plt.gca().xaxis.set_major_formatter(plain_numbers)

        plt.grid(True,
                 which="both",
                 ls="-",
                 color='0.65',
                 alpha=0.5,
                 linewidth=0.5)

        save_current_figure(output_path, "probplot", col)
示例#3
0
def qq_plot(data, variable, ymin=-np.inf, ymax=np.inf, dist=None):
    """Draw a Q-Q plot of ``data[variable]`` against a reference distribution.

    Rows whose ``variable`` value lies outside ``[ymin, ymax]`` are dropped
    first, and the fraction of rows kept is printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the column to plot.
    variable : str
        Name of the column to plot; also used as the scatter label.
    ymin, ymax : float, optional
        Inclusive bounds applied to ``data[variable]`` before plotting.
    dist : frozen scipy.stats distribution, optional
        Distribution supplying the theoretical quantiles.  Defaults to
        ``stats.norm(loc=21, scale=8)``, the value that used to be
        hard-coded.

    Raises
    ------
    ValueError
        If ``data`` has no rows (the kept fraction would divide by zero).
    """
    n_rows = data.shape[0]
    if n_rows == 0:
        raise ValueError("qq_plot needs at least one row of data")

    if dist is None:
        dist = stats.norm(loc=21, scale=8)

    in_range = (data[variable] >= ymin) & (data[variable] <= ymax)
    trunc_data = data.loc[in_range, :]
    val = trunc_data.shape[0] / n_rows

    print("Porcentaje de datos conservado {}".format(val))

    fig, ax = plt.subplots(figsize=(4, 4))
    # Equal aspect so the equality line is drawn at 45 degrees.
    ax.set_aspect('equal')

    common_opts = dict(
        plottype='qq',
        probax='x',
        problabel='Theoretical Quantiles',
        datalabel='Empirical Quantiles',
        scatter_kws=dict(label=variable),
    )

    probscale.probplot(trunc_data[variable], ax=ax, dist=dist, **common_opts)

    # equality_line is defined elsewhere in the project.
    equality_line(ax, label='Normal Distribution')
    ax.legend(loc='lower right')
    sns.despine()
示例#4
0
def save_qq(fqn: str, arr: np.ndarray):
    """Render a percentile (P-P) probability plot of ``arr`` and save it.

    The observations are drawn as black '+' markers together with a
    best-fit line (``bestfit=True``, ``estimate_ci=True``).

    Parameters
    ----------
    fqn : str
        Path of the output image file.
    arr : np.ndarray
        Observations to plot.
    """
    scatter_options = dict(
        marker="+",
        markersize=15,
        markerfacecolor="none",
        markeredgecolor="black",
        markeredgewidth=1.25,
        linestyle="none",
        zorder=5,
        label="Observations",
    )

    line_options = dict(
        color="#6184ff",
        linewidth=3,
        zorder=1,
        label="Best Fit",
        alpha=1,
    )

    fig, ax = pyplot.subplots(figsize=(8, 8))
    try:
        probscale.probplot(
            arr,
            ax=ax,
            plottype="pp",
            bestfit=True,
            estimate_ci=True,
            line_kws=line_options,
            scatter_kws=scatter_options,
            problabel="Percentile",
        )
        seaborn.despine(fig)
        # Save through the figure object rather than the pyplot
        # "current figure", so the result does not depend on global state.
        fig.savefig(fqn, pad_inches=0, bbox_inches="tight")
    finally:
        # Release the figure; otherwise every call leaves one open.
        pyplot.close(fig)
示例#5
0
def makeFancyPlot(data):
    """Show a percentile (P-P) probability plot of ``data`` on a 6x3 figure.

    The points are joined with a solid line instead of being drawn as
    separate markers.
    """
    _, axis = plt.subplots(figsize=(6, 3))
    joined_points = dict(linestyle="-")
    probscale.probplot(
        data,
        ax=axis,
        plottype="pp",
        xscale="log",
        scatter_kws=joined_points,
    )
    plt.show()
# Title the current axes, switch on the grid and display the figure.
# NOTE(review): the plotting call that creates this figure is outside this
# snippet.
plt.title('Henry hub spot price')
plt.grid(True)
plt.show()

"""The first step in the statistical analysis is to investigate the distribution of the time series.
The Jarque-Bera normality test shows that the price time series is not normally distributed; instead, it follows a log-normal distribution (Jarque & Bera, 1987). The figure below illustrates the distribution of the logarithmic price with respect to the red reference line in the normal plot. The log price, or the return of the natural gas price, is therefore normally distributed, i.e. it follows the Gaussian distribution. It is thus appropriate to assume a normal
distribution for the first difference of the variable and/or the logarithm of the gas price.
"""

from scipy import stats
# Jarque-Bera normality test on the spot price.
# NOTE(review): the result is not stored or printed; it is only visible
# when this is the last expression of a notebook cell.
stats.jarque_bera(df['hh_sp'])

# Percentile plot of the raw price with a best-fit line.
fig, ax = plt.subplots(figsize=(10, 6))
plt.grid(True)
fig = probscale.probplot(df['hh_sp'], ax=ax, plottype='pp', bestfit=True,
                         problabel='Percentile', datalabel='HenryHub spot price ($/MMBtu)',
                         scatter_kws=dict(label='HH original data'),
                         line_kws=dict(label='Best-fit line'))
ax.legend(loc='upper left')
seaborn.despine()

# Same data on a logarithmic data axis (datascale='log').
fig, ax = plt.subplots(figsize=(10, 6))
plt.grid(True)
fig = probscale.probplot(df['hh_sp'], ax=ax, plottype='pp', bestfit=True,
                         datascale='log',
                         problabel='Percentile', 
                         datalabel='Log of HenryHub price ($/MMBtu)',
                         scatter_kws=dict(label='HH lognormal data'),
                         line_kws=dict(label='Best-fit line'))
ax.legend(loc='upper left')
seaborn.despine()
示例#7
0
文件: sta.py 项目: xyfzkd/biosta_hw
    ax.set_xlim(limits)
    ax.set_ylim(limits)
    ax.plot(limits, limits, 'k-', alpha=0.75, zorder=0, label=label)


# Q-Q plot of raw['pred_age'] against a guessed normal distribution
# (mean 21, standard deviation 8).
norm = stats.norm(loc=21, scale=8)
fig, ax = plt.subplots(figsize=(5, 5))
# Equal aspect so that the equality line is drawn at 45 degrees.
ax.set_aspect('equal')

# Theoretical quantiles go on the x-axis (probax='x').
# NOTE(review): the scatter label says 'Bill amounts' although the plotted
# column is 'pred_age' - confirm the intended legend text.
common_opts = dict(plottype='qq',
                   probax='x',
                   problabel='Theoretical Quantiles',
                   datalabel='Emperical Quantiles',
                   scatter_kws=dict(label='Bill amounts'))

fig = probscale.probplot(raw['pred_age'], ax=ax, dist=norm, **common_opts)

# equality_line is defined earlier in the file; it receives the axes and a
# legend label.
equality_line(ax, label='Guessed Normal Distribution')
ax.legend(loc='lower right')
sns.despine()
#%%
# Write the figure to 'norm.pdf' in the current working directory.
fig.savefig('norm.pdf', edgecolor='black', transparent=False)

#%%
import seaborn as sns
import numpy as np
# 50 evenly spaced points spanning the observed range of pred_age.
x = np.linspace(min(raw['pred_age']), max(raw['pred_age']), 50)
# Normal density with mean 45.12 and standard deviation 3.82, scaled by 239.
# NOTE(review): 239 is presumably a count used to match the histogram's
# scale - confirm.
y = 239 * 1 / (3.82 * np.sqrt(2 * np.pi)) * np.exp(-(x - 45.12)**2 /
                                                   (2 * 3.82**2))
plt.plot(x, y)
plt.hist(raw['pred_age'],
示例#8
0
文件: plots.py 项目: gabrielesp/FoMpy
    def qq(self, parameter, backend=None, save_plot=None):
        """Draw a normal quantile-quantile (QQ) plot of ``parameter``.

        Parameters
        ----------
        parameter : array_like, shape (n,)
            Input values.  NaN entries are discarded before plotting.
        backend : str or None, optional
            Matplotlib backend to select.  ``None`` and ``'Agg'`` select
            'Agg', ``'TkAgg'`` selects 'TkAgg'; any other value leaves the
            current backend untouched.
        save_plot : path or None, optional
            Prefix of the output file: the plot is written to
            ``save_plot + 'qqplot.pdf'``.  When None, the plot is shown
            with ``plt.show()`` instead.

        Raises
        ------
        Exception
            If fewer than two non-NaN values are available.
        """
        # Drop NaN values first (NaN is the only value not equal to
        # itself), so the size check below counts usable points only.
        parameter = [x for x in parameter if x == x]
        if len(parameter) < 2:
            raise Exception('Not enough data for a QQ-plot')

        import matplotlib
        # Compare strings by value: `is` tests object identity and is not
        # reliable for string literals.
        if backend is None or backend == 'Agg':
            matplotlib.use('Agg')
        elif backend == 'TkAgg':
            matplotlib.use('TkAgg')

        plt.close()
        fig, ax = plt.subplots()
        probscale.probplot(parameter,
                           ax=ax,
                           plottype='qq',
                           bestfit=True,
                           estimate_ci=False,
                           problabel='Standard Normal Quantiles',
                           datalabel=r'Parameter',
                           scatter_kws={
                               'markersize': 10,
                               'linestyle': 'none',
                               'label': None
                           })
        plt.title('QQ-plot')
        if save_plot is not None:
            try:
                checkPath(save_plot)
                plt.savefig(save_plot + 'qqplot.pdf',
                            bbox_inches='tight',
                            format='pdf',
                            dpi=1200)
            except NameError:
                print('No path to save the plots has been chosen')
        else:
            print('No path has been defined\nTrying to use GUI backend...')
            plt.show()
        plt.close()

        return
示例#9
0
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(position, latency, marker='.', linestyle='none')
ax.set_xlabel('Percentile')
ax.set_ylabel('latency')
#ax.set_yscale('log')
#ax.set_ylim(bottom=1, top=100)
seaborn.despine()
"""

# Two side-by-side panels that share the x-axis; with probax='y' the data
# values are on x and the probability scale is on y.
fig, (ax1, ax2) = plt.subplots(figsize=(10, 6), ncols=2, sharex=True)
# Marker style shared by both panels.
# NOTE(review): the label says 'Bill Amount' although the data axis is
# labelled 'Latency' - confirm the intended legend text.
markers = dict(marker='.', linestyle='none', label='Bill Amount')

# Left panel: percentile (P-P) plot.
fig = probscale.probplot(diffList,
                         ax=ax1,
                         plottype='pp',
                         probax='y',
                         problabel='Percentiles',
                         datalabel='Latency',
                         scatter_kws=markers)

# Right panel: quantile (Q-Q) plot of the same data.
fig = probscale.probplot(diffList,
                         ax=ax2,
                         plottype='qq',
                         probax='y',
                         problabel='Standard Normal Quantiles',
                         datalabel='Latency',
                         scatter_kws=markers)

#ax1.set_xlim(left=1, right=100)
fig.tight_layout()
seaborn.despine()
示例#10
0
def pred_w_test_data(df, tCol, mlModel, feature_list, saveDir):
    """Score ``mlModel`` on ``df`` and write diagnostics under ``./<saveDir>/``.

    Side effects:

    * adds the columns ``'<tCol>_pred'`` and
      ``'<tCol>_rvRatio_pred_over_act'`` to ``df`` in place;
    * pickles ``df`` to ``dfTestResult_<tCol>.pkl``;
    * saves ``<tCol>_probplot.png`` (probability plot of the ratio),
      ``<tCol>_xyplot.png`` (actual vs. predicted scatter) and, unless the
      configured model is ``'sknn'``, ``<tCol>_feature_importance.png``.

    Every figure is closed after it has been saved.

    Parameters
    ----------
    df : pandas.DataFrame
        Test data containing ``feature_list`` and ``tCol`` columns.
    tCol : str
        Name of the target column.  Targets and predictions are used as
        base-10 exponents (``np.power(10, ...)``).
    mlModel : object
        Must expose ``reg_best.predict`` and, for the importance plot,
        ``reg_best.feature_importances_``.
    feature_list : list of str
        Feature columns fed to the model.
    saveDir : str
        Directory (relative to the working directory) for all outputs.
    """
    xdata = np.array(list(df.loc[:, feature_list].values))
    tdata = np.array(list(df.loc[:, tCol].values))
    pred_test = mlModel.reg_best.predict(xdata)
    df['%s_pred' % tCol] = pred_test

    # Ratio of the (rounded-up) predicted value to the actual one.
    scaled_thresh = xtiConst.MULTI_DATI * xtiConst.XTI_NMAX_THRESH
    rvRatio = (np.ceil(scaled_thresh / np.power(10, pred_test)) /
               np.ceil(scaled_thresh / np.power(10, tdata)))
    df['%s_rvRatio_pred_over_act' % tCol] = rvRatio
    df.to_pickle('./%s/dfTestResult_%s.pkl' % (saveDir, tCol))

    ### plot probplot ###
    fig, ax = plt.subplots(1,
                           1,
                           sharex=False,
                           sharey=False,
                           figsize=(5 * 1 + 0.2, 5 * 1))
    pltRange = (np.min(rvRatio), np.max(rvRatio))
    rvRatio = df.loc[:, '%s_rvRatio_pred_over_act' % tCol].values
    probscale.probplot(
        rvRatio,
        ax=ax,
        plottype='prob',
        probax='y',
        bestfit=False,
        estimate_ci=False,
        datascale='linear',
        problabel='Probabilities (%)',
        datalabel='Predicted RV / Actual RV',
        scatter_kws=dict(marker='.', markersize=1),
    )
    # Reference ratios 1/sqrt(MULTI_DATI) (red) and 1/MULTI_DATI (gray).
    ax.axvline(1.0 / np.sqrt(xtiConst.MULTI_DATI), c='red', linestyle='dashed')
    ax.axvline(1.0 / xtiConst.MULTI_DATI, c='gray', linestyle='dashed')
    ax.set_xlim(pltRange)
    plt.tight_layout()
    plt.savefig('./%s/%s_probplot.png' % (saveDir, tCol), format='png')
    # Close instead of ax.cla(): clearing the axes leaves the figure open.
    plt.close(fig)

    ### xy plot ###
    fig, ax = plt.subplots(1,
                           1,
                           sharex=False,
                           sharey=False,
                           figsize=(5 * 1 + 0.2, 5 * 1))
    # NOTE(review): 30000 is hard-coded here while the ratio above uses
    # xtiConst.XTI_NMAX_THRESH - confirm whether they should match.
    xdata = np.ceil(xtiConst.MULTI_DATI * 30000 / np.power(10, tdata))
    ydata = np.ceil(xtiConst.MULTI_DATI * 30000 / np.power(10, pred_test))
    pltRange = (1, np.max([np.max(xdata), np.max(ydata)]))
    ax.scatter(xdata, ydata, s=10, alpha=0.5)
    ax.set_title('%s, R square=%0.4f' % (tCol, r2_score(tdata, pred_test)))
    # Identity line y = x.
    ax.plot([pltRange[0], pltRange[1]], [pltRange[0], pltRange[1]],
            c='black',
            linestyle='solid',
            linewidth=0.5)
    # Offset line y = x + 100, sampled densely because the axes are
    # log-scaled and the line is therefore curved.
    ax.plot(np.arange(pltRange[0], pltRange[1] + 1, 1),
            np.arange(pltRange[0] + 100, pltRange[1] + 100 + 1, 1),
            c='red',
            linestyle='dotted',
            label='y=x+100')
    # Constant-ratio line y = x / sqrt(MULTI_DATI).
    ax.plot([pltRange[0], pltRange[1]], [
        pltRange[0] / np.sqrt(xtiConst.MULTI_DATI),
        pltRange[1] / np.sqrt(xtiConst.MULTI_DATI)
    ],
            c='red',
            linestyle='dashed',
            label='y/x=%0.2f' % (1.0 / np.sqrt(xtiConst.MULTI_DATI)))
    ax.grid(which='both', axis='both')
    ax.set_xlabel('Actual RV')
    ax.set_ylabel('Predicted RV')
    ax.set_xlim(pltRange)
    ax.set_ylim(pltRange)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.legend(loc='upper left', fontsize=8)
    plt.tight_layout()
    plt.savefig('./%s/%s_xyplot.png' % (saveDir, tCol), format='png')
    plt.close(fig)

    ### feature importance ###
    # NOTE(review): `mlp` is neither a parameter nor a local; it is read
    # from module scope - confirm it describes the same model as `mlModel`.
    if (mlp.algo['model'] != 'sknn'):
        fig, ax = plt.subplots(1,
                               1,
                               sharex=False,
                               sharey=False,
                               figsize=(5 * 1 + 0.2, 12 * 1))
        importances = pd.Series(mlModel.reg_best.feature_importances_,
                                index=feature_list)
        importances = importances.sort_values()
        # Ascending sort, so the last 30 entries are the most important.
        importances[-30:].plot(kind="barh", color='blue', ax=ax)
        ax.set_title("Feature Importance")
        plt.tight_layout()
        plt.savefig('./%s/%s_feature_importance.png' % (saveDir, tCol),
                    format='png')
        plt.close(fig)
# NOTE(review): `ax` and `res_2` are created outside this snippet.
# Plot the smoothed marginal probability stored under index 2.
ax.plot(res_2.smoothed_marginal_probabilities[2])
ax.set(title='Smoothed probability of up regime')

plt.tight_layout()

print(res_2.expected_durations)

import probscale

# Percentile (plottype='pp') probability plot of the model residuals with a
# best-fit line.
fig, ax = plt.subplots(figsize=(10, 4))
plt.grid(True)
fig = probscale.probplot(res_2.resid,
                         ax=ax,
                         plottype='pp',
                         bestfit=True,
                         problabel='Percentile',
                         datalabel='Residuals',
                         scatter_kws=dict(label='Model residuals'),
                         line_kws=dict(label='Best-fit line'))
ax.legend(loc='upper left')
plt.show()

import seaborn as sns
plt.figure(figsize=(12, 5))

# Histogram of the residuals; 20 is passed as the second positional
# argument of sns.distplot.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
sns.distplot(res_2.resid, 20)
plt.title('Histogram of residuals')
plt.xlabel('Residuals')
plt.ylabel('Density')
plt.grid(True)
示例#12
0
def plot_cdf2(df,
              col,
              xlabel,
              title,
              figname,
              addThresh,
              xlim=None,
              ylim=None,
              hue=None,
              hue_order=None,
              div=None,
              binSize=50,
              addTable=False,
              logX=False,
              doBoxCox=False,
              palette='tab10'):
    """Plot histogram(s) of ``df[col]`` with probability-scale CDF overlays.

    The histogram is drawn on the left axis and a probability plot of the
    same data on a twin right axis.  With ``hue`` set, one histogram/CDF
    pair is drawn per group.  The figure is saved as
    ``figname + '_cdf.png'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table.
    col : str
        Column to plot; NaN values are dropped.
    xlabel, title : str
        Axis label and plot title.
    figname : str
        Output path prefix.
    addThresh : number or False
        Threshold drawn as a dashed red vertical line, or ``False`` for
        none.  It is transformed the same way as the data.  A value that
        compares equal to ``False`` (such as 0) is treated as "no
        threshold".
    xlim : sequence of two numbers, optional
        Data range for the bins; defaults to the data's min/max.  The
        range is padded by 1% on both sides.
    ylim : sequence of two numbers, optional
        Limits for the CDF axis.
    hue : str, optional
        Column used to split the data into groups.
    hue_order : list, optional
        Group values to plot, in order; defaults to the sorted unique
        values of ``df[hue]``.
    div : number, optional
        If given and non-zero, the data and threshold are divided by it.
    binSize : int, optional
        Number of histogram bins.
    addTable : bool, optional
        Add a table of summary statistics below the plot.
    logX : bool, optional
        Apply ``log10`` to the data and threshold.
    doBoxCox : bool, optional
        Apply a Box-Cox ``PowerTransformer`` fitted on the whole column.
    palette : str, optional
        Name of the matplotlib colormap supplying group colours.

    Returns
    -------
    str or None
        ``'single value'`` when the data range is zero (nothing is
        plotted); otherwise ``None``.
    """
    # Decide ONCE whether a threshold was requested.  Re-testing the
    # transformed value would silently drop a threshold that maps to 0
    # (for example addThresh=1 with logX=True gives log10(1) == 0).
    hasThresh = bool(addThresh != False)  # noqa: E712
    scaleByDiv = (div is not None) and (div != 0)

    # Transform the full column first; it defines the bin range and, for
    # Box-Cox, the fitted transformer reused for every group.
    xdata = df.loc[:, col].dropna(axis=0).values
    if scaleByDiv:
        xdata = xdata / div
        if hasThresh:
            addThresh = addThresh / div

    if logX:
        xdata = np.log10(xdata)
        if hasThresh:
            addThresh = np.log10(addThresh)

    if doBoxCox:
        pt = PowerTransformer(method='box-cox')
        pt.fit(xdata.reshape(-1, 1))
        xdata = pt.transform(xdata.reshape(-1, 1)).T[0]
        if hasThresh:
            addThresh = pt.transform([[addThresh]])[0][0]

    if xlim is None:
        xmax = np.max(xdata)
        xmin = np.min(xdata)
    else:
        xmax = xlim[1]
        xmin = xlim[0]
    xrng = xmax - xmin
    if (xrng == 0):
        return ('single value')
    # Pad the range by 1% on each side so extreme points fall inside bins.
    xmax += xrng * 0.01
    xmin -= xrng * 0.01
    binArray = list(np.linspace(xmin, xmax, binSize + 1))

    cmap = cm.get_cmap(palette)

    if not addTable:
        fig, ax = plt.subplots(1, 1, figsize=(8, 5))
    else:
        gridsize = (2, 3)
        fig = plt.figure(figsize=(8, 8))
        ax = plt.subplot2grid(gridsize, (0, 0), colspan=3, rowspan=1)
        axTable = plt.subplot2grid(gridsize, (1, 1), colspan=2, rowspan=1)

    # Twin axis carrying the probability-scale CDF.
    ax1 = ax.twinx()

    if hue is not None:
        if hue_order is None:
            hue_order = list(np.sort(df.loc[:, hue].unique()))
    else:
        hue_order = ['All']
    print(hue_order)

    # Summary statistics: one column per group, one row per statistic.
    dfPerc = pd.DataFrame({})

    for i, hueVal in enumerate(hue_order):

        uniRec = False

        if hue is not None:
            # Query once per group instead of once per use.
            subset = df.query("%s=='%s'" % (hue, hueVal))
            if subset.shape[0] > 0:
                # A single record is drawn as a vertical line below.
                uniRec = subset.shape[0] == 1
                groupValues = subset.loc[:, col].dropna(axis=0)
                xdata = groupValues.values
                dfPerc.loc['count', hueVal] = groupValues.count()
            else:
                xdata = np.nan
                dfPerc.loc['count', hueVal] = np.nan
        else:
            groupValues = df.loc[:, col].dropna(axis=0)
            xdata = groupValues.values
            dfPerc.loc['count', hueVal] = groupValues.count()

        if scaleByDiv:
            xdata = xdata / div

        if logX:
            xdata = np.log10(xdata)

        if doBoxCox:
            xdata = pt.transform(xdata.reshape(-1, 1)).T[0]

        # Percentiles matching -3, 0 and +3 sigma of a normal distribution.
        median = np.percentile(xdata, 100 * stats.norm.cdf(0))
        dfPerc.loc['mean', hueVal] = np.mean(xdata)
        dfPerc.loc['std', hueVal] = np.std(xdata)
        dfPerc.loc['max', hueVal] = np.max(xdata)
        dfPerc.loc[r'+3$\sigma$',
                   hueVal] = np.percentile(xdata, 100 * stats.norm.cdf(3))
        dfPerc.loc['median', hueVal] = median
        dfPerc.loc[r'-3$\sigma$',
                   hueVal] = np.percentile(xdata, 100 * stats.norm.cdf(-3))
        dfPerc.loc['min', hueVal] = np.min(xdata)
        if hasThresh:
            # Distance of the median from the threshold, in std units.
            dfPerc.loc['Distance', hueVal] = np.abs(median -
                                                    addThresh) / np.std(xdata)
        else:
            dfPerc.loc['Distance', hueVal] = np.nan

        hasData = not np.isnan(np.mean(xdata))
        if (not uniRec) and hasData:
            ax.hist(xdata.astype(np.double),
                    bins=binArray,
                    density=False,
                    histtype='stepfilled',
                    alpha=0.3,
                    edgecolor='white',
                    color=cmap(i),
                    label=hueVal)
            probscale.probplot(xdata.astype(np.double),
                               ax=ax1,
                               plottype='prob',
                               probax='y',
                               bestfit=False,
                               estimate_ci=False,
                               datascale='linear',
                               scatter_kws=dict(marker='.',
                                                markersize=5,
                                                color=cmap(i),
                                                label=hueVal))
            if ylim is not None:
                ax1.set_ylim(ylim[0], ylim[1])
        elif hasData:
            ax.axvline(xdata, label=hueVal, lw=2, color=cmap(i))

    ax.set_xlabel(xlabel)
    ax.set_ylabel('Number')
    ax.set_title(title)
    if hasThresh:
        ax.axvline(addThresh, c='red', ls='dashed')
    ax.set_xlim(xmin, xmax)
    ax1.tick_params(labelsize=12)
    ax1.set_ylabel('CDF')
    ax.grid(axis='both', which='major')
    # Legend is anchored outside the axes, to the right of the twin axis.
    ax.legend(bbox_to_anchor=(1.2, 1),
              loc='upper left',
              borderaxespad=0,
              fontsize=10)
    ax.set_axisbelow(True)

    if addTable:
        axTable.axis('off')
        the_table = axTable.table(
            cellText=np.around(dfPerc.values.astype(np.double), decimals=2),
            fontsize=12,
            rowLoc='right',
            rowLabels=list(dfPerc.index),
            colLabels=list(dfPerc.columns),
            cellLoc='center',
            colLoc='center',
            loc='center right')
        the_table.scale(1.5, 1.5)
        the_table.set_fontsize(12)
    plt.tight_layout()

    plt.savefig(figname + '_cdf.png', format='png')
# NOTE(review): this shows a figure built outside this snippet.
plt.show()

# ### Plot of CDF vs R in probplot

# In[130]:

fig = plt.figure(figsize=(10, 8))
ax3 = fig.add_subplot(1, 1, 1)

# Probabilities on the y-axis, data (R / 1000) on a log-scaled x-axis,
# drawn as unconnected triangle markers.
common_opts = dict(probax='y',
                   datascale='log',
                   datalabel='R(in KOhm)',
                   scatter_kws=dict(marker='v', linestyle='none'))
fig = probscale.probplot(R / 1000,
                         ax=ax3,
                         plottype='prob',
                         problabel='Cumulative Probabilities(%)',
                         **common_opts)
ax3.set_xlim(left=2, right=1e4)
ax3.set_ylim(bottom=0.13, top=99.87)
ax3.grid()
# Explicit tick positions (in percent) on the probability axis.
plt.yticks([0.5, 2, 10, 30, 50, 70, 90, 98, 99.5])

# The title is set after tight_layout(), so the layout pass does not
# account for it.
plt.tight_layout()
plt.title('Extracted data-Probability y scale')
plt.show()

# ### Fitting Polynomial Equation

# In[116]:
示例#14
0
        print(
            "***ALL effects attempted have sample mean > unoptimized mean***")
        print("NONE")
        exit()

    #Take the factor combo with the best sample mean and keep trying
    print("Lowest observed sample mean (Target to Beat)")
    print(
        int(anova_df_pandas[(anova_df_pandas['Sample Mean'] <
                             np.mean(one))]['Sample Mean'].min()))
    print("Next best guesses (produced a sample mean lower than unoptimized)")

# NOTE(review): `all` is a module-level name assigned outside this snippet
# and shadows the builtin all(); rstrip(',') below suggests it is a
# comma-separated string - confirm.
if len(all) == 0:
    print("NONE - ERROR")  #I shouldn't get here...
else:
    # Drop the trailing comma(s) before printing.
    print(all.rstrip(','))

# Normal probability plot of the effects, drawn only when exactly six
# command-line arguments were supplied (sys.argv has 7 entries).
if len(sys.argv) == 7:
    fig = plt.figure(figsize=(6, 4))
    # No `ax` is passed to probplot here.
    probscale.probplot(effects,
                       plottype='prob',
                       probax='y',
                       problabel='Standard Normal Probabilities',
                       bestfit=True)
    plt.xlabel("Normal Probability Plot of Effect Estimates")
    plt.title(rv[0] + " - " + rv[1] + " [" + sys.argv[6] + "]")
    plt.tight_layout()
    # Output file name is assembled from sys.argv[6], input_csv_parse[2]
    # and the two entries of `rv`.
    plt.savefig("results/anova/" + sys.argv[6] + "-" + input_csv_parse[2] +
                "-" + rv[0] + "-" + rv[1] + "-anova-normplot.png")