def scatter_plot(X, Y, x_label="X", y_label="Y", title=None, x_lim=None,
                 y_lim=None, ax=None, show_linear_fitting=False, show=True,
                 save=False, **kwargs):
    """Scatter-plot Y against X, optionally overlaying a least-squares line.

    Parameters
    ----------
    X, Y : array-like
        Coordinates of the points.
    x_label, y_label : str or None
        Axis labels.  When *show_linear_fitting* is true, the slope,
        intercept, r and residual standard deviation of the fit are
        appended to *x_label*.
    title : str or None
        Axes title.
    x_lim, y_lim : tuple or None
        Passed to ``ax.set_xlim`` / ``ax.set_ylim``.
    ax : matplotlib Axes or None
        Axes to draw on; a new 6x4 inch figure is created when omitted.
    show_linear_fitting : bool
        Fit ``Y = slope * X + intercept`` with ``stats.linregress`` and
        draw the fitted line.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``scatter.png``).
    **kwargs
        Forwarded to ``ax.scatter``.
    """
    if ax is None:
        fig, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))
    ax.scatter(X, Y, **kwargs)

    if show_linear_fitting:
        # ``slope * X`` only works element-wise on arrays; a plain list or
        # tuple would raise, so do the arithmetic on an ndarray view.
        X_values = np.asarray(X)
        slope, intercept, r, _, _ = stats.linregress(X_values, Y)
        predicted_Y = slope * X_values + intercept
        residual_std = residual_standard_deviation(Y, predicted_Y)
        ax.plot(X_values, predicted_Y)

        # Same convention as ``histogram``: a missing label becomes empty
        # text so the fit summary can still be appended.
        if x_label is None:
            x_label = ""
        x_label += "\nslope={:.3f}, ".format(slope)
        x_label += "intercept={:.3f}".format(intercept)
        x_label += "\nr={:.3f}, ".format(r)
        x_label += "residual std={:.3f}".format(residual_std)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)
    ax.set_title(title)
    show_and_save_plot(show=show, save=save, filename="scatter.png")
def probability_plot(series, title=None, sparams=(), distribution="norm",
                     ax=None, show_fitting=False, save=False, show=True,
                     **kwargs):
    """Draw a probability plot of *series* against a theoretical distribution.

    Parameters
    ----------
    series : array-like
        Sample data.
    title : str or None
        Axes title.
    sparams : tuple
        Passed to ``probplot`` as ``sparams``.
    distribution : str
        Passed to ``probplot`` as ``dist``.
    ax : matplotlib Axes or None
        Axes to draw on; a new 6x4 inch figure is created when omitted.
    show_fitting : bool
        When true, the x label lists slope, intercept and r of the fit.
    save, show : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``probability_plot.png``).
    **kwargs
        Forwarded to ``probplot``.

    Returns
    -------
    The value returned by ``probplot`` (called with ``fit=True``).
    """
    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))

    results = probplot(series, sparams=sparams, dist=distribution,
                       fit=True, plot=ax, **kwargs)
    ax.set_title(title)

    if show_fitting:
        # The second element of the result holds the fit parameters.
        slope, intercept, r = results[1]
        label_parts = [
            "Theoretical quantiles\n",
            "slope={:.4f}, ".format(slope),
            "intercept={:.4f}, ".format(intercept),
            "r={:.4f}".format(r),
        ]
        ax.set_xlabel("".join(label_parts))

    show_and_save_plot(save=save, show=show, filename="probability_plot.png")
    return results
def box_cox_normality_set(series, main_title=None, show=True, save=False,
                          box_cox_kws=None, prob_kws=None,
                          original_hist_kws=None, transformed_hist_kws=None):
    """Draw a 2x2 panel summarising a Box-Cox normality transformation.

    Panels: histogram of the original data (top-left), Box-Cox normality
    plot (top-right), histogram of the transformed data (bottom-left) and
    probability plot of the transformed data (bottom-right).

    Parameters
    ----------
    series : iterable of numbers
        Data to analyse.
    main_title : str or None
        Figure-level title.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``box_cox_normality_set.png``).
    box_cox_kws, prob_kws, original_hist_kws, transformed_hist_kws : dict
        Extra keyword arguments for ``box_cox_normality_plot``,
        ``probability_plot`` and the two ``histogram`` calls.  Each has a
        default that is used when ``None`` is given.

    Returns
    -------
    lambdas, corrs, optimal_lambda, max_corr
        As returned by ``box_cox_normality_plot``.
    """
    fig, axes = plt.subplots(nrows=2, ncols=2, gridspec_kw={
        "left": 0.1, "right": 0.98,
        "bottom": 0.1, "top": 0.9,
        "wspace": 0.3, "hspace": 0.4,
    }, figsize=(7, 8))
    org_hist, box_cox = axes[0]
    trans_hist, prob = axes[1]
    fig.suptitle(main_title)

    # Box-Cox Normality Plot
    if box_cox_kws is None:
        box_cox_kws = {"lambda_min": -2, "lambda_max": 2, "N": 100}
    lambdas, corrs, optimal_lambda, max_corr = box_cox_normality_plot(
        series, ax=box_cox, show=False, save=False, **box_cox_kws)
    x_label = "λ\nMax CC = {:.3f} at λ = {:.3f}".format(max_corr,
                                                        optimal_lambda)
    box_cox.set_xlabel(x_label)

    # Histogram of the Original Data
    if original_hist_kws is None:
        original_hist_kws = {"title": "Original Data"}
    histogram(series, ax=org_hist, show=False, save=False,
              **original_hist_kws)

    # Transformation of Data
    transformed_series = pd.Series(
        map(partial(box_cox_transformation, lambda_=optimal_lambda), series))

    # Histogram of the Transformed Data
    if transformed_hist_kws is None:
        transformed_hist_kws = {"title": "Transformed Data",
                                "x_label": "Transformed Measure"}
    histogram(transformed_series, ax=trans_hist, show=False, save=False,
              **transformed_hist_kws)

    # Probability Plot of the Transformed Data.  This must use the
    # transformed values: the panel exists to show how normal the data
    # look after the transformation.
    if prob_kws is None:
        prob_kws = {"title": "Probability Plot"}
    probability_plot(transformed_series, ax=prob, show=False, save=False,
                     **prob_kws)

    show_and_save_plot(show=show, save=save,
                       filename="box_cox_normality_set.png")
    return lambdas, corrs, optimal_lambda, max_corr
def autocorrelation_plot(series, max_lag=None, title=None, ax=None,
                         arima=False, show=True, save=False, **kwargs):
    """Plot autocorrelation coefficients against lag with confidence bands.

    Parameters
    ----------
    series : array-like
        Data, converted with ``np.array``.
    max_lag : int or None
        Largest lag plotted; defaults to ``len(series) - 2``.
    title : str or None
        Axes title.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    arima : bool
        When false, constant bands ``z / sqrt(N)`` are drawn.  When true,
        lag-dependent bands from Bartlett's formula are drawn:
        ``z * sqrt((1 + 2 * sum_{i=1}^{k-1} r_i**2) / N)`` for lag ``k``,
        where ``r_i`` are the autocorrelation coefficients.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``autocorrelation.png``).
    **kwargs
        Forwarded to ``ax.plot`` for the coefficient curve.

    Returns
    -------
    coefs : list
        Autocorrelation coefficient for each lag ``0..max_lag``.
    confidence_95, confidence_99
        Upper 95 % / 99 % band: scalars when *arima* is false, arrays with
        one value per lag when it is true.
    """
    series = np.array(series)
    if max_lag is None:
        max_lag = len(series) - 2  # Lag values of N and N-1 are meaningless.
    if ax is None:
        fig, ax = plt.subplots()

    # Plotting autocorrelation coefficients
    lags = range(0, max_lag + 1)
    coefs = [autocorrelation_coefficient(series, lag) for lag in lags]
    ax.plot(lags, coefs, **kwargs)
    ax.set_title(title)
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation Coefficient")
    ax.set_ylim((-1, 1))

    # Plotting confidence lines
    z_95 = stats.norm.ppf(0.975)  # 0.975 = 1 - 0.05/2
    z_99 = stats.norm.ppf(0.995)  # 0.995 = 1 - 0.01/2
    N = len(series)
    if arima:
        # The band must be built from the (unitless) autocorrelation
        # coefficients.  Summing squared data values would make the band
        # depend on the units and magnitude of the data.
        squared_coefs = np.asarray(coefs) ** 2
        standard_error = np.array([
            np.sqrt((1 + 2 * np.sum(squared_coefs[1:lag])) / N)
            for lag in lags
        ])
        confidence_95 = z_95 * standard_error
        confidence_99 = z_99 * standard_error
        ax.plot(lags, confidence_99, c="k")
        ax.plot(lags, confidence_95, c="k")
        ax.axhline(y=0, c="k")
        ax.plot(lags, -confidence_95, c="k")
        ax.plot(lags, -confidence_99, c="k")
    else:
        confidence_95 = z_95 / np.sqrt(N)
        confidence_99 = z_99 / np.sqrt(N)
        ax.axhline(y=confidence_99)
        ax.axhline(y=confidence_95)
        ax.axhline(y=0, c="k")
        ax.axhline(y=-confidence_95)
        ax.axhline(y=-confidence_99)

    show_and_save_plot(show=show, save=save, filename="autocorrelation.png")
    return coefs, confidence_95, confidence_99
def bihistogram(series_1, series_2, labels=("Series 1", "Series 2"), bins=10,
                x_label="Measure", y_label="Count", title=None,
                edgecolor="k", show=True, save=False, **kwargs):
    """Draw two histograms back to back on a shared x axis.

    *series_1* is drawn on the upper axes and *series_2* on the lower
    axes, whose y axis is inverted.  Both use the same bin edges, computed
    from the pooled values.

    Parameters
    ----------
    series_1, series_2 : array-like
        The two samples to compare.
    labels : sequence of two str
        Text annotated inside the upper and lower axes.
    bins : int or sequence
        Passed to ``np.histogram`` to derive the shared bin edges.
    x_label, y_label : str
        Axis labels (the x label is only set on the lower axes).
    title : str or None
        Figure-level title.
    edgecolor : color
        Bar edge colour, forwarded to both ``histogram`` calls.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``bihistogram.png``).
    **kwargs
        Forwarded to both ``histogram`` calls.
    """
    fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, gridspec_kw={
        "left": 0.1, "right": 0.98,
        "top": 0.9, "bottom": 0.1,
        "wspace": 0.3, "hspace": 0,
    }, figsize=(6, 6))
    hist_1, hist_2 = axes

    # Calculation of shared bin edges for the 2 histograms
    all_values = np.append(np.array(series_1), np.array(series_2))
    _, bin_edges = np.histogram(all_values, bins=bins)

    # ``edgecolor`` is passed explicitly; otherwise a caller-supplied value
    # would be silently ignored in favour of ``histogram``'s own default.
    histogram(series_1, bins=bin_edges, x_label=None, y_label=y_label,
              edgecolor=edgecolor, ax=hist_1, show=False, **kwargs)
    histogram(series_2, bins=bin_edges, x_label=x_label, y_label=y_label,
              edgecolor=edgecolor, ax=hist_2, show=False, **kwargs)
    hist_2.invert_yaxis()

    hist_1.annotate(labels[0], xy=(0.02, 0.9), xycoords="axes fraction")
    hist_2.annotate(labels[1], xy=(0.02, 0.05), xycoords="axes fraction")
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save, filename="bihistogram.png")
def doe_scatter_plot(df, response, factors, x_labels=None, y_label="Response",
                     title=None, show_overall_mean=False, figure_size=(8, 6),
                     show=True, save=False, **kwargs):
    """Draw one response-vs-level scatter plot per factor, side by side.

    Factor levels are sorted and mapped to positions 0, 1, 2, ... on the
    x axis; the tick labels show the original level values.

    Parameters
    ----------
    df : pandas.DataFrame
        Experiment data.
    response : str
        Column holding the response values.
    factors : sequence of str
        Factor columns, one subplot each.
    x_labels : sequence of str or None
        X label per subplot; defaults to *factors*.
    y_label : str
        Y label of the left-most subplot.
    title : str or None
        Figure-level title.
    show_overall_mean : bool
        Draw a horizontal line at the mean of the response column.
    figure_size : tuple
        Figure size passed to ``plt.subplots``.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``doe_scatter_plot.png``).
    **kwargs
        Forwarded to ``ax.scatter``.
    """
    # squeeze=False keeps ``axes`` an array even for a single factor;
    # otherwise a bare Axes is returned and the zip/indexing below fails.
    fig, axes = plt.subplots(nrows=1, ncols=len(factors), sharey=True,
                             squeeze=False, figsize=figure_size,
                             gridspec_kw={
                                 "left": 0.1, "right": 0.95,
                                 "bottom": 0.1, "top": 0.95,
                                 "wspace": 0,
                             })
    axes = axes[0]
    overall_mean = df[response].mean()

    for factor, ax in zip(factors, axes):
        if show_overall_mean:
            ax.axhline(y=overall_mean)
        factor_levels = np.sort(df[factor].unique())
        # levels -> 0, 1, 2, ... (position of each value among the levels)
        encoded_factors = factor_levels.searchsorted(df[factor])
        ax.scatter(encoded_factors, df[response], **kwargs)
        ax.set_xticks(list(range(len(factor_levels))))
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels) - 0.5)

    if x_labels is None:
        x_labels = factors
    for x_label, ax in zip(x_labels, axes):
        ax.set_xlabel(x_label)
    axes[0].set_ylabel(y_label)
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save, filename="doe_scatter_plot.png")
def ppcc_plot(series, lambda_min=-5, lambda_max=5, dist="tukeylambda", N=100,
              ax=None, save=False, show=True):
    """Draw a probability plot correlation coefficient (PPCC) plot.

    Thin wrapper around ``stats.ppcc_plot``.

    Parameters
    ----------
    series : array-like
        Sample data.
    lambda_min, lambda_max : number
        Range of the shape parameter, passed positionally.
    dist : str
        Distribution name, passed as ``dist``.
    N : int
        Passed as ``N`` to ``stats.ppcc_plot``.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    save, show : bool
        Forwarded to ``show_and_save_plot`` (file name ``ppcc.png``).

    Returns
    -------
    svals, ppcc, max_corr_value, max_corr_lambda
        The two arrays from ``stats.ppcc_plot``, the largest correlation
        and the shape value at which it occurs.  Note the order of the
        last two items: value first, then shape.
    """
    if ax is None:
        _, ax = plt.subplots()

    shape_values, correlations = stats.ppcc_plot(
        series, lambda_min, lambda_max, dist=dist, plot=ax, N=N)
    show_and_save_plot(show=show, save=save, filename="ppcc.png")

    best_correlation = correlations.max()
    best_shape = shape_values[correlations.argmax()]
    return shape_values, correlations, best_correlation, best_shape
def box_cox_normality_plot(series, lambda_min=-2, lambda_max=2, N=100,
                           ax=None, show=True, save=False):
    """Draw a Box-Cox normality plot and report the best lambda.

    Thin wrapper around ``stats.boxcox_normplot``.

    Parameters
    ----------
    series : array-like
        Sample data.
    lambda_min, lambda_max : number
        Range of lambda, passed positionally.
    N : int
        Passed as ``N`` to ``stats.boxcox_normplot``.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``box_cox_normality.png``).

    Returns
    -------
    lambdas, corrs, max_corr_lambda, max_corr_value
        The two arrays from ``stats.boxcox_normplot``, the lambda with the
        largest correlation and that correlation.  Note the order of the
        last two items: lambda first, then value.
    """
    if ax is None:
        _, ax = plt.subplots()

    lambda_values, correlations = stats.boxcox_normplot(
        series, lambda_min, lambda_max, plot=ax, N=N)
    best_correlation = correlations.max()
    best_lambda = lambda_values[correlations.argmax()]

    show_and_save_plot(show=show, save=save, filename="box_cox_normality.png")
    return lambda_values, correlations, best_lambda, best_correlation
def histogram(series, bins=10, x_label="Measure", y_label="Count", title=None,
              edgecolor="k", ax=None, save=False, show=True, plot_pdf=False,
              show_statistics=False, **kwargs):
    """Draw a histogram of *series* and return its summary statistics.

    Parameters
    ----------
    series : array-like
        Sample data.
    bins : int or sequence
        Passed to ``ax.hist``.
    x_label, y_label : str or None
        Axis labels.  With *show_statistics*, the mean, sample standard
        deviation and range are appended to *x_label*.
    title : str or None
        Axes title.
    edgecolor : color
        Bar edge colour.
    ax : matplotlib Axes or None
        Axes to draw on; a new 6x4 inch figure is created when omitted.
    save, show : bool
        Forwarded to ``show_and_save_plot`` (file name ``histogram.png``).
    plot_pdf : bool
        Overlay ``normal_pdf`` evaluated over mean +/- 3 std on a twin axes
        whose frame is hidden.
    show_statistics : bool
        See *x_label*.
    **kwargs
        Forwarded to ``ax.hist``.

    Returns
    -------
    mean, std, range_
        Mean, standard deviation (``ddof=1``) and max - min of *series*.
    """
    # Calculation of Statistics
    mean = np.mean(series)
    std = np.std(series, ddof=1)
    range_ = max(series) - min(series)

    # Histogram
    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))
    ax.hist(series, bins=bins, edgecolor=edgecolor, **kwargs)

    if show_statistics:
        summary = "\n(mean={:.4f}, std={:.4f}, range={:.4f})".format(
            mean, std, range_)
        # A missing label is treated as empty text.
        x_label = summary if x_label is None else x_label + summary

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    # PDF Plot
    if plot_pdf:
        step = 6 * std / 100
        pdf_x = np.arange(mean - 3 * std, mean + 3 * std, step)
        pdf_y = normal_pdf(pdf_x, mean=mean, std=std)
        pdf_ax = ax.twinx()
        pdf_ax.plot(pdf_x, pdf_y)
        pdf_ax.axis("off")

    show_and_save_plot(save=save, show=show, filename="histogram.png")
    return mean, std, range_
def box_plot(df, column=None, group=None, x_label=None, y_label=None, ax=None,
             show=True, save=False, **kwargs):
    """Draw a box plot of a DataFrame via ``df.boxplot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    column : passed to ``df.boxplot`` as ``column``.
    group : passed to ``df.boxplot`` as ``by``.
    x_label, y_label : str or None
        Axis labels.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``box_plot.png``).
    **kwargs
        Forwarded to ``df.boxplot``.
    """
    if ax is None:
        _, ax = plt.subplots()

    df.boxplot(column=column, by=group, ax=ax, **kwargs)

    # Labels are applied after drawing.
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    show_and_save_plot(show=show, save=save, filename="box_plot.png")
def bootstrap_plot(series, fig=None, size=50, samples=500, show=True,
                   save=False, **kwargs):
    """Draw a bootstrap plot via ``pd.plotting.bootstrap_plot``.

    Parameters
    ----------
    series : pandas.Series
        Sample data.
    fig : matplotlib Figure or None
        Figure to draw on; a new 10x8 inch figure is created when omitted.
    size, samples : int
        Passed through to ``pd.plotting.bootstrap_plot``.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``bootstrap.png``).
    **kwargs
        Forwarded to ``pd.plotting.bootstrap_plot``.
    """
    target_figure = plt.figure(figsize=(10, 8)) if fig is None else fig
    pd.plotting.bootstrap_plot(series, fig=target_figure, size=size,
                               samples=samples, **kwargs)
    show_and_save_plot(show=show, save=save, filename="bootstrap.png")
def four_plot(series, main_title="4-PLOT", show=True, save=False,
              run_kws=None, lag_kws=None, hist_kws=None, prob_kws=None):
    """Draw a 4-plot: run sequence, lag, histogram and probability plot.

    Layout: run sequence plot (top-left), lag plot (top-right), histogram
    (bottom-left), probability plot (bottom-right).

    Parameters
    ----------
    series : array-like
        Sample data.
    main_title : str or None
        Figure-level title.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``4-plot.png``).
    run_kws, lag_kws, hist_kws, prob_kws : dict or None
        Extra keyword arguments for ``run_sequence_plot``, ``lag_plot``,
        ``histogram`` and ``probability_plot`` respectively.
    """
    if run_kws is None:
        run_kws = {}
    if lag_kws is None:
        lag_kws = {}
    if hist_kws is None:
        hist_kws = {}
    if prob_kws is None:
        prob_kws = {}

    layout = {
        "left": 0.1, "right": 0.98,
        "top": 0.9, "bottom": 0.1,
        "wspace": 0.3, "hspace": 0.3,
    }
    fig, axes = plt.subplots(nrows=2, ncols=2, gridspec_kw=layout,
                             figsize=(7, 8))
    (rsp, lag), (hist, prob) = axes

    # Run Sequence Plot: pad the y range by a tenth of the data range
    lowest, highest = min(series), max(series)
    clearance = (highest - lowest) * 1 / 10
    run_sequence_plot(series, y_lim=(lowest - clearance, highest + clearance),
                      ax=rsp, show=False, **run_kws)

    # Lag Plot
    lag_plot(series, ax=lag, show=False, **lag_kws)

    # Histogram
    histogram(series, ax=hist, show=False, **hist_kws)

    # Probability Plot
    probability_plot(series, ax=prob, show=False, **prob_kws)

    fig.suptitle(main_title)
    show_and_save_plot(show=show, save=save, filename="4-plot.png")
def lag_plot(series, lag=1, ax=None, x_lim=None, y_lim=None, title="Lag Plot",
             show=True, save=False, **kwargs):
    """Draw a lag plot via ``pd.plotting.lag_plot``.

    Parameters
    ----------
    series : pandas.Series
        Sample data.
    lag : int
        Passed to ``pd.plotting.lag_plot``.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    x_lim, y_lim : tuple or None
        Passed to ``ax.set_xlim`` / ``ax.set_ylim``.
    title : str or None
        Axes title.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``lag_plot.png``).
    **kwargs
        Forwarded to ``pd.plotting.lag_plot``.
    """
    if ax is None:
        _, ax = plt.subplots()

    pd.plotting.lag_plot(series, lag=lag, ax=ax, **kwargs)

    # Limits and title are applied after drawing.
    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)
    ax.set_title(title)
    show_and_save_plot(show=show, save=save, filename="lag_plot.png")
def _evenly_spaced_order_statistics(sorted_values, count):
    """Pick *count* evenly spaced elements of *sorted_values*.

    The first and last picks are the minimum and maximum.  Integer
    arithmetic is used so the final index is always exactly the last
    element; float division could truncate one position short.
    """
    last = len(sorted_values) - 1
    if count == 1:
        # A single point spans no range of ranks; pair it with the median.
        return [sorted_values[last // 2]]
    return [sorted_values[last * i // (count - 1)] for i in range(count)]


def qq_plot(series_1, series_2, x_label="Series 1", y_label="Series 2",
            title="Q-Q Plot", ax=None, show=True, save=False, **kwargs):
    """Draw a quantile-quantile plot of two samples.

    Both samples are sorted.  When their sizes differ, every value of the
    smaller sample is paired with an evenly spaced order statistic of the
    larger one.  A reference line y = x spanning the combined data range
    is drawn.

    Parameters
    ----------
    series_1, series_2 : iterable of numbers
        Samples plotted on the x and y axis respectively.
    x_label, y_label : str or None
        Axis labels.
    title : str or None
        Axes title.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``qq_plot.png``).
    **kwargs
        Forwarded to ``ax.scatter``.
    """
    if ax is None:
        fig, ax = plt.subplots()

    # Determining the values to be plotted
    N1, N2 = len(series_1), len(series_2)
    series_1 = sorted(series_1)
    series_2 = sorted(series_2)
    if N1 == N2:
        plotted_series_1 = series_1
        plotted_series_2 = series_2
    elif N1 > N2:
        plotted_series_1 = _evenly_spaced_order_statistics(series_1, N2)
        plotted_series_2 = series_2
    else:
        plotted_series_1 = series_1
        plotted_series_2 = _evenly_spaced_order_statistics(series_2, N1)

    # Scatter plot
    ax.scatter(plotted_series_1, plotted_series_2, **kwargs)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    # 45-degree reference line
    min_ = min(series_1[0], series_2[0])
    max_ = max(series_1[-1], series_2[-1])
    ax.plot((min_, max_), (min_, max_))

    show_and_save_plot(show=show, save=save, filename="qq_plot.png")
def box_cox_linearity_plot(X, Y, lambda_min=-2, lambda_max=2, N=100,
                           title=None, ax=None, show=True, save=False):
    """Plot the X-Y correlation coefficient against the Box-Cox lambda.

    For each of *N* evenly spaced lambdas in ``[lambda_min, lambda_max]``,
    X is transformed with ``box_cox_transformation`` and regressed against
    Y with ``stats.linregress``.  The lambda with the largest squared
    correlation is reported as optimal.

    Parameters
    ----------
    X, Y : array-like
        Paired data.
    lambda_min, lambda_max : number
        Range of lambda values.
    N : int
        Number of lambda values.
    title : str or None
        Axes title.
    ax : matplotlib Axes or None
        Axes to draw on; a new 6x4 inch figure is created when omitted.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``box_cox_linearity.png``).

    Returns
    -------
    lambdas, Rs, optimal_lambda, optimal_R
        The lambda grid, the correlation at each lambda, and the lambda /
        signed correlation at the largest squared correlation.
    """
    # Calculating lambda values and corresponding correlation coefficients
    lambdas = np.linspace(lambda_min, lambda_max, N)
    # Index 2 of the linregress result is the correlation coefficient.
    Rs = np.array([
        stats.linregress(box_cox_transformation(X, lambda_), Y)[2]
        for lambda_ in lambdas
    ])
    best = (Rs ** 2).argmax()
    optimal_lambda, optimal_R = lambdas[best], Rs[best]

    # Plotting
    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))
    ax.scatter(lambdas, Rs, marker="x")
    ax.set_title(title)
    ax.set_ylabel("Correlation Coefficient")
    ax.set_xlabel("λ\n" + "Best R={:.3f} at λ={:.3f}".format(optimal_R,
                                                            optimal_lambda))

    show_and_save_plot(show=show, save=save, filename="box_cox_linearity.png")
    return lambdas, Rs, optimal_lambda, optimal_R
def run_sequence_plot(series, x_label="Index", y_label="Measure", title=None,
                      y_lim=None, ax=None, show=True, save=False, **kwargs):
    """Plot *series* against its 1-based position.

    Parameters
    ----------
    series : sequence
        Values in run order.
    x_label, y_label : str or None
        Axis labels.
    title : str or None
        Axes title.
    y_lim : tuple or None
        Passed to ``ax.set_ylim``.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure is created when omitted.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``run_sequence_plot.png``).
    **kwargs
        Forwarded to ``ax.plot``.
    """
    # Positions start at 1, not 0.
    indices = list(range(1, len(series) + 1))

    if ax is None:
        _, ax = plt.subplots()
    ax.plot(indices, series, **kwargs)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_ylim(y_lim)
    ax.set_title(title)
    show_and_save_plot(show=show, save=save, filename="run_sequence_plot.png")
def box_cox_linearity_set(X, Y, main_title=None, show=True, save=False,
                          box_cox_kws=None, original_lin_kws=None,
                          transformed_lin_kws=None):
    """Draw a panel summarising a Box-Cox linearity transformation of X.

    Panels: scatter of the original data (top-left), Box-Cox linearity
    plot (top-right) and scatter of the transformed data (bottom-left).
    The bottom-right cell is hidden.

    Parameters
    ----------
    X, Y : array-like
        Paired data.
    main_title : str or None
        Figure-level title.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``box_cox_linearity_set.png``).
    box_cox_kws, original_lin_kws, transformed_lin_kws : dict or None
        Extra keyword arguments for ``box_cox_linearity_plot`` and the two
        ``scatter_plot`` calls.  Each has a default used when ``None`` is
        given.

    Returns
    -------
    lambdas, Rs, optimal_lambda, optimal_R
        As returned by ``box_cox_linearity_plot``.
    """
    if box_cox_kws is None:
        box_cox_kws = {"title": "Box-Cox Transformation"}
    if original_lin_kws is None:
        original_lin_kws = {"title": "Original Data",
                            "show_linear_fitting": True}
    if transformed_lin_kws is None:
        transformed_lin_kws = {"title": "Transformed Data",
                               "show_linear_fitting": True}

    layout = {
        "left": 0.1, "right": 0.98,
        "bottom": 0.15, "top": 0.9,
        "wspace": 0.3, "hspace": 0.6,
    }
    fig, axes = plt.subplots(nrows=2, ncols=2, gridspec_kw=layout,
                             figsize=(7, 8))
    (org_lin, box_cox), (trans_lin, unused_ax) = axes
    fig.suptitle(main_title)

    # Box-Cox Linearity Plot
    lambdas, Rs, optimal_lambda, optimal_R = box_cox_linearity_plot(
        X, Y, ax=box_cox, show=False, save=False, **box_cox_kws)
    box_cox.set_xlabel(
        "λ\n" + "Best R={:.3f} at λ={:.3f}".format(optimal_R, optimal_lambda))

    # Linearity Plot of the Original Data
    scatter_plot(X, Y, ax=org_lin, show=False, save=False, **original_lin_kws)

    # Linearity Plot of the Transformed Data
    transformed_X = box_cox_transformation(X, optimal_lambda)
    scatter_plot(transformed_X, Y, ax=trans_lin, show=False, save=False,
                 **transformed_lin_kws)

    # Hiding the empty plot in the bottom-right corner
    unused_ax.axis("off")

    show_and_save_plot(show=show, save=save,
                       filename="box_cox_linearity_set.png")
    return lambdas, Rs, optimal_lambda, optimal_R
def doe_statistic_matrix(df, response, factors, statistic="mean",
                         y_label=None, title=None,
                         show_overall_statistic=False, figure_size=(8, 6),
                         show=True, save=False, **kwargs):
    """Draw an upper-triangular matrix of per-level response statistics.

    Cell (i, i) plots the statistic of the response at each level of
    factor i.  Cell (i, j) with i < j does the same for the element-wise
    product of factors i and j.  Cells below the diagonal are hidden.
    Levels are sorted and drawn at positions 0, 1, 2, ...

    Parameters
    ----------
    df : pandas.DataFrame
        Experiment data; it is not modified.
    response : str
        Column holding the response values.
    factors : sequence of str
        Factor columns.
    statistic : {"mean", "median", "std"}
        Statistic computed per level.
    y_label : str or None
        Y label of the top-left cell; defaults to
        ``"<Statistic> of Response"``.
    title : str or None
        Figure-level title.
    show_overall_statistic : bool
        Draw a horizontal line at the statistic of the whole response
        column in every visible cell.
    figure_size : tuple
        Figure size passed to ``plt.subplots``.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``doe_statistic_matrix.png``).
    **kwargs
        Forwarded to ``ax.plot``.

    Raises
    ------
    ValueError
        If *statistic* is not one of the supported names.
    TypeError
        If two factor columns cannot be multiplied together.
    """
    # Validate before creating the figure so a bad argument does not leave
    # an empty figure behind.
    if statistic not in ("mean", "median", "std"):
        raise ValueError("*statistic* should be 'mean', 'median', or 'std'.")

    def compute(values):
        # ``statistic`` names the pandas Series method to call.
        return getattr(values, statistic)()

    n_factors = len(factors)
    fig, axes = plt.subplots(nrows=n_factors, ncols=n_factors, squeeze=False,
                             sharey=True, figsize=figure_size,
                             gridspec_kw={"left": 0.1, "right": 0.95,
                                          "bottom": 0.05, "top": 0.9,
                                          "wspace": 0, "hspace": 0.3})
    overall_statistic = compute(df[response])

    for row, col in product(range(n_factors), range(n_factors)):
        ax = axes[row, col]
        if row > col:
            # Skip the cell
            ax.axis("off")
            continue

        if row == col:
            # Single-factor plot
            cell_label = factors[row]
            cell_factor = df[cell_label]
        else:
            # Double-factor plot
            factor_1 = factors[row]
            factor_2 = factors[col]
            # Only the multiplication is guarded: a TypeError raised by
            # the plotting calls must not be reported as an encoding
            # problem.
            try:
                cell_factor = df[factor_1] * df[factor_2]
            except TypeError as err:
                raise TypeError(
                    "Factor levels should be encoded as integers.") from err
            cell_label = "{}*{}".format(factor_1, factor_2)

        if show_overall_statistic:
            ax.axhline(y=overall_statistic)

        factor_levels = np.sort(cell_factor.unique())
        # Masking with the factor Series directly avoids adding a scratch
        # column that could clash with an existing column name.
        stat_by_level = [compute(df.loc[cell_factor == level, response])
                         for level in factor_levels]
        positions = list(range(len(factor_levels)))  # levels -> 0, 1, 2, ...
        ax.plot(positions, stat_by_level, **kwargs)
        ax.annotate(cell_label, xy=(0.5, 1.02), xycoords="axes fraction",
                    horizontalalignment="center")
        ax.set_xticks(positions)
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels) - 0.5)

    if y_label is None:
        y_label = statistic.title() + " of Response"
    axes[0, 0].set_ylabel(y_label)
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save,
                       filename="doe_statistic_matrix.png")
def block_plot(df, response_name, plot_factor, grouping_factors, x_label=None,
               y_label=None, plot_factor_label=None, title=None, ax=None,
               figure_size=(6, 6), show=True, save=False):
    """Draw a block plot of mean responses.

    Each combination of grouping-factor levels gets one bar on the x axis.
    The bar spans the lowest to the highest mean response over the levels
    of *plot_factor*, and each level's mean is annotated with the level
    value.  Combinations with at most one available level are left empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Experiment data.
    response_name : str
        Column holding the response values.
    plot_factor : str
        Factor whose levels are compared inside each bar.
    grouping_factors : sequence of str
        Factors whose level combinations define the bars.
    x_label, y_label : str or None
        Axis labels.  Defaults are derived from *grouping_factors* and
        ``"Average Response"``.
    plot_factor_label : str or None
        When given, ``"Plot Character = <label>"`` is annotated in the
        upper-left corner.
    title : str or None
        Axes title.
    ax : matplotlib Axes or None
        Axes to draw on; a new figure of *figure_size* is created when
        omitted.
    figure_size : tuple
        Size of the new figure.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name ``block_plot.png``).
    """
    # Levels of the plot factor
    plot_levels = np.sort(df[plot_factor].unique())

    # Combinations = Levels_of_Factor_1 x Levels_of_Factor_2 x ...
    # (Cartesian product)
    levels_by_factor = {factor: np.sort(df[factor].unique())
                        for factor in grouping_factors}
    combinations = list(itertools.product(*levels_by_factor.values()))

    # Calculating response means grouped by all factors
    selected_columns = [response_name, plot_factor, *grouping_factors]
    grouped_response_means = (
        df[selected_columns]
        .groupby([*grouping_factors, plot_factor])
        .mean()
    )

    # Response means grouped by grouping factors
    response_means_by_combination = {}
    for combination in combinations:
        means_by_level = {}
        response_means_by_combination[combination] = means_by_level
        for plot_level in plot_levels:
            try:
                means_by_level[plot_level] = grouped_response_means.loc[
                    (*combination, plot_level), response_name]
            except KeyError:
                # Stop at the first missing level; earlier levels are kept.
                break

    # Plotting
    if ax is None:
        _, ax = plt.subplots(gridspec_kw={
            "bottom": 0.15, "top": 0.95,
            "left": 0.1, "right": 0.96,
        }, figsize=figure_size)

    for index, combination in enumerate(combinations):
        means_by_level = response_means_by_combination[combination]
        if len(means_by_level) <= 1:
            continue
        low = min(means_by_level.values())
        high = max(means_by_level.values())
        ax.bar(index, high - low, width=0.5, bottom=low, align="center",
               edgecolor="k", color="w")
        for plot_level, mean_at_this_level in means_by_level.items():
            # Only means strictly inside the bar get a marker.
            if mean_at_this_level not in (low, high):
                ax.plot([index], [mean_at_this_level], marker="^", color="k")
            ax.annotate(str(plot_level), xy=(index, mean_at_this_level),
                        xycoords="data", xytext=(-2, 1),
                        textcoords="offset points")

    if plot_factor_label is not None:
        plot_factor_label = "Plot Character = " + plot_factor_label
        ax.annotate(plot_factor_label, xy=(0.01, 0.97),
                    xycoords="axes fraction")

    several_groups = len(grouping_factors) > 1
    ax.set_xticks(list(range(len(combinations))))
    if several_groups:
        ax.set_xticklabels([str(combination) for combination in combinations],
                           rotation=60)
    else:
        ax.set_xticklabels([str(combination[0])
                            for combination in combinations])

    if x_label is None:
        if several_groups:
            x_label = "(" + ", ".join(grouping_factors) + ")"
        else:
            x_label = grouping_factors[0]
    if y_label is None:
        y_label = "Average Response"

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    show_and_save_plot(show=show, save=save, filename="block_plot.png")
def doe_statistic_plot(df, response, factors, statistic="mean", x_labels=None,
                       y_label=None, title=None, show_overall_statistic=False,
                       figure_size=(8, 6), show=True, save=False, **kwargs):
    """Plot a per-level response statistic for each factor, side by side.

    Factor levels are sorted and drawn at positions 0, 1, 2, ...; the tick
    labels show the original level values.

    Parameters
    ----------
    df : pandas.DataFrame
        Experiment data.
    response : str
        Column holding the response values.
    factors : sequence of str
        Factor columns, one subplot each.
    statistic : {"mean", "median", "std"}
        Statistic computed per level.
    x_labels : sequence of str or None
        X label per subplot; defaults to *factors*.
    y_label : str or None
        Y label of the left-most subplot; defaults to
        ``"<Statistic> of Response"``.
    title : str or None
        Figure-level title.
    show_overall_statistic : bool
        Draw a horizontal line at the statistic of the whole response
        column.
    figure_size : tuple
        Figure size passed to ``plt.subplots``.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``doe_statistic_plot.png``).
    **kwargs
        Forwarded to ``ax.plot``.

    Raises
    ------
    ValueError
        If *statistic* is not one of the supported names.
    """
    # Validate before creating the figure so a bad argument does not leave
    # an empty figure behind.
    if statistic not in ("mean", "median", "std"):
        raise ValueError("*statistic* should be 'mean', 'median', or 'std'.")

    def compute(values):
        # ``statistic`` names the pandas Series method to call.
        return getattr(values, statistic)()

    # squeeze=False keeps ``axes`` an array even for a single factor;
    # otherwise a bare Axes is returned and the zip/indexing below fails.
    fig, axes = plt.subplots(nrows=1, ncols=len(factors), sharey=True,
                             squeeze=False, figsize=figure_size,
                             gridspec_kw={"left": 0.1, "right": 0.95,
                                          "bottom": 0.1, "top": 0.95,
                                          "wspace": 0})
    axes = axes[0]
    overall_statistic = compute(df[response])

    for factor, ax in zip(factors, axes):
        if show_overall_statistic:
            ax.axhline(y=overall_statistic)
        factor_levels = np.sort(df[factor].unique())
        stat_by_level = [compute(df.loc[df[factor] == level, response])
                         for level in factor_levels]
        positions = list(range(len(factor_levels)))  # levels -> 0, 1, 2, ...
        ax.plot(positions, stat_by_level, **kwargs)
        ax.set_xticks(positions)
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels) - 0.5)

    if x_labels is None:
        x_labels = factors
    for x_label, ax in zip(x_labels, axes):
        ax.set_xlabel(x_label)
    if y_label is None:
        y_label = statistic.title() + " of Response"
    axes[0].set_ylabel(y_label)
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save,
                       filename="doe_statistic_plot.png")
def weibull_plot(series, title=None, x_label="Measure",
                 y_label="Weibull Probability (%)", ax=None, show=True,
                 save=False, **kwargs):
    """Draw a Weibull probability plot and estimate shape and scale.

    The sorted data are plotted on a log x axis against
    ``ln(-ln(1 - p))``, where ``p = (i - 0.3) / (n + 0.4)`` is the
    plotting position of the i-th smallest value.  A straight line is
    fitted to ``(log10(x), y)`` with ``stats.linregress``.

    Parameters
    ----------
    series : array-like of positive numbers
        Sample data (the logarithm of every value is taken).
    title : str or None
        Axes title.
    x_label : str or None
        X label; the fit summary is appended to it.
    y_label : str or None
        Y label.
    ax : matplotlib Axes or None
        Axes to draw on; a new 6x6 inch figure is created when omitted.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``weibull_plot.png``).
    **kwargs
        Forwarded to ``ax.scatter``.

    Returns
    -------
    slope, intercept, r, shape_parameter, scale_parameter
        Fit results, with ``shape = slope / ln(10)`` and
        ``scale = 10 ** (-intercept / slope)``.
    """
    # Calculation of X and Y coordinates and fitting line parameters.
    # The plotting positions below belong to the *sorted* observations, so
    # the sorted values are used for both the fit and the scatter.
    sorted_series = np.sort(np.asarray(series))
    X = np.log(sorted_series) / np.log(10)
    n = len(sorted_series)
    i = np.arange(1, n + 1)
    p = (i - 0.3) / (n + 0.4)
    Y = np.log(-np.log(1 - p))
    slope, intercept, r, p_value, error = stats.linregress(X, Y)

    X_for_fitting = np.linspace((-6.908 - intercept) / slope,  # p=0.001
                                (1.933 - intercept) / slope,   # p=0.999
                                10)
    series_for_fitting = 10 ** X_for_fitting
    Y_for_fitting = intercept + slope * X_for_fitting
    shape_parameter = slope / np.log(10)  # beta
    scale_parameter = 10 ** (-intercept / slope)  # eta

    # Plotting
    if ax is None:
        fig, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 6))
    ax.scatter(sorted_series, Y, **kwargs)
    ax.plot(series_for_fitting, Y_for_fitting)
    ax.set_title(title)

    # The y axis is linear in ln(-ln(1 - p)) but labelled in percent.
    percentage_ticks = np.array([0.1, 0.5, 1, 5, 10, 20, 30, 40, 50, 60, 70,
                                 80, 90, 95, 99, 99.9])
    p_ticks = percentage_ticks / 100
    y_ticks = np.log(-np.log(1 - p_ticks))
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(percentage_ticks)
    ax.set_ylabel(y_label)
    ax.set_ylim((y_ticks[0], y_ticks[-1]))

    ax.set_xscale("log")
    ax.set_xlim((sorted_series[0] / 10 ** 1.5, sorted_series[-1] * 10 ** 0.5))

    # Same convention as ``histogram``: a missing label becomes empty text
    # so the fit summary can still be appended.
    if x_label is None:
        x_label = ""
    x_label += "\nr={:.3f}, ".format(r)
    x_label += "slope={:.3f}, ".format(slope)
    x_label += "intercept={:.3f}\n".format(intercept)
    x_label += "shape={:.3f}, ".format(shape_parameter)
    x_label += "scale={:.3f}".format(scale_parameter)
    ax.set_xlabel(x_label)

    # Horizontal and Vertical Lines: y = 0 is p = 1 - 1/e (63.2 %), which
    # the fitted line crosses at the scale parameter.
    ax.axhline(y=0)
    ax.axvline(x=scale_parameter)
    ax.annotate("63.2", xy=(0.005, 0.05),
                xycoords=("axes fraction", "data"))
    ax.annotate("{:.3f}".format(scale_parameter),
                xy=(scale_parameter, 0.01),
                xycoords=("data", "axes fraction"))

    show_and_save_plot(show=show, save=save, filename="weibull_plot.png")
    return slope, intercept, r, shape_parameter, scale_parameter
def doe_scatter_matrix(df, response, factors, y_label="Response", title=None,
                       figure_size=(8, 6), show=True, save=False, **kwargs):
    """Draw an upper-triangular matrix of response-vs-level scatter plots.

    Cell (i, i) scatters the response against the levels of factor i.
    Cell (i, j) with i < j scatters it against the element-wise product of
    factors i and j.  Cells below the diagonal are hidden.  Levels are
    sorted and drawn at positions 0, 1, 2, ...

    Parameters
    ----------
    df : pandas.DataFrame
        Experiment data.
    response : str
        Column holding the response values.
    factors : sequence of str
        Factor columns.
    y_label : str or None
        Y label of the top-left cell.
    title : str or None
        Figure-level title.
    figure_size : tuple
        Figure size passed to ``plt.subplots``.
    show, save : bool
        Forwarded to ``show_and_save_plot`` (file name
        ``doe_scatter_matrix.png``).
    **kwargs
        Forwarded to ``ax.scatter``.

    Raises
    ------
    TypeError
        If two factor columns cannot be multiplied together.
    """
    n_factors = len(factors)
    fig, axes = plt.subplots(nrows=n_factors, ncols=n_factors, squeeze=False,
                             sharey=True, figsize=figure_size,
                             gridspec_kw={
                                 "left": 0.1, "right": 0.95,
                                 "bottom": 0.05, "top": 0.9,
                                 "wspace": 0, "hspace": 0.3,
                             })

    for row, col in product(range(n_factors), range(n_factors)):
        ax = axes[row, col]
        if row > col:
            # Skip the cell
            ax.axis("off")
            continue

        if row == col:
            # Single-factor scatter plot
            cell_label = factors[row]
            cell_factor = df[cell_label]
        else:
            # Double-factor scatter plot
            factor_1 = factors[row]
            factor_2 = factors[col]
            # Only the multiplication is guarded: a TypeError raised by
            # the plotting calls must not be reported as an encoding
            # problem.
            try:
                cell_factor = df[factor_1] * df[factor_2]
            except TypeError as err:
                raise TypeError(
                    "Factor levels should be encoded as integers.") from err
            cell_label = "{}*{}".format(factor_1, factor_2)

        factor_levels = np.sort(cell_factor.unique())
        # levels -> 0, 1, 2, ... (position of each value among the levels)
        encoded_factors = factor_levels.searchsorted(cell_factor)
        ax.scatter(encoded_factors, df[response], **kwargs)
        ax.annotate(cell_label, xy=(0.5, 1.02), xycoords="axes fraction",
                    horizontalalignment="center")
        ax.set_xticks(list(range(len(factor_levels))))
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels) - 0.5)

    axes[0, 0].set_ylabel(y_label)
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save,
                       filename="doe_scatter_matrix.png")