def test_Fit_Lognormal_2P():
    """Fit_Lognormal_2P must reproduce the stored reference values.

    The input is 20 seeded samples from Lognormal(mu=1, sigma=0.5) that are
    right censored at the mean of that distribution.
    """
    source_dist = Lognormal_Distribution(mu=1, sigma=0.5)
    samples = source_dist.random_samples(20, seed=5)
    censored = make_right_censored_data(data=samples, threshold=source_dist.mean)
    fit = Fit_Lognormal_2P(
        failures=censored.failures,
        right_censored=censored.right_censored,
        show_probability_plot=False,
        print_results=False,
    )

    # (attribute, reference value) pairs, checked in this order
    reference_values = (
        ('mu', 0.9494189618970151),
        ('sigma', 0.4267323807168996),
        ('gamma', 0),
        ('AICc', 49.69392320890684),
        ('Cov_mu_sigma', 0.0025054526707355687),
        ('loglik', -22.494020427982832),
    )
    for attribute, reference in reference_values:
        assert_allclose(getattr(fit, attribute), reference, rtol=rtol, atol=atol)
def test_Fit_Lognormal_2P():
    """Fit_Lognormal_2P must reproduce the stored reference values for MLE and LS.

    The input is 20 seeded samples from Lognormal(mu=1, sigma=0.5) that are
    right censored at the mean of that distribution. The same data is fitted
    once per method and every reported attribute is compared with its
    reference value.
    """
    source_dist = Lognormal_Distribution(mu=1, sigma=0.5)
    samples = source_dist.random_samples(20, seed=5)
    censored = make_right_censored_data(data=samples, threshold=source_dist.mean)

    # (method, ((attribute, reference value), ...)) checked in this order
    reference_values = (
        ('MLE', (
            ('mu', 0.9494190246173423),
            ('sigma', 0.4267323457212804),
            ('gamma', 0),
            ('AICc', 49.69392320890687),
            ('BIC', 50.979505403073674),
            ('loglik', -22.494020427982846),
            ('AD', 46.91678130009629),
            ('Cov_mu_sigma', 0.002505454567167978),
        )),
        ('LS', (
            ('mu', 0.9427890879489974),
            ('sigma', 0.4475312141445822),
            ('gamma', 0),
            ('AICc', 49.757609068995194),
            ('BIC', 51.043191263162),
            ('loglik', -22.52586335802701),
            ('AD', 46.93509652892565),
            ('Cov_mu_sigma', 0.0025640250120794526),
        )),
    )

    for fit_method, expected in reference_values:
        fit = Fit_Lognormal_2P(
            failures=censored.failures,
            right_censored=censored.right_censored,
            method=fit_method,
            show_probability_plot=False,
            print_results=False,
        )
        for attribute, reference in expected:
            assert_allclose(getattr(fit, attribute), reference, rtol=rtol, atol=atol)
def __BIC_minimizer(common_shape_X):  # lgtm [py/similar-function]
    '''
    Objective function for minimize: returns the BIC summed over every
    failure stress level when each level is fitted with sigma forced to
    common_shape_X. The sigma which minimises this total is the common shape.

    Reads unique_stresses_f, f_df, right_censored, unique_stresses_rc and
    rc_df from the enclosing scope.
    '''
    total_BIC = 0
    for level in unique_stresses_f:
        times_failed = f_df[f_df['stress'] == level]['times'].values
        # censored times are only passed on when this stress level has some
        times_censored = None
        if right_censored is not None and level in unique_stresses_rc:
            times_censored = rc_df[rc_df['stress'] == level]['times'].values
        fit_at_level = Fit_Lognormal_2P(
            failures=times_failed,
            right_censored=times_censored,
            show_probability_plot=False,
            print_results=False,
            force_sigma=common_shape_X,
        )
        total_BIC += fit_at_level.BIC
    return total_BIC
def HistogramPLOT_all(data, month, year):
    """Plot one histogram per month of the 'WS95' column with fitted PDFs overlaid.

    Rows are kept only where ``data["isFULL"] == 1`` and the year of
    ``data["DateTime"]`` equals ``year``; gaps are forward filled. For each of
    the first ``month`` months a subplot is drawn on a 4x3 grid containing a
    density histogram of the values plus the PDFs of Weibull_2P, Weibull_3P,
    Gamma_2P, Gamma_3P, Lognormal_2P and a Weibull mixture fitted to them.
    Months with no rows are left as empty axes.

    Args:
        data: DataFrame with the columns "isFULL", "DateTime" (objects exposing
            ``.year`` and ``.month``) and "WS95".
        month: Number of months to plot, starting from January. The subplot
            grid and the month-name list both hold 12 entries, so this must
            not exceed 12.
        year: Year to select from the "DateTime" column.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    month_names = [
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December'
    ]
    threshold = 30  # values above this are treated as right censored at the threshold
    xvals = np.linspace(0, 30, 1000)

    # Keep only the rows flagged as full days, then forward fill missing values
    data01 = data[data["isFULL"] == 1].copy()
    data01 = data01.ffill()  # replaces the deprecated fillna(method='ffill')
    in_year = data01["DateTime"].apply(lambda x: x.year) == year
    data01 = data01[in_year].copy()

    plt.figure(figsize=(24, 18), dpi=80, facecolor='w', edgecolor='r')

    for i in range(month):
        ax = plt.subplot2grid((4, 3), (i // 3, i % 3))
        logic = data01["DateTime"].apply(lambda x: x.month) == (i + 1)
        # small offset so that no value is exactly zero before fitting
        ws = data01['WS95'][logic] + 0.0001

        failures = []
        censored = []
        for item in ws:
            if item > threshold:
                censored.append(threshold)
            else:
                failures.append(item)

        print(ws.shape)
        if np.sum(logic) == 0:
            continue  # nothing to fit or draw for this month

        # `density` is the supported replacement for the removed `normed` keyword
        ax.hist(ws, bins=30, density=True)

        wb2 = Fit_Weibull_2P(failures=failures, show_probability_plot=False, print_results=False)
        wb3 = Fit_Weibull_3P(failures=failures, show_probability_plot=False, print_results=False)
        gm2 = Fit_Gamma_2P(failures=failures, show_probability_plot=False, print_results=False)
        gm3 = Fit_Gamma_3P(failures=failures, show_probability_plot=False, print_results=False)
        ln2 = Fit_Lognormal_2P(failures=failures, show_probability_plot=False, print_results=False)
        wbm = Fit_Weibull_Mixture(failures=failures, right_censored=censored, show_plot=False, print_results=False)

        # These PDF calls are made for their plotting side effect (show_plot=True)
        Weibull_Distribution(alpha=wb2.alpha, beta=wb2.beta).PDF(
            xvals=xvals, show_plot=True, label='Weibull_2P')
        Weibull_Distribution(alpha=wb3.alpha, beta=wb3.beta, gamma=wb3.gamma).PDF(
            xvals=xvals, show_plot=True, label='Weibull_3P')
        Gamma_Distribution(alpha=gm2.alpha, beta=gm2.beta).PDF(
            xvals=xvals, show_plot=True, label='Gamma_2P')
        Gamma_Distribution(alpha=gm3.alpha, beta=gm3.beta, gamma=gm3.gamma).PDF(
            xvals=xvals, show_plot=True, label='Gamma_3P')
        Lognormal_Distribution(mu=ln2.mu, sigma=ln2.sigma).PDF(
            xvals=xvals, show_plot=True, label='Lognormal_2P')

        # The mixture PDF is the proportion-weighted sum of its two Weibull components
        part1_pdf = Weibull_Distribution(alpha=wbm.alpha_1, beta=wbm.beta_1).PDF(
            xvals=xvals, show_plot=False)
        part2_pdf = Weibull_Distribution(alpha=wbm.alpha_2, beta=wbm.beta_2).PDF(
            xvals=xvals, show_plot=False)
        Mixture_PDF = part1_pdf * wbm.proportion_1 + part2_pdf * wbm.proportion_2
        ax.plot(xvals, Mixture_PDF, label='Weibull_Mixture')

        ax.legend()
        ax.set_ylim(0, 0.16)
        ax.set_xlim(0, 30)
        ax.set_xticks([0, 5, 10, 15, 20, 25, 30])
        ax.tick_params(axis="x", labelsize=20)
        ax.tick_params(axis="y", labelsize=20)
        ax.set_title('{}'.format(month_names[i]), fontweight='bold', size=20)

    plt.tight_layout()
    plt.show()
def __init__(self, failures, failure_stress, right_censored=None, right_censored_stress=None, print_results=True, show_plot=True, common_shape_method='BIC'):
    '''
    Fits a Lognormal_2P distribution to the data at each unique stress, finds
    a common sigma for all stresses, refits each stress with sigma forced to
    that value, and optionally plots and prints the results.

    Inputs:
    failures - list or array of failure times
    failure_stress - list or array (same length as failures) of the stress for each failure
    right_censored - list or array of right censored times. Optional.
    right_censored_stress - list or array (same length as right_censored) of the stress
        for each right censored time. Required if right_censored is given. Every value
        must also appear in failure_stress.
    print_results - True/False. If True the results dataframe, total AICc and total BIC
        are printed (this also sets the pandas display.width and display.max_columns options).
    show_plot - True/False. If True the probability plot for each stress and the CDF of
        each common sigma fit are drawn on the current figure.
    common_shape_method - 'BIC' (sigma that minimises the summed BIC), 'weighted_average'
        (average of the individual sigmas weighted by the number of data points at each
        stress) or 'average' (unweighted average of the individual sigmas). Default is 'BIC'.

    Outputs (set on self):
    common_shape - the common sigma
    BIC_sum - sum of the BIC of the common sigma fits
    AICc_sum - sum of the AICc of the common sigma fits, or 'Insufficient Data' if any
        fit reported its AICc as a string
    results - dataframe with the columns stress, original mu, original sigma, new mu,
        common sigma, sigma change

    Raises:
    ValueError - for an unknown common_shape_method, mismatched lengths, inputs that are
        not a list or array, right_censored given without right_censored_stress, or a
        right censored stress that has no failures.
    '''

    def _to_array(values, name):
        '''Returns values as an array. Only exact list and np.ndarray types are accepted.'''
        if type(values) is list:
            return np.array(values)
        if type(values) is np.ndarray:
            return values
        raise ValueError(name + ' must be an array or list')

    # input type checking and converting to arrays in preparation for creation of dataframe
    if common_shape_method not in ['BIC', 'weighted_average', 'average']:
        raise ValueError('common_shape_method must be either BIC, weighted_average, or average. Default is BIC.')
    if len(failures) != len(failure_stress):
        raise ValueError('The length of failures does not match the length of failure_stress')
    failures = _to_array(failures, 'failures')
    failure_stress = _to_array(failure_stress, 'failure_stress')
    if right_censored is not None:
        if right_censored_stress is None:
            # previously this fell through to len(None) and raised an unhelpful TypeError
            raise ValueError('right_censored_stress must be provided when right_censored is provided')
        if len(right_censored) != len(right_censored_stress):
            raise ValueError('The length of right_censored does not match the length of right_censored_stress')
        right_censored = _to_array(right_censored, 'right_censored')
        right_censored_stress = _to_array(right_censored_stress, 'right_censored_stress')

    # x range for the CDF lines: one decade either side of the failure times
    xmin = np.floor(np.log10(min(failures))) - 1
    xmax = np.ceil(np.log10(max(failures))) + 1
    xvals = np.logspace(xmin, xmax, 100)

    # cens_codes: 1 = failure, 0 = right censored
    if right_censored is not None:
        TIMES = np.hstack([failures, right_censored])
        STRESS = np.hstack([failure_stress, right_censored_stress])
        CENS_CODES = np.hstack([np.ones_like(failures), np.zeros_like(right_censored)])
    else:
        TIMES = failures
        STRESS = failure_stress
        CENS_CODES = np.ones_like(failures)

    data = {'times': TIMES, 'stress': STRESS, 'cens_codes': CENS_CODES}
    df = pd.DataFrame(data, columns=['times', 'stress', 'cens_codes'])
    df_sorted = df.sort_values(by=['cens_codes', 'stress', 'times'])
    f_df = df_sorted[df_sorted['cens_codes'] == 1]
    rc_df = df_sorted[df_sorted['cens_codes'] == 0]
    unique_stresses_f = f_df.stress.unique()
    unique_stresses_rc = []  # stays empty when there is no right censored data
    if right_censored is not None:
        unique_stresses_rc = rc_df.stress.unique()
        for item in unique_stresses_rc:  # check that there are no unique right_censored stresses that are not also in failure stresses
            if item not in unique_stresses_f:
                raise ValueError('The right_censored_stress array contains values that are not in the failure_stress array. This is equivalent to trying to fit a distribution to only censored data and cannot be done.')

    def _times_at(stress):
        '''Returns (failure times, right censored times or None) for one stress.'''
        stress_failures = f_df[f_df['stress'] == stress]['times'].values
        if right_censored is not None and stress in unique_stresses_rc:
            stress_right_censored = rc_df[rc_df['stress'] == stress]['times'].values
        else:
            stress_right_censored = None
        return stress_failures, stress_right_censored

    lognormal_fit_mu_array = []
    lognormal_fit_sigma_array = []
    lognormal_fit_mu_array_common_shape = []
    color_list = ['steelblue', 'darkorange', 'red', 'green', 'purple', 'blue', 'grey', 'deeppink', 'cyan', 'chocolate']
    weights_array = []

    # within this loop, the data at each unique stress is fitted independently to get the individual mu and sigma
    for stress in unique_stresses_f:
        FAILURES, RIGHT_CENSORED = _times_at(stress)
        len_rc = 0 if RIGHT_CENSORED is None else len(RIGHT_CENSORED)
        weights_array.append(len(FAILURES) + len_rc)
        lognormal_fit = Fit_Lognormal_2P(failures=FAILURES, right_censored=RIGHT_CENSORED, show_probability_plot=False, print_results=False)
        lognormal_fit_mu_array.append(lognormal_fit.mu)
        lognormal_fit_sigma_array.append(lognormal_fit.sigma)
    common_shape_guess = np.average(lognormal_fit_sigma_array)

    def __BIC_minimizer(common_shape_X):  # lgtm [py/similar-function]
        '''
        __BIC_minimizer is used by the minimize function to get the sigma which gives the lowest overall BIC
        '''
        BIC_tot = 0
        for stress in unique_stresses_f:
            FAILURES, RIGHT_CENSORED = _times_at(stress)
            lognormal_fit_common_shape = Fit_Lognormal_2P(failures=FAILURES, right_censored=RIGHT_CENSORED, show_probability_plot=False, print_results=False, force_sigma=common_shape_X)
            BIC_tot += lognormal_fit_common_shape.BIC
        return BIC_tot

    if common_shape_method == 'BIC':
        optimized_sigma_results = minimize(__BIC_minimizer, x0=common_shape_guess, method='nelder-mead')
        common_shape = optimized_sigma_results.x[0]
    elif common_shape_method == 'weighted_average':
        total_data = sum(weights_array)
        weights = np.array(weights_array) / total_data
        common_shape = sum(weights * np.array(lognormal_fit_sigma_array))
    elif common_shape_method == 'average':
        common_shape = common_shape_guess  # this was just the numerical average obtained above
    self.common_shape = common_shape

    # within this loop, the data at each unique stress is refitted with the common sigma and plotted as a probability plot along with the CDF of the common sigma fit
    AICc_total = 0
    BIC_total = 0
    AICc = True
    for i, stress in enumerate(unique_stresses_f):
        FAILURES, RIGHT_CENSORED = _times_at(stress)
        lognormal_fit_common_shape = Fit_Lognormal_2P(failures=FAILURES, right_censored=RIGHT_CENSORED, show_probability_plot=False, print_results=False, force_sigma=common_shape)
        lognormal_fit_mu_array_common_shape.append(lognormal_fit_common_shape.mu)
        if type(lognormal_fit_common_shape.AICc) == str:
            AICc = False  # a string AICc means that fit could not provide a numeric value
        else:
            AICc_total += lognormal_fit_common_shape.AICc
        BIC_total += lognormal_fit_common_shape.BIC
        if show_plot is True:
            color = color_list[i % len(color_list)]  # cycle the colors so more than 10 stresses does not raise IndexError
            lognormal_fit_common_shape.distribution.CDF(linestyle='--', color=color, xvals=xvals)
            Probability_plotting.Lognormal_probability_plot(failures=FAILURES, right_censored=RIGHT_CENSORED, color=color, label=str(stress))

    if show_plot is True:
        # applied once after all stresses are drawn so these settings are the final state of the axes
        plt.legend(title='Stress')
        plt.xlim(10 ** (xmin + 1), 10 ** (xmax - 1))
        title_prefix = {'BIC': 'Optimal BIC ', 'weighted_average': 'Weighted average ', 'average': 'Average '}[common_shape_method]
        plt.title(str('ALT Lognormal Probability Plot\n' + title_prefix + r'$\sigma$ = ' + str(round(common_shape, 4))))

    self.BIC_sum = np.sum(BIC_total)
    if AICc is True:
        self.AICc_sum = np.sum(AICc_total)
    else:
        self.AICc_sum = 'Insufficient Data'

    # relative change from each individual sigma to the common sigma, formatted as a signed percentage
    sigma_difs = (common_shape - np.array(lognormal_fit_sigma_array)) / np.array(lognormal_fit_sigma_array)
    sigma_differences = []
    for item in sigma_difs:
        if item > 0:
            sigma_differences.append(str('+' + str(round(item * 100, 2)) + '%'))
        else:
            sigma_differences.append(str(str(round(item * 100, 2)) + '%'))

    results = {'stress': unique_stresses_f,
               'original mu': lognormal_fit_mu_array,
               'original sigma': lognormal_fit_sigma_array,
               'new mu': lognormal_fit_mu_array_common_shape,
               'common sigma': np.ones_like(unique_stresses_f) * common_shape,
               'sigma change': sigma_differences}
    results_df = pd.DataFrame(results, columns=['stress', 'original mu', 'original sigma', 'new mu', 'common sigma', 'sigma change'])
    blankIndex = [''] * len(results_df)
    results_df.index = blankIndex  # blank index so the printed table has no row labels
    self.results = results_df

    if print_results is True:
        pd.set_option('display.width', 200)  # prevents wrapping after default 80 characters
        pd.set_option('display.max_columns', 9)  # shows the dataframe without ... truncation
        print('\nALT Lognormal probability plot results:')
        print(self.results)
        print('Total AICc:', self.AICc_sum)
        print('Total BIC:', self.BIC_sum)