def norm_cdf_plot(sample, alpha):
    """Plot the fitted normal CDF against the empirical CDF of *sample*.

    Draws the theoretical normal CDF (parameters estimated from the
    sample via ``mou``), the empirical CDF points, and the
    Dvoretzky-Kiefer-Wolfowitz (DKW) confidence bands at level
    ``1 - alpha``.

    Parameters
    ----------
    sample : sequence of float
        Observed data.
    alpha : float
        Significance level for the DKW bands (0 < alpha < 1).

    Returns
    -------
    numpy.ndarray
        Theoretical CDF values evaluated on the plotting grid.
    """
    mean = mou.mean_sample(sample)
    std = mou.std_sample(sample)
    # 100-point evaluation grid spanning the observed range.
    step = abs(min(sample) - max(sample)) / 100
    x = np.arange(min(sample), max(sample) + step, step)
    cdf = st.norm.cdf(x, mean, std)
    n = len(sample)
    y = np.arange(1, n + 1) / n
    # DKW half-width sqrt(ln(2/alpha) / (2n)) is constant for the whole
    # sample; it was previously recomputed on every loop iteration.
    e = (mt.log(2 / alpha) / (2 * n)) ** 0.5
    F1 = [yi - e for yi in y]
    F2 = [yi + e for yi in y]
    plt.plot(x, cdf, color='dimgray', label='Theoretical CDF')
    plt.scatter(sorted(sample), y, label='Empirical CDF')
    plt.plot(sorted(sample), F1, linestyle='--', color='red', alpha=0.8,
             lw=0.9, label='Dvoretzky–Kiefer–Wolfowitz Confidence Bands')
    plt.plot(sorted(sample), F2, linestyle='--', color='red', alpha=0.8,
             lw=0.9)
    plt.ylabel('Cumulative Distribution Function')
    plt.xlabel('Observed Data')
    plt.legend()
    plt.show()
    return cdf
def norm_qq_plot(sample, alpha):
    """Quantile-quantile plot of *sample* against the fitted normal.

    Plots sample quantiles versus theoretical normal quantiles with a
    regression line and Kolmogorov-Smirnov confidence bands at level
    ``1 - alpha``.

    Parameters
    ----------
    sample : sequence of float
        Observed data.
    alpha : float
        Significance level for the K-S bands (0 < alpha < 1).
    """
    n = len(sample)
    # Plotting positions i / (n + 1), i = 1..n.
    y = np.arange(1, n + 1) / (n + 1)
    mean = mou.mean_sample(sample)
    std = mou.std_sample(sample)
    theo_qq = phi_inverse(y)
    x = theo_qq * std + mean
    # Kolmogorov-Smirnov half-width for the bands.
    K = (-0.5 * mt.log(alpha / 2)) ** 0.5
    # Simplified from (n**2 / (2*n))**0.5, which is exactly sqrt(n/2).
    M = (n / 2) ** 0.5
    band = K / M  # loop-invariant; previously recomputed per iteration
    CI_qq_high = []
    CI_qq_low = []
    for prob in y:
        s_low = phi_inverse(prob - band)
        s_high = phi_inverse(prob + band)
        CI_qq_low.append(s_low * std + mean)
        CI_qq_high.append(s_high * std + mean)
    # seaborn >= 0.12 requires x/y as keyword arguments for regplot;
    # positional data arguments raise a TypeError there.
    sns.regplot(x=x, y=sorted(sample), ci=None,
                line_kws={'color': 'dimgray', 'label': 'Regression Line'})
    plt.plot(sorted(sample), CI_qq_low, linestyle='--', color='red',
             alpha=1, lw=0.8, label='Kolmogorov-Smirnov Confidence Bands')
    plt.legend()
    plt.plot(sorted(sample), CI_qq_high, linestyle='--', color='red',
             alpha=1, lw=0.8)
    plt.xlabel('Theoretical Normal Quantiles')
    plt.ylabel('Sample Quantiles')
    plt.show()
def norm_pdf_plot(sample):
    """Overlay the fitted normal PDF on a density-normalized histogram.

    Estimates mean and standard deviation from *sample* (via ``mou``),
    evaluates the normal PDF on a 100-step grid over the observed range,
    and plots it on top of a Sturges-binned, density-normalized histogram.

    Returns
    -------
    numpy.ndarray
        PDF values evaluated on the plotting grid.
    """
    mu = mou.mean_sample(sample)
    sigma = mou.std_sample(sample)
    lo = min(sample)
    hi = max(sample)
    # 100-point grid spanning the observed range.
    increment = abs(lo - hi) / 100
    grid = np.arange(lo, hi + increment, increment)
    pdf = st.norm.pdf(grid, mu, sigma)
    plt.plot(grid, pdf, color='dimgray', label='Theoretical PDF')
    plt.legend()
    mou.hist(sample, 'sturges', dens_norm=True)
    return pdf
def chi_square_test(sample, alpha):
    """Chi-square goodness-of-fit test for normality of *sample*.

    Bins the data into Sturges classes, compares observed class counts
    with the counts expected under the fitted normal distribution, and
    prints the verdict at confidence level ``1 - alpha``.

    Parameters
    ----------
    sample : sequence of float
        Observed data.
    alpha : float
        Significance level of the test (0 < alpha < 1).
    """
    size = len(sample)
    # Sturges' rule for the number of classes.
    m = mt.ceil(1 + 3.322 * mt.log(size, 10))
    k = 2  # parameters estimated from the data (mean and std)
    f = m - 1 - k  # degrees of freedom
    c = chi2.ppf(1 - alpha, f)  # chi-square critical value (not a p-value)
    h = (max(sample) - min(sample)) / m
    mean = mou.mean_sample(sample)
    std = mou.std_sample(sample)
    # Class boundaries: min, min + h, ..., min + m*h.
    class_ = [min(sample)]
    for i in range(0, m):
        class_.append(class_[i] + h)
    # Expected counts; the first and last classes absorb the open tails.
    e = []
    for i in range(1, len(class_)):
        if i == 1:
            # Lower tail: all probability mass below the first upper bound.
            e.append(npd.phi(class_[i], mean, std) * size)
        elif i == len(class_) - 1:
            # Upper tail. BUGFIX: the original tested i == len(class_),
            # which range(1, len(class_)) never produces, so the last
            # class silently dropped the upper-tail mass.
            e.append((1 - npd.phi(class_[i - 1], mean, std)) * size)
        else:
            e.append((npd.phi(class_[i], mean, std)
                      - npd.phi(class_[i - 1], mean, std)) * size)
    # Observed counts per class; the sentinel value past max(sample)
    # guarantees the final class is flushed by the comparison below.
    n = []
    t = 0
    i = 1
    sample = sorted(sample)
    sample.append(max(sample) + 1)
    for j in range(0, len(sample) - 1):
        if sample[j + 1] > class_[i]:
            n.append((j + 1) - t)
            t = j + 1
            i = i + 1
    # Pearson statistic: sum of (E - O)^2 / E over the classes.
    statistics = sum((e[i] - n[i]) ** 2 / e[i] for i in range(len(n)))
    # BUGFIX: the original if/elif printed nothing when statistics == c;
    # the messages also mislabeled the critical value as "P-Value".
    if statistics < c:
        print(
            'Chi-Square Statistics = {}\nCritical Value = {}\nConsidering that Chi-Square Statistics < Critical Value, with a confidence level of {}%, the normal distribution is acceptable.'
            .format(statistics, c, (1 - alpha) * 100))
    else:
        print(
            'Chi-Square Statistics = {}\nCritical Value = {}\nConsidering that Chi-Square Statistics >= Critical Value, with a confidence level of {}%, the normal distribution is not acceptable.'
            .format(statistics, c, (1 - alpha) * 100))
def lognorm_pdf_plot(sample):
    """Overlay the fitted lognormal PDF on a density-normalized histogram.

    Shape parameter ``s`` is the standard deviation of the log-data;
    ``scale`` is the plain sample mean.
    NOTE(review): scipy's lognorm convention is scale = exp(mean of
    log-data) — confirm the ``scale=mean`` parameterization is intended;
    it does match the convention used by lognorm_qq_plot in this file.

    Parameters
    ----------
    sample : sequence of float
        Observed data; must be strictly positive (log is taken).

    Returns
    -------
    numpy.ndarray
        PDF values evaluated on the plotting grid.
    """
    ln_x = [mt.log(data) for data in sample]
    mean = mou.mean_sample(sample)
    std = mou.std_sample(ln_x)
    # 100-point grid spanning the observed range.
    step = abs(min(sample) - max(sample)) / 100
    x = np.arange(min(sample), max(sample) + step, step)
    pdf = st.lognorm.pdf(x, s=std, scale=mean, loc=0)
    plt.plot(x, pdf, color='dimgray', label='Theoretical PDF')
    plt.legend()
    mou.hist(sample, 'sturges', dens_norm=True)
    # Removed leftover debug print(pdf) — the sibling norm_pdf_plot
    # returns silently, and callers receive pdf anyway.
    return pdf
def lognorm_qq_plot(sample, alpha):
    """Quantile-quantile plot of *sample* against the fitted lognormal.

    Plots sample quantiles versus theoretical lognormal quantiles with a
    regression line and Kolmogorov-Smirnov confidence bands at level
    ``1 - alpha``.

    Parameters
    ----------
    sample : sequence of float
        Observed data; must be strictly positive (log is taken).
    alpha : float
        Significance level for the K-S bands (0 < alpha < 1).
    """
    n = len(sample)
    # Plotting positions i / (n + 1), i = 1..n.
    y = np.arange(1, n + 1) / (n + 1)
    ln_x = [mt.log(data) for data in sample]
    mean = mou.mean_sample(sample)
    std = mou.std_sample(ln_x)
    x = st.lognorm.ppf(y, s=std, loc=0, scale=mean)
    # Kolmogorov-Smirnov half-width for the bands.
    K = (-0.5 * mt.log(alpha / 2)) ** 0.5
    # Simplified from (n**2 / (2*n))**0.5, which is exactly sqrt(n/2).
    M = (n / 2) ** 0.5
    band = K / M  # loop-invariant; previously recomputed per iteration
    CI_qq_high = []
    CI_qq_low = []
    for prob in y:
        CI_qq_low.append(st.lognorm.ppf(prob - band, s=std, loc=0,
                                        scale=mean))
        CI_qq_high.append(st.lognorm.ppf(prob + band, s=std, loc=0,
                                         scale=mean))
    # seaborn >= 0.12 requires x/y as keyword arguments for regplot;
    # positional data arguments raise a TypeError there.
    sns.regplot(x=x, y=sorted(sample), ci=None,
                line_kws={'color': 'dimgray', 'label': 'Regression Line'})
    plt.plot(sorted(sample), CI_qq_low, linestyle='--', color='red',
             alpha=1, lw=0.8, label='Kolmogorov-Smirnov Confidence Bands')
    plt.legend()
    plt.plot(sorted(sample), CI_qq_high, linestyle='--', color='red',
             alpha=1, lw=0.8)
    plt.xlabel('Theoretical Lognormal Quantiles')
    plt.ylabel('Sample Quantiles')
    plt.show()
def ks_test(sample, alpha):
    """Kolmogorov-Smirnov goodness-of-fit test for the lognormal fit.

    Compares the empirical CDF with the fitted CDF from ``lnpd.phi``
    (fed with the sample mean and the std of the log-data, i.e. the
    lognormal fit used elsewhere in this file) and prints the verdict
    at confidence level ``1 - alpha``.

    Parameters
    ----------
    sample : sequence of float
        Observed data; must be strictly positive (log is taken).
    alpha : float
        Significance level of the test (0 < alpha < 1).
    """
    sort_sample = sorted(sample)
    n = len(sort_sample)
    # Empirical CDF evaluated at the order statistics.
    S = np.arange(1, n + 1) / n
    ln_x = [mt.log(data) for data in sample]
    mean = mou.mean_sample(sample)
    std = mou.std_sample(ln_x)
    # Fitted CDF at the sorted data points.
    F = lnpd.phi(sort_sample, mean, std)
    D = max(abs(F - S))  # K-S statistic: largest CDF discrepancy
    D_ks = st.ksone.ppf(1 - alpha / 2, n)  # critical value (not a p-value)
    # BUGFIX: the original if/elif printed nothing when D == D_ks, and
    # the messages mislabeled the critical value as "P-Value" and named
    # the distribution "normal" although the fit is lognormal.
    if D < D_ks:
        print('Kolmogorov-Smirnov Statistics = {}\nCritical Value = {}\nConsidering that K-S Test Statistics < Critical Value, with a confidence level of {}%, the lognormal distribution is acceptable.'.format(D, D_ks, (1 - alpha) * 100))
    else:
        print('Kolmogorov-Smirnov Statistics = {}\nCritical Value = {}\nConsidering that K-S Test Statistics >= Critical Value, with a confidence level of {}%, the lognormal distribution is not acceptable.'.format(D, D_ks, (1 - alpha) * 100))
def mean_norm(sample, alpha):
    """Print a two-sided normal confidence interval for the population mean.

    Uses the z-quantile at ``1 - alpha/2`` with the sample standard
    deviation (from ``mou``), so the interval is mean +/- z*std/sqrt(n).

    Parameters
    ----------
    sample : sequence of float
        Observed data.
    alpha : float
        Significance level (0 < alpha < 1).
    """
    size = len(sample)
    spread = mou.std_sample(sample)
    z = st.norm.ppf(1 - alpha / 2)
    margin = z * spread / size ** 0.5
    print("The population mean with a confidence level of {}% is {} \u00B1 {}.".format(int((1 - alpha) * 100), mou.mean_sample(sample), margin))