def find_global_offset(im_list, bbox=9, splitstyle="hsplit", fsize=10, scale=0.5, binning=1):
    """
    Estimate a single x-offset and y-offset from a list of images.

    For every image the x and y offset distributions are obtained from
    ``get_offset_distribution`` and passed through ``pinhole_filter``. The
    filtered values of all images are pooled, and each pooled axis is fit
    with ``scipy.stats.skewnorm``.

    :param im_list: 1D list of image arrays to be used in determination of the offset
    :param bbox: int, forwarded to ``get_offset_distribution``. Default is 9.
    :param splitstyle: string, forwarded to ``get_offset_distribution``. Default is "hsplit"
    :param fsize: int, forwarded to ``get_offset_distribution``. Default is 10.
    :param scale: forwarded to ``pinhole_filter``. Default is 0.5.
    :param binning: forwarded to ``pinhole_filter``. Default is 1.

    :return: tuple ``(x, y)`` holding the location parameter of the
        skew-normal fit for the pooled x and y offsets.

    :Example:

        >>> from toolbox.alignment import find_global_offset
        >>> import toolbox.testdata as test
        >>> im = test.image_stack()
        >>> print(find_global_offset(im))
        (5.624042070667237, -2.651128775580636)
    """
    offsets_x = []
    offsets_y = []
    for image in im_list:
        dist_x, dist_y = get_offset_distribution(image, bbox, splitstyle, fsize)
        offsets_x.extend(pinhole_filter(dist_x, scale, binning))
        offsets_y.extend(pinhole_filter(dist_y, scale, binning))
    # skewnorm.fit returns (shape, loc, scale); only the location is reported.
    x_offset = skewnorm.fit(offsets_x)[1]
    y_offset = skewnorm.fit(offsets_y)[1]
    return x_offset, y_offset
def plot_skewed_data(data):
    """Plot a normalized histogram of ``data`` with its fitted skew-normal PDF.

    A skew-normal distribution is fitted to ``data``; its PDF, the histogram
    of the samples and the 1st/50th/99th sample percentiles are drawn on a
    new figure, which is then shown. The 1% and 99% quantiles of the fitted
    distribution are printed to stdout.

    Args:
        data: 1-D sequence of numeric samples.
    """
    # Evaluate the PDF at the midpoints of an evenly spaced grid over the data range.
    num_bins = 200
    bins = np.linspace(min(data), max(data), num_bins)
    bin_centers = 0.5 * (bins[1:] + bins[:-1])

    # skewnorm.fit returns (shape, loc, scale).
    a, loc, scale = skewnorm.fit(data)
    pdf = skewnorm.pdf(bin_centers, a, loc, scale)

    # ``density`` replaces the ``normed`` keyword, which newer matplotlib
    # releases no longer accept.
    kwargs = dict(density=True, edgecolor='black', linewidth=1.2, alpha=0.5, bins=20, stacked=True)
    fig, ax = plt.subplots(1, 1)
    ax.plot(bin_centers, pdf, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
    ax.hist(data, color='g', label="Histogram of samples (normalized)", **kwargs)
    ax.axvline(x=np.percentile(data, 1), color='red', ls=':', lw=2, label='1 pc')
    # The 50th percentile is the median, not the mean.
    ax.axvline(x=np.percentile(data, 50), color='red', ls=':', lw=2, label='median')
    ax.axvline(x=np.percentile(data, 99), color='red', ls=':', lw=2, label='99 pc')
    # Quantiles of the *fitted* distribution: loc and scale must be passed,
    # otherwise the standardized (loc=0, scale=1) distribution is used.
    print("%f :: %f" % (skewnorm.ppf(0.01, a, loc, scale), skewnorm.ppf(0.99, a, loc, scale)))
    ax.legend(loc='best', frameon=True)
    plt.show()
def compare_hist_to_norm_ax(x, data, bins=25):
    """Draw a histogram of ``data`` with normal and skew-normal fits on ``ax[x]``.

    ``ax`` is not a parameter; it is read from the enclosing (module) scope
    and indexed with ``x``.

    Args:
        x: index selecting the target axes inside ``ax``.
        data: 1-D numeric samples.
        bins: number of histogram bins. Default is 25.
    """
    axis = ax[x]
    mu, std = scipy.stats.norm.fit(data)
    axis.hist(data, bins=bins, density=True, alpha=0.6, color='purple', label="Données")
    # Plot the PDFs over the x-range of this panel; pyplot's "current axes"
    # is not necessarily ax[x], so the limits are read from the axes itself.
    xmin, xmax = axis.get_xlim()
    X = np.linspace(xmin, xmax)
    axis.plot(X, scipy.stats.norm.pdf(X, mu, std), label="Normal Distribution")
    axis.plot(X, skewnorm.pdf(X, *skewnorm.fit(data)), color='black', label="Skewed Normal Distribution")
    sk = scipy.stats.skew(data)
    title2 = "Moments mu: {}, sig: {}, sk: {}".format(round(mu, 4), round(std, 4), round(sk, 4))
    # Axes objects expose set_ylabel/set_title; ``ylabel``/``title`` are pyplot
    # functions and are not callable on an Axes.
    axis.set_ylabel("Fréquence", rotation=90)
    axis.set_title(title2)
    axis.legend()
def fig4(name, func, eps):
    """Makes figure 4.

    For each deterministic variable of the model whose name contains
    "Lambda", draws the posterior histogram, shades the bins inside the HPD
    interval, and overlays a skew-normal fit of the posterior samples and a
    standard normal density.

    Args:
        name (str): Descriptive name of the model. Posterior samples, statistics,
            and figures are generated and saved in a subdirectory with this name.
        func (:obj:`<class 'function'>): Function for model construction. Should
            return a formatted copy of the data.
        eps (bool): If True, saves the figures to the manuscript subdirectory in
            .eps format.

    """
    with pm.Model() as m:
        fit_model(name, func)
        trace = pm.load_trace(name)
        params = sorted(
            [p.name for p in m.deterministics if "Lambda" in p.name])
    set_fig_defaults()
    rcParams["figure.figsize"] = (3, 3 * 2)
    # Five stacked panels; zip() below stops at the shorter of params/axes.
    fig, axes = plt.subplots(5, 1, constrained_layout=True)
    for p, ax in zip(params, axes):
        vals, bins, _ = ax.hist(trace[p], bins=50, density=True, histtype="step", color="lightgray")
        ax.set_xlabel(p)
        # Only the first panel carries the y-axis label.
        if ax == axes[0]:
            ax.set_ylabel("Posterior density")
        start, stop = pm.stats.hpd(trace[p])
        # Shade each histogram bin (left edge l, right edge r, height n)
        # to the extent it lies inside [start, stop].
        for n, l, r in zip(vals, bins, bins[1:]):
            if l > start:
                if r < stop:
                    # bin entirely inside the interval
                    ax.fill_between([l, r], 0, [n, n], color="lightgray")
                elif l < stop < r:
                    # bin straddles the upper bound: shade up to stop
                    ax.fill_between([l, stop], 0, [n, n], color="lightgray")
            elif l < start < r:
                # bin straddles the lower bound: shade from start
                # NOTE(review): if stop also falls in this bin the shading
                # extends past stop -- confirm whether that can occur.
                ax.fill_between([start, r], 0, [n, n], color="lightgray")
        # x-grid always includes 0 so both curves can be compared there.
        x = np.linspace(min([bins[0], 0]), max([0, bins[-1]]))
        theta = skewnorm.fit(trace[p])
        # NOTE(review): the curve is a skew-normal fit although the legend
        # label reads "Normal approx.".
        ax.plot(x, skewnorm.pdf(x, *theta), "k", label="Normal approx.")
        ax.plot(x, norm.pdf(x), "k--", label="Prior")
        # Mark both densities at 0.
        ax.plot([0, 0], [skewnorm.pdf(0, *theta), norm.pdf(0)], "ko")
    fig.savefig(f"{name}/fig4.png")
    if eps is True:
        fig.savefig("manuscript/fig4.eps")
def calc_risk_skewnorm(self, confidence=0.95):
    """Estimate VaR and CVaR of portfolio losses with a skew-normal fit.

    Losses are the negated first column of ``self.returns.dot(self.allocation)``.
    A skew-normal distribution is fitted to them; VaR is its ``confidence``
    quantile and CVaR is the fitted expectation above VaR divided by
    ``1 - confidence``.

    Args:
        confidence: quantile level in (0, 1). Default is 0.95.

    Returns:
        Tuple ``(losses, VaR, CVaR)``.
    """
    portfolio = self.returns.dot(self.allocation)
    losses = -portfolio.iloc[:, 0]

    # skewnorm.fit returns (shape, loc, scale).
    shape, location, spread = skewnorm.fit(losses)
    VaR = skewnorm.ppf(confidence, shape, location, spread)

    # Partial expectation of the loss over the tail [VaR, +inf).
    tail_loss = skewnorm.expect(
        lambda y: y,
        args=(shape, ),
        loc=location,
        scale=spread,
        lb=VaR,
    )
    tail_weight = 1 / (1 - confidence)
    CVaR = tail_weight * tail_loss
    return losses, VaR, CVaR
def plot_skew_norm_fit(ax, data, bins, color='k', linestyle='-', linewidth=3, label=None):
    """Fit a skew-normal distribution to ``data`` and plot its PDF on ``ax``.

    The PDF is evaluated at the positions given in ``bins`` and drawn with
    ``ax.plot`` using the supplied line styling.
    """
    shape, location, spread = skewnorm.fit(data)
    density = skewnorm.pdf(bins, shape, loc=location, scale=spread)
    line_style = dict(color=color, linestyle=linestyle, linewidth=linewidth, label=label)
    ax.plot(bins, density, **line_style)
def main():
    """Plot the distribution of log hourly trade counts with three fitted PDFs.

    Reads ``../Binance/<symbol>.csv`` (column ``Timestamp``, compared against
    millisecond epoch values), counts rows per ``time_step``-hour window
    starting 365 days before 2019-06-01 (local time), and fits skew-normal,
    normal and exponentially modified Gaussian distributions to the log of
    the counts. The figure is saved as ``'<symbol> log Trades'`` and shown.
    """
    symbol = 'BTCUSDT'
    start = int(datetime.datetime.timestamp(datetime.datetime(2019, 6, 1))) * 1000 - 365 * 24 * 60 * 60 * 1000
    previous = start
    trades = []
    total_trades = 0
    time_step = 1
    window_ms = time_step * 60 * 60 * 1000
    with open('../Binance/' + symbol + '.csv', 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for line in reader:
            # A row past the current window closes it: store the count and
            # advance the window start by one step.
            if int(line['Timestamp']) > previous + window_ms:
                previous += window_ms
                trades.append(total_trades)
                total_trades = 0
            total_trades += 1
    mean = np.mean(trades)
    print(mean)
    # Windows holding exactly one trade are excluded before fitting.
    trades = [t for t in trades if t != 1]
    log_trades = np.log(trades)
    a, loc, scale = skewnorm.fit(log_trades)
    loc_n, scale_n = norm.fit(log_trades)
    k, loc_ne, scale_ne = exponnorm.fit(log_trades)
    plt.hist(log_trades, bins=100, density=True)
    x = np.linspace(6, 12, 100)
    plt.plot(x, skewnorm.pdf(x, a, loc=loc, scale=scale), label='skewnorm')
    plt.plot(x, norm.pdf(x, loc=loc_n, scale=scale_n), label='norm')
    # Use the exponnorm fit's own loc/scale; the normal fit's parameters
    # describe a different distribution.
    plt.plot(x, exponnorm.pdf(x, k, loc=loc_ne, scale=scale_ne), label='Exponentially modified Gaussian')
    plt.xlabel('Log Trades')
    plt.ylabel('Density')
    plt.legend()
    plt.savefig(symbol + ' log Trades')
    plt.show()
def calc_distribution(y, type='norm', lower=0.01, upper=99.99, points=100):
    """Return the PDF of a distribution fitted to ``y`` on an even grid.

    The grid runs between the two values returned by
    ``get_percentiles(y, lower, upper)`` and has ``points`` samples.

    Args:
        y: samples to fit.
        type: ``'norm'`` for a normal fit, ``'skewed'`` for a skew-normal fit.
        lower: forwarded to ``get_percentiles``.
        upper: forwarded to ``get_percentiles``.
        points: number of grid points.

    Returns:
        Tuple ``(X, Y)`` of grid positions and PDF values.

    Raises:
        AttributeError: if ``type`` is neither ``'norm'`` nor ``'skewed'``.
    """
    lo, up = get_percentiles(y, lower, upper)
    X = np.linspace(lo, up, points)

    if type == 'norm':
        family = norm
    elif type == 'skewed':
        family = skewnorm
    else:
        raise AttributeError("'type' not recognized.")

    fitted = family.fit(y)
    return X, family.pdf(X, *fitted)
def table2(name, func, tex):
    """Makes table 2.

    For every deterministic variable whose name contains "Lambda", reports
    the posterior mean with its 95% HPD bounds and the ratio of the standard
    normal density at 0 to the density at 0 of a skew-normal fit of the
    posterior samples. The table is written as LaTeX to ``<name>/table2.tex``.

    Args:
        name (str): Descriptive name of the model. Posterior samples, statistics,
            and figures are generated and saved in a subdirectory with this name.
        func (:obj:`<class 'function'>): Function for model construction. Should
            return a formatted copy of the data.
        tex (bool): If True, saves the table to the manuscript subdirectory.

    """
    with pm.Model() as m:
        fit_model(name, func)
        trace = pm.load_trace(name)
        params = sorted([p.name for p in m.deterministics if "Lambda" in p.name])
        df = pm.summary(trace, var_names=params)

    table = []
    for p, i in zip(params, interps):
        fitted = skewnorm.fit(trace[p])
        prior_at_zero = norm.pdf(0)
        posterior_at_zero = skewnorm.pdf(0, *fitted)
        bf = prior_at_zero / posterior_at_zero
        mean, lower, upper = df.loc[p, ["mean", "hpd_2.5", "hpd_97.5"]]
        summary = "%s (%s, %s)" % (latexify(mean), latexify(lower), latexify(upper))
        dic = {
            "Variable": p,
            "Posterior mean (95% HPD)": summary,
            "During roved-frequency trials ...": i,
            "BF": latexify(bf),
            "Evidence": interpret(bf),
        }
        table.append(dic)

    # Column order follows the key order of the last row built above.
    df = pd.DataFrame(table)[dic.keys()]
    df.to_latex(f"{name}/table2.tex", escape=False, index=False)
    if tex is True:
        df.to_latex("manuscript/table2.tex", escape=False, index=False)
def fit_params_to_1d_data(logX):
    """
    Fit a skew-normal distribution to every ``logX[m, p]`` slice.

    Args
    ----
    logX: Logarithm of one-dimensional capacity data, indexed by
          module and phase resolution index

    Returns
    -------
    Array of shape ``(logX.shape[0], logX.shape[1], 3)`` holding the
    ``skewnorm.fit`` result (shape, loc, scale) for each slice.
    """
    n_modules, n_phases = logX.shape[0], logX.shape[1]
    params = np.zeros((n_modules, n_phases, 3))
    for cell in np.ndindex(n_modules, n_phases):
        params[cell] = skewnorm.fit(logX[cell])
    return params
def compare_hist_to_norm(data, bins=25):
    """
    Draw a density histogram of ``data`` with normal and skew-normal fits.

    A new 10x5 figure is created; the title reports the fitted normal mean
    and standard deviation and the sample skewness. The figure is not shown.

    :param data: 1-D numeric samples
    :param bins: number of histogram bins
    :return: None
    """
    plt.figure(figsize=(10, 5))
    mu, std = scipy.stats.norm.fit(data)
    plt.hist(data, bins=bins, density=True, alpha=0.6, color='purple', label="Données")

    # Evaluate both PDFs across the x-range chosen by the histogram.
    left, right = plt.xlim()
    X = np.linspace(left, right)
    normal_curve = scipy.stats.norm.pdf(X, mu, std)
    skewed_curve = skewnorm.pdf(X, *skewnorm.fit(data))
    plt.plot(X, normal_curve, label="Normal Distribution")
    plt.plot(X, skewed_curve, color='black', label="Skewed Normal Distribution")

    sk = scipy.stats.skew(data)
    title2 = "Moments mu: {}, sig: {}, sk: {}".format(round(mu, 4), round(std, 4), round(sk, 4))
    plt.ylabel("Fréquence", rotation=90)
    plt.title(title2)
    plt.legend()
def fit_skewnormal(signal, tag):
    """Fit a skew-normal distribution to ``signal["mean"]``.

    The interval containing ``CONFIDENCE`` of the fitted distribution is
    computed. When ``PLOTTING`` is truthy, a histogram with the fitted PDF
    and the interval bounds is saved under ``analysis/images/``.

    Returns:
        dict with keys ``"distribution"``, ``"params"`` (one dict with the
        fitted shape ``"a"``, ``"loc"`` and scale stored as ``"std"``) and
        ``"confidence"`` (a one-element list holding the interval).
    """
    samples = signal["mean"].values
    # skewnorm.fit returns (shape, loc, scale).
    shape, location, spread = skewnorm.fit(samples)
    confidence_interval = skewnorm.interval(CONFIDENCE, shape, loc=location, scale=spread)

    if PLOTTING:
        # Histogram of the samples.
        plt.subplots()
        plt.hist(samples, bins=25, density=True, alpha=0.6, color="g")

        # Fitted PDF across the histogram's x-range.
        lower, upper = plt.xlim()
        grid = np.linspace(lower, upper, 100)
        plt.plot(grid, skewnorm.pdf(grid, shape, location, spread), "k", linewidth=1)
        plt.title("Fit results skewnormal: a=%2f loc = %.2f, std = %.2f" % (shape, location, spread))

        # Bounds of the confidence interval.
        for bound in (confidence_interval[0], confidence_interval[1]):
            plt.axvline(x=bound)
        plt.savefig(f"analysis/images/{slugify(tag)}_fit_skew_histogram.png", format="png")

    fitted = {"a": shape, "loc": location, "std": spread}
    return {
        "distribution": "skewnormal",
        "params": [fitted],
        "confidence": [confidence_interval],
    }
def sep_two_skewed_normals(x, th_init):
    """Iteratively find a threshold separating ``x`` into two skew-normal clusters.

    Starting from ``th_init``, the values below the threshold (cluster 0) and
    at/above it (cluster 1) are each fitted with ``skewnorm.fit``; a new
    threshold is then searched where the two fitted densities cross, and the
    procedure repeats until the size of cluster 0 stops changing or repeats
    a previously seen size.

    ``x`` is used with ``.size``, ``.min()`` and boolean-mask indexing
    (presumably a 1-D numpy array -- confirm against callers).

    Returns:
        ``(th, (m0, m1, s0, s1, a0, a1))``: the threshold and the location,
        scale and shape parameters of cluster 0 and cluster 1.
    """
    x0 = x[x < th_init]
    x1 = x[x >= th_init]
    # Nothing below the initial threshold: return placeholder parameters
    # for cluster 0 and sample statistics for cluster 1.
    if x0.size == 0:
        return th_init, (x.min() - 1, x1.mean(), 0.01, x1.std(), 0, 0)
    # A single sample cannot be fitted; fall back to fixed shape/scale.
    if x1.size == 1:
        a1 = TH_SKEWNESS
        # NOTE(review): uses the mean of x0, not x1 -- confirm this is intended.
        m1 = x0.mean()
        s1 = MIN_SCALE
    else:
        a1, m1, s1 = skewnorm.fit(x1)
        # Cap the shape parameter: refit with the shape fixed (f0) at the cap.
        if a1 > TH_SKEWNESS:
            a1, m1, s1 = skewnorm.fit(x1, f0=TH_SKEWNESS)
    if x0.size == 1:
        a0 = TH_SKEWNESS
        m0 = x0.mean()
        s0 = MIN_SCALE
    else:
        a0, m0, s0 = skewnorm.fit(x0)
        if a0 > TH_SKEWNESS:
            a0, m0, s0 = skewnorm.fit(x0, f0=TH_SKEWNESS)
    num_x0_last = x0.size
    num_change = 1
    x_sorted = sorted(x)
    # Sizes of cluster 0 seen so far; a repeated size ends the loop.
    nums_x0 = [num_x0_last, ]
    while num_change:
        # E, binary search for new th
        # Search between the middle of cluster 0 and the middle of cluster 1
        # (indices into the sorted values).
        i0 = int(x0.size/2)
        i1 = x.size - int(x1.size/2)
        while i1 - i0 > 1:
            i = int((i0 + i1) / 2)
            # Positive when cluster 0's density dominates at this value.
            p0 = skewnorm.pdf(x_sorted[i], a0, m0, s0) - skewnorm.pdf(x_sorted[i], a1, m1, s1)
            if p0 > 0:
                i0 = i
            else:
                i1 = i
        # NOTE(review): if x1 holds fewer than two samples, i1 starts at
        # x.size, which is past the end of x_sorted unless the search lowers it.
        th = (x_sorted[i0] + x_sorted[i1]) / 2
        x0 = x[x < th]
        x1 = x[x >= th]
        # M
        if x0.size == 0:
            break
        if x1.size == 1:
            a1 = TH_SKEWNESS
            # NOTE(review): same x0-vs-x1 question as in the initial fit above.
            m1 = x0.mean()
            s1 = MIN_SCALE
        else:
            a1, m1, s1 = skewnorm.fit(x1)
            if a1 > TH_SKEWNESS:
                a1, m1, s1 = skewnorm.fit(x1, f0=TH_SKEWNESS)
        if x0.size == 1:
            a0 = TH_SKEWNESS
            m0 = x0.mean()
            s0 = MIN_SCALE
        else:
            a0, m0, s0 = skewnorm.fit(x0)
            if a0 > TH_SKEWNESS:
                a0, m0, s0 = skewnorm.fit(x0, f0=TH_SKEWNESS)
        # update
        num_change = x0.size - num_x0_last
        num_x0_last = x0.size
        if num_x0_last not in nums_x0:
            nums_x0.append(num_x0_last)
        else:
            break
    # Never place the threshold above cluster 0's TH_SKEWNORM_PPF quantile.
    th = min(skewnorm.ppf(TH_SKEWNORM_PPF, a0, m0, s0), th)
    # extreme case that under very weak L1 constraint, negligible cluster is fitted with large sigma
    if s1 > 0.1 and s0 / s1 > 10:
        th = min(skewnorm.ppf(1e-4, a1, m1, s1), th)
    return th, (m0, m1, s0, s1, a0, a1)
# Keep rows with a known mass and year, 1975 < year < 2010, and non-zero mass.
df = df.dropna(subset=["mass", "year"])
df = df.loc[(df["year"] > 1975) & (df["year"] < 2010) & (df["mass"] != 0)]
# Ratio applied to the yearly count further down; land_area, earth_total_area
# and not_populated_rate are defined outside this section.
observed_rate = land_area / earth_total_area * not_populated_rate
logmass = np.log(df["mass"])
# Exploratory plots: log-mass over time, records per year, pairwise scatter.
plt.plot(df["year"], logmass, "bo")
plt.show()
plt.hist(df["year"], bins=50)
plt.show()
pd.plotting.scatter_matrix(df[["mass", "year", "reclat", "reclong"]], figsize=(7, 7))
plt.show()
# Fit a skew-normal distribution to the log-masses and compare it with the histogram.
ms = np.linspace(-5, 20, 100)
p_skewnorm = skewnorm.fit(logmass)
pdf_skewnorm = skewnorm.pdf(ms, *p_skewnorm)
plt.hist(logmass, bins=50, alpha=0.2, density=True)
plt.plot(ms, pdf_skewnorm, c="r")
plt.show()
# Log of sphere volume (radius 500) times 1600 times 1000
# (presumably metres, kg/m^3 and g/kg, matching the units of "mass" -- confirm).
mass_of_doom = np.log(
    (4 / 3) * np.pi * 500**3 * 1600 * 1000)  # Just using a spherical approximation and some avg density
# Fitted probability that a single log-mass exceeds the threshold.
meteor_is_doom = 1 - skewnorm.cdf(mass_of_doom, *p_skewnorm)
# 1000 times the mean yearly record count, scaled up by the observed rate.
num_events = 1000 * df["year"].value_counts().mean() / observed_rate
print(meteor_is_doom * num_events)
# Read the spectral-window selection typed by the user.
spw = input()
print("Select baseline pair from " + str(uvp.get_blpairs()) + " by typing the index : ")
# NOTE(review): under Python 3, input() returns a string; indexing blpairs
# with it would need an int conversion if blpairs is a list -- confirm.
i = input()
key = (spw, blpairs[i], pol)
power = np.abs(np.real(uvp.get_data(key)))
# Select the delay rows and time columns of interest, then flatten to 1-D.
data = power.T[min_delay_index[0][0]:max_delay_index[0][0]][:, time_min:time_max].flatten()
fig_hist2, ax_hist2 = plt.subplots(2, figsize=(12, 8))
x = np.linspace(min(data), max(data), 30)
# We now fit a skewed probability distribution.
ax_hist2[0].plot(x, skewnorm.pdf(x, *skewnorm.fit(data)), lw=3, color='k', label='skewnorm pdf')
p4 = ax_hist2[0].hist(data, bins=30, density=True, linewidth=2, edgecolor='k')
ax_hist2[0].set_ylabel("Probability", fontsize=14)
ax_hist2[0].set_xlabel(r"$P(k)\ \rm [mK^2\ h^{-3}\ Mpc^3]$", fontsize=14)
# We plot the residuals between our histogram and our fit.
# p4[0] holds the 30 bin heights; x holds 30 evenly spaced points from
# min(data) to max(data), which are not the bin centers.
# The distribution is fitted a second time here.
ax_hist2[1].plot(x, p4[0] - skewnorm.pdf(x, *skewnorm.fit(data)))
"../Data/Empirical/Emp_Phi_Final/nosp_main_phi.csv", sep=",", header=0) avg_nosp = gmean(exp_nosp[["Phi"]]) mp = np.array(exp_mc[["Phi"]])[:, 0] nosp = np.array(exp_nosp[["Phi"]])[:, 0] print "Average Phi, secretory: ", avg_mc[0] print "Median Phi, secretory: ", np.median(mp) print "Variance Phi, secretory ", np.var(mp) print "" print "Average Phi, nonsecretory: ", avg_nosp[0] print "Median Phi, nonsecretory: ", np.median(nosp) print "Variance Phi, nonsecretory ", np.var(nosp) total = 0.0 if sys.argv[1] == "skewnorm": mp_shape, mp_loc, mp_s, = skewnorm.fit(mp) nosp_shape, nosp_loc, nosp_s = skewnorm.fit(nosp) dist_mp = skewnorm(mp_shape, mp_loc, mp_s) dist_nosp = skewnorm(nosp_shape, nosp_loc, nosp_s) elif sys.argv[1] == "norm": mp_loc, mp_s = norm.fit(mp) nosp_loc, nosp_s = norm.fit(nosp) dist_mp = norm(mp_loc, mp_s) dist_nosp = norm(nosp_loc, nosp_s) elif sys.argv[1] == "lognorm": mp_shape, mp_loc, mp_s, = lognorm.fit(mp, floc=0) nosp_shape, nosp_loc, nosp_s = lognorm.fit(nosp, floc=0) dist_mp = lognorm(mp_shape, mp_loc, mp_s) dist_nosp = lognorm(nosp_shape, nosp_loc, nosp_s) index = 0
# Position where y is largest, taken as the mode.
mode = x[np.argmax(y)]
# Shift both the grid and the samples so that the mode sits at 0.
x = x - mode
s = s - mode
ax.plot(x, y, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
ax.axvline(x=0, color='y', label='mode')
ax.hist(s, density=True)
ax.set_xlabel('noise')
ax.set_ylabel('Number of samples/PDF')
ax.set_title('Skewnormal histogram, shape = %.1f, loc = %.1f, w = %.1f' % (a, loc, w))
# Annotation box stating the applied shift.
text = 'Shift all noise by mode = %.3f' % mode
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(0.05, 0.8, text, transform=ax.transAxes, fontsize=12, verticalalignment='top', bbox=props)
ax.legend()
plt.show()
# Python 2 print statements and raw_input are used below.
print 'mode: ', mode
# Fit a skew-normal to the shifted samples; the result is (shape, loc, scale).
model = skewnorm.fit(s)
print model
# Wait for the user before continuing.
raw_input('press')
#+++ x1[0]=xbar; x1[1:]=x[nsum:] #+++ y1[0]=ytilde; y1[1:]=y[nsum:] #+++ ax2.plot(x1,y1,color='blue') ax2.plot(x, y, color='blue') ax2.plot([x[0], x[-1]], [1, 1], color='grey') ax2.plot([x[0], x[-1]], [1, 1], color='grey') ax2.set_ylabel('b5 m2 on/off ratio') x = np.linspace(2000, 21500, 100) # mu_ge,std_ge = norm.fit(b5_img_ge) # p_ge = norm.pdf(x,mu_ge,std_ge) a, loc, scale = skewnorm.fit(b5_img_ge[np.where(b5_img_ge > 15000)]) p_ge = skewnorm.pdf(x, a, loc, scale) p_ge = p_ge / np.amax(p_ge) p_ge = p_ge * (np.amax(y_hist_ge) - bkg) ax1.plot(x, p_ge + bkg, linewidth=2, color='cyan') # mu_lt,std_lt = norm.fit(b5_img_lt) # p_lt = norm.pdf(x,mu_lt,std_lt) a, loc, scale = skewnorm.fit(b5_img_lt[np.where((b5_img_lt > 7500) & (b5_img_lt < 15000))]) p_lt = skewnorm.pdf(x, a, loc, scale) p_lt = p_lt / np.amax(p_lt) p_lt = p_lt * (np.amax(y_hist_lt) - bkg) ax1.plot(x, p_lt + bkg, linewidth=2, color='gold')
#distLATarray = (distLATarray - np.mean(distLATarray, axis=0)) / np.std(distLATarray, axis=0) #distEarray = (distEarray - np.mean(distEarray, axis=0)) / np.std(distEarray, axis=0) # Distribution Test y = np.array(distNMRSE) y_centered = (y - np.mean(y)) / (np.max(y) - np.min(y)) y = (y - np.mean(y)) / np.std(y) plt.figure() plt.plot(y, label='NRMSE') plt.title('NRMSE per k-fold validation iteration') plt.grid() x = np.linspace(np.min(y), np.max(y), iters) bins = np.linspace(np.min(y), np.max(y), iters + 1) hist, bin_edges = np.histogram(y, bins=bins, density=True) mean, var = norm.fit(y) # Get first 2 moments of data smean, svar, sk = skewnorm.fit(y) # Get first 3 moments of data #df, nc, cmean, cvar = ncx2.fit(y, 4) # Get 3 first moments of data gNorm = norm.pdf(x, mean, var) # Center and scale a Gaussian function sNorm = skewnorm.pdf(x, smean, svar, sk) # Center and scale a Skewed Gaussian function #chiSq = ncx2.pdf(x, 4, 1) # Center and scale a Chi-Square function plt.figure() plt.plot(x, gNorm, 'r-', label='Norm PDF') plt.plot(x, sNorm, 'g-', label='Skewed Norm PDF') #plt.plot(x, chiSq,'m-', label='Chi-Square PDF') plt.bar(bin_edges[:-1], hist, width=(max(bin_edges) - min(bin_edges)) / iters) plt.title('NRMSE distribution') plt.xlim(np.min(x), np.max(x)) plt.legend()
def calculate_skew_norm_params(train_features_given_class, n_classes):
    """Fit a skew-normal distribution to every (class, feature) pair.

    Args:
        train_features_given_class: array indexed as
            ``[class, sample, feature]``.
        n_classes: number of classes (leading-axis entries) to fit.

    Returns:
        ``numpy.ndarray`` of shape ``(n_classes, n_features, 3)`` holding the
        ``skewnorm.fit`` result (shape, loc, scale) for each pair.
    """
    # Iterate over the feature axis itself; bounding the feature loop by
    # n_classes drops features (or raises IndexError) whenever the two
    # counts differ.
    n_features = np.shape(train_features_given_class)[2]
    return np.array([
        [
            skewnorm.fit(train_features_given_class[class_i, :, feature_i])
            for feature_i in range(n_features)
        ]
        for class_i in range(n_classes)
    ])
def extract_corner_pixel_info(data, cremove, acis_prefix, obsid, omode, fits): """ extract and analyze acis corner pixel distribution input: data --- event data cremove --- a list of pixel position to remove aics_prefix --- prefix of the output files obsid --- obsid omode --- data mode fits --- fits file name output: <plot_dir>/Ind_Plots/<acis_prefix>_<ccd_id>_<tail>.png (e.g. acisf20783_I2_hist.png) <plot_dir>/Ind_Plots/<acis_prefix>_<taiL>.png (e.g., acisf20783_cp.png) <data_dir>/<ccd)_id>.dat """ # #--- select data with grade 0, 2, 3, 4, or 6 # mask = data['grade'] != 1 data2 = data[mask] mask = data2['grade'] != 4 data3 = data2[mask] mask = data3['grade'] != 7 data4 = data3[mask] # #--- create lists of lists to save fitted results # nc_list = [[] for x in range(0, 4)] #--- normal distribution center nw_list = [[] for x in range(0, 4)] #--- normal distribution width sc_list = [[] for x in range(0, 4)] #--- skewed normal distribution center sw_list = [[] for x in range(0, 4)] #--- skewed normal distribution width sk_list = [[] for x in range(0, 4)] #--- skewness m_list = [[] for x in range(0, 4)] #--- bin position # #--- go through each ccd # for n in range(0, 4): ccd = ccds[n] cname = ccd_id[n] mask = data4['ccd_id'] == ccd data5 = data4[mask] if len(data5) < 1: continue # #--- devide the data into 16 sections # minexpo = min(data5['expno']) maxexpo = max(data5['expno']) kstop = int((maxexpo - minexpo) / 16.0 / binsize) + 1 k = 0 #--- bin skip counter: reset after each hist plotted m = 0 #--- bin counter j = minexpo # #--- create holders for data for later use (creating a matrix of histogram plots) # data_list = [] mnp_list = [] wnp_list = [] msp_list = [] wsp_list = [] skp_list = [] bp_list = [] while j < maxexpo: k += 1 m += 1 mask = data5['expno'] >= j data6 = data5[mask] mask = data6['expno'] < (j + binsize) data7 = data6[mask] pdata = data7['phas'] edata = data7['expno'] hlist = flaten_the_data(pdata, cremove) if hlist != 'NA': mu = numpy.mean(hlist) std = 
numpy.std(hlist) [skew, smu, sstd] = skewnorm.fit(hlist) # #--- save data for the tending plots # nc_list[n].append(mu) nw_list[n].append(std) sc_list[n].append(smu) sw_list[n].append(sstd) sk_list[n].append(skew) m_list[n].append(numpy.mean(edata)) # #--- save histogram data for bin: m if k reachs kstop # if k == kstop: data_list.append(hlist) mnp_list.append(mu) wnp_list.append(std) msp_list.append(smu) wsp_list.append(sstd) skp_list.append(skew) bp_list.append(m) # #--- set k = 0for the next round # k = 0 j += binsize # #--- create histogram plots in a multipanel plot # create_histogram_plot(data_list, mnp_list, wnp_list, msp_list, wsp_list,\ skp_list, bp_list, acis_prefix, ccd_id[n], omode) # #--- save data # stime = numpy.mean(data4['time']) save_data(stime, m_list[n], nc_list[n], nw_list[n], sc_list[n], sw_list[n],\ sk_list[n], ccd_id[n], obsid, omode) # #--- now create trend plots: normal distribution and skewed normal distribution # create_trend_plots(ccd_id, m_list, nc_list, fits, acis_prefix, omode) create_trend_plots(ccd_id, m_list, sc_list, fits, acis_prefix, omode, sk=1)
def fit(self, x, y=None):
    """Fit a skew-normal distribution to the 1-D samples ``x``.

    The fitted shape, location and scale are stored on the instance as
    ``_a``, ``_loc`` and ``_scale``. ``y`` is accepted but not used.

    Returns:
        ``self``.
    """
    samples = np.asarray(x)
    assert samples.ndim == 1
    fitted = skewnorm.fit(samples)
    self._a, self._loc, self._scale = fitted
    return self
def plot_diagnostic(self, bins: int = 25) -> None:
    """Show a 2x2 diagnostic figure for the forecast errors.

    Panels: histogram of the residuals with normal and skew-normal fits
    (top-left), scatter of observations against forecast error with a fitted
    line (top-right), autocorrelation of the forecast errors (bottom-left),
    and a probability plot of the residuals drawn through pyplot.

    :param bins: number of histogram bins
    """
    # error
    residuals = self.error_by_models.flatten()
    # Standardized residuals; not used in the lines below.
    norm_residuals = (residuals - np.mean(residuals)) / np.std(residuals)
    # Element [0] of each entry is used as the observation and
    # element [1] - element [0] as the forecast error.
    flatten_all_real = np.array([self.real_and_forcecast[i][0] for i in range(self.num_forecasting \
        - self.forecast_range)]).flatten()
    flatten_all_forecast_error = np.array([
        self.real_and_forcecast[i][1] - self.real_and_forcecast[i][0]
        for i in range(self.num_forecasting - self.forecast_range)
    ]).flatten()
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
    fig.subplots_adjust(hspace=0.5)
    # Histogram
    mu, std = scipy.stats.norm.fit(residuals)
    ax[0, 0].hist(residuals, bins=bins, density=True, alpha=0.6, color='purple', label="Données")
    # Plot the PDF.
    xmin, xmax = ax[0, 0].get_xlim()
    X = np.linspace(xmin, xmax)
    ax[0, 0].plot(X, scipy.stats.norm.pdf(X, mu, std), label="Normal Distribution")
    ax[0, 0].plot(X, skewnorm.pdf(X, *skewnorm.fit(residuals)), color='black', label="Skewed Normal Distribution")
    # Same fit as above, repeated.
    mu, std = scipy.stats.norm.fit(residuals)
    sk = scipy.stats.skew(residuals)
    title2 = "Moments mu: {}, sig: {}, sk: {}".format(
        round(mu, 4), round(std, 4), round(sk, 4))
    ax[0, 0].set_ylabel("Fréquence", rotation=90)
    ax[0, 0].set_title(title2)
    ax[0, 0].legend()
    # OLS
    alpha, beta = self.__alpha_beta_coef(x=flatten_all_forecast_error, y=flatten_all_real)
    ax[0, 1].scatter(y=flatten_all_real, x=flatten_all_forecast_error)
    ax[0, 1].plot(flatten_all_forecast_error, alpha + flatten_all_forecast_error * beta, color="red")
    # ax[0, 1].grid()
    ax[0, 1].set_ylabel("Observations")
    ax[0, 1].set_xlabel("Forecast error")
    ax[0, 1].set_title("OLS Obs vs error")
    # autocorr
    plot_acf(flatten_all_forecast_error, lags=self.forecast_range, ax=ax[1, 0])
    # qqplot
    # NOTE(review): plot=plt draws on pyplot's current axes, presumably the
    # bottom-right panel -- confirm, or pass ax[1, 1] explicitly.
    stats.probplot(residuals, plot=plt)
    plt.show()
    pass
# First, plot the skew-normal distribution fitted to every distance bin.
# The fits come from scipy's ``skewnorm.fit``, which performs a
# maximum-likelihood fit by default.
plt.figure(3)
plt.xlabel('distance bin')
plt.ylabel('sizes (m)')
n = 1
x = np.arange(0, 200, 0.1)  # grid on which the fitted PDFs are evaluated
skewed_mean_distances = []
skewed_std_distances = []
skewed_variance_distances = []
skewed_kurtosis_distances = []
skewed_skew_distances = []
for bins in size_nest_distances:
    # Bins with 10 or fewer entries are skipped.
    if len(bins) > 10:
        a, loc, scale = skewnorm.fit(bins)  # fit this distance bin
        mean, var, skew, kurt = skewnorm.stats(a, loc, scale, moments='mvsk')
        skewed_mean_distances.append(mean)
        skewed_std_distances.append(skewnorm.std(a, loc, scale))  # std of the fitted distribution
        skewed_variance_distances.append(var)  # variance, if of interest
        skewed_skew_distances.append(skew)  # skewness, if of interest
        skewed_kurtosis_distances.append(kurt)  # kurtosis, if of interest
        pdf = skewnorm.pdf(x, a, loc, scale)  # fitted probability density function
        # Plot the fitted distribution together with the histogram of the bin.
        plt.subplot(1, len(size_nest_distances), n)
        # ``density`` replaces the ``normed`` keyword, which newer matplotlib
        # releases no longer accept.
        plt.hist(bins, bins=50, density=True)
        plt.plot(x, pdf)
if task_type not in all_solved_sizes: all_solved_sizes[task_type] = solved_sizes else: all_solved_sizes[task_type] = np.concatenate( (all_solved_sizes[task_type], solved_sizes), 0) for task_type in all_solved_sizes.keys(): plt.figure() n, bins, patches = plt.hist(all_solved_sizes[task_type], 50, density=True, facecolor='blue', alpha=0.5) X = np.linspace(min(all_solved_sizes[task_type]), max(all_solved_sizes[task_type])) plt.plot(X, skewnorm.pdf(X, *skewnorm.fit(all_solved_sizes[task_type]))) kernel = gaussian_kde(all_solved_sizes[task_type], bw_method="silverman") plt.plot(X, kernel.pdf(X)) plt.title(task_type) plt.show() for task_type in all_solved_loc.keys(): plt.figure() plt.scatter(all_solved_loc[task_type][:, 0], all_solved_loc[task_type][:, 1]) plt.title(task_type) plt.show()
def calculateAnglePerParticle(gap_in_cm):
    """Compute per-particle angles from detector hits and append a summary row.

    Reads ``./data/hits.csv`` (columns det, x, y, z, energy). Every row with
    ``det == 1`` that is immediately followed by a row with ``det == 2`` is
    treated as one particle; the x and z displacement between the two hits
    and ``gap_in_cm`` give the angles ``phi`` and ``theta`` in degrees.
    Normal and skew-normal fits of both angle samples are computed and one
    comma-separated line is appended to ``./data/results.txt``.

    Args:
        gap_in_cm: distance between the two detectors, in the same length
            unit as the hit coordinates (cm, per the parameter name).

    Raises:
        ValueError: if the file holds no hits, no hit on detector 2, or no
            detector-1 hit directly followed by a detector-2 hit.
    """
    # Read in raw hit data
    # NOTE(review): ``error_bad_lines`` was removed in pandas 2.0
    # (``on_bad_lines='skip'`` is its replacement from pandas 1.3 on); it is
    # kept so the call still works with the pandas version this file targets.
    detector_hits = pd.read_csv('./data/hits.csv',
                                names=["det", "x", "y", "z", "energy"],
                                dtype={
                                    "det": np.int8,
                                    "x": np.float64,
                                    "y": np.float64,
                                    "z": np.float64,
                                    "energy": np.float64
                                },
                                delimiter=',',
                                error_bad_lines=False,
                                engine='c')

    det = detector_hits['det'].values
    if len(det) == 0:
        raise ValueError('No particles hits on either detector!')
    # ``in`` on a Series tests the index labels, so the values are checked.
    if 2 not in det:
        raise ValueError('No particles hit detector 2!')

    # A pair is a detector-1 hit directly followed by a detector-2 hit.
    is_pair = (det[:-1] == 1) & (det[1:] == 2)
    if not is_pair.any():
        raise ValueError('No detector 1 hit is directly followed by a detector 2 hit!')
    x_pos = detector_hits['x'].values
    z_pos = detector_hits['z'].values
    deltaX_rm = x_pos[1:][is_pair] - x_pos[:-1][is_pair]
    deltaZ_rm = z_pos[1:][is_pair] - z_pos[:-1][is_pair]

    # Find angles in degrees
    theta = np.rad2deg(np.arctan2(deltaZ_rm, gap_in_cm))
    phi = np.rad2deg(np.arctan2(deltaX_rm, gap_in_cm))

    # Fit a normal distribution to data
    mu_theta, std_theta = norm.fit(theta)
    mu_phi, std_phi = norm.fit(phi)

    # Fit skew normal distribution to data and convert (shape, loc, scale)
    # to mean and standard deviation:
    #   delta = alpha / sqrt(1 + alpha^2)
    #   mean  = loc + scale * delta * sqrt(2 / pi)
    #   var   = scale^2 * (1 - 2 * delta^2 / pi)
    alpha_t, loc_t, scale_t = skewnorm.fit(theta)
    alpha_p, loc_p, scale_p = skewnorm.fit(phi)
    delta_t = alpha_t / np.sqrt(1 + alpha_t**2)
    # Each delta must use its own shape parameter.
    delta_p = alpha_p / np.sqrt(1 + alpha_p**2)
    mean_t = loc_t + scale_t * delta_t * np.sqrt(2 / np.pi)
    mean_p = loc_p + scale_p * delta_p * np.sqrt(2 / np.pi)

    # A variance that rounds to 0.00 is reported as missing.
    p_test = scale_p**2 * (1 - 2 * (delta_p**2) / np.pi)
    sig_p = None if np.equal(0, np.round(p_test, 2)) else np.sqrt(p_test)
    t_test = scale_t**2 * (1 - 2 * (delta_t**2) / np.pi)
    sig_t = None if np.equal(0, np.round(t_test, 2)) else np.sqrt(t_test)

    theta_actual, phi_actual, numberOfParticles = findSourceAngle()

    def _fmt(value):
        # Missing values are written as "nan"; round(None, 4) would raise.
        return 'nan' if value is None else str(round(value, 4))

    fields = [str(numberOfParticles), str(theta_actual), str(phi_actual)]
    fields += [
        _fmt(value) for value in (
            np.mean(theta), np.std(theta),
            np.mean(phi), np.std(phi),
            np.median(theta), np.median(phi),
            mu_theta, std_theta,
            mu_phi, std_phi,
            mean_t, sig_t,
            mean_p, sig_p,
        )
    ]
    with open('./data/results.txt', 'a') as f:
        f.write(','.join(fields) + '\n')
# ##### The Anderson-Darling test above value of 38.20 exceeds the 99% critical value of 1.088 by a large margin, indicating that the Normal distribution may be a poor choice to represent portfolio losses # In[ ]: ## Null Hypothesis - No Skewness # In[375]: # Test the data for skewness print("Skewtest result: ", skewtest(losses)) # In[376]: # Fit the portfolio loss data to the skew-normal distribution params = skewnorm.fit(losses) # In[377]: # Compute the 95% VaR from the fitted distribution, using parameter estimates VaR_95 = skewnorm.ppf(0.95, *params) print("VaR_95 from skew-normal: ", VaR_95) # Losses are not normally distributed as the critical value exceeeds the 99% conidence interval of test statistic value # Losses can be skewed # # Definition wiki - anderson # In many cases (but not all), you can determine a p value for the Anderson-Darling statistic and use that value to help you # determine if the test is significant are not. Remember the p ("probability") value is the probability of getting a result # #that is more extreme if the null hypothesis is true. If the p value is low (e.g., <=0.05), you conclude that the data do # not follow the normal distribution. Remember that you chose the significance level even though many people just use 0.05
def __create_pdf(val):
    """Return a frozen skew-normal distribution fitted to ``val``."""
    fitted = skewnorm.fit(val)
    return skewnorm(*fitted)