def plot_distrib_distance(list_values, name, dt, xmax, ymax, color):
    """Plot a normalised histogram of ``list_values`` with a Rayleigh fit.

    The figure is written to ``<name>_<dt>.svg`` and then closed.

    Parameters
    ----------
    list_values : sequence of float
        Samples to histogram and to fit.
    name, dt
        Converted with ``str`` to build the output file name.
    xmax : float
        Upper limit of the x axis (the lower limit is 0).
    ymax
        Not used by this function; kept so existing callers keep working.
    color
        Face colour of the histogram bars.

    Returns
    -------
    tuple
        ``(mean, std_error)`` of the fitted Rayleigh distribution, where
        ``std_error`` is the fitted standard deviation divided by
        ``sqrt(len(list_values))``.
    """
    # Sort once; the sorted samples are used for the fit and as x values
    # of the fitted curve.
    ordered = sorted(list_values)

    fig = matplotlib.pyplot.figure()
    try:
        ax = fig.add_subplot(1, 1, 1)
        ax.set_xlim([0, xmax])
        # `normed` was removed from Matplotlib's hist(); `density` is the
        # supported way to request a normalised histogram.
        matplotlib.pyplot.hist(list_values, 50, density=True,
                               facecolor=color, alpha=0.5)

        loc, scale = rayleigh.fit(ordered)
        pdf_fitted = rayleigh.pdf(ordered, loc=loc, scale=scale)
        mean_rayleigh = rayleigh.mean(loc=loc, scale=scale)
        std_rayleigh = rayleigh.std(loc=loc, scale=scale) / math.sqrt(
            float(len(list_values)))
        print('mean = ' + str(mean_rayleigh))
        print('std error = ' + str(std_rayleigh))

        matplotlib.pyplot.plot(ordered, pdf_fitted, 'g-')
        matplotlib.pyplot.savefig(str(name) + '_' + str(dt) + '.svg')
    finally:
        # Always release the figure, even if fitting or saving fails.
        matplotlib.pyplot.close(fig)
    return (mean_rayleigh, std_rayleigh)
def compute_array_ar(ruv):
    """Return ``61800 / (100 * L)`` for the values in ``ruv``.

    ``L`` is the smaller of the largest value in ``ruv`` and the upper end
    of the 0.992 interval of a Rayleigh distribution fitted to ``ruv``.

    :param ruv: NumPy array of values (must provide ``.max()``).
    :return: the computed ratio as a float.
    """
    loc, scale = rayleigh.fit(ruv)
    upper = rayleigh.interval(0.992, loc=loc, scale=scale)[1]
    linea = min(upper, ruv.max())
    return 61800 / (100. * linea)
def returnDistData(cls, self):
    """Evaluate seven fitted PDFs on ``self.data`` and store them.

    Each distribution is fitted and its PDF evaluated at ``self.data``.
    The curves are stored column-wise in ``self.distDF`` under the names
    ``gammaDist``, ``rayleighDist``, ``normDist``, ``lognormDist``,
    ``nakagamiDist``, ``exponDist`` and ``weibDist``.

    ``cls`` is accepted but not used. Nothing is returned.
    """
    samples = self.data
    curves = {}

    # The gamma parameters are fitted on 10 ** (data / 10), while the PDF
    # is evaluated on the untransformed data.
    # NOTE(review): looks like a dB -> linear conversion; confirm the
    # mismatch between fit and evaluation is intended.
    gamma_fit = gamma.fit(10**(samples / 10))
    curves['gammaDist'] = gamma.pdf(samples, *gamma_fit)

    # The remaining distributions are fitted directly on the data.
    direct = (
        ('rayleighDist', rayleigh),
        ('normDist', norm),
        ('lognormDist', lognorm),
        ('nakagamiDist', nakagami),
        ('exponDist', expon),
        ('weibDist', exponweib),
    )
    for label, dist in direct:
        curves[label] = dist.pdf(samples, *dist.fit(samples))

    labels = ['gammaDist'] + [label for label, _ in direct]
    stacked = np.column_stack([curves[label] for label in labels])
    self.distDF = pd.DataFrame(stacked, columns=labels)
def user_dist_kstest(sim_dist_vec, diff_dist_vec, fit_rayleigh=False, _n=100):
    """Compare friend and non-friend distance samples with a KS test.

    Parameters
    ----------
    sim_dist_vec : list of float
        Distances between the user and friends.
    diff_dist_vec : list of float
        Distances between the user and non-friends.
    fit_rayleigh : bool
        If true, fit a Rayleigh distribution to each sample and run the
        test on ``_n`` random draws from each fitted distribution instead
        of on the raw data.
    _n : int
        Number of random draws per fitted distribution.

    Returns
    -------
    float
        The p-value returned by ``kstest_2samp_greater``.

    Examples
    --------
    pval = user_dist_kstest(sim_dist_vec, diff_dist_vec)
    """
    # Work on NumPy arrays from here on.
    friends = np.array(sim_dist_vec)
    others = np.array(diff_dist_vec)

    if not fit_rayleigh:
        _, pval = kstest_2samp_greater(friends, others)
        return pval

    friend_fit = rayleigh.fit(friends)
    other_fit = rayleigh.fit(others)
    # Draw order (friends first) is kept so seeded runs stay reproducible.
    friend_draws = rayleigh.rvs(friend_fit[0], friend_fit[1], _n)
    other_draws = rayleigh.rvs(other_fit[0], other_fit[1], _n)
    _, pval = kstest_2samp_greater(friend_draws, other_draws)
    return pval
def user_dist_kstest(sim_dist_vec, diff_dist_vec, fit_rayleigh=False,
                     min_nobs=10, _n=100):
    """Two-sample KS test between friend and non-friend distances.

    Parameters
    ----------
    sim_dist_vec : list of float
        Distances between the user and friends.
    diff_dist_vec : list of float
        Distances between the user and non-friends.
    fit_rayleigh : bool
        If true, fit a Rayleigh distribution to each sample and test
        ``_n`` random draws from each fitted distribution instead of the
        raw data.
    min_nobs : int
        Minimum number of observations; currently not used.
    _n : int
        Number of random draws per fitted distribution.

    Returns
    -------
    float
        p-value of ``ks_2samp``.

    Examples
    --------
    pval = user_dist_kstest(sim_dist_vec, diff_dist_vec)
    """
    if not fit_rayleigh:
        return ks_2samp(sim_dist_vec, diff_dist_vec)[1]

    friend_loc, friend_scale = rayleigh.fit(sim_dist_vec)[:2]
    other_loc, other_scale = rayleigh.fit(diff_dist_vec)[:2]
    # Draw order (friends first) is kept so seeded runs stay reproducible.
    friend_draws = rayleigh.rvs(friend_loc, friend_scale, _n)
    other_draws = rayleigh.rvs(other_loc, other_scale, _n)
    outcome = ks_2samp(friend_draws, other_draws)
    return outcome[1]
def __init__(self, mode=0, elem=None, sample=None):
    """Set up the Rayleigh parameters and derived moments.

    :param mode: ``0`` takes the parameters from ``elem``; any other
        value estimates them from ``sample`` with ``rayleigh.fit``.
    :param elem: indexable holding ``(mu, sigma)``; read when ``mode == 0``.
    :param sample: data to fit; read when ``mode != 0``.

    Sets ``mu`` and ``sigma`` (used as ``loc`` and ``scale``),
    ``math_average`` (distribution mean) and ``dispersion`` (variance).
    """
    if mode != 0:
        location, spread = rayleigh.fit(sample)
    else:
        location, spread = elem[0], elem[1]
    self.mu = location
    self.sigma = spread

    moments = dict(loc=self.mu, scale=self.sigma)
    self.math_average = rayleigh.mean(**moments)
    self.dispersion = rayleigh.var(**moments)
def plot_maxwell(vel, label=None, draw=True):
    """Fit a zero-location Rayleigh distribution to the speeds in ``vel``.

    :param vel: 2-D array; the speed of each row is the Euclidean norm
        taken along axis 1.
    :param label: legend label for the fitted curve; a legend is only
        added when it is truthy.
    :param draw: when true, plot a 20-bin density histogram of the speeds
        and the fitted PDF between its 1st and 99th percentiles.
    :return: the KS statistic of the speeds against the fitted CDF.
    """
    squared = vel * vel
    speed = squared.sum(1) ** 0.5

    fitted = rayleigh.fit(speed, floc=0)
    dist = rayleigh(scale=fitted[1])

    if draw:
        plt.hist(speed, 20, density=True)
        lower, upper = dist.ppf(0.01), dist.ppf(0.99)
        x = np.linspace(lower, upper, 1000)
        plt.plot(x, dist.pdf(x), label=label)
        if label:
            plt.legend()

    ks_result = kstest(speed, dist.cdf)
    return ks_result[0]
def compute_initial_figure(self, ruv):
    """Draw the histogram of ``ruv`` with a fitted Rayleigh PDF.

    A 30-bin normalised histogram and the fitted PDF (red line) are drawn
    on ``self.axes``. A dashed vertical line marks the smaller of the
    largest value in ``ruv`` and the upper end of the fitted
    distribution's 0.992 interval. The interval is stored in
    ``self.interval``.

    :param ruv: NumPy array of values (must provide ``.max()``).
    """
    loc, scale = rayleigh.fit(ruv)
    x = np.linspace(0, ruv.max() + 100., 1000)
    pdf_fitted = rayleigh.pdf(x, loc=loc, scale=scale)

    # `normed` was removed from Matplotlib's hist(); `density` is the
    # supported way to request a normalised histogram.
    self.axes.hist(ruv, bins=30, density=True)
    self.axes.plot(x, pdf_fitted, 'r-')

    # Remember the limits so that drawing the marker does not rescale y.
    ylims = self.axes.get_ylim()
    self.interval = rayleigh.interval(0.992, loc=loc, scale=scale)
    linea = min(self.interval[1], ruv.max())
    self.axes.vlines(linea, 0, ylims[1], linestyles='dashed')
    self.axes.set_ylim(ylims)
def rayleigh_statistic(data, bins):
    """Fit a Rayleigh distribution to ``data`` and evaluate it on ``bins``.

    :param data: samples to fit.
    :param bins: points at which the fitted PDF is evaluated.
    :return: ``[params, pdf]`` where ``params`` is the ``(loc, scale)``
        tuple from ``rayleigh.fit`` and ``pdf`` holds the fitted PDF
        values. If the fit fails, ``[[-2, -2], [-1]]`` is returned. If
        only the PDF evaluation fails, ``pdf`` is the sentinel ``[-1]``.
    """
    rayleigh_pdf = [-1]

    try:
        rayleigh_param = rayleigh.fit(data)
    except Exception:
        # Sentinel parameters: the fit itself failed, nothing to evaluate.
        return [[-2, -2], rayleigh_pdf]

    try:
        rayleigh_pdf = rayleigh.pdf(bins, loc=rayleigh_param[0],
                                    scale=rayleigh_param[1])
    except Exception:
        rayleigh_pdf = [-1]

    return [rayleigh_param, rayleigh_pdf]
def super_hist(data_name):
    """Plot the histogram of one column with Weibull and Rayleigh fits.

    Reads the column ``data_name`` from the module-level ``data`` object,
    overlays a Weibull fit (location fixed at 0) and a Rayleigh fit,
    saves the figure under ``Directory`` and shows it.

    :param data_name: column name; characters 17-18 of the name are used
        in the output file name.
    """
    wind = data[data_name]
    support = np.linspace(wind.min(), wind.max(), 100)

    # Weibull fit with the location pinned to zero.
    p0, p1, p2 = scipy.stats.weibull_min.fit(wind, floc=0)
    plt.plot(support, scipy.stats.weibull_min.pdf(support, p0, p1, p2),
             'r-', lw=2, label='Weibull')

    # Equal weights summing to one, so bar heights are fractions.
    data[data_name].plot.hist(
        weights=np.ones_like(data.index) / len(data.index), bins=25)

    param = rayleigh.fit(wind)  # (loc, scale)
    plt.plot(support, rayleigh.pdf(support, loc=param[0], scale=param[1]),
             lw=2, label='Rayleigh')

    plt.legend()
    plt.xlabel("Mean wind speed [m/s]")
    plt.ylabel("Frequency [%]")

    # '\\ ' is the same backslash-plus-space separator the code always
    # produced; it is now written as a valid escape ('\ ' is an invalid
    # escape sequence and triggers a warning on recent Python versions).
    # NOTE(review): the space ends up in the file name; confirm whether a
    # plain path separator was intended.
    plt.savefig(Directory + '\\ ' + 'Super_' + data_name[17:19] + '.png')
    plt.show()
    return
def approximating_dists(data, bins):
    """Fit Rayleigh and exponential distributions to ``data``.

    :param data: samples to fit.
    :param bins: points at which the fitted PDFs are evaluated.
    :return: ``[exp_param, pdf_exp_fitted, rayleigh_param,
        pdf_rayleigh_fitted]``. An entry is ``None`` when the
        corresponding fit or PDF evaluation failed; a message is printed
        in that case.
    """
    # Pre-set every result so a failed step cannot leave a name unbound.
    rayleigh_param = pdf_rayleigh_fitted = None
    exp_param = pdf_exp_fitted = None

    try:
        rayleigh_param = rayleigh.fit(data)
    except Exception:
        print("screwed raleigh fit ")
    # Single-argument print() behaves the same on Python 2 and 3.
    print("params for rayleigh  %s" % (rayleigh_param,))
    if rayleigh_param is None:
        print(" returning as nothing to plot ")
    else:
        try:
            pdf_rayleigh_fitted = rayleigh.pdf(
                bins, loc=rayleigh_param[0], scale=rayleigh_param[1])
        except Exception:
            print(" returning as nothing to plot ")

    try:
        exp_param = expon.fit(data)
    except Exception:
        print("screwed expon fit ")
    print("params for exponential  %s" % (exp_param,))
    if exp_param is None:
        print(" returning as nothing to plot ")
    else:
        try:
            pdf_exp_fitted = expon.pdf(
                bins, loc=exp_param[0], scale=exp_param[1])
        except Exception:
            print(" returning as nothing to plot ")

    return [exp_param, pdf_exp_fitted, rayleigh_param, pdf_rayleigh_fitted]
def rayleigh_params(self):
    """Return the Rayleigh fit of ``self.speed``, computing it only once.

    The fit uses a location fixed at 0. The result is cached in
    ``self._rayleigh_params`` and reused while that attribute is not
    ``None``.
    """
    cached = self._rayleigh_params
    if cached is not None:
        return cached

    # Imported lazily so SciPy is only loaded when a fit is needed.
    from scipy.stats import rayleigh
    cached = rayleigh.fit(self.speed, floc=0)
    self._rayleigh_params = cached
    return cached
from scipy.stats import norm, rayleigh
from numpy import linspace
from pylab import plot, show, hist, figure, title
""" here we try to fit rayleigh distribution to data reference: glowingpython.blogspot.it"""

# Generate 150 samples from a Rayleigh distribution with loc=5, scale=2.
# (loc and scale are the location and scale parameters, not the mean and
# standard deviation.)
samp = rayleigh.rvs(loc=5, scale=2, size=150)

# Fit a Rayleigh distribution to the generated samples.
# param[0] and param[1] are the fitted loc and scale.
param = rayleigh.fit(samp)

# Points on the x axis used for plotting.
x = linspace(5, 13, 100)

# PDF of the fitted distribution.
pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])

# PDF of the distribution the samples were drawn from.
pdf = rayleigh.pdf(x, loc=5, scale=2)

title('Rayleigh distribution')

# Fitted distribution in red, generating distribution in blue.
plot(x, pdf_fitted, 'r-', x, pdf, 'b-')

# Normalised histogram of the samples. `normed` was removed from
# Matplotlib's hist(); `density` is the supported replacement.
hist(samp, density=True, alpha=0.3)
show()
def downtime_accepted_models(D=None, alpha=.05):
    """Fit ten candidate distributions to ``D`` and pick the best by AIC.

    For every candidate the function computes the fitted parameters, the
    p-value of a Kolmogorov-Smirnov test against the fitted distribution,
    and ``AIC = 2 * k - 2 * log-likelihood`` where ``k`` is the number of
    fitted parameters.

    :param D: samples to fit (defaults to an empty list).
    :param alpha: significance level; models with a KS p-value above it
        are "accepted".
    :return: ``(models, best)``. ``models`` is the list of accepted
        ``[name, params, p_value, aic]`` entries, or all ten entries when
        none is accepted; ``best`` is the entry of ``models`` with the
        lowest AIC (the first one on ties).
    """
    import numpy as np

    if D is None:
        D = []

    candidates = [
        ("uniform", uniform),
        ("expon", expon),
        ("rayleigh", rayleigh),
        ("weibull_min", weibull_min),
        ("gamma", gamma),
        ("gengamma", gengamma),
        ("invgamma", invgamma),
        ("gompertz", gompertz),
        ("lognorm", lognorm),
        ("exponweib", exponweib),
    ]

    model = []
    for dist_name, dist in candidates:
        params = dist.fit(D)
        # Sum the log-densities instead of taking the log of their
        # product: the product underflows to 0 for larger samples, which
        # turned every log-likelihood into -inf.
        llf = np.sum(np.log(dist.pdf(D, *params)))
        aic = 2 * len(params) - 2 * llf
        p_value = kstest(D, dist_name, params)[1]
        model.append([dist_name, params, p_value, aic])

    accepted_models = [entry for entry in model if entry[2] > alpha]
    pool = accepted_models if accepted_models else model
    # min() returns the first entry with the lowest AIC.
    best = min(pool, key=lambda entry: entry[3])
    return pool, best
plt.hist(DATOS, color="lightblue", bins=BINS) plt.xlabel("Datos") plt.ylabel("Frecuencia de aparición") plt.show() """ 5) Con los datos contenidos en datos.csv, encontrar la mejor curva de ajuste y graficar la curva de ajuste encontrada """ #Gráfica todas las curvas. plt.hist(DATOS, bins=BINS, alpha=0.7, color="lightblue", normed=True) xt = plt.xticks()[0] xmin, xmax = 0, max(xt) lnspc = np.linspace(xmin, xmax, len(DATOS)) c, d = rayleigh.fit(DATOS) modelo = rayleigh(c, d) pdf_g = rayleigh.pdf(lnspc, c, d) plt.plot(lnspc, pdf_g, 'k-', lw=5, alpha=1, color="blue", label='Rayleigh') e, f = norm.fit(DATOS) pdf_g = norm.pdf(lnspc, e, f) plt.plot(lnspc, pdf_g, 'r-', lw=2, alpha=0.5, color="red", label='Normal') g, h = uniform.fit(DATOS) pdf_g = uniform.pdf(lnspc, g, h) plt.plot(lnspc, pdf_g, 'r-', lw=2, alpha=0.5,
print "mu=\t" + str(mu) print "sigma=\t" + str(sigma) print "median=\t" + str(np.median(values, axis=0)) # print stats.skew(values) # the histogram of the data n, bins, patches = plt.hist(values, numbins, normed=True, facecolor="cyan", alpha=0.75) # doesn't appear to be lognormal shape, loc, scale = stats.lognorm.fit(values, floc=0) print np.log(scale), shape # mu, sigma, 4.57364532995 1.3159903533 dist = lognorm(shape, loc=np.log(scale)) # sigma, mu l = plt.plot(bins, dist.pdf(bins), "b--", linewidth=1) # let's try rayleigh # samp = rayleigh.rvs(loc=5,scale=2,size=150) # samples generation param = rayleigh.fit(values) # distribution fitting # x = linspace(5,13,100) # fitted distribution pdf_fitted = rayleigh.pdf(bins, loc=param[0], scale=param[1]) print param # original distribution pdf = rayleigh.pdf(bins, loc=5, scale=2) # title('Rayleigh distribution') # plot(x,pdf_fitted,'g-',x,pdf,'b-') # hist(samp,normed=1,alpha=.3) # show() normed_data = (values - mu) / sigma print "kstest:", print (stats.kstest(normed_data, "norm")) print "anderson-darling:",
def fitDist(self):
    """Return the corrected AIC of seven distributions fitted to the data.

    For each distribution, with ``k`` fitted parameters, ``n`` samples
    and negative log-likelihood ``nll``, the value is
    ``2 * k + 2 * nll + 2 * k * (k + 1) / (n - k - 1)``.

    :return: tuple ``(gamma, rayleigh, norm, lognorm, nakagami, expon,
        exponweib)`` of scores computed on ``self.data``.
    """
    samples = self.data
    n = len(samples)

    def corrected_aic(dist):
        # Fit, then score the fit on the same samples.
        params = dist.fit(samples)
        k = len(params)
        neg_loglik = -1 * np.sum(np.log(dist.pdf(samples, *params)))
        return 2 * k + 2 * neg_loglik + (2 * k * (k + 1) / (n - k - 1))

    families = (gamma, rayleigh, norm, lognorm, nakagami, expon, exponweib)
    return tuple(corrected_aic(dist) for dist in families)
def selector(self, array):
    """
    Selects SBs that can be observed given the current weather conditions,
    HA range, array type and array configuration (in the case of 12m array
    type) and SB/Project Status.

    Starting from the rows of ``self.sb_summary`` that belong to the
    requested array, the method merges in transmission, tau, tsky,
    receiver and observability data. It then adds the boolean columns
    ``sel_array``, ``sel_trans``, ``sel_ha``, ``sel_el``, ``sel_st`` and
    ``sel_exe``, plus the fractions ``blfrac``, ``tsysfrac`` and ``frac``.
    A running count is printed after each stage.

    See :ref:`Selection and Data preparation <selection>`

    :param array: '12m', '7m', 'tp'
    :type array: String.
    :return: None. Depending on the array type, stores the filtered table
        in select12m, select7m or selecttp and the unfiltered table in
        all12m, all7m or alltp.
    """
    # NOTE(review): the source of this method was whitespace-collapsed;
    # the block nesting below was reconstructed from the statement order
    # and should be checked against the original file.

    # TODO: add a 5% padding to fraction selection.
    # TODO: check with Jorge Garcia the rms fraction against reality.
    self.check_observability(array)

    # Map the short array name to the values used in the `array` column.
    if array not in ['12m', '7m', 'tp']:
        print("Use 12m, 7m or tp for array selection.")
        return None
    else:
        if array == '12m':
            array1 = ['TWELVE-M']
        elif array == '7m':
            array1 = ['SEVEN-M', 'ACA']
        else:
            array1 = ['TP-Array']

    sel = self.sb_summary.copy()
    if array == '7m':
        sel = sel[
            (sel.array == array1[0]) | (sel.array == array1[1])]
    else:
        sel = sel[sel.array == array1[0]]
    print("SBs for %s array: %d" % (array, len(sel)))

    # Transmission at the current pwv, joined on the representative frequency.
    pwvcol = self.pwvdata[[str(self.pwv)]]
    len_bf_cond = len(sel)
    sel = pd.merge(
        sel, pwvcol, left_on='repfreq', right_index=True)
    sel.rename(columns={str(self.pwv): 'transmission'}, inplace=True)

    # tau/tsky looked up at each SB's own maxPWVC (rounded to one decimal).
    ind1 = sel.repfreq
    ind2 = pd.np.around(sel.maxPWVC, decimals=1).astype(str)
    sel['tau_org'] = self.tau.lookup(ind1, ind2)
    sel['tsky_org'] = self.tsky.lookup(ind1, ind2)
    # NOTE(review): -23.0262015 is presumably the site latitude in degrees
    # - confirm.
    sel['airmass'] = 1 / pd.np.cos(pd.np.radians(-23.0262015 - sel.DEC))
    sel = pd.merge(sel, self.reciever, left_on='band', right_index=True,
                   how='left')

    # tsky and tau at the current pwv.
    tskycol = self.tsky[[str(self.pwv)]]
    sel = pd.merge(sel, tskycol, left_on='repfreq', right_index=True)
    taucol = self.tau[[str(self.pwv)]]
    sel.rename(columns={str(self.pwv): 'tsky'}, inplace=True)
    sel = pd.merge(sel, taucol, left_on='repfreq', right_index=True)
    sel.rename(columns={str(self.pwv): 'tau'}, inplace=True)
    print("SBs in sb_summary: %d. SBs merged with tau/tsky info: %d."
          % (len_bf_cond, len(sel)))

    sel['sel_array'] = False

    # Patch for hybrid configuration, C34-6 & C34-7
    if array == '12m':
        # Sorts self.ruv in place.
        self.ruv.sort()
        ruv6 = self.ruv[self.ruv < 1091.].copy()
        # x = np.linspace(0, ruv6.max() + 100., 1000)
        param = rayleigh.fit(ruv6)
        # Smaller of the largest value below 1091 and the upper end of the
        # fitted Rayleigh 0.992 interval.
        maxl6 = np.min([ruv6.max(),
                        rayleigh.interval(0.992, loc=param[0],
                                          scale=param[1])[1]])
        self.ruv6 = ruv6.copy()
        self.res6 = 61800 / (100. * maxl6)
        self.blnum6 = len(ruv6)
        # With fewer than 591 values below 1091, fall back to the full
        # set and a fixed res6.
        if self.blnum6 < 591:
            self.ruv6 = self.ruv.copy()
            self.res6 = 0.571
        print self.blnum6, self.res6

    if array == '12m':
        # Accept an SB when its [arrayMinAR, arrayMaxAR] range contains
        # array_ar or res6, or lies between array_ar and res6.
        sel.loc[
            ((sel.arrayMinAR <= self.array_ar) &
             (sel.arrayMaxAR >= self.array_ar)) |
            ((sel.arrayMinAR <= self.res6) &
             (sel.arrayMaxAR >= self.res6)) |
            ((sel.arrayMinAR >= self.array_ar) &
             (sel.arrayMaxAR <= self.res6)),
            'sel_array'] = True
        print("SBs for current 12m Array AR: %d. "
              "(AR=%.2f, #bl=%d, #ant=%d)" %
              (len(sel.query('sel_array == True')), self.array_ar,
               self.num_bl, self.num_ant))

        sel['blmax'] = sel.apply(
            lambda row: rUV.computeBL(row['AR'], row['repfreq']), axis=1)
        sel['blmin'] = sel.apply(
            lambda row: rUV.computeBL(row['LAS'], row['repfreq']), axis=1)

        # blfrac: 33 * 17 divided by the number of self.ruv values up to
        # blmax, or, for point sources, by the number of antenna pairs.
        if self.array_name is not None:
            sel['blfrac'] = sel.apply(
                lambda row: (33. * 17) / (1. * len(
                    self.ruv[(self.ruv <= row['blmax'])]))
                if (row['isPointSource'] == False)
                else (33. * 17) / (self.num_ant * (self.num_ant - 1) / 2.),
                axis=1)
        else:
            sel['blfrac'] = sel.apply(
                lambda row: (33. * 17) / (1. * len(
                    self.ruv[self.ruv < row['blmax']]))
                if (row['isPointSource'] == False)
                else (33. * 17) / (34. * (34. - 1) / 2.),
                axis=1)
            # NOTE(review): whether this rescaling belongs inside the
            # `else` branch could not be determined from the collapsed
            # source - confirm.
            if self.num_ant != 34:
                sel.loc[:, 'blfrac'] = sel.loc[:, 'blfrac'] * (
                    33 * 17 / (self.num_ant * (
                        self.num_ant - 1) / 2.))

        sel.loc[:, 'blfrac'] = sel.apply(
            lambda row: ret_cycle(row[u'CODE'], row['blfrac']), axis=1
        )

    elif array == '7m':
        sel['sel_array'] = True
        sel['blfrac'] = 1.
        # Rescale by 9 * 4 over the number of antenna pairs when the
        # array does not have 9 antennas.
        if self.num_ant != 9:
            sel.loc[:, 'blfrac'] = sel.loc[:, 'blfrac'] * (
                9 * 4 / (self.num_ant * (
                    self.num_ant - 1) / 2.))

    else:
        sel['sel_array'] = True
        sel['blfrac'] = 1.

    # tsys at the current pwv; tsys_org uses the tau/tsky looked up at the
    # SB's own maxPWVC.
    sel['tsys'] = (
        1 + sel['g']) * \
        (sel['trx'] + sel['tsky'] *
         ((1 - pd.np.exp(-1 * sel['airmass'] * sel['tau'])) /
          (1 - pd.np.exp(-1. * sel['tau']))) * 0.95 + 0.05 * 270.) / \
        (0.95 * pd.np.exp(-1 * sel['tau'] * sel['airmass']))
    sel['tsys_org'] = (
        1 + sel['g']) * \
        (sel['trx'] + sel['tsky_org'] *
         ((1 - pd.np.exp(-1 * sel['airmass'] * sel['tau_org'])) /
          (1 - pd.np.exp(-1. * sel['tau_org']))) * 0.95 + 0.05 * 270.) / \
        (0.95 * pd.np.exp(-1 * sel['tau_org'] * sel['airmass']))

    sel['sel_trans'] = False
    sel.loc[(sel.transmission > self.transmission) |
            (sel.maxPWVC >= self.pwv), 'sel_trans'] = True
    print("SBs with a transmission higher than %2.1f: %d" %
          (self.transmission,
           len(sel.query('sel_array == True and sel_trans == True'))))

    # Sidereal time in degrees minus RA, divided by 15, wrapped into the
    # [-12, 12] range.
    self.alma.date = self.date
    lst = pd.np.degrees(self.alma.sidereal_time())
    ha = (lst - sel.RA) / 15.
    ha.loc[ha > 12] = ha.loc[ha > 12] - 24.
    ha.loc[ha < -12] = 24. + ha.loc[ha < -12]
    sel['HA'] = ha

    # SBs with RA == 0 always pass the HA check.
    sel['sel_ha'] = False
    sel.loc[
        ((sel.HA > self.minha) & (sel.HA < self.maxha)) |
        (sel.RA == 0.), 'sel_ha'] = True
    s3 = len(sel.query('sel_array == True and sel_trans == True and'
                       ' sel_ha == True'))
    print("SBs within current HA limits (or RA=0): %d" % s3)

    sel['tsysfrac'] = (sel.tsys / sel.tsys_org) ** 2.

    sel = pd.merge(sel, self.obser_prop, left_on='SB_UID',
                   right_index=True)

    # sel_el stays False for every SB when self.not_horizon is not False.
    sel['sel_el'] = False
    if self.not_horizon is False:
        sel.loc[(sel.up == 1) & (sel.etime > 1.5), 'sel_el'] = True
    s4 = len(
        sel.query('sel_array == True and sel_trans == True and'
                  ' sel_ha == True and sel_el == True'))
    print("SBs over %d degrees, 1.5 hours: %d" % (self.horizon, s4))

    # State filter: excludes the listed SB/project states, then any SB
    # whose name contains "not" (case-insensitive).
    sel['sel_st'] = False
    sel.loc[(sel.SB_state != "Phase2Submitted") &
            (sel.SB_state != "FullyObserved") &
            (sel.SB_state != "Deleted") &
            (sel.SB_state != "Canceled") &
            (sel.PRJ_state != "Phase2Submitted") &
            (sel.PRJ_state != "Completed"),
            'sel_st'] = True
    sel.loc[
        (sel.name.str.contains('not', case=False) == True),
        'sel_st'] = False
    s5 = len(
        sel.query(
            'sel_array == True and sel_trans == True and sel_ha == True '
            'and sel_el == True and sel_st == True'))
    print("SBs with Ok state: %d" % s5)

    # Executions still pending: execount greater than Total.
    sel['sel_exe'] = False
    sel.loc[sel.execount > sel.Total, 'sel_exe'] = True
    s6 = len(
        sel.query(
            'sel_array == True and sel_trans == True and sel_ha == True '
            'and sel_el == True and sel_st == True and sel_exe == True'))
    print("SBs with missing exec: %d" % s6)

    sel['frac'] = sel.tsysfrac * sel.blfrac

    # Per-SB mean pointings and number of "Primary:" field sources.
    fg = self.fieldsource.query(
        'isQuery == False and name == "Primary:"'
    ).groupby('SB_UID')
    p = pd.DataFrame(
        [fg.pointings.mean(), fg.pointings.count()],
        index=['mpointings', 'sources']).transpose()
    sel = pd.merge(sel, p, left_on='SB_UID', right_index=True, how='left')

    if array == '12m':
        # NOTE(review): this branch filters on 'tsysfrac < 2.1' while the
        # 7m/tp branches and the printed message use 'frac' - confirm
        # which is intended.
        self.select12m = sel.query(
            'sel_array == True and sel_trans == True and sel_ha == True '
            'and sel_el == True and sel_st == True and sel_exe == True and '
            'tsysfrac < 2.1')
        # print sel.query(
        #     'sel_array == True and sel_trans == True and sel_ha == True '
        #     'and sel_el == True and sel_st == True and sel_exe == True and '
        #     'frac >= 2.1')
        self.all12m = sel
        print("SBs with 'frac' < 2.1: %d" % len(self.select12m))
    elif array == '7m':
        self.select7m = sel.query(
            'sel_array == True and sel_trans == True and sel_ha == True '
            'and sel_el == True and sel_st == True and sel_exe == True and '
            'frac < 2.1')
        self.all7m = sel
        print("SBs with 'frac' < 2.1: %d" % len(self.select7m))
    else:
        self.selecttp = sel.query(
            'sel_array == True and sel_trans == True and sel_ha == True '
            'and sel_el == True and sel_st == True and sel_exe == True and '
            'frac < 2.1')
        self.alltp = sel
        print("SBs with 'frac' < 2.1: %d" % len(self.selecttp))
mav_rach = mav(r_rachat)[0]
mav_sous = mav(r_souscription)[0]
print("")
print("Outliers for fta : ", r - len(r_fta))
print("Outliers for nav : ", s - len(r_nav))
print("")
print("MAD rachat : ", mav_rach)
print("MAD souscription : ", mav_sous)
print("")

###################### model fitting
from scipy.stats import norm, rayleigh, gamma
from statsmodels.graphics.tsaplots import plot_acf

param = rayleigh.fit(r_rachat_sous)  # distribution fitting
# The second curve is a normal PDF, so its parameters must come from a
# normal fit; they used to be taken from a second Rayleigh fit.
param2 = norm.fit(r_rachat_sous)
param3 = gamma.fit(r_rachat_sous)
x = linspace(-5, 5, 1000)
a = 1.99  # gamma shape parameter used for the reference curve

pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])
pdf_fitted_2 = norm.pdf(x, loc=param2[0], scale=param2[1])
# Use the fitted gamma parameters; the hard-coded shape made this curve
# identical to the reference `pdf3` and left the fit result unused.
pdf_fitted_3 = gamma.pdf(x, *param3)

# Reference curves with fixed parameters.
pdf = rayleigh.pdf(x, loc=5, scale=2)
pdf2 = norm.pdf(x, loc=5, scale=2)
pdf3 = gamma.pdf(x, a)
plt.title('Rayleigh, normal, gamma distribution')
from scipy.stats import norm, rayleigh
from numpy import linspace
from pylab import plot, show, hist, figure, title

# Generate 150 random samples from a Rayleigh distribution (loc=5, scale=2)
y = rayleigh.rvs(loc=5, scale=2, size=150)

# Fit a Rayleigh distribution to the samples; param[0] is used as loc and
# param[1] as scale below
param = rayleigh.fit(y)

# Plotting
x = linspace(5, 13, 100)
# fitted distribution
pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])
# original distribution
pdf = rayleigh.pdf(x, loc=5, scale=2)
title('Rayleigh distribution')
# fitted curve in red, original curve in blue
plot(x, pdf_fitted, 'r-', x, pdf, 'b-')
# hist(y, normed=1, alpha=.3)
hist(y, density=1, alpha=.3)
show()
def gen_wind_histogram(oldFigure, sliderValue, autoState):
    """Build the wind-speed histogram figure with a Rayleigh-shaped overlay.

    :param oldFigure: figure dict whose first trace's ``y`` values are the
        wind samples. It must contain at least one sample: an empty or
        missing sample list makes the statistics below fail.
    :param sliderValue: ``bins`` argument for ``np.histogram`` when
        automatic binning is off.
    :param autoState: container; when it contains ``'Auto'`` the bin
        edges are the integers from the rounded minimum up to (but not
        including) the rounded maximum sample.
    :return: ``dict(data=[bars, average, median, fit], layout=layout)``.
    """
    windVal = []
    if oldFigure is not None:
        windVal = oldFigure['data'][0]['y']

    if 'Auto' in autoState:
        binVal = np.histogram(windVal,
                              bins=range(int(round(min(windVal))),
                                         int(round(max(windVal)))))
    else:
        binVal = np.histogram(windVal, bins=sliderValue)
    counts, edges = binVal[0], binVal[1]

    avgVal = float(sum(windVal)) / len(windVal)
    medianVal = np.median(windVal)

    # The overlay is a Rayleigh PDF with hand-tuned loc/scale, rescaled to
    # the histogram height; only the fitted scale enters the loc term.
    param = rayleigh.fit(counts)
    pdf_fitted = rayleigh.pdf(edges,
                              loc=(avgVal - abs(param[1])) * 0.55,
                              scale=(edges[-1] - edges[0]) / 3)
    yVal = pdf_fitted * max(counts) * 20

    yValMax = max(yVal)
    binValMax = max(counts)
    # Both marker lines reach the taller of the histogram and the overlay.
    # The median line previously referenced an undefined name (`maxV`).
    lineTop = int(max(binValMax, yValMax)) + 0.5

    trace = Bar(x=edges, y=counts, marker=dict(color='#7F7F7F'),
                showlegend=False, hoverinfo='x+y')
    # trace1/trace2 exist only to provide legend entries for the marker
    # lines drawn as layout shapes below.
    trace1 = Scatter(x=[25], y=[0], mode='lines',
                     line=dict(dash='dash', color='#2E5266'),
                     marker=dict(opacity=0, ), visible=True, name='Average')
    trace2 = Scatter(x=[25], y=[0],
                     line=dict(dash='dot', color='#BD9391'),
                     mode='lines', marker=dict(opacity=0, ), visible=True,
                     name='Median')
    trace3 = Scatter(mode='lines', line=dict(color='#42C4F7'), y=yVal,
                     x=edges, name='Rayleigh Fit')

    layout = Layout(
        xaxis=dict(
            title='Wind Speed (mph), Rounded to Closest Integer',
            showgrid=False,
            showline=False,
        ),
        yaxis=dict(showgrid=False, showline=False, zeroline=False,
                   title='Number of Samples'),
        margin=dict(t=50, b=20, r=50),
        autosize=True,
        bargap=0.01,
        bargroupgap=0,
        hovermode='closest',
        legend=dict(x=0.175, y=-0.2, orientation='h'),
        shapes=[
            dict(xref='x', yref='y', y1=lineTop, y0=0,
                 x0=avgVal, x1=avgVal, type='line',
                 line=dict(dash='dash', color='#2E5266', width=5)),
            dict(xref='x', yref='y', y1=lineTop, y0=0,
                 x0=medianVal, x1=medianVal, type='line',
                 line=dict(dash='dot', color='#BD9391', width=5))
        ])
    return dict(data=[trace, trace1, trace2, trace3], layout=layout)
def plot_statistical_analysis(R, V, time_step, filename):
    """Plot summary statistics of a 2-D particle run and save the figure.

    ``R`` (positions) and ``V`` (velocities) are indexed as
    ``(step, coordinate, particle)`` with coordinate 0 = x and 1 = y, as
    implied by the norms and sums taken below. Any number of steps and
    particles is accepted.

    The figure has eight panels: norm of the summed velocity and kinetic
    energy against time, then density histograms of pair distances,
    speeds, x/y positions and x/y velocities. All but the pair distances
    are overlaid with a fitted Rayleigh, uniform or normal PDF.

    :param R: position array.
    :param V: velocity array.
    :param time_step: time between consecutive steps.
    :param filename: path the figure is saved to before it is shown.
    """
    n_steps = R.shape[0]

    # Per-particle speed, total kinetic energy and norm of the summed velocity.
    speed = np.linalg.norm(V, axis=1)
    E_k = np.sum(speed ** 2, axis=1) / 2
    V_tot = np.linalg.norm(np.sum(V, axis=2), axis=1)
    print(R.shape)

    # Pairwise separation vectors via broadcasting, then their norms.
    # NOTE(review): this includes each particle's zero distance to itself.
    offsets = R[:, :, np.newaxis, :] - R[:, :, :, np.newaxis]
    dists = np.linalg.norm(offsets, axis=1).ravel()

    # Select each coordinate along axis 1 before flattening. Reshaping the
    # (step, coordinate, particle) array straight to (-1, 2) paired up
    # values of different particles instead of the x/y of one particle.
    R_x = R[:, 0, :].ravel()
    R_y = R[:, 1, :].ravel()
    V_x = V[:, 0, :].ravel()
    V_y = V[:, 1, :].ravel()
    speed = speed.ravel()

    time = "time"
    # One time stamp per stored step.
    t = np.arange(n_steps) * time_step

    plt.figure(tight_layout=True, figsize=[9., 6.])

    # Average speed
    plt.subplot(421)
    plt.plot(t, V_tot)
    plt.xlabel(time)
    plt.ylabel("Average speed")

    # Kinetic energy
    plt.subplot(422)
    plt.plot(t, E_k)
    plt.ylabel("Kinetic energy")
    plt.ylim([E_k.min() - 0.2, E_k.max() + 0.2])
    plt.xlabel(time)

    # Distance
    plt.subplot(423)
    plt.xlabel("distance")
    plt.ylabel("Pair distribution\n probability")
    plt.xlim([-0.5, dists.max() + 1])
    plt.hist(dists, bins=50, density=True)

    # Speed, with a fitted Rayleigh PDF
    plt.subplot(424)
    plt.xlabel("speed")
    plt.ylabel("Velocity norm\n probability")
    plt.hist(speed, bins="auto", density=True)
    x = np.linspace(speed.min(), speed.max(), 100)
    loc_V, scale_V = rayleigh.fit(speed)
    plt.plot(x, rayleigh.pdf(x, loc_V, scale_V))

    # x position, with a fitted uniform PDF
    plt.subplot(425)
    plt.xlabel("x position")
    plt.ylabel("Probability")
    plt.hist(R_x, bins="auto", density=True)
    x = np.linspace(R_x.min() - 0.5, R_x.max() + 0.5, 100)
    loc_R_x, scale_R_x = uniform.fit(R_x)
    plt.plot(x, uniform.pdf(x, loc_R_x, scale_R_x))

    # y position, with a fitted uniform PDF
    plt.subplot(426)
    plt.xlabel("y position")
    plt.ylabel("Probability")
    plt.hist(R_y, bins="auto", density=True)
    x = np.linspace(R_y.min() - 0.5, R_y.max() + 0.5, 100)
    loc_R_y, scale_R_y = uniform.fit(R_y)
    plt.plot(x, uniform.pdf(x, loc_R_y, scale_R_y))

    # x velocity, with a fitted normal PDF
    plt.subplot(427)
    plt.xlabel("x velocity")
    plt.ylabel("Probability")
    plt.hist(V_x, bins="auto", density=True)
    x = np.linspace(V_x.min() - 0.2, V_x.max() + 0.2, 100)
    loc_v_x, scale_v_x = norm.fit(V_x)
    plt.plot(x, norm.pdf(x, loc_v_x, scale_v_x))

    # y velocity, with a fitted normal PDF
    plt.subplot(428)
    plt.xlabel("y velocity")
    plt.ylabel("Probability")
    plt.hist(V_y, bins="auto", density=True)
    x = np.linspace(V_y.min() - 0.2, V_y.max() + 0.2, 100)
    loc_v_y, scale_v_y = norm.fit(V_y)
    plt.plot(x, norm.pdf(x, loc_v_y, scale_v_y))

    plt.savefig(filename)
    plt.show()
# Build one bucket of random integers per entry of data_count: bucket `key`
# holds `count` values drawn from [50 * key, 50 * key + 50].
x = {}
for key, count in enumerate(data_count):
    x[key] = []
    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    for i in range(0, count):
        value = random.randint(0 + key * 50, 50 + key * 50)
        x[key].append(value)

# Flatten the first 20 buckets into one sample list.
# NOTE(review): assumes data_count has at least 20 entries - confirm.
data = []
for i in range(0, 20):
    for value in x[i]:
        data.append(value)

from scipy.stats import norm
from numpy import linspace
from pylab import plot, show, hist, figure, title
from scipy.stats import norm, rayleigh

param = rayleigh.fit(data)  # distribution fitting
x = linspace(0, 1000, 10)
# fitted distribution
pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])
# reference curve with fixed loc=5, scale=2
pdf = rayleigh.pdf(x, loc=5, scale=2)
title('Rayleigh distribution')
plot(x, pdf_fitted, 'r-', x, pdf, 'b-')
# `normed` was removed from Matplotlib's hist(); `density` replaces it.
hist(data, density=True, alpha=.3)
show()
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100 ] # can be more, I don't think less. display_fit = False dt = s.dt # Perform the analysis for a sigle track dapp_list = [] for j in J: # Calculate the SD x = np.array(s.trajectory["x"]) y = np.array(s.trajectory["y"]) sd = squared_displacement(x, y, j) x_fit = np.sqrt(sd / (j * dt)) reg = rayleigh.fit(x_fit) if display_fit: eval_x = np.linspace(x_fit.min(), x_fit.max(), 100) plt.plot(eval_x, rayleigh.pdf(eval_x, *reg), label="Fit") plt.hist(x_fit, 32, density=True, alpha=0.5, label="Data") plt.legend() plt.show() sigma = reg[1] dapp = sigma / 4 # This is the equivalent of Dapp dapp_list.append(dapp) plt.semilogx(np.array(J) * dt, dapp_list) plt.xlabel("Time") plt.ylabel("Estimated $D_{app}$")
import numpy as np
import matplotlib.pyplot as plt
import sys
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import rayleigh
from numpy import linspace
import matplotlib.mlab as mlab

# Input: a comma-separated file with a header row, given as first argument.
infile = sys.argv[1]
fin = np.genfromtxt(infile, names=True, delimiter=",")
# Only the first column of every record is used.
Tarray = np.array([item[0] for item in fin])

param = rayleigh.fit(Tarray)  # (loc, scale)
x = linspace(0, 5, 50)
pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])

# NOTE(review): the title says "Normal" although a Rayleigh distribution
# is fitted - confirm the intended wording.
plt.title('Normal distribution')
plt.plot(x, pdf_fitted, 'r-')
# `normed` was removed from Matplotlib's hist(); `density` replaces it.
plt.hist(Tarray, 50, density=True, alpha=.5)
plt.xlabel("time [arb]")
plt.ylabel("V")
# Print the fitted scale parameter.
print(param[1])
#plt.legend(loc='upper center')
plt.tight_layout()
#plt.savefig("hist_ch4_200MHz.png")
plt.show()
fontsize=MEDIUM_SIZE) plt.gcf().text(0.21 + 0.275, 0.2 - 0.04 - 6 / yr, '$\mathregular{10^{-10}}$', fontsize=MEDIUM_SIZE) # Graph parameters weight = ones_like(star_xir) / float( len(star_xir)) # Weight to normalize graph sub8.errorbar(x, y, yerr=ye, fmt='k|', mfc='none') sub8.hist(star_xir, bins=30, range=(0, 4.5), weights=weight, edgecolor='#08088A', linewidth=0.5, fc=(0, 0, 0, 0), orientation='horizontal') # Rayleigh distribution mean = sum(star_xir) / len(star_xir) y = linspace(0.0, 4.5, 100) param = rayleigh.fit(star_xir) pdf_fitted = rayleigh.pdf(y) * 0.15 sub8.plot(pdf_fitted, y, 'black', linestyle='--', linewidth=0.5) # ========== Output ========== # fig.savefig('fig6.eps', format='eps', bbox_inches='tight', pad_inches=0.02, dpi=1000)
# Build one bucket of random integers per entry of data_count: bucket `key`
# holds `count` values drawn from [50 * key, 50 * key + 50].
for key, count in enumerate(data_count):
    x[key] = []
    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    for i in range(0, count):
        value = random.randint(0 + key * 50, 50 + key * 50)
        x[key].append(value)

# Flatten the first 20 buckets into one sample list.
# NOTE(review): assumes data_count has at least 20 entries - confirm.
data = []
for i in range(0, 20):
    for value in x[i]:
        data.append(value)

from scipy.stats import norm
from numpy import linspace
from pylab import plot, show, hist, figure, title
from scipy.stats import norm, rayleigh

param = rayleigh.fit(data)  # distribution fitting
x = linspace(0, 1000, 10)
# fitted distribution
pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])
# reference curve with fixed loc=5, scale=2
pdf = rayleigh.pdf(x, loc=5, scale=2)
title('Rayleigh distribution')
plot(x, pdf_fitted, 'r-', x, pdf, 'b-')
# `normed` was removed from Matplotlib's hist(); `density` replaces it.
hist(data, density=True, alpha=.3)
show()
def _expand_marginal(values, probabilities, multiplier):
    """Repeat each value round(probability * multiplier) times.

    Turns a discrete marginal into a flat sample list that can be handed to
    ``rayleigh.fit``, which expects raw observations rather than frequencies.
    """
    samples = []
    for value, probability in zip(values, probabilities):
        samples.extend([value] * int(round(probability * multiplier)))
    return samples


def _save_fit_figure(values, fitted, title, xlabel, legend, filename,
                     probabilities=None):
    """Draw a fitted curve (optionally over the data points) and save it."""
    plt.figure()
    if probabilities is not None:
        plt.scatter(values, probabilities, s=10, color='#00b3b3', label='Data')
    plt.plot(values, fitted, linestyle='--', linewidth=2, color='black')
    plt.title(title)
    plt.ylabel('Probabilidad')
    plt.xlabel(xlabel)
    plt.legend(legend)
    plt.savefig(filename)  # written to the current working directory


def curvaDeAjuste():
    """Fit candidate curves to the X and Y marginals of ``xy.csv`` and plot them.

    Reads the table from ``xy.csv`` (first row and first column skipped),
    sums rows to get the marginal for x = 5..15 and columns for y = 5..25,
    then fits Gaussian, Lorentzian, Rayleigh and Voigt curves to each
    marginal. Every fit is saved as a PNG, the Voigt parameters are printed,
    and the two Voigt curves are saved once more without the data points.

    Returns:
        None.
    """
    # Read the data from the CSV file.
    data = np.loadtxt("xy.csv", delimiter=',', skiprows=1, usecols=range(1, 22))

    # Number of synthetic samples generated per unit of probability when
    # building the sample lists used by the Rayleigh fit.
    rayleighMultiplier = 100

    # Marginal of x: sum of each row (rows correspond to x5..x15).
    xList = list(range(5, 16))
    xProbabilities = [np.sum(data[i - 5, :]) for i in xList]
    xRayleighSamples = _expand_marginal(xList, xProbabilities,
                                        rayleighMultiplier)

    # Marginal of y: sum of each column (columns correspond to y5..y25).
    # The sample count is derived from each y probability; the previous code
    # reused the last x value here by mistake.
    yList = list(range(5, 26))
    yProbabilities = [np.sum(data[:, i - 5]) for i in yList]
    yRayleighSamples = _expand_marginal(yList, yProbabilities,
                                        rayleighMultiplier)

    ## FOR X
    # Gaussian fit.
    gaussX, _ = curve_fit(f=gaussian, xdata=xList, ydata=xProbabilities,
                          p0=[6, 10, 14], bounds=(-np.inf, np.inf))
    _save_fit_figure(xList, gaussian(xList, *gaussX),
                     'Curva de Ajuste de Gauss para X', 'Valor de X',
                     ['Ajuste de Gauss', 'Probabilidad de X'],
                     'curvaAjuste_x_Gaussian.png', xProbabilities)

    # Lorentzian fit (sharper peak).
    lorentzX, _ = curve_fit(f=lorentzian, xdata=xList, ydata=xProbabilities,
                            p0=[0.14, 10, 4], bounds=(-np.inf, np.inf))
    _save_fit_figure(xList, lorentzian(xList, *lorentzX),
                     'Curva de Ajuste de Lorentz para X', 'Valor de X',
                     ['Ajuste de Lorentz', 'Probabilidad de X'],
                     'curvaAjuste_x_Lorentzian.png', xProbabilities)

    # Rayleigh fit on the expanded samples; rayleigh.fit returns (loc, scale).
    param = rayleigh.fit(xRayleighSamples)
    _save_fit_figure(xList,
                     rayleigh.pdf(xList, loc=param[0], scale=param[1]),
                     'Curva de Ajuste de Rayleigh para X', 'Valor de X',
                     ['Ajuste de Rayleigh', 'Probabilidad de X'],
                     'curvaAjuste_x_Rayleigh.png', xProbabilities)

    # Voigt fit (weighted combination of Gaussian and Lorentzian).
    parsX, _ = curve_fit(f=voigt, xdata=xList, ydata=xProbabilities,
                         p0=[0.5, 10, 4], bounds=(-np.inf, np.inf))
    _save_fit_figure(xList, voigt(xList, *parsX),
                     'Curva de Ajuste de Voigt para X', 'Valor de X',
                     ['Ajuste de Voigt', 'Probabilidad de X'],
                     'curvaAjuste_x_Voigt.png', xProbabilities)

    ## FOR Y
    # Gaussian fit.
    gaussY, _ = curve_fit(f=gaussian, xdata=yList, ydata=yProbabilities,
                          p0=[6, 15, 24], bounds=(-np.inf, np.inf))
    _save_fit_figure(yList, gaussian(yList, *gaussY),
                     'Curva de Ajuste de Gauss para Y', 'Valor de Y',
                     ['Ajuste de Gauss', 'Probabilidad de Y'],
                     'curvaAjuste_y_Gaussian.png', yProbabilities)

    # Lorentzian fit (sharper peak).
    lorentzY, _ = curve_fit(f=lorentzian, xdata=yList, ydata=yProbabilities,
                            p0=[0.14, 15, 2], bounds=(-np.inf, np.inf))
    _save_fit_figure(yList, lorentzian(yList, *lorentzY),
                     'Curva de Ajuste de Lorentz para Y', 'Valor de Y',
                     ['Ajuste de Lorentz', 'Probabilidad de Y'],
                     'curvaAjuste_y_Lorentzian.png', yProbabilities)

    # Rayleigh fit on the expanded samples.
    param = rayleigh.fit(yRayleighSamples)
    _save_fit_figure(yList,
                     rayleigh.pdf(yList, loc=param[0], scale=param[1]),
                     'Curva de Ajuste de Rayleigh para Y', 'Valor de Y',
                     ['Ajuste de Rayleigh', 'Probabilidad de Y'],
                     'curvaAjuste_y_Rayleigh.png', yProbabilities)

    # Voigt fit (weighted combination of Gaussian and Lorentzian).
    parsY, _ = curve_fit(f=voigt, xdata=yList, ydata=yProbabilities,
                         p0=[0.5, 10, 4], bounds=(-np.inf, np.inf))
    _save_fit_figure(yList, voigt(yList, *parsY),
                     'Curva de Ajuste de Voigt para Y', 'Valor de Y',
                     ['Ajuste de Voigt', 'Probabilidad de Y'],
                     'curvaAjuste_y_Voigt.png', yProbabilities)

    # Report the Voigt parameters for both marginals.
    print("Gracias a las gráficas anteriores es claro que la función indicada es la de Voigt: (ampG1*(1/(sigmaG1*(np.sqrt(2*np.pi))))*(np.exp(-((x-cenG1)**2)/((2*sigmaG1)**2)))) +\((ampL1*widL1**2/((x-cenL1)**2+widL1**2)) \n\nPara x, los parámetros son: ")
    print(parsX)
    print("Para y, los parámetros son: ")
    print(parsY)
    print("\nLas graficas de Voigt corresponden a las gráficas de las funciones de mejor ajuste. Las funciones se grafican de forma separada de los datos utilizando el código anterior.")

    # Voigt curves alone, without the data points.
    _save_fit_figure(xList, voigt(xList, *parsX),
                     'Función de Densidad Marginal (Voigt) para X',
                     'Valor de X', ['Ajuste de Voigt'], 'x_Voigt.png')
    _save_fit_figure(yList, voigt(yList, *parsY),
                     'Función de Densidad Marginal (Voigt) para Y',
                     'Valor de Y', ['Ajuste de Voigt'], 'y_Voigt.png')
    return
dfaux = dfvar.loc[mask] analysis = weibull.Analysis(dfaux["Viento - Velocidad (m/s)"], unit="m/s") analysis.fit(method='mle') # Capturando los parametros de weibull forma = analysis.stats[3] escala = analysis.stats[6] count, bins, ignored = plt.hist( dfaux["Viento - Velocidad (m/s)"], bins=range(0, int(dfaux["Viento - Velocidad (m/s)"].max() + 2))) ab = np.arange(0, int(dfaux["Viento - Velocidad (m/s)"].max() + 2)) x = np.linspace(min(dfaux["Viento - Velocidad (m/s)"]), max(dfaux["Viento - Velocidad (m/s)"]), sum(count)) scale = count.max() / weib(x, escala, forma).max() # Capturando Parametros de Rayleigh param = rayleigh.fit( dfaux["Viento - Velocidad (m/s)"]) # distribution fitting pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1]) plt.plot(x, weib(x, escala, forma) * scale, '-b', label='Weibull') plt.plot(x, pdf_fitted * scale, '-r', label='Rayleigh') # incorporando RAyleigh plt.xlabel("Vel. Viento [m/s]") plt.ylabel("Distribucion de frecuencia") plt.title("Distribucion de Weibull") plt.legend(loc='upper right') # j = mes # i = anio #****************************************************************** # Generacion de Tablas de Frecuencia , PDF, y CDF #****************************************************************** histo, binEdges = np.histogram( dfaux['Viento - Velocidad (m/s)'],
def Threshold_finder(data,
                     max_population=3,
                     min_population_size=0.2,
                     confidence_interval=0.90,
                     verbose=False):
    """Pick the best population model for ``data`` and derive a threshold.

    Bayesian Gaussian mixtures with ``max_population`` down to 1 components
    are tried, then a single Rayleigh model, and a chi-square model if
    nothing was selected. The candidate with the highest log likelihood wins.

    Args:
        data: array of count numbers. NOTE(review): the code indexes ``[0]``
            into per-column sums, so a 2-D column of shape (n_samples, 1) is
            presumably expected - confirm against callers.
        max_population: maximal number of populations assumed to exist in
            the sample dataset.
        min_population_size: every mixture weight must exceed this fraction
            for a Gaussian mixture to be accepted.
        confidence_interval: central interval whose lower bound is used as
            threshold when a single-population model is selected
            (0.90 corresponds to a 5% one-tailed test).
        verbose: print progress for each candidate model.

    Returns:
        Tuple ``(threshold, model_kind, best_model, best_population)``.
    """
    import warnings
    # NOTE: this silences warnings for the whole process, not only for the
    # duration of this call.
    warnings.filterwarnings("ignore")

    best_population = np.inf
    best_loglike = -np.inf
    best_mdoel = None
    model_kind = 'Gaussian'  # Gaussian is the default model type

    # Try the largest number of populations first, down to a single one.
    for n_components in range(max_population, 0, -1):
        BGM = BayesianGaussianMixture(n_components=n_components,
                                      verbose=0).fit(data)
        # Proceed only if the model converged
        if BGM.converged_:
            if verbose:
                print('%s populations converged' % str(n_components))
            labels = BGM.predict(data)
            # Weighted probability of every sample under each population:
            # a Gaussian fitted to the members of population p, times the
            # mixture weight of p.
            weighted_pdfs = []
            for p in range(n_components):
                para = norm.fit(mask_list(data, labels, p))
                weighted_pdfs.append(
                    norm(para[0], para[1]).pdf(data) * BGM.weights_[p])
            # Total log likelihood: sum over samples of
            # log(wp[0] + wp[1] + ...).
            loglike = sum(np.log(sum(weighted_pdfs)))[0]
            if (loglike > best_loglike
                    and min(BGM.weights_) > min_population_size):
                best_loglike = loglike
                best_population = n_components
                best_mdoel = BGM
                if verbose:
                    print(
                        '%s model with %s population has log likelyhood of %s '
                        % (model_kind, n_components, loglike))
        else:
            if verbose:
                print('%s populations not converged' % str(n_components))

        if n_components == 1:
            # A Gaussian may not be the best fit for a single population, so
            # other single-population models are tested as well.
            para = rayleigh.fit(data)
            loglike = sum(np.log(rayleigh(para[0], para[1]).pdf(data)))[0]
            if loglike > best_loglike:
                best_loglike = loglike
                best_population = 1
                best_mdoel = rayleigh(para[0], para[1])
                model_kind = 'Rayleigh'
                if verbose:
                    print(
                        '%s model with %s population has log likelyhood of %s '
                        % (model_kind, n_components, loglike))
            if best_mdoel is None:
                # Neither Gaussian nor Rayleigh could fit the data
                para = chi2.fit(data)
                loglike = sum(
                    np.log(chi2(para[0], para[1], para[2]).pdf(data)))[0]
                if loglike > best_loglike:
                    best_loglike = loglike
                    best_population = 1
                    best_mdoel = chi2(para[0], para[1], para[2])
                    model_kind = 'Chi-square'
                    if verbose:
                        print(
                            '%s model with %s population has log likelyhood of %s '
                            % (model_kind, n_components, loglike))

    if best_population > 1:
        # The population with the smallest mean represents the negatives;
        # the threshold is the largest value assigned to it.
        p = list(best_mdoel.means_).index(min(best_mdoel.means_))
        threshold = max(mask_list(data, best_mdoel.predict(data), p))[0]
    else:
        if model_kind == 'Rayleigh' or model_kind == 'Chi-square':
            threshold = min(
                1, abs(best_mdoel.interval(confidence_interval)[0]))
        else:
            para = norm.fit(data)
            # Freeze the distribution with (loc, scale) only; passing `data`
            # as an extra positional argument raised a TypeError.
            threshold = min(
                1,
                abs(norm(para[0], para[1]).interval(confidence_interval)[0]))
    print(
        'Best model with %s distribution has %s populations with threshold at %s'
        % (model_kind, best_population, threshold))
    return threshold, model_kind, best_mdoel, best_population
# Demo: draw samples from a known Rayleigh distribution, fit a Rayleigh
# distribution to them, and compare the fitted PDF with the true one.
from scipy.stats import norm, rayleigh
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np

samp = rayleigh.rvs(loc=5, scale=2, size=150)  # samples generation
print(samp)
param = rayleigh.fit(samp)  # distribution fitting; returns (loc, scale)

x = np.linspace(5, 13, 100)
# fitted distribution
pdf_fitted = rayleigh.pdf(x, loc=param[0], scale=param[1])
# original distribution
pdf = rayleigh.pdf(x, loc=5, scale=2)

plt.title('Rayleigh distribution')
plt.plot(x, pdf_fitted, 'r-', x, pdf, 'b-')
# `normed` was removed from Matplotlib; `density=True` is the replacement.
plt.hist(samp, density=True, alpha=.3)
plt.show()