def calculate_change_factor(idir, ifile, prob=[0.10, 0.90],
                            aggregation=[24, 48, 72, 96]):
    r'''Perturb the statistics of the series in `ifile` with climate change
    factors.

    Change factors are read as [mu, nu] pairs (control and future statistic)
    from the files in `idir`; their ratio nu/mu scales the corresponding
    statistic of the observed series.
    '''
    ifiles = sorted(os.listdir(idir))
    dic = {}
    dic_dsf = {}
    for i in ifiles:
        fname = os.path.join(idir, i)
        d = wg_io.load(fname)
        mu = numpy.array(d[0])
        nu = numpy.array(d[1])
        # Dry spell fraction expressed as odds, for the alternative change
        # factor below
        mu_dsf = mu / (1 - mu)
        nu_dsf = nu / (1 - nu)
        prob_up = prob[1]
        prob_down = prob[0]
        axis = 0
        alpha = 0.0
        beta = 1.0
        up = scipy.stats.mstats.mquantiles(nu / mu, prob=prob_up,
                                           alphap=alpha, betap=beta,
                                           axis=axis)
        down = scipy.stats.mstats.mquantiles(nu / mu, prob=prob_down,
                                             alphap=alpha, betap=beta,
                                             axis=axis)
        dic[i] = [numpy.mean(nu / mu), down, up]

        up_dsf = scipy.stats.mstats.mquantiles(nu_dsf / mu_dsf, prob=prob_up,
                                               alphap=alpha, betap=beta,
                                               axis=axis)
        down_dsf = scipy.stats.mstats.mquantiles(nu_dsf / mu_dsf,
                                                 prob=prob_down,
                                                 alphap=alpha, betap=beta,
                                                 axis=axis)
        dic_dsf[i] = [numpy.mean(nu_dsf / mu_dsf), down_dsf, up_dsf]

    # Group the change factors by statistic and monthly group, keyed on the
    # file name patterns 'agg_<h>', 'stat_<name>' and 'g_<group>_'
    data_mean = {}
    data_var = {}
    data_skew = {}
    data_dsf = {}
    # TODO: include the other type of groupings!
    for g in range(12):
        temp_mean = []
        temp_var = []
        temp_skew = []
        temp_dsf = []
        for k in dic:
            for agg in aggregation:
                agg_tag = 'agg_' + str(agg)
                group_tag = 'g_' + str(g) + '_'
                if (agg_tag in k) and ('stat_Eh' in k) and (group_tag in k):
                    temp_mean.append([k, dic[k][0], dic[k][1], dic[k][2]])
                if (agg_tag in k) and ('stat_VARh' in k) and (group_tag in k):
                    temp_var.append([k, dic[k][0], dic[k][1], dic[k][2]])
                if (agg_tag in k) and ('stat_SKh' in k) and (group_tag in k):
                    temp_skew.append([k, dic[k][0], dic[k][1], dic[k][2]])
                if (agg_tag in k) and ('stat_FFh' in k) and (group_tag in k):
                    temp_dsf.append([k, dic[k][0], dic[k][1], dic[k][2]])
                    #temp_dsf.append([k, dic_dsf[k][0], dic_dsf[k][1],
                    #                 dic_dsf[k][2]])
        data_mean[str(g)] = sorted(temp_mean)
        data_var[str(g)] = sorted(temp_var)
        data_skew[str(g)] = sorted(temp_skew)
        data_dsf[str(g)] = sorted(temp_dsf)

    # Statistics of the observed series
    dic = wg_io.load(ifile)
    ts = 1
    lag = 1
    accumulate = True
    group_type = 'm'
    dic_stat = wg_tool.series_statistics(dic, ts, aggregation, lag=lag,
                                         group_type=group_type,
                                         accumulate=accumulate)
    temp_mean = []
    temp_cv = []
    temp_var = []
    temp_dsf = []
    temp_skew = []
    # [Ey, VARy, CVy, SKy, Rly, DSFy]
    dicdic = dic_stat['period']
    for g in range(12):
        mean_index = 0
        cf_mean = [i[1] for i in data_mean[str(g)]]
        mean = dicdic[mean_index][g]
        new_mean = numpy.array(mean) * numpy.array(cf_mean)
        temp_mean.append(new_mean)

        var_index = 1
        cf_var = [i[1] for i in data_var[str(g)]]
        var = dicdic[var_index][g]
        new_var = numpy.array(var) * numpy.array(cf_var)
        temp_var.append(new_var)

        new_cv = (new_var**0.5) / new_mean
        temp_cv.append(new_cv)

        skew_index = 3
        cf_skew = [i[1] for i in data_skew[str(g)]]
        skew = dicdic[skew_index][g]
        new_skew = numpy.array(skew) * numpy.array(cf_skew)
        temp_skew.append(new_skew)

        dsf_index = 5
        cf_dsf = [i[1] for i in data_dsf[str(g)]]
        dsf = dicdic[dsf_index][g]
        new_dsf = numpy.array(dsf) * numpy.array(cf_dsf)
        temp_dsf.append(new_dsf)

    ret_dic = {'mean': temp_mean,
               'var': temp_var,
               'skew': temp_skew,
               'cv': temp_cv,
               'dsf': temp_dsf,
               'aggregation': aggregation}
    return ret_dic
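
# The quantile step above is the core of the change-factor estimate, so a
# minimal self-contained sketch follows. It is not part of the pipeline: the
# synthetic `mu` (control statistic) and `nu` (future statistic) samples
# stand in for the [mu, nu] pairs that wg_io.load reads from `idir`.
def _demo_change_factor_quantiles():
    numpy.random.seed(0)
    mu = numpy.random.gamma(2.0, 1.0, 1000)        # synthetic control statistic
    nu = 1.2 * numpy.random.gamma(2.0, 1.0, 1000)  # synthetic future statistic
    ratio = nu / mu                                # change factor samples
    down, up = scipy.stats.mstats.mquantiles(ratio, prob=[0.10, 0.90],
                                             alphap=0.0, betap=1.0)
    # Mirrors the [mean, down, up] triplet stored per file above
    print('mean CF: %.3f, 10%%: %.3f, 90%%: %.3f'
          % (numpy.mean(ratio), down, up))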
def extend_fine(idir, ifile, extend_aggregation, aggregation, ofile):
    r'''Extend the change-factor perturbed statistics from the coarse
    aggregations (24 h and above) down to the fine aggregations in
    `extend_aggregation`, and save the result to `ofile`.
    '''
    dic = calculate_change_factor(idir, ifile)
    fmin = scipy.optimize.fmin

    # Change-factor perturbed statistics at the coarse aggregations
    dsf = dic['dsf']
    mean = dic['mean']
    var = dic['var']
    skew = dic['skew']
    correl_ori = []
    skew_ori = []

    # Statistics of the original series
    # ---------------------------------
    dic = wg_io.load(ifile)
    ts = 1
    lag = 1
    accumulate = True
    group_type = 'm'
    dic_stat = wg_tool.series_statistics(dic, ts, extend_aggregation,
                                         lag=lag, group_type=group_type,
                                         accumulate=accumulate)
    # [Ey, VARy, CVy, SKy, Rly, DSFy]
    dicdic = dic_stat['period']
    for g in range(12):
        skew_index = 3
        skew_ori.append(dicdic[skew_index][g])
        correl_index = 4
        correl_ori.append(dicdic[correl_index][g])

    # Find the indexes where extension (fitted) values are used, below
    # 24 hours, and where the coarse values are kept, 24 hours and over
    n_group = len(dsf)
    pre_index = []
    post_index = []
    for i in range(len(extend_aggregation)):
        if not (extend_aggregation[i] in aggregation):
            pre_index.append(i)
    for i in range(len(aggregation)):
        if aggregation[i] in extend_aggregation:
            post_index.append(i)

    h = numpy.array(aggregation)
    h_ext = numpy.array(extend_aggregation)

    # Extend mean: E(h) = A*h, linear through the origin
    # --------------------------------------------------
    mean_new = []
    for g in range(n_group):
        mean_h = mean[g]
        Xo = [1]  # A
        Xf = fmin(mean_min, Xo, args=(h, mean_h), disp=0)
        A = Xf[0]
        mean_adj = A * h_ext
        mean_res = ([mean_adj[i] for i in pre_index] +
                    [mean_h[j] for j in post_index])
        mean_new.append(numpy.array(mean_res))

    # Extend dry spell fraction: dsf(h) = exp(A*h)
    # --------------------------------------------
    # TODO: include the different groupings
    dsf_new = []
    for g in range(n_group):
        dsf_h = dsf[g]
        Xo = [-1]  # A
        Xf = fmin(dsf_min, Xo, args=(h, dsf_h), disp=0)
        A = Xf[0]
        dsf_adj = numpy.exp(A * h_ext)
        dsf_res = ([dsf_adj[i] for i in pre_index] +
                   [dsf_h[j] for j in post_index])
        dsf_new.append(numpy.array(dsf_res))

    # Skewness: hard to extrapolate, so the observed values are used below
    # 24 hours
    # --------------------------------------------------------------------
    # TODO: include the different groupings
    skew_new = []
    for g in range(n_group):
        skew_h = skew[g]
        skew_adj = skew_ori[g]
        skew_res = ([skew_adj[i] for i in pre_index] +
                    [skew_h[j] for j in post_index])
        skew_new.append(numpy.array(skew_res))

    # Autocorrelation: very difficult to extend, so it is not perturbed and
    # the observed values are kept
    # ----------------------------------------------------------------------
    correl_new = correl_ori

    # Variance: fitted with the variance downscaling model
    # ----------------------------------------------------
    var_new = []
    for g in range(n_group):
        var_h = var[g]
        Xo = [0.5, 1, 0.8]
        Xf = fmin(var_min, Xo, args=(aggregation, var_h), disp=0)
        eps = Xf[0]    ## [h]
        Sig2i = Xf[1]  ## [mm^2]
        alp = Xf[2]    ## []
        var_adj = variance_t_downscaling(extend_aggregation, eps, Sig2i, alp)
        var_res = ([var_adj[i] for i in pre_index] +
                   [var_h[j] for j in post_index])
        var_new.append(numpy.array(var_res))

    # Coefficient of variation
    # ------------------------
    cv_new = [(var_new[i])**0.5 / mean_new[i] for i in range(len(mean_new))]

    dic = {'aggregation': extend_aggregation,
           'dsf': dsf_new,
           'mean': mean_new,
           'var': var_new,
           'skew': skew_new,
           'correl': correl_new,
           'cv': cv_new}

    wg_io.save(dic, ofile)
    return dic
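
# The objective functions mean_min, dsf_min and var_min used above are
# defined elsewhere in the module. As an assumption about their behaviour
# (not the module's actual definitions), a minimal least-squares form
# consistent with the fits above would be:
#
#   def mean_min(X, h, mean_h):    # E(h) = A*h, X = [A]
#       return numpy.sum((X[0] * numpy.asarray(h)
#                         - numpy.asarray(mean_h))**2)
#
#   def dsf_min(X, h, dsf_h):      # dsf(h) = exp(A*h), X = [A]
#       return numpy.sum((numpy.exp(X[0] * numpy.asarray(h))
#                         - numpy.asarray(dsf_h))**2)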
def change_factors(idir, ifile, extend_aggregation, aggregation, stats=None):
    r'''Compute change factors for the series statistics at the fine
    aggregations in `extend_aggregation`, as the ratio of the perturbed and
    extended statistics to the statistics of the original series in `ifile`.
    '''
    dic = calculate_change_factor(idir, ifile)
    fmin = scipy.optimize.fmin

    # Change-factor perturbed statistics at the coarse aggregations
    dsf = dic['dsf']
    mean = dic['mean']
    var = dic['var']
    skew = dic['skew']
    cv_ori = []
    correl_ori = []
    skew_ori = []
    mean_ori = []
    var_ori = []
    dsf_ori = []

    # Statistics of the original series
    # ---------------------------------
    dic = wg_io.load(ifile)
    ts = 1
    lag = 1
    accumulate = True
    group_type = 'm'
    dic_stat = wg_tool.series_statistics(dic, ts, extend_aggregation,
                                         lag=lag, group_type=group_type,
                                         accumulate=accumulate)
    # [Ey, VARy, CVy, SKy, Rly, DSFy]
    dicdic = dic_stat['period']
    for g in range(12):
        mean_ori.append(dicdic[0][g])
        var_ori.append(dicdic[1][g])
        cv_ori.append(dicdic[2][g])
        skew_ori.append(dicdic[3][g])
        correl_ori.append(dicdic[4][g])
        dsf_ori.append(dicdic[5][g])

    # Find the indexes where extension (fitted) values are used, below
    # 24 hours, and where the coarse values are kept, 24 hours and over
    n_group = len(dsf)
    pre_index = []
    post_index = []
    for i in range(len(extend_aggregation)):
        if not (extend_aggregation[i] in aggregation):
            pre_index.append(i)
    for i in range(len(aggregation)):
        if aggregation[i] in extend_aggregation:
            post_index.append(i)

    h = numpy.array(aggregation)
    h_ext = numpy.array(extend_aggregation)

    # Extend mean: E(h) = A*h, linear through the origin
    # --------------------------------------------------
    mean_new = []
    for g in range(n_group):
        mean_h = mean[g]
        Xo = [1]  # A
        Xf = fmin(mean_min, Xo, args=(h, mean_h), disp=0)
        A = Xf[0]
        mean_adj = A * h_ext
        mean_res = ([mean_adj[i] for i in pre_index] +
                    [mean_h[j] for j in post_index])
        mean_new.append(numpy.array(mean_res))

    # Extend dry spell fraction: dsf(h) = exp(A*h)
    # --------------------------------------------
    # TODO: include the different groupings
    dsf_new = []
    for g in range(n_group):
        dsf_h = dsf[g]
        Xo = [-1]  # A
        Xf = fmin(dsf_min, Xo, args=(h, dsf_h), disp=0)
        A = Xf[0]
        dsf_adj = numpy.exp(A * h_ext)
        dsf_res = ([dsf_adj[i] for i in pre_index] +
                   [dsf_h[j] for j in post_index])
        dsf_new.append(numpy.array(dsf_res))

    # Skewness: hard to extrapolate, so the observed values are used below
    # 24 hours
    # --------------------------------------------------------------------
    # TODO: include the different groupings
    skew_new = []
    for g in range(n_group):
        skew_h = skew[g]
        skew_adj = skew_ori[g]
        skew_res = ([skew_adj[i] for i in pre_index] +
                    [skew_h[j] for j in post_index])
        skew_new.append(numpy.array(skew_res))

    # Autocorrelation: very difficult to extend, so it is not perturbed and
    # the observed values are kept
    # ----------------------------------------------------------------------
    correl_new = correl_ori

    # Variance: fitted with the variance downscaling model
    # ----------------------------------------------------
    var_new = []
    for g in range(n_group):
        var_h = var[g]
        Xo = [0.5, 1, 0.8]
        Xf = fmin(var_min, Xo, args=(aggregation, var_h), disp=0)
        eps = Xf[0]    ## [h]
        Sig2i = Xf[1]  ## [mm^2]
        alp = Xf[2]    ## []
        var_adj = variance_t_downscaling(extend_aggregation, eps, Sig2i, alp)
        var_res = ([var_adj[i] for i in pre_index] +
                   [var_h[j] for j in post_index])
        var_new.append(numpy.array(var_res))

    # Coefficient of variation
    # ------------------------
    cv_new = [(var_new[i])**0.5 / mean_new[i] for i in range(len(mean_new))]

    dic = {'aggregation': extend_aggregation,
           'dsf': dsf_new,
           'mean': mean_new,
           'var': var_new,
           'skew': skew_new,
           'correl': correl_new,
           'cv': cv_new}

    # Change factors: extended statistics over the original ones
    mean_cf = numpy.array(mean_new) / numpy.array(mean_ori)
    var_cf = numpy.array(var_new) / numpy.array(var_ori)
    cv_cf = numpy.array(cv_new) / numpy.array(cv_ori)
    skew_cf = numpy.array(skew_new) / numpy.array(skew_ori)
    correl_cf = numpy.array(correl_new) / numpy.array(correl_ori)
    dsf_cf = numpy.array(dsf_new) / numpy.array(dsf_ori)

    ## Define the statistics to use in the model, so that it can be decided
    ## interactively which ones to use
    #              0     1       2      3      4      5
    stats_names = ['Eh', 'VARh', 'CVh', 'Rlh', 'SKh', 'FFh']
    if stats is None:
        stats = [2, 3, 4, 5]
    stats_names_used = [stats_names[i] for i in stats]
    print(stats_names_used)

    # Arrange the factors so they are easy to use in the parameter generator
    all_stats = []
    ep_stats = []
    for g in range(n_group):
        temp_stats = []
        temp_ep = []
        for ag in range(len(extend_aggregation)):
            # [Eh, VARh, CVh, Rlh, SKh, FFh]
            stats_list = [mean_cf[g][ag], var_cf[g][ag], cv_cf[g][ag],
                          correl_cf[g][ag], skew_cf[g][ag], dsf_cf[g][ag]]
            stats_used = [stats_list[i] for i in stats]
            temp_stats = temp_stats + stats_used
            temp_ep = temp_ep + [mean_cf[g][ag]]
        all_stats.append(numpy.array(temp_stats))
        ep_stats.append(numpy.array(temp_ep))
    print(all_stats)
    print(ep_stats)

    return {'cf_stats': all_stats, 'cf_mean': ep_stats}
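
# A minimal sketch of the pre_index/post_index splice shared by extend_fine
# and change_factors: fitted values fill the fine aggregations, observed
# values are kept from 24 hours up. The aggregation lists are illustrative.
def _demo_index_splice():
    aggregation = [24, 48, 72, 96]
    extend_aggregation = [1, 3, 6, 12, 24, 48, 72, 96]
    pre_index = [i for i, a in enumerate(extend_aggregation)
                 if a not in aggregation]       # fitted values used here
    post_index = [i for i, a in enumerate(aggregation)
                  if a in extend_aggregation]   # observed values kept here
    print('pre: %s, post: %s' % (pre_index, post_index))
    # -> pre: [0, 1, 2, 3], post: [0, 1, 2, 3]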
def ev1_multiple(idir, duration, return_period, plot_pos='w',
                 accumulate=False):
    r'''Calculate the extreme value type 1 (Gumbel) distribution of rainfall
    for different rainfall durations and return periods, using the median of
    the annual maxima over all the series found in `idir`. This can then be
    used to produce intensity-duration-frequency curves.

    Parameters
    ----------
    idir : string
        Directory with the input series, each readable by wg_io.load
    duration : list
        Durations (in time steps) of the moving window maxima
    return_period : list
        Return periods (in years) to evaluate
    plot_pos : string
        Plotting position P = (i-b)/(n + 1 - 2*b), where n is the number of
        items, i is the index in the sorted list and b is given by:
            'w' Weibull     0.0
            'c' Chegodayev  0.30
            't' Tukey       0.33333
            'b' Blom        0.375
            'g' Gringorten  0.44
            'h' Hazen       0.5
    accumulate : bool
        Passed to wg_tool.get_max

    Returns
    -------
    A dictionary with the regression coefficients, annual maxima,
    probabilities, reduced variates and the idf_data array.
    '''
    ifiles = os.listdir(idir)
    plot_position = {'w': 0.0,
                     'c': 0.30,
                     't': 0.333333333333333333,
                     'b': 0.375,
                     'g': 0.440,
                     'h': 0.5}
    if str(plot_pos) in plot_position:
        b = plot_position[plot_pos]
    else:
        print('Selected plotting position incorrect. Using Weibull by default')
        b = plot_position['w']
    # Plotting position: (i-b)/(n + 1 - 2*b)

    # Get basic data from the first file
    ifile = os.path.join(idir, ifiles[0])
    dic = wg_io.load(ifile)
    year = dic['year']
    year_max = int(numpy.amax(year))
    year_min = int(numpy.amin(year))

    rain_max_list = []
    for i in ifiles:
        print(i)
        ifile = os.path.join(idir, i)
        dic = wg_io.load(ifile)
        rain = dic['rainfall']
        year = dic['year']
        dic_max = wg_tool.get_max(rain, year, duration, accumulate=accumulate)
        rain_max_list.append(dic_max['rain'])

    # Fit the EV1 line to the median of the annual maxima over all series
    size = dic_max['size']
    prob = [(i - b) / (size + 1.0 - 2.0 * b) for i in range(1, size + 1)]
    prob = numpy.array(prob)
    ln_ = -numpy.log(-numpy.log(prob))
    rain_median = numpy.median(numpy.array(rain_max_list), axis=0)
    coefficient = []
    for step in range(len(duration)):
        (m, c, r, tt, stderr) = scipy.stats.linregress(ln_, rain_median[step])
        coefficient.append([m, c, r])

    ## Create the array to use in plotting later
    idf = []
    r_p = numpy.array(return_period) * 1.0
    ln_calc = -numpy.log(-numpy.log(1.0 - (1.0 / r_p)))
    ## mbr stands for Slope(m) Y-intercept(b) Rcoefficient(r)
    for mbr in coefficient:
        i_calc = ln_calc * mbr[0] + mbr[1]
        idf.append(i_calc)
    idf_data = numpy.transpose(numpy.array(idf))

    return {'coefficients': coefficient,  ## [slope, y-int, R]
            'rain': rain_max_list,
            'probability': prob,
            'ln': ln_,
            'idf_data': idf_data,
            'duration': duration,
            'year_max': year_max,
            'year_min': year_min}
def ev1(ifile, duration, return_period, plot_pos='w', accumulate=False):
    r'''Calculate the extreme value type 1 (Gumbel) distribution of rainfall
    for different rainfall durations and return periods. This can then be
    used to produce intensity-duration-frequency curves.

    Parameters
    ----------
    ifile : string
        Input series readable by wg_io.load ('rainfall' and 'year' keys)
    duration : list
        Durations (in time steps) of the moving window maxima
    return_period : list
        Return periods (in years) to evaluate
    plot_pos : string
        Plotting position P = (i-b)/(n + 1 - 2*b), where n is the number of
        items, i is the index in the sorted list and b is given by:
            'w' Weibull     0.0
            'c' Chegodayev  0.30
            't' Tukey       0.33333
            'b' Blom        0.375
            'g' Gringorten  0.44
            'h' Hazen       0.5
    accumulate : bool
        Passed to the moving window function

    Returns
    -------
    A dictionary with the regression coefficients, annual maxima,
    probabilities, reduced variates and the idf_data array.
    '''
    dic = wg_io.load(ifile)
    rain = dic['rainfall']
    year = dic['year']
    plot_position = {'w': 0.0,
                     'c': 0.30,
                     't': 0.333333333333333333,
                     'b': 0.375,
                     'g': 0.440,
                     'h': 0.5}
    if str(plot_pos) in plot_position:
        b = plot_position[plot_pos]
    else:
        print('Selected plotting position incorrect. Using Weibull by default')
        b = plot_position['w']
    # Plotting position: (i-b)/(n + 1 - 2*b)

    # Common use functions
    movmean = wg_tool.movnanmean

    year_max = int(numpy.amax(year))
    year_min = int(numpy.amin(year))

    coefficient = []
    rain_max_list = []
    probability = []
    ln = []
    for step in duration:
        rain_year_max = []
        for y in range(year_min, year_max + 1):
            rain_per_year = rain[(year == y)]
            ##! This guards the rolling window function in case there are
            ## years with no data.
            if rain_per_year.shape[0] > step:
                rain_year_m_ave = movmean(rain_per_year, step, 1, accumulate)
                temp_calc = numpy.nanmax(rain_year_m_ave)
                ##! If the moving average produces NaN, nanmax ignores it.
                ## Is this appropriate in every case?
                if not numpy.isnan(temp_calc):
                    rain_year_max.append(temp_calc)
        size = len(rain_year_max)
        sorting = numpy.sort(rain_year_max)
        print(size)

        ## Only do this once: the sample size is the same for every duration
        if step == duration[0]:
            # Plotting position probability
            prob = [(i - b) / (size + 1.0 - 2.0 * b)
                    for i in range(1, size + 1)]
            prob = numpy.array(prob)
            ## Extreme value 1 reduced variate
            ln_ = -numpy.log(-numpy.log(prob))
        probability.append(prob)
        ln.append(ln_)
        (m, c, r, tt, stderr) = scipy.stats.linregress(ln_, sorting)
        coefficient.append([m, c, r])
        rain_max_list.append(sorting)

    ## Create the array to use in plotting later
    idf = []
    r_p = numpy.array(return_period) * 1.0
    ln_calc = -numpy.log(-numpy.log(1.0 - (1.0 / r_p)))
    ## mbr stands for Slope(m) Y-intercept(b) Rcoefficient(r)
    for mbr in coefficient:
        i_calc = ln_calc * mbr[0] + mbr[1]
        idf.append(i_calc)
    idf_data = numpy.transpose(numpy.array(idf))

    return {'coefficients': coefficient,  ## [slope, y-int, R]
            'rain': rain_max_list,
            'probability': probability,
            'ln': ln,
            'idf_data': idf_data,
            'duration': duration,
            'year_max': year_max,
            'year_min': year_min}
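
# A minimal sketch of the EV1 (Gumbel) fit used by ev1/ev1_multiple, run on
# a synthetic sample of annual maxima. The Weibull plotting position
# P = (i - b)/(n + 1 - 2*b) with b = 0 gives the non-exceedance probability
# and -ln(-ln(P)) the reduced variate, exactly as in the functions above.
def _demo_ev1_fit():
    numpy.random.seed(1)
    annual_max = numpy.sort(numpy.random.gumbel(20.0, 5.0, 30))
    n = len(annual_max)
    b = 0.0  # Weibull
    prob = numpy.array([(i - b) / (n + 1.0 - 2.0 * b)
                        for i in range(1, n + 1)])
    ln_ = -numpy.log(-numpy.log(prob))
    m, c, r, p, se = scipy.stats.linregress(ln_, annual_max)
    # Rain depth for a 10-year return period, as in the idf step above
    rp = 10.0
    x10 = m * (-numpy.log(-numpy.log(1.0 - 1.0 / rp))) + c
    print('slope %.2f, intercept %.2f, R %.3f, 10-yr %.1f' % (m, c, r, x10))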
def run_gen(ifile, ofile, start_year, end_year):
    r'''Generate an hourly rainfall series from fitted NSRP parameters.

    Parameters
    ----------
    ifile : String
        File with the fitted parameters per group, readable by wg_io.load
    ofile : String
        File to save the generated series to
    start_year : Integer
        Inclusive
    end_year : Integer
        Inclusive

    Returns
    -------
    A dictionary with the generated series and its broken-down date vectors.
    '''
    dic = wg_io.load(ifile)
    day_normal = dic['day_normal']
    day_leap = dic['day_leap']
    list_lambda_ = dic['lambda_']
    list_beta = dic['beta']
    list_mu_c = dic['mu_c']
    list_eta = dic['eta']
    list_alpha = dic['alpha']
    list_theta = dic['theta']

    size = len(day_normal)
    period = (end_year - start_year)
    day_location = [0]
    # Create a vector containing the exact amount of hourly time steps
    # needed to fill the period
    for i in range(period):
        if calendar.isleap(start_year + i):
            day_location = day_location + day_leap
        else:
            day_location = day_location + day_normal
    hour_index = numpy.array(day_location) * 24
    hour_index_cum = numpy.cumsum(hour_index)
    data_size = numpy.sum(hour_index)
    hour_grouped = [[hour_index_cum[i], hour_index_cum[i + 1]]
                    for i in range(size * period)]

    data = numpy.zeros(data_size)
    for i in range(size):
        day_size = day_leap[i]
        year_size = int(numpy.round((day_size / 365.25) * period, 0))
        lambda_ = list_lambda_[i]
        beta = list_beta[i]
        mu_c = list_mu_c[i]
        eta = list_eta[i]
        alpha = list_alpha[i]
        theta = list_theta[i]
        temp_data = compute_NSRP(lambda_, beta, mu_c, eta, alpha, theta,
                                 year_number=year_size, storm=None,
                                 DEBUG=False)
        # Scatter the simulated hours of group i into their slots in the
        # output vector, one slot per simulated year
        t_i_start = 0
        for p in range(period):
            list_index = (size * p + i)
            i_start = hour_grouped[list_index][0]
            i_end = hour_grouped[list_index][1]
            delta = i_end - i_start
            t_i_end = t_i_start + delta
            data[i_start:i_end] = temp_data[t_i_start:t_i_end]
            t_i_start = t_i_end

    # Create time vectors
    ## Data holders for date, variable and the broken date
    date = numpy.zeros(data_size)
    year = numpy.zeros(data_size, dtype=int)
    month = numpy.zeros(data_size, dtype=int)
    day = numpy.zeros(data_size, dtype=int)
    hour = numpy.zeros(data_size, dtype=int)
    minute = numpy.zeros(data_size, dtype=int)
    doy = numpy.zeros(data_size, dtype=int)

    date_format = '%Y-%m-%d %H:%M'
    date_start_string = str(start_year) + '-01-01 00:00'
    date_start = datetime.datetime.strptime(date_start_string, date_format)
    for t in range(data_size):
        datevalue = date_start + datetime.timedelta(hours=t)
        date[t] = float(matplotlib.dates.date2num(datevalue))
        year[t] = datevalue.year
        month[t] = datevalue.month
        day[t] = datevalue.day
        hour[t] = datevalue.hour
        minute[t] = datevalue.minute
        doy[t] = datevalue.timetuple().tm_yday

    comment = ''
    dic = {'date': date,
           'rainfall': data,
           'year': year,
           'month': month,
           'day': day,
           'hour': hour,
           'minute': minute,
           'doy': doy,
           'comment': comment}
    wg_io.save(dic, ofile)
    return dic
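
# A minimal sketch of the calendar splicing in run_gen: monthly day counts
# are mapped to hourly index ranges for a two-year period starting in a leap
# year. The monthly day_normal/day_leap lists are illustrative; run_gen
# reads them from the parameter file.
def _demo_hour_index():
    day_normal = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    day_leap = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    start_year, period = 2000, 2
    day_location = [0]
    for i in range(period):
        if calendar.isleap(start_year + i):
            day_location = day_location + day_leap
        else:
            day_location = day_location + day_normal
    hour_index_cum = numpy.cumsum(numpy.array(day_location) * 24)
    # First slot is January of the leap year: hours [0, 744)
    print(hour_index_cum[:4])  # -> [   0  744 1440 2184]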
    ## Tail of the series statistics function, whose body is not included in
    ## this section
    return [Eh, VARh, CVh, Rh, SKh, FFh, Fddh, Fwwh]


def run_para(ifile, ofile, ts, aggregation, lag, group_type='m', stats=None,
             weights=None, MY_METHOD=True, idir_cf=None, aggregation_cf=None):
    r'''Fit the generator parameters to the series statistics, optionally
    perturbed by climate change factors.
    '''
    if idir_cf is None:
        change_factors = None
    else:
        # Calculate the change factors
        #aggregation_cf = [24, 48, 72, 96]
        change_factors = wg_downscale.change_factors(idir_cf, ifile,
                                                     aggregation,
                                                     aggregation_cf)

    # Load data
    dic = wg_io.load(ifile)
    rain_data = dic['rainfall']

    # Make the grouping for the parameters
    if group_type == 'm':
        group_month = dic['month']
        rain_grouped = wg_tool.group_by(rain_data, group_month)
        #             Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
        day_normal = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
        day_leap = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    elif group_type == 'y':
        rain_grouped = [rain_data]
        day_normal = [365]
        day_leap = [366]
    else: