import os
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import differential_evolution

# Helper functions such as get_theo_vg_vals, get_l2_norm, disagg_vg_str,
# get_clustered_vgs, vg_calib and the elementary variogram models
# (nug_vg, sph_vg, exp_vg, lin_vg, gau_vg, pow_vg, hol_vg) are assumed to be
# defined elsewhere in this project.


def obj_ftn(
        new_crds,
        crds,
        evgs,
        n_extra_dims,
        n_lags,
        fit_vg_str,
        calls_ctr):

    # The first two entries of new_crds are the sill and the range of the
    # variogram model; the remaining entries are the extra coordinate
    # dimensions for all points.
    fit_vg_str = f'{new_crds[0]:0.5f} {fit_vg_str}({new_crds[1]:0.1f})'

    old_crds = crds[:, -n_extra_dims:].copy()

    crds[:, -n_extra_dims:] = new_crds[2:].reshape(crds.shape[0], n_extra_dims)

    # print(new_crds.min(), new_crds.max())

    # Theoretical variogram values for all temporal lags up to n_lags.
    tvgs = []
    for i in range(1, n_lags + 1):
        dists = get_l2_norm(crds[i:, :], crds[:-i, :])

        tvgs.extend(get_theo_vg_vals(fit_vg_str, dists).tolist())

    tvgs = np.array(tvgs)

    # Sum of squared differences between empirical and theoretical values.
    obj_val = ((evgs - tvgs) ** 2).sum()

    calls_ctr[0] += 1

    print(f'{obj_val:0.5E}', calls_ctr[0])

    # Restore the original coordinates so the caller's array is unchanged.
    crds[:, -n_extra_dims:] = old_crds

    return obj_val
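# A minimal sketch (not part of the original source) of how obj_ftn above
# could be driven by scipy's differential_evolution, the optimizer already
# used in get_vg below. The function name fit_extra_dims_sketch, the bound
# values, and maxiter are illustrative assumptions only.
def fit_extra_dims_sketch(crds, evgs, n_extra_dims, n_lags, fit_vg_str):

    calls_ctr = [0]

    # Bounds: sill, range, then one bound per extra coordinate per point.
    bounds = (
        [(1e-8, 2.0 * evgs.max()), (1e-8, 1e6)] +
        [(-1e5, 1e5)] * (crds.shape[0] * n_extra_dims))

    opt = differential_evolution(
        obj_ftn,
        bounds,
        args=(crds, evgs, n_extra_dims, n_lags, fit_vg_str, calls_ctr),
        maxiter=100,
        polish=False)

    return opt.x, opt.fun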
def main():

    main_dir = Path(os.getcwd())
    os.chdir(main_dir)

    h_arr = np.linspace(0, 1e6, int(1e1))

    print(get_theo_vg_vals('100 Sph(10000) + 10 Exp(1000000)', h_arr))
    return
def get_lags_evgs_tvgs(ts_vals, crds, n_lags, fit_vg_str):

    lags, evgs, tvgs = [], [], []

    for i in range(1, n_lags + 1):
        diffs = (ts_vals[i:] - ts_vals[:-i]) ** 2
        dists = get_l2_norm(crds[i:, :], crds[:-i, :])

        lags.extend(dists.tolist())
        evgs.extend(diffs.tolist())
        tvgs.extend(get_theo_vg_vals(fit_vg_str, dists).tolist())

    return lags, evgs, tvgs
def get_lags_evgs_tvgs(ts_vals, crds, fit_vg_str):

    # All-pairs (i < j) variant: every pair of points contributes one lag,
    # one empirical and one theoretical semi-variogram value.
    lags, evgs, tvgs = [], [], []

    for i in range(0, ts_vals.shape[0]):
        val_1 = ts_vals[i]
        crds_1 = crds[i, :]

        for j in range(i + 1, ts_vals.shape[0]):
            diff = (val_1 - ts_vals[j]) ** 2
            dist = get_l2_norm(crds_1, crds[j, :])

            lags.append(dist)
            evgs.append(diff)

    lags = np.array(lags)

    # extend (not append) so that lags, evgs and tvgs have the same length.
    tvgs.extend(get_theo_vg_vals(fit_vg_str, lags).tolist())

    return lags, evgs, tvgs
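# A hedged usage sketch of the all-pairs variant on synthetic data. It
# assumes get_l2_norm and get_theo_vg_vals (used inside get_lags_evgs_tvgs)
# are available in this module; the variogram string, seed, and array sizes
# are arbitrary illustrations.
def _demo_lags_evgs_tvgs():

    rng = np.random.default_rng(0)

    ts_vals = rng.normal(size=20)
    crds = rng.uniform(0, 1e4, size=(20, 2))

    lags, evgs, tvgs = get_lags_evgs_tvgs(ts_vals, crds, '1.0 Sph(5000)')

    # Mean squared mismatch between empirical and theoretical values.
    print(((np.array(evgs) - np.array(tvgs)) ** 2).mean())
    return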
def get_mean_vg(vg_strs_ser, dists):

    assert vg_strs_ser.size

    if vg_strs_ser.size == 1:
        mean_vg_str = vg_strs_ser.iloc[0]

    else:
        vgs = []
        vg_perm_rs = []
        vg_stat_vals = np.zeros((vg_strs_ser.size, dists.size))

        for j, vg_str in enumerate(vg_strs_ser):
            vg_stat_vals[j, :] = get_theo_vg_vals(vg_str, dists)

            for i, vg in enumerate(disagg_vg_str(vg_str)[1], start=1):
                if i not in vg_perm_rs:
                    vg_perm_rs.append(i)

                if vg in vgs:
                    continue

                vgs.append(vg)

        vg_vals = vg_stat_vals.mean(axis=0)

        # median might be a problem if vgs don't have the same rise rate.
        # vg_vals = np.median(vg_stat_vals, axis=0)

        assert dists.size == vg_vals.size

        get_vg_args = (
            dists,
            vg_vals,
            'mean_vg',
            vgs,
            vg_perm_rs,
            1000,
            False,
            False,
            None,
            None,
            False,
            dists[-1] + 1)

        mean_vg_str = get_vg(get_vg_args)[1]

    return mean_vg_str
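# A hedged usage sketch for get_mean_vg: average two fitted variogram strings
# over a common distance grid and refit a single model to the mean curve. The
# strings, station labels, and distance grid are made up for illustration;
# disagg_vg_str, get_theo_vg_vals, vg_calib and the variogram model functions
# are assumed to be available in this module.
def _demo_mean_vg():

    vg_strs_ser = pd.Series(
        ['1.0 Sph(20000)', '0.8 Exp(30000)'], index=['stn_a', 'stn_b'])

    dists = np.linspace(0.0, 50e3, 50)

    mean_vg_str = get_mean_vg(vg_strs_ser, dists)

    print(mean_vg_str)
    return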
def get_vg(args):

    (dists,
     vg_vals,
     dist_lab,
     mix_vg_list,
     perm_r_list,
     opt_iters,
     wt_by_dist_flag,
     plot_flag,
     fig_size,
     out_dir,
     plt_at_zero_dist_flag,
     max_fit_dist) = args

    fit_idxs = dists <= max_fit_dist

    dists_fit = dists[fit_idxs]
    vg_val_fit = vg_vals[fit_idxs]

    perm_r_list = np.array(np.unique(perm_r_list), dtype=int)
    perm_r_list = perm_r_list[perm_r_list <= len(mix_vg_list)]

    all_mix_vg_ftns = {
        'Nug': nug_vg,
        'Sph': sph_vg,
        'Exp': exp_vg,
        'Lin': lin_vg,
        'Gau': gau_vg,
        'Pow': pow_vg,
        'Hol': hol_vg}

    min_obj = np.inf
    best_vg_name = ''
    best_vg_param = ''

    lb_thresh = 1e-8  # Lower bound (used instead of zero).

    max_dist_thresh = max(1e6, dists.max())

    var_multpr = 2

    for perm_r in perm_r_list:
        vg_perms = combinations(mix_vg_list, int(perm_r))

        skip_perm_list = []

        for vg_strs in vg_perms:
            if vg_strs in skip_perm_list:
                # If a given permutation was seen already, don't run it again.
                continue

            mix_vg_names = []  # To hold the variogram names and ftns.
            bounds = []

            for i, vg_name in enumerate(vg_strs):
                mix_vg_names.append((vg_name, all_mix_vg_ftns[vg_name]))

                if vg_name == 'Pow':
                    sub_bounds = [
                        (lb_thresh, 2),
                        (lb_thresh, var_multpr * vg_vals.max())]

                else:
                    sub_bounds = [
                        (dists.min(), max_dist_thresh),
                        (lb_thresh, var_multpr * vg_vals.max())]

                bounds.extend([tuple(l) for l in sub_bounds])

            opt = differential_evolution(
                vg_calib,
                tuple(bounds),
                args=(mix_vg_names, dists_fit, vg_val_fit, wt_by_dist_flag),
                maxiter=opt_iters,
                popsize=len(bounds) * 50,
                polish=False)

            assert opt.success, 'Optimization did not succeed!'

            # Conditions for an optimization result to be selected:
            # 1: Obj ftn value less than the previous * fit_thresh.
            # 2: Ranges of the variograms are in ascending order.

            # Minimize type optimization:
            rngs = opt.x[0::2].copy()
            sills = opt.x[1::2].copy()

            # Using the Akaike Information Criterion (AIC) to select a model.
            curr_AIC = (
                (vg_vals.size * np.log(opt.fun)) + (2 * opt.x.shape[0]))

            cond_1_fun = curr_AIC < min_obj * (1. - 1e-2)

            cond_2_fun = np.all(np.where(np.ediff1d(rngs) < 0, False, True))

            if not cond_2_fun:
                # Flipping ranges and sills into the correct order.
                sort_idxs = np.argsort(rngs)

                rngs = rngs[sort_idxs]
                sills = sills[sort_idxs]

                adj_perm = np.array(vg_strs)[sort_idxs]

                skip_perm_list.append(tuple(adj_perm))

                mix_vg_names = np.array(mix_vg_names)[sort_idxs]

                cond_2_fun = np.all(
                    np.where(np.ediff1d(rngs) < 0, False, True))

            prms = np.zeros((2 * rngs.shape[0]), dtype=np.float64)

            prms[0::2] = rngs
            prms[1::2] = sills

            if (cond_1_fun and cond_2_fun):
                min_obj = curr_AIC
                best_vg_name = mix_vg_names
                best_vg_param = prms

    vg_str = ''  # Final nested variogram string.
    for i in range(len(best_vg_name)):
        prms = best_vg_param[(i * 2): (i * 2 + 2)]

        vg_str += (
            ' + %0.5f %s(%0.1f)' % (prms[1], best_vg_name[i][0], prms[0]))

    if vg_str:
        vg_str = vg_str[3:]

    print(dist_lab, vg_str)

    assert vg_str, 'No vg fitted!'

    if plt_at_zero_dist_flag:
        theo_dists = np.concatenate(([0.0], dists))

    else:
        theo_dists = dists

    theo_vg_vals = get_theo_vg_vals(vg_str, theo_dists)

    if plot_flag:
        plt.figure(figsize=fig_size)

        plt.plot(
            dists,
            vg_vals,
            label='empirical',
            lw=3,
            alpha=0.4,
            color='red')

        plt.plot(
            theo_dists,
            theo_vg_vals,
            label='theoretical',
            lw=1,
            alpha=0.6,
            color='blue')

        plt.legend()

        plt.xlabel('Distance')
        plt.ylabel('Semi-variogram')

        plt.title(f'{dist_lab}\n{vg_str}')

        plt.grid()
        plt.gca().set_axisbelow(True)

        plt.savefig(
            str(out_dir / f'{dist_lab}_vg.png'), bbox_inches='tight')

        plt.close()

    return (dist_lab, vg_str, theo_dists, theo_vg_vals)
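# A hedged sketch of driving get_vg directly. The "empirical" values are
# generated from a known spherical model via get_theo_vg_vals plus noise, so
# the fitted string should come out close to '1.0 Sph(20000)'. It assumes
# get_theo_vg_vals, vg_calib and the variogram model functions exist in this
# module; all numbers are illustrative.
def _demo_get_vg():

    dists = np.linspace(100.0, 50e3, 50)

    vg_vals = (
        get_theo_vg_vals('1.0 Sph(20000)', dists) +
        np.random.default_rng(1).normal(0, 0.01, dists.size))

    get_vg_args = (
        dists,
        vg_vals,
        'demo',          # dist_lab
        ['Sph', 'Exp'],  # mix_vg_list
        [1],             # perm_r_list
        100,             # opt_iters
        False,           # wt_by_dist_flag
        False,           # plot_flag
        None,            # fig_size
        None,            # out_dir
        False,           # plt_at_zero_dist_flag
        dists[-1] + 1)   # max_fit_dist

    print(get_vg(get_vg_args)[1])
    return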
def main():

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\variograms\comb_vg\temp_1961_2015_with_zeros\vgs_CP')

    os.chdir(main_dir)

    # Something needed with an actual range.
    allowed_vgs = ['Sph', 'Exp']  # , 'Gau']

    in_vg_strs_file = Path('vgs.csv')
    sep = ';'

    # max_rng can be None or a float.
    # When None, the maximum range over all vgs is taken.
    max_rng = 250e3

    n_fit_dists = 50

    max_nbr_dist = 50e3

    n_rnd_pts = int(1e2)
    n_sims = int(1e2)

    ks_alpha = 0.99

    n_sel_thresh = 1000

    abs_thresh_wt = (1e-2)  # * n_rnd_pts

    out_fig_name = 'clustered_vgs.png'
    fig_size = (10, 7)

    out_vgs_sers_name = 'clustered_vgs.csv'

    # krg_wts_exp = 0.1

    vg_strs_ser_main = pd.read_csv(
        in_vg_strs_file, sep=sep, index_col=0, squeeze=True)

    if max_rng is None:
        max_rng = -np.inf

        for vg_str in vg_strs_ser_main:
            _, vgs, rngs = disagg_vg_str(vg_str)

            assert all([vg in allowed_vgs for vg in vgs])

            rng = max(rngs)

            if rng >= max_rng:
                max_rng = rng

    elif isinstance(max_rng, (int, float)):
        max_rng = float(max_rng)

    else:
        raise ValueError('Invalid max_rng:', max_rng)

    print('max_rng:', max_rng)

    cluster_args = (
        vg_strs_ser_main,
        max_rng,
        n_fit_dists,
        n_sims,
        n_rnd_pts,
        abs_thresh_wt,
        ks_alpha,
        n_sel_thresh,
        max_nbr_dist)

    vg_clusters = get_clustered_vgs(cluster_args)

    print('Done fitting.')

    print('Refitting...')

    theo_dists = np.linspace(0, max_rng, n_fit_dists)

    refit_vgs = []

    out_clustered_ser = pd.Series(index=vg_strs_ser_main.index, dtype=object)

    for vg_cluster in vg_clusters:
        print(vg_cluster)

        refit_vg_str = get_mean_vg(
            vg_strs_ser_main.loc[vg_cluster[1]], theo_dists)

        refit_vgs.append(refit_vg_str)

        print(vg_cluster[0], refit_vg_str)

        for vg_label in vg_cluster[1]:
            out_clustered_ser.loc[vg_label] = refit_vg_str

    out_clustered_ser.to_csv(out_vgs_sers_name, sep=sep)

    plt.figure(figsize=fig_size)

    leg_flag = True
    for vg_str in vg_strs_ser_main:
        if leg_flag:
            label = f'old(n={vg_strs_ser_main.size})'
            leg_flag = False

        else:
            label = None

        plt.plot(
            theo_dists,
            get_theo_vg_vals(vg_str, theo_dists),
            label=label,
            alpha=0.5,
            c='red')

    leg_flag = True
    for vg_str in refit_vgs:
        if leg_flag:
            label = f'new(n={len(refit_vgs)})'
            leg_flag = False

        else:
            label = None

        plt.plot(
            theo_dists,
            get_theo_vg_vals(vg_str, theo_dists),
            label=label,
            alpha=0.5,
            c='blue')

    plt.legend()

    plt.grid()
    plt.gca().set_axisbelow(True)

    plt.xlabel('Distance')
    plt.ylabel('Semi-variogram')

    # plt.show()

    plt.savefig(out_fig_name, bbox_inches='tight')
    plt.close()
    return