def main():
    """Fit variograms to several transforms of a daily station series.

    Reads station values and coordinates from the working directory, keeps
    the configured years and only the stations without any missing value,
    and derives the Fourier magnitude and phase of the normal-score
    transformed series. For every label in ``vg_vars`` the matching table
    is written to ``out_dir / '<label>.csv'`` and passed to
    ``FitVariograms``, whose outputs go to ``out_dir / '<label>'``.

    Raises:
        ValueError: If ``vg_vars`` holds a label without a matching branch.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr')

    os.chdir(main_dir)

    in_data_file = Path(r'precipitation.csv')
    in_crds_file = Path(r'precipitation_coords.csv')

    sep = ';'
    time_fmt = '%Y-%m-%d'

    beg_year = 1991
    end_year = 1991  # NOTE: skips last odd step.

    out_dir = Path(r'precipitation_kriging')

    min_valid_stns = 10

    # Passed unchanged to FitVariograms.set_vg_fitting_parameters.
    mdr = 0.5
    perm_r_list = [1, 2]
    fit_vgs = ['Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 20
    figs_flag = True

    vg_vars = ['orig', 'mag', 'phs', 'data', ]  # 'cos', 'sin',

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)
    data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    # The last record of the selected period is always dropped here.
    data_df = data_df.loc[f'{beg_year}':f'{end_year}'].iloc[:-1]

    # Reassign instead of dropping in place: data_df is a slice of the
    # table that was read, and in-place edits of slices trigger pandas'
    # chained-assignment warnings.
    data_df = data_df.dropna(axis=1, how='any')

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y', 'Z']]
    crds_df = crds_df.loc[data_df.columns]

    crds_df.to_csv(Path(in_crds_file.stem + '_subset.csv'), sep=sep)

    # Column-wise plotting positions, then their standard normal scores.
    probs_df = data_df.rank(axis=0) / (data_df.shape[0] + 1)

    norms_df = pd.DataFrame(
        data=norm.ppf(probs_df.values), columns=data_df.columns)

    # Real-input FFT along the time axis, one spectrum per station.
    ft_df = pd.DataFrame(
        data=np.fft.rfft(norms_df, axis=0), columns=data_df.columns)

    mag_df = pd.DataFrame(data=np.abs(ft_df), columns=data_df.columns)
    phs_df = pd.DataFrame(data=np.angle(ft_df), columns=data_df.columns)

    # Shift negative phases by a full turn so that none stays below zero.
    phs_le_idxs = phs_df < 0
    phs_df[phs_le_idxs] = (2 * np.pi) + phs_df[phs_le_idxs]

    for part in vg_vars:
        (out_dir / part).mkdir(exist_ok=True)

        if part == 'mag':
            part_df = mag_df

        elif part == 'phs':
            part_df = phs_df

        elif part == 'cos':
            part_df = pd.DataFrame(
                data=np.cos(phs_df), columns=data_df.columns)

        elif part == 'sin':
            part_df = pd.DataFrame(
                data=np.sin(phs_df), columns=data_df.columns)

        elif part == 'data':
            # Every station column sorted independently; the time index is
            # kept only as a row label. Built as a new frame because
            # writing through ``.values`` needs a writable view, which
            # pandas does not guarantee (read-only under Copy-on-Write).
            part_df = pd.DataFrame(
                data=np.sort(data_df.values, axis=0),
                index=data_df.index,
                columns=data_df.columns)

            # part_df = part_df.iloc[-2:]

        elif part == 'orig':
            part_df = data_df.copy()

        else:
            raise ValueError(f'Undefined: {part}!')

        part_df.to_csv(out_dir / f'{part}.csv', sep=sep)

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type='obj')

        fit_vg_cls.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next part is processed.
        fit_vg_cls = None

    return
def main():
    """Fit variograms to the real and imaginary parts of station spectra.

    Reads station values and coordinates from the working directory, keeps
    the configured years and only the stations without any missing value,
    removes the validation stations, and takes the real-input FFT of every
    remaining station along time. The real and the imaginary parts are
    each written to ``out_dir / '<part>.csv'`` and passed to
    ``FitVariograms``, whose outputs go to ``out_dir / '<part>'``.

    Raises:
        ValueError: If a validation station is not among the stations that
            remain after subsetting.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\ft_spatio_temporal_interps'
    )

    os.chdir(main_dir)

    in_data_file = Path(r'precipitation.csv')
    in_crds_file = Path(r'precipitation_coords.csv')

    sep = ';'
    time_fmt = '%Y-%m-%d'

    beg_year = 1989
    end_year = 1992  # NOTE: skips last odd step.

    out_dir = Path(r'precipitation_interpolation_validation')

    min_valid_stns = 10

    # Selected post subsetting.
    # validation_cols = []  # ['T3705', 'T1875', 'T5664', 'T1197']
    validation_cols = ['P3733', 'P3315', 'P3713', 'P3454']

    # Passed unchanged to FitVariograms.set_vg_fitting_parameters.
    mdr = 0.7
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Nug'
    n_best = 1
    ngp = 5
    figs_flag = True

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)
    data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    # The last record of the selected period is always dropped here.
    data_df = data_df.loc[f'{beg_year}':f'{end_year}'].iloc[:-1]

    # Reassign instead of dropping in place: data_df is a slice of the
    # table that was read.
    data_df = data_df.dropna(axis=1, how='any')

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y', 'Z']]
    crds_df = crds_df.loc[data_df.columns]

    crds_df.to_csv(Path(in_crds_file.stem + '_subset.csv'), sep=sep)

    if validation_cols:
        # An explicit check instead of ``assert``, which is removed when
        # Python runs with -O.
        missing_cols = [
            validation_col for validation_col in validation_cols
            if validation_col not in crds_df.index]

        if missing_cols:
            raise ValueError(
                f'Validation stations without coordinates: {missing_cols}!')

        # Hold the validation stations out of the fitting.
        crds_df = crds_df.loc[crds_df.index.difference(
            pd.Index(validation_cols))]

        data_df = data_df[crds_df.index]

    ft_df = pd.DataFrame(
        data=np.fft.rfft(data_df, axis=0), columns=data_df.columns)

    for part in ['real', 'imag']:
        # The other entry points of this file create this directory before
        # handing it to FitVariograms; do the same here for consistency.
        (out_dir / part).mkdir(exist_ok=True)

        # ``part`` names the ndarray attribute (.real or .imag) to take.
        part_df = pd.DataFrame(
            getattr(ft_df.values, part), columns=data_df.columns)

        part_df.to_csv(out_dir / f'{part}.csv', sep=sep)

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type='obj')

        fit_vg_cls.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next part is processed.
        fit_vg_cls = None

    return
def main():
    """Fit variograms to transforms of a sub-daily station series.

    Loads the station values from a CSV or pickle file, keeps the
    configured time window, drops the last record when the number of
    records is odd, and keeps only the stations without any missing value.
    For every label in ``vg_vars`` the matching table is written to
    ``out_dir / '<label>' / '<label>.csv'`` and passed to
    ``FitVariograms``, whose outputs go to ``out_dir / '<label>'``.

    Raises:
        NotImplementedError: If the data file is neither ``.csv`` nor
            ``.pkl``.
        ValueError: If ``vg_vars`` holds a label without a matching branch.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr\5min\v7_long_range'
    )

    os.chdir(main_dir)

    in_data_file = Path(
        r'../neckar_1min_ppt_data_20km_buff_Y2009__RR5min_RTsum.pkl')

    in_crds_file = Path(r'../metadata_ppt_gkz3_crds.csv')  # has X, Y cols

    sep = ';'
    time_fmt = '%Y-%m-%d %H:%M:%S'

    beg_time = '2009-01-01 00:00:00'
    end_time = '2009-03-31 23:59:00'

    out_dir = main_dir

    min_valid_stns = 10

    # Passed unchanged to FitVariograms.set_vg_fitting_parameters.
    mdr = 0.5
    perm_r_list = [1, 2]
    fit_vgs = ['Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 20
    figs_flag = True

    vg_vars = [
        'orig',
        'data',
    ]  # 'phs', 'mag', 'sin', 'cos',

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    if in_data_file.suffix == '.csv':
        data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)
        data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    elif in_data_file.suffix == '.pkl':
        # NOTE(review): unpickling executes arbitrary code; only load
        # pickle files that come from a trusted source.
        data_df = pd.read_pickle(in_data_file)

    else:
        raise NotImplementedError(
            f'Unknown extension of in_data_file: {in_data_file.suffix}!')

    data_df = data_df.loc[beg_time:end_time]

    # Keep an even number of time steps.
    if data_df.shape[0] % 2:
        data_df = data_df.iloc[:-1, :]
        print('Dropped last record in data_df!')

    # Reassign instead of dropping in place: data_df is a slice of the
    # table that was loaded.
    data_df = data_df.dropna(axis=1, how='any')

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y']]
    crds_df = crds_df.loc[data_df.columns]

    # Column-wise plotting positions, then their standard normal scores.
    probs_df = data_df.rank(axis=0) / (data_df.shape[0] + 1)

    norms_df = pd.DataFrame(
        data=norm.ppf(probs_df.values), columns=data_df.columns)

    # Real-input FFT along the time axis, one spectrum per station.
    ft_df = pd.DataFrame(
        data=np.fft.rfft(norms_df, axis=0), columns=data_df.columns)

    mag_df = pd.DataFrame(data=np.abs(ft_df), columns=data_df.columns)
    phs_df = pd.DataFrame(data=np.angle(ft_df), columns=data_df.columns)

    # Shift negative phases by a full turn so that none stays below zero.
    phs_le_idxs = phs_df < 0
    phs_df[phs_le_idxs] = (2 * np.pi) + phs_df[phs_le_idxs]

    for part in vg_vars:
        (out_dir / part).mkdir(exist_ok=True)

        # Spectral tables carry a plain integer index; only the tables that
        # keep the time index are handed over with index_type='date'.
        index_type = 'obj'

        if part == 'mag':
            part_df = mag_df

        elif part == 'phs':
            part_df = phs_df

        elif part == 'cos':
            part_df = pd.DataFrame(
                data=np.cos(phs_df), columns=data_df.columns)

        elif part == 'sin':
            part_df = pd.DataFrame(
                data=np.sin(phs_df), columns=data_df.columns)

        elif part == 'data':
            # Every station column sorted independently. Built as a new
            # frame because writing through ``.values`` needs a writable
            # view, which pandas does not guarantee (read-only under
            # Copy-on-Write).
            part_df = pd.DataFrame(
                data=np.sort(data_df.values, axis=0),
                index=data_df.index,
                columns=data_df.columns)

            index_type = 'date'

            # part_df = part_df.iloc[-2:]

        elif part == 'orig':
            part_df = data_df.copy()

            index_type = 'date'

        else:
            raise ValueError(f'Undefined: {part}!')

        part_df.to_csv(out_dir / f'{part}/{part}.csv', sep=sep)

        # continue

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type=index_type)

        fit_vg_cls.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next part is processed.
        fit_vg_cls = None

    return
def main():
    """Prepare kriging inputs: fixed variograms for spectra, fitted for data.

    Reads station values and coordinates from the working directory, keeps
    the configured years and only the stations without any missing value,
    removes the validation stations, and derives the Fourier magnitude and
    phase of the normal-score transformed series. Every processed part is
    written to ``out_dir / '<part>.csv'``. For ``mag``, ``cos`` and ``sin``
    a preset variogram string is repeated for every row and stored in
    ``out_dir / '<part>' / 'vg_strs.csv'``; the remaining parts are passed
    to ``FitVariograms``, whose outputs go to ``out_dir / '<part>'``.

    Raises:
        ValueError: If a validation station is not among the stations that
            remain after subsetting, or if a part label has no matching
            branch.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr'
    )

    os.chdir(main_dir)

    in_data_file = Path(r'precipitation.csv')
    in_crds_file = Path(r'precipitation_coords.csv')

    sep = ';'
    time_fmt = '%Y-%m-%d'

    beg_year = 1991
    end_year = 1991  # NOTE: skips last odd step.

    out_dir = Path(r'precipitation_kriging')

    min_valid_stns = 10

    # Selected post subsetting.
    validation_cols = []  # ['T3705', 'T1875', 'T5664', 'T1197']
    # validation_cols = ['P3733', 'P3315', 'P3713', 'P3454']

    # Passed unchanged to FitVariograms.set_vg_fitting_parameters.
    mdr = 0.7
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 10
    figs_flag = True

    # mag_cftn = None
    # cos_sin_cftn = None

    mag_cftn = '0.02154 Sph(8998.9) + 0.91539 Exp(100566492.6) + 0.05894 Sph(73391.2)'
    cos_sin_cftn = '0.89903 Sph(848353919.8) + 0.38155 Exp(112531.1) + 0.11241 Sph(6980.2)'

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)
    data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    # The last record of the selected period is always dropped here.
    data_df = data_df.loc[f'{beg_year}':f'{end_year}'].iloc[:-1]

    # Reassign instead of dropping in place: data_df is a slice of the
    # table that was read.
    data_df = data_df.dropna(axis=1, how='any')

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y', 'Z']]
    crds_df = crds_df.loc[data_df.columns]

    crds_df.to_csv(Path(in_crds_file.stem + '_subset.csv'), sep=sep)

    if validation_cols:
        # An explicit check instead of ``assert``, which is removed when
        # Python runs with -O.
        missing_cols = [
            validation_col for validation_col in validation_cols
            if validation_col not in crds_df.index]

        if missing_cols:
            raise ValueError(
                f'Validation stations without coordinates: {missing_cols}!')

        # Hold the validation stations out of the fitting.
        crds_df = crds_df.loc[crds_df.index.difference(
            pd.Index(validation_cols))]

        data_df = data_df[crds_df.index]

    # Column-wise plotting positions, then their standard normal scores.
    probs_df = data_df.rank(axis=0) / (data_df.shape[0] + 1)

    norms_df = pd.DataFrame(
        data=norm.ppf(probs_df.values), columns=data_df.columns)

    # Real-input FFT along the time axis, one spectrum per station.
    ft_df = pd.DataFrame(
        data=np.fft.rfft(norms_df, axis=0), columns=data_df.columns)

    mag_df = pd.DataFrame(data=np.abs(ft_df), columns=data_df.columns)
    phs_df = pd.DataFrame(data=np.angle(ft_df), columns=data_df.columns)

    # Parts that get a preset variogram string instead of a fit.
    preset_cftns = {
        'mag': mag_cftn,
        'cos': cos_sin_cftn,
        'sin': cos_sin_cftn,
    }

    # Parts whose columns are sorted independently before fitting.
    sort_srcs = {
        'data': data_df,
        'probs': probs_df,
        'norms': norms_df,
    }

    for part in [
        'data',
        'mag',
        'cos',
        'sin',
    ]:  # 'probs', 'norms',

        (out_dir / part).mkdir(exist_ok=True)

        if part == 'mag':
            part_df = mag_df

        elif part == 'cos':
            part_df = pd.DataFrame(
                data=np.cos(phs_df), columns=data_df.columns)

        elif part == 'sin':
            part_df = pd.DataFrame(
                data=np.sin(phs_df), columns=data_df.columns)

        elif part in sort_srcs:
            src_df = sort_srcs[part]

            # Built as a new frame because writing through ``.values``
            # needs a writable view, which pandas does not guarantee
            # (read-only under Copy-on-Write).
            part_df = pd.DataFrame(
                data=np.sort(src_df.values, axis=0),
                index=src_df.index,
                columns=src_df.columns)

        else:
            raise ValueError(f'Undefined: {part}!')

        part_df.to_csv(out_dir / f'{part}.csv', sep=sep)

        if part in preset_cftns:
            # One copy of the preset string per row of the part table.
            out_ser = pd.Series(index=part_df.index, dtype=object)
            out_ser[:] = preset_cftns[part]

            out_ser.to_csv(out_dir / f'{part}/vg_strs.csv', sep=sep)

            # Nothing to fit for this part.
            continue

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type='obj')

        fit_vg_cls.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next part is processed.
        fit_vg_cls = None

    return
def main():
    """Fit variograms for every configured variable over a date window.

    For each name in ``vg_vars`` the input and output locations come from
    the matching ``get_*_paths`` helper. The values are limited to the
    configured dates, the stations in ``drop_stns`` are removed, time steps
    without any value are discarded, and the result is passed together
    with the station coordinates to ``FitVariograms``.

    Raises:
        RuntimeError: If ``vg_vars`` holds a name without a matching
            branch.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr')

    os.chdir(main_dir)

    vg_vars = ['mean_temp']

    strt_date = '1991-01-01'
    end_date = '1991-12-30'
    min_valid_stns = 10

    drop_stns = []  # ['T3705', 'T1875', 'T5664', 'T1197']
    # drop_stns = ['P3733', 'P3315', 'P3713', 'P3454']

    # Passed unchanged to FitVariograms.set_vg_fitting_parameters.
    mdr = 0.7
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 5
    figs_flag = False

    n_cpus = 8

    sep = ';'

    for vg_var in vg_vars:
        if vg_var == 'mean_temp':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_mean_temp_paths()

        elif vg_var == 'min_temp':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_min_temp_paths()

        elif vg_var == 'max_temp':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_max_temp_paths()

        elif vg_var == 'ppt':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_ppt_paths()

        else:
            raise RuntimeError(f'Unknown vg_var: {vg_var}!')

        in_vals_df = pd.read_csv(
            in_vals_df_loc, sep=sep, index_col=0, encoding='utf-8')

        in_vals_df.index = pd.to_datetime(in_vals_df.index, format='%Y-%m-%d')

        in_vals_df = in_vals_df.loc[strt_date:end_date, :]

        # Reassign instead of editing in place: in_vals_df is a slice of
        # the table that was read, and in-place edits of slices trigger
        # pandas' chained-assignment warnings.
        if drop_stns:
            in_vals_df = in_vals_df.drop(labels=drop_stns, axis=1)

        # Discard time steps at which no station has a value.
        in_vals_df = in_vals_df.dropna(how='all', axis=0)

        in_coords_df = pd.read_csv(
            in_stn_coords_df_loc, sep=sep, index_col=0, encoding='utf-8')

        # Station labels as strings, whatever type the CSV parser chose.
        in_coords_df.index = list(map(str, in_coords_df.index))

        if drop_stns:
            in_coords_df = in_coords_df.drop(labels=drop_stns, axis=0)

        in_coords_df = in_coords_df[['X', 'Y', 'Z']].astype(float)

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(in_vals_df, in_coords_df)

        fit_vg_cls.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next variable is processed.
        fit_vg_cls = None

    return
def main():
    """Fit variograms for every configured variable, optionally on extremes.

    For each name in ``vg_vars`` the input and output locations come from
    the matching ``get_*_paths`` helper. The values are limited to the
    configured dates, optionally to a list of good stations and to the
    time steps listed in the extremes file, then aligned with the station
    coordinates and passed to ``FitVariograms``.

    Raises:
        RuntimeError: If ``vg_vars`` holds a name without a matching
            branch.
    """

    main_dir = Path(
        r'X:\hiwi\ElHachem\Prof_Bardossy\Extremes\kriging_ppt_netatmo')
    # main_dir = Path(
    #     r"/run/media/abbas/EL Hachem 2019/home_office")
    #     r'X:\staff\elhachem\2020_10_03_Rheinland_Pfalz')

    os.chdir(main_dir)

    vg_vars = ['ppt']  # ['ppt']

    strt_date = '2017-01-01'
    end_date = '2019-12-31'
    min_valid_stns = 5

    drop_stns = []

    # Passed unchanged to FitVariograms.set_vg_fitting_parameters.
    mdr = 0.5
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Nug'  # 'Nug'
    n_best = 2
    ngp = 5
    figs_flag = True

    fit_for_extreme_events = True
    use_netatmo_good_stns = False
    DWD_stations = True

    n_cpus = 4

    sep = ';'  # ;

    for vg_var in vg_vars:
        if vg_var == 'mean_temp':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_mean_temp_paths()

        elif vg_var == 'min_temp':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_min_temp_paths()

        elif vg_var == 'max_temp':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir) = get_max_temp_paths()

        elif vg_var == 'ppt':
            (in_vals_df_loc,
             in_stn_coords_df_loc,
             out_dir,
             path_to_netatmo_gd_stns_file,
             path_to_netatmo_ppt_extreme,
             path_to_dwd_ppt_extreme) = get_ppt_paths()

        else:
            raise RuntimeError(f'Unknown vg_var: {vg_var}!')

        # NOTE(review): the good-stations and extremes paths are only
        # bound in the 'ppt' branch; the switches below need them.

        # added by Abbas
        if use_netatmo_good_stns:
            in_df_stns = pd.read_csv(
                path_to_netatmo_gd_stns_file, index_col=0, sep=';')

            good_netatmo_stns = list(in_df_stns.values.ravel())

        # ``infer_datetime_format`` is deprecated since pandas 2.0 and is
        # not needed for the index to be parsed as dates.
        in_vals_df = pd.read_csv(
            in_vals_df_loc,
            sep=sep,
            index_col=0,
            encoding='utf-8',
            parse_dates=True,
            engine='c')

        # in_vals_df.index = pd.to_datetime(in_vals_df.index,
        #                                   format='%Y-%m-%d')

        in_vals_df = in_vals_df.loc[strt_date:end_date, :]

        if use_netatmo_good_stns:
            in_vals_df = in_vals_df.loc[:, good_netatmo_stns]

        # Reassign instead of editing in place: in_vals_df is a slice of
        # the table that was read.
        if drop_stns:
            in_vals_df = in_vals_df.drop(labels=drop_stns, axis=1)

        # Discard time steps at which no station has a value.
        in_vals_df = in_vals_df.dropna(how='all', axis=0)

        # added by Abbas, for edf
        # in_vals_df = in_vals_df[in_vals_df >= 0]

        in_coords_df = pd.read_csv(
            in_stn_coords_df_loc, sep=sep, index_col=0, encoding='utf-8')

        if fit_for_extreme_events:
            df_extremes = pd.read_csv(
                path_to_dwd_ppt_extreme,
                sep=';',
                index_col=0,
                parse_dates=True).dropna(how='all')

            # Keep only the time steps that are listed as extreme events.
            in_vals_df = in_vals_df.loc[
                in_vals_df.index.intersection(df_extremes.index), :]

        if DWD_stations:
            # added by Abbas, for DWD stations
            # Left-pad the station ids with zeros to five characters.
            # Every coordinate row gets its padded label; stations that do
            # not appear in in_vals_df are removed by the alignment below.
            # (Filtering the label list before assigning it as the index
            # fails with a length mismatch as soon as one is filtered.)
            in_coords_df.index = [
                str(stn_id).rjust(5, '0') for stn_id in in_coords_df.index]

        # Station labels as strings, whatever type the CSV parser chose.
        in_coords_df.index = list(map(str, in_coords_df.index))

        if drop_stns:
            in_coords_df = in_coords_df.drop(labels=drop_stns, axis=0)

        # Align both tables on the stations they share, in the column
        # order of in_vals_df. Selecting one table by all labels of the
        # other raises KeyError whenever a label is missing.
        common_stns = in_vals_df.columns.intersection(in_coords_df.index)

        in_vals_df = in_vals_df.loc[:, common_stns]
        in_coords_df = in_coords_df.loc[common_stns, :]

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(in_vals_df, in_coords_df)

        fit_vg_cls.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next variable is processed.
        fit_vg_cls = None

    return