# Standard-library and third-party imports used throughout this module.
import os
import platform
import logging
import timeit
from copy import deepcopy
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Project-internal helpers (I/O wrappers, anomaly/TC routines, the API model
# and the Kalman filter variants) are assumed importable from the surrounding
# package: HSAF_io, MSWEP_io, SMOS_io, ISMN_io, CCISM_io, LDAS_io, io,
# calc_anomaly, estimate_gamma, generate_precipitation, df_match, tc,
# tcol_snr, bootstrap_tc, lonlat2gpi, template_error_Tb40, API, KF, KF_2D,
# EnKF, MadKF.


def calc_gamma_map():
    fname = r"D:\data_sets\MSWEP_V21\data\grid_new.csv"

    ascat = HSAF_io()
    mswep = MSWEP_io()

    mswep.grid['gamma'] = np.nan

    for i, (precip, info) in enumerate(mswep.iter_gp()):
        print(i)
        if len(precip.dropna()) == 0:
            continue
        try:
            precip = calc_anomaly(precip, method='harmonic', longterm=False)
            sm = calc_anomaly(ascat.read(info.dgg_gpi)['2007-01-01':'2016-12-31'],
                              method='harmonic', longterm=False)
            ts = pd.concat((precip, sm), axis=1).values
            mswep.grid.loc[info.name, 'gamma'] = estimate_gamma(ts[:, 0], ts[:, 1])
        except Exception:
            continue

    mswep.grid.dropna().to_csv(fname)
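
# calc_anomaly(method='harmonic') is project-internal. A minimal stand-alone
# sketch of the underlying idea - fit the mean seasonal cycle with the first
# few harmonics of the annual cycle and subtract it - assuming daily data on
# a DatetimeIndex (illustrative only, not the package implementation):
def _harmonic_anomaly_sketch(ser, n_harmonics=3):
    """Subtract a least-squares harmonic fit of the seasonal cycle."""
    doy = ser.index.dayofyear.values
    X = [np.ones(len(ser))]
    for k in range(1, n_harmonics + 1):
        X += [np.sin(2 * np.pi * k * doy / 365.25),
              np.cos(2 * np.pi * k * doy / 365.25)]
    X = np.column_stack(X)
    valid = ser.notna().values
    coef, *_ = np.linalg.lstsq(X[valid], ser.values[valid], rcond=None)
    return ser - X.dot(coef)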
def generate_soil_moisture(size=5000, gamma=0.85, precip=None, scale=15,
                           anomaly=False):
    """Generate a soil moisture time series based on the API model."""

    if precip is None:
        precip = generate_precipitation(size=size, scale=scale)
    else:
        size = len(precip)

    if anomaly:
        precip = calc_anomaly(
            pd.Series(precip, index=pd.date_range(start='2010-01-01',
                                                  periods=size))).values

    # API recursion: sm[t] = gamma * sm[t-1] + precip[t]
    sm_true = np.zeros(size)
    for t in np.arange(1, size):
        sm_true[t] = gamma * sm_true[t - 1] + precip[t]

    return sm_true, precip
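
# Minimal usage sketch for the API model above. generate_precipitation() is
# project-internal, so exponentially distributed noise is used here as a
# stand-in forcing (an assumption, not the package's generator):
def _demo_api_model():
    rng = np.random.default_rng(0)
    precip_demo = rng.exponential(scale=15, size=1000)
    sm_demo, _ = generate_soil_moisture(gamma=0.85, precip=precip_demo)
    # For |gamma| < 1 the recursion is a stationary AR(1) process whose
    # variance approaches var(precip) / (1 - gamma**2).
    print(sm_demo.var(), precip_demo.var() / (1 - 0.85 ** 2))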
def plot_cat_timeseries():
    outpath = r'D:\work\LDAS\2018-02_scaling\_new\ismn_eval\timeseries'
    fname = r"D:\work\LDAS\2018-02_scaling\_new\ismn_eval\validation.csv"

    res = pd.read_csv(fname)

    diff_srf = res['corr_DA_cal_pent_ma_sm_surface'] - res['corr_DA_uncal_pent_ma_sm_surface']
    diff_rz = res['corr_DA_cal_pent_ma_sm_rootzone'] - res['corr_DA_uncal_pent_ma_sm_rootzone']
    diff_prof = res['corr_DA_cal_pent_ma_sm_profile'] - res['corr_DA_uncal_pent_ma_sm_profile']

    ind = (diff_srf > 0.2) | (diff_rz > 0.2) | (diff_prof > 0.2)
    res = res.loc[ind, ['network', 'station', 'lat', 'lon']]

    ismn = ISMN_io()
    cal = LDAS_io('xhourly', 'US_M36_SMOS_DA_calibrated_scaled')
    uncal = LDAS_io('xhourly', 'US_M36_SMOS_DA_nocal_scaled_pentadal')

    variables = ['sm_surface', 'sm_rootzone', 'sm_profile']

    for idx, stat in res.iterrows():
        fname = os.path.join(outpath, stat.network + '_' + stat.station + '.png')

        ts_ismn = ismn.read(stat.network, stat.station)
        lat = stat.lat
        lon = stat.lon

        plt.figure(figsize=(17, 9))

        for i, var in enumerate(variables):
            ax = plt.subplot(3, 1, i + 1)

            ts_cal = calc_anomaly(cal.read_ts(var, lon, lat), method='ma')
            ts_cal.index += pd.to_timedelta('2 hours')
            ts_uncal = calc_anomaly(uncal.read_ts(var, lon, lat), method='ma')
            ts_uncal.index += pd.to_timedelta('2 hours')

            df = pd.DataFrame({'cal': ts_cal,
                               'uncal': ts_uncal,
                               'insitu': calc_anomaly(ts_ismn[var], method='ma')}).dropna()
            if len(df) == 0:
                continue
            df.plot(ax=ax)

            title = 'R(ismn - cal) = %.2f , R(ismn - uncal) = %.2f' % (
                df.corr().loc['insitu', 'cal'], df.corr().loc['insitu', 'uncal'])
            ax.set_title(title, fontsize=12)
            ax.set_xlim('2010-01-01', '2016-01-01')
            ax.set_ylim(-0.3, 0.3)
            ax.set_xlabel('')

        plt.tight_layout()
        plt.savefig(fname, dpi=150)
        plt.close()
def run(part):
    parts = 6

    result_file = r'D:\work\ESA_CCI_SM\validation_%i.csv' % part

    cci = CCISM_io()
    ismn = ISMN_io()
    # ismn.list = ismn.list.iloc[100:120]

    # Split station list into `parts` chunks for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    periods = {'p1': ['2007-10-01', '2010-01-14'],
               'p2': ['2010-01-15', '2011-10-04'],
               'p3': ['2011-10-05', '2012-06-30'],
               'p4': ['2012-07-01', '2014-12-31']}
    freq = ['abs', 'anom']

    corr_tags = ['corr_' + m + '_' + v + '_' + p + '_' + f
                 for m in cci.modes for v in cci.versions
                 for p in periods.keys() for f in freq]
    p_tags = ['p_' + m + '_' + v + '_' + p + '_' + f
              for m in cci.modes for v in cci.versions
              for p in periods.keys() for f in freq]
    n_tags = ['n_' + m + '_' + v + '_' + p + '_' + f
              for m in cci.modes for v in cci.versions
              for p in periods.keys() for f in freq]

    res = ismn.list.copy()
    res.drop(['ease_col', 'ease_row'], axis='columns', inplace=True)
    for col in corr_tags + p_tags:
        res[col] = np.nan
    for col in n_tags:
        res[col] = 0

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations()):
        print('%i/%i (Proc %i)' % (i, len(ismn.list), part))

        if ts_insitu is None:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue

        ts_insitu = ts_insitu[periods['p1'][0]:periods['p4'][1]]
        if len(ts_insitu) < 10:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue

        df_insitu = pd.DataFrame(ts_insitu).dropna()
        df_insitu_anom = pd.DataFrame(calc_anomaly(ts_insitu)).dropna()

        for m in cci.modes:
            df_cci = cci.read(meta.lon, meta.lat, mode=m).dropna()
            if len(df_cci) < 10:
                print('No CCI ' + m + ' data for ' + meta.network + ' / ' + meta.station)
                continue

            for f in freq:
                if f == 'abs':
                    matched = df_match(df_cci, df_insitu, window=0.5)
                else:
                    for v in cci.versions:
                        df_cci.loc[:, m + '_' + v] = calc_anomaly(df_cci[m + '_' + v])
                    df_cci.dropna(inplace=True)
                    if (len(df_cci) < 10) | (len(df_insitu_anom) < 10):
                        print('No in situ or CCI ' + m + ' anomaly data for ' +
                              meta.network + ' / ' + meta.station)
                        continue
                    matched = df_match(df_cci, df_insitu_anom, window=0.5)

                data = df_cci.join(matched['insitu']).dropna()

                for p in periods.keys():
                    vals = data[periods[p][0]:periods[p][1]].values
                    n_matches = vals.shape[0]
                    if n_matches < 10:
                        continue

                    for k, v in enumerate(cci.versions):
                        corr, p_value = pearsonr(vals[:, k], vals[:, -1])
                        res.loc[meta.name, 'corr_' + m + '_' + v + '_' + p + '_' + f] = corr
                        res.loc[meta.name, 'p_' + m + '_' + v + '_' + p + '_' + f] = p_value
                        res.loc[meta.name, 'n_' + m + '_' + v + '_' + p + '_' + f] = n_matches

    res.to_csv(result_file, float_format='%0.4f')
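
# df_match is assumed to behave like pytesmo's temporal_matching.df_match:
# nearest-neighbour matching of the second frame onto the first frame's
# timestamps within `window` days (0.5 = +/- 12 h). An illustrative toy call
# (made-up data; the real inputs are the CCI and in situ frames above):
def _demo_df_match():
    ref = pd.DataFrame({'cci': [0.2, 0.3]},
                       index=pd.to_datetime(['2010-01-01 06:00', '2010-01-02 06:00']))
    other = pd.DataFrame({'insitu': [0.25, 0.35]},
                         index=pd.to_datetime(['2010-01-01', '2010-01-02']))
    return df_match(ref, other, window=0.5)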
def insitu_evaluation():
    result_file = r'D:\work\LDAS\2018-06_rmse_uncertainty\insitu_evaluation\validation.csv'

    noDA = LDAS_io('xhourly', 'US_M36_SMOS40_noDA_cal_scaled')
    DA_const_err = LDAS_io('xhourly', 'US_M36_SMOS40_DA_cal_scaled')
    DA_varia_err = LDAS_io('xhourly', 'US_M36_SMOS40_DA_cal_scl_errfile')

    t_ana = pd.DatetimeIndex(LDAS_io('ObsFcstAna', 'US_M36_SMOS40_DA_cal_scaled')
                             .timeseries.time.values).sort_values()

    ismn = ISMN_io(col_offs=noDA.grid.tilegrids.loc['domain', 'i_offg'],
                   row_offs=noDA.grid.tilegrids.loc['domain', 'j_offg'])

    runs = ['noDA', 'DA_const_err', 'DA_varia_err']
    tss = [noDA.timeseries, DA_const_err.timeseries, DA_varia_err.timeseries]

    variables = ['sm_surface', 'sm_rootzone', 'sm_profile']
    # modes = ['absolute', 'longterm', 'shortterm']
    modes = ['absolute', ]

    # ismn.list = ismn.list.iloc[101::]
    i = 0
    for meta, ts_insitu in ismn.iter_stations():
        i += 1
        logging.info('%i/%i' % (i, len(ismn.list)))

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row

        for var in variables:
            for mode in modes:

                if mode == 'absolute':
                    ts_ref = ts_insitu[var].dropna()
                elif mode == 'mean':
                    ts_ref = calc_anomaly(ts_insitu[var], mode).dropna()
                else:
                    ts_ref = calc_anomaly(ts_insitu[var],
                                          method='moving_average',
                                          longterm=(mode == 'longterm')).dropna()

                for run, ts_model in zip(runs, tss):

                    # Mask out snow-covered and near-frozen conditions
                    ind = (ts_model['snow_mass'][row, col].values == 0) & \
                          (ts_model['soil_temp_layer1'][row, col].values > 277.15)
                    ts_mod = ts_model[var][row, col].to_series().loc[ind]
                    ts_mod.index += pd.to_timedelta('2 hours')
                    # TODO: Make sure that the time stamps of the netCDF file are correct!

                    if mode == 'absolute':
                        ts_mod = ts_mod.dropna()
                    else:
                        ts_mod = calc_anomaly(ts_mod,
                                              method='moving_average',
                                              longterm=(mode == 'longterm')).dropna()

                    tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).loc[t_ana, :].dropna()

                    res['len_' + mode + '_' + var] = len(tmp)
                    r, p = pearsonr(tmp[1], tmp[2])
                    res['corr_' + run + '_' + mode + '_' + var] = \
                        r if (r > 0) & (p < 0.01) else np.nan
                    res['rmsd_' + run + '_' + mode + '_' + var] = \
                        np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                    res['ubrmsd_' + run + '_' + mode + '_' + var] = \
                        np.sqrt((((tmp[1] - tmp[1].mean()) -
                                  (tmp[2] - tmp[2].mean())) ** 2).mean())

        if not os.path.isfile(result_file):
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)
def plot_timeseries():
    # Colorado
    # lat = 39.095962936
    # lon = -106.918945312
    # Nebraska
    # lat = 41.203456192
    # lon = -102.249755859
    # New Mexico
    # lat = 31.522361470
    # lon = -108.528442383
    # Oklahoma
    lat = 35.205233348
    lon = -97.910156250

    exp = 'SMAP_EASEv2_M36_NORTH_SCA_SMOSrw_DA'
    domain = 'SMAP_EASEv2_M36_NORTH'

    cal = LDAS_io('incr', 'US_M36_SMOS_DA_calibrated_scaled')
    uncal = LDAS_io('incr', 'US_M36_SMOS_DA_nocal_scaled_pentadal')

    incr_var_cal = (cal.timeseries['srfexc'] + cal.timeseries['rzexc'] -
                    cal.timeseries['catdef']).var(dim='time').values
    incr_var_uncal = (uncal.timeseries['srfexc'] + uncal.timeseries['rzexc'] -
                      uncal.timeseries['catdef']).var(dim='time').values

    col, row = LDAS_io().grid.lonlat2colrow(lon, lat, domain=True)

    title = 'increment variance (calibrated): %.2f increment variance (uncalibrated): %.2f' % (
        incr_var_cal[row, col], incr_var_uncal[row, col])
    # title = ''

    fontsize = 12

    cal = LDAS_io('ObsFcstAna', 'US_M36_SMOS_DA_calibrated_scaled')
    uncal = LDAS_io('ObsFcstAna', 'US_M36_SMOS_DA_nocal_scaled_pentadal')
    orig = LDAS_io('ObsFcstAna', 'US_M36_SMOS_noDA_unscaled')

    ts_obs_cal = cal.read_ts('obs_obs', lon, lat, species=3, lonlat=True)
    ts_obs_cal.name = 'Tb obs (calibrated)'
    ts_obs_uncal = uncal.read_ts('obs_obs', lon, lat, species=3, lonlat=True)
    ts_obs_uncal.name = 'Tb obs (uncalibrated)'
    ts_obs_orig = orig.read_ts('obs_obs', lon, lat, species=3, lonlat=True)
    ts_obs_orig.name = 'Tb obs (uncalibrated, unscaled)'

    ts_fcst_cal = cal.read_ts('obs_fcst', lon, lat, species=3, lonlat=True)
    ts_fcst_cal.name = 'Tb fcst (calibrated)'
    ts_fcst_uncal = uncal.read_ts('obs_fcst', lon, lat, species=3, lonlat=True)
    ts_fcst_uncal.name = 'Tb fcst (uncalibrated)'

    df = pd.concat((ts_obs_cal, ts_obs_uncal, ts_obs_orig,
                    ts_fcst_cal, ts_fcst_uncal), axis=1).dropna()

    plt.figure(figsize=(19, 8))

    ax1 = plt.subplot(211)
    df.plot(ax=ax1, ylim=[140, 300], xlim=['2010-01-01', '2017-01-01'],
            fontsize=fontsize, style=['-', '--', ':', '-', '--'], linewidth=2)
    plt.xlabel('')
    plt.title(title, fontsize=fontsize + 2)

    cols = df.columns.values
    for i, col in enumerate(df):
        df[col] = calc_anomaly(df[col], method='ma', longterm=True).values
        if i < 3:
            cols[i] = col[0:7] + 'anomaly ' + col[7::]
        else:
            cols[i] = col[0:7] + ' anomaly' + col[7::]
    df.columns = cols
    df.dropna(inplace=True)

    ax2 = plt.subplot(212, sharex=ax1)
    df.plot(ax=ax2, ylim=[-60, 60], xlim=['2010-01-01', '2017-01-01'],
            fontsize=fontsize, style=['-', '--', ':', '-', '--'], linewidth=2)
    plt.xlabel('')

    plt.tight_layout()
    plt.show()
def calc_lagged_corr():
    fout = Path('/Users/u0116961/Documents/work/deforestation_paper/lagged_corr_w_sif.csv')

    ds_lai = io('LAI')
    ds_vod = io('SMOS_IC')
    ds_met = io('MERRA2')
    ds_sif = io('SIF')

    date_from = '2010-01-01'
    date_to = '2019-12-31'

    for i, val in ds_lai.lut.iterrows():
        print(f'{i} / {len(ds_lai.lut)}')

        lai = ds_lai.read('LAI', i, date_from=date_from, date_to=date_to).dropna()
        if len(lai) == 0:
            continue

        vod = ds_vod.read('VOD', i, date_from=date_from, date_to=date_to)
        if (len(lai) > 0) & (len(vod) > 0):
            invalid = (ds_vod.read('Flags', i, date_from=date_from, date_to=date_to) > 0) | \
                      (ds_vod.read('RMSE', i, date_from=date_from, date_to=date_to) > 8) | \
                      (ds_vod.read('VOD_StdErr', i, date_from=date_from, date_to=date_to) > 1.2)
            vod[invalid] = np.nan
        vod = vod.dropna()
        if len(vod) == 0:
            continue

        sif = ds_sif.read('sif_dc', i, date_from=date_from, date_to=date_to)
        invalid = (ds_sif.read('n', i, date_from=date_from, date_to=date_to) <= 1) | \
                  (ds_sif.read('cloud_fraction', i, date_from=date_from, date_to=date_to) > 0.7)
        sif[invalid] = np.nan
        sif = sif.dropna()
        if len(sif) == 0:
            continue

        df_veg = pd.concat((lai, vod, sif), axis=1,
                           keys=['LAI', 'VOD', 'SIF']).resample('M').mean().dropna()
        for col in df_veg:
            df_veg[f'{col}_anom'] = calc_anomaly(df_veg[col], method='harmonic',
                                                 longterm=True, n=3)

        temp = ds_met.read('T2M', i, date_from=date_from, date_to=date_to)
        prec = ds_met.read('PRECTOTLAND', i, date_from=date_from, date_to=date_to)
        rad = ds_met.read('LWLAND', i, date_from=date_from, date_to=date_to) + \
              ds_met.read('SWLAND', i, date_from=date_from, date_to=date_to)

        df_met = pd.concat((temp, prec, rad), axis=1,
                           keys=['T', 'P', 'R']).resample('M').mean().dropna()
        if len(df_met) == 0:
            continue
        df_met['T_anom'] = calc_anomaly(df_met['T'], method='harmonic', longterm=True, n=3)
        df_met['P_anom'] = calc_anomaly(df_met['P'], method='harmonic', longterm=True, n=3)
        df_met['R_anom'] = calc_anomaly(df_met['R'], method='harmonic', longterm=True, n=3)

        tmp_df_met = df_met.copy()
        tmp_df_veg = df_veg.reindex(df_met.index).copy()
        tmp_df_veg.columns = tmp_df_veg.columns + '_nolag'
        tmp_df_met = pd.concat((tmp_df_met, tmp_df_veg), axis=1)
        tmp_df_met.index = np.arange(len(df_met))

        res = pd.DataFrame(index=(i,))
        for lag in np.arange(-6, 7):
            tmp_df_veg = df_veg.reindex(df_met.index)
            tmp_df_veg.index = np.arange(len(tmp_df_veg)) + lag

            corr = pd.concat((tmp_df_met, tmp_df_veg), axis=1).corr()

            # Pair each tag with its matching driver column
            # (T: temperature, P: precipitation, R: radiation).
            res[f'R_LAI_T_{lag}'] = corr['T']['LAI']
            res[f'R_LAI_P_{lag}'] = corr['P']['LAI']
            res[f'R_LAI_R_{lag}'] = corr['R']['LAI']
            res[f'R_VOD_T_{lag}'] = corr['T']['VOD']
            res[f'R_VOD_P_{lag}'] = corr['P']['VOD']
            res[f'R_VOD_R_{lag}'] = corr['R']['VOD']
            res[f'R_SIF_T_{lag}'] = corr['T']['SIF']
            res[f'R_SIF_P_{lag}'] = corr['P']['SIF']
            res[f'R_SIF_R_{lag}'] = corr['R']['SIF']
            res[f'R_LAI_VOD_{lag}'] = corr['LAI_nolag']['VOD']
            res[f'R_LAI_SIF_{lag}'] = corr['LAI_nolag']['SIF']
            res[f'R_VOD_SIF_{lag}'] = corr['VOD_nolag']['SIF']

            res[f'R_anom_LAI_T_{lag}'] = corr['T_anom']['LAI_anom']
            res[f'R_anom_LAI_P_{lag}'] = corr['P_anom']['LAI_anom']
            res[f'R_anom_LAI_R_{lag}'] = corr['R_anom']['LAI_anom']
            res[f'R_anom_VOD_T_{lag}'] = corr['T_anom']['VOD_anom']
            res[f'R_anom_VOD_P_{lag}'] = corr['P_anom']['VOD_anom']
            res[f'R_anom_VOD_R_{lag}'] = corr['R_anom']['VOD_anom']
            res[f'R_anom_SIF_T_{lag}'] = corr['T_anom']['SIF_anom']
            res[f'R_anom_SIF_P_{lag}'] = corr['P_anom']['SIF_anom']
            res[f'R_anom_SIF_R_{lag}'] = corr['R_anom']['SIF_anom']
            res[f'R_anom_LAI_VOD_{lag}'] = corr['LAI_anom_nolag']['VOD_anom']
            res[f'R_anom_LAI_SIF_{lag}'] = corr['LAI_anom_nolag']['SIF_anom']
            res[f'R_anom_VOD_SIF_{lag}'] = corr['VOD_anom_nolag']['SIF_anom']

        if fout.exists():
            res.to_csv(fout, float_format='%0.4f', mode='a', header=False)
        else:
            res.to_csv(fout, float_format='%0.4f')
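
# The lag construction above shifts one frame's integer index by `lag` and
# lets pandas align on index during concat, so corr() effectively compares
# df_met at time t with df_veg at time t - lag. A tiny self-contained
# illustration with made-up data:
def _demo_index_lag(lag=2):
    a = pd.Series(np.arange(10.))          # index 0..9
    b = pd.Series(np.arange(10.))
    b.index = np.arange(len(b)) + lag      # index lag..lag+9
    df = pd.concat((a, b), axis=1, keys=['a', 'b'])
    return df                              # b appears shifted by `lag` rows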
def run():
    exp = 'US_M36_SMOS40_noDA_cal_scaled'

    io = LDAS_io('ObsFcstAna', exp)

    froot = r"D:\data_sets\LDAS_runs" + "\\" + exp + "\\obs_err"
    fbase = 'SMOS_fit_Tb_'

    dtype = template_error_Tb40()[0]

    angles = np.array([40., ])
    orbits = ['A', 'D']

    tiles = io.grid.tilecoord['tile_id'].values.astype('int32')
    ind_lat = io.grid.tilecoord.loc[:, 'j_indg'].values - io.grid.tilegrids.loc['domain', 'j_offg']
    ind_lon = io.grid.tilecoord.loc[:, 'i_indg'].values - io.grid.tilegrids.loc['domain', 'i_offg']

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = io.grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = io.grid.tilecoord['com_lat'].values.astype('float32')

    modes = np.array([0, 0])
    sdate = np.array([2010, 1, 1, 0, 0])
    edate = np.array([2016, 12, 31, 0, 0])
    lengths = np.array([len(tiles), len(angles)])  # number of tiles and incidence angles

    dims = io.timeseries['obs_obs'].shape
    obs_errstd = np.full(dims[0:-1], 4.)

    # ----- Calculate anomalies -----
    cnt = 0
    for spc in np.arange(dims[0]):
        for lat in np.arange(dims[1]):
            for lon in np.arange(dims[2]):
                cnt += 1
                logging.info('%i / %i' % (cnt, np.prod(dims[0:-1])))

                try:
                    obs = calc_anomaly(io.timeseries['obs_obs'][spc, lat, lon, :]
                                       .to_dataframe()['obs_obs'],
                                       method='moving_average', longterm=True)
                    fcst = calc_anomaly(io.timeseries['obs_fcst'][spc, lat, lon, :]
                                        .to_dataframe()['obs_fcst'],
                                        method='moving_average', longterm=True)
                    fcst_errvar = np.nanmean(io.timeseries['obs_fcstvar'][spc, lat, lon, :].values)

                    # R = E[(O - F)^2] - P_fcst, assuming uncorrelated obs/fcst errors
                    tmp_obs_errstd = (((obs - fcst) ** 2).mean() - fcst_errvar) ** 0.5
                    if not np.isnan(tmp_obs_errstd):
                        obs_errstd[spc, lat, lon] = tmp_obs_errstd
                except Exception:
                    pass

    np.place(obs_errstd, obs_errstd < 0, 0)
    np.place(obs_errstd, obs_errstd > 20, 20)

    # ----- write output files -----
    for orb in orbits:
        # !!! inconsistent with the definition in the obs_paramfile (species) !!!
        modes[0] = 1 if orb == 'A' else 0

        res = template.copy()

        spc = 0 if orb == 'A' else 1
        res['err_Tbh'] = obs_errstd[spc, ind_lat, ind_lon]
        spc = 2 if orb == 'A' else 3
        res['err_Tbv'] = obs_errstd[spc, ind_lat, ind_lon]

        fname = os.path.join(froot, fbase + orb + '.bin')
        fid = open(fname, 'wb')
        io.write_fortran_block(fid, modes)
        io.write_fortran_block(fid, sdate)
        io.write_fortran_block(fid, edate)
        io.write_fortran_block(fid, lengths)
        io.write_fortran_block(fid, angles)
        for f in res.columns.values:
            io.write_fortran_block(fid, res[f].values)
        fid.close()
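
# The error estimator above rests on the innovation-variance identity: if
# observation and forecast errors are uncorrelated, E[(O - F)^2] = R + P_fcst,
# hence R = E[(O - F)^2] - P_fcst. A minimal self-contained check on synthetic
# data (illustrative only; all names and numbers below are made up):
def _demo_obs_err_estimate(n=100000, r_true=9.0, p_fcst=4.0, seed=0):
    rng = np.random.default_rng(seed)
    truth = rng.normal(0., 10., n)
    obs = truth + rng.normal(0., np.sqrt(r_true), n)
    fcst = truth + rng.normal(0., np.sqrt(p_fcst), n)
    r_est = ((obs - fcst) ** 2).mean() - p_fcst
    return np.sqrt(r_est)  # should be close to sqrt(r_true) = 3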
def run(part):
    parts = 15

    smos = SMOS_io()
    ismn = ISMN_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    # Median Q/R from MadKF API/CONUS run.
    Q_avg = 12.
    R_avg = 74.

    # Select only SCAN and USCRN stations
    ismn.list = ismn.list[(ismn.list.network == 'SCAN') |
                          (ismn.list.network == 'USCRN')]
    ismn.list.index = np.arange(len(ismn.list))

    # Split station list into `parts` chunks for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    elif platform.system() == 'Linux':
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    else:
        result_file = os.path.join('/', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'parts2', 'result_part%i.csv' % part)

    dt = ['2010-01-01', '2015-12-31']

    for cnt, (station, insitu) in enumerate(ismn.iter_stations(surf_depth=0.1)):
        # station = ismn.list.loc[978, :]
        # insitu = ismn.read_first_surface_layer('SCAN', 'Los_Lunas_Pmc')
        print('%i / %i' % (cnt, len(ismn.list)))

        # if True:
        try:
            gpi = lonlat2gpi(station.lon, station.lat, mswep.grid)
            mswep_idx = mswep.grid.index[mswep.grid.dgg_gpi == gpi][0]
            smos_gpi = mswep.grid.loc[mswep_idx, 'smos_gpi']

            precip = mswep.read(mswep_idx)
            sm_ascat = ascat.read(gpi)
            sm_smos = smos.read(smos_gpi) * 100.

            if (precip is None) | (sm_ascat is None) | (sm_smos is None) | (insitu is None):
                continue

            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average', longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average', longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average', longterm=False)
            insitu = calc_anomaly(insitu[dt[0]:dt[1]].resample('1d').first(),
                                  method='moving_average',
                                  longterm=False).tz_localize(None)

            df = pd.DataFrame({1: precip, 2: sm_ascat, 3: sm_smos, 4: insitu},
                              index=pd.date_range(dt[0], dt[1]))
            df.loc[np.isnan(df[1]), 1] = 0.

            n = len(df)
            if len(df.dropna()) < 50:
                continue

            gamma = mswep.grid.loc[mswep_idx, 'gamma']
            api = API(gamma=gamma)

            # --- OL run ---
            x_OL = np.full(n, np.nan)
            model = deepcopy(api)
            for t, f in enumerate(precip.values):
                x = model.step(f)
                x_OL[t] = x

            # ----- Calculate uncertainties -----
            # convert (static) forcing to model uncertainty
            P_avg = Q_avg / (1 - gamma ** 2)

            # calculate TCA-based uncertainty and scaling coefficients
            tmp_df = pd.DataFrame({1: x_OL, 2: sm_ascat, 3: sm_smos},
                                  index=pd.date_range(dt[0], dt[1])).dropna()
            snr, r_tc, err, beta = tc(tmp_df)
            P_TC = err[0] ** 2
            Q_TC = P_TC * (1 - gamma ** 2)
            R_TC = (err[1] / beta[1]) ** 2
            H_TC = beta[1]

            # Calculate RMSD-based uncertainty
            R_rmsd = (np.nanmean((tmp_df[1].values - H_TC * tmp_df[2].values) ** 2) - P_avg)
            if R_rmsd < 0:
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using RMSD-based uncertainties (corrected for model uncertainty) -----
            # forc_pert = ['normal', 'additive', Q_avg]
            # obs_pert = ['normal', 'additive', R_rmsd]
            # x_rmsd, P, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
            #     EnKF(api, df[1].values.copy(), df[2].values.copy(),
            #          forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run MadKF -----
            cnt = 0
            checkvar_madkf = 9999.
            while ((checkvar_madkf < 0.95) | (checkvar_madkf > 1.05)) & (cnt < 5):
                cnt += 1
                tmp_x_madkf, P_madkf, R_madkf, Q_madkf, H_madkf, R_innov_madkf, tmp_checkvar_madkf, K_madkf = \
                    MadKF(api, df[1].values.copy(), df[2].values.copy(),
                          n_ens=100, n_iter=20)
                if abs(1 - tmp_checkvar_madkf) < abs(1 - checkvar_madkf):
                    checkvar_madkf = tmp_checkvar_madkf
                    x_madkf = tmp_x_madkf

            df['x_ol'] = x_OL
            df['x_kf'] = x_kf
            df['x_avg'] = x_avg
            # df['x_rmsd'] = x_rmsd
            df['x_madkf'] = x_madkf

            # tc_ol = tc(df[[4, 3, 'x_ol']])
            # tc_kf = tc(df[[4, 3, 'x_kf']])
            # tc_avg = tc(df[[4, 3, 'x_avg']])
            # tc_rmsd = tc(df[[4, 3, 'x_rmsd']])
            # tc_madkf = tc(df[[4, 3, 'x_madkf']])

            ci_l_ol, ci_m_ol, ci_u_ol = bootstrap_tc(df[[4, 3, 'x_ol']])
            ci_l_kf, ci_m_kf, ci_u_kf = bootstrap_tc(df[[4, 3, 'x_kf']])
            ci_l_avg, ci_m_avg, ci_u_avg = bootstrap_tc(df[[4, 3, 'x_avg']])
            # ci_l_rmsd, ci_m_rmsd, ci_u_rmsd = bootstrap_tc(df[[4, 3, 'x_rmsd']])
            ci_l_madkf, ci_m_madkf, ci_u_madkf = bootstrap_tc(df[[4, 3, 'x_madkf']])

            corr = df.dropna().corr()
            n_all = len(df.dropna())

            result = pd.DataFrame({'lon': station.lon,
                                   'lat': station.lat,
                                   'network': station.network,
                                   'station': station.station,
                                   'gpi': gpi,
                                   'n_all': n_all,
                                   'Q_est_madkf': Q_madkf,
                                   'R_est_madkf': R_madkf,
                                   'corr_ol': corr[4]['x_ol'],
                                   'corr_kf': corr[4]['x_kf'],
                                   'corr_avg': corr[4]['x_avg'],
                                   # 'corr_rmsd': corr[4]['x_rmsd'],
                                   'corr_madkf': corr[4]['x_madkf'],
                                   # 'snr_ol': tc_ol[0][2], 'snr_kf': tc_kf[0][2],
                                   # 'snr_avg': tc_avg[0][2], 'snr_rmsd': tc_rmsd[0][2],
                                   # 'snr_madkf': tc_madkf[0][2],
                                   # 'r_ol': tc_ol[1][2], 'r_kf': tc_kf[1][2],
                                   # 'r_avg': tc_avg[1][2], 'r_rmsd': tc_rmsd[1][2],
                                   # 'r_madkf': tc_madkf[1][2],
                                   # 'rmse_ol': tc_ol[2][2], 'rmse_kf': tc_kf[2][2],
                                   # 'rmse_avg': tc_avg[2][2], 'rmse_rmsd': tc_rmsd[2][2],
                                   # 'rmse_madkf': tc_madkf[2][2],
                                   'r_ol_l': ci_l_ol, 'r_ol_m': ci_m_ol, 'r_ol_u': ci_u_ol,
                                   'r_kf_l': ci_l_kf, 'r_kf_m': ci_m_kf, 'r_kf_u': ci_u_kf,
                                   'r_avg_l': ci_l_avg, 'r_avg_m': ci_m_avg, 'r_avg_u': ci_u_avg,
                                   # 'r_rmsd_l': ci_l_rmsd, 'r_rmsd_m': ci_m_rmsd,
                                   # 'r_rmsd_u': ci_u_rmsd,
                                   'r_madkf_l': ci_l_madkf, 'r_madkf_m': ci_m_madkf,
                                   'r_madkf_u': ci_u_madkf,
                                   'checkvar_kf': checkvar_kf,
                                   'checkvar_avg': checkvar_avg,
                                   # 'checkvar_rmsd': checkvar_rmsd,
                                   'checkvar_madkf': checkvar_madkf,
                                   'R_innov_kf': R_innov_kf,
                                   'R_innov_avg': R_innov_avg,
                                   # 'R_innov_rmsd': R_innov_rmsd,
                                   'R_innov_madkf': R_innov_madkf},
                                  index=(station.name,))

            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file, float_format='%0.4f', mode='a', header=False)
        except Exception:
            print('GPI failed.')
            continue

    ascat.close()
    mswep.close()
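
# Both run() variants here convert a static forcing-error variance Q into a
# model-error variance via P = Q / (1 - gamma**2). This is the steady-state
# variance of an AR(1) process driven by white noise with variance Q; a quick
# numerical check (illustrative only, the names below are made up):
def _demo_ar1_steady_state(gamma=0.85, q=12., n=200000, seed=0):
    rng = np.random.default_rng(seed)
    x = np.zeros(n)
    for t in range(1, n):
        x[t] = gamma * x[t - 1] + rng.normal(0., np.sqrt(q))
    return x[1000:].var(), q / (1 - gamma ** 2)  # the two should nearly match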
def run(cell=None, gpi=None):
    if (cell is None) and (gpi is None):
        print('No cell/gpi specified.')
        return

    smos = SMOS_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    if gpi is not None:
        cell = mswep.gpi2cell(gpi)

    # Median Q/R from TC run.
    Q_avg = 12.
    R_avg = 74.

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)
    else:
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)

    dt = ['2010-01-01', '2015-12-31']

    for data, info in mswep.iter_cell(cell, gpis=gpi):
        # print(info.name)
        # if True:
        try:
            precip = mswep.read(info.name)
            sm_ascat = ascat.read(info.dgg_gpi)
            sm_smos = smos.read(info.smos_gpi) * 100.

            if (precip is None) | (sm_ascat is None) | (sm_smos is None):
                continue

            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average', longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average', longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average', longterm=False)

            api = API(gamma=info.gamma)

            # Regularize time steps
            df = pd.DataFrame({1: precip, 2: sm_ascat, 3: sm_smos},
                              index=pd.date_range(dt[0], dt[1]))

            n_inv_precip = len(np.where(np.isnan(df[1]))[0])
            n_inv_ascat = len(np.where(np.isnan(df[2]))[0])
            n_inv_smos = len(np.where(np.isnan(df[3]))[0])
            n_inv_asc_smo = len(np.where(np.isnan(df[2]) & np.isnan(df[3]))[0])

            df.loc[np.isnan(df[1]), 1] = 0.

            # --- get OL ts ---
            OL = np.full(len(df), np.nan)  # sized to the regularized daily index
            model = API(gamma=info.gamma)
            for t, f in enumerate(df[1].values):
                x = model.step(f)
                OL[t] = x

            # collocate OL and satellite data sets
            df2 = pd.DataFrame({1: OL, 2: sm_ascat, 3: sm_smos},
                               index=pd.date_range(dt[0], dt[1])).dropna()

            # ----- Calculate uncertainties -----
            # convert (static) forcing to model uncertainty
            P_avg = Q_avg / (1 - info.gamma ** 2)

            # calculate TCA-based uncertainty and scaling coefficients
            snr, err, beta = tcol_snr(df2[1].values, df2[2].values, df2[3].values)
            P_TC = err[0] ** 2
            Q_TC = P_TC * (1 - info.gamma ** 2)
            R_TC = (err[1] / beta[1]) ** 2
            H_TC = beta[1]

            # Calculate RMSD-based uncertainty
            R_rmsd = (np.nanmean((df2[1].values - H_TC * df2[2].values) ** 2) - P_avg)
            if R_rmsd < 0:
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run 2D KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            R_2D = np.array([(err[1] / beta[1]) ** 2, (err[2] / beta[2]) ** 2])
            H_2D = np.array([beta[1] ** (-1), beta[2] ** (-1)])
            x_2d, P, checkvar1_2d, checkvar2_2d, checkvar3_2d, K1_2d, K2_2d = \
                KF_2D(api_kf, df[1].values.copy(), df[2].values.copy(),
                      df[3].values.copy(), R_2D, H=H_2D)

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using TCA-based uncertainties -----
            forc_pert = ['normal', 'additive', Q_TC]
            obs_pert = ['normal', 'additive', R_TC]
            x_tc, P, R_innov_tc, checkvar_tc, K_tc = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using RMSD-based uncertainties (corrected for model uncertainty) -----
            t = timeit.default_timer()
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_rmsd]
            x_rmsd, P, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)
            t_enkf = timeit.default_timer() - t

            # ----- Run MadKF -----
            t = timeit.default_timer()
            x_madkf, P, R_madkf, Q_madkf, H_madkf, R_innov_madkf, checkvar_madkf, K_madkf = \
                MadKF(api, df[1].values.copy(), df[2].values.copy(),
                      n_ens=100, n_iter=20)
            t_madkf = timeit.default_timer() - t

            # TC evaluation of assimilation results
            # df3 = pd.DataFrame({1: x_tc, 2: x_avg, 3: x_rmsd, 4: x_madkf,
            #                     5: sm_ascat, 6: sm_smos},
            #                    index=pd.date_range(dt[0], dt[1])).dropna()
            # rmse_ana_tc = tcol_snr(df3[1].values, df3[5].values, df3[6].values)[1][0]
            # rmse_ana_avg = tcol_snr(df3[2].values, df3[5].values, df3[6].values)[1][0]
            # rmse_ana_rmsd = tcol_snr(df3[3].values, df3[5].values, df3[6].values)[1][0]
            # rmse_ana_madkf = tcol_snr(df3[4].values, df3[5].values, df3[6].values)[1][0]

            result = pd.DataFrame({'lon': info.lon, 'lat': info.lat,
                                   'col': info.col, 'row': info.row,
                                   'P_tc': P_TC, 'Q_tc': Q_TC, 'R_tc': R_TC, 'H_tc': H_TC,
                                   'K_tc': K_tc, 'R_innov_tc': R_innov_tc,
                                   'checkvar_tc': checkvar_tc,
                                   'K_kf': K_kf, 'R_innov_kf': R_innov_kf,
                                   'checkvar_kf': checkvar_kf,
                                   'K1_2d': K1_2d, 'K2_2d': K2_2d,
                                   'checkvar1_2d': checkvar1_2d,
                                   'checkvar2_2d': checkvar2_2d,
                                   'checkvar3_2d': checkvar3_2d,
                                   'P_avg': P_avg, 'Q_avg': Q_avg, 'R_avg': R_avg,
                                   'K_avg': K_avg, 'R_innov_avg': R_innov_avg,
                                   'checkvar_avg': checkvar_avg,
                                   'R_rmsd': R_rmsd, 'K_rmsd': K_rmsd,
                                   'R_innov_rmsd': R_innov_rmsd,
                                   'checkvar_rmsd': checkvar_rmsd,
                                   'P_madkf': Q_madkf / (1 - info.gamma ** 2),
                                   'Q_madkf': Q_madkf, 'R_madkf': R_madkf,
                                   'H_madkf': H_madkf, 'K_madkf': K_madkf,
                                   'R_innov_madkf': R_innov_madkf,
                                   'checkvar_madkf': checkvar_madkf,
                                   't_enkf': t_enkf, 't_madkf': t_madkf,
                                   'n_inv_precip': n_inv_precip,
                                   'n_inv_ascat': n_inv_ascat,
                                   'n_inv_smos': n_inv_smos,
                                   'n_inv_asc_smo': n_inv_asc_smo},
                                  index=(info.name,))

            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file, float_format='%0.4f', mode='a', header=False)
        except Exception:
            print('GPI failed.')
            continue

    ascat.close()
    mswep.close()
def run(part):
    parts = 6

    result_file = r'D:\work\ESA_CCI_SM\ismn_r2\ismn_r2_part%i.csv' % part

    cci = CCISM_io()
    ismn = ISMN_io()
    # ismn.list = ismn.list.iloc[100:120]

    # Split station list into `parts` chunks for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    freq = ['abs', 'anom']

    res = ismn.list.copy()
    res.drop(['ease_col', 'ease_row'], axis='columns', inplace=True)
    res['r_abs'] = np.nan
    res['r_anom'] = np.nan

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations()):
        print('%i/%i (Proc %i)' % (i, len(ismn.list), part))

        if ts_insitu is None:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue

        ts_insitu = ts_insitu['2007-10-01':'2014-12-31']
        if len(ts_insitu) < 10:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue

        df_insitu = pd.DataFrame(ts_insitu).dropna()
        df_insitu_anom = pd.DataFrame(calc_anomaly(ts_insitu)).dropna()

        df_cci = cci.read(meta.lon, meta.lat, version='v04.4',
                          mode=['ACTIVE', 'PASSIVE']).dropna()
        if len(df_cci) < 10:
            print('No CCI data for ' + meta.network + ' / ' + meta.station)
            continue

        for f in freq:
            if f == 'abs':
                matched = df_match(df_cci, df_insitu, window=0.5)
            else:
                df_cci.loc[:, 'ACTIVE_v04.4'] = calc_anomaly(df_cci['ACTIVE_v04.4'])
                df_cci.loc[:, 'PASSIVE_v04.4'] = calc_anomaly(df_cci['PASSIVE_v04.4'])
                df_cci.dropna(inplace=True)
                if (len(df_cci) < 10) | (len(df_insitu_anom) < 10):
                    print('No in situ or CCI anomaly data for ' +
                          meta.network + ' / ' + meta.station)
                    continue
                matched = df_match(df_cci, df_insitu_anom, window=0.5)

            data = df_cci.join(matched['insitu']).dropna()
            if len(data) < 100:
                continue

            vals = data[['insitu', 'ACTIVE_v04.4']].values
            c1, p1 = pearsonr(vals[:, 0], vals[:, 1])
            vals = data[['insitu', 'PASSIVE_v04.4']].values
            c2, p2 = pearsonr(vals[:, 0], vals[:, 1])
            vals = data[['ACTIVE_v04.4', 'PASSIVE_v04.4']].values
            c3, p3 = pearsonr(vals[:, 0], vals[:, 1])

            # Require significant positive correlations between all pairs
            if (c1 < 0) | (c2 < 0) | (c3 < 0) | (p1 > 0.05) | (p2 > 0.05) | (p3 > 0.05):
                continue

            res.loc[meta.name, 'r_' + f] = np.sqrt(tc(data)[1][2])

    res.to_csv(result_file, float_format='%0.4f')
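
# tc() is project-internal; per its use above (and in the MadKF run(), where
# it returns snr, r, err, beta), tc(data)[1][2] is taken to be the TC-based
# R**2 of the third column, so the sqrt yields a correlation estimate. In
# standard triple collocation analysis these quantities relate via
# R**2 = SNR / (1 + SNR); a minimal sketch of that conversion (an assumption
# for illustration, not the package code):
def _snr_to_r(snr_db=None, snr_lin=None):
    """Convert a TC signal-to-noise ratio to a correlation vs. the truth."""
    if snr_lin is None:
        snr_lin = 10 ** (snr_db / 10.)  # pytesmo's tcol_snr reports SNR in dB
    return np.sqrt(snr_lin / (1. + snr_lin))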