def single_location_comparison(location=None, station='Banjar', min_year=2000, max_year=2011):
    """Plot model outputs for given coordinates over time.

    Downloads each gridded precipitation dataset (APHRODITE, CRU, ERA5,
    GPM and bias-corrected WRF) sampled at ``location`` together with the
    gauge record for ``station``, plots all series against the gauge as
    the reference, and reports comparison statistics.

    Args:
        location: ``[lat, lon]`` pair to sample the gridded datasets at.
            Defaults to ``[31.65, 77.34]`` (near the Banjar gauge).
        station: name of the reference gauge station.
        min_year: start of the comparison period.
        max_year: end of the comparison period.

    Returns:
        None; output is the benchmarking plot and printed dataset
        statistics from ``dataset_stats``.
    """
    # A list literal here would be a shared mutable default argument;
    # resolve the None sentinel inside the call instead (same behaviour).
    if location is None:
        location = [31.65, 77.34]

    aphro_ds = aphrodite.collect_APHRO(location, minyear=min_year, maxyear=max_year)
    cru_ds = cru.collect_CRU(location, minyear=min_year, maxyear=max_year)
    era5_ds = era5.collect_ERA5(location, minyear=min_year, maxyear=max_year)
    gpm_ds = gpm.collect_GPM(location, minyear=min_year, maxyear=max_year)
    wrf_ds = beas_sutlej_wrf.collect_BC_WRF(location, minyear=min_year, maxyear=max_year)
    gauge_ds = beas_sutlej_gauges.gauge_download(station, minyear=min_year, maxyear=max_year)

    # Gauge goes first: it is the reference the other series are scored against.
    timeseries = [gauge_ds, gpm_ds, era5_ds, wrf_ds, aphro_ds, cru_ds]
    tims.benchmarking_subplots(timeseries, reference_dataset=gauge_ds)
    dataset_stats(timeseries, ref_ds=gauge_ds)
def gauge_stats():
    """Compute station-averaged R2 and RMSE scores for each dataset.

    For every gauge listed in ``_Data/bs_only_gauge_info.csv``, downloads
    the gauge record (2000-2011), samples each gridded dataset (ERA5,
    GPM, APHRODITE, CRU, bias-corrected WRF) at the gauge location over
    the gauge's own observed time span, and scores each dataset against
    the gauge via ``dataset_stats``.

    Returns:
        tuple: ``(avg_r2, avg_rmse)`` — per-dataset R2 and RMSE values
        averaged over all stations, in the order ERA5, GPM, APHRODITE,
        CRU, WRF.
    """
    # Station metadata: one column per station; iterating the DataFrame
    # yields its column labels, i.e. the station names.
    bs_station_df = pd.read_csv('_Data/bs_only_gauge_info.csv')

    r2_list = []
    rmse_list = []
    for s in tqdm(bs_station_df):
        gauge_ds = beas_sutlej_gauges.gauge_download(s, minyear=2000, maxyear=2011)

        # Pad the gauge's observed span by a small epsilon so boundary
        # timesteps are not dropped by strict comparisons downstream.
        gauge_maxy = gauge_ds.time.max().values
        gauge_miny = gauge_ds.time.min().values
        miny = gauge_miny - 0.0001
        maxy = gauge_maxy + 0.0001

        # Column values give the station location — presumably lat/lon
        # leading entries; TODO confirm against the CSV layout.
        location = bs_station_df[s].values
        aphro_ds = aphrodite.collect_APHRO(location, minyear=miny, maxyear=maxy)
        cru_ds = cru.collect_CRU(location, minyear=miny, maxyear=maxy)
        era5_ds = era5.collect_ERA5(location, minyear=miny, maxyear=maxy)
        gpm_ds = gpm.collect_GPM(location, minyear=miny, maxyear=maxy)
        wrf_ds = beas_sutlej_wrf.collect_BC_WRF(location, minyear=miny, maxyear=maxy)

        timeseries = [era5_ds, gpm_ds, aphro_ds, cru_ds, wrf_ds]
        r2s, rmses = dataset_stats(timeseries, ref_ds=gauge_ds, ret=True)
        r2_list.append(r2s)
        rmse_list.append(rmses)

    avg_r2 = np.array(r2_list).mean(axis=0)
    avg_rmse = np.array(rmse_list).mean(axis=0)
    return avg_r2, avg_rmse
(station_df['lat'] > 31) & (station_df['lat'] < 31.23)]
hf_train_df5 = station_df[(station_df['lon'] > 78.2)]
# + list(hf_train_df3['index'].values) + list(hf_train_df4['index'].values)
# + list(hf_train_df5['index'].values)
# Training stations: the eastern (lon > 78.2) subset plus the df2 subset.
# The commented-out terms above/below are alternative station selections.
hf_train_stations = list(hf_train_df5['index'].values) + list(
    hf_train_df2['index'].values)
# + (['Banjar', 'Larji', 'Bhuntar', 'Sainj', 'Bhakra', 'Kasol', 'Suni',
# 'Pandoh', 'Janjehl', 'Rampur'])
# Low-fidelity training reuses the same station set as high-fidelity.
lf_train_stations = hf_train_stations
# NOTE(review): element 0 is a single station name, not a list — confirm
# whether hf_train_stations[:1] was intended here.
hf_val_stations = hf_train_stations[0]

hf_train_list = []
for station in hf_train_stations:
    # Gauge record over the 1980-2010 training window.
    station_ds = beas_sutlej_gauges.gauge_download(station, minyear=1980,
                                                   maxyear=2010)
    # Static covariates per station: 'z' from row 2 of the station's info
    # column (presumably elevation — TODO confirm) and SRTM terrain slope.
    station_ds['z'] = station_df[station].values[2]
    station_ds['slope'] = srtm.find_slope(station).slope.values
    station_ds = station_ds.set_coords('z')
    station_ds = station_ds.set_coords('slope')
    # Promote the point coordinates to length-1 dims so the per-station
    # datasets can be merged along lat/lon/z/slope.
    station_ds = station_ds.expand_dims(dim={
        'lat': 1,
        'lon': 1,
        'z': 1,
        'slope': 1
    })
    hf_train_list.append(station_ds)
hf_train_ds = xr.merge(hf_train_list)

# ERA5 data
# Spatial subsets of the station table by lon/lat bounding boxes.
hf_train_df3 = station_df[(station_df['lon'] > 77.0) & (station_df['lat'] < 31)]
hf_train_df4 = station_df[(station_df['lon'] < 78.0) & (station_df['lon'] > 77.0)
                          & (station_df['lat'] > 31) & (station_df['lat'] < 31.23)]
hf_train_df5 = station_df[(station_df['lon'] > 78.2)]
# + list(hf_train_df2['index'].values) + list(hf_train_df3['index'].values)
# + list(hf_train_df4['index'].values) + list(hf_train_df5['index'].values)
# Only the eastern (lon > 78.2) subset is used for training here; the
# commented-out terms above are the other candidate subsets.
hf_train_stations = list(hf_train_df5['index'].values)

hf_train_list = []
for station in hf_train_stations:
    # Gauge record over the 2000-2005 training window.
    station_ds = beas_sutlej_gauges.gauge_download(station, minyear=2000,
                                                   maxyear=2005)
    # Static covariates per station: 'z' from row 2 of the station's info
    # column (presumably elevation — TODO confirm) and SRTM terrain slope.
    station_ds['z'] = station_df[station].values[2]
    station_ds['slope'] = srtm.find_slope(station).slope.values
    station_ds = station_ds.set_coords('z')
    station_ds = station_ds.set_coords('slope')
    # Promote the point coordinates to length-1 dims so the per-station
    # datasets can be merged along lat/lon/z/slope.
    station_ds = station_ds.expand_dims(dim={
        'lat': 1,
        'lon': 1,
        'z': 1,
        'slope': 1
    })
    hf_train_list.append(station_ds)
hf_train_ds = xr.merge(hf_train_list)
hf_train_df = hf_train_ds.to_dataframe().dropna().reset_index()
model.mean_function, model.Y_metadata)
    return test_log_likelihood


hf_heldout_ll = []
hf_train_ll = []
R2_hf_list = []
R2_lf_list = []

# Calculate values
for station in station_list:
    # Prepare data: high-fidelity gauge record and low-fidelity ERA5
    # series at the same station over 1980-2011.
    hf_ds = beas_sutlej_gauges.gauge_download(station, minyear=1980,
                                              maxyear=2011)
    lf_ds = era5.gauge_download(station, minyear=1980, maxyear=2011)
    # Re-stamp the gauge series with the leading portion of the ERA5 time
    # axis so both fidelities share identical time coordinates.
    hf_ds = hf_ds.assign_coords({"time": lf_ds.time[:len(hf_ds.time.values)]})
    hf_ds = hf_ds.dropna(dim='time')
    lf_ds = lf_ds.dropna(dim='time')

    # Transform data: log-transform total precipitation before modelling.
    hf_ds['tp_tr'] = dp.log_transform(hf_ds['tp'].values)
    lf_ds['tp_tr'] = dp.log_transform(lf_ds['tp'].values)

    # High fidelity data needs to be smaller in length than low fidelity
    # data (hence 240 high-fidelity vs 330 low-fidelity training points).
    x_train_l = lf_ds.time[:330].values.reshape(-1, 1)
    x_train_h = hf_ds.time[:240].values.reshape(-1, 1)