示例#1
0
def single_location_comparison(location=None,
                               station='Banjar',
                               min_year=2000,
                               max_year=2011):
    """Benchmark gridded datasets against a gauge at a single location.

    Collects the APHRODITE, CRU, ERA5, GPM and bias-corrected WRF series for
    ``location`` and the gauge series for ``station``, then hands all six to
    ``tims.benchmarking_subplots`` and ``dataset_stats`` with the gauge as the
    reference dataset in both calls.

    Args:
        location: Coordinates forwarded unchanged to every ``collect_*``
            helper. When ``None`` (the default) ``[31.65, 77.34]`` is used.
            Presumably ``[lat, lon]`` -- confirm against the collectors.
        station: Gauge name passed to ``beas_sutlej_gauges.gauge_download``.
        min_year: Forwarded as ``minyear`` to every download/collect call.
        max_year: Forwarded as ``maxyear`` to every download/collect call.

    Returns:
        None. The results are whatever the two helpers plot or print.
    """
    if location is None:
        # Build the default per call: a list literal in the signature would
        # be one shared object that any downstream helper could mutate for
        # every later call.
        location = [31.65, 77.34]

    aphro_ds = aphrodite.collect_APHRO(location,
                                       minyear=min_year,
                                       maxyear=max_year)
    cru_ds = cru.collect_CRU(location, minyear=min_year, maxyear=max_year)
    era5_ds = era5.collect_ERA5(location, minyear=min_year, maxyear=max_year)
    gpm_ds = gpm.collect_GPM(location, minyear=min_year, maxyear=max_year)
    wrf_ds = beas_sutlej_wrf.collect_BC_WRF(location,
                                            minyear=min_year,
                                            maxyear=max_year)
    gauge_ds = beas_sutlej_gauges.gauge_download(station,
                                                 minyear=min_year,
                                                 maxyear=max_year)

    # NOTE: CMIP5, CORDEX and the in-house model output were only ever
    # referenced in commented-out code here and are not part of the
    # comparison.

    # The gauge goes first in the list and is also passed as the reference.
    timeseries = [gauge_ds, gpm_ds, era5_ds, wrf_ds, aphro_ds, cru_ds]

    tims.benchmarking_subplots(timeseries, reference_dataset=gauge_ds)
    dataset_stats(timeseries, ref_ds=gauge_ds)
示例#2
0
def gauge_stats():
    """Average the benchmark scores of the gridded datasets over all gauges.

    For every column of ``_Data/bs_only_gauge_info.csv`` the gauge series is
    downloaded for 2000-2011; ERA5, GPM, APHRODITE, CRU and bias-corrected
    WRF are then collected over the gauge's own time span and scored against
    the gauge with ``dataset_stats(..., ret=True)``.

    Returns:
        Tuple ``(avg_r2, avg_rmse)``: the two per-station results returned
        by ``dataset_stats``, each averaged along axis 0 (over stations).
    """
    station_info = pd.read_csv('_Data/bs_only_gauge_info.csv')

    r2_per_station, rmse_per_station = [], []

    # Iterating a DataFrame yields its column labels.
    for name in tqdm(station_info):
        reference = beas_sutlej_gauges.gauge_download(name,
                                                      minyear=2000,
                                                      maxyear=2011)

        # Widen the gauge's own time span by 0.0001 on each side and reuse
        # it for every collector below.
        latest = reference.time.max().values
        earliest = reference.time.min().values
        span = {'minyear': earliest - 0.0001, 'maxyear': latest + 0.0001}

        coords = station_info[name].values

        aphro_series = aphrodite.collect_APHRO(coords, **span)
        cru_series = cru.collect_CRU(coords, **span)
        era5_series = era5.collect_ERA5(coords, **span)
        gpm_series = gpm.collect_GPM(coords, **span)
        wrf_series = beas_sutlej_wrf.collect_BC_WRF(coords, **span)

        station_r2, station_rmse = dataset_stats(
            [era5_series, gpm_series, aphro_series, cru_series, wrf_series],
            ref_ds=reference,
            ret=True)
        r2_per_station.append(station_r2)
        rmse_per_station.append(station_rmse)

    return (np.array(r2_per_station).mean(axis=0),
            np.array(rmse_per_station).mean(axis=0))
示例#3
0
                          (station_df['lat'] > 31)
                          & (station_df['lat'] < 31.23)]
# Rows of station_df whose longitude is above 78.2.
hf_train_df5 = station_df[(station_df['lon'] > 78.2)]
# Disabled additions to the training station list:
# + list(hf_train_df3['index'].values) + list(hf_train_df4['index'].values)
# + list(hf_train_df5['index'].values)
# Training stations: names from the 'index' column of hf_train_df5 followed
# by those of hf_train_df2 (hf_train_df2 is defined outside this section).
hf_train_stations = list(hf_train_df5['index'].values) + list(
    hf_train_df2['index'].values)
# Disabled: extra hard-coded station names.
# + (['Banjar', 'Larji', 'Bhuntar', 'Sainj', 'Bhakra', 'Kasol', 'Suni',
# 'Pandoh', 'Janjehl', 'Rampur'])
# Same list object as hf_train_stations (an alias, not a copy).
lf_train_stations = hf_train_stations
# NOTE(review): this is the first station name only (a single element, not
# a list) -- confirm that is what downstream code expects.
hf_val_stations = hf_train_stations[0]

# Build one dataset per training station, then merge them.
hf_train_list = []
for station in hf_train_stations:
    # Gauge series for this station, 1980-2010.
    station_ds = beas_sutlej_gauges.gauge_download(station,
                                                   minyear=1980,
                                                   maxyear=2010)
    # Third value of the station's column in station_df; presumably the
    # elevation -- TODO confirm.
    # NOTE(review): station_df is filtered on 'lon'/'index' columns above but
    # looked up by station name as a column here -- confirm both lookups
    # match the frame's layout.
    station_ds['z'] = station_df[station].values[2]
    station_ds['slope'] = srtm.find_slope(station).slope.values
    # Promote the two new variables to coordinates.
    station_ds = station_ds.set_coords('z')
    station_ds = station_ds.set_coords('slope')
    # Give lat, lon, z and slope a dimension of length 1 each; presumably so
    # the per-station datasets can be combined by xr.merge -- TODO confirm.
    station_ds = station_ds.expand_dims(dim={
        'lat': 1,
        'lon': 1,
        'z': 1,
        'slope': 1
    })
    hf_train_list.append(station_ds)
hf_train_ds = xr.merge(hf_train_list)

# ERA5 data
# NOTE(review): despite the header above, the loop below downloads gauge
# data through beas_sutlej_gauges and rebinds the hf_train_* names --
# confirm whether this section was meant to build the ERA5 set.

# Station subsets selected by longitude/latitude windows.
# hf_train_df3 and hf_train_df4 are only referenced by the disabled lines
# below in the code visible here.
hf_train_df3 = station_df[(station_df['lon'] > 77.0)
                          & (station_df['lat'] < 31)]
hf_train_df4 = station_df[(station_df['lon'] < 78.0)
                          & (station_df['lon'] > 77.0) &
                          (station_df['lat'] > 31)
                          & (station_df['lat'] < 31.23)]
hf_train_df5 = station_df[(station_df['lon'] > 78.2)]

# Disabled additions to the training station list:
# + list(hf_train_df2['index'].values) + list(hf_train_df3['index'].values)
# + list(hf_train_df4['index'].values) + list(hf_train_df5['index'].values)
# Only the stations with longitude above 78.2 are used.
hf_train_stations = list(hf_train_df5['index'].values)

# Build one dataset per training station, then merge them.
hf_train_list = []
for station in hf_train_stations:
    # Gauge series for this station, 2000-2005.
    station_ds = beas_sutlej_gauges.gauge_download(station,
                                                   minyear=2000,
                                                   maxyear=2005)
    # Third value of the station's column in station_df; presumably the
    # elevation -- TODO confirm.
    station_ds['z'] = station_df[station].values[2]
    station_ds['slope'] = srtm.find_slope(station).slope.values
    # Promote the two new variables to coordinates.
    station_ds = station_ds.set_coords('z')
    station_ds = station_ds.set_coords('slope')
    # Give lat, lon, z and slope a dimension of length 1 each; presumably so
    # the per-station datasets can be combined by xr.merge -- TODO confirm.
    station_ds = station_ds.expand_dims(dim={
        'lat': 1,
        'lon': 1,
        'z': 1,
        'slope': 1
    })
    hf_train_list.append(station_ds)
hf_train_ds = xr.merge(hf_train_list)

# Flatten the merged dataset to a table, drop rows containing NaN and turn
# the index levels back into ordinary columns.
hf_train_df = hf_train_ds.to_dataframe().dropna().reset_index()
        model.mean_function, model.Y_metadata)
    return test_log_likelihood


# Result accumulators; they are not filled in the part of the loop visible
# here -- presumably one entry per station is appended further down.
hf_heldout_ll = []
hf_train_ll = []
R2_hf_list = []
R2_lf_list = []

# Calculate values

for station in station_list:

    # Prepare data
    # High-fidelity series from the gauges, low-fidelity series from the
    # era5 module, both requested for 1980-2011.
    hf_ds = beas_sutlej_gauges.gauge_download(station,
                                              minyear=1980,
                                              maxyear=2011)
    lf_ds = era5.gauge_download(station, minyear=1980, maxyear=2011)

    # Overwrite the gauge time coordinate with the first len(hf_ds.time)
    # time values of the low-fidelity series so both share the same stamps.
    hf_ds = hf_ds.assign_coords({"time": lf_ds.time[:len(hf_ds.time.values)]})

    # Drop time steps that contain missing values.
    hf_ds = hf_ds.dropna(dim='time')
    lf_ds = lf_ds.dropna(dim='time')

    # Transform data
    # The transformed 'tp' values are stored next to the originals as 'tp_tr'.
    hf_ds['tp_tr'] = dp.log_transform(hf_ds['tp'].values)
    lf_ds['tp_tr'] = dp.log_transform(lf_ds['tp'].values)

    # High fidelity data needs to be smaller in length than low fidelity data
    # Training inputs are the time values as column vectors: the first 330
    # steps of the low-fidelity series and the first 240 of the high-fidelity.
    x_train_l = lf_ds.time[:330].values.reshape(-1, 1)
    x_train_h = hf_ds.time[:240].values.reshape(-1, 1)