Exemplo n.º 1
0
 def test_startMonth_greater_than_endMonth(self):
     """Check a month window whose start (8) is later than its end (1).

     The expected time axis holds the first day of January and of August
     through December for every year from 2000 through 2009.
     """
     expected_months = (1, 8, 9, 10, 11, 12)
     expected_times = []
     for year in range(2000, 2010):
         for month in expected_months:
             expected_times.append(datetime.datetime(year, month, 1))
     self.dataset_times = np.array(expected_times)

     subset = dp.temporal_subset(self.ten_year_dataset, 8, 1)
     self.tempSubset = subset
     np.testing.assert_array_equal(self.dataset_times, subset.times)
Exemplo n.º 2
0
 def test_startMonth_greater_than_endMonth(self):
     """Check a month window whose start (8) is later than its end (1).

     The expected time axis holds the first day of January and of August
     through December for every year from 2000 through 2009.
     """
     expected_months = (1, 8, 9, 10, 11, 12)
     expected_times = []
     for year in range(2000, 2010):
         for month in expected_months:
             expected_times.append(datetime.datetime(year, month, 1))
     self.dataset_times = np.array(expected_times)

     subset = dp.temporal_subset(self.ten_year_dataset, 8, 1)
     self.tempSubset = subset
     np.testing.assert_array_equal(self.dataset_times, subset.times)
Exemplo n.º 3
0
 def test_returned_dataset(self):
     """Check the time axis returned for the month window 1 through 5.

     The expected times are the first day of months 1-5 for every year
     from 2000 through 2009.
     """
     expected_times = []
     for year in range(2000, 2010):
         for month in range(1, 6):
             expected_times.append(datetime.datetime(year, month, 1))
     self.dataset_times = np.array(expected_times)

     subset = dp.temporal_subset(self.ten_year_dataset, 1, 5)
     self.tempSubset = subset
     np.testing.assert_array_equal(self.dataset_times, subset.times)
Exemplo n.º 4
0
 def test_returned_dataset(self):
     """Check the time axis returned for the month window 1 through 5.

     The expected times are the first day of months 1-5 for every year
     from 2000 through 2009.
     """
     expected_times = []
     for year in range(2000, 2010):
         for month in range(1, 6):
             expected_times.append(datetime.datetime(year, month, 1))
     self.dataset_times = np.array(expected_times)

     subset = dp.temporal_subset(self.ten_year_dataset, 1, 5)
     self.tempSubset = subset
     np.testing.assert_array_equal(self.dataset_times, subset.times)
Exemplo n.º 5
0
 def test_temporal_subset_equal_start_end_month(self):
     """Check the time axis when start and end month are both 1.

     With average_each_year=True the result is expected to carry one
     time per year: January 1st of 2000 through 2009.
     """
     januaries = []
     for year in range(2000, 2010):
         januaries.append(datetime.datetime(year, 1, 1))
     self.dataset_times = np.array(januaries)

     subset = dp.temporal_subset(self.ten_year_dataset, 1, 1,
                                 average_each_year=True)
     self.tempSubset = subset
     np.testing.assert_array_equal(self.dataset_times, subset.times)
Exemplo n.º 6
0
 def test_temporal_subset_equal_start_end_month(self):
     """Check the time axis when start and end month are both 1.

     With average_each_year=True the result is expected to carry one
     time per year: January 1st of 2000 through 2009.
     """
     januaries = []
     for year in range(2000, 2010):
         januaries.append(datetime.datetime(year, 1, 1))
     self.dataset_times = np.array(januaries)

     subset = dp.temporal_subset(self.ten_year_dataset, 1, 1,
                                 average_each_year=True)
     self.tempSubset = subset
     np.testing.assert_array_equal(self.dataset_times, subset.times)
Exemplo n.º 7
0
 def test_temporal_subset_with_average_values(self):
     """Check the values returned for months 1-3 with yearly averaging.

     Every value in the result is expected to equal 1, on an array shaped
     (result times, source lats, source lons).
     """
     subset = dp.temporal_subset(self.ten_year_dataset, 1, 3,
                                 average_each_year=True)
     self.tempSubset = subset

     expected_shape = [len(subset.times),
                       len(self.ten_year_dataset.lats),
                       len(self.ten_year_dataset.lons)]
     self.dataset_values = np.ones(expected_shape)
     np.testing.assert_array_equal(self.dataset_values, subset.values)
Exemplo n.º 8
0
 def test_temporal_subset_with_average_values(self):
     """Check the values returned for months 1-3 with yearly averaging.

     Every value in the result is expected to equal 1, on an array shaped
     (result times, source lats, source lons).
     """
     subset = dp.temporal_subset(self.ten_year_dataset, 1, 3,
                                 average_each_year=True)
     self.tempSubset = subset

     expected_shape = [len(subset.times),
                       len(self.ten_year_dataset.lats),
                       len(self.ten_year_dataset.lons)]
     self.dataset_values = np.ones(expected_shape)
     np.testing.assert_array_equal(self.dataset_values, subset.values)
Exemplo n.º 9
0
 def test_temporal_subset_attributes(self):
     """Check that the subset keeps the source dataset's metadata and grid.

     The name, variable and units must compare equal, and the lats and
     lons arrays must match element-wise.
     """
     source = self.ten_year_dataset
     subset = dp.temporal_subset(source, 1, 3, average_each_year=True)
     self.tempSubset = subset

     # Scalar metadata first, in the same order as the grid checks below.
     for attribute in ('name', 'variable', 'units'):
         self.assertEqual(getattr(subset, attribute),
                          getattr(source, attribute))
     for axis in ('lats', 'lons'):
         np.testing.assert_array_equal(getattr(subset, axis),
                                       getattr(source, axis))
Exemplo n.º 10
0
 def test_temporal_subset_attributes(self):
     """Check that the subset keeps the source dataset's metadata and grid.

     The name, variable and units must compare equal, and the lats and
     lons arrays must match element-wise.
     """
     source = self.ten_year_dataset
     subset = dp.temporal_subset(source, 1, 3, average_each_year=True)
     self.tempSubset = subset

     # Scalar metadata first, in the same order as the grid checks below.
     for attribute in ('name', 'variable', 'units'):
         self.assertEqual(getattr(subset, attribute),
                          getattr(source, attribute))
     for axis in ('lats', 'lons'):
         np.testing.assert_array_equal(getattr(subset, axis),
                                       getattr(source, axis))
                   us_states=regions[6])

# Regional bounds gathered in the region-index order used by the time-series
# arrays below (NW, SW, NGP, SGP, MW, NE, SE).
regional_bounds = [NW_bounds, SW_bounds, NGP_bounds,
                   SGP_bounds, MW_bounds, NE_bounds, SE_bounds]

""" Load nClimGrid file into OCW Dataset """
obs_dataset = local.load_file(file_obs, variable_name='tave')

""" Load CMIP5 simulations into a list of OCW Datasets"""
model_dataset = local.load_multiple_files(file_path=model_file_path, variable_name='tas',
                                          dataset_name=dataset_name, variable_unit='K')

""" Temporal subset of obs_dataset """
# First cut the record to [start_date, end_date], then keep only the months
# month_start..month_end with average_each_year=True.
obs_dataset_subset = dsp.temporal_slice(obs_dataset,
                  start_time=start_date, end_time=end_date)
obs_dataset_season = dsp.temporal_subset(obs_dataset_subset, month_start, month_end,
                      average_each_year=True)

""" Temporal subset of model_dataset """
# Same two-step slice/subset, applied to every model dataset in the list.
model_dataset_subset = [dsp.temporal_slice(dataset,start_time=start_date, end_time=end_date)
                        for dataset in model_dataset]
model_dataset_season = [dsp.temporal_subset(dataset, month_start, month_end,
                      average_each_year=True) for dataset in model_dataset_subset]


""" Spatial subset of obs_dataset and generate time series """
obs_timeseries = np.zeros([nyear, n_region])   # region index 0-6: NW, SW, NGP, SGP, MW, NE, SE
# Allocated here; it is not filled within the lines visible in this excerpt.
model_timeseries = np.zeros([nmodel, nyear, n_region])

# Fill one column per region with the time series of the observational
# dataset subset to that region's bounds.
for iregion in np.arange(n_region):
    obs_timeseries[:, iregion] = utils.calc_time_series(
                         dsp.subset(obs_dataset_season, regional_bounds[iregion]))
                                        model_info['variable'])
# Report the shape of the present-day model values. Note that `%` binds
# tighter than `+`, so only the literal is formatted with the shape.
print(
    model_info['data_name'] + " values shape: (times, lats, lons) - %s \n" %
    (model_dataset_present.values.shape, ))
# Element [0] of spatial_resolution() is taken as dy and element [1] as dx;
# they are referenced only by the commented-out aggregation calls below.
dy = model_dataset_present.spatial_resolution()[0]
dx = model_dataset_present.spatial_resolution()[1]

# Load the future-scenario run using the same variable name.
model_dataset_future = local.load_file(model_info['future']['path'],
                                       model_info['variable'])
print(
    model_info['future']['scenario_name'] + ':' + model_info['data_name'] +
    " values shape: (times, lats, lons) - %s \n" %
    (model_dataset_future.values.shape, ))
""" Step 2: Temporal subsetting """
print("Temporal subsetting for the selected month(s)")
# Apply the same month window to the reference and both model datasets.
ref_temporal_subset = dsp.temporal_subset(ref_dataset, month_start, month_end)
model_temporal_subset_present = dsp.temporal_subset(model_dataset_present,
                                                    month_start, month_end)
model_temporal_subset_future = dsp.temporal_subset(model_dataset_future,
                                                   month_start, month_end)
""" Step 3: Spatial aggregation of observational data into the model grid """
print(
    "Spatial aggregation of observational data near latitude %0.2f and longitude %0.2f "
    % (grid_lat, grid_lon))
# There are two options to aggregate observational data near a model grid point
#ref_subset = spatial_aggregation(ref_temporal_subset, grid_lon-0.5*dx, grid_lon+0.5*dx, grid_lat-0.5*dy, grid_lat+0.5*dy)
#model_subset_present = spatial_aggregation(model_temporal_subset_present, grid_lon-0.5*dx, grid_lon+0.5*dx, grid_lat-0.5*dy, grid_lat+0.5*dy)
#model_subset_future = spatial_aggregation(model_temporal_subset_future, grid_lon-0.5*dx, grid_lon+0.5*dx, grid_lat-0.5*dy, grid_lat+0.5*dy)
# Active option: take the data at the grid point nearest (grid_lon, grid_lat).
ref_subset = extract_data_at_nearest_grid_point(ref_temporal_subset, grid_lon,
                                                grid_lat)
model_subset_present = extract_data_at_nearest_grid_point(
    NW_bounds, SW_bounds, NGP_bounds, SGP_bounds, MW_bounds, NE_bounds,
    SE_bounds
]
""" Load nClimDiv file into OCW Dataset """
# Observations are read from file_obs using the variable name 'tave'.
obs_dataset = local.load_file(file_obs, variable_name='tave')
""" Load CMIP5 simulations into a list of OCW Datasets"""
model_dataset = local.load_multiple_files(file_path=model_file_path,
                                          variable_name='tas',
                                          dataset_name=dataset_name,
                                          variable_unit='K')
""" Temporal subset of obs_dataset """
# First cut the record to [start_date, end_date], then keep only the months
# month_start..month_end with average_each_year=True.
obs_dataset_subset = dsp.temporal_slice(obs_dataset,
                                        start_time=start_date,
                                        end_time=end_date)
obs_dataset_season = dsp.temporal_subset(obs_dataset_subset,
                                         month_start,
                                         month_end,
                                         average_each_year=True)
""" Temporal subset of model_dataset """
# Same two-step slice/subset, applied to every model dataset in the list.
model_dataset_subset = [
    dsp.temporal_slice(dataset, start_time=start_date, end_time=end_date)
    for dataset in model_dataset
]
model_dataset_season = [
    dsp.temporal_subset(dataset,
                        month_start,
                        month_end,
                        average_each_year=True)
    for dataset in model_dataset_subset
]
""" Spatial subset of obs_dataset and generate time series """
obs_timeseries = np.zeros([nyear, n_region
Exemplo n.º 14
0
# Spatio-temporal box applied to the reference and to every model dataset.
bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

# Subset the reference dataset to the box and, if its temporal resolution
# differs from the requested one, rebin it.
ref_dataset = dsp.subset(bounds, ref_dataset)
if ref_dataset.temporal_resolution() != temporal_resolution:
    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.subset(bounds, dataset)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the subsetted dataset stored in the list. Rebinning `dataset`
        # (the pre-subset object) would overwrite and discard the subset.
        model_datasets[idata] = dsp.temporal_rebin(model_datasets[idata],
                                                   temporal_resolution)

# Temporally subset both observation and model datasets for the user
# specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

ref_dataset = dsp.temporal_subset(month_start, month_end, ref_dataset,
                                  average_each_year)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.temporal_subset(month_start, month_end,
                                                dataset, average_each_year)

# generate grid points for regridding
if config['regrid']['regrid_on_reference']:
    new_lat = ref_dataset.lats
    new_lon = ref_dataset.lons
else:
    delta_lat = config['regrid']['regrid_dlat']
    delta_lon = config['regrid']['regrid_dlon']
    # np.linspace requires an integer sample count; true division yields a
    # float, so truncate explicitly (what older NumPy did implicitly).
    nlat = int((max_lat - min_lat) / delta_lat + 1)
    nlon = int((max_lon - min_lon) / delta_lon + 1)
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)
import numpy as np
import numpy.ma as ma


''' data source: https://dx.doi.org/10.6084/m9.figshare.3753321.v1
    AOD_monthly_2000-Mar_2016-FEB_from_MISR_L3_JOINT.nc is publicly available.'''
# Load the 'nonabsorbing_ave' variable from the local NetCDF file.
dataset = local.load_file('AOD_monthly_2000-MAR_2016-FEB_from_MISR_L3_JOINT.nc',
                          'nonabsorbing_ave')
''' Subset the data for East Asia'''
# NOTE(review): this variable is capitalised like the ds.Bounds class it is
# built from; it is a Bounds instance, not the class.
Bounds = ds.Bounds(lat_min=20, lat_max=57.7, lon_min=90, lon_max=150)
dataset = dsp.subset(dataset, Bounds)

# NOTE(review): the text below says the record ends in February 2015, while
# the file name says 2016-FEB and the comments further down count 16 years
# (2000-2015) -- confirm which end date is correct.
'''The original dataset includes nonabsorbing AOD values between March 2000 and February 2015. 
dsp.temporal_subset will extract data in September-October-November.'''
dataset_SON = dsp.temporal_subset(
    dataset, month_start=9, month_end=11, average_each_year=True)

# Spatial dimensions are taken from axes 1 and 2 of the values array.
ny, nx = dataset_SON.values.shape[1:]

# multi-year mean aod
# One masked 2-D field per panel: full-period mean, mean of the last five
# entries along axis 0, and the last entry alone.
clim_aod = ma.zeros([3, ny, nx])

clim_aod[0, :] = ma.mean(dataset_SON.values, axis=0)  # 16-year mean
clim_aod[1, :] = ma.mean(dataset_SON.values[-5:, :],
                         axis=0)  # the last 5-year mean
clim_aod[2, :] = dataset_SON.values[-1, :]  # the last year's value

# plot clim_aod (3 subplots)
plotter.draw_contour_map(clim_aod, dataset_SON.lats, dataset_SON.lons,
                         fname='nonabsorbing_AOD_clim_East_Asia_Sep-Nov',
                         gridshape=[1, 3], subtitles=['2000-2015: 16 years', '2011-2015: 5 years', '2015: 1 year'],
Exemplo n.º 16
0
# Print the results gathered for the BCDP benchmark (computed before this
# excerpt).
print(f'BCDP Results: {bcdp_results}')

### OCW SECTION
print('OCW Benchmarks')
# Each stage runs inside time_block(ocw_results, <label>); presumably the
# context manager records the stage's elapsed time under that label --
# confirm against time_block's definition.
ocw_results = {}
with time_block(ocw_results, 'Dataset Loading'):
    datasets = local.load_multiple_files(paths, 'clt')

# The stages below replace each list entry in place, so every stage operates
# on the output of the previous one.
# NOTE(review): the loop variable `ds` would shadow a module imported under
# that name, if this file has one -- confirm.
with time_block(ocw_results, 'Domain Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.subset(ds, bnds)

with time_block(ocw_results, 'Seasonal Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_subset(ds, 9, 11)

with time_block(ocw_results, 'Resampling'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_rebin(ds, 'annual')

with time_block(ocw_results, 'Regridding'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.spatial_regrid(ds, new_lats, new_lons)

print(f'OCW Results: {ocw_results}')

# Plot results
# Bar chart with one column per library, on a logarithmic y axis.
matplotlib.style.use('ggplot')
df = pd.DataFrame({'OCW': ocw_results, 'BCDP': bcdp_results})
df.plot.bar(logy=True, rot=12)
Exemplo n.º 17
0
# Load the reference dataset and report the shape of its values. In the
# shape messages `%` binds tighter than `+`, so only the literal is formatted.
print("Loading %s into an OCW Dataset Object" % (ref_info['path'],))
ref_dataset = local.load_file(ref_info['path'], ref_info['variable'])
print(ref_info['data_name'] +" values shape: (times, lats, lons) - %s \n" % (ref_dataset.values.shape,))

# Load the present-day model run.
print("Loading %s into an OCW Dataset Object" % (model_info['present']['path'],))
model_dataset_present = local.load_file(model_info['present']['path'], model_info['variable'])
print(model_info['data_name'] +" values shape: (times, lats, lons) - %s \n" % (model_dataset_present.values.shape,))
# Element [0] of spatial_resolution() is taken as dy and element [1] as dx;
# they are referenced only by the commented-out aggregation calls below.
dy = model_dataset_present.spatial_resolution()[0]
dx = model_dataset_present.spatial_resolution()[1]

# Load the future-scenario run using the same variable name.
model_dataset_future = local.load_file(model_info['future']['path'], model_info['variable'])
print(model_info['future']['scenario_name']+':'+model_info['data_name'] +" values shape: (times, lats, lons) - %s \n" % (model_dataset_future.values.shape,))

""" Step 2: Temporal subsetting """
print("Temporal subsetting for the selected month(s)")
# Apply the same month window to the reference and both model datasets.
ref_temporal_subset = dsp.temporal_subset(ref_dataset, month_start, month_end)
model_temporal_subset_present = dsp.temporal_subset(model_dataset_present, month_start, month_end)
model_temporal_subset_future = dsp.temporal_subset(model_dataset_future, month_start, month_end)

""" Step 3: Spatial aggregation of observational data into the model grid """
print("Spatial aggregation of observational data near latitude %0.2f and longitude %0.2f " % (grid_lat, grid_lon))
# There are two options to aggregate observational data near a model grid point
#ref_subset = spatial_aggregation(ref_temporal_subset, grid_lon-0.5*dx, grid_lon+0.5*dx, grid_lat-0.5*dy, grid_lat+0.5*dy)
#model_subset_present = spatial_aggregation(model_temporal_subset_present, grid_lon-0.5*dx, grid_lon+0.5*dx, grid_lat-0.5*dy, grid_lat+0.5*dy)
#model_subset_future = spatial_aggregation(model_temporal_subset_future, grid_lon-0.5*dx, grid_lon+0.5*dx, grid_lat-0.5*dy, grid_lat+0.5*dy)
# Active option: take the data at the grid point nearest (grid_lon, grid_lat)
# for the reference and for both model runs.
ref_subset = extract_data_at_nearest_grid_point(ref_temporal_subset, grid_lon, grid_lat)
model_subset_present = extract_data_at_nearest_grid_point(model_temporal_subset_present, grid_lon, grid_lat)
model_subset_future = extract_data_at_nearest_grid_point(model_temporal_subset_future, grid_lon, grid_lat)


""" Step 4:  Create a statistical downscaling object and downscaling model output """
Exemplo n.º 18
0
    min_lon = np.max([min_lon, ref_dataset.lons.min()])
    max_lon = np.min([max_lon, ref_dataset.lons.max()])
# Spatio-temporal box applied to the reference and to every model dataset.
bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

# Subsetting is skipped for a dataset when both its lats and lons arrays are
# two-dimensional.
if ref_dataset.lats.ndim != 2 and ref_dataset.lons.ndim != 2:
    ref_dataset = dsp.subset(bounds, ref_dataset)
for idata, dataset in enumerate(model_datasets):
    if dataset.lats.ndim != 2 and dataset.lons.ndim != 2:
        model_datasets[idata] = dsp.subset(bounds, dataset)

# Temporally subset both observation and model datasets for the user
# specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

ref_dataset = dsp.temporal_subset(month_start, month_end, ref_dataset,
                                  average_each_year)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.temporal_subset(month_start, month_end,
                                                dataset, average_each_year)

# generate grid points for regridding
if config['regrid']['regrid_on_reference']:
    new_lat = ref_dataset.lats
    new_lon = ref_dataset.lons
else:
    delta_lat = config['regrid']['regrid_dlat']
    delta_lon = config['regrid']['regrid_dlon']
    # np.linspace requires an integer sample count; true division yields a
    # float, so truncate explicitly (what older NumPy did implicitly).
    nlat = int((max_lat - min_lat) / delta_lat + 1)
    nlon = int((max_lon - min_lon) / delta_lon + 1)
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)
Exemplo n.º 19
0
# Subset every dataset to `bounds`; when the dataset's temporal resolution
# differs from the requested one, rebin the subsetted entry in the list.
for i, dataset in enumerate(datasets):
    datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# For now we will treat the first listed dataset as the reference dataset for
# evaluation purposes.
for i, dataset in enumerate(datasets):
    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                      average_each_year)

# Split datasets and names into the reference (first entry) and the targets
# (all remaining entries).
reference_dataset = datasets[0]
target_datasets = datasets[1:]
reference_name = names[0]
target_names = names[1:]

# generate grid points for regridding
if config['regrid']['regrid_on_reference']:
    new_lat = reference_dataset.lats
    new_lon = reference_dataset.lons
else:
    delta_lat = config['regrid']['regrid_dlat']
    delta_lon = config['regrid']['regrid_dlon']
    # NOTE(review): under true division these are floats; if they are later
    # passed as the sample count to np.linspace they must be converted to
    # int -- the use site is outside this excerpt, confirm.
    nlat = (max_lat - min_lat)/delta_lat+1
    nlon = (max_lon - min_lon)/delta_lon+1
import ocw.dataset_processor as dsp
import ocw.plotter as plotter

import numpy as np
import numpy.ma as ma
''' data source: https://dx.doi.org/10.6084/m9.figshare.3753321.v1
    AOD_monthly_2000-Mar_2016-FEB_from_MISR_L3_JOINT.nc is publicly available.'''
# Load the 'nonabsorbing_ave' variable from the local NetCDF file.
dataset = local.load_file(
    'AOD_monthly_2000-MAR_2016-FEB_from_MISR_L3_JOINT.nc', 'nonabsorbing_ave')
''' Subset the data for East Asia'''
# NOTE(review): this variable is capitalised like the ds.Bounds class it is
# built from; it is a Bounds instance, not the class.
Bounds = ds.Bounds(lat_min=20, lat_max=57.7, lon_min=90, lon_max=150)
dataset = dsp.subset(dataset, Bounds)
# NOTE(review): the text below says the record ends in February 2015, while
# the file name says 2016-FEB and the comments further down count 16 years
# (2000-2015) -- confirm which end date is correct.
'''The original dataset includes nonabsorbing AOD values between March 2000 and February 2015. 
dsp.temporal_subset will extract data in September-October-November.'''
dataset_SON = dsp.temporal_subset(dataset,
                                  month_start=9,
                                  month_end=11,
                                  average_each_year=True)

# Spatial dimensions are taken from axes 1 and 2 of the values array.
ny, nx = dataset_SON.values.shape[1:]

# multi-year mean aod
# One masked 2-D field per panel: full-period mean, mean of the last five
# entries along axis 0, and the last entry alone.
clim_aod = ma.zeros([3, ny, nx])

clim_aod[0, :] = ma.mean(dataset_SON.values, axis=0)  # 16-year mean
clim_aod[1, :] = ma.mean(dataset_SON.values[-5:, :],
                         axis=0)  # the last 5-year mean
clim_aod[2, :] = dataset_SON.values[-1, :]  # the last year's value

# plot clim_aod (3 subplots)
plotter.draw_contour_map(
    clim_aod,
Exemplo n.º 21
0
# Subset every model dataset to `bounds`; when its temporal resolution
# differs from the requested one, rebin it as well.
for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the subsetted dataset stored in the list. Rebinning `dataset`
        # (the pre-subset object) would overwrite and discard the subset.
        model_datasets[i] = dsp.temporal_rebin(model_datasets[i],
                                               temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# TODO: Fully support multiple observation / reference datasets.
# For now we will only use the first reference dataset listed in the config file
obs_dataset = obs_datasets[0]
obs_name = obs_names[0]
obs_dataset = dsp.temporal_subset(obs_dataset, month_start, month_end,
                                  average_each_year)
for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                            average_each_year)

# generate grid points for regridding
if config['regrid']['regrid_on_reference']:
    new_lat = obs_dataset.lats
    new_lon = obs_dataset.lons
else:
    delta_lat = config['regrid']['regrid_dlat']
    delta_lon = config['regrid']['regrid_dlon']
    # np.linspace requires an integer sample count; true division yields a
    # float, so truncate explicitly (what older NumPy did implicitly).
    nlat = int((max_lat - min_lat) / delta_lat + 1)
    nlon = int((max_lon - min_lon) / delta_lon + 1)
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)
FILE_2 = "AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc"
# Filename for the output image/plot (without file extension)
OUTPUT_PLOT = "wrf_bias_compared_to_knmi"

# Both input files are stored under /tmp.
FILE_1_PATH = path.join('/tmp', FILE_1)
FILE_2_PATH = path.join('/tmp', FILE_2)

# Download each file only when it is not already present locally.
# NOTE(review): urllib.urlretrieve exists only in Python 2; in Python 3 the
# function is urllib.request.urlretrieve -- confirm the target interpreter
# and how urllib is imported at the top of the file.
if not path.exists(FILE_1_PATH):
    urllib.urlretrieve(FILE_LEADER + FILE_1, FILE_1_PATH)
if not path.exists(FILE_2_PATH):
    urllib.urlretrieve(FILE_LEADER + FILE_2, FILE_2_PATH)

""" Step 1: Load Local NetCDF Files into OCW Dataset Objects """
print("Loading %s into an OCW Dataset Object" % (FILE_1_PATH,))
knmi_dataset = local.load_file(FILE_1_PATH, "tasmax")
print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape,))

print("Loading %s into an OCW Dataset Object" % (FILE_2_PATH,))
wrf_dataset = local.load_file(FILE_2_PATH, "tasmax")
print("WRF_Dataset.values shape: (times, lats, lons) - %s \n" % (wrf_dataset.values.shape,))

""" Step 2: Calculate seasonal average """
print("Calculate seasonal average")
# Month window 12 -> 2 (start month greater than end month), then the
# temporal mean of the subset.
knmi_DJF_mean = utils.calc_temporal_mean(dsp.temporal_subset(month_start=12, month_end=2, target_dataset=knmi_dataset))
wrf_DJF_mean = utils.calc_temporal_mean(dsp.temporal_subset(month_start=12, month_end=2, target_dataset=wrf_dataset))
# NOTE(review): the messages label the shape as (times, lats, lons), but the
# printed objects are outputs of calc_temporal_mean -- confirm whether a
# time axis is still present.
print("Seasonally averaged KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_DJF_mean.shape,))
print("Seasonally averaged wrf_Dataset.values shape: (times, lats, lons) - %s \n" % (wrf_DJF_mean.shape,))
# Month window 6 -> 8, then the temporal mean of the subset.
knmi_JJA_mean = utils.calc_temporal_mean(dsp.temporal_subset(month_start=6, month_end=8, target_dataset=knmi_dataset))
wrf_JJA_mean = utils.calc_temporal_mean(dsp.temporal_subset(month_start=6, month_end=8, target_dataset=wrf_dataset))