def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Normalize, subset, temporally rebin, and spatially regrid datasets.

    Each step is applied only if the corresponding key is present in
    ``config_data['evaluation']``:

    * ``subset`` -- six values: lat_min, lat_max, lon_min, lon_max,
      start, end (start/end are parseable date strings).
    * ``temporal_time_delta`` -- number of days per temporal bin.
    * ``spatial_regrid_lats`` / ``spatial_regrid_lons`` -- (start, stop,
      step) triples fed to ``np.arange`` to build the target grid.

    :param reference: reference dataset (may be falsy, in which case the
        per-step transforms are skipped for it).
    :param targets: list of target datasets.
    :param config_data: parsed configuration dictionary.
    :returns: tuple of (reference, targets) after processing.
    """
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta)
        for t in targets
    ]

    if subset:
        # subset[4] / subset[5] are date strings; the first four entries are
        # the lat/lon bounds passed positionally to Bounds.
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        # temporal_time_delta is a day count; timedelta's first positional
        # argument is days.
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Normalize, subset, temporally rebin, and spatially regrid datasets.

    Transforms are driven by optional keys of ``config_data['evaluation']``
    (``subset``, ``temporal_time_delta``, ``spatial_regrid_lats``,
    ``spatial_regrid_lons``) and applied in that order. Returns the
    processed ``(reference, targets)`` pair.
    """
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [dsp.normalize_dataset_datetimes(t, string_time_delta)
               for t in targets]

    if subset:
        # subset = [lat_min, lat_max, lon_min, lon_max, start_str, end_str]
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        # Day count -> timedelta (days is the first positional argument).
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
def temporalRegrid(dataset, timeRes=timedelta(days=365)):
    '''Temporally rebin a dataset variable to a specified time resolution (timedelta object).'''
    dataset = dsp.temporal_rebin(dataset, timeRes)
    # Datasets may carry no name; fall back to an empty label for the log line.
    label = dataset.name if dataset.name is not None else ''
    print('temporalRebin: Dataset %s has new shape %s' %
          (label, str(dataset.values.shape)), file=sys.stderr)
    return dataset
def check_some_dsp_functions(dataset):
    '''
    Run a subset of dataset processor functions and check for
    any kind of exception.
    '''
    try:
        # Exercise two representative processor entry points; any exception
        # is treated as a failed smoke test.
        dsp.temporal_rebin(dataset, 'annual')
        dsp.ensemble([dataset])
    except Exception as e:
        fail("\nDataset processor functions")
        print("Following error occured:")
        print(str(e))
        end()
    finally:
        # Always remove the on-disk source file, pass or fail.
        os.remove(dataset.origin['path'])
    success("\nDataset processor functions")
def check_some_dsp_functions(dataset):
    '''
    Run a subset of dataset processor functions and check for
    any kind of exception.
    '''
    try:
        # Exercise two representative processor entry points; any exception
        # is treated as a failed smoke test.
        dsp.temporal_rebin(dataset, 'annual')
        dsp.ensemble([dataset])
    except Exception as e:
        fail("\nDataset processor functions")
        # Fixed: Python 2 `print` statements are a SyntaxError on Python 3;
        # converted to the print() function (output is unchanged).
        print("Following error occured:")
        print(str(e))
        end()
    finally:
        # Always remove the on-disk source file, pass or fail.
        os.remove(dataset.origin['path'])
    success("\nDataset processor functions")
def test_variable_propagation(self):
    """A temporal rebin must preserve the dataset's name and variable."""
    annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset,
                                       datetime.timedelta(days=365))
    # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual.
    self.assertEqual(annual_dataset.name,
                     self.ten_year_monthly_dataset.name)
    self.assertEqual(annual_dataset.variable,
                     self.ten_year_monthly_dataset.variable)
def test_variable_propagation(self):
    """An 'annual' temporal rebin must preserve the name and variable."""
    annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset,
                                       "annual")
    # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual.
    self.assertEqual(annual_dataset.name,
                     self.ten_year_monthly_dataset.name)
    self.assertEqual(annual_dataset.variable,
                     self.ten_year_monthly_dataset.variable)
def test_daily_to_monthly_rebin(self):
    """This test takes a really long time to run.

    TODO: Figure out where the performance drag is
    """
    monthly_dataset = dp.temporal_rebin(self.two_years_daily_dataset,
                                        datetime.timedelta(days=31))
    # Expected bin stamps: the first of each (year, month) present in the
    # source times, deduplicated and in ascending order.
    expected = {datetime.datetime(t.year, t.month, 1)
                for t in self.two_years_daily_dataset.times}
    bins = np.array(sorted(expected))
    np.testing.assert_array_equal(monthly_dataset.times, bins)
def test_non_rebin(self):
    """This will take a monthly dataset and ask for a monthly rebin of
    28 days. The resulting dataset should have the same time values
    """
    rebinned = dp.temporal_rebin(self.ten_year_monthly_dataset,
                                 datetime.timedelta(days=28))
    # The time axis must come back untouched.
    np.testing.assert_array_equal(rebinned.times,
                                  self.ten_year_monthly_dataset.times)
def temporalRegrid(dataset, timeRes=timedelta(days=365)):
    '''Temporally rebin a dataset variable to a specified time resolution (timedelta object).'''
    dataset = dsp.temporal_rebin(dataset, timeRes)
    name = dataset.name
    if name is None:
        name = ''
    # Fixed: `print >> sys.stderr, ...` is Python 2 syntax and a SyntaxError
    # on Python 3; use print(..., file=sys.stderr) with identical output.
    print('temporalRebin: Dataset %s has new shape %s' % (
        name, str(dataset.values.shape)), file=sys.stderr)
    return dataset
def test_non_rebin(self):
    """This will take a monthly dataset and ask for a monthly rebin of
    28 days. The resulting dataset should have the same time values
    """
    monthly_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset,
                                        "monthly")
    # Expected stamps: the 15th of each (year, month) in the source times,
    # deduplicated and sorted ascending.
    expected = {datetime.datetime(t.year, t.month, 15)
                for t in self.ten_year_monthly_dataset.times}
    bins = np.array(sorted(expected))
    np.testing.assert_array_equal(monthly_dataset.times, bins)
def test_daily_to_annual_rebin(self):
    """Annual rebin of daily data lands each year on its mid-point (2 July)."""
    annual_dataset = dp.temporal_rebin(self.two_years_daily_dataset,
                                       "annual")
    expected = {datetime.datetime(t.year, 7, 2)
                for t in self.two_years_daily_dataset.times}
    bins = np.array(sorted(expected))
    np.testing.assert_array_equal(annual_dataset.times, bins)
def test_non_rebin(self):
    """
    This will take a monthly dataset and ask for a monthly rebin of 28 days.
    The resulting dataset should have the same time values
    """
    monthly_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset,
                                        "monthly")
    # Expected stamps: 15th of each (year, month) present, sorted ascending.
    expected = {datetime.datetime(t.year, t.month, 15)
                for t in self.ten_year_monthly_dataset.times}
    bins = np.array(sorted(expected))
    np.testing.assert_array_equal(monthly_dataset.times, bins)
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Subset, temporally rebin, and spatially regrid datasets for evaluation.

    Transforms are driven by optional keys of ``config_data['evaluation']``
    (``subset``, ``temporal_time_delta``, ``spatial_regrid_lats``,
    ``spatial_regrid_lons``), applied in that order. Returns the processed
    ``(reference, targets)`` pair.

    NOTE(review): unlike the variant elsewhere in this file, this version
    does not normalize dataset datetimes first — confirm that is intended.
    """
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    if subset:
        # subset = [lat_min, lat_max, lon_min, lon_max, start_str, end_str]
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        # Day count -> timedelta (days is the first positional argument).
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        # Each triple is (start, stop, step) for the new grid axis.
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
cru31_dataset = rcmed.parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time) # Step 3: Resample Datasets so they are the same shape. # Running Temporal Rebin early helps negate the issue of datasets being on different # days of the month (1st vs. 15th) print("Temporally Rebinning the Datasets to an Annual Timestep") # To run annual temporal Rebinning, knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='annual') dataset_start, dataset_end = knmi_dataset.temporal_boundaries() start_time = max([start_time, dataset_start]) end_time = min([end_time, dataset_end]) wrf311_dataset = dsp.temporal_rebin( wrf311_dataset, temporal_resolution='annual') dataset_start, dataset_end = wrf311_dataset.temporal_boundaries() start_time = max([start_time, dataset_start]) end_time = min([end_time, dataset_end]) cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='annual') dataset_start, dataset_end = cru31_dataset.temporal_boundaries() start_time = max([start_time, dataset_start]) end_time = min([end_time, dataset_end])
SEASON_MONTH_END = 12

# Spatial/temporal window used for every subset below.
EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset,
                                    datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)

# Load the datasets for the evaluation.
mean_bias = metrics.MeanBias()

# These versions of the metrics require seasonal bounds prior to running
# the metrics. You should set these values above in the evaluation
# configuration section.
spatial_std_dev_ratio = metrics.SeasonalSpatialStdDevRatio(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)
# With monthly data, we could have data falling on the 1st, 15th, or some other # day of the month. Let's fix that real quick. ################################################################################ knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly') wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly') # We're only going to run this evaluation over a years worth of data. We'll # make a Bounds object and use it to subset our datasets. ################################################################################ subset = Bounds(-45, 42, -24, 60, datetime.datetime(1989, 1, 1), datetime.datetime(1989, 12, 1)) knmi_dataset = dsp.subset(subset, knmi_dataset) wrf_dataset = dsp.subset(subset, wrf_dataset) # Temporally re-bin the data into a monthly timestep. ################################################################################ knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30)) wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30)) # Spatially regrid the datasets onto a 1 degree grid. ################################################################################ # Get the bounds of the reference dataset and use it to create a new # set of lat/lon values on a 1 degree step # Using the bounds we will create a new set of lats and lons on 1 degree step min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries() new_lons = numpy.arange(min_lon, max_lon, 1) new_lats = numpy.arange(min_lat, max_lat, 1) # Spatially regrid datasets using the new_lats, new_lons numpy arrays knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)
urlretrieve(FILE_LEADER + FILE_2, FILE_2_PATH) # Step 1: Load Local NetCDF Files into OCW Dataset Objects. print("Loading %s into an OCW Dataset Object" % (FILE_1_PATH,)) knmi_dataset = local.load_file(FILE_1_PATH, "tasmax") print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape,)) print("Loading %s into an OCW Dataset Object" % (FILE_2_PATH,)) wrf_dataset = local.load_file(FILE_2_PATH, "tasmax") print("WRF_Dataset.values shape: (times, lats, lons) - %s \n" % (wrf_dataset.values.shape,)) # Step 2: Temporally Rebin the Data into an Annual Timestep. print("Temporally Rebinning the Datasets to an Annual Timestep") knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='annual') wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='annual') print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape,)) print("WRF_Dataset.values shape: %s \n\n" % (wrf_dataset.values.shape,)) # Step 3: Spatially Regrid the Dataset Objects to a 1 degree grid. # The spatial_boundaries() function returns the spatial extent of the dataset print("The KNMI_Dataset spatial bounds (min_lat, max_lat, min_lon, max_lon) are: \n" "%s\n" % (knmi_dataset.spatial_boundaries(), )) print("The KNMI_Dataset spatial resolution (lat_resolution, lon_resolution) is: \n" "%s\n\n" % (knmi_dataset.spatial_resolution(), )) min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries() # Using the bounds we will create a new set of lats and lons on 1 degree step new_lons = np.arange(min_lon, max_lon, 1)
# day of the month. Let's fix that real quick. ########################################################################## knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly') wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly') # We're only going to run this evaluation over a years worth of data. We'll # make a Bounds object and use it to subset our datasets. ########################################################################## subset = Bounds(lat_min=-45, lat_max=42, lon_min=-24, lon_max=60, start=datetime.datetime(1989, 1, 1), end=datetime.datetime(1989, 12, 1)) knmi_dataset = dsp.subset(knmi_dataset, subset) wrf_dataset = dsp.subset(wrf_dataset, subset) # Temporally re-bin the data into a monthly timestep. ########################################################################## knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly') wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly') # Spatially regrid the datasets onto a 1 degree grid. ########################################################################## # Get the bounds of the reference dataset and use it to create a new # set of lat/lon values on a 1 degree step # Using the bounds we will create a new set of lats and lons on 1 degree step min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries() new_lons = numpy.arange(min_lon, max_lon, 1) new_lats = numpy.arange(min_lat, max_lat, 1) # Spatially regrid datasets using the new_lats, new_lons numpy arrays knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)
""" Step 3: Resample Datasets so they are the same shape """ print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape,)) print("KNMI_Dataset.values shape: (times, lats, lons) - %s" % (knmi_dataset.values.shape,)) print("Our two datasets have a mis-match in time. We will subset on time to %s years\n" % YEARS) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) knmi_dataset = dsp.subset(new_bounds, knmi_dataset) print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape,)) print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape,)) print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning use a timedelta > 366 days. I used 999 in this example knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=999)) cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=999)) print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape,)) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape,)) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 1 degree step new_lons = np.arange(min_lon, max_lon, 0.5) new_lats = np.arange(min_lat, max_lat, 0.5) # Spatially regrid datasets using the new_lats, new_lons numpy arrays print("Spatially Regridding the KNMI_Dataset...") knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) print("Spatially Regridding the CRU31_Dataset...") cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)
""" Step 3: Resample Datasets so they are the same shape """ print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape,)) print("KNMI_Dataset.values shape: (times, lats, lons) - %s" % (knmi_dataset.values.shape,)) print("Our two datasets have a mis-match in time. We will subset on time to %s years\n" % YEARS) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) knmi_dataset = dsp.subset(knmi_dataset, new_bounds) print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape,)) print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape,)) print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution = 'full') cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution = 'full') print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape,)) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape,)) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 0.5 degree step new_lons = np.arange(min_lon, max_lon, 0.5) new_lats = np.arange(min_lat, max_lat, 0.5) # Spatially regrid datasets using the new_lats, new_lons numpy arrays print("Spatially Regridding the KNMI_Dataset...") knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) print("Spatially Regridding the CRU31_Dataset...") cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)
print('OCW Benchmarks')
ocw_results = {}
# Time each pipeline stage; time_block presumably records the elapsed time
# under the given key — TODO confirm against its definition.
with time_block(ocw_results, 'Dataset Loading'):
    datasets = local.load_multiple_files(paths, 'clt')

with time_block(ocw_results, 'Domain Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.subset(ds, bnds)

with time_block(ocw_results, 'Seasonal Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_subset(ds, 9, 11)

with time_block(ocw_results, 'Resampling'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_rebin(ds, 'annual')

with time_block(ocw_results, 'Regridding'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.spatial_regrid(ds, new_lats, new_lons)

print(f'OCW Results: {ocw_results}')

# Plot results
matplotlib.style.use('ggplot')
df = pd.DataFrame({'OCW': ocw_results, 'BCDP': bcdp_results})
# Fixed: the Axes returned by df.plot.bar() was discarded, so the
# `for p in ax.patches` loop below raised NameError on `ax`.
ax = df.plot.bar(logy=True, rot=12)
for p in ax.patches:
    val = np.round(p.get_height(), decimals=2)
    ax.annotate(str(val), (p.get_x() + .02, p.get_height()), size=9.5)
# Build the evaluation bounds either from explicit lat/lon limits or from a
# named boundary type supplied by the configuration.
if 'boundary_type' not in space_info:
    bounds = Bounds(lat_min=min_lat, lat_max=max_lat,
                    lon_min=min_lon, lon_max=max_lon,
                    start=start_time, end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time, end=end_time)

for i, dataset in enumerate(datasets):
    datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Fixed: previously rebinned the ORIGINAL `dataset`, silently
        # discarding the subset result stored just above. Rebin the
        # already-subset dataset instead.
        datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# For now we will treat the first listed dataset as the reference dataset for
# evaluation purposes.
for i, dataset in enumerate(datasets):
    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                      average_each_year)

reference_dataset = datasets[0]
target_datasets = datasets[1:]
SEASON_MONTH_END = 12

# Spatial/temporal window used for every subset below.
EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset,
                                    datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)

# Load the datasets for the evaluation.
mean_bias = metrics.MeanBias()

# These versions of the metrics require seasonal bounds prior to running
# the metrics. You should set these values above in the evaluation
# configuration section.
spatial_std_dev_ratio = metrics.SeasonalSpatialStdDevRatio(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)
pattern_correlation = metrics.SeasonalPatternCorrelation(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)
""" Step 1: Load Local NetCDF File into OCW Dataset Objects and store in list""" target_datasets.append(local.load_file(FILE_1, varName, name="KNMI")) target_datasets.append(local.load_file(FILE_2, varName, name="UC")) target_datasets.append(local.load_file(FILE_3, varName, name="UCT")) """ Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """ print("Working with the rcmed interface to get CRU3.1 Daily Precipitation") # the dataset_id and the parameter id were determined from # https://rcmes.jpl.nasa.gov/content/data-rcmes-database CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END) """ Step 3: Resample Datasets so they are the same shape """ print("Resampling datasets") CRU31 = dsp.water_flux_unit_conversion(CRU31) CRU31 = dsp.temporal_rebin(CRU31, datetime.timedelta(days=30)) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member]) target_datasets[member] = dsp.water_flux_unit_conversion(target_datasets[member]) target_datasets[member] = dsp.temporal_rebin(target_datasets[member], datetime.timedelta(days=30)) """ Spatially Regrid the Dataset Objects to a user defined grid """ # Using the bounds we will create a new set of lats and lons print("Regridding datasets") new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons) for member, each_target_dataset in enumerate(target_datasets):
# Fixed: Python 2 `print` statements are a SyntaxError on Python 3;
# converted to the print() function (output is unchanged).
print('Maximum overlap period')
print('start_time:', start_time)
print('end_time:', end_time)

# Monthly data is normalized to the first of the month.
if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

if ref_data_info['data_source'] == 'rcmed':
    # Clamp the evaluation box to the reference dataset's actual extent.
    min_lat = np.max([min_lat, ref_dataset.lats.min()])
    max_lat = np.min([max_lat, ref_dataset.lats.max()])
    min_lon = np.max([min_lon, ref_dataset.lons.min()])
    max_lon = np.min([max_lon, ref_dataset.lons.max()])
bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

ref_dataset = dsp.subset(bounds, ref_dataset)
if ref_dataset.temporal_resolution() != temporal_resolution:
    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.subset(bounds, dataset)
    if dataset.temporal_resolution() != temporal_resolution:
        # Fixed: previously rebinned the pre-subset `dataset`, discarding
        # the subset result stored just above.
        model_datasets[idata] = dsp.temporal_rebin(model_datasets[idata],
                                                   temporal_resolution)

# Temporaly subset both observation and model datasets for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

ref_dataset = dsp.temporal_subset(month_start, month_end, ref_dataset,
                                  average_each_year)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.temporal_subset(month_start, month_end,
                                                dataset, average_each_year)

# generate grid points for regridding
def test_invalid_unit_rebin(self):
    """An unrecognized resolution string must raise ValueError."""
    self.assertRaises(ValueError, dp.temporal_rebin,
                      self.two_years_daily_dataset, "days")
print( "Our two datasets have a mis-match in time. We will subset on time to %s years\n" % YEARS) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) knmi_dataset = dsp.subset(new_bounds, knmi_dataset) print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape, )) print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning use a timedelta > 366 days. I used 999 in this example knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=999)) cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=999)) print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, )) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, )) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 1 degree step new_lons = np.arange(min_lon, max_lon, 0.5) new_lats = np.arange(min_lat, max_lat, 0.5) # Spatially regrid datasets using the new_lats, new_lons numpy arrays print("Spatially Regridding the KNMI_Dataset...") knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) print("Spatially Regridding the CRU31_Dataset...") cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons) print("Final shape of the KNMI_Dataset:%s" % (knmi_dataset.values.shape, ))
# Build the evaluation bounds either from explicit lat/lon limits or from a
# named boundary type supplied by the configuration.
if 'boundary_type' not in space_info:
    bounds = Bounds(lat_min=min_lat, lat_max=max_lat,
                    lon_min=min_lon, lon_max=max_lon,
                    start=start_time, end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time, end=end_time)

for i, dataset in enumerate(obs_datasets):
    obs_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Fixed: previously rebinned the pre-subset `dataset`, silently
        # discarding the subset result stored just above.
        obs_datasets[i] = dsp.temporal_rebin(obs_datasets[i],
                                             temporal_resolution)

for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Same fix as the observation loop above.
        model_datasets[i] = dsp.temporal_rebin(model_datasets[i],
                                               temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# TODO: Fully support multiple observation / reference datasets.
# For now we will only use the first reference dataset listed in the config file
obs_dataset = obs_datasets[0]
# Build the evaluation bounds either from explicit lat/lon limits or from a
# named boundary type supplied by the configuration.
if not 'boundary_type' in space_info:
    bounds = Bounds(lat_min=min_lat, lat_max=max_lat,
                    lon_min=min_lon, lon_max=max_lon,
                    start=start_time, end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time, end=end_time)

for i, dataset in enumerate(datasets):
    datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the already-subset element (datasets[i]), not the loop
        # variable, so the subset result is preserved.
        datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# For now we will treat the first listed dataset as the reference dataset for
# evaluation purposes.
for i, dataset in enumerate(datasets):
    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                      average_each_year)

reference_dataset = datasets[0]
target_datasets = datasets[1:]
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    # Timestamp doubles as the name of the working directory for plot output.
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'],
                                        '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'],
                                          eval_bounds)
    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of
    # the month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    # resolution == 1 day is treated as daily; anything coarser as monthly.
    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    # NOTE(review): safe_subset is called bounds-first here; other code in
    # this project calls it dataset-first — confirm against the dsp version
    # in use.
    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds)
                       for ds in target_datasets]

    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins,
                               eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
def test_monthly_to_annual_rebin(self):
    """Rebinning ten years of monthly data with 'annual' yields the
    expected annual time axis."""
    rebinned = dp.temporal_rebin(self.ten_year_monthly_dataset, "annual")
    np.testing.assert_array_equal(rebinned.times,
                                  self.ten_year_annual_times)
def test_monthly_to_full_rebin(self):
    """A ten-year (3650-day) timedelta collapses the whole monthly record
    into a single time bucket."""
    bucket = datetime.timedelta(days=3650)
    rebinned = dp.temporal_rebin(self.ten_year_monthly_dataset, bucket)
    self.assertEqual(rebinned.times, [datetime.datetime(2004, 12, 16)])
def test_monthly_to_full_rebin(self):
    """Rebinning with 'full' collapses all times to a single timestep."""
    rebinned = dp.temporal_rebin(self.ten_year_monthly_dataset, "full")
    expected = [datetime.datetime(2005, 1, 1)]
    self.assertEqual(rebinned.times, expected)
target_datasets.append(local.load_file(FILE_2, varName, name="REGM3"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print(
    "Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation"
)
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Resample Datasets so they are the same shape """
print("Resampling datasets ...")
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
print("... temporal")
CRU31 = dsp.temporal_rebin(CRU31, temporal_resolution='monthly')

# Convert units, rebin to monthly, and subset each target to the
# evaluation bounds.
for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.temporal_rebin(
        target_datasets[member], temporal_resolution='monthly')
    # NOTE(review): subset is called bounds-first here; other snippets call
    # it dataset-first — confirm against the dsp version in use.
    target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member])

# Regrid everything onto a common lat/lon grid.
print("... regrid")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
def run_screen(model_datasets, models_info, observations_info,
               overlap_start_time, overlap_end_time, overlap_min_lat,
               overlap_max_lat, overlap_min_lon, overlap_max_lon,
               temp_grid_setting, spatial_grid_setting, working_directory,
               plot_title):
    '''Generates screen to show running evaluation process.

    :param model_datasets: list of model dataset objects
    :type model_datasets: list
    :param models_info: list of dictionaries that contain information
        for each model
    :type models_info: list
    :param observations_info: list of dictionaries that contain information
        for each observation
    :type observations_info: list
    :param overlap_start_time: overlap start time between model and obs
        start time
    :type overlap_start_time: datetime
    :param overlap_end_time: overlap end time between model and obs end time
    :type overlap_end_time: datetime
    :param overlap_min_lat: overlap minimum lat between model and obs
        minimum lat
    :type overlap_min_lat: float
    :param overlap_max_lat: overlap maximum lat between model and obs
        maximum lat
    :type overlap_max_lat: float
    :param overlap_min_lon: overlap minimum lon between model and obs
        minimum lon
    :type overlap_min_lon: float
    :param overlap_max_lon: overlap maximum lon between model and obs
        maximum lon
    :type overlap_max_lon: float
    :param temp_grid_setting: temporal grid option such as hourly, daily,
        monthly and annually
    :type temp_grid_setting: string
    :param spatial_grid_setting: spatial grid step in degrees
    :type spatial_grid_setting: string
    :param working_directory: path to a directory for storing outputs
    :type working_directory: string
    :param plot_title: Title for plot
    :type plot_title: string
    '''
    option = None
    if option != "0":
        ready_screen("manage_obs_screen")
        y = screen.getmaxyx()[0]
        screen.addstr(2, 2, "Evaluation started....")
        screen.refresh()
        OUTPUT_PLOT = "plot"
        # just accepts one dataset at this time
        dataset_id = int(observations_info[0]['dataset_id'])
        # just accepts one dataset at this time
        parameter_id = int(observations_info[0]['parameter_id'])
        new_bounds = Bounds(overlap_min_lat,
                            overlap_max_lat,
                            overlap_min_lon,
                            overlap_max_lon,
                            overlap_start_time,
                            overlap_end_time)
        # just accepts one model at this time
        model_dataset = dsp.subset(new_bounds, model_datasets[0])
        # Getting bound info of subset model file to retrieve obs data with
        # the same bounds as the subset model
        new_model_spatial_bounds = model_dataset.spatial_boundaries()
        new_model_temp_bounds = model_dataset.time_range()
        new_min_lat = new_model_spatial_bounds[0]
        new_max_lat = new_model_spatial_bounds[1]
        new_min_lon = new_model_spatial_bounds[2]
        new_max_lon = new_model_spatial_bounds[3]
        new_start_time = new_model_temp_bounds[0]
        new_end_time = new_model_temp_bounds[1]
        screen.addstr(4, 4, "Retrieving data...")
        screen.refresh()
        # Retrieve obs data
        obs_dataset = rcmed.parameter_dataset(
            dataset_id, parameter_id, new_min_lat, new_max_lat, new_min_lon,
            new_max_lon, new_start_time, new_end_time)
        screen.addstr(4, 4, "--> Data retrieved.")
        screen.refresh()
        screen.addstr(5, 4, "Temporally regridding...")
        screen.refresh()
        # Map the requested temporal grid to a timedelta length in days.
        if temp_grid_setting.lower() == 'hourly':
            days = 0.5
        elif temp_grid_setting.lower() == 'daily':
            days = 1
        elif temp_grid_setting.lower() == 'monthly':
            days = 31
        else:
            days = 365
        model_dataset = dsp.temporal_rebin(model_dataset, timedelta(days))
        obs_dataset = dsp.temporal_rebin(obs_dataset, timedelta(days))
        screen.addstr(5, 4, "--> Temporally regridded.")
        screen.refresh()
        new_lats = np.arange(new_min_lat, new_max_lat, spatial_grid_setting)
        new_lons = np.arange(new_min_lon, new_max_lon, spatial_grid_setting)
        screen.addstr(6, 4, "Spatially regridding...")
        screen.refresh()
        spatial_gridded_model = dsp.spatial_regrid(model_dataset, new_lats,
                                                   new_lons)
        spatial_gridded_obs = dsp.spatial_regrid(obs_dataset, new_lats,
                                                 new_lons)
        screen.addstr(6, 4, "--> Spatially regridded.")
        screen.refresh()
        screen.addstr(7, 4, "Setting up metrics...")
        screen.refresh()
        bias = metrics.Bias()
        bias_evaluation = evaluation.Evaluation(spatial_gridded_model,
                                                [spatial_gridded_obs], [bias])
        screen.addstr(7, 4, "--> Metrics setting done.")
        screen.refresh()
        screen.addstr(8, 4, "Running evaluation.....")
        screen.refresh()
        bias_evaluation.run()
        results = bias_evaluation.results[0][0]
        screen.addstr(8, 4, "--> Evaluation Finished.")
        screen.refresh()
        screen.addstr(9, 4, "Generating plots....")
        screen.refresh()
        lats = new_lats
        lons = new_lons
        gridshape = (1, 1)
        sub_titles = [""]  # No subtitle set for now
        if not os.path.exists(working_directory):
            os.makedirs(working_directory)
        for i in range(len(results)):
            # NOTE(review): plain string concatenation — if working_directory
            # lacks a trailing separator the plot name is fused onto the
            # directory name; confirm callers always pass a trailing '/'.
            fname = working_directory + OUTPUT_PLOT + str(i)
            plotter.draw_contour_map(results[i], lats, lons, fname,
                                     gridshape=gridshape, ptitle=plot_title,
                                     subtitles=sub_titles)
        screen.addstr(9, 4, "--> Plots generated.")
        screen.refresh()
        screen.addstr(y-2, 1, "Press 'enter' to Exit: ")
        option = screen.getstr()
print("Fetching data from RCMED...")
cru31_dataset = rcmed.parameter_dataset(dataset_id, parameter_id, min_lat,
                                        max_lat, min_lon, max_lon,
                                        start_time, end_time)

""" Step 3: Resample Datasets so they are the same shape """
print("Temporally Rebinning the Datasets to an Annual Timestep")
# To run annual temporal Rebinning use a timedelta of 360 days.
knmi_dataset = dsp.temporal_rebin(knmi_dataset,
                                  datetime.timedelta(days=360))
wrf311_dataset = dsp.temporal_rebin(wrf311_dataset,
                                    datetime.timedelta(days=360))
cru31_dataset = dsp.temporal_rebin(cru31_dataset,
                                   datetime.timedelta(days=360))

# Running Temporal Rebin early helps negate the issue of datasets being on
# different days of the month (1st vs. 15th)

# Create a Bounds object to use for subsetting
new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

# Subset our model datasets so they are the same size
knmi_dataset = dsp.subset(new_bounds, knmi_dataset)
wrf311_dataset = dsp.subset(new_bounds, wrf311_dataset)

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on 1/2 degree
# step
new_lons = np.arange(min_lon, max_lon, 0.5)
# Set the Time Range to be the year 1989 start_time = datetime.datetime(1989, 1, 1) end_time = datetime.datetime(1989, 12, 1) print("Time Range is: %s to %s" % (start_time.strftime("%Y-%m-%d"), end_time.strftime("%Y-%m-%d"))) print("Fetching data from RCMED...") cru31_dataset = rcmed.parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time) """ Step 3: Resample Datasets so they are the same shape """ print("Temporally Rebinning the Datasets to an Annual Timestep") # To run annual temporal Rebinning use a timedelta of 360 days. knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=360)) wrf311_dataset = dsp.temporal_rebin(wrf311_dataset, datetime.timedelta(days=360)) cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=360)) # Running Temporal Rebin early helps negate the issue of datasets being on different # days of the month (1st vs. 15th) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) # Subset our model datasets so they are the same size knmi_dataset = dsp.subset(new_bounds, knmi_dataset) wrf311_dataset = dsp.subset(new_bounds, wrf311_dataset) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 1/2 degree step new_lons = np.arange(min_lon, max_lon, 0.5)
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(
    10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

""" Step 3: Resample Datasets so they are the same shape """
print("Resampling datasets ...")
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
print("... temporal")
CRU31 = dsp.temporal_rebin(CRU31, temporal_resolution='monthly')

# Convert units, rebin to monthly, and subset each target to the
# evaluation bounds (dataset-first subset signature in this snippet).
for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.temporal_rebin(
        target_datasets[member], temporal_resolution='monthly')
    target_datasets[member] = dsp.subset(target_datasets[member],
                                         EVAL_BOUNDS)

# Regrid everything onto a common lat/lon grid.
print("... regrid")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
def test_daily_to_daily_rebin(self):
    """A 'daily' rebin of already-daily data leaves the time axis
    unchanged."""
    rebinned = dp.temporal_rebin(self.two_years_daily_dataset, "daily")
    np.testing.assert_array_equal(rebinned.times,
                                  self.two_years_daily_dataset.times)
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    # Timestamp doubles as the name of the working directory for plot output.
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'],
                                        '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'],
                                          eval_bounds)
    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of
    # the month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    # resolution == 1 day is treated as daily; anything coarser as monthly.
    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    # NOTE(review): safe_subset is called dataset-first here; other code in
    # this project calls it bounds-first — confirm against the dsp version
    # in use.
    ref_dataset = dsp.safe_subset(ref_dataset, subset)
    target_datasets = [dsp.safe_subset(ds, subset)
                       for ds in target_datasets]

    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins,
                               eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
# Subset the KNMI dataset to the evaluation bounds.
new_bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon,
                    lon_max=max_lon, start=start_time, end=end_time)
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)

print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape, ))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" %
      (knmi_dataset.values.shape, ))

print("Temporally Rebinning the Datasets to a Single Timestep")
# To run FULL temporal Rebinning
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='full')
cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='full')

print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, ))
print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, ))

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on 0.5 degree
# step
new_lons = np.arange(min_lon, max_lon, 0.5)
new_lats = np.arange(min_lat, max_lat, 0.5)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
print("Spatially Regridding the KNMI_Dataset...")
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
print("Spatially Regridding the CRU31_Dataset...")
cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)

print("Final shape of the KNMI_Dataset:%s" % (knmi_dataset.values.shape, ))
def test_monthly_to_annual_rebin(self):
    """A 365-day timedelta rebins ten years of monthly data into annual
    buckets."""
    one_year = datetime.timedelta(days=365)
    rebinned = dp.temporal_rebin(self.ten_year_monthly_dataset, one_year)
    np.testing.assert_array_equal(rebinned.times,
                                  self.ten_year_annual_times)
print("Fetching data from RCMED...")
cru31_dataset = rcmed.parameter_dataset(dataset_id, parameter_id, min_lat,
                                        max_lat, min_lon, max_lon,
                                        start_time, end_time)

""" Step 3: Resample Datasets so they are the same shape """
print("Temporally Rebinning the Datasets to an Annual Timestep")
# To run annual temporal Rebinning pass the 'annual' resolution string.
knmi_dataset = dsp.temporal_rebin(knmi_dataset,
                                  temporal_resolution='annual')
wrf311_dataset = dsp.temporal_rebin(wrf311_dataset,
                                    temporal_resolution='annual')
cru31_dataset = dsp.temporal_rebin(cru31_dataset,
                                   temporal_resolution='annual')

# Running Temporal Rebin early helps negate the issue of datasets being on
# different days of the month (1st vs. 15th)

# Create a Bounds object to use for subsetting
new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

# Subset our model datasets so they are the same size
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)
wrf311_dataset = dsp.subset(wrf311_dataset, new_bounds)

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on 1/2 degree
# step
new_lons = np.arange(min_lon, max_lon, 0.5)
# We're only going to run this evaluation over a years worth of data. We'll # make a Bounds object and use it to subset our datasets. ########################################################################## subset = Bounds(lat_min=-45, lat_max=42, lon_min=-24, lon_max=60, start=datetime.datetime(1989, 1, 1), end=datetime.datetime(1989, 12, 1)) knmi_dataset = dsp.subset(knmi_dataset, subset) wrf_dataset = dsp.subset(wrf_dataset, subset) # Temporally re-bin the data into a monthly timestep. ########################################################################## knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly') wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly') # Spatially regrid the datasets onto a 1 degree grid. ########################################################################## # Get the bounds of the reference dataset and use it to create a new # set of lat/lon values on a 1 degree step # Using the bounds we will create a new set of lats and lons on 1 degree step min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries() new_lons = numpy.arange(min_lon, max_lon, 1) new_lats = numpy.arange(min_lat, max_lat, 1) # Spatially regrid datasets using the new_lats, new_lons numpy arrays knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)
target_datasets.append(local.load_file(FILE_2, varName, name="REGM3"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Resample Datasets so they are the same shape """
print("Resampling datasets ...")
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
print("... temporal")
# 30-day timedelta approximates a monthly rebin.
CRU31 = dsp.temporal_rebin(CRU31, datetime.timedelta(days=30))

# Convert units, rebin to ~monthly, and subset each target to the
# evaluation bounds.
for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.temporal_rebin(
        target_datasets[member], datetime.timedelta(days=30))
    # NOTE(review): subset is called bounds-first here; other snippets call
    # it dataset-first — confirm against the dsp version in use.
    target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member])

# Regrid everything onto a common lat/lon grid.
print("... regrid")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLatStep if False else gridLonStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(target_datasets[member],
                                                 new_lats, new_lons)