def setUp(self):
    self.bounds = Bounds(
        -80, 80,                  # Lats
        -160, 160,                # Lons
        dt.datetime(2000, 1, 1),  # Start time
        dt.datetime(2002, 1, 1))  # End time
    self.another_bounds = Bounds(
        -80, 80,  # Lats
        -160, 160)
def setUp(self):
    self.bounds_rectangular = Bounds(
        lat_min=-80, lat_max=80,        # Lats
        lon_min=-160, lon_max=160,      # Lons
        start=dt.datetime(2000, 1, 1),  # Start time
        end=dt.datetime(2002, 1, 1))    # End time
    self.bounds_CORDEX = Bounds(boundary_type='CORDEX South Asia')
    self.bounds_us_states = Bounds(boundary_type='us_states',
                                   us_states=['CA', 'NV', 'AZ'])
    self.bounds_countries = Bounds(
        boundary_type='countries',
        countries=['United States', 'Canada', 'Mexico'])
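# A minimal sketch of how Bounds objects like the ones above are consumed
# downstream, using a small synthetic Dataset (the same construction the test
# fixtures use); dsp.subset here follows the newer dataset-first calling
# convention seen elsewhere in this section.
import datetime as dt
import numpy as np
import ocw.dataset_processor as dsp
from ocw.dataset import Bounds, Dataset

lats = np.array([10, 12, 14, 16, 18])
lons = np.array([100, 102, 104, 106, 108])
times = np.array([dt.datetime(2000, m, 1) for m in range(1, 13)])
values = np.arange(300).reshape(12, 5, 5)  # 12 monthly steps on a 5x5 grid
ds = Dataset(lats, lons, times, values, variable='var')

# Rectangular bounds via the keyword constructor...
rect = Bounds(lat_min=12, lat_max=16, lon_min=102, lon_max=106,
              start=dt.datetime(2000, 1, 1), end=dt.datetime(2000, 6, 1))
trimmed = dsp.subset(ds, rect)  # trimmed copy of ds

# ...or a named political boundary, with no lat/lon values needed.
west = Bounds(boundary_type='us_states', us_states=['CA', 'NV', 'AZ'])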
def test_subregion_unary_result_shape(self):
    bound = Bounds(
        lat_min=10, lat_max=18,
        lon_min=100, lon_max=108,
        start=dt.datetime(2000, 1, 1),
        end=dt.datetime(2000, 3, 1))

    new_eval = Evaluation(
        self.test_dataset,
        [self.another_test_dataset, self.another_test_dataset],
        [TemporalStdDev(), TemporalStdDev()],
        [bound, bound, bound, bound, bound]
    )
    new_eval.run()

    # Expected result shape is
    # [
    #     [   # Subregions cause this extra layer
    #         [3, temporalstddev.run(reference).shape],
    #     ]
    # ]
    # 5 = number of subregions
    self.assertTrue(len(new_eval.unary_results) == 5)
    # number of metrics
    self.assertTrue(len(new_eval.unary_results[0]) == 2)
    self.assertTrue(isinstance(new_eval.unary_results, list))
    # number of datasets (ref + target)
    self.assertTrue(new_eval.unary_results[0][0].shape[0] == 3)
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """ Normalize, subset, temporally rebin, and spatially regrid the
    reference and target datasets according to the evaluation config. """
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with
    # off-by-one errors in the resulting dataset shape post-temporal/spatial
    # adjustments that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
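# A hypothetical config_data fragment showing the shape the helper above
# expects; the key names mirror its .get() lookups, but every value here is
# purely illustrative.
config_data = {
    'evaluation': {
        # lat_min, lat_max, lon_min, lon_max, start, end
        'subset': [-45.0, 42.24, -24.0, 60.0, '2000-01-01', '2007-12-31'],
        # days per temporal bin (1 triggers daily normalization)
        'temporal_time_delta': 31,
        # start, stop, step handed straight to np.arange()
        'spatial_regrid_lats': (-45.0, 42.24, 0.5),
        'spatial_regrid_lons': (-24.0, 60.0, 0.5),
    }
}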
def test_valid_subregion(self):
    bound = Bounds(
        lat_min=-10, lat_max=10,
        lon_min=-20, lon_max=20,
        start=dt.datetime(2000, 1, 1),
        end=dt.datetime(2001, 1, 1))

    self.eval.subregions = [bound, bound]
    self.assertEqual(len(self.eval.subregions), 2)
def test_subregion_result_shape(self):
    bound = Bounds(
        10, 18,
        100, 108,
        dt.datetime(2000, 1, 1), dt.datetime(2000, 3, 1))

    bias_eval = Evaluation(
        self.test_dataset,
        [self.another_test_dataset, self.another_test_dataset],
        [Bias()],
        [bound])
    bias_eval.run()

    # Expected result shape is
    # [
    #     [   # Subregions cause this extra layer
    #         [number of targets, bias.run(reference, target1).shape]
    #     ]
    # ]
    self.assertTrue(len(bias_eval.results) == 1)
    self.assertTrue(len(bias_eval.results[0]) == 1)
    self.assertTrue(bias_eval.results[0][0].shape[0] == 2)
    self.assertTrue(isinstance(bias_eval.results, list))
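# Taken together, the two shape tests above pin down how subregions nest the
# output: unary_results is indexed as [subregion][metric], with each array's
# leading axis counting the datasets (reference + targets), while results
# gains one extra nesting level when subregions are present, with each
# array's leading axis counting the target datasets. (This summary is
# inferred from the assertions above, not from additional API guarantees.)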
LON_MIN = -24.0
LON_MAX = 60.0
START_SUB = datetime.datetime(2000, 1, 1)
END_SUB = datetime.datetime(2007, 12, 31)

# regridding parameters
gridLonStep = 0.5
gridLatStep = 0.5

# Regrid
print("... regrid")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)

list_of_regions = [
    Bounds(-10.0, 0.0, 29.0, 36.5, START_SUB, END_SUB),
    Bounds(0.0, 10.0, 29.0, 37.5, START_SUB, END_SUB),
    Bounds(10.0, 20.0, 25.0, 32.5, START_SUB, END_SUB),
    Bounds(20.0, 33.0, 25.0, 32.5, START_SUB, END_SUB),
    Bounds(-19.3, -10.2, 12.0, 20.0, START_SUB, END_SUB),
    Bounds(15.0, 30.0, 15.0, 25.0, START_SUB, END_SUB),
    Bounds(-10.0, 10.0, 7.3, 15.0, START_SUB, END_SUB),
    Bounds(-10.9, 10.0, 5.0, 7.3, START_SUB, END_SUB),
    Bounds(33.9, 40.0, 6.9, 15.0, START_SUB, END_SUB),
    Bounds(10.0, 25.0, 0.0, 10.0, START_SUB, END_SUB),
    Bounds(10.0, 25.0, -10.0, 0.0, START_SUB, END_SUB),
    Bounds(30.0, 40.0, -15.0, 0.0, START_SUB, END_SUB),
    Bounds(33.0, 40.0, 25.0, 35.0, START_SUB, END_SUB)
]

# for plotting the subregions
@classmethod
def setUpClass(cls):
    cls.lats = np.array([10, 12, 14, 16, 18])
    cls.lons = np.array([100, 102, 104, 106, 108])
    cls.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
    flat_array = np.array(range(300))
    cls.values = flat_array.reshape(12, 5, 5)
    cls.variable = 'var'
    cls.units = 'units'
    cls.name = 'name'

    cls.local_origin = {
        'source': 'local',
        'path': '/a/fake/path.nc',
        'lat_name': 'a lat name',
        'lon_name': 'a lon name',
        'time_name': 'a time name',
        'elevation_index': 2
    }
    cls.rcmed_origin = {
        'source': 'rcmed',
        'dataset_id': 4,
        'parameter_id': 14
    }
    cls.esgf_origin = {
        'source': 'esgf',
        'dataset_id': 'esgf dataset id',
        'variable': 'var'
    }
    cls.dap_origin = {
        'source': 'dap',
        'url': 'a fake url',
    }

    cls.local_ds = Dataset(cls.lats, cls.lons, cls.times, cls.values,
                           variable=cls.variable, units=cls.units,
                           name=cls.name, origin=cls.local_origin)
    cls.rcmed_ds = Dataset(cls.lats, cls.lons, cls.times, cls.values,
                           variable=cls.variable, units=cls.units,
                           name=cls.name, origin=cls.rcmed_origin)
    cls.esgf_ds = Dataset(cls.lats, cls.lons, cls.times, cls.values,
                          variable=cls.variable, units=cls.units,
                          name=cls.name, origin=cls.esgf_origin)
    cls.dap_ds = Dataset(cls.lats, cls.lons, cls.times, cls.values,
                         variable=cls.variable, units=cls.units,
                         name=cls.name, origin=cls.dap_origin)

    cls.subregions = [
        Bounds(lat_min=-10, lat_max=10, lon_min=-20, lon_max=20),
        Bounds(lat_min=-5, lat_max=5, lon_min=-15, lon_max=15)
    ]

    cls.evaluation = Evaluation(
        cls.local_ds,
        [cls.rcmed_ds, cls.esgf_ds, cls.dap_ds],
        [metrics.Bias(), metrics.TemporalStdDev()],
        subregions=cls.subregions)
START_SUB = datetime.datetime(2000, 1, 1)
END_SUB = datetime.datetime(2007, 12, 31)

# regridding parameters
gridLonStep = 0.5
gridLatStep = 0.5

# Regrid
print("... regrid")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)

list_of_regions = [
    Bounds(lat_min=-10.0, lat_max=0.0, lon_min=29.0, lon_max=36.5,
           start=START_SUB, end=END_SUB),
    Bounds(lat_min=0.0, lat_max=10.0, lon_min=29.0, lon_max=37.5,
           start=START_SUB, end=END_SUB),
    Bounds(lat_min=10.0, lat_max=20.0, lon_min=25.0, lon_max=32.5,
           start=START_SUB, end=END_SUB),
    Bounds(lat_min=20.0,
GPM_dataset_filtered = local.load_GPM_IMERG_files_with_spatial_filter(
    file_path='./data/GPM_2015_summer/',
    filename_pattern=['*2015*.HDF5'],
    user_mask_file='Bukovsky_regions.nc',
    mask_variable_name='Bukovsky',
    user_mask_values=[10],
    longitude_name='lon',
    latitude_name='lat')

WRF_dataset = local.load_WRF_2d_files_RAIN(
    file_path='./data/WRF24_2010_summer/',
    filename_pattern=['wrf2dout*'])

""" Step 2: Load the spatial filter (Bukovsky region mask) """
Bukovsky_mask = Bounds(boundary_type='user',
                       user_mask_file='Bukovsky_regions.nc',
                       mask_variable_name='Bukovsky',
                       longitude_name='lon',
                       latitude_name='lat')

""" Step 3: Spatially subset the WRF data
    (for the Northern Great Plains, user_mask_values=[10]) """
WRF_dataset_filtered = dsp.subset(WRF_dataset, Bukovsky_mask,
                                  user_mask_values=[10])

""" Step 4: Analyze the wet spells """
duration1, peak1, total1 = metrics.wet_spell_analysis(
    GPM_dataset_filtered, threshold=0.1, nyear=1, dt=0.5)
duration2, peak2, total2 = metrics.wet_spell_analysis(
    WRF_dataset_filtered.values, threshold=0.1, nyear=1, dt=0.5)

""" Step 5: Calculate the joint PDF (JPDF) of spell_duration and
    peak_rainfall """
cru31_dataset = rcmed.parameter_dataset(dataset_id,
                                        parameter_id,
                                        min_lat,
                                        max_lat,
                                        min_lon,
                                        max_lon,
                                        start_time,
                                        end_time)

""" Step 3: Resample Datasets so they are the same shape """
print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape, ))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s" %
      (knmi_dataset.values.shape, ))
print("Our two datasets have a mismatch in time. "
      "We will subset on time to %s years\n" % YEARS)

# Create a Bounds object to use for subsetting
new_bounds = Bounds(lat_min=min_lat,
                    lat_max=max_lat,
                    lon_min=min_lon,
                    lon_max=max_lon,
                    start=start_time,
                    end=end_time)
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)

print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape, ))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" %
      (knmi_dataset.values.shape, ))

print("Temporally Rebinning the Datasets to a Single Timestep")
# To run FULL temporal rebinning
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='full')
cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='full')

print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, ))
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for
                //           loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin.
            // This is a timedelta of days to use, so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat
            // values should range from -90 to 90 and lon values from
            // -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be
            // able to load a file that they have locally. That would change
            // the format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'],
                                        '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'],
                                          eval_bounds)
    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of
    # the month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)
    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(ref_dataset, subset)
    target_datasets = [dsp.safe_subset(ds, subset) for ds in target_datasets]

    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins,
                               eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
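# For illustration, a hypothetical client-side request matching the contract
# documented in run_evaluation's docstring. Only the key names come from the
# docstring; the endpoint path, host, and all values are made up, and
# `requests` is just one possible HTTP client.
import requests

payload = {
    'reference_dataset': {
        'data_source_id': 1,  # 1 == local
        'dataset_info': {
            'id': '/path/on/server/ref.nc',
            'var_name': 'pr',
            'lat_name': 'lat',
            'lon_name': 'lon',
            'time_name': 'time',
            'name': 'reference'
        }
    },
    'target_datasets': [{
        'data_source_id': 2,  # 2 == rcmed
        'dataset_info': {
            'dataset_id': 4,
            'parameter_id': 14,
            'name': 'rcmed target'
        }
    }],
    'spatial_rebin_lat_step': 1,
    'spatial_rebin_lon_step': 1,
    'temporal_resolution': 30,  # days per bin; 30 falls in the monthly range
    'metrics': ['Bias'],
    'start_time': '2000-01-01 00:00:00',
    'end_time': '2002-01-01 00:00:00',
    'lat_min': -45.0,
    'lat_max': 42.0,
    'lon_min': -24.0,
    'lon_max': 60.0,
}

# Hypothetical route; the actual URL depends on how the service is mounted.
response = requests.post('http://localhost:8082/run_evaluation', json=payload)
print(response.json()['eval_work_dir'])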
regions.append(['MN', 'IA', 'MO', 'WI', 'IL', 'IN', 'MI', 'OH'])
regions.append(
    ['ME', 'VT', 'NH', 'MA', 'NY', 'CT', 'RI', 'NJ', 'PA', 'WV', 'DE', 'MD'])
regions.append(
    ['KY', 'VA', 'AR', 'AL', 'LA', 'MS', 'FL', 'GA', 'NC', 'SC', 'DC', 'TN'])

plotter.fill_US_states_with_color(
    regions, 'NCA_seven_regions', colors=True,
    region_names=['NW', 'SW', 'NGP', 'SGP', 'MW', 'NE', 'SE'])

n_region = 7  # number of regions

# CONUS regional boundaries
NW_bounds = Bounds(boundary_type='us_states', us_states=regions[0])
SW_bounds = Bounds(boundary_type='us_states', us_states=regions[1])
NGP_bounds = Bounds(boundary_type='us_states', us_states=regions[2])
SGP_bounds = Bounds(boundary_type='us_states', us_states=regions[3])
MW_bounds = Bounds(boundary_type='us_states', us_states=regions[4])
NE_bounds = Bounds(boundary_type='us_states', us_states=regions[5])
SE_bounds = Bounds(boundary_type='us_states', us_states=regions[6])

regional_bounds = [
    NW_bounds, SW_bounds, NGP_bounds, SGP_bounds,
    MW_bounds, NE_bounds, SE_bounds
]

""" Load nClimDiv file into OCW Dataset """
obs_dataset = local.load_file(file_obs, variable_name='tave')

""" Load CMIP5 simulations into a list of OCW Datasets """
model_dataset = local.load_multiple_files(file_path=model_file_path,
FILE_LEADER = "http://zipper.jpl.nasa.gov/dist/"

# Three Local Model Files
FILE_1 = "AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_pr.nc"
FILE_2 = "AFRICA_ICTP-REGCM3_CTL_ERAINT_MM_50km-rg_1989-2008_pr.nc"
FILE_3 = "AFRICA_UCT-PRECIS_CTL_ERAINT_MM_50km_1989-2008_pr.nc"

# Filename for the output image/plot (without file extension)
OUTPUT_PLOT = "portrait_diagram"

# Spatial and temporal configurations
LAT_MIN = -45.0
LAT_MAX = 42.24
LON_MIN = -24.0
LON_MAX = 60.0
START = datetime.datetime(2000, 1, 1)
END = datetime.datetime(2007, 12, 31)
EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# variable that we are analyzing
varName = 'pr'

# regridding parameters
gridLonStep = 0.5
gridLatStep = 0.5

# some vars for this evaluation
target_datasets_ensemble = []
target_datasets = []
allNames = []

# Download necessary NetCDF file if not present
if not path.exists(FILE_1):
def test_valid_subregion(self):
    bound = Bounds(
        -10, 10,
        -20, 20,
        dt.datetime(2000, 1, 1), dt.datetime(2001, 1, 1))

    self.eval.subregions = [bound, bound]
    self.assertEqual(len(self.eval.subregions), 2)
# File URL leader
FILE_LEADER = 'http://zipper.jpl.nasa.gov/dist/'

# Three Local Model Files
FILE_1 = 'AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_pr.nc'
FILE_2 = 'AFRICA_ICTP-REGCM3_CTL_ERAINT_MM_50km-rg_1989-2008_pr.nc'
FILE_3 = 'AFRICA_UCT-PRECIS_CTL_ERAINT_MM_50km_1989-2008_pr.nc'

LAT_MIN = -45.0
LAT_MAX = 42.24
LON_MIN = -24.0
LON_MAX = 60.0
START = datetime.datetime(2000, 1, 1)
END = datetime.datetime(2007, 12, 31)

EVAL_BOUNDS = Bounds(lat_min=LAT_MIN, lat_max=LAT_MAX,
                     lon_min=LON_MIN, lon_max=LON_MAX,
                     start=START, end=END)

varName = 'pr'
gridLonStep = 0.44
gridLatStep = 0.44

# needed vars for the script
target_datasets = []
tSeries = []
results = []
labels = []  # could just as easily be the names for each subregion
region_counter = 0

# Download necessary NetCDF file if not present
if not path.exists(FILE_1):
    print('Downloading %s' % (FILE_LEADER + FILE_1))
FILE_1 = "AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_pr.nc"
FILE_2 = "AFRICA_ICTP-REGCM3_CTL_ERAINT_MM_50km-rg_1989-2008_pr.nc"
FILE_3 = "AFRICA_UCT-PRECIS_CTL_ERAINT_MM_50km_1989-2008_pr.nc"

# Filename for the output image/plot (without file extension)
OUTPUT_PLOT = "pr_africa_taylor"

# Spatial and temporal configurations
LAT_MIN = -45.0
LAT_MAX = 42.24
LON_MIN = -24.0
LON_MAX = 60.0
START = datetime.datetime(2000, 1, 1)
END = datetime.datetime(2007, 12, 31)
EVAL_BOUNDS = Bounds(lat_min=LAT_MIN, lat_max=LAT_MAX,
                     lon_min=LON_MIN, lon_max=LON_MAX,
                     start=START, end=END)

# variable that we are analyzing
varName = 'pr'

# regridding parameters
gridLonStep = 0.5
gridLatStep = 0.5

# some vars for this evaluation
target_datasets_ensemble = []
target_datasets = []
ref_datasets = []
FILE_LEADER = "http://zipper.jpl.nasa.gov/dist/"

# Three Local Model Files
FILE_1 = "AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_pr.nc"
FILE_2 = "AFRICA_ICTP-REGCM3_CTL_ERAINT_MM_50km-rg_1989-2008_pr.nc"
FILE_3 = "AFRICA_UCT-PRECIS_CTL_ERAINT_MM_50km_1989-2008_pr.nc"

LAT_MIN = -45.0
LAT_MAX = 42.24
LON_MIN = -24.0
LON_MAX = 60.0
START = datetime.datetime(2000, 1, 1)
END = datetime.datetime(2007, 12, 31)

EVAL_BOUNDS = Bounds(lat_min=LAT_MIN, lat_max=LAT_MAX,
                     lon_min=LON_MIN, lon_max=LON_MAX,
                     start=START, end=END)

varName = 'pr'
gridLonStep = 0.44
gridLatStep = 0.44

# needed vars for the script
target_datasets = []
tSeries = []
results = []
labels = []  # could just as easily be the names for each subregion
region_counter = 0

# Download necessary NetCDF file if not present
wrf_dataset = local.load_file(FILE_2, "tasmax")

knmi_dataset.name = "knmi"
wrf_dataset.name = "wrf"

# Date values from loaded datasets might not always fall on reasonable days.
# With monthly data, we could have data falling on the 1st, 15th, or some
# other day of the month. Let's fix that real quick.
##############################################################################
knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly')
wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly')

# We're only going to run this evaluation over a year's worth of data. We'll
# make a Bounds object and use it to subset our datasets.
##############################################################################
subset = Bounds(-45, 42, -24, 60,
                datetime.datetime(1989, 1, 1), datetime.datetime(1989, 12, 1))
knmi_dataset = dsp.subset(subset, knmi_dataset)
wrf_dataset = dsp.subset(subset, wrf_dataset)

# Temporally re-bin the data into a monthly timestep.
##############################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30))
wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30))

# Spatially regrid the datasets onto a 1 degree grid.
##############################################################################
# Get the bounds of the reference dataset and use them to create a new
# set of lat/lon values on a 1 degree step
min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()
new_lons = numpy.arange(min_lon, max_lon, 1)
def _load_subregion(subregion_config_data):
    """ Build a Bounds object from a subregion config entry of the form
    [lat_min, lat_max, lon_min, lon_max]. """
    return Bounds(float(subregion_config_data[0]),
                  float(subregion_config_data[1]),
                  float(subregion_config_data[2]),
                  float(subregion_config_data[3]))
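# For illustration, the kind of config entry this helper accepts: a
# hypothetical four-element [lat_min, lat_max, lon_min, lon_max] sequence
# whose values need only be coercible by float().
subregion_config_data = ['-10.0', '10.0', '-20.0', '20.0']
bounds = _load_subregion(subregion_config_data)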
# Africa Evaluation Settings
ref_dataset = local.load_file(CORDEX_AF_TAS, "tas")
ref_dataset.name = "cordex_af_tas"
target_dataset = local.load_file(CRU_31_TAS, "tas")
target_dataset.name = "cru_31_tas"

LAT_MIN = -40
LAT_MAX = 40
LON_MIN = -20
LON_MAX = 55
START = datetime.datetime(1999, 1, 1)
END = datetime.datetime(2000, 12, 1)
SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset,
                                    datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))
if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

# rectangular boundary
min_lat = 15.75
max_lat = 55.75
min_lon = -125.75
max_lon = -66.75

start_time = datetime(1998, 1, 1)
end_time = datetime(1998, 12, 31)

TRMM_dataset = rcmed.parameter_dataset(3, 36, min_lat, max_lat,
                                       min_lon, max_lon,
                                       start_time, end_time)

Cuba_and_Bahamas_bounds = Bounds(boundary_type='countries',
                                 countries=['Cuba', 'Bahamas'])
# extract=False masks out the data over Cuba and the Bahamas
TRMM_dataset2 = dsp.subset(TRMM_dataset, Cuba_and_Bahamas_bounds,
                           extract=False)

plotter.draw_contour_map(ma.mean(TRMM_dataset2.values, axis=0),
                         TRMM_dataset2.lats,
                         TRMM_dataset2.lons,
                         fname='TRMM_without_Cuba_and_Bahamas')

NCA_SW_bounds = Bounds(boundary_type='us_states',
                       us_states=['CA', 'NV', 'UT', 'AZ', 'NM', 'CO'])
# extract=True keeps only the data over the NCA Southwest states
TRMM_dataset3 = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True)
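# Note the two complementary subset modes used above: extract=True keeps only
# the grid cells inside the named boundary (the NCA Southwest states), while
# extract=False masks those cells out and keeps everything else, hence the
# 'TRMM_without_Cuba_and_Bahamas' plot name.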
print('start_time:', start_time)
print('end_time:', end_time)

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

# Shrink the bounding box to the overlap of all observational datasets:
# take the max of the minima and the min of the maxima.
for i, dataset in enumerate(obs_datasets):
    min_lat = np.max([min_lat, dataset.lats.min()])
    max_lat = np.min([max_lat, dataset.lats.max()])
    min_lon = np.max([min_lon, dataset.lons.min()])
    max_lon = np.min([max_lon, dataset.lons.max()])

if 'boundary_type' not in space_info:
    bounds = Bounds(lat_min=min_lat,
                    lat_max=max_lat,
                    lon_min=min_lon,
                    lon_max=max_lon,
                    start=start_time,
                    end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time,
                    end=end_time)

for i, dataset in enumerate(obs_datasets):
    obs_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        obs_datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution)

for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
print("Fetching data from RCMED...") cru31_dataset = rcmed.parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time) """ Step 3: Resample Datasets so they are the same shape """ print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s" % (knmi_dataset.values.shape, )) print( "Our two datasets have a mis-match in time. We will subset on time to %s years\n" % YEARS) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) knmi_dataset = dsp.subset(new_bounds, knmi_dataset) print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape, )) print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning use a timedelta > 366 days. I used 999 in this example knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=999)) cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=999)) print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, )) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, )) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """
knmi_dataset.name = "knmi" wrf_dataset.name = "wrf" # Date values from loaded datasets might not always fall on reasonable days. # With monthly data, we could have data falling on the 1st, 15th, or some other # day of the month. Let's fix that real quick. ########################################################################## knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly') wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly') # We're only going to run this evaluation over a years worth of data. We'll # make a Bounds object and use it to subset our datasets. ########################################################################## subset = Bounds(lat_min=-45, lat_max=42, lon_min=-24, lon_max=60, start=datetime.datetime(1989, 1, 1), end=datetime.datetime(1989, 12, 1)) knmi_dataset = dsp.subset(knmi_dataset, subset) wrf_dataset = dsp.subset(wrf_dataset, subset) # Temporally re-bin the data into a monthly timestep. ########################################################################## knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly') wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly') # Spatially regrid the datasets onto a 1 degree grid. ########################################################################## # Get the bounds of the reference dataset and use it to create a new # set of lat/lon values on a 1 degree step # Using the bounds we will create a new set of lats and lons on 1 degree step
with time_block(bcdp_results, 'Dataset Loading'):
    project = 'CORDEX-Africa'
    template = '*_{model}_*_{variable}.nc'
    bcdp.build_extractor(project, template, name_field='model', index=[1, 6])
    ens = bcdp.load_local(paths=paths, project=project)

# Output grid info
domain = ens.overlap
output_grid = bcdp.utils.grid_from_res((0.88, 0.88), domain)
new_lats = output_grid.lat.values
new_lons = output_grid.lon.values
start_time = dt64_to_datetime(domain.time_bnds.min)
end_time = dt64_to_datetime(domain.time_bnds.max)
bnds = Bounds(lat_min=domain.lat_bnds.min,
              lat_max=domain.lat_bnds.max,
              lon_min=domain.lon_bnds.min,
              lon_max=domain.lon_bnds.max,
              start=start_time,
              end=end_time)

with time_block(bcdp_results, 'Domain Subsetting'):
    ens = ens.subset()

with time_block(bcdp_results, 'Seasonal Subsetting'):
    ens = ens.select_season(season='DJF')

with time_block(bcdp_results, 'Resampling'):
    ens = ens.resample(freq='Y')

with time_block(bcdp_results, 'Regridding'):
    ens.regrid(backend='scipy', method='linear', output_grid=output_grid)