def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """ Normalize, subset, temporally rebin, and spatially regrid the
    reference and target datasets according to the 'evaluation' settings
    in config_data. """
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data, so it's
    # safer to do this no matter what. This keeps us from ending up with
    # off-by-one errors in the resulting dataset shape post-temporal/spatial
    # adjustments that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
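# For illustration only: a minimal sketch of the config_data mapping that
# _prepare_datasets_for_evaluation() above expects, inferred from how the
# values are used. The concrete numbers and date strings are hypothetical.
example_config_data = {
    'evaluation': {
        # [lat_min, lat_max, lon_min, lon_max, start, end]; the start/end
        # strings are handed to dateutil.parser.parse().
        'subset': [-45.0, 42.0, -24.0, 60.0, '1989-01-01', '1989-12-01'],
        # Temporal bucket size in days; 1 => daily normalization, otherwise
        # datetimes are normalized as monthly.
        'temporal_time_delta': 30,
        # [start, stop, step] arguments for np.arange() when building the
        # regrid target latitudes/longitudes.
        'spatial_regrid_lats': [-45.0, 42.0, 1.0],
        'spatial_regrid_lons': [-24.0, 60.0, 1.0],
    }
}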
def test_daily_time(self):
    # Test daily with time.hour != 0
    self.monthly_dataset.times = np.array([
        datetime.datetime(year, month, 15, 5)
        for year in range(2000, 2010) for month in range(1, 13)
    ])
    new_ds = dp.normalize_dataset_datetimes(self.monthly_dataset, 'daily')
    # Check that the hour of each time has been reset to 0
    self.assertTrue(all(x.hour == 0 for x in new_ds.times))
    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
                                          ref_data_info['parameter_id'],
                                          min_lat, max_lat, min_lon, max_lon,
                                          start_time, end_time)
elif ref_data_info['data_source'] == 'ESGF':
    username = raw_input('Enter your ESGF OpenID:\n')
    password = raw_input('Enter your ESGF password:\n')
    ds = esgf.load_dataset(dataset_id=ref_data_info['dataset_id'],
                           variable=ref_data_info['variable'],
                           esgf_username=username,
                           esgf_password=password)
    ref_dataset = ds[0]
else:
    print ' '

if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset,
                                                  temporal_resolution)
if 'multiplying_factor' in ref_data_info.keys():
    ref_dataset.values = ref_dataset.values * ref_data_info['multiplying_factor']

""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
model_data_info = config['datasets']['targets']
model_lat_name = None
model_lon_name = None
if 'latitude_name' in model_data_info.keys():
    model_lat_name = model_data_info['latitude_name']
if 'longitude_name' in model_data_info.keys():
    model_lon_name = model_data_info['longitude_name']
boundary_check_model = True
if 'GCM_data' in model_data_info.keys():
    if model_data_info['GCM_data']:
        boundary_check_model = False
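# For illustration only: a hypothetical ref_data_info mapping covering the
# keys read by the loading code above. Which keys are required depends on the
# chosen data_source; all values here are made up.
example_ref_data_info = {
    'data_source': 'rcmed',       # 'local', 'rcmed', or 'ESGF'
    'dataset_id': 10,             # RCMED dataset id (hypothetical value)
    'parameter_id': 37,           # RCMED parameter id (hypothetical value)
    'variable': 'pr',             # variable name, needed for local/ESGF loads
    'path': '/path/to/obs.nc',    # needed when data_source == 'local'
    'multiplying_factor': 86400,  # optional unit-scaling factor
}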
urllib.urlretrieve(FILE_LEADER + FILE_3, FILE_3)

""" Step 1: Load Local NetCDF File into OCW Dataset Objects and store in list"""
target_datasets.append(local.load_file(FILE_1, varName, name="KNMI"))
target_datasets.append(local.load_file(FILE_2, varName, name="REGCM"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print(
    "Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation"
)
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Processing Datasets so they are the same shape """
print("Processing datasets ...")
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)
target_datasets.append(local.load_file(FILE_2, varName, name="REGCM"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Daily Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(
    10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

""" Step 3: Processing datasets so they are the same shape ... """
print("Processing datasets so they are the same shape")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
cordex_af_pr.name = "cordex_af_pr" eu_cordex_tas = local.load_file(EU_CORDEX_TAS, "tas") eu_cordex_tas.name = "eu_cordex_tas" eu_cordex_pr = local.load_file(EU_CORDEX_PR, "pr") eu_cordex_pr.name = "eu_cordex_pr" cru_31_pr = local.load_file(CRU_31_PR, "pr") cru_31_pr.name = "cru_31_pr" cru_31_tas = local.load_file(CRU_31_TAS, "tas") cru_31_tas.name = "cru_31_tas" trmm_pr = local.load_file(TRMM_PR, "pcp") trmm_pr.name = "trmm_pr" # Normalize the time values of our datasets so they fall on expected days # of the month. For example, monthly data will be normalized so that: # 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014 cordex_af_tas = dsp.normalize_dataset_datetimes(cordex_af_tas, "monthly") cordex_af_pr = dsp.normalize_dataset_datetimes(cordex_af_pr, "monthly") eu_cordex_pr = dsp.normalize_dataset_datetimes(eu_cordex_pr, "monthly") eu_cordex_tas = dsp.normalize_dataset_datetimes(eu_cordex_tas, "monthly") cru_31_pr = dsp.normalize_dataset_datetimes(cru_31_pr, "monthly") cru_31_tas = dsp.normalize_dataset_datetimes(cru_31_tas, "monthly") trmm_pr = dsp.normalize_dataset_datetimes(trmm_pr, "monthly") # Configure your evaluation here. The evaluation bounds are determined by # the lat/lon/time values that are set here. If you set the lat/lon values # outside of the range of the datasets' values you will get an error. Your # start/end time values should be in 12 month intervals due to the metrics # being used. If you want to change the datasets being used in the evaluation # you should set the ref/target dataset values here to the corresponding # loaded datasets from above.
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin.
            // This is a timedelta of days to use, so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format in which this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)
    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds) for ds in target_datasets]

    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step, lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
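# For illustration only: a hypothetical request body for run_evaluation(),
# shown as the Python dict that request.json would produce. Dataset ids,
# paths, and bounds are made up; the key layout follows the docstring above.
example_request = {
    'reference_dataset': {
        'data_source_id': 1,            # 1 == local file
        'dataset_info': {
            'id': '/path/to/reference.nc',
            'var_name': 'tasmax',
            'lat_name': 'lat',
            'lon_name': 'lon',
            'time_name': 'time',
            'name': 'reference'
        }
    },
    'target_datasets': [
        {
            'data_source_id': 2,        # 2 == rcmed
            'dataset_info': {
                'dataset_id': 10,
                'parameter_id': 37,
                'name': 'CRU 3.1'
            }
        }
    ],
    'spatial_rebin_lat_step': 1,
    'spatial_rebin_lon_step': 1,
    'temporal_resolution': 30,          # days; anything > 1 => monthly rebin
    'metrics': ['Bias'],
    'start_time': '1989-01-01 00:00:00',
    'end_time': '1989-12-01 00:00:00',
    'lat_min': -45.0,
    'lat_max': 42.0,
    'lon_min': -24.0,
    'lon_max': 60.0
}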
def test_monthly(self):
    new_ds = dp.normalize_dataset_datetimes(self.monthly_dataset, 'monthly')
    # Check that all the days have been shifted to the first of the month
    self.assertTrue(all(x.day == 1 for x in new_ds.times))
print 'Loading observation dataset:\n', ref_data_info
ref_name = ref_data_info['data_name']
if ref_data_info['data_source'] == 'local':
    ref_dataset = local.load_file(ref_data_info['path'],
                                  ref_data_info['variable'],
                                  name=ref_name)
elif ref_data_info['data_source'] == 'rcmed':
    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
                                          ref_data_info['parameter_id'],
                                          min_lat, max_lat, min_lon, max_lon,
                                          start_time, end_time)
else:
    print ' '  # TO DO: support ESGF

ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset,
                                              temporal_resolution)
if 'multiplying_factor' in ref_data_info.keys():
    ref_dataset.values = ref_dataset.values * ref_data_info[
        'multiplying_factor']

""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
model_data_info = config['datasets']['targets']
print 'Loading model datasets:\n', model_data_info
if model_data_info['data_source'] == 'local':
    model_datasets, model_names = local.load_multiple_files(
        file_path=model_data_info['path'],
        variable_name=model_data_info['variable'])
else:
    print ' '  # TO DO: support RCMED and ESGF

for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.normalize_dataset_datetimes(
        dataset, temporal_resolution)
# Extract info we don't want to put into the loader config
# Multiplying Factor to scale obs by. Currently only supported for reference
# (first) dataset. We should instead make this a parameter of each
# loader and Dataset object.
fact = data_info[0].pop('multiplying_factor', 1)

""" Step 1: Load the datasets """
print('Loading datasets:\n{}'.format(data_info))
datasets = load_datasets_from_config(extra_opts, *data_info)
multiplying_factor = np.ones(len(datasets))
multiplying_factor[0] = fact
names = [dataset.name for dataset in datasets]
for i, dataset in enumerate(datasets):
    res = dataset.temporal_resolution()
    if res == 'daily' or res == 'monthly':
        datasets[i] = dsp.normalize_dataset_datetimes(dataset, res)
        if multiplying_factor[i] != 1:
            datasets[i].values *= multiplying_factor[i]

""" Step 2: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if maximum_overlap_period:
    start_time, end_time = utils.get_temporal_overlap(datasets)
    print('Maximum overlap period')
    print('start_time: {}'.format(start_time))
    print('end_time: {}'.format(end_time))

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

for i, dataset in enumerate(datasets):
    min_lat = np.max([min_lat, dataset.lats.min()])
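# For illustration only: a rough sketch of the overlap computation that
# utils.get_temporal_overlap() is used for above. The common window is assumed
# to run from the latest start to the earliest end across all datasets; this
# is an assumption about its behaviour, not the actual utility code.
def _temporal_overlap_sketch(datasets):
    starts = [ds.times.min() for ds in datasets]
    ends = [ds.times.max() for ds in datasets]
    return max(starts), min(ends)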
target_dataset.name = "cru_31_tas" LAT_MIN = -40 LAT_MAX = 40 LON_MIN = -20 LON_MAX = 55 START = datetime.datetime(1999, 1, 1) END = datetime.datetime(2000, 12, 1) SEASON_MONTH_START = 1 SEASON_MONTH_END = 12 EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END) # Normalize the time values of our datasets so they fall on expected days # of the month. For example, monthly data will be normalized so that: # 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014 ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly") target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly") # Subset down the evaluation datasets to our selected evaluation bounds. target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset) ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset) # Do a monthly temporal rebin of the evaluation datasets. target_dataset = dsp.temporal_rebin(target_dataset, datetime.timedelta(days=30)) ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30)) # Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds. new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0) new_lons = np.arange(LON_MIN, LON_MAX, 1.0) target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons) ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)
target_datasets = [target_dataset, target_dataset2, target_dataset3,
                   target_dataset4]

LAT_MIN = 22
LAT_MAX = 71
LON_MIN = -43
LON_MAX = 64
START = datetime.datetime(1999, 1, 1)
END = datetime.datetime(2000, 12, 1)
SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, 'monthly')
target_datasets = [dsp.normalize_dataset_datetimes(target, 'monthly')
                   for target in target_datasets]

# Subset down the evaluation datasets to our selected evaluation bounds.
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)
target_datasets = [dsp.subset(EVAL_BOUNDS, target)
                   for target in target_datasets]

# Do a monthly temporal rebin of the evaluation datasets.
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))
target_datasets = [dsp.temporal_rebin(target, datetime.timedelta(days=30))
                   for target in target_datasets]

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
# Load the example datasets into OCW Dataset objects. We want to load
# the 'tasmax' variable values. We'll also name the datasets for use
# when plotting.
################################################################################
knmi_dataset = local.load_file(FILE_1, "tasmax")
wrf_dataset = local.load_file(FILE_2, "tasmax")

knmi_dataset.name = "knmi"
wrf_dataset.name = "wrf"

# Date values from loaded datasets might not always fall on reasonable days.
# With monthly data, we could have data falling on the 1st, 15th, or some
# other day of the month. Let's fix that real quick.
################################################################################
knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly')
wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly')

# We're only going to run this evaluation over a year's worth of data. We'll
# make a Bounds object and use it to subset our datasets.
################################################################################
subset = Bounds(-45, 42, -24, 60,
                datetime.datetime(1989, 1, 1),
                datetime.datetime(1989, 12, 1))
knmi_dataset = dsp.subset(subset, knmi_dataset)
wrf_dataset = dsp.subset(subset, wrf_dataset)

# Temporally re-bin the data into a monthly timestep.
################################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30))
wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30))

# Spatially regrid the datasets onto a 1 degree grid.
target_dataset.name = "cru_31_tas" LAT_MIN = -40 LAT_MAX = 40 LON_MIN = -20 LON_MAX = 55 START = datetime.datetime(1999, 1, 1) END = datetime.datetime(2000, 12, 1) SEASON_MONTH_START = 1 SEASON_MONTH_END = 12 EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END) # Normalize the time values of our datasets so they fall on expected days # of the month. For example, monthly data will be normalized so that: # 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014 ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly") target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly") # Subset down the evaluation datasets to our selected evaluation bounds. target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset) ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset) # Do a monthly temporal rebin of the evaluation datasets. target_dataset = dsp.temporal_rebin(target_dataset, datetime.timedelta(days=30)) ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30)) # Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds. new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0) new_lons = np.arange(LON_MIN, LON_MAX, 1.0) target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
cordex_af_pr.name = "cordex_af_pr" eu_cordex_tas = local.load_file(EU_CORDEX_TAS, "tas") eu_cordex_tas.name = "eu_cordex_tas" eu_cordex_pr = local.load_file(EU_CORDEX_PR, "pr") eu_cordex_pr.name = "eu_cordex_pr" cru_31_pr = local.load_file(CRU_31_PR, "pr") cru_31_pr.name = "cru_31_pr" cru_31_tas = local.load_file(CRU_31_TAS, "tas") cru_31_tas.name = "cru_31_tas" trmm_pr = local.load_file(TRMM_PR, "pcp") trmm_pr.name = "trmm_pr" # Normalize the time values of our datasets so they fall on expected days # of the month. For example, monthly data will be normalized so that: # 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014 cordex_af_tas = dsp.normalize_dataset_datetimes(cordex_af_tas, "monthly") cordex_af_pr = dsp.normalize_dataset_datetimes(cordex_af_pr, "monthly") eu_cordex_pr = dsp.normalize_dataset_datetimes(eu_cordex_pr, "monthly") eu_cordex_tas = dsp.normalize_dataset_datetimes(eu_cordex_tas, "monthly") cru_31_pr = dsp.normalize_dataset_datetimes(cru_31_pr, "monthly") cru_31_tas = dsp.normalize_dataset_datetimes(cru_31_tas, "monthly") trmm_pr = dsp.normalize_dataset_datetimes(trmm_pr, "monthly") # Configure your evaluation here. The evaluation bounds are determined by # the lat/lon/time values that are set here. If you set the lat/lon values # outside of the range of the datasets' values you will get an error. Your # start/end time values should be in 12 month intervals due to the metrics # being used. If you want to change the datasets being used in the evaluation # you should set the ref/target dataset values here to the corresponding # loaded datasets from above. #
    if 'multiplying_factor' in info:
        multiplying_factor[i] = info.pop('multiplying_factor')

# If models are GCMs we can skip boundary check. Probably need to find a more
# elegant way to express this in the config file API.
boundary_check = True
for i, info in enumerate(model_data_info):
    if 'boundary_check' in info:
        boundary_check = info.pop('boundary_check')

""" Step 1: Load the observation data """
print 'Loading observation datasets:\n', obs_data_info
obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info)
obs_names = [dataset.name for dataset in obs_datasets]
for i, dataset in enumerate(obs_datasets):
    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
        obs_datasets[i] = dsp.normalize_dataset_datetimes(
            dataset, temporal_resolution)
        if multiplying_factor[i] != 1:
            obs_datasets[i].values *= multiplying_factor[i]

""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
model_datasets = load_datasets_from_config(extra_opts, *model_data_info)
model_names = [dataset.name for dataset in model_datasets]
if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
    for i, dataset in enumerate(model_datasets):
        model_datasets[i] = dsp.normalize_dataset_datetimes(
            dataset, temporal_resolution)

""" Step 3: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if time_info['maximum_overlap_period']:
    start_time, end_time = utils.get_temporal_overlap(obs_datasets +
                                                      model_datasets)
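# For illustration only: hypothetical entries for the obs/model dataset info
# lists consumed above. Only the keys this block itself reads are shown; the
# remaining loader-specific keys (paths, variable names, ...) are omitted.
example_obs_data_info = [
    {'multiplying_factor': 86400},  # optional scaling, popped before loading
]
example_model_data_info = [
    {'boundary_check': False},      # optional, e.g. for GCM output
]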
# Load the example datasets into OCW Dataset objects. We want to load
# the 'tasmax' variable values. We'll also name the datasets for use
# when plotting.
##########################################################################
knmi_dataset = local.load_file(FILE_1, "tasmax")
wrf_dataset = local.load_file(FILE_2, "tasmax")

knmi_dataset.name = "knmi"
wrf_dataset.name = "wrf"

# Date values from loaded datasets might not always fall on reasonable days.
# With monthly data, we could have data falling on the 1st, 15th, or some
# other day of the month. Let's fix that real quick.
##########################################################################
knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly')
wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly')

# We're only going to run this evaluation over a year's worth of data. We'll
# make a Bounds object and use it to subset our datasets.
##########################################################################
subset = Bounds(lat_min=-45, lat_max=42,
                lon_min=-24, lon_max=60,
                start=datetime.datetime(1989, 1, 1),
                end=datetime.datetime(1989, 12, 1))
knmi_dataset = dsp.subset(knmi_dataset, subset)
wrf_dataset = dsp.subset(wrf_dataset, subset)

# Temporally re-bin the data into a monthly timestep.
# Extract info we don't want to put into the loader config
# Multiplying Factor to scale obs by. Currently only supported for reference
# (first) dataset. We should instead make this a parameter of each
# loader and Dataset object.
fact = data_info[0].pop('multiplying_factor', 1)

""" Step 1: Load the datasets """
print('Loading datasets:\n{}'.format(data_info))
datasets = load_datasets_from_config(extra_opts, *data_info)
multiplying_factor = np.ones(len(datasets))
multiplying_factor[0] = fact
names = [dataset.name for dataset in datasets]
for i, dataset in enumerate(datasets):
    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
        datasets[i] = dsp.normalize_dataset_datetimes(dataset,
                                                      temporal_resolution)
        if multiplying_factor[i] != 1:
            datasets[i].values *= multiplying_factor[i]

""" Step 2: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if time_info['maximum_overlap_period']:
    start_time, end_time = utils.get_temporal_overlap(datasets)
    print('Maximum overlap period')
    print('start_time: {}'.format(start_time))
    print('end_time: {}'.format(end_time))

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

for i, dataset in enumerate(datasets):