def setUp(self):
    """Create the TemporalCorrelation metric plus one reference dataset and
    two target datasets (one increasing, one decreasing over time)."""
    self.metric = metrics.TemporalCorrelation()

    # Reference dataset: values 0..299 over a 12 x 5 x 5 grid.
    self.ref_lats = np.array([10, 20, 30, 40, 50])
    self.ref_lons = np.array([5, 15, 25, 35, 45])
    self.ref_times = np.array([dt.datetime(2000, month, 1)
                               for month in range(1, 13)])
    self.ref_values = np.arange(300).reshape(12, 5, 5)
    self.ref_variable = "ref"
    self.ref_dataset = Dataset(self.ref_lats, self.ref_lons,
                               self.ref_times, self.ref_values,
                               self.ref_variable)

    # Target datasets share the grid/timeline but hold increasing
    # (300..599) and decreasing (299..0) values respectively.
    self.tgt_lats = np.array([10, 20, 30, 40, 50])
    self.tgt_lons = np.array([5, 15, 25, 35, 45])
    self.tgt_times = np.array([dt.datetime(2000, month, 1)
                               for month in range(1, 13)])
    self.tgt_variable = "tgt"
    self.tgt_values_inc = np.arange(300, 600).reshape(12, 5, 5)
    self.tgt_values_dec = np.arange(299, -1, -1).reshape(12, 5, 5)
    self.tgt_dataset_inc = Dataset(self.tgt_lats, self.tgt_lons,
                                   self.tgt_times, self.tgt_values_inc,
                                   self.tgt_variable)
    self.tgt_dataset_dec = Dataset(self.tgt_lats, self.tgt_lons,
                                   self.tgt_times, self.tgt_values_dec,
                                   self.tgt_variable)
def setUp(self):
    """Create the RMSError metric and constant-valued reference/target
    datasets (all 4s vs. all 2s) on identical grids."""
    self.metric = metrics.RMSError()

    # Reference dataset: every cell holds the value 4.
    self.ref_lats = np.array([10, 20, 30, 40, 50])
    self.ref_lons = np.array([5, 15, 25, 35, 45])
    self.ref_times = np.array([dt.datetime(2000, month, 1)
                               for month in range(1, 13)])
    self.ref_values = np.full((12, 5, 5), 4)
    self.ref_variable = "ref"
    self.ref_dataset = Dataset(self.ref_lats, self.ref_lons,
                               self.ref_times, self.ref_values,
                               self.ref_variable)

    # Target dataset: every cell holds the value 2.
    self.tgt_lats = np.array([10, 20, 30, 40, 50])
    self.tgt_lons = np.array([5, 15, 25, 35, 45])
    self.tgt_times = np.array([dt.datetime(2000, month, 1)
                               for month in range(1, 13)])
    self.tgt_values = np.full((12, 5, 5), 2)
    self.tgt_variable = "tgt"
    self.tgt_dataset = Dataset(self.tgt_lats, self.tgt_lons,
                               self.tgt_times, self.tgt_values,
                               self.tgt_variable)
def test_bad_values_shape(self):
    """Dataset must reject value arrays that are not 3-dimensional."""
    # A 1-D array of values is invalid.
    self.value = np.array([1, 2, 3, 4, 5])
    with self.assertRaises(ValueError):
        Dataset(self.lat, self.lon, self.time, self.value, 'prec')
    # A 2-D array of values is also invalid.
    self.value = self.value.reshape(1, 5)
    with self.assertRaises(ValueError):
        Dataset(self.lat, self.lon, self.time, self.value, 'prec')
def setUp(self):
    """Create the Bias metric plus reference and target datasets that
    differ in grid, year, and values."""
    self.bias = Bias()

    # Reference dataset: Jan-Dec 2000, values 0..299 on a 5x5 grid.
    self.reference_lat = np.array([10, 12, 14, 16, 18])
    self.reference_lon = np.array([100, 102, 104, 106, 108])
    self.reference_time = np.array([dt.datetime(2000, month, 1)
                                    for month in range(1, 13)])
    self.reference_value = np.arange(300).reshape(12, 5, 5)
    self.reference_variable = 'prec'
    self.reference_dataset = Dataset(self.reference_lat,
                                     self.reference_lon,
                                     self.reference_time,
                                     self.reference_value,
                                     self.reference_variable)

    # Target dataset: different grid, Jan-Dec 2001, values 300..599.
    self.target_lat = np.array([1, 2, 4, 6, 8])
    self.target_lon = np.array([10, 12, 14, 16, 18])
    self.target_time = np.array([dt.datetime(2001, month, 1)
                                 for month in range(1, 13)])
    self.target_value = np.arange(300, 600).reshape(12, 5, 5)
    self.target_variable = 'tasmax'
    self.target_dataset = Dataset(self.target_lat, self.target_lon,
                                  self.target_time, self.target_value,
                                  self.target_variable)
def setUp(self):
    """Create an empty Evaluation and two datasets that differ only in
    their variable name."""
    self.eval = Evaluation(None, [], [])

    lats = np.array([10, 12, 14, 16, 18])
    lons = np.array([100, 102, 104, 106, 108])
    stamps = np.array([dt.datetime(2000, month, 1)
                       for month in range(1, 13)])
    data = np.arange(300).reshape(12, 5, 5)

    self.variable = 'prec'
    self.other_var = 'temp'
    self.test_dataset = Dataset(lats, lons, stamps, data, self.variable)
    self.another_test_dataset = Dataset(lats, lons, stamps, data,
                                        self.other_var)
def test_temporal_resolution_yearly(self):
    """A dataset sampled once per year reports 'yearly' resolution."""
    self.time = np.array([dt.datetime(year, 6, 1)
                          for year in range(2000, 2015)])
    self.value = np.arange(375).reshape(15, 5, 5)
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
    self.assertEqual(self.test_dataset.temporal_resolution(), 'yearly')
def _create_fake_dataset(name):
    """Return a Dataset of zeros on a 1-degree grid with 8 time steps."""
    lats = numpy.arange(-10, 25)
    lons = numpy.arange(-30, 40)
    times = numpy.arange(8)
    values = numpy.zeros((times.size, lats.size, lons.size))
    return Dataset(lats, lons, times, values, name=name)
def setUpClass(self):
    """Create a Dataset with a 'local' origin and export its config.

    NOTE(review): unittest invokes setUpClass as a classmethod taking
    `cls`; this definition takes `self` -- confirm an @classmethod
    decorator exists above this block, otherwise confirm how the test
    runner tolerates it.
    """
    self.lats = np.array([10, 12, 14, 16, 18])
    self.lons = np.array([100, 102, 104, 106, 108])
    self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
    flat_array = np.array(range(300))
    self.values = flat_array.reshape(12, 5, 5)
    self.variable = 'var'
    self.units = 'units'
    # Origin metadata describing a dataset loaded from a local NetCDF file.
    self.origin = {
        'source': 'local',
        'path': '/a/fake/path.nc',
        'lat_name': 'a lat name',
        'lon_name': 'a lon name',
        'time_name': 'a time name',
        'elevation_index': 2
    }
    self.name = 'name'
    self.dataset = Dataset(self.lats, self.lons, self.times, self.values,
                           variable=self.variable, units=self.units,
                           origin=self.origin, name=self.name)
    # Exported configuration under test in this test case.
    self.exported_info = writer.generate_dataset_config(self.dataset)
def test_reshape_not_full_year(self):
    """reshape_monthly_to_annually rejects a series whose length is not a
    whole number of years (26 months here)."""
    new_time = np.array([datetime.datetime(2000, 1, 1) +
                         relativedelta(months=offset)
                         for offset in range(26)])
    bad_values = np.arange(650).reshape(26, 5, 5)
    bad_dataset = Dataset(self.lat, self.lon, new_time, bad_values,
                          self.variable)
    with self.assertRaises(ValueError):
        utils.reshape_monthly_to_annually(bad_dataset)
def test_temporal_resolution_daily(self):
    """A dataset sampled once per day reports 'daily' resolution."""
    self.time = np.array([dt.datetime(2000, 3, day)
                          for day in range(1, 31)])
    self.value = np.arange(750).reshape(30, 5, 5)
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
    self.assertEqual(self.test_dataset.temporal_resolution(), 'daily')
def setUpClass(self):
    """Create a Dataset with an 'esgf' origin and export its config.

    NOTE(review): unittest invokes setUpClass as a classmethod taking
    `cls`; this definition takes `self` -- confirm an @classmethod
    decorator exists above this block, otherwise confirm how the test
    runner tolerates it.
    """
    self.lats = np.array([10, 12, 14, 16, 18])
    self.lons = np.array([100, 102, 104, 106, 108])
    self.times = np.array([dt.datetime(2000, x, 1) for x in range(1, 13)])
    flat_array = np.array(range(300))
    self.values = flat_array.reshape(12, 5, 5)
    self.variable = 'var'
    self.units = 'units'
    # Origin metadata describing a dataset obtained from ESGF.
    self.origin = {
        'source': 'esgf',
        'dataset_id': 'esgf dataset id',
        'variable': 'var'
    }
    self.name = 'name'
    self.dataset = Dataset(self.lats, self.lons, self.times, self.values,
                           variable=self.variable, units=self.units,
                           origin=self.origin, name=self.name)
    # Exported configuration under test in this test case.
    self.exported_info = writer.generate_dataset_config(self.dataset)
def load_WRF_2d_files(file_path=None, filename_pattern=None, filelist=None,
                      variable_name='T2', name=''):
    ''' Load multiple WRF (or nuWRF) original output files containing 2D
    fields such as precipitation and surface variables into a Dataset.
    The dataset can be spatially subset.

    NOTE(review): this function uses a Python 2 `print` statement, so the
    module is Python-2-only as written.

    :param file_path: Directory to the NetCDF file to load.
    :type file_path: :mod:`string`
    :param filename_pattern: Path to the NetCDF file to load.
    :type filename_pattern: :list:`string`
    :param filelist: A list of filenames
    :type filelist: :list:`string`
    :param variable_name: The variable name to load from the NetCDF file.
    :type variable_name: :mod:`string`
    :param name: (Optional) A name for the loaded dataset.
    :type name: :mod:`string`

    :returns: An OCW Dataset object with the requested variable's data from
        the NetCDF file.
    :rtype: :class:`dataset.Dataset`

    :raises ValueError:
    '''
    # Collect the input files either by globbing patterns under file_path
    # or by reading one filename per line from `filelist`.
    if not filelist:
        WRF_files = []
        for pattern in filename_pattern:
            WRF_files.extend(glob(file_path + pattern))
    else:
        WRF_files = [line.rstrip('\n') for line in open(filelist)]
    WRF_files.sort()
    # Latitude/longitude grids are taken from the first file; all files
    # are assumed to share the same grid (not verified here).
    file_object_first = netCDF4.Dataset(WRF_files[0])
    lats = file_object_first.variables['XLAT'][0, :]
    lons = file_object_first.variables['XLONG'][0, :]
    times = []
    nfile = len(WRF_files)
    # NOTE(review): `file` shadows the Python 2 builtin of the same name.
    for ifile, file in enumerate(WRF_files):
        print 'Reading file ' + str(ifile + 1) + '/' + str(nfile), file
        file_object = netCDF4.Dataset(file)
        # The timestamp is parsed from the last 19 characters of the file
        # name, which must match "%Y-%m-%d_%H:%M:%S".
        time_struct_parsed = strptime(file[-19:], "%Y-%m-%d_%H:%M:%S")
        # 24 hourly timestamps are generated per file -- assumes each file
        # holds one day of hourly output; TODO confirm against the data.
        for ihour in numpy.arange(24):
            times.append(
                datetime(*time_struct_parsed[:6]) + timedelta(hours=ihour))
        values0 = file_object.variables[variable_name][:]
        if ifile == 0:
            values = values0
            variable_unit = file_object.variables[variable_name].units
        else:
            # NOTE(review): repeated concatenate is O(n^2) over all files;
            # collecting arrays and concatenating once would be faster.
            values = numpy.concatenate((values, values0))
        file_object.close()
    times = numpy.array(times)
    return Dataset(lats, lons, times, values, variable_name,
                   units=variable_unit, name=name)
def load(url, variable):
    '''Load a Dataset from an OpenDAP URL.

    :param url: The OpenDAP URL for the dataset of interest.
    :type url: String
    :param variable: The name of the variable to read from the dataset.
    :type variable: String

    :returns: A Dataset object containing the dataset pointed to by the
        OpenDAP URL.

    :raises: ServerError
    '''
    # Fetch the remote dataset and select the requested variable.
    remote = open_url(url)
    var = remote[variable]

    # The dimension order is assumed to be (time, lat, lon).
    dims = var.dimensions
    time_name = dims[0]
    lat_name = dims[1]
    lon_name = dims[2]

    # Time arrives as counts since an epoch; convert to datetimes. The
    # top-level object is used for the time variable because the
    # variable-specific reference fails when its 'units' are read.
    times = np.array(_convert_times_to_datetime(remote[time_name]))
    lats = np.array(var[lat_name][:])
    lons = np.array(var[lon_name][:])
    values = np.array(var[:])

    return Dataset(lats, lons, times, values, variable)
def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon,
                      max_lon, start_time, end_time):
    '''Get data from one database(parameter).

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer
    :param min_lat: Minimum latitude
    :type min_lat: Float
    :param max_lat: Maximum latitude
    :type max_lat: Float
    :param min_lon: Minimum longitude
    :type min_lon: Float
    :param max_lon: Maximum longitude
    :type max_lon: Float
    :param start_time: Start time
    :type start_time: Datetime
    :param end_time: End time
    :type end_time: Datetime

    :returns: Dataset object
    :rtype: Object
    '''
    metadata = get_parameters_metadata()
    (parameter_name, time_step,
     _, _, _, _, _) = _get_parameter_info(metadata, parameter_id)
    # Query RCMED for the raw point data within the requested bounds.
    query_url = _generate_query_url(dataset_id, parameter_id,
                                    min_lat, max_lat, min_lon, max_lon,
                                    start_time, end_time, time_step)
    lats, lons, times, values = _get_data(query_url)
    # Deduplicate coordinates, then shape and mask values onto that grid.
    unique = _make_unique(lats, lons, times)
    unique_times = _calculate_time(unique[2], time_step)
    values = _reshape_values(values, unique)
    values = _make_mask_array(values, parameter_id, metadata)
    return Dataset(unique[0], unique[1], unique_times, values,
                   parameter_name)
def setUp(self):
    """Create the SpatialPatternTaylorDiagram metric and two datasets whose
    values increment by 5 and by 2 respectively."""
    self.taylor_diagram = metrics.SpatialPatternTaylorDiagram()
    # Reference: 300 values stepping by 5, shaped (12, 5, 5).
    self.ref_dataset = Dataset(
        np.ones(5), np.ones(5),
        np.array([dt.datetime(2000, month, 1) for month in range(1, 13)]),
        np.arange(0, 1500, 5).reshape(12, 5, 5),
        'ds1')
    # Target: 300 values stepping by 2, shaped (12, 5, 5).
    self.tar_dataset = Dataset(
        np.ones(5), np.ones(5),
        np.array([dt.datetime(2000, month, 1) for month in range(1, 13)]),
        np.arange(0, 600, 2).reshape(12, 5, 5),
        'ds2')
def setUp(self):
    """Create the PatternCorrelation metric and two datasets whose values
    increment by 5 and by 2 respectively."""
    self.pattern_correlation = PatternCorrelation()
    # Reference: 300 values stepping by 5, shaped (12, 5, 5).
    self.ref_dataset = Dataset(
        np.ones(5), np.ones(5),
        np.array([dt.datetime(2000, month, 1) for month in range(1, 13)]),
        np.arange(0, 1500, 5).reshape(12, 5, 5),
        'ds1')
    # Target: 300 values stepping by 2, shaped (12, 5, 5).
    self.tar_dataset = Dataset(
        np.ones(5), np.ones(5),
        np.array([dt.datetime(2000, month, 1) for month in range(1, 13)]),
        np.arange(0, 600, 2).reshape(12, 5, 5),
        'ds2')
def setUp(self):
    """Create the SpatialStdDevRatio metric and two datasets whose values
    increment by 5 and by 2 respectively."""
    self.spatial_std_dev_ratio = SpatialStdDevRatio()
    # Reference: 300 values stepping by 5, shaped (12, 5, 5).
    self.ref_dataset = Dataset(
        np.ones(5), np.ones(5),
        np.array([dt.datetime(2000, month, 1) for month in range(1, 13)]),
        np.arange(0, 1500, 5).reshape(12, 5, 5),
        'ds1')
    # Target: 300 values stepping by 2, shaped (12, 5, 5).
    self.tar_dataset = Dataset(
        np.ones(5), np.ones(5),
        np.array([dt.datetime(2000, month, 1) for month in range(1, 13)]),
        np.arange(0, 600, 2).reshape(12, 5, 5),
        'ds2')
def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon,
                      max_lon, start_time, end_time, name=''):
    '''Get data from one database(parameter).

    :param dataset_id: Dataset id.
    :type dataset_id: :class:`int`
    :param parameter_id: Parameter id
    :type parameter_id: :class:`int`
    :param min_lat: Minimum latitude
    :type min_lat: :class:`float`
    :param max_lat: Maximum latitude
    :type max_lat: :class:`float`
    :param min_lon: Minimum longitude
    :type min_lon: :class:`float`
    :param max_lon: Maximum longitude
    :type max_lon: :class:`float`
    :param start_time: Start time
    :type start_time: :class:`datetime.datetime`
    :param end_time: End time
    :type end_time: :class:`datetime.datetime`
    :param name: (Optional) A name for the loaded dataset.
    :type name: :mod:`string`

    :returns: An OCW Dataset object contained the requested data from RCMED.
    :rtype: :class:`dataset.Dataset`
    '''
    metadata = get_parameters_metadata()
    (parameter_name, time_step,
     _, _, _, _, parameter_units) = _get_parameter_info(metadata,
                                                        parameter_id)
    # Query RCMED for the raw point data within the requested bounds.
    query_url = _generate_query_url(dataset_id, parameter_id,
                                    min_lat, max_lat, min_lon, max_lon,
                                    start_time, end_time, time_step)
    lats, lons, times, values = _get_data(query_url)
    # Deduplicate coordinates, then shape and mask values onto that grid.
    unique = _make_unique(lats, lons, times)
    unique_times = _calculate_time(unique[2], time_step)
    values = _reshape_values(values, unique)
    values = _make_mask_array(values, parameter_id, metadata)
    # Record where the data came from so it can be re-fetched later.
    origin = {
        'source': 'rcmed',
        'dataset_id': dataset_id,
        'parameter_id': parameter_id
    }
    return Dataset(unique[0], unique[1], unique_times, values,
                   variable=parameter_name, units=parameter_units,
                   name=name, origin=origin)
def test_temporal_resolution_hourly(self):
    """Resolution of a dataset whose two timestamps are identical.

    NOTE(review): the method name says 'hourly' but the assertion expects
    'minutely' (the two timestamps are equal, so the time delta is zero).
    Confirm whether the name or the expected value is wrong.
    """
    self.time = np.array(
        [dt.datetime(2000, 1, 1), dt.datetime(2000, 1, 1)])
    flat_array = np.array(range(50))
    self.value = flat_array.reshape(2, 5, 5)
    self.test_dataset = Dataset(self.lat, self.lon, self.time, self.value,
                                self.variable)
    self.assertEqual(self.test_dataset.temporal_resolution(), 'minutely')
def setUp(self):
    """Create a two-year monthly dataset on a 5x5 grid."""
    self.lat = np.array([10, 12, 14, 16, 18])
    self.lon = np.array([100, 102, 104, 106, 108])
    start = datetime.datetime(2000, 1, 1)
    self.time = np.array([start + relativedelta(months=offset)
                          for offset in range(24)])
    self.value = np.arange(600).reshape(24, 5, 5)
    self.variable = 'prec'
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
def test_different_dataset_temporal_overlap(self):
    """get_temporal_overlap finds the shared window of datasets whose
    time ranges differ by two years."""
    shifted_times = np.array([datetime.datetime(2002, month, 1)
                              for month in range(1, 13)])
    another_dataset = Dataset(self.lat, self.lon, shifted_times,
                              self.value, self.variable)
    self.dataset_array = [self.test_dataset, another_dataset]
    maximum, minimum = utils.get_temporal_overlap(self.dataset_array)
    self.assertEqual(maximum, datetime.datetime(2002, 1, 1))
    self.assertEqual(minimum, datetime.datetime(2000, 12, 1))
def test_lons_values_incorrectly_gridded(self):
    """Longitudes given on a 0..359 grid are remapped to -180..179."""
    months = np.array([dt.datetime(2000, m, 1) for m in range(1, 13)])
    lats = np.arange(-30, 30)
    bad_lons = np.arange(360)
    cell_count = len(months) * len(lats) * len(bad_lons)
    values = np.arange(cell_count).reshape(len(months), len(lats),
                                           len(bad_lons))

    ds = Dataset(lats, bad_lons, months, values)
    np.testing.assert_array_equal(ds.lons, np.arange(-180, 180))
def setUp(self):
    """Create a one-year monthly dataset on a 5x5 grid."""
    self.lat = np.array([10, 12, 14, 16, 18])
    self.lon = np.array([100, 102, 104, 106, 108])
    self.time = np.array([dt.datetime(2000, month, 1)
                          for month in range(1, 13)])
    self.value = np.arange(300).reshape(12, 5, 5)
    self.variable = 'prec'
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
def setUp(self):
    """Create a dataset of ones spanning 12 consecutive months."""
    self.lats = np.array([10, 20, 30, 40, 50])
    self.lons = np.array([20, 30, 40, 50, 60])
    first_month = datetime.datetime(2000, 1, 1)
    self.times = np.array([first_month + relativedelta(months=offset)
                           for offset in range(12)])
    self.values = np.ones((12, 5, 5))
    self.variable = 'testdata'
    self.dataset = Dataset(self.lats, self.lons, self.times,
                           self.values, self.variable)
def test_2_dim_lats_lons(self):
    """Spatial averaging accepts 2-dimensional lat/lon arrays."""
    self.lat = np.array([10, 12, 14]).reshape(3, 1)
    self.lon = np.array([100, 102, 104]).reshape(3, 1)
    self.value = np.arange(18).reshape(6, 3, 1)
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
    expected = np.ma.array([1., 4., 7., 10., 13., 16.])
    actual = utils.calc_area_weighted_spatial_average(self.test_dataset)
    np.testing.assert_array_equal(expected, actual)
def setUp(self):
    """Create a six-month dataset on a 3x3 grid."""
    self.lat = np.array([10, 12, 14])
    self.lon = np.array([100, 102, 104])
    self.time = np.array([datetime.datetime(2000, month, 1)
                          for month in range(1, 7)])
    self.value = np.arange(54).reshape(6, 3, 3)
    self.variable = 'prec'
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
def test_spatial_resolution_2_dim_lat_lon(self):
    """spatial_resolution handles 2-D lat/lon arrays."""
    # Six values each, stepping by 2, folded into a (3, 2) shape.
    self.lat = np.arange(10, 21, 2).reshape(3, 2)
    self.lon = np.arange(100, 111, 2).reshape(3, 2)
    self.value = np.arange(72).reshape(12, 3, 2)
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
    self.assertEqual(self.test_dataset.spatial_resolution(), (6, 6))
def load(url, variable, name=''):
    '''Load a Dataset from an OpenDAP URL

    :param url: The OpenDAP URL for the dataset of interest.
    :type url: :mod:`string`
    :param variable: The name of the variable to read from the dataset.
    :type variable: :mod:`string`
    :param name: (Optional) A name for the loaded dataset.
    :type name: :mod:`string`

    :returns: A :class:`dataset.Dataset` containing the dataset pointed to
        by the OpenDAP URL.

    :raises: ServerError
    '''
    # Grab the dataset information and pull the appropriate variable
    d = open_url(url)
    dataset = d[variable]

    # By convention, but not by standard, if the dimensions exist, they will
    # be in the order: time (t), altitude (z), latitude (y), longitude (x).
    # Conventions aren't always followed and all dimensions aren't always
    # present, so make some educated deductions before defaulting to just
    # pulling the first three columns.
    # Use a list comprehension, not map(): in Python 3 map() returns an
    # iterator which has no .index() method.
    temp_dimensions = [dim.lower() for dim in dataset.dimensions]
    dataset_dimensions = dataset.dimensions
    time = dataset_dimensions[temp_dimensions.index(
        'time') if 'time' in temp_dimensions else 0]
    lat = dataset_dimensions[temp_dimensions.index(
        'lat') if 'lat' in temp_dimensions else 1]
    lon = dataset_dimensions[temp_dimensions.index(
        'lon') if 'lon' in temp_dimensions else 2]

    # Time is given to us in some units since an epoch. We need to convert
    # these values to datetime objects. Note that we use the main object's
    # time object and not the dataset specific reference to it. We need to
    # grab the 'units' from it and it fails on the dataset specific object.
    times = np.array(convert_times_to_datetime(d[time]))
    lats = np.array(dataset[lat][:])
    lons = np.array(dataset[lon][:])
    values = np.array(dataset[:])
    origin = {'source': 'dap', 'url': url}

    return Dataset(lats, lons, times, values, variable,
                   name=name, origin=origin)
def test__2_dim_lats_lons_area_weight(self):
    """Area-weighted spatial averaging accepts 2-D lat/lon arrays."""
    self.lat = np.array([10, 12, 14]).reshape(3, 1)
    self.lon = np.array([100, 102, 104]).reshape(3, 1)
    self.value = np.arange(18).reshape(6, 3, 1)
    self.test_dataset = Dataset(self.lat, self.lon, self.time,
                                self.value, self.variable)
    expected = np.ma.array([0.995053, 3.995053, 6.995053, 9.995053,
                            12.995053, 15.995053])
    actual = utils.calc_area_weighted_spatial_average(
        self.test_dataset, area_weight=True)
    np.testing.assert_array_almost_equal(expected, actual)
def setUp(self):
    """Create the TemporalStdDev metric and a monthly target dataset."""
    self.temporal_std_dev = TemporalStdDev()

    # Target dataset: Jan-Dec 2000, values 0..299 on a 5x5 grid.
    self.target_lat = np.array([10, 12, 14, 16, 18])
    self.target_lon = np.array([100, 102, 104, 106, 108])
    self.target_time = np.array([dt.datetime(2000, month, 1)
                                 for month in range(1, 13)])
    self.target_value = np.arange(300).reshape(12, 5, 5)
    self.target_variable = 'prec'
    self.target_dataset = Dataset(self.target_lat, self.target_lon,
                                  self.target_time, self.target_value,
                                  self.target_variable)