def test_normalize_lon_lat(self):
    """
    Test nominal execution

    Each case feeds a single 2x3 variable with differently named
    dimensions through normalize_dataset() and compares the result
    with the expected dimension names and row order.
    """
    rows = [[1, 2, 3], [2, 3, 4]]
    # Since normalization puts latitudes into descending order, we
    # expect the rows to be swapped for the cases that carry a
    # latitude dimension.
    swapped_rows = [[2, 3, 4], [1, 2, 3]]

    # (input dims, expected dims, expected values)
    cases = (
        (['latitude', 'longitude'], ['lat', 'lon'], swapped_rows),
        (['lat', 'long'], ['lat', 'lon'], swapped_rows),
        (['latitude', 'spacetime'], ['lat', 'spacetime'], swapped_rows),
        (['zef', 'spacetime'], ['zef', 'spacetime'], rows),
    )
    for in_dims, out_dims, out_values in cases:
        dataset = xr.Dataset({'first': (in_dims, rows)})
        expected = xr.Dataset({'first': (out_dims, out_values)})
        actual = normalize_dataset(dataset)
        assertDatasetEqual(actual, expected)
def test_normalize_lon_lat(self):
    """
    Test nominal execution

    Each case feeds a single 2x3 variable with differently named
    dimensions through normalize_dataset() and expects the same values
    back, with only the dimension names adjusted.
    """
    rows = [[1, 2, 3], [2, 3, 4]]

    # (input dims, expected dims); the values are expected unchanged.
    cases = (
        (['latitude', 'longitude'], ['lat', 'lon']),
        (['lat', 'long'], ['lat', 'lon']),
        (['latitude', 'spacetime'], ['lat', 'spacetime']),
        (['zef', 'spacetime'], ['zef', 'spacetime']),
    )
    for in_dims, out_dims in cases:
        dataset = xr.Dataset({'first': (in_dims, rows)})
        expected = xr.Dataset({'first': (out_dims, rows)})
        actual = normalize_dataset(dataset)
        assertDatasetEqual(actual, expected)
def test_normalize_with_missing_time_dim_from_filename(self):
    """
    Normalize a dataset that has no time dimension but whose
    ``encoding['source']`` string contains two date stamps.

    Asserts that a new dataset is returned, that it has the four
    coordinates 'lon', 'lat', 'time' and 'time_bnds', that the data
    variables gained a leading axis of length 1, and that the time
    bounds equal the two dates found in the source string.
    """
    grid_dims = ['lat', 'lon']
    grid_shape = [90, 180]
    ds = xr.Dataset(
        {
            'first': (grid_dims, np.zeros(grid_shape)),
            'second': (grid_dims, np.zeros(grid_shape)),
        },
        coords={
            'lat': np.linspace(-89.5, 89.5, 90),
            'lon': np.linspace(-179.5, 179.5, 180),
        },
    )
    # The only time information available is in the source name.
    ds.encoding.update(source='20150204_etfgz_20170309_dtsrgth')

    norm_ds = normalize_dataset(ds)

    self.assertIsNot(norm_ds, ds)
    self.assertEqual(len(norm_ds.coords), 4)
    for coord_name in ('lon', 'lat', 'time', 'time_bnds'):
        self.assertIn(coord_name, norm_ds.coords)

    expected_shape = (1, 90, 180)
    self.assertEqual(norm_ds.first.shape, expected_shape)
    self.assertEqual(norm_ds.second.shape, expected_shape)

    first_time = norm_ds.coords['time'][0]
    first_bounds = norm_ds.coords['time_bnds'][0]
    self.assertEqual(first_time,
                     xr.DataArray(pd.to_datetime('2016-02-21T00:00:00')))
    self.assertEqual(first_bounds[0],
                     xr.DataArray(pd.to_datetime('2015-02-04')))
    self.assertEqual(first_bounds[1],
                     xr.DataArray(pd.to_datetime('2017-03-09')))
def test_normalize_with_missing_time_dim(self):
    """
    Normalize a dataset that has no time dimension but carries
    'time_coverage_start' / 'time_coverage_end' global attributes.

    Asserts that a new dataset is returned, that it has the four
    coordinates 'lon', 'lat', 'time' and 'time_bnds', that the data
    variables gained a leading axis of length 1, and that the time
    bounds equal the coverage start and end dates.
    """
    grid_dims = ['lat', 'lon']
    grid_shape = [90, 180]
    coverage = {
        'time_coverage_start': '20120101',
        'time_coverage_end': '20121231',
    }
    ds = xr.Dataset(
        {
            'first': (grid_dims, np.zeros(grid_shape)),
            'second': (grid_dims, np.zeros(grid_shape)),
        },
        coords={
            'lat': np.linspace(-89.5, 89.5, 90),
            'lon': np.linspace(-179.5, 179.5, 180),
        },
        attrs=coverage)

    norm_ds = normalize_dataset(ds)

    self.assertIsNot(norm_ds, ds)
    self.assertEqual(len(norm_ds.coords), 4)
    for coord_name in ('lon', 'lat', 'time', 'time_bnds'):
        self.assertIn(coord_name, norm_ds.coords)

    expected_shape = (1, 90, 180)
    self.assertEqual(norm_ds.first.shape, expected_shape)
    self.assertEqual(norm_ds.second.shape, expected_shape)

    first_time = norm_ds.coords['time'][0]
    first_bounds = norm_ds.coords['time_bnds'][0]
    self.assertEqual(first_time,
                     xr.DataArray(pd.to_datetime('2012-07-01T12:00:00')))
    self.assertEqual(first_bounds[0],
                     xr.DataArray(pd.to_datetime('2012-01-01')))
    self.assertEqual(first_bounds[1],
                     xr.DataArray(pd.to_datetime('2012-12-31')))
def test_normalize_julian_day(self):
    """
    Test Julian Day -> Datetime conversion

    The input stores the first day of each month of 2000 as Julian
    day numbers with dimensions ordered (lat, lon, time); the expected
    dataset holds datetimes and uses (time, lat, lon).
    """
    months = range(1, 13)
    lat_values = np.linspace(-88, 45, 88)
    lon_values = np.linspace(-178, 178, 90)

    # gcal2jd() yields a two-part Julian date; the parts are summed
    # to get a single day number per month.
    julian_days = [
        jd_part_1 + jd_part_2
        for jd_part_1, jd_part_2 in (gcal2jd(2000, m, 1) for m in months)
    ]
    in_dims = ['lat', 'lon', 'time']
    ds = xr.Dataset({
        'first': (in_dims, np.zeros([88, 90, 12])),
        'second': (in_dims, np.zeros([88, 90, 12])),
        'lat': lat_values,
        'lon': lon_values,
        'time': julian_days,
    })
    ds.time.attrs['long_name'] = 'time in julian days'

    out_dims = ['time', 'lat', 'lon']
    expected = xr.Dataset({
        'first': (out_dims, np.zeros([12, 88, 90])),
        'second': (out_dims, np.zeros([12, 88, 90])),
        'lat': np.linspace(-88, 45, 88),
        'lon': np.linspace(-178, 178, 90),
        'time': [datetime(2000, m, 1) for m in months],
    })
    expected.time.attrs['long_name'] = 'time'

    actual = normalize_dataset(ds)
    assertDatasetEqual(actual, expected)
def test_normalize_with_time_called_t(self):
    """
    Normalize a dataset whose time stamps live in a variable named 't'
    defined along the 'time' dimension.

    Asserts that a new dataset is returned with exactly the three
    coordinates 'lon', 'lat' and 'time', that the data variables keep
    their (4, 90, 180) shape, and that the first time value equals the
    first entry of 't'.
    """
    cube_dims = ['time', 'lat', 'lon']
    cube_shape = [4, 90, 180]
    time_stamps = np.array(
        [
            '2005-07-02T00:00:00.000000000',
            '2006-07-02T12:00:00.000000000',
            '2007-07-03T00:00:00.000000000',
            '2008-07-02T00:00:00.000000000',
        ],
        dtype='datetime64[ns]')
    ds = xr.Dataset(
        {
            'first': (cube_dims, np.zeros(cube_shape)),
            'second': (cube_dims, np.zeros(cube_shape)),
            't': ('time', time_stamps),
        },
        coords={
            'lat': np.linspace(-89.5, 89.5, 90),
            'lon': np.linspace(-179.5, 179.5, 180),
        },
        attrs={
            'time_coverage_start': '2005-01-17',
            'time_coverage_end': '2008-08-17',
        })

    norm_ds = normalize_dataset(ds)

    self.assertIsNot(norm_ds, ds)
    self.assertEqual(len(norm_ds.coords), 3)
    for coord_name in ('lon', 'lat', 'time'):
        self.assertIn(coord_name, norm_ds.coords)

    expected_shape = (4, 90, 180)
    self.assertEqual(norm_ds.first.shape, expected_shape)
    self.assertEqual(norm_ds.second.shape, expected_shape)
    self.assertEqual(norm_ds.coords['time'][0],
                     xr.DataArray(pd.to_datetime('2005-07-02T00:00')))
def test_normalize_inverted_lat(self):
    """
    Normalize a dataset whose latitudes run from -88 up to 88.

    The expected dataset has latitudes running from 88 down to -88 and
    the 'first' variable flipped along its latitude axis accordingly.
    Both datasets are chunked with one time step per chunk.
    """
    def make_cube(first_slice, lat_values):
        # Only time step 0 of 'first' carries non-zero data.
        first_values = np.zeros([3, 45, 90])
        first_values[0, :, :] = first_slice
        cube = xr.Dataset({
            'first': (['time', 'lat', 'lon'], first_values),
            'second': (['time', 'lat', 'lon'], np.zeros([3, 45, 90])),
            'lat': lat_values,
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, month, 1) for month in range(1, 4)],
        })
        return cube.chunk(chunks={'time': 1})

    ds = make_cube(np.eye(45, 90), np.linspace(-88, 88, 45))
    expected = make_cube(np.flip(np.eye(45, 90), axis=0),
                         np.linspace(88, -88, 45))

    actual = normalize_dataset(ds)
    xr.testing.assert_equal(actual, expected)
def test_normalize_lon_lat_2d(self):
    """
    Test nominal execution

    The input carries 2D 'lat' and 'lon' coordinates defined on
    ('y', 'x'), where every row of 'lat' is constant and every row of
    'lon' is identical. The expected result uses 1D 'lat' / 'lon'
    coordinates and the data variables are defined on
    ('time', 'lat', 'lon').
    """
    t_size, y_size, x_size = 2, 3, 4
    var_attrs = {'valid_min': 0., 'valid_max': 1.}
    global_attrs = {
        'geospatial_lon_min': -15.,
        'geospatial_lon_max': 25.,
        'geospatial_lat_min': 5.,
        'geospatial_lat_max': 35.,
    }

    a_data = np.random.random_sample((t_size, y_size, x_size))
    b_data = np.random.random_sample((t_size, y_size, x_size))
    time_data = [1, 2]
    lat_2d = [[10., 10., 10., 10.],
              [20., 20., 20., 20.],
              [30., 30., 30., 30.]]
    lon_2d = [[-10., 0., 10., 20.],
              [-10., 0., 10., 20.],
              [-10., 0., 10., 20.]]

    yx_dims = ('time', 'y', 'x')
    dataset = xr.Dataset(
        {
            'a': (yx_dims, a_data, var_attrs),
            'b': (yx_dims, b_data, var_attrs),
        },
        {
            'time': (('time', ), time_data),
            'lat': (('y', 'x'), lat_2d),
            'lon': (('y', 'x'), lon_2d),
        },
        dict(global_attrs))

    geo_dims = ('time', 'lat', 'lon')
    expected = xr.Dataset(
        {
            'a': (geo_dims, a_data, var_attrs),
            'b': (geo_dims, b_data, var_attrs),
        },
        {
            'time': (('time', ), time_data),
            'lat': (('lat', ), [10., 20., 30.]),
            'lon': (('lon', ), [-10., 0., 10., 20.]),
        },
        dict(global_attrs))

    actual = normalize_dataset(dataset)
    xr.testing.assert_equal(actual, expected)
def test_normalize_does_not_reorder_increasing_lat(self):
    """
    Normalize a chunked dataset whose latitudes run from -88 up to 88
    and assert that the result equals the input.
    """
    cube_dims = ['time', 'lat', 'lon']
    # Only time step 0 of 'first' carries non-zero data.
    first_values = np.zeros([3, 45, 90])
    first_values[0, :, :] = np.eye(45, 90)

    unchunked = xr.Dataset({
        'first': (cube_dims, first_values),
        'second': (cube_dims, np.zeros([3, 45, 90])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': [datetime(2000, month, 1) for month in range(1, 4)],
    })
    ds = unchunked.chunk(chunks={'time': 1})

    actual = normalize_dataset(ds)
    xr.testing.assert_equal(actual, ds)
def test_nominal(self):
    """
    Test nominal operation

    Normalizes the dataset produced by ``self.new_cci_seal_level_ds()``
    and asserts that a new dataset is returned which contains 'ampl'
    and 'phase', has 'time' and 'time_bnds' coordinates but no
    'time_step' coordinate, and orders the variable dimensions as
    (time, period, lat, lon).
    """
    source_ds = self.new_cci_seal_level_ds()
    normalized = normalize_dataset(source_ds)

    self.assertIsNot(normalized, source_ds)
    for var_name in ('ampl', 'phase'):
        self.assertIn(var_name, normalized)
    for coord_name in ('time', 'time_bnds'):
        self.assertIn(coord_name, normalized.coords)
    self.assertNotIn('time_step', normalized.coords)

    expected_dims = ['time', 'period', 'lat', 'lon']
    self.assertEqual(expected_dims, list(normalized.ampl.dims))
    self.assertEqual(expected_dims, list(normalized.phase.dims))
def test_fix_360_lon(self):
    """
    Normalize a dataset whose longitudes run from 1 to 360.

    Asserts that the result is a new dataset of the same size with
    longitudes from -179.5 to 179.5, that the two halves of each data
    variable are exchanged along the longitude axis, and that the
    geospatial longitude attributes become -180 / +180 while the other
    geospatial attributes keep their values.
    """
    # The following simulates a strangely geo-coded soil moisture dataset
    # we found at ...
    #
    lon_size, lat_size, time_size = 360, 130, 12
    cube_dims = ['time', 'lat', 'lon']
    cube_shape = [time_size, lat_size, lon_size]
    ds = xr.Dataset(
        {
            'first': (cube_dims, np.random.random_sample(cube_shape)),
            'second': (cube_dims, np.random.random_sample(cube_shape)),
        },
        coords={
            'lon': np.linspace(1., 360., lon_size),
            'lat': np.linspace(65., -64., lat_size),
            'time': [datetime(2000, month, 1)
                     for month in range(1, time_size + 1)],
        },
        attrs={
            'geospatial_lon_min': 0.,
            'geospatial_lon_max': 360.,
            'geospatial_lat_min': -64.5,
            'geospatial_lat_max': +65.5,
            'geospatial_lon_resolution': 1.,
            'geospatial_lat_resolution': 1.,
        })

    new_ds = normalize_dataset(ds)

    self.assertIsNot(ds, new_ds)
    self.assertEqual(ds.dims, new_ds.dims)
    self.assertEqual(ds.sizes, new_ds.sizes)
    assert_array_almost_equal(new_ds.lon, np.linspace(-179.5, 179.5, 360))
    assert_array_almost_equal(new_ds.lat, np.linspace(65., -64., 130))

    # Western and eastern halves are expected to have traded places.
    for var_name in ('first', 'second'):
        shifted = getattr(new_ds, var_name)
        original = getattr(ds, var_name)
        assert_array_almost_equal(shifted[..., :180], original[..., 180:])
        assert_array_almost_equal(shifted[..., 180:], original[..., :180])

    expected_attrs = (
        ('geospatial_lon_min', -180.),
        ('geospatial_lon_max', +180.),
        ('geospatial_lat_min', -64.5),
        ('geospatial_lat_max', +65.5),
        ('geospatial_lon_resolution', 1.),
        ('geospatial_lat_resolution', 1.),
    )
    for attr_name, attr_value in expected_attrs:
        self.assertEqual(attr_value, new_ds.attrs[attr_name])
def test_no_change(self):
    """
    Test that a dataset needing no normalization is returned as-is.

    The dataset uses ('time', 'lat', 'lon') ordered variables with
    longitudes spanning -179.5 to 179.5. The test asserts that
    normalize_dataset() hands back the very same object.
    """
    lon_size = 360
    lat_size = 130
    time_size = 12
    cube_dims = ['time', 'lat', 'lon']
    cube_shape = [time_size, lat_size, lon_size]
    ds = xr.Dataset(
        {
            'first': (cube_dims, np.random.random_sample(cube_shape)),
            'second': (cube_dims, np.random.random_sample(cube_shape)),
        },
        coords={
            # The end point used to be -179.5 as well, which produced
            # 360 identical longitude values instead of a real axis.
            'lon': np.linspace(-179.5, 179.5, lon_size),
            'lat': np.linspace(-65., 64., lat_size),
            'time': [datetime(2000, month, 1)
                     for month in range(1, time_size + 1)],
        })

    ds2 = normalize_dataset(ds)
    self.assertIs(ds2, ds)
def test_normalize_zonal_lat_lon(self):
    """
    Normalize a zonal dataset that is defined on 'latitude_centers'
    only (plus one extra dimension for the second variable).

    The expected dataset repeats every zonal value along a 'lon' axis
    with 10 degree spacing, renames 'latitude_centers' to 'lat', and
    carries 'lon_bnds' / 'lat_bnds' coordinates that extend half a
    resolution step to either side of each cell center. The
    'chunk_sizes' attributes of the result are compared as well.
    """
    resolution = 10
    half_step = resolution / 2
    lat_size = 3
    extra_size = 2
    lat_coords = np.arange(0, 30, resolution)
    lon_coords = [west + 5. for west in np.arange(-180.0, 180.0, resolution)]
    lon_size = len(lon_coords)
    extra_coords = np.random.random(2)

    def cell_bounds(centers):
        # Lower / upper edge for each cell center.
        return [[c - half_step, c + half_step] for c in centers]

    # --- zonal input variables -------------------------------------
    zonal_first = xr.DataArray(
        np.random.random(lat_size),
        coords=[('latitude_centers', lat_coords)],
        dims=['latitude_centers'],
        attrs=dict(chunk_sizes=[lat_size],
                   dimensions=['latitude_centers']))
    zonal_first.encoding = {'chunks': (lat_size, )}

    zonal_second = xr.DataArray(
        np.random.random(lat_size * extra_size).reshape(lat_size,
                                                        extra_size),
        coords={
            'latitude_centers': lat_coords,
            'one_more_dim': extra_coords,
        },
        dims=['latitude_centers', 'one_more_dim'],
        attrs=dict(chunk_sizes=[lat_size, extra_size],
                   dimensions=['latitude_centers', 'one_more_dim']))
    zonal_second.encoding = {'chunks': (lat_size, extra_size)}

    # --- expected variables, repeated along longitude --------------
    gridded_first = xr.DataArray(
        np.array([zonal_first.values for _ in lon_coords]).T,
        coords={
            'lat': lat_coords,
            'lon': lon_coords,
        },
        dims=['lat', 'lon'],
        attrs=dict(chunk_sizes=[lat_size, lon_size],
                   dimensions=['lat', 'lon']))
    gridded_first.encoding = {'chunks': (lat_size, lon_size)}

    gridded_second = xr.DataArray(
        np.array([zonal_second.values for _ in lon_coords]).T,
        coords={
            'one_more_dim': extra_coords,
            'lat': lat_coords,
            'lon': lon_coords,
        },
        dims=['one_more_dim', 'lat', 'lon'],
        attrs=dict(chunk_sizes=[extra_size, lat_size, lon_size],
                   dimensions=['one_more_dim', 'lat', 'lon']))
    gridded_second.encoding = {'chunks': (extra_size, lat_size, lon_size)}

    dataset = xr.Dataset({
        'first': zonal_first,
        'second': zonal_second,
    })
    expected = xr.Dataset({
        'first': gridded_first,
        'second': gridded_second,
    })
    expected = expected.assign_coords(lon_bnds=xr.DataArray(
        cell_bounds(expected.lon.values), dims=['lon', 'bnds']))
    expected = expected.assign_coords(lat_bnds=xr.DataArray(
        cell_bounds(expected.lat.values), dims=['lat', 'bnds']))

    actual = normalize_dataset(dataset)

    xr.testing.assert_equal(actual, expected)
    self.assertEqual(actual.first.chunk_sizes,
                     expected.first.chunk_sizes)
    self.assertEqual(actual.second.chunk_sizes,
                     expected.second.chunk_sizes)