def test__extract_error_percentiles(error_threshold_cube, error_percentile_cube):
    """Test the extraction of error percentiles from error-probability cube."""
    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={}
    )._extract_error_percentiles(error_threshold_cube, 4)

    assert result.long_name == error_percentile_cube.long_name
    assert result.units == error_percentile_cube.units
    assert result.coords() == error_percentile_cube.coords()
    assert result.attributes == error_percentile_cube.attributes

    # Test the case where error_threshold_cube has unit realization dimension
    error_threshold_cube = error_threshold_cube.extract(Constraint(realization=0))
    error_threshold_cube = new_axis(error_threshold_cube, "realization")
    error_threshold_cube.transpose([1, 0, 2, 3])
    error_percentile_cube = error_percentile_cube.extract(Constraint(realization=0))
    error_percentile_cube = new_axis(error_percentile_cube, "realization")

    result = ApplyRainForestsCalibrationLightGBM(
        model_config_dict={}
    )._extract_error_percentiles(error_threshold_cube, 4)

    assert result.long_name == error_percentile_cube.long_name
    assert result.units == error_percentile_cube.units
    assert result.coords() == error_percentile_cube.coords()
    assert result.attributes == error_percentile_cube.attributes
def test_masked_unit_array(self):
    cube = stock.simple_3d_mask()
    test_cube = cube[0, 0, 0]
    test_cube = new_axis(test_cube, 'longitude')
    test_cube = new_axis(test_cube, 'latitude')
    data_shape = test_cube.data.shape
    mask_shape = test_cube.data.mask.shape
    self.assertEqual(data_shape, mask_shape)
def test_masked_unit_array(self): cube = stock.simple_3d_mask() test_cube = cube[0, 0, 0] test_cube = new_axis(test_cube, "longitude") test_cube = new_axis(test_cube, "latitude") data_shape = test_cube.data.shape mask_shape = test_cube.data.mask.shape self.assertEqual(data_shape, mask_shape)
def test_lazy_data(self):
    cube = iris.cube.Cube(as_lazy_data(self.data))
    cube.add_aux_coord(iris.coords.DimCoord([1], standard_name="time"))
    res = new_axis(cube, "time")
    self.assertTrue(cube.has_lazy_data())
    self.assertTrue(res.has_lazy_data())
    self.assertEqual(res.shape, (1,) + cube.shape)
def test_lazy_data(self): filename = tests.get_data_path(("PP", "globClim1", "theta.pp")) cube = iris.load_cube(filename) new_cube = new_axis(cube, "time") self.assertTrue(cube.has_lazy_data()) self.assertTrue(new_cube.has_lazy_data()) self.assertEqual(new_cube.shape, (1, ) + cube.shape)
def _extract_error_percentiles(self, error_probability_cube, error_percentiles_count):
    """Extract error percentile values from the error exceedence probabilities.

    Args:
        error_probability_cube:
            A cube containing error exceedence probabilities.
        error_percentiles_count:
            The number of error percentiles to extract. The resulting
            percentiles will be evenly spaced over the interval (0, 100).

    Returns:
        Cube containing percentile values for the error distributions.
    """
    error_percentiles = choose_set_of_percentiles(
        error_percentiles_count,
        sampling="quantile",
    )
    error_percentiles_cube = ConvertProbabilitiesToPercentiles().process(
        error_probability_cube, percentiles=error_percentiles
    )
    if len(error_percentiles_cube.coord_dims("realization")) == 0:
        error_percentiles_cube = new_axis(error_percentiles_cube, "realization")
    return error_percentiles_cube
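# NOTE: illustrative sketch, not part of the snippets above. The final step of
# _extract_error_percentiles relies on iris.util.new_axis to guarantee a leading
# "realization" dimension even for deterministic input, where the realization
# coordinate is scalar. The cube below is synthetic (not one of the plugin's
# test fixtures) and exists only to show that promotion in isolation.
import numpy as np
import iris
from iris.coords import DimCoord
from iris.util import new_axis

cube = iris.cube.Cube(
    np.zeros((2, 2), dtype=np.float32),
    long_name="forecast_error",
    dim_coords_and_dims=[
        (DimCoord([25.0, 75.0], long_name="percentile", units="%"), 0),
        (DimCoord([0.0, 10.0], standard_name="latitude", units="degrees"), 1),
    ],
)
# Scalar realization coordinate, as produced for a deterministic forecast.
cube.add_aux_coord(DimCoord([0], standard_name="realization", units="1"))

# coord_dims() is empty for a scalar coordinate, so promote it to a
# length-one leading dimension, mirroring the check in the method above.
if len(cube.coord_dims("realization")) == 0:
    cube = new_axis(cube, "realization")

print(cube.shape)  # (1, 2, 2)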
def test_add_unit_dimension(input_cube, expected_cube):
    """Test case where added dimension is of length 1."""
    realization_coord = DimCoord([0], standard_name="realization", units=1)
    expected_cube = new_axis(
        expected_cube.extract(Constraint(realization=0)), "realization"
    )
    output_cube = add_coordinate_to_cube(input_cube, realization_coord)
    assert output_cube == expected_cube
def test_lazy_data(self):
    filename = tests.get_data_path(('PP', 'globClim1', 'theta.pp'))
    cube = iris.load_cube(filename)
    new_cube = new_axis(cube, 'time')
    self.assertTrue(cube.has_lazy_data())
    self.assertTrue(new_cube.has_lazy_data())
    self.assertEqual(new_cube.shape, (1,) + cube.shape)
def test_lazy_data(self):
    cube = iris.cube.Cube(NumpyArrayAdapter(self.data))
    cube.add_aux_coord(iris.coords.DimCoord([1], standard_name='time'))
    res = new_axis(cube, 'time')
    self.assertTrue(cube.has_lazy_data())
    self.assertTrue(res.has_lazy_data())
    self.assertEqual(res.shape, (1,) + cube.shape)
def test_lazy_data(self):
    filename = tests.get_data_path(('PP', 'globClim1', 'theta.pp'))
    cube = iris.load_cube(filename)
    new_cube = new_axis(cube, 'time')
    self.assertTrue(cube.has_lazy_data())
    self.assertTrue(new_cube.has_lazy_data())
    self.assertEqual(new_cube.shape, (1,) + cube.shape)
def test_lazy_data(self):
    cube = iris.cube.Cube(as_lazy_data(self.data))
    cube.add_aux_coord(iris.coords.DimCoord([1], standard_name='time'))
    res = new_axis(cube, 'time')
    self.assertTrue(cube.has_lazy_data())
    self.assertTrue(res.has_lazy_data())
    self.assertEqual(res.shape, (1,) + cube.shape)
def test_maint_factory(self):
    # Ensure that aux factory persists.
    data = np.arange(12, dtype='i8').reshape((3, 4))
    orography = iris.coords.AuxCoord([10, 25, 50, 5],
                                     standard_name='surface_altitude',
                                     units='m')
    model_level = iris.coords.AuxCoord([2, 1, 0],
                                       standard_name='model_level_number')
    level_height = iris.coords.DimCoord([100, 50, 10],
                                        long_name='level_height', units='m',
                                        attributes={'positive': 'up'},
                                        bounds=[[150, 75], [75, 20], [20, 0]])
    sigma = iris.coords.AuxCoord([0.8, 0.9, 0.95], long_name='sigma',
                                 bounds=[[0.7, 0.85], [0.85, 0.97],
                                         [0.97, 1.0]])
    hybrid_height = iris.aux_factory.HybridHeightFactory(
        level_height, sigma, orography)

    cube = iris.cube.Cube(data, standard_name='air_temperature', units='K',
                          dim_coords_and_dims=[(level_height, 0)],
                          aux_coords_and_dims=[(orography, 1),
                                               (model_level, 0), (sigma, 0)],
                          aux_factories=[hybrid_height])

    com = iris.cube.Cube(data[None], standard_name='air_temperature',
                         units='K',
                         dim_coords_and_dims=[(copy.copy(level_height), 1)],
                         aux_coords_and_dims=[(copy.copy(orography), 2),
                                              (copy.copy(model_level), 1),
                                              (copy.copy(sigma), 1)],
                         aux_factories=[copy.copy(hybrid_height)])
    res = new_axis(cube)

    self.assertEqual(res, com)
    self._assert_cube_notis(res, cube)

    # Check that factory dependencies are actual coords within the cube.
    # Addresses a former bug : https://github.com/SciTools/iris/pull/3263
    factory, = list(res.aux_factories)
    deps = factory.dependencies
    for dep_name, dep_coord in six.iteritems(deps):
        coord_name = dep_coord.name()
        msg = ('Factory dependency {!r} is a coord named {!r}, '
               'but it is *not* the coord of that name in the new cube.')
        self.assertIs(dep_coord, res.coord(coord_name),
                      msg.format(dep_name, coord_name))
def test_scalar_dimcoord(self):
    # Providing a scalar coordinate to promote.
    res = new_axis(self.cube, "time")
    com = iris.cube.Cube(self.data[None])
    com.add_dim_coord(self.coords["lat"].copy(), 1)
    com.add_dim_coord(self.coords["lon"].copy(), 2)
    com.add_aux_coord(self.coords["time"].copy(), 0)
    com.add_aux_coord(self.coords["wibble"].copy(), None)
    self.assertEqual(res, com)
    self._assert_cube_notis(res, self.cube)
def test_scalar_auxcoord(self):
    # Providing a scalar coordinate to promote.
    res = new_axis(self.cube, 'wibble')
    com = iris.cube.Cube(self.data[None])
    com.add_dim_coord(self.coords['lat'].copy(), 1)
    com.add_dim_coord(self.coords['lon'].copy(), 2)
    com.add_aux_coord(self.coords['time'].copy(), None)
    com.add_aux_coord(self.coords['wibble'].copy(), 0)
    self.assertEqual(res, com)
    self._assert_cube_notis(res, self.cube)
def test_maint_factory(self):
    # Ensure that aux factory persists.
    data = np.arange(12, dtype='i8').reshape((3, 4))
    orography = iris.coords.AuxCoord([10, 25, 50, 5],
                                     standard_name='surface_altitude',
                                     units='m')
    model_level = iris.coords.AuxCoord([2, 1, 0],
                                       standard_name='model_level_number')
    level_height = iris.coords.DimCoord([100, 50, 10],
                                        long_name='level_height', units='m',
                                        attributes={'positive': 'up'},
                                        bounds=[[150, 75], [75, 20], [20, 0]])
    sigma = iris.coords.AuxCoord([0.8, 0.9, 0.95], long_name='sigma',
                                 bounds=[[0.7, 0.85], [0.85, 0.97],
                                         [0.97, 1.0]])
    hybrid_height = iris.aux_factory.HybridHeightFactory(
        level_height, sigma, orography)

    cube = iris.cube.Cube(data, standard_name='air_temperature', units='K',
                          dim_coords_and_dims=[(level_height, 0)],
                          aux_coords_and_dims=[(orography, 1),
                                               (model_level, 0), (sigma, 0)],
                          aux_factories=[hybrid_height])

    com = iris.cube.Cube(data[None], standard_name='air_temperature',
                         units='K',
                         dim_coords_and_dims=[(copy.copy(level_height), 1)],
                         aux_coords_and_dims=[(copy.copy(orography), 2),
                                              (copy.copy(model_level), 1),
                                              (copy.copy(sigma), 1)],
                         aux_factories=[copy.copy(hybrid_height)])
    res = new_axis(cube)

    self.assertEqual(res, com)
    self._assert_cube_notis(res, cube)
def test_maint_factory(self):
    # Ensure that aux factory persists.
    data = np.arange(12, dtype="i8").reshape((3, 4))
    orography = iris.coords.AuxCoord(
        [10, 25, 50, 5], standard_name="surface_altitude", units="m"
    )
    model_level = iris.coords.AuxCoord(
        [2, 1, 0], standard_name="model_level_number"
    )
    level_height = iris.coords.DimCoord(
        [100, 50, 10],
        long_name="level_height",
        units="m",
        attributes={"positive": "up"},
        bounds=[[150, 75], [75, 20], [20, 0]],
    )
    sigma = iris.coords.AuxCoord(
        [0.8, 0.9, 0.95],
        long_name="sigma",
        bounds=[[0.7, 0.85], [0.85, 0.97], [0.97, 1.0]],
    )
    hybrid_height = iris.aux_factory.HybridHeightFactory(
        level_height, sigma, orography
    )

    cube = iris.cube.Cube(
        data,
        standard_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(level_height, 0)],
        aux_coords_and_dims=[(orography, 1), (model_level, 0), (sigma, 0)],
        aux_factories=[hybrid_height],
    )

    com = iris.cube.Cube(
        data[None],
        standard_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(copy.copy(level_height), 1)],
        aux_coords_and_dims=[
            (copy.copy(orography), 2),
            (copy.copy(model_level), 1),
            (copy.copy(sigma), 1),
        ],
        aux_factories=[copy.copy(hybrid_height)],
    )
    res = new_axis(cube)

    self.assertEqual(res, com)
    self._assert_cube_notis(res, cube)
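# NOTE: illustrative sketch, not part of the snippets above. The factory tests
# call new_axis(cube) without naming a coordinate, which simply prepends an
# anonymous length-one dimension; all existing coordinates (and any aux
# factories, as the tests above check) shift up by one dimension. A minimal
# synthetic example of that no-argument form:
import numpy as np
import iris
from iris.coords import DimCoord
from iris.util import new_axis

cube = iris.cube.Cube(
    np.zeros((3, 4), dtype=np.float32),
    long_name="example",
    dim_coords_and_dims=[
        (DimCoord(np.arange(3.0), standard_name="latitude", units="degrees"), 0),
        (DimCoord(np.arange(4.0), standard_name="longitude", units="degrees"), 1),
    ],
)

promoted = new_axis(cube)  # no scalar coordinate supplied
print(promoted.shape)                    # (1, 3, 4)
print(promoted.coord_dims("latitude"))   # (1,)
print(promoted.coord_dims("longitude"))  # (2,)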
def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))
    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # work out size of data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    #       If this is not the case, then the following line will need to be changed
    #       to concatenate the data from all the files and not just arbitrarily pick
    #       the altitudes from the first file.
    alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

    lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units=cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")
        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))
        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        if data.ndim == 2:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            cubes.append(new_cube)
        elif data.ndim == 3:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            # Then add the (extended) pressure coord so that it is explicitly a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
            cubes.append(new_cube)
        else:
            raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd
def test_1d_single_value_common_axis(self):
    # Manually promote scalar time cube to be a 1d cube.
    single = CubeSignature(new_axis(self.scalar_cube, 'time'))
    self.assertEqual(self.series_inc.dim_metadata, single.dim_metadata)
    self.assertEqual(self.series_dec.dim_metadata, single.dim_metadata)
def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))
    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # work out size of data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    #       If this is not the case, then the following line will need to be
    #       changed to concatenate the data from all the files and not just
    #       arbitrarily pick the altitudes from the first file.
    alt_data = self._get_calipso_data(
        hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(
        hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude',
                         units='degrees_north')

    lon_data = self._get_calipso_data(
        hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude',
                         units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(
            datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time',
                              standard_name='time',
                              units=cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")
        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))
        pres_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure',
                              units='hPa')

        if data.ndim == 2:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            cubes.append(new_cube)
        elif data.ndim == 3:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1),
                                             (alt_coord, 2)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            # Then add the (extended) pressure coord so that it is explicitly
            # a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
            cubes.append(new_cube)
        else:
            raise ValueError(
                "Unexpected number of dimensions for CALIOP data: {}".format(
                    data.ndim))

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd
def segmentation_2D(track, field, dxy, threshold=0, target='maximum',
                    method='watershed', max_distance=None):
    """Function using watershedding or random walker to determine cloud
    volumes associated with tracked updrafts.

    Parameters:
    track:         pandas.DataFrame
                   output from trackpy/maketrack
    field:         iris.cube.Cube
                   containing the 3D (time, x, y) field to perform the
                   watershedding on
    dxy:           float
                   grid spacing of the input field
    threshold:     float
                   threshold for the watershedding field to be used for the mask
    target:        string
                   switch determining whether the algorithm starts from maxima
                   or minima in the input field (maximum: starting from maxima
                   (default), minimum: starting from minima)
    method:        str
                   flag determining the algorithm to use (currently only
                   watershedding is implemented)
    max_distance:  float
                   maximum distance from a marker (in the units of dxy) allowed
                   to be assigned to that feature

    Output:
    segmentation_out: iris.cube.Cube
                      cloud mask, 0 outside and integer numbers according to
                      track inside the clouds
    track:            pandas.DataFrame
                      input dataframe with an added 'ncells' column
    """
    import numpy as np
    from skimage.morphology import watershed
    # from skimage.segmentation import random_walker
    import logging
    from iris.cube import CubeList
    from iris.util import new_axis
    from scipy.ndimage import distance_transform_edt

    logging.info('Start watershedding 2D')

    # CubeList to store individual segmentation masks
    segmentation_out_list = CubeList()

    track['ncells'] = 0
    if max_distance is not None:
        max_distance_pixel = np.ceil(max_distance / dxy)

    field_time = field.slices_over('time')
    for i, field_i in enumerate(field_time):
        # Create cube of the same dimensions and coordinates as input data to store mask:
        segmentation_out_i = 1 * field_i
        segmentation_out_i.rename('segmentation_mask')
        segmentation_out_i.units = 1

        data_i = field_i.core_data()
        time_i = field_i.coord('time').units.num2date(
            field_i.coord('time').points[0])
        tracks_i = track[track['time'] == time_i]

        # mask data outside region above/below threshold and invert data if tracking maxima:
        if target == 'maximum':
            unmasked = data_i > threshold
            data_i_segmentation = -1 * data_i
        elif target == 'minimum':
            unmasked = data_i < threshold
            data_i_segmentation = data_i
        else:
            raise ValueError('unknown type of target')

        markers = np.zeros_like(unmasked).astype(np.int32)
        for index, row in tracks_i.iterrows():
            markers[int(row['hdim_1']), int(row['hdim_2'])] = row['feature']
        markers[~unmasked] = 0

        if method == 'watershed':
            segmentation_mask_i = watershed(data_i_segmentation,
                                            markers.astype(np.int32),
                                            mask=unmasked)
        # elif method == 'random_walker':
        #     # res1 = random_walker(Mask, markers, mode='cg')
        #     res1 = random_walker(data_i_segmentation, markers.astype(np.int32),
        #                          beta=130, mode='bf', tol=0.001, copy=True,
        #                          multichannel=False, return_full_prob=False,
        #                          spacing=None)
        else:
            raise ValueError('unknown method, must be watershed')

        # remove everything from the individual masks that is more than
        # max_distance_pixel away from the markers
        if max_distance is not None:
            for feature in tracks_i['feature']:
                D = distance_transform_edt((markers != feature).astype(int))
                segmentation_mask_i[np.bitwise_and(
                    segmentation_mask_i == feature,
                    D > max_distance_pixel)] = 0

        segmentation_out_i.data = segmentation_mask_i

        # using merge throws an error, so promote time to a DimCoord and use concatenate:
        # segmentation_out_list.append(segmentation_out_i)
        segmentation_out_i_temp = new_axis(segmentation_out_i,
                                           scalar_coord='time')
        segmentation_out_list.append(segmentation_out_i_temp)

        # count number of grid cells associated with each tracked cell and write that into the DataFrame:
        values, count = np.unique(segmentation_mask_i, return_counts=True)
        counts = dict(zip(values, count))
        for index, row in tracks_i.iterrows():
            if row['feature'] in counts.keys():
                track.loc[index, 'ncells'] = counts[row['feature']]

        logging.debug('Finished segmentation 2D for '
                      + time_i.strftime('%Y-%m-%d_%H:%M:%S'))

    # merge individual masks in CubeList into one Cube:
    # using merge throws an error, so time was promoted to a DimCoord and concatenate is used:
    # segmentation_out = segmentation_out_list.merge_cube()
    segmentation_out = segmentation_out_list.concatenate_cube()

    logging.debug('Finished segmentation 2D')
    return segmentation_out, track
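# NOTE: illustrative sketch, not part of the snippets above. Both
# create_data_object and segmentation_2D use the same workaround: per-time
# cubes carry only a scalar time coordinate, so merging them fails, and each
# slice is instead promoted with new_axis before CubeList.concatenate_cube()
# joins them along a real time dimension. A minimal synthetic example of that
# slice / promote / concatenate round trip:
import numpy as np
import iris
from iris.coords import DimCoord
from iris.cube import CubeList
from iris.util import new_axis

full = iris.cube.Cube(
    np.arange(12, dtype=np.float32).reshape(3, 2, 2),
    long_name="example_field",
    dim_coords_and_dims=[
        (DimCoord([0.0, 1.0, 2.0], standard_name="time",
                  units="hours since 2000-01-01"), 0),
        (DimCoord([0.0, 1.0], standard_name="latitude", units="degrees"), 1),
        (DimCoord([0.0, 1.0], standard_name="longitude", units="degrees"), 2),
    ],
)

pieces = CubeList()
for slice_2d in full.slices_over("time"):
    # slices_over() yields cubes with a scalar time coordinate; promote it so
    # that concatenate_cube() can join the slices along a time dimension.
    pieces.append(new_axis(slice_2d, scalar_coord="time"))

rebuilt = pieces.concatenate_cube()
print(rebuilt.shape)  # (3, 2, 2)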