def test_dual_mean_of_difference(self):
    # MEAN(B - A) and MEAN(C - A)
    shape = (500, 30, 40)
    size = np.prod(shape)
    raw_data = np.linspace(0, 1, num=size).reshape(shape)
    a_counter = AccessCounter(raw_data)
    a_array = biggus.NumpyArrayAdapter(a_counter)
    b_counter = AccessCounter(raw_data * 3)
    b_array = biggus.NumpyArrayAdapter(b_counter)
    c_counter = AccessCounter(raw_data * 5)
    c_array = biggus.NumpyArrayAdapter(c_counter)

    b_sub_a_array = biggus.sub(b_array, a_array)
    mean_b_sub_a_array = biggus.mean(b_sub_a_array, axis=0)
    c_sub_a_array = biggus.sub(c_array, a_array)
    mean_c_sub_a_array = biggus.mean(c_sub_a_array, axis=0)

    mean_b_sub_a, mean_c_sub_a = biggus.ndarrays(
        [mean_b_sub_a_array, mean_c_sub_a_array])

    # Are the resulting numbers equivalent?
    np.testing.assert_array_almost_equal(mean_b_sub_a,
                                         np.mean(raw_data * 2, axis=0))
    np.testing.assert_array_almost_equal(mean_c_sub_a,
                                         np.mean(raw_data * 4, axis=0))
    # Was the source data read just once?
    self.assert_counts(a_counter.counts, [1])
    self.assert_counts(b_counter.counts, [1])
    self.assert_counts(c_counter.counts, [1])
def setUp(self):
    self.arr1 = np.array([1, 2, 3])
    self.arr2 = np.array([2, 1, 2])
    self.biggus_arr1 = biggus.NumpyArrayAdapter(self.arr1)
    self.biggus_arr2 = biggus.NumpyArrayAdapter(self.arr2)

    self.marr1 = np.ma.masked_array([1, 2, 3], mask=[0, 1, 0])
    self.marr2 = np.ma.masked_array([1, 5, 2], mask=[0, 0, 1])
    self.biggus_marr1 = biggus.NumpyArrayAdapter(self.marr1)
    self.biggus_marr2 = biggus.NumpyArrayAdapter(self.marr2)

    ufunc_names = ['absolute', 'add', 'arccos', 'arccosh', 'arcsin',
                   'arcsinh', 'arctan', 'arctan2', 'arctanh',
                   'bitwise_and', 'bitwise_or', 'bitwise_xor', 'ceil',
                   'conj', 'cos', 'cosh', 'deg2rad', 'divide', 'equal',
                   'exp', 'exp2', 'expm1', 'floor', 'floor_divide',
                   'fmax', 'fmin', 'greater', 'greater_equal', 'hypot',
                   'invert', 'left_shift', 'less', 'less_equal', 'log',
                   'log10', 'log2', 'logical_and', 'logical_not',
                   'logical_or', 'logical_xor', 'maximum', 'minimum',
                   'multiply', 'negative', 'not_equal', 'power',
                   'rad2deg', 'reciprocal', 'right_shift', 'rint',
                   'sign', 'sin', 'sinh', 'sqrt', 'square', 'subtract',
                   'tan', 'tanh', 'true_divide', 'trunc']
    self.ufuncs = [(name, getattr(np, name), getattr(biggus, name))
                   for name in ufunc_names]
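# A possible companion test for the table built in setUp above (a sketch
# only -- the test name and the choice of ufuncs are assumptions, not from
# the original suite). Each (name, numpy ufunc, biggus ufunc) triple is
# checked by applying both versions to the same operands and comparing.
def test_ufunc_equivalence_sketch(self):
    for name, np_fn, biggus_fn in self.ufuncs:
        if name not in ('add', 'subtract', 'multiply', 'maximum', 'minimum'):
            # Keep the sketch to ufuncs that are well defined for the
            # small integer operands created in setUp.
            continue
        result = biggus_fn(self.biggus_arr1, self.biggus_arr2)
        np.testing.assert_array_equal(result.ndarray(),
                                      np_fn(self.arr1, self.arr2))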
def test_all_fns(self):
    fns_to_test = ['copysign', 'nextafter', 'ldexp', 'fmod']
    arr1 = np.array([10, 2, 5])
    arr2 = np.array([1, 5, 15])
    biggus_arr1 = biggus.NumpyArrayAdapter(arr1)
    biggus_arr2 = biggus.NumpyArrayAdapter(arr2)
    for fn_name in fns_to_test:
        np_fn = getattr(np, fn_name)
        biggus_fn = getattr(biggus, fn_name)
        result = biggus_fn(biggus_arr1, biggus_arr2)
        assert_array_equal(result.ndarray(), np_fn(arr1, arr2))
def _test_elementwise(self, biggus_op, numpy_op):
    # Sequence of tests, defined as:
    #   1. Original array shape1.
    #   2. Original array shape2.
    #   3. Sequence of indexing operations to apply.
    tests = [
        [(10, ), (10, ), []],
        [(30, 40), (30, 40), []],
        [(30, 40), (30, 40), (5, )],
        [(10, 30, 1), (1, 40), []],
        [(2, 3, 1), (1, 4), [slice(1, 2)]],
        [(500, 30, 40), (500, 30, 40), [slice(3, 6)]],
        [(500, 30, 40), (500, 30, 40), [(slice(None), slice(3, 6))]],
    ]
    axis = 0
    ddof = 0
    for shape1, shape2, cuts in tests:
        # Define some test data.
        raw_data1 = np.linspace(0.0, 1.0, np.prod(shape1)).reshape(shape1)
        raw_data2 = np.linspace(0.2, 1.2, np.prod(shape2)).reshape(shape2)

        # Check the elementwise operation doesn't actually read any
        # data.
        data1 = AccessCounter(raw_data1)
        data2 = AccessCounter(raw_data2)
        array1 = biggus.NumpyArrayAdapter(data1)
        array2 = biggus.NumpyArrayAdapter(data2)
        op_array = biggus_op(array1, array2)
        self.assertIsInstance(op_array, biggus.Array)
        self.assertTrue((data1.counts == 0).all())
        self.assertTrue((data2.counts == 0).all())

        # Compute the NumPy elementwise operation, and then wrap the
        # result as an array so we can apply biggus-style indexing.
        numpy_op_data = numpy_op(raw_data1, raw_data2)
        numpy_op_array = biggus.NumpyArrayAdapter(numpy_op_data)

        for keys in cuts:
            # Check slicing doesn't actually read any data.
            op_array = op_array[keys]
            self.assertIsInstance(op_array, biggus.Array)
            self.assertTrue((data1.counts == 0).all())
            self.assertTrue((data2.counts == 0).all())
            # Update the NumPy result to match.
            numpy_op_array = numpy_op_array[keys]

        # Check the NumPy and biggus numeric values match.
        op_result = op_array.ndarray()
        numpy_result = numpy_op_array.ndarray()
        np.testing.assert_array_equal(op_result, numpy_result)
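# How a helper like _test_elementwise is typically driven (a sketch; the
# concrete test methods shown here are assumptions rather than a copy of
# the original suite): one tiny wrapper per elementwise operation.
def test_add_sketch(self):
    self._test_elementwise(biggus.add, np.add)

def test_sub_sketch(self):
    self._test_elementwise(biggus.sub, np.subtract)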
def test_mean_of_difference(self):
    shape = (3, 4)
    size = np.prod(shape)
    raw_data1 = np.linspace(0.2, 1.0, num=size).reshape(shape)
    raw_data2 = np.linspace(0.3, 1.5, num=size).reshape(shape)
    array1 = biggus.NumpyArrayAdapter(raw_data1)
    array2 = biggus.NumpyArrayAdapter(raw_data2)
    difference = biggus.sub(array2, array1)
    mean_difference = biggus.mean(difference, axis=0)

    # Check the NumPy and biggus numeric values match.
    result = mean_difference.ndarray()
    numpy_result = np.mean(raw_data2 - raw_data1, axis=0)
    np.testing.assert_array_equal(result, numpy_result)
def test_mean_of_difference(self):
    # MEAN(A - B)
    shape = (500, 30, 40)
    size = np.prod(shape)
    raw_data = np.linspace(0, 1, num=size).reshape(shape)

    data = AccessCounter(raw_data * 3)
    a_array = biggus.NumpyArrayAdapter(data)
    data = AccessCounter(raw_data)
    b_array = biggus.NumpyArrayAdapter(data)

    mean_array = biggus.mean(biggus.sub(a_array, b_array), axis=0)
    mean = mean_array.ndarray()
    np.testing.assert_array_almost_equal(mean,
                                         np.mean(raw_data * 2, axis=0))
def test___rdiv__(self):
    # We only have __rdiv__ on Python 2.
    if sys.version_info[0] == 2:
        a = biggus.NumpyArrayAdapter(RESULT_NDARRAY + 10)
        r = 5 / a
        self.assertIsInstance(r, biggus._Elementwise)
        assert_array_equal(r.ndarray(), 5 / (RESULT_NDARRAY + 10))
def test_ndarray(self):
    # Sequence of tests, defined as:
    #   1. Stack shape.
    #   2. Item shape.
    #   3. Expected result.
    tests = [
        [(1, ), (3, ), np.arange(3).reshape(1, 3)],
        [(1, ), (3, 4), np.arange(12).reshape(1, 3, 4)],
        [(6, ), (3, 4), np.arange(72).reshape(6, 3, 4)],
        [(6, 70), (3, 4), np.arange(5040).reshape(6, 70, 3, 4)],
    ]
    for stack_shape, item_shape, target in tests:
        stack = np.empty(stack_shape, dtype='O')
        item_size = np.array(item_shape).prod()
        for index in np.ndindex(stack.shape):
            start = np.ravel_multi_index(index, stack_shape) * item_size
            concrete = np.arange(item_size).reshape(item_shape)
            concrete += start
            array = biggus.NumpyArrayAdapter(concrete)
            stack[index] = array
        array = biggus.ArrayStack(stack)
        result = array.ndarray()
        self.assertIsInstance(result, np.ndarray)
        self.assertEqual(array.dtype, result.dtype)
        self.assertEqual(array.shape, result.shape)
        np.testing.assert_array_equal(result, target)
def test__biggus_filter(self):
    shape = (1451, 1, 1)

    # Generate dummy data as a biggus array.
    numpy_data = np.random.random(shape).astype(self.dtype)
    biggus_data = biggus.NumpyArrayAdapter(numpy_data)

    # Information for the filter...
    # Dictionary of weights: key = offset (absolute value), value = weight.
    weights = {0: 0.4, 1: 0.2, 2: 0.1}
    # This is equivalent to a weights array of [0.1, 0.2, 0.4, 0.2, 0.1].
    filter_halfwidth = len(weights) - 1

    # Filter the data.
    filtered_biggus_data = self._biggus_filter(biggus_data, weights)

    # Extract the eddy component (original data - filtered data).
    eddy_biggus_data = (biggus_data[filter_halfwidth:-filter_halfwidth] -
                        filtered_biggus_data)

    # Aggregate over the time dimension.
    mean_eddy_biggus_data = biggus.mean(eddy_biggus_data, axis=0)

    # Force evaluation.
    mean_eddy_numpy_data = mean_eddy_biggus_data.ndarray()

    # Confirm the correct shape.
    self.assertEqual(mean_eddy_numpy_data.shape, shape[1:])
def create_cube(lon_min, lon_max, bounds=False):
    n_lons = max(lon_min, lon_max) - min(lon_max, lon_min)
    data = np.arange(4 * 3 * n_lons, dtype='f4').reshape(4, 3, n_lons)
    data = biggus.NumpyArrayAdapter(data)
    cube = Cube(data, standard_name='x_wind', units='ms-1')
    cube.add_dim_coord(
        iris.coords.DimCoord([0, 20, 40, 80],
                             long_name='level_height',
                             units='m'), 0)
    cube.add_aux_coord(
        iris.coords.AuxCoord([1.0, 0.9, 0.8, 0.6], long_name='sigma'), 0)
    cube.add_dim_coord(
        iris.coords.DimCoord([-45, 0, 45], 'latitude', units='degrees'), 1)
    step = 1 if lon_max > lon_min else -1
    cube.add_dim_coord(
        iris.coords.DimCoord(np.arange(lon_min, lon_max, step),
                             'longitude', units='degrees'), 2)
    if bounds:
        cube.coord('longitude').guess_bounds()
    cube.add_aux_coord(
        iris.coords.AuxCoord(np.arange(3 * n_lons).reshape(3, n_lons) * 10,
                             'surface_altitude', units='m'), [1, 2])
    cube.add_aux_factory(
        iris.aux_factory.HybridHeightFactory(cube.coord('level_height'),
                                             cube.coord('sigma'),
                                             cube.coord('surface_altitude')))
    return cube
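# A minimal usage sketch for create_cube (the assertions here are
# illustrative assumptions, not from the original suite): longitudes may
# ascend or descend, and bounds are only guessed when requested.
def _create_cube_sketch():
    cube = create_cube(0, 10)
    assert cube.shape == (4, 3, 10)
    assert cube.coord('longitude').points[0] == 0

    reversed_cube = create_cube(10, 0, bounds=True)
    assert reversed_cube.coord('longitude').points[0] == 10
    assert reversed_cube.coord('longitude').has_bounds()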
def _check(self, data):
    array = biggus.NumpyArrayAdapter(data)
    result = std(array, axis=0, ddof=0).masked_array()
    expected = ma.std(data, axis=0, ddof=0)
    if expected.ndim == 0:
        expected = ma.asarray(expected)
    np.testing.assert_array_equal(result.filled(), expected.filled())
    np.testing.assert_array_equal(result.mask, expected.mask)
def _check(self, data, dtype=None, shape=None):
    data = np.asarray(data, dtype=dtype)
    if shape is not None:
        data = data.reshape(shape)
    array = biggus.NumpyArrayAdapter(data)
    result = count(array, axis=0).ndarray()
    expected = np.ones(data.shape[1:]) * data.shape[0]
    np.testing.assert_array_equal(result, expected)
def test_unsupported_mdtol(self):
    # The VARIANCE aggregator supports lazy_aggregation but does
    # not provide mdtol handling. Check that a TypeError is raised
    # if this unsupported kwarg is specified.
    array = biggus.NumpyArrayAdapter(np.arange(8))
    msg = "unexpected keyword argument 'mdtol'"
    with self.assertRaisesRegexp(TypeError, msg):
        VARIANCE.lazy_aggregate(array, axis=0, mdtol=0.8)
def _test_aggregation(self, biggus_op, numpy_op, **kwargs):
    # Sequence of tests, defined as:
    #   1. Original array shape.
    #   2. Sequence of indexing operations to apply.
    tests = [
        [(10, ), []],
        [(30, 40), []],
        [(30, 40), [5]],
        [(500, 30, 40), [slice(3, 6)]],
        [(500, 30, 40), [(slice(None), slice(3, 6))]],
    ]
    for shape, cuts in tests:
        # Define some test data.
        size = np.prod(shape)
        raw_data = np.linspace(0, 1, num=size).reshape(shape)

        for axis in range(len(shape)):
            # Check the aggregation operation doesn't actually read any
            # data.
            data = AccessCounter(raw_data)
            array = biggus.NumpyArrayAdapter(data)
            op_array = biggus_op(array, axis=axis, **kwargs)
            self.assertIsInstance(op_array, biggus.Array)
            self.assertTrue((data.counts == 0).all())

            # Compute the NumPy aggregation, and then wrap the result as
            # an array so we can apply biggus-style indexing.
            numpy_op_data = numpy_op(raw_data, axis=axis, **kwargs)
            numpy_op_array = biggus.NumpyArrayAdapter(numpy_op_data)

            for keys in cuts:
                # Check slicing doesn't actually read any data.
                op_array = op_array[keys]
                self.assertIsInstance(op_array, biggus.Array)
                self.assertTrue((data.counts == 0).all())
                # Update the NumPy result to match.
                numpy_op_array = numpy_op_array[keys]

            # Check resolving `op_array` to a NumPy array only reads
            # each relevant source value once.
            op_result = op_array.ndarray()
            self.assertTrue((data.counts <= 1).all())

            # Check the NumPy and biggus numeric values match.
            numpy_result = numpy_op_array.ndarray()
            np.testing.assert_array_almost_equal(op_result, numpy_result)
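# Typical drivers for _test_aggregation (a sketch; these method names and
# the ddof value are assumptions): each aggregation operator is paired with
# its NumPy counterpart, with extra keyword arguments forwarded through.
def test_mean_sketch(self):
    self._test_aggregation(biggus.mean, np.mean)

def test_std_ddof_1_sketch(self):
    self._test_aggregation(biggus.std, np.std, ddof=1)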
def test_different_shaped_accumulations(self):
    a = biggus.NumpyArrayAdapter(np.random.random(2))
    b = biggus.NumpyArrayAdapter(np.zeros((2, 2)))
    c = biggus.mean(b, axis=1)
    d = a - c
    graph = DaskEngine().graph(d)
    func_names = {task[0].__name__
                  for task in graph.values()
                  if callable(task[0])}
    expected = {'NumpyArrayAdapter\n(2, 2)',
                'NumpyArrayAdapter\n(2,)',
                'mean\n(axis=1)',
                'subtract',
                'gather'}
    self.assertEqual(expected, func_names)
def test_mean_of_mean(self):
    data = np.arange(24).reshape(3, 4, 2)
    array = biggus.NumpyArrayAdapter(data)
    mean1 = biggus.mean(array, axis=1)
    mean2 = biggus.mean(mean1, axis=-1)
    expected = np.mean(np.mean(data, axis=1), axis=-1)
    result = mean2.ndarray()
    np.testing.assert_array_equal(result, expected)
def setUp(self):
    self.data = np.arange(6.0).reshape((2, 3))
    self.lazydata = biggus.NumpyArrayAdapter(self.data)
    cube = Cube(self.lazydata)
    for i_dim, name in enumerate(('y', 'x')):
        npts = cube.shape[i_dim]
        coord = DimCoord(np.arange(npts), long_name=name)
        cube.add_dim_coord(coord, i_dim)
    self.cube = cube
def test_all_fns(self):
    fns_to_test = ['isreal', 'iscomplex', 'isinf', 'isnan', 'signbit']
    arr = np.array([-10, 0, 5])
    biggus_arr = biggus.NumpyArrayAdapter(arr)
    for fn_name in fns_to_test:
        np_fn = getattr(np, fn_name)
        biggus_fn = getattr(biggus, fn_name)
        result = biggus_fn(biggus_arr)
        assert_array_equal(result.ndarray(), np_fn(arr))
def build_lazy_cube(self, points, bounds=None, nx=4):
    data = np.arange(len(points) * nx).reshape(len(points), nx)
    data = biggus.NumpyArrayAdapter(data)
    cube = iris.cube.Cube(data, standard_name='air_temperature', units='K')
    lat = iris.coords.DimCoord(points, 'latitude', bounds=bounds)
    lon = iris.coords.DimCoord(np.arange(nx), 'longitude')
    cube.add_dim_coord(lat, 0)
    cube.add_dim_coord(lon, 1)
    return cube
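# A sketch of how this helper might be used (the test body is an
# assumption, not from the original suite): because the payload is wrapped
# in a NumpyArrayAdapter, the resulting cube reports lazy data until it is
# actually touched.
def test_build_lazy_cube_sketch(self):
    cube = self.build_lazy_cube([10, 20])
    self.assertTrue(cube.has_lazy_data())
    self.assertEqual(cube.shape, (2, 4))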
def build_lazy_cube(self):
    data = ma.array([[1., 1.], [1., 100000.]], mask=[[0, 0], [0, 1]])
    data = biggus.NumpyArrayAdapter(data)
    cube = iris.cube.Cube(data, standard_name='air_temperature', units='K')
    lat = iris.coords.DimCoord([-10, 10], 'latitude')
    lon = iris.coords.DimCoord([10, 20], 'longitude')
    cube.add_dim_coord(lat, 0)
    cube.add_dim_coord(lon, 1)
    return cube
def _check(self, data, dtype=None, shape=None):
    data = np.asarray(data, dtype=dtype)
    if shape is not None:
        data = data.reshape(shape)
    array = biggus.NumpyArrayAdapter(data)
    result = self.biggus_operator(array, axis=0).ndarray()
    expected = self.numpy_operator(data, axis=0)
    if expected.ndim == 0:
        expected = np.asarray(expected)
    np.testing.assert_array_equal(result, expected)
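# _check above relies on class attributes naming the operator pair under
# test. A sketch of the kind of setUp that could provide them (the chosen
# operators are assumptions, not from the original suite):
def setUp(self):
    self.biggus_operator = biggus.mean
    self.numpy_operator = np.mean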
def _test_flow(self, axis):
    data = np.arange(3 * 4 * 5, dtype='f4').reshape(3, 4, 5)
    array = biggus.NumpyArrayAdapter(data)
    mean = biggus.mean(array, axis=axis)
    # Artificially constrain the chunk size to eight bytes to
    # ensure biggus is stepping across axes in the correct
    # order.
    with mock.patch('biggus._init.MAX_CHUNK_SIZE', 8):
        op_result, = biggus.ndarrays([mean])
    np_result = np.mean(data, axis=axis)
    np.testing.assert_array_almost_equal(op_result, np_result)
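# _test_flow is normally fanned out over each axis (a sketch; the method
# names below are assumptions, not from the original suite):
def test_flow_axis_0_sketch(self):
    self._test_flow(0)

def test_flow_axis_2_sketch(self):
    self._test_flow(2)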
def test_biggus_complex(self):
    raw_points = np.arange(12).reshape(4, 3)
    points = biggus.NumpyArrayAdapter(raw_points)
    coord = iris.coords.AuxCoord(points)
    self.assertIsInstance(coord._points, biggus.Array)
    result = AuxCoordFactory._nd_points(coord, (3, 2), 5)
    # Check we haven't triggered the loading of the coordinate values.
    self.assertIsInstance(coord._points, biggus.Array)
    self.assertIsInstance(result, biggus.Array)
    expected = raw_points.T[np.newaxis, np.newaxis, ..., np.newaxis]
    self.assertArrayEqual(result, expected)
def _check(self, data):
    array = biggus.NumpyArrayAdapter(data)
    result = self.biggus_operator(array, axis=0).masked_array()
    expected = self.numpy_masked_operator(data, axis=0)
    if expected.ndim == 0:
        if expected is np.ma.masked:
            expected = ma.asarray(expected, dtype=array.dtype)
        else:
            expected = ma.asarray(expected)
    np.testing.assert_array_equal(result.filled(), expected.filled())
    np.testing.assert_array_equal(result.mask, expected.mask)
def _check(self, data, dtype=None, shape=None, ddof=0):
    for bfunc, nfunc in self.funcs:
        data = np.asarray(data, dtype=dtype)
        if shape is not None:
            data = data.reshape(shape)
        array = biggus.NumpyArrayAdapter(data)
        result = bfunc(array, axis=0, ddof=ddof).ndarray()
        expected = nfunc(data, axis=0, ddof=ddof)
        if expected.ndim == 0:
            expected = np.asarray(expected)
        np.testing.assert_array_almost_equal(result, expected)
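# Here _check expects self.funcs to hold (biggus, numpy) pairs that accept
# a ddof keyword. A sketch of a plausible setUp (an assumption, not from
# the original suite):
def setUp(self):
    self.funcs = [(biggus.std, np.std), (biggus.var, np.var)]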
def test_masked_array_numpy_first_biggus_second(self):
    # Ensure that an operation where the biggus array is second (i.e.
    # calling the special method of the numpy array, not the biggus
    # array) returns the expected type.
    mask = [False, True, False]
    arr = np.ma.array([1, 2, 3], mask=mask)
    barr = biggus.NumpyArrayAdapter(arr)
    result = (np.array([[1.]]) * barr).masked_array()
    target = np.array([[1.]]) * arr
    np.testing.assert_array_equal(result, target)
    np.testing.assert_array_equal(result.mask, target.mask)
def test_hybrid_height(self):
    cube = istk.simple_4d_with_hybrid_height()
    # Put a biggus array on the cube so we can test deferred loading.
    cube.lazy_data(biggus.NumpyArrayAdapter(cube.data))

    traj = (('grid_latitude', [20.5, 21.5, 22.5, 23.5]),
            ('grid_longitude', [31, 32, 33, 34]))
    xsec = traj_interpolate(cube, traj, method='nearest')

    # Check that creating the trajectory hasn't led to the original
    # data being loaded.
    self.assertTrue(cube.has_lazy_data())
    self.assertCML([cube, xsec], ('trajectory', 'hybrid_height.cml'))
def test_sd_and_mean_of_difference(self):
    # MEAN(A - B) and SD(A - B)
    shape = (500, 30, 40)
    size = np.prod(shape)
    raw_data = np.linspace(0, 1, num=size).reshape(shape)

    a_counter = AccessCounter(raw_data * 3)
    a_array = biggus.NumpyArrayAdapter(a_counter)
    b_counter = AccessCounter(raw_data)
    b_array = biggus.NumpyArrayAdapter(b_counter)

    sub_array = biggus.sub(a_array, b_array)
    mean_array = biggus.mean(sub_array, axis=0)
    std_array = biggus.std(sub_array, axis=0)
    mean, std = biggus.ndarrays([mean_array, std_array])

    # Are the resulting numbers equivalent?
    np.testing.assert_array_almost_equal(mean,
                                         np.mean(raw_data * 2, axis=0))
    np.testing.assert_array_almost_equal(std,
                                         np.std(raw_data * 2, axis=0))
    # Was the source data read just once?
    self.assert_counts(a_counter.counts, [1])
    self.assert_counts(b_counter.counts, [1])
def _biggus_filter(self, data, weights):
    # Filter a data array (time, <other dimensions>) using information in
    # the weights dictionary.
    #
    # Args:
    #
    # * data:
    #     biggus array of the data to be filtered
    # * weights:
    #     dictionary of absolute record offset : weight

    # Build the filter matrix (time to time' mapping).
    shape = data.shape

    # Build the filter matrix as a numpy array and then populate it.
    filter_matrix_np = np.zeros((shape[0], shape[0])).astype(self.dtype)
    for offset, value in weights.items():
        filter_matrix_np += np.diag([value] * (shape[0] - offset),
                                    k=offset)
        if offset > 0:
            filter_matrix_np += np.diag([value] * (shape[0] - offset),
                                        k=-offset)

    # Create a biggus array for the filter matrix, adding in the other
    # dimensions.
    for _ in shape[1:]:
        filter_matrix_np = filter_matrix_np[..., np.newaxis]
    filter_matrix_bg_single = biggus.NumpyArrayAdapter(filter_matrix_np)

    # Broadcast to the correct shape (time, time', lat, lon).
    filter_matrix_bg = biggus.BroadcastArray(
        filter_matrix_bg_single,
        {i + 2: j for i, j in enumerate(shape[1:])})

    # Broadcast the data to the same shape.
    biggus_data_for_filter = biggus.BroadcastArray(data[np.newaxis, ...],
                                                   {0: shape[0]})

    # Multiply the two arrays together and sum over the second time
    # dimension.
    filtered_data = biggus.sum(biggus_data_for_filter * filter_matrix_bg,
                               axis=1)

    # Cut off records at the start and end of the output array where the
    # filter cannot be fully applied.
    filter_halfwidth = len(weights) - 1
    filtered_data = filtered_data[filter_halfwidth:-filter_halfwidth]

    return filtered_data
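# A minimal NumPy cross-check of the same filtering idea (a hypothetical
# helper, not part of the original test class): for the retained interior
# records, the matrix formulation above should agree with a plain 1-D
# convolution along the time axis.
def _numpy_filter_sketch(numpy_data, weights):
    # Expand the {offset: weight} dictionary into a symmetric kernel,
    # e.g. {0: 0.4, 1: 0.2, 2: 0.1} -> [0.1, 0.2, 0.4, 0.2, 0.1].
    halfwidth = len(weights) - 1
    kernel = np.array([weights[abs(k)]
                       for k in range(-halfwidth, halfwidth + 1)])
    # Convolve each time series along the leading axis; 'valid' mode drops
    # the records where the kernel does not fully overlap the data, which
    # mirrors the [filter_halfwidth:-filter_halfwidth] trim above.
    return np.apply_along_axis(
        lambda column: np.convolve(column, kernel, mode='valid'),
        0, numpy_data)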
def __init__(self, grib_message, grib_fh=None, auto_regularise=True):
    """Store the grib message and compute our extra keys."""
    self.grib_message = grib_message
    deferred = grib_fh is not None

    # Store the file pointer and message length from the current
    # grib message before it's changed by calls to the grib-api.
    if deferred:
        # Note that the grib-api has already read this message and
        # advanced the file pointer to the end of the message.
        offset = grib_fh.tell()
        message_length = gribapi.grib_get_long(grib_message, 'totalLength')

    if auto_regularise and _is_quasi_regular_grib(grib_message):
        warnings.warn('Regularising GRIB message.')
        if deferred:
            self._regularise_shape(grib_message)
        else:
            _regularise(grib_message)

    # Initialise the key-extension dictionary.
    # NOTE: this attribute *must* exist, or the __getattr__ overload
    # can hit an infinite loop.
    self.extra_keys = {}
    self._confirm_in_scope()
    self._compute_extra_keys()

    # Calculate the data payload shape.
    shape = (gribapi.grib_get_long(grib_message, 'numberOfValues'),)
    if not self.gridType.startswith('reduced'):
        ni, nj = self.Ni, self.Nj
        j_fast = gribapi.grib_get_long(grib_message,
                                       'jPointsAreConsecutive')
        shape = (nj, ni) if j_fast == 0 else (ni, nj)

    if deferred:
        # Wrap the reference to the data payload within the data proxy
        # in order to support deferred data loading.
        # The byte offset needs to be reset back to the first byte
        # of this message. The file pointer offset is always at the end
        # of the current message due to the grib-api reading the message.
        proxy = GribDataProxy(shape, np.zeros(0).dtype, np.nan,
                              grib_fh.name, offset - message_length,
                              auto_regularise)
        self._data = biggus.NumpyArrayAdapter(proxy)
    else:
        self.data = _message_values(grib_message, shape)