def test_dataset_join():
    # Joining behaviour of apply_ufunc over Datasets with mismatched
    # indexes/variables (this variant carries its own numpy import).
    import numpy as np
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # dataset_join='outer' without a dataset_fill_value must raise TypeError
    with pytest.raises(TypeError):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        # helper: addition with explicit alignment behaviour and NaN fill
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    actual = add(ds0, ds1, 'outer', 'inner')
    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds1, 'outer', 'outer')
    assert_identical(actual, expected)

    # if variables don't match, join will perform add with np.nan
    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    actual = add(ds0, ds2, 'outer', 'inner')
    expected = xr.Dataset({'x': [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds2, 'outer', 'outer')
    expected = xr.Dataset({'a': ('x', [np.nan, np.nan, np.nan]),
                           'b': ('x', [np.nan, np.nan, np.nan]),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)
def test_apply_dask_multiple_inputs():
    # apply_ufunc over two dask-backed inputs sharing core dim 'z':
    # compare an eager reference result against dask='allowed' and
    # dask='parallelized' execution.
    import dask.array as da

    def covariance(x, y):
        # sample covariance along the last axis
        return ((x - x.mean(axis=-1, keepdims=True)) *
                (y - y.mean(axis=-1, keepdims=True))).mean(axis=-1)

    rs = np.random.RandomState(42)
    array1 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    array2 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    data_array_1 = xr.DataArray(array1, dims=('x', 'z'))
    data_array_2 = xr.DataArray(array2, dims=('y', 'z'))

    # reference: computed eagerly on numpy data
    expected = apply_ufunc(
        covariance, data_array_1.compute(), data_array_2.compute(),
        input_core_dims=[['z'], ['z']])

    allowed = apply_ufunc(
        covariance, data_array_1, data_array_2,
        input_core_dims=[['z'], ['z']], dask='allowed')
    assert isinstance(allowed.data, da.Array)
    xr.testing.assert_allclose(expected, allowed.compute())

    parallelized = apply_ufunc(
        covariance, data_array_1, data_array_2,
        input_core_dims=[['z'], ['z']], dask='parallelized',
        output_dtypes=[float])
    assert isinstance(parallelized.data, da.Array)
    xr.testing.assert_allclose(expected, parallelized.compute())
def test_apply_exclude():
    # Concatenation along a dim whose size differs between inputs requires
    # exclude_dims; the coordinate must be restitched by hand afterwards.
    def concatenate(objects, dim='x'):
        def func(*x):
            return np.concatenate(x, axis=-1)
        result = apply_ufunc(func, *objects,
                             input_core_dims=[[dim]] * len(objects),
                             output_core_dims=[[dim]],
                             exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            # note: this will fail if dim is not a coordinate on any input
            new_coord = np.concatenate(
                [obj.coords[dim] for obj in objects])
            result.coords[dim] = new_coord
        return result

    arrays = [np.array([1]), np.array([2, 3])]
    variables = [xr.Variable('x', a) for a in arrays]
    data_arrays = [xr.DataArray(v, {'x': c, 'y': ('x', range(len(c)))})
                   for v, c in zip(variables, [['a'], ['b', 'c']])]
    datasets = [xr.Dataset({'data': data_array})
                for data_array in data_arrays]

    expected_array = np.array([1, 2, 3])
    expected_variable = xr.Variable('x', expected_array)
    expected_data_array = xr.DataArray(expected_variable,
                                       [('x', list('abc'))])
    expected_dataset = xr.Dataset({'data': expected_data_array})

    assert_identical(expected_array, concatenate(arrays))
    assert_identical(expected_variable, concatenate(variables))
    assert_identical(expected_data_array, concatenate(data_arrays))
    assert_identical(expected_dataset, concatenate(datasets))

    # must also be a core dimension
    with pytest.raises(ValueError):
        apply_ufunc(identity, variables[0], exclude_dims={'x'})
def test_apply_exclude():
    # Legacy ``signature=`` API variant of the exclude_dims test.
    def concatenate(objects, dim='x'):
        sig = ([(dim,)] * len(objects), [(dim,)])
        # stitch the output coordinate back together by hand; plain
        # ndarrays have no .coords and contribute nothing
        new_coord = np.concatenate(
            [obj.coords[dim] if hasattr(obj, 'coords') else []
             for obj in objects])
        func = lambda *x: np.concatenate(x, axis=-1)
        result = apply_ufunc(func, *objects, signature=sig,
                             exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            result.coords[dim] = new_coord
        return result

    arrays = [np.array([1]), np.array([2, 3])]
    variables = [xr.Variable('x', a) for a in arrays]
    data_arrays = [xr.DataArray(v, {'x': c, 'y': ('x', range(len(c)))})
                   for v, c in zip(variables, [['a'], ['b', 'c']])]
    datasets = [xr.Dataset({'data': data_array})
                for data_array in data_arrays]

    expected_array = np.array([1, 2, 3])
    expected_variable = xr.Variable('x', expected_array)
    expected_data_array = xr.DataArray(expected_variable,
                                       [('x', list('abc'))])
    expected_dataset = xr.Dataset({'data': expected_data_array})

    assert_identical(expected_array, concatenate(arrays))
    assert_identical(expected_variable, concatenate(variables))
    assert_identical(expected_data_array, concatenate(data_arrays))
    assert_identical(expected_dataset, concatenate(datasets))

    identity = lambda x: x
    # must also be a core dimension
    with pytest.raises(ValueError):
        apply_ufunc(identity, variables[0], exclude_dims={'x'})
def xr_polyfit(obj, dim, ix=None, deg=0.5, poly='hermite'):
    """Fit a polynomial of degree ``deg`` using least-squares along ``dim``.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to fit.
    dim : str
        The dimension to fit along.
    ix : {None, int, array_like}, optional
        If ``None``, interpolate the polynomial at the original x points.
        If ``int``, linearly space this many points along the range of the
        original data and interpolate with these. If array-like,
        interpolate at these given points.
    deg : int or float, optional
        The degree of the polynomial to fit. Used directly if integer. If
        a float with ``0.0 < deg < 1.0``, the proportion of the total
        possible degree to use.
    poly : {'chebyshev', 'polynomial', 'legendre', 'laguerre', 'hermite'}, optional
        The type of polynomial to fit.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset

    See Also
    --------
    xr_unispline
    """
    core_in = [(dim,), (dim,)]
    fit_args = (obj[dim], obj)

    if ix is None:
        # Evaluate at the original sample points: output keeps ``dim``.
        return apply_ufunc(
            _broadcast_polyfit, *fit_args,
            kwargs={'ix': ix, 'axis': -1, 'deg': deg, 'poly': poly},
            input_core_dims=core_in,
            output_core_dims=[(dim,)])

    if isinstance(ix, int):
        # An int means "this many evenly spaced points over the range".
        ix = np.linspace(float(obj[dim].min()), float(obj[dim].max()), ix)

    # New sample points: evaluate onto a temporary dim, then relabel it.
    out = apply_ufunc(
        _broadcast_polyfit, *fit_args,
        kwargs={'ix': ix, 'axis': -1, 'deg': deg, 'poly': poly},
        input_core_dims=core_in,
        output_core_dims=[('__temp_dim__',)])
    out['__temp_dim__'] = ix
    return out.rename({'__temp_dim__': dim})
def test_output_wrong_number():
    """apply_ufunc must reject functions returning the wrong output count."""
    variable = xr.Variable('x', np.arange(10))

    identity = lambda x: x
    tuple3x = lambda x: (x, x, x)

    # one result returned where two were declared
    with raises_regex(ValueError, 'number of outputs'):
        apply_ufunc(identity, variable, output_core_dims=[(), ()])

    # three results returned where two were declared
    with raises_regex(ValueError, 'number of outputs'):
        apply_ufunc(tuple3x, variable, output_core_dims=[(), ()])
def test_apply_input_core_dimension():
    # input_core_dims moves the named dim to the end, so [..., 0] drops it
    def first_element(obj, dim):
        def func(x):
            return x[..., 0]
        return apply_ufunc(func, obj, input_core_dims=[[dim]])

    array = np.array([[1, 2], [3, 4]])
    variable = xr.Variable(['x', 'y'], array)
    data_array = xr.DataArray(variable, {'x': ['a', 'b'], 'y': [-1, -2]})
    dataset = xr.Dataset({'data': data_array})

    expected_variable_x = xr.Variable(['y'], [1, 2])
    expected_data_array_x = xr.DataArray(expected_variable_x,
                                         {'y': [-1, -2]})
    expected_dataset_x = xr.Dataset({'data': expected_data_array_x})

    expected_variable_y = xr.Variable(['x'], [1, 3])
    expected_data_array_y = xr.DataArray(expected_variable_y,
                                         {'x': ['a', 'b']})
    expected_dataset_y = xr.Dataset({'data': expected_data_array_y})

    assert_identical(expected_variable_x, first_element(variable, 'x'))
    assert_identical(expected_variable_y, first_element(variable, 'y'))
    assert_identical(expected_data_array_x, first_element(data_array, 'x'))
    assert_identical(expected_data_array_y, first_element(data_array, 'y'))
    assert_identical(expected_dataset_x, first_element(dataset, 'x'))
    assert_identical(expected_dataset_y, first_element(dataset, 'y'))

    # also works through groupby objects
    assert_identical(expected_data_array_x,
                     first_element(data_array.groupby('y'), 'x'))
    assert_identical(expected_dataset_x,
                     first_element(dataset.groupby('y'), 'x'))

    def multiply(*args):
        val = args[0]
        for arg in args[1:]:
            val = val * arg
        return val

    # regression test for GH:2341 -- a bare ndarray argument still needs
    # its own (empty) entry in input_core_dims
    with pytest.raises(ValueError):
        apply_ufunc(multiply, data_array, data_array['y'].values,
                    input_core_dims=[['y']], output_core_dims=[['y']])
    expected = xr.DataArray(multiply(data_array, data_array['y']),
                            dims=['x', 'y'], coords=data_array.coords)
    actual = apply_ufunc(multiply, data_array, data_array['y'].values,
                         input_core_dims=[['y'], []],
                         output_core_dims=[['y']])
    assert_identical(expected, actual)
def original_and_stack_negative(obj):
    """Return ``(obj, stack([obj, -obj]))`` via the legacy ``signature`` API,
    labelling the new 'sign' dimension on xarray outputs."""
    def pair_with_negation(x):
        return (x, xr.core.npcompat.stack([x, -x], axis=-1))

    sig = ([()], [(), ('sign',)])
    result = apply_ufunc(pair_with_negation, obj, signature=sig)
    if isinstance(result[1], (xr.Dataset, xr.DataArray)):
        result[1].coords['sign'] = [1, -1]
    return result
def xr_diff_u_err(obj, dim):
    """Propagate error through uneven-third-order finite difference
    derivative.

    If you have calculated a derivative already using ``xr_diff_u``, and
    you have data about the uncertainty on the original data, this
    function propagates that error through to be an error on the
    derivative.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to differentiate.
    dim : str
        The dimension to differentiate along.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # broadcast the helper over (coordinate, data) with the core dim last
    return apply_ufunc(
        _broadcast_diff_u_err,
        obj[dim], obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'axis': -1})
def xr_diff_u(obj, dim):
    """Uneven-third-order finite difference derivative [1].

    [1] Singh, Ashok K., and B. S. Bhadauria. "Finite difference formulae
    for unequal sub-intervals using lagrange's interpolation formula."
    International Journal of Mathematics and Analysis 3.17 (2009):
    815-827.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to differentiate.
    dim : str
        The dimension to differentiate along.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # broadcast the helper over (coordinate, data) with the core dim last
    return apply_ufunc(
        _broadcast_diff_u,
        obj[dim], obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'axis': -1})
def test_vectorize():
    """vectorize=True applies pandas_median per 'x' slice over core dim 'y'."""
    arr = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    expected = xr.DataArray([1, 2], dims=['x'])
    actual = apply_ufunc(
        pandas_median, arr,
        input_core_dims=[['y']],
        vectorize=True)
    assert_identical(expected, actual)
def original_and_stack_negative(obj):
    """Return ``(obj, stack([obj, -obj]))``, labelling the new 'sign' dim
    on xarray outputs."""
    pair = lambda x: (x, np.stack([x, -x], axis=-1))
    result = apply_ufunc(pair, obj, output_core_dims=[[], ['sign']])
    if isinstance(result[1], (xr.Dataset, xr.DataArray)):
        result[1].coords['sign'] = [1, -1]
    return result
def stack_negative(obj):
    """Stack ``obj`` with its negation along a new 'sign' core dimension,
    labelling the coordinate on xarray outputs."""
    pair = lambda x: np.stack([x, -x], axis=-1)
    result = apply_ufunc(pair, obj, output_core_dims=[['sign']])
    if isinstance(result, (xr.Dataset, xr.DataArray)):
        result.coords['sign'] = [1, -1]
    return result
def xr_interp(obj, dim, ix=100, order=3):
    """Interpolate along axis ``dim`` using
    :func:`scipy.interpolate.interp1d`.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to interpolate.
    dim : str
        The axis to interpolate along.
    ix : int or array
        If int, interpolate to this many points spaced evenly along the
        range of the original data. If array, interpolate to those points
        directly.
    order : int
        Supplied to :func:`scipy.interpolate.interp1d` as the order of
        interpolation.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset

    See Also
    --------
    xr_pchip
    """
    core_in = [(dim,), (dim,)]
    interp_args = (obj[dim], obj)

    if ix is None:
        # Interpolate back onto the original sample points.
        return apply_ufunc(
            _broadcast_interp, *interp_args,
            kwargs={'ix': ix, 'axis': -1, 'order': order},
            input_core_dims=core_in,
            output_core_dims=[(dim,)])

    if isinstance(ix, int):
        # An int means "this many evenly spaced points over the range".
        ix = np.linspace(float(obj[dim].min()), float(obj[dim].max()), ix)

    # New sample points: evaluate onto a temporary dim, then relabel it.
    out = apply_ufunc(
        _broadcast_interp, *interp_args,
        kwargs={'ix': ix, 'axis': -1, 'order': order},
        input_core_dims=core_in,
        output_core_dims=[('__temp_dim__',)])
    out['__temp_dim__'] = ix
    return out.rename({'__temp_dim__': dim})
def xr_filter_wiener(obj, dim, mysize=5, noise=1e-2):
    """Apply a Wiener filter along ``dim`` via the broadcast helper.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to filter.
    dim : str
        The dimension to filter along.
    mysize : int, optional
        Filter window size, forwarded to the helper.
    noise : scalar, optional
        Noise-power estimate, forwarded to the helper.
    """
    return apply_ufunc(
        _broadcast_filter_wiener,
        obj[dim], obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'mysize': mysize, 'noise': noise, 'axis': -1})
def test_vectorize_dask():
    """vectorize=True combined with dask='parallelized' on a chunked input."""
    arr = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    expected = xr.DataArray([1, 2], dims=['x'])
    actual = apply_ufunc(
        pandas_median, arr.chunk({'x': 1}),
        input_core_dims=[['y']],
        vectorize=True,
        dask='parallelized',
        output_dtypes=[float])
    assert_identical(expected, actual)
def concatenate(objects, dim='x'):
    """Concatenate along ``dim`` via the legacy ``signature`` API,
    restitching the coordinate by hand afterwards."""
    def glue(*pieces):
        return np.concatenate(pieces, axis=-1)

    sig = ([(dim,)] * len(objects), [(dim,)])
    # plain ndarrays have no .coords and contribute nothing
    new_coord = np.concatenate(
        [obj.coords[dim] if hasattr(obj, 'coords') else []
         for obj in objects])
    result = apply_ufunc(glue, *objects, signature=sig, exclude_dims={dim})
    if isinstance(result, (xr.Dataset, xr.DataArray)):
        result.coords[dim] = new_coord
    return result
def test_vectorize():
    """vectorize=True (requires numpy >= 1.12) applies pandas_median per
    'x' slice over core dim 'y'."""
    if LooseVersion(np.__version__) < LooseVersion('1.12.0'):
        pytest.skip('numpy 1.12 or later to support vectorize=True.')

    arr = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    expected = xr.DataArray([1, 2], dims=['x'])
    actual = apply_ufunc(
        pandas_median, arr,
        input_core_dims=[['y']],
        vectorize=True)
    assert_identical(expected, actual)
def test_apply_dask():
    # Legacy dask handling via the ``dask_array=`` keyword.
    import dask.array as da

    array = da.ones((2,), chunks=2)
    variable = _NoCacheVariable('x', array)
    coords = xr.DataArray(variable).coords.variables
    data_array = xr.DataArray(variable, coords, fastpath=True)
    dataset = xr.Dataset({'y': variable})

    identity = lambda x: x

    # encountered dask array, but did not set dask_array='allowed'
    with pytest.raises(ValueError):
        apply_ufunc(identity, array)
    with pytest.raises(ValueError):
        apply_ufunc(identity, variable)
    with pytest.raises(ValueError):
        apply_ufunc(identity, data_array)
    with pytest.raises(ValueError):
        apply_ufunc(identity, dataset)

    # unknown setting for dask array handling
    with pytest.raises(ValueError):
        apply_ufunc(identity, array, dask_array='auto')

    def dask_safe_identity(x):
        return apply_ufunc(identity, x, dask_array='allowed')

    # a raw dask array passes straight through unchanged
    assert array is dask_safe_identity(array)

    actual = dask_safe_identity(variable)
    assert isinstance(actual.data, da.Array)
    assert_identical(variable, actual)

    actual = dask_safe_identity(data_array)
    assert isinstance(actual.data, da.Array)
    assert_identical(data_array, actual)

    actual = dask_safe_identity(dataset)
    assert isinstance(actual['y'].data, da.Array)
    assert_identical(dataset, actual)
def test_vectorize_dask():
    """vectorize=True (numpy >= 1.12) with dask='parallelized' chunked input."""
    if LooseVersion(np.__version__) < LooseVersion('1.12.0'):
        pytest.skip('numpy 1.12 or later to support vectorize=True.')

    arr = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    expected = xr.DataArray([1, 2], dims=['x'])
    actual = apply_ufunc(
        pandas_median, arr.chunk({'x': 1}),
        input_core_dims=[['y']],
        vectorize=True,
        dask='parallelized',
        output_dtypes=[float])
    assert_identical(expected, actual)
def concatenate(objects, dim='x'):
    """Concatenate along ``dim``; exclude_dims is required because the
    inputs' sizes along ``dim`` differ."""
    glue = lambda *pieces: np.concatenate(pieces, axis=-1)
    result = apply_ufunc(glue, *objects,
                         input_core_dims=[[dim]] * len(objects),
                         output_core_dims=[[dim]],
                         exclude_dims={dim})
    if isinstance(result, (xr.Dataset, xr.DataArray)):
        # note: this will fail if dim is not a coordinate on any input
        result.coords[dim] = np.concatenate(
            [obj.coords[dim] for obj in objects])
    return result
def test_apply_exclude():
    # Concatenation along a dim whose size differs between inputs requires
    # exclude_dims; the coordinate must be restitched by hand afterwards.
    def concatenate(objects, dim="x"):
        def func(*x):
            return np.concatenate(x, axis=-1)
        result = apply_ufunc(func, *objects,
                             input_core_dims=[[dim]] * len(objects),
                             output_core_dims=[[dim]],
                             exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            # note: this will fail if dim is not a coordinate on any input
            new_coord = np.concatenate(
                [obj.coords[dim] for obj in objects])
            result.coords[dim] = new_coord
        return result

    arrays = [np.array([1]), np.array([2, 3])]
    variables = [xr.Variable("x", a) for a in arrays]
    data_arrays = [
        xr.DataArray(v, {
            "x": c,
            "y": ("x", range(len(c)))
        }) for v, c in zip(variables, [["a"], ["b", "c"]])
    ]
    datasets = [xr.Dataset({"data": data_array})
                for data_array in data_arrays]

    expected_array = np.array([1, 2, 3])
    expected_variable = xr.Variable("x", expected_array)
    expected_data_array = xr.DataArray(expected_variable,
                                       [("x", list("abc"))])
    expected_dataset = xr.Dataset({"data": expected_data_array})

    assert_identical(expected_array, concatenate(arrays))
    assert_identical(expected_variable, concatenate(variables))
    assert_identical(expected_data_array, concatenate(data_arrays))
    assert_identical(expected_dataset, concatenate(datasets))

    # must also be a core dimension
    with pytest.raises(ValueError):
        apply_ufunc(identity, variables[0], exclude_dims={"x"})
def test_vectorize_dask_new_output_dims():
    # regression test for GH3574
    # run vectorization in dask.array.gufunc by using `dask='parallelized'`
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    func = lambda x: x[np.newaxis, ...]
    expected = data_array.expand_dims("z")
    actual = apply_ufunc(
        func,
        data_array.chunk({"x": 1}),
        output_core_dims=[["z"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        dask_gufunc_kwargs=dict(output_sizes={"z": 1}),
    ).transpose(*expected.dims)
    assert_identical(expected, actual)

    # output_sizes keys must match a declared output core dim
    with raises_regex(ValueError,
                      "dimension 'z1' in 'output_sizes' must correspond"):
        apply_ufunc(
            func,
            data_array.chunk({"x": 1}),
            output_core_dims=[["z"]],
            vectorize=True,
            dask="parallelized",
            output_dtypes=[float],
            dask_gufunc_kwargs=dict(output_sizes={"z1": 1}),
        )

    # every new output core dim needs an output_sizes entry
    with raises_regex(
            ValueError,
            "dimension 'z' in 'output_core_dims' needs corresponding"):
        apply_ufunc(
            func,
            data_array.chunk({"x": 1}),
            output_core_dims=[["z"]],
            vectorize=True,
            dask="parallelized",
            output_dtypes=[float],
        )
def test_apply_dask_parallelized():
    """dask='parallelized' keeps the result lazy with identical chunks."""
    import dask.array as da

    chunked = da.ones((2, 2), chunks=(1, 1))
    arr = xr.DataArray(chunked, dims=('x', 'y'))

    actual = apply_ufunc(identity, arr, dask='parallelized',
                         output_dtypes=[float])

    assert isinstance(actual.data, da.Array)
    assert actual.data.chunks == chunked.chunks
    assert_identical(arr, actual)
def test_vectorize_dask():
    """Vectorization runs in dask.array.gufunc via dask='parallelized'."""
    arr = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    expected = xr.DataArray([1, 2], dims=["x"])

    actual = apply_ufunc(
        pandas_median,
        arr.chunk({"x": 1}),
        input_core_dims=[["y"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
    )
    assert_identical(expected, actual)
def test_apply_dask_multiple_inputs():
    # apply_ufunc over two dask-backed inputs sharing core dim "z":
    # compare an eager reference against dask="allowed" and
    # dask="parallelized" execution.
    import dask.array as da

    def covariance(x, y):
        # sample covariance along the last axis
        return ((x - x.mean(axis=-1, keepdims=True)) *
                (y - y.mean(axis=-1, keepdims=True))).mean(axis=-1)

    rs = np.random.RandomState(42)
    array1 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    array2 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    data_array_1 = xr.DataArray(array1, dims=("x", "z"))
    data_array_2 = xr.DataArray(array2, dims=("y", "z"))

    # reference: computed eagerly on numpy data
    expected = apply_ufunc(
        covariance,
        data_array_1.compute(),
        data_array_2.compute(),
        input_core_dims=[["z"], ["z"]],
    )

    allowed = apply_ufunc(
        covariance,
        data_array_1,
        data_array_2,
        input_core_dims=[["z"], ["z"]],
        dask="allowed",
    )
    assert isinstance(allowed.data, da.Array)
    xr.testing.assert_allclose(expected, allowed.compute())

    parallelized = apply_ufunc(
        covariance,
        data_array_1,
        data_array_2,
        input_core_dims=[["z"], ["z"]],
        dask="parallelized",
        output_dtypes=[float],
    )
    assert isinstance(parallelized.data, da.Array)
    xr.testing.assert_allclose(expected, parallelized.compute())
def test_apply_dask():
    # Modern dask handling via the ``dask=`` keyword.
    import dask.array as da

    array = da.ones((2, ), chunks=2)
    variable = xr.Variable("x", array)
    coords = xr.DataArray(variable).coords.variables
    data_array = xr.DataArray(variable, dims=["x"], coords=coords)
    dataset = xr.Dataset({"y": variable})

    # encountered dask array, but did not set dask='allowed'
    with pytest.raises(ValueError):
        apply_ufunc(identity, array)
    with pytest.raises(ValueError):
        apply_ufunc(identity, variable)
    with pytest.raises(ValueError):
        apply_ufunc(identity, data_array)
    with pytest.raises(ValueError):
        apply_ufunc(identity, dataset)

    # unknown setting for dask array handling
    with pytest.raises(ValueError):
        apply_ufunc(identity, array, dask="unknown")

    def dask_safe_identity(x):
        return apply_ufunc(identity, x, dask="allowed")

    # a raw dask array passes straight through unchanged
    assert array is dask_safe_identity(array)

    actual = dask_safe_identity(variable)
    assert isinstance(actual.data, da.Array)
    assert_identical(variable, actual)

    actual = dask_safe_identity(data_array)
    assert isinstance(actual.data, da.Array)
    assert_identical(data_array, actual)

    actual = dask_safe_identity(dataset)
    assert isinstance(actual["y"].data, da.Array)
    assert_identical(dataset, actual)
def test_vectorize_dask_dtype_without_output_dtypes(data_array):
    """GH4015: dtype must survive vectorize=True when output_dtypes is
    omitted."""
    chunked = data_array.chunk({"x": 1})
    expected = data_array.copy()

    actual = apply_ufunc(
        identity,
        chunked,
        vectorize=True,
        dask="parallelized",
    )

    assert_identical(expected, actual)
    assert expected.dtype == actual.dtype
def test_vectorize_exclude_dims():
    """GH 3890: exclude_dims lets core dims of differing sizes vectorize."""
    a = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    b = xr.DataArray([[0, 1, 2, 3, 4], [1, 2, 3, 4, 5]], dims=("x", "y"))
    expected = xr.DataArray([3, 5], dims=["x"])

    actual = apply_ufunc(
        pandas_median_add, a, b,
        input_core_dims=[["y"], ["y"]],
        vectorize=True,
        exclude_dims=set("y"),
    )
    assert_identical(expected, actual)
def test_dataset_join():
    # Joining behaviour of apply_ufunc over Datasets with mismatched
    # indexes/variables (this variant carries its own numpy import).
    import numpy as np
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # dataset_join='outer' without a dataset_fill_value must raise TypeError
    with pytest.raises(TypeError):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        # helper: addition with explicit alignment behaviour and NaN fill
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    actual = add(ds0, ds1, 'outer', 'inner')
    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds1, 'outer', 'outer')
    assert_identical(actual, expected)

    # if variables don't match, join will perform add with np.nan
    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    actual = add(ds0, ds2, 'outer', 'inner')
    expected = xr.Dataset({'x': [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds2, 'outer', 'outer')
    expected = xr.Dataset({
        'a': ('x', [np.nan, np.nan, np.nan]),
        'b': ('x', [np.nan, np.nan, np.nan]),
        'x': [0, 1, 2]
    })
    assert_identical(actual, expected)
def test_vectorize_exclude_dims_dask():
    """GH 3890: exclude_dims + vectorize also works under dask='parallelized'."""
    a = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    b = xr.DataArray([[0, 1, 2, 3, 4], [1, 2, 3, 4, 5]], dims=("x", "y"))
    expected = xr.DataArray([3, 5], dims=["x"])

    actual = apply_ufunc(
        pandas_median_add,
        a.chunk({"x": 1}),
        b.chunk({"x": 1}),
        input_core_dims=[["y"], ["y"]],
        exclude_dims=set("y"),
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
    )
    assert_identical(expected, actual)
def test_vectorize_dask_new_output_dims():
    """Regression test for GH3574: new output core dims with output_sizes."""
    arr = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))

    def expand(x):
        return x[np.newaxis, ...]

    expected = arr.expand_dims("z")
    actual = apply_ufunc(
        expand,
        arr.chunk({"x": 1}),
        output_core_dims=[["z"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        output_sizes={"z": 1},
    ).transpose(*expected.dims)
    assert_identical(expected, actual)
def _ffill(arr, axis):
    """Generic implementation of ffill borrowed from
    xarray.core.missing.ffill.  Requires bottleneck to work.
    """
    import bottleneck as bn
    from xarray.core.computation import apply_ufunc

    # work around for bottleneck 178: cap n at the length of the axis
    fill_limit = arr.shape[axis]
    return apply_ufunc(
        bn.push,
        arr,
        dask="allowed",
        keep_attrs=True,
        output_dtypes=[arr.dtype],
        kwargs={"n": fill_limit, "axis": axis},
    )
def test_vectorize_dask_dtype_meta():
    """The dtype of ``meta`` takes precedence over ``output_dtypes``.

    Declares int via output_dtypes but float via meta; the result must be
    float.
    """
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    expected = xr.DataArray([1, 2], dims=["x"])

    actual = apply_ufunc(
        pandas_median,
        data_array.chunk({"x": 1}),
        input_core_dims=[["y"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[int],
        # np.float was deprecated in NumPy 1.20 and removed in 1.24; the
        # builtin ``float`` is the exact equivalent dtype specifier.
        meta=np.ndarray((0, 0), dtype=float),
    )

    assert_identical(expected, actual)
    assert float == actual.dtype
def xr_idxmin(obj, dim):
    """Find the coordinate of the minimum along ``dim``.

    Parameters
    ----------
    obj : xarray.DataArray or xarray.Dataset
        Object to find coordnate maximum in.
    dim : str
        Dimension along which to find maximum

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # Fill NaNs with +inf so they can never be the minimum, then mask out
    # slices that were entirely NaN.
    all_missing = obj.isnull().all(dim)
    located = apply_ufunc(
        gufunc_idxmin,
        obj.fillna(np.inf), obj[dim],
        input_core_dims=[(dim,), (dim,)],
        kwargs={'axis': -1},
        dask='allowed')
    return located.where(~all_missing)
def test_output_wrong_dims():
    """apply_ufunc must reject outputs whose rank disagrees with the
    declared output core dims."""
    variable = xr.Variable('x', np.arange(10))

    add_dim = lambda x: x[..., np.newaxis]
    remove_dim = lambda x: x[..., 0]

    # two new dims declared, only one produced
    with raises_regex(ValueError, 'unexpected number of dimensions'):
        apply_ufunc(add_dim, variable, output_core_dims=[('y', 'z')])

    # a dim was added but none declared
    with raises_regex(ValueError, 'unexpected number of dimensions'):
        apply_ufunc(add_dim, variable)

    # a dim was removed but none declared
    with raises_regex(ValueError, 'unexpected number of dimensions'):
        apply_ufunc(remove_dim, variable)
def test_output_wrong_dims():
    """apply_ufunc must reject outputs whose rank disagrees with the
    declared output core dims."""
    variable = xr.Variable("x", np.arange(10))

    add_dim = lambda x: x[..., np.newaxis]
    remove_dim = lambda x: x[..., 0]

    # two new dims declared, only one produced
    with raises_regex(ValueError, "unexpected number of dimensions"):
        apply_ufunc(add_dim, variable, output_core_dims=[("y", "z")])

    # a dim was added but none declared
    with raises_regex(ValueError, "unexpected number of dimensions"):
        apply_ufunc(add_dim, variable)

    # a dim was removed but none declared
    with raises_regex(ValueError, "unexpected number of dimensions"):
        apply_ufunc(remove_dim, variable)
def xr_idxmin(obj, dim):
    """Find the coordinate of the minimum along ``dim``.

    Parameters
    ----------
    obj : xarray.DataArray or xarray.Dataset
        Object to find coordnate maximum in.
    dim : str
        Dimension along which to find maximum

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # Fill NaNs with +inf so they can never be the minimum, then mask out
    # slices that were entirely NaN.
    all_missing = obj.isnull().all(dim)
    filled = obj.fillna(np.inf)
    located = apply_ufunc(
        gufunc_idxmin,
        filled, obj[dim],
        input_core_dims=[(dim, ), (dim, )],
        kwargs={'axis': -1},
        dask='allowed')
    return located.where(~all_missing)
def xr_filtfilt_bessel(obj, dim, N=2, Wn=0.4):
    """Filter (with forward and backward pass) data along ``dim`` using
    the bessel design :py:func:`scipy.signal.bessel`.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to apply signal filtering to.
    dim : str
        The dimension to filter along.
    N : int, optional
        The order of the filter.
    Wn : scalar, optional
        Critical frequency.
    """
    return apply_ufunc(
        _broadcast_filtfilt_bessel,
        obj[dim], obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'N': N, 'Wn': Wn, 'axis': -1})
def test_dataset_join():
    # Joining behaviour of apply_ufunc over Datasets with mismatched
    # indexes and mismatched data variables.
    ds0 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
    ds1 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]})

    # by default, cannot have different labels
    with raises_regex(ValueError, "indexes .* are not equal"):
        apply_ufunc(operator.add, ds0, ds1)
    # dataset_join without a dataset_fill_value is an error
    with raises_regex(TypeError, "must supply"):
        apply_ufunc(operator.add, ds0, ds1, dataset_join="outer")

    def add(a, b, join, dataset_join):
        # helper: addition with explicit alignment behaviour and NaN fill
        return apply_ufunc(
            operator.add,
            a,
            b,
            join=join,
            dataset_join=dataset_join,
            dataset_fill_value=np.nan,
        )

    actual = add(ds0, ds1, "outer", "inner")
    expected = xr.Dataset({"a": ("x", [np.nan, 101, np.nan]),
                           "x": [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds1, "outer", "outer")
    assert_identical(actual, expected)

    # mismatched data variables are an error without a dataset_join
    with raises_regex(ValueError, "data variable names"):
        apply_ufunc(operator.add, ds0, xr.Dataset({"b": 1}))

    ds2 = xr.Dataset({"b": ("x", [99, 3]), "x": [1, 2]})
    actual = add(ds0, ds2, "outer", "inner")
    expected = xr.Dataset({"x": [0, 1, 2]})
    assert_identical(actual, expected)

    # we used np.nan as the fill_value in add() above
    actual = add(ds0, ds2, "outer", "outer")
    expected = xr.Dataset(
        {
            "a": ("x", [np.nan, np.nan, np.nan]),
            "b": ("x", [np.nan, np.nan, np.nan]),
            "x": [0, 1, 2],
        }
    )
    assert_identical(actual, expected)
def xr_filtfilt_bessel(obj, dim, N=2, Wn=0.4):
    """Filter (with forward and backward pass) data along ``dim`` using
    the bessel design :py:func:`scipy.signal.bessel`.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to apply signal filtering to.
    dim : str
        The dimension to filter along.
    N : int, optional
        The order of the filter.
    Wn : scalar, optional
        Critical frequency.
    """
    filter_opts = {'N': N, 'Wn': Wn, 'axis': -1}
    # broadcast the helper over (coordinate, data) with the core dim last
    return apply_ufunc(
        _broadcast_filtfilt_bessel,
        obj[dim], obj,
        input_core_dims=[(dim, ), (dim, )],
        output_core_dims=[(dim, )],
        kwargs=filter_opts)
def test_dataset_join():
    # Joining behaviour of apply_ufunc over Datasets with mismatched
    # indexes and mismatched data variables.
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # by default, cannot have different labels
    with raises_regex(ValueError, 'indexes .* are not equal'):
        apply_ufunc(operator.add, ds0, ds1)
    # dataset_join without a dataset_fill_value is an error
    with raises_regex(TypeError, 'must supply'):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        # helper: addition with explicit alignment behaviour and NaN fill
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    actual = add(ds0, ds1, 'outer', 'inner')
    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds1, 'outer', 'outer')
    assert_identical(actual, expected)

    # mismatched data variables are an error without a dataset_join
    with raises_regex(ValueError, 'data variable names'):
        apply_ufunc(operator.add, ds0, xr.Dataset({'b': 1}))

    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    actual = add(ds0, ds2, 'outer', 'inner')
    expected = xr.Dataset({'x': [0, 1, 2]})
    assert_identical(actual, expected)

    # we used np.nan as the fill_value in add() above
    actual = add(ds0, ds2, 'outer', 'outer')
    expected = xr.Dataset({
        'a': ('x', [np.nan, np.nan, np.nan]),
        'b': ('x', [np.nan, np.nan, np.nan]),
        'x': [0, 1, 2]
    })
    assert_identical(actual, expected)
def test_dataset_join():
    # Joining behaviour of apply_ufunc over Datasets with mismatched
    # indexes and mismatched data variables.
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # by default, cannot have different labels
    with raises_regex(ValueError, 'indexes .* are not equal'):
        apply_ufunc(operator.add, ds0, ds1)
    # dataset_join without a dataset_fill_value is an error
    with raises_regex(TypeError, 'must supply'):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        # helper: addition with explicit alignment behaviour and NaN fill
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    actual = add(ds0, ds1, 'outer', 'inner')
    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)

    actual = add(ds0, ds1, 'outer', 'outer')
    assert_identical(actual, expected)

    # mismatched data variables are an error without a dataset_join
    with raises_regex(ValueError, 'data variable names'):
        apply_ufunc(operator.add, ds0, xr.Dataset({'b': 1}))

    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    actual = add(ds0, ds2, 'outer', 'inner')
    expected = xr.Dataset({'x': [0, 1, 2]})
    assert_identical(actual, expected)

    # we used np.nan as the fill_value in add() above
    actual = add(ds0, ds2, 'outer', 'outer')
    expected = xr.Dataset({'a': ('x', [np.nan, np.nan, np.nan]),
                           'b': ('x', [np.nan, np.nan, np.nan]),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)
def add(a, b, keep_attrs):
    """Add two objects, forwarding ``keep_attrs`` only when truthy so the
    default apply_ufunc behaviour is exercised otherwise."""
    if not keep_attrs:
        return apply_ufunc(operator.add, a, b)
    return apply_ufunc(operator.add, a, b, keep_attrs=keep_attrs)
def parallel_add(x, y):
    """Elementwise add via dask-parallelized apply_ufunc (int64 output)."""
    return apply_ufunc(
        operator.add, x, y,
        dask='parallelized',
        output_dtypes=[np.int64])
def first_element(obj, dim):
    """Select element 0 along ``dim`` by making it the (last) core dim."""
    take_first = lambda x: x[..., 0]
    return apply_ufunc(take_first, obj, input_core_dims=[[dim]])
def apply_truncate_broadcast_invalid(obj):
    """Apply ``truncate`` with no core dims declared; the size change on a
    broadcast dimension should be rejected by apply_ufunc."""
    return apply_ufunc(truncate, obj)
def add(a, b):
    """Return ``a + b`` computed through ``apply_ufunc``."""
    return apply_ufunc(operator.add, a, b)
def twice(obj):
    """Return ``obj`` duplicated as a 2-tuple through apply_ufunc."""
    duplicate = lambda x: (x, x)
    return apply_ufunc(duplicate, obj, output_core_dims=[[], []])
def stack_negative(obj):
    """Stack ``obj`` with its negation along a new 'sign' core dimension,
    evaluated lazily via dask='parallelized'."""
    pair = lambda x: np.stack([x, -x], axis=-1)
    return apply_ufunc(
        pair, obj,
        output_core_dims=[['sign']],
        dask='parallelized',
        output_dtypes=[obj.dtype],
        output_sizes={'sign': 2})
def add(a, b, join, dataset_join):
    """Add two objects with explicit index/variable alignment behaviour,
    filling missing variables with NaN."""
    return apply_ufunc(
        operator.add, a, b,
        join=join,
        dataset_join=dataset_join,
        dataset_fill_value=np.nan)
def apply_truncate_x_z(obj):
    """Apply ``truncate`` mapping core dim 'x' to a new core dim 'z'."""
    return apply_ufunc(
        truncate, obj,
        input_core_dims=[['x']],
        output_core_dims=[['z']])
def dask_safe_identity(x):
    """Identity through apply_ufunc with dask arrays explicitly allowed."""
    return apply_ufunc(identity, x, dask='allowed')
def apply_truncate_x_x_valid(obj):
    """Apply ``truncate`` on core dim 'x'; exclude_dims permits the size
    change along 'x'."""
    return apply_ufunc(
        truncate, obj,
        input_core_dims=[['x']],
        output_core_dims=[['x']],
        exclude_dims={'x'})
def parallel_identity(x):
    """Identity via dask='parallelized', preserving the input dtype."""
    return apply_ufunc(
        identity, x,
        dask='parallelized',
        output_dtypes=[x.dtype])
def test_apply_dask_parallelized_errors():
    # Exercise the error paths of dask='parallelized'.
    import dask.array as da

    array = da.ones((2, 2), chunks=(1, 1))
    data_array = xr.DataArray(array, dims=('x', 'y'))

    # multiple outputs are not supported
    with pytest.raises(NotImplementedError):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z'], ['z']],
                    dask='parallelized')
    # output_dtypes is mandatory
    with raises_regex(ValueError, 'dtypes'):
        apply_ufunc(identity, data_array, dask='parallelized')
    # output_dtypes must be a list
    with raises_regex(TypeError, 'list'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=float)
    # exactly one dtype per output
    with raises_regex(ValueError, 'must have the same length'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=[float, float])
    # new output core dims require output_sizes
    with raises_regex(ValueError, 'output_sizes'):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z']],
                    output_dtypes=[float],
                    dask='parallelized')
    # bare dask arrays are rejected
    with raises_regex(ValueError, 'at least one input is an xarray object'):
        apply_ufunc(identity, array, dask='parallelized')
    # a core dim may not span multiple chunks
    with raises_regex(ValueError, 'consists of multiple chunks'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=[float],
                    input_core_dims=[('y', )],
                    output_core_dims=[('y', )])
def test_apply_dask_parallelized_errors():
    # Exercise the error paths of dask='parallelized'.
    import dask.array as da

    array = da.ones((2, 2), chunks=(1, 1))
    data_array = xr.DataArray(array, dims=('x', 'y'))

    # multiple outputs are not supported
    with pytest.raises(NotImplementedError):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z'], ['z']],
                    dask='parallelized')
    # output_dtypes is mandatory
    with raises_regex(ValueError, 'dtypes'):
        apply_ufunc(identity, data_array, dask='parallelized')
    # output_dtypes must be a list
    with raises_regex(TypeError, 'list'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=float)
    # exactly one dtype per output
    with raises_regex(ValueError, 'must have the same length'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=[float, float])
    # new output core dims require output_sizes
    with raises_regex(ValueError, 'output_sizes'):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z']],
                    output_dtypes=[float],
                    dask='parallelized')
    # bare dask arrays are rejected
    with raises_regex(ValueError, 'at least one input is an xarray object'):
        apply_ufunc(identity, array, dask='parallelized')
    # a core dim may not span multiple chunks
    with raises_regex(ValueError, 'consists of multiple chunks'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=[float],
                    input_core_dims=[('y',)],
                    output_core_dims=[('y',)])