def test_categorical_mean(ddf):
    """Check ``ds.by(..., ds.mean(col))`` for the 'f32' and 'f64' columns.

    The same expected array is compared against two categorisations:
    the 'cat' column directly, and ``category_modulo`` applied to 'cat_int'.
    """
    expected = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])
    value_columns = ('f32', 'f64')

    # Categorise on the 'cat' column; category axis labelled 'a'..'d'.
    by_label = xr.DataArray(
        expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in value_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.mean(column)))
        assert_eq_xr(result, by_label)

    # Categorise on 'cat_int' via category_modulo; category axis is range(4).
    by_modulo = xr.DataArray(
        expected,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in value_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.mean(column)))
        assert_eq_xr(result, by_modulo)
def test_categorical_sum(ddf):
    """Check ``ds.by(..., ds.sum(col))`` for integer and float columns.

    Integer columns ('i32', 'i64') are checked by 'cat' and by
    ``category_modulo`` on 'cat_int'; float columns ('f32', 'f64') are checked
    by 'cat' and by ``category_binning`` on the column itself.

    NOTE(review): another function with this exact name is defined later in
    this file. Python keeps the later binding, so this definition is not the
    one pytest collects unless one of the two is renamed.
    """
    int_expected = np.array([
        [[10, nan, nan, nan], [nan, nan, 60, nan]],
        [[nan, 35, nan, nan], [nan, nan, nan, 85]],
    ])
    int_columns = ('i32', 'i64')

    # Integer sums, categorised on the 'cat' column.
    by_label = xr.DataArray(
        int_expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in int_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, by_label)

    # Integer sums, categorised on 'cat_int' via category_modulo.
    by_modulo = xr.DataArray(
        int_expected,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in int_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.sum(column)))
        assert_eq_xr(result, by_modulo)

    # Float sums use a different expected array (first entry is 8.0, not 10).
    float_expected = np.array([
        [[8.0, nan, nan, nan], [nan, nan, 60.0, nan]],
        [[nan, 35.0, nan, nan], [nan, nan, nan, 85.0]],
    ])
    float_columns = ('f32', 'f64')

    by_label = xr.DataArray(
        float_expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, by_label)

    # add an extra category (this will count nans and out of bounds)
    binned_expected = np.append(
        float_expected, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for column in float_columns:
        by_bin = xr.DataArray(
            binned_expected,
            coords=(coords + [range(5)]),
            dims=(dims + [column]),
        )
        binner = ds.category_binning(column, 0, 20, 4)
        result = c.points(ddf, 'x', 'y', ds.by(binner, ds.sum(column)))
        assert_eq_xr(result, by_bin)
def test_categorical_std(ddf):
    """Check ``ds.by(..., ds.std(col))`` for the 'f32' and 'f64' columns.

    Skipped when ``cudf`` is available and ``ddf._meta`` is a
    ``cudf.DataFrame``. Otherwise the expected array is compared against
    three categorisations: the 'cat' column, ``category_modulo`` on
    'cat_int', and ``category_binning`` on the value column itself.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        # Fixed wording: the original message omitted "not", which made it
        # say the opposite of the reason for skipping.
        pytest.skip("The 'std' reduction is not yet supported on the GPU")

    # Expected values are written as variances and converted with sqrt.
    expected = np.sqrt(np.array([
        [[2.5, nan, nan, nan], [nan, nan, 2., nan]],
        [[nan, 2., nan, nan], [nan, nan, nan, 2.]],
    ]))
    value_columns = ('f32', 'f64')

    # Categorise on the 'cat' column.
    by_label = xr.DataArray(
        expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in value_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.std(column)))
        # Third positional argument is presumably a "compare approximately"
        # flag -- TODO confirm against assert_eq_xr's signature.
        assert_eq_xr(result, by_label, True)

    # Categorise on 'cat_int' via category_modulo.
    # NOTE(review): these and the binned comparisons below omit the third
    # argument, exactly as in the original.
    by_modulo = xr.DataArray(
        expected,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in value_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.std(column)))
        assert_eq_xr(result, by_modulo)

    # add an extra category (this will count nans and out of bounds)
    binned_expected = np.append(
        expected, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for column in value_columns:
        by_bin = xr.DataArray(
            binned_expected,
            coords=(coords + [range(5)]),
            dims=(dims + [column]),
        )
        binner = ds.category_binning(column, 0, 20, 4)
        result = c.points(ddf, 'x', 'y', ds.by(binner, ds.std(column)))
        assert_eq_xr(result, by_bin)
def test_categorical_sum(ddf):
    """Check ``ds.by(..., ds.sum(col))`` for integer and float columns.

    This definition shares its name with an earlier one in this file and
    therefore replaces it when the module is imported. The earlier body also
    exercised ``category_binning`` on the float columns; those checks are
    included here so that they actually run.

    Integer columns ('i32', 'i64') are checked by 'cat' and by
    ``category_modulo`` on 'cat_int'; float columns ('f32', 'f64') are checked
    by 'cat' and by ``category_binning`` on the column itself.
    """
    int_expected = np.array([
        [[10, nan, nan, nan], [nan, nan, 60, nan]],
        [[nan, 35, nan, nan], [nan, nan, nan, 85]],
    ])
    int_columns = ('i32', 'i64')

    # Integer sums, categorised on the 'cat' column.
    by_label = xr.DataArray(
        int_expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in int_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, by_label)

    # Integer sums, categorised on 'cat_int' via category_modulo.
    by_modulo = xr.DataArray(
        int_expected,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in int_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.sum(column)))
        assert_eq_xr(result, by_modulo)

    # Float sums use a different expected array (first entry is 8.0, not 10).
    float_expected = np.array([
        [[8.0, nan, nan, nan], [nan, nan, 60.0, nan]],
        [[nan, 35.0, nan, nan], [nan, nan, nan, 85.0]],
    ])
    float_columns = ('f32', 'f64')

    by_label = xr.DataArray(
        float_expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, by_label)

    # add an extra category (this will count nans and out of bounds)
    binned_expected = np.append(
        float_expected, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for column in float_columns:
        by_bin = xr.DataArray(
            binned_expected,
            coords=(coords + [range(5)]),
            dims=(dims + [column]),
        )
        binner = ds.category_binning(column, 0, 20, 4)
        result = c.points(ddf, 'x', 'y', ds.by(binner, ds.sum(column)))
        assert_eq_xr(result, by_bin)
def test_count_cat(ddf):
    """Check per-category counts under several categorisation schemes.

    Covers ``ds.count_cat('cat')``, ``category_modulo`` on 'cat_int', and
    ``category_binning`` on the integer and float value columns, all against
    one expected array with small adjustments.
    """
    counts = np.array([
        [[5, 0, 0, 0], [0, 0, 5, 0]],
        [[0, 5, 0, 0], [0, 0, 0, 5]],
    ])

    by_label = xr.DataArray(
        counts,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    result = c.points(ddf, 'x', 'y', ds.count_cat('cat'))
    assert_eq_xr(result, by_label)

    # categorizing by (cat_int-10)%4 ought to give the same result
    by_modulo = xr.DataArray(
        counts,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
    result = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.count()))
    assert_eq_xr(result, by_modulo)

    # The binning checks live in this test because they expect the same
    # result with only slight tweaks.
    # add an extra category (this will count nans and out of bounds)
    binned_counts = np.append(counts, [[[0], [0]], [[0], [0]]], axis=2)

    def check_binned(columns):
        # Reads binned_counts at call time, so the in-place edits made
        # between the two calls below are picked up.
        for column in columns:
            by_bin = xr.DataArray(
                binned_counts,
                coords=(coords + [range(5)]),
                dims=(dims + [column]),
            )
            binner = ds.category_binning(column, 0, 20, 4)
            binned = c.points(ddf, 'x', 'y', ds.by(binner, ds.count()))
            assert_eq_xr(binned, by_bin)

    # Binning the integer arange columns over [0, 20] into 4 bins: same
    # result as for count_cat.
    check_binned(('i32', 'i64'))

    # As above, but for the float arange columns. Per the original note,
    # element 2 has a nan, so the first bin is one short and the nan bin is +1.
    binned_counts[0, 0, 0] = 4
    binned_counts[0, 0, 4] = 1
    check_binned(('f32', 'f64'))