Пример #1
0
def test_categorical_mean(ddf):
    """Check the per-category ``ds.mean`` reduction of ``c.points`` on ``ddf``.

    The same expected values are asserted for the ``'f32'`` and ``'f64'``
    columns, first categorised by the ``'cat'`` column and then by
    ``ds.category_modulo('cat_int', modulo=4, offset=10)``.
    """
    expected_values = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])
    float_columns = ('f32', 'f64')

    # Categorising by 'cat': the extra axis is labelled 'a'..'d'.
    expected = xr.DataArray(
        expected_values,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.mean(column)))
        assert_eq_xr(result, expected)

    # Categorising by category_modulo on 'cat_int': the extra axis is 0..3.
    expected = xr.DataArray(
        expected_values,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in float_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        reduction = ds.by(categorizer, ds.mean(column))
        result = c.points(ddf, 'x', 'y', reduction)
        assert_eq_xr(result, expected)
Пример #2
0
def test_categorical_sum(ddf):
    """Check the per-category ``ds.sum`` reduction of ``c.points`` on ``ddf``.

    Covers the integer columns (``'i32'``, ``'i64'``) and the float columns
    (``'f32'``, ``'f64'``), categorised by the ``'cat'`` column, by
    ``ds.category_modulo`` on ``'cat_int'`` and by ``ds.category_binning``
    on the float columns themselves.
    """
    int_columns = ('i32', 'i64')
    float_columns = ('f32', 'f64')

    int_sums = np.array([
        [[10, nan, nan, nan], [nan, nan, 60, nan]],
        [[nan, 35, nan, nan], [nan, nan, nan, 85]],
    ])

    # Integer columns, categorised by 'cat' (axis labelled 'a'..'d').
    expected = xr.DataArray(
        int_sums,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in int_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, expected)

    # Integer columns, categorised by category_modulo (axis labelled 0..3).
    expected = xr.DataArray(
        int_sums,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in int_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        result = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.sum(column)))
        assert_eq_xr(result, expected)

    # Float columns expect 8.0 instead of 10 in the first cell
    # (presumably because the float fixture columns hold a NaN there --
    # confirm against the fixture definition).
    float_sums = np.array([
        [[8.0, nan, nan, nan], [nan, nan, 60.0, nan]],
        [[nan, 35.0, nan, nan], [nan, nan, nan, 85.0]],
    ])
    expected = xr.DataArray(
        float_sums,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in float_columns:
        result = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(result, expected)

    # Binning adds one extra category (it collects NaNs and out-of-bounds
    # values); its expected sum is NaN in every cell.
    padded_sums = np.append(
        float_sums, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for column in float_columns:
        expected = xr.DataArray(
            padded_sums,
            coords=(coords + [range(5)]),
            dims=(dims + [column]),
        )
        binning = ds.category_binning(column, 0, 20, 4)
        result = c.points(ddf, 'x', 'y', ds.by(binning, ds.sum(column)))
        assert_eq_xr(result, expected)
Пример #3
0
def test_categorical_std(ddf):
    """Check the per-category ``ds.std`` reduction of ``c.points`` on ``ddf``.

    The test is skipped when ``cudf`` is available and ``ddf._meta`` is a
    ``cudf.DataFrame``. Otherwise the ``'f32'`` and ``'f64'`` columns are
    checked when categorised by the ``'cat'`` column, by
    ``ds.category_modulo`` on ``'cat_int'`` and by ``ds.category_binning``
    on the float columns themselves.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        # The previous message read "is yet supported", which stated the
        # opposite of the reason for skipping.
        pytest.skip("The 'std' reduction is not yet supported on the GPU")

    # Expected standard deviations, written as square roots of the variances.
    sol = np.sqrt(
        np.array([[[2.5, nan, nan, nan], [nan, nan, 2., nan]],
                  [[nan, 2., nan, nan], [nan, nan, nan, 2.]]]))

    # Categorising by 'cat': the extra axis is labelled 'a'..'d'.
    out = xr.DataArray(sol,
                       coords=(coords + [['a', 'b', 'c', 'd']]),
                       dims=(dims + ['cat']))

    # NOTE(review): only these two comparisons pass a third positional
    # ``True`` to assert_eq_xr (presumably a tolerance/"close" flag --
    # confirm against the helper); the later ones compare with the default.
    agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.std('f32')))
    assert_eq_xr(agg, out, True)

    agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.std('f64')))
    assert_eq_xr(agg, out, True)

    # Categorising by category_modulo on 'cat_int': the extra axis is 0..3.
    out = xr.DataArray(sol,
                       coords=(coords + [range(4)]),
                       dims=(dims + ['cat_int']))
    agg = c.points(
        ddf, 'x', 'y',
        ds.by(ds.category_modulo('cat_int', modulo=4, offset=10),
              ds.std('f32')))
    assert_eq_xr(agg, out)

    agg = c.points(
        ddf, 'x', 'y',
        ds.by(ds.category_modulo('cat_int', modulo=4, offset=10),
              ds.std('f64')))
    assert_eq_xr(agg, out)

    # add an extra category (this will count nans and out of bounds);
    # its expected value is NaN in every cell
    sol = np.append(sol, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for col in 'f32', 'f64':
        out = xr.DataArray(sol,
                           coords=(coords + [range(5)]),
                           dims=(dims + [col]))
        agg = c.points(ddf, 'x', 'y',
                       ds.by(ds.category_binning(col, 0, 20, 4), ds.std(col)))
        assert_eq_xr(agg, out)
Пример #4
0
def test_categorical_sum(ddf):
    """Check the per-category ``ds.sum`` reduction of ``c.points`` on ``ddf``.

    Integer columns (``'i32'``, ``'i64'``) are checked when categorised by
    the ``'cat'`` column and by ``ds.category_modulo`` on ``'cat_int'``;
    float columns (``'f32'``, ``'f64'``) are checked when categorised by
    ``'cat'``.
    """
    cat_labels = ['a', 'b', 'c', 'd']

    int_sums = np.array([
        [[10, nan, nan, nan], [nan, nan, 60, nan]],
        [[nan, 35, nan, nan], [nan, nan, nan, 85]],
    ])

    # Integer columns, categorised by 'cat'.
    expected = xr.DataArray(
        int_sums,
        coords=(coords + [cat_labels]),
        dims=(dims + ['cat']),
    )
    for int_col in ('i32', 'i64'):
        reduction = ds.by('cat', ds.sum(int_col))
        assert_eq_xr(c.points(ddf, 'x', 'y', reduction), expected)

    # Integer columns, categorised by category_modulo (axis labelled 0..3).
    expected = xr.DataArray(
        int_sums,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for int_col in ('i32', 'i64'):
        reduction = ds.by(
            ds.category_modulo('cat_int', modulo=4, offset=10),
            ds.sum(int_col),
        )
        assert_eq_xr(c.points(ddf, 'x', 'y', reduction), expected)

    # Float columns expect 8.0 instead of 10 in the first cell
    # (presumably a NaN in the float fixture columns -- confirm).
    float_sums = np.array([
        [[8.0, nan, nan, nan], [nan, nan, 60.0, nan]],
        [[nan, 35.0, nan, nan], [nan, nan, nan, 85.0]],
    ])
    expected = xr.DataArray(
        float_sums,
        coords=(coords + [cat_labels]),
        dims=(dims + ['cat']),
    )
    for float_col in ('f32', 'f64'):
        reduction = ds.by('cat', ds.sum(float_col))
        assert_eq_xr(c.points(ddf, 'x', 'y', reduction), expected)
Пример #5
0
def test_count_cat(ddf):
    """Check categorical counting of ``c.points`` on ``ddf``.

    Counts are verified via ``ds.count_cat('cat')``, via ``ds.by`` with
    ``ds.category_modulo`` on ``'cat_int'``, and via ``ds.by`` with
    ``ds.category_binning`` on the integer and float columns.
    """
    counts = np.array([
        [[5, 0, 0, 0], [0, 0, 5, 0]],
        [[0, 5, 0, 0], [0, 0, 0, 5]],
    ])

    expected = xr.DataArray(
        counts,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    result = c.points(ddf, 'x', 'y', ds.count_cat('cat'))
    assert_eq_xr(result, expected)

    # Categorising by (cat_int - 10) % 4 ought to give the same counts.
    expected = xr.DataArray(
        counts,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    modulo_cat = ds.category_modulo('cat_int', modulo=4, offset=10)
    result = c.points(ddf, 'x', 'y', ds.by(modulo_cat, ds.count()))
    assert_eq_xr(result, expected)

    # The binning checks live here as well because they expect the same
    # result with only slight tweaks.

    # Binning adds one extra category (it counts NaNs and out-of-bounds
    # values), so pad the expected counts with a fifth, all-zero category.
    binned_counts = np.append(counts, [[[0], [0]], [[0], [0]]], axis=2)

    def check_binned(columns):
        # Bin each column over [0, 20] into 4 bins and compare the counts
        # with the current contents of ``binned_counts``.
        for column in columns:
            wanted = xr.DataArray(
                binned_counts,
                coords=(coords + [range(5)]),
                dims=(dims + [column]),
            )
            binning = ds.category_binning(column, 0, 20, 4)
            got = c.points(ddf, 'x', 'y', ds.by(binning, ds.count()))
            assert_eq_xr(got, wanted)

    # Integer arange columns: same result as for count_cat.
    check_binned(('i32', 'i64'))

    # Float arange columns: element 2 has a NaN, so the first bin is one
    # short and the extra (NaN) bin gains one.
    binned_counts[0, 0, 0] = 4
    binned_counts[0, 0, 4] = 1
    check_binned(('f32', 'f64'))