Пример #1
0
def test_crosstab_mismatch_zones_values_shape():
    """crosstab is expected to raise when zones and values shapes differ."""
    # zones has shape (1, 2)
    zones = xa.DataArray(np.array([[1, 2]]))

    # values has shape (1, 1, 3): its first two dims do not match zones
    values = xa.DataArray(np.array([[[1, 2, np.nan]]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']

    # the exception info is not inspected, so it is not bound to a name
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)
Пример #2
0
def test_crosstab_invalid_layer():
    """crosstab is expected to raise when given an unknown ``layer``."""
    zones = xa.DataArray(np.array([[1, 2]]))

    values = xa.DataArray(np.array([[[1, 2, np.nan]]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']

    # 'cat' is neither a dim name of ``values`` nor one of its 'race' labels
    layer = 'cat'
    # NOTE(review): zones is (1, 2) while values is (1, 1, 3), so the
    # exception could also come from a shape check rather than the layer
    # check -- confirm against the crosstab implementation.
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values, layer=layer)
Пример #3
0
def test_crosstab_invalid_values():
    """crosstab is expected to raise when values are not numeric."""
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    zones = xa.DataArray(np.array([[1, 2, 0]], dtype=int))

    # must be either int or float
    values = xa.DataArray(np.array([[['apples', 'foobar', 'cowboy']]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']

    # the exception info is not inspected, so it is not bound to a name
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)
Пример #4
0
def test_crosstab_3d():
    """Compare numpy- and dask-backed crosstab on 3D values.

    Two scenarios are checked against hand-written tables: a plain call with
    ``layer=-1`` and a call that additionally passes ``nodata_values=1``.
    """
    # table expected from the plain call
    expected_plain = dict(
        zone=[1, 2, 3],
        cat1=[6, 5, 6],
        cat2=[6, 5, 6],
        cat3=[6, 5, 6],
        cat4=[6, 5, 6],
    )

    # numpy-backed inputs
    np_zones, _, np_values = create_zones_values(backend='numpy')
    numpy_df = crosstab(
        zones=np_zones,
        values=np_values,
        zone_ids=[1, 2, 3],
        layer=-1,
    )
    # dask-backed inputs (this call also lists the categories explicitly)
    dask_zones, _, dask_values = create_zones_values(backend='dask')
    dask_df = crosstab(
        zones=dask_zones,
        values=dask_values,
        zone_ids=[1, 2, 3],
        cat_ids=['cat1', 'cat2', 'cat3', 'cat4'],
        layer=-1,
    )
    check_results(numpy_df, dask_df, expected_plain)

    # ----- no values case ------
    expected_empty = dict(
        zone=[1, 2, 3],
        cat1=[0, 0, 0],
        cat2=[0, 0, 0],
        cat3=[0, 0, 0],
        cat4=[0, 0, 0],
    )

    def nodata_args():
        # fresh argument objects for every call
        return dict(layer=-1, zone_ids=[1, 2, 3], nodata_values=1)

    # numpy-backed inputs
    np_zones, _, np_values = create_zones_values(backend='numpy')
    numpy_df = crosstab(zones=np_zones, values=np_values, **nodata_args())
    # dask-backed inputs
    dask_zones, _, dask_values = create_zones_values(backend='dask')
    dask_df = crosstab(zones=dask_zones, values=dask_values, **nodata_args())
    check_results(numpy_df, dask_df, expected_empty)
Пример #5
0
def test_crosstab_invalid_zones():
    """crosstab is expected to raise for 1D zones and for float zones."""
    # invalid dims (must be 2d): this zones array is 1D
    zones = xa.DataArray(np.array([1, 2, 0]))

    values = xa.DataArray(np.array([[[1, 2, 0.5]]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']

    # the exception info is not inspected, so it is not bound to a name
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)

    # invalid values (must be int): the 0.5 makes this a float array
    # NOTE(review): zones is (1, 3) while values is (1, 1, 3), so a shape
    # check could raise before any dtype check -- confirm.
    zones = xa.DataArray(np.array([[1, 2, 0.5]]))
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)
Пример #6
0
def test_crosstab_2d():
    """Compare numpy- and dask-backed crosstab on 2D values.

    Checks the call made without an ``agg`` argument and the call made with
    ``agg='percentage'`` against hand-written tables.
    """
    # table expected when no ``agg`` argument is given
    expected_default = {
        'zone': [1, 2, 3],
        0: [0, 0, 1],
        1: [6, 0, 0],
        2: [0, 4, 0],
    }

    # numpy-backed inputs
    np_zones, np_values, _ = create_zones_values(backend='numpy')
    numpy_df = crosstab(zones=np_zones, values=np_values,
                        zone_ids=[1, 2, 3], cat_ids=[0, 1, 2])
    # dask-backed inputs
    dask_zones, dask_values, _ = create_zones_values(backend='dask')
    dask_df = crosstab(zones=dask_zones, values=dask_values,
                       zone_ids=[1, 2, 3], nodata_values=3)
    check_results(numpy_df, dask_df, expected_default)

    # table expected for agg='percentage'
    expected_percentage = {
        'zone': [1, 2],
        1: [100, 0],
        2: [0, 100],
    }

    def percentage_args():
        # fresh argument objects for every call
        return dict(zone_ids=[1, 2], cat_ids=[1, 2],
                    nodata_values=3, agg='percentage')

    # the inputs created above are reused for both backends
    numpy_df = crosstab(zones=np_zones, values=np_values,
                        **percentage_args())
    dask_df = crosstab(zones=dask_zones, values=dask_values,
                       **percentage_args())
    check_results(numpy_df, dask_df, expected_percentage)
Пример #7
0
def test_crosstab_3d():
    """Check the shape and row sums of crosstab on all-ones 3D values."""
    # values are all 1s, with four labelled categories along 'race'
    values_agg = xa.DataArray(np.ones(24).reshape(2, 3, 4),
                              dims=['lat', 'lon', 'race'])
    values_agg['race'] = ['cat1', 'cat2', 'cat3', 'cat4']
    layer = 'race'

    # create a valid `zones_agg` with compatible shape; ``np.int`` has been
    # removed from NumPy, the builtin ``int`` yields the same dtype
    zones_arr = np.arange(6, dtype=int).reshape(2, 3)
    zones_agg = xa.DataArray(zones_arr)

    df = crosstab(zones_agg, values_agg, layer)

    # Length of the last axis, i.e. the number of category labels.
    # ``len(values_agg.dims[-1])`` would measure the dim *name* ('race').
    num_cats = values_agg.shape[-1]
    # number of columns = number of categories
    assert len(df.columns) == num_cats

    # exclude region with 0 zone id; np.unique keeps the ids sorted, so
    # the first id used below is deterministic
    zone_ids = [zone for zone in np.unique(zones_arr) if zone != 0]
    # number of rows = number of zones
    assert len(df.index) == len(zone_ids)

    # no NaN
    assert df.isnull().sum().sum() == 0

    # values_agg are all 1s, so all categories have same percentage over zones
    for col in df.columns:
        assert len(df[col].unique()) == 1

    cat_cols = ['cat1', 'cat2', 'cat3', 'cat4']
    df['check_sum'] = df[cat_cols].sum(axis=1)
    # sum of a row is 1.0 (compared with a tolerance, as it is a float sum)
    assert np.isclose(df['check_sum'][zone_ids[0]], 1.0)
Пример #8
0
def test_crosstab_no_values():
    """Check crosstab output when every value is 0."""
    # create valid `values_agg` of 0s
    values_agg = xa.DataArray(np.zeros(24).reshape(2, 3, 4),
                              dims=['lat', 'lon', 'race'])
    values_agg['race'] = ['cat1', 'cat2', 'cat3', 'cat4']

    # create a valid `zones_agg` with compatible shape; ``np.int`` has been
    # removed from NumPy, the builtin ``int`` yields the same dtype
    zones_arr = np.arange(6, dtype=int).reshape(2, 3)
    zones_agg = xa.DataArray(zones_arr)

    df = crosstab(zones_agg, values_agg)

    # Length of the last axis, i.e. the number of category labels.
    # ``len(values_agg.dims[-1])`` would measure the dim *name* ('race').
    num_cats = values_agg.shape[-1]
    # number of columns = number of categories
    assert len(df.columns) == num_cats

    # exclude region with 0 zone id
    zone_ids = [zone for zone in np.unique(zones_arr) if zone != 0]
    num_zones = len(zone_ids)
    # number of rows = number of zones
    assert len(df.index) == num_zones

    num_zeros = (df == 0).sum().sum()
    # all are 0s
    assert num_zeros == num_zones * num_cats
Пример #9
0
def test_crosstab_3d(backend, data_zones, data_values_3d, result_crosstab_3d):
    """Run crosstab on the 3D values fixture and check it for ``backend``.

    ``result_crosstab_3d`` unpacks into the layer, the zone ids and the
    expected result; the fixtures themselves are defined outside this block.
    """
    layer_sel, wanted_zones, expected = result_crosstab_3d
    actual = crosstab(
        zones=data_zones,
        values=data_values_3d,
        zone_ids=wanted_zones,
        layer=layer_sel,
    )
    check_results(backend, actual, expected)
Пример #10
0
def test_crosstab_2d():
    """Check the shape and a row sum of crosstab on 2D values."""
    # the values include np.inf and np.nan next to six finite categories
    values_val = np.asarray([[0, 0, 10, 20],
                             [0, 0, 0, 10],
                             [np.inf, 30, 20, 50],
                             [10, 30, 40, 40],
                             [10, np.nan, 50, 0]])
    values_agg = xa.DataArray(values_val, dims=['lat', 'lon'])
    zones_val = np.asarray([[1, 1, 6, 6],
                            [1, 1, 6, 6],
                            [3, 5, 6, 6],
                            [3, 5, 7, 7],
                            [3, 7, 7, 0]])
    zones_agg = xa.DataArray(zones_val, dims=['lat', 'lon'])

    df = crosstab(zones_agg, values_agg)

    num_cats = 6  # 0, 10, 20, 30, 40, 50
    # number of columns = number of categories
    assert len(df.columns) == num_cats

    # exclude region with 0 zone id; np.unique keeps the ids sorted, so
    # the first id used below is deterministic
    zone_ids = [zone for zone in np.unique(zones_agg.data) if zone != 0]
    # number of rows = number of zones
    assert len(df.index) == len(zone_ids)

    df.loc[:, 'check_sum'] = df.sum(axis=1)
    # sum of a row is 1.0 (compared with a tolerance, as it is a float sum)
    assert np.isclose(df['check_sum'][zone_ids[0]], 1.0)
Пример #11
0
def test_percentage_crosstab_2d(backend, data_zones, data_values_2d,
                                result_percentage_crosstab_2d):
    """Run crosstab with ``agg='percentage'`` on the 2D values fixture.

    ``result_percentage_crosstab_2d`` unpacks into the nodata value(s), the
    zone ids, the category ids and the expected result.
    """
    nodata, wanted_zones, wanted_cats, expected = \
        result_percentage_crosstab_2d
    actual = crosstab(
        zones=data_zones,
        values=data_values_2d,
        zone_ids=wanted_zones,
        cat_ids=wanted_cats,
        nodata_values=nodata,
        agg='percentage',
    )
    check_results(backend, actual, expected)
Пример #12
0
def test_count_crosstab_2d(backend, data_zones, data_values_2d,
                           result_count_crosstab_2d):
    """Run crosstab without an ``agg`` argument on the 2D values fixture.

    ``result_count_crosstab_2d`` unpacks into the zone ids, the category ids
    and the expected result.
    """
    wanted_zones, wanted_cats, expected = result_count_crosstab_2d
    actual = crosstab(zones=data_zones,
                      values=data_values_2d,
                      zone_ids=wanted_zones,
                      cat_ids=wanted_cats)
    check_results(backend, actual, expected)
Пример #13
0
def test_crosstab_3d_agg_method(backend, data_zones, data_values_3d,
                                result_crosstab_3d):
    """Run crosstab on the 3D values fixture once per aggregation method.

    The expected result is looked up by the aggregation name for each run.
    """
    layer_sel, wanted_zones, expected_by_agg = result_crosstab_3d
    for method in ('min', 'max', 'mean', 'sum', 'std', 'var', 'count'):
        actual = crosstab(
            zones=data_zones,
            values=data_values_3d,
            zone_ids=wanted_zones,
            layer=layer_sel,
            agg=method,
        )
        check_results(backend, actual, expected_by_agg[method])
Пример #14
0
def test_crosstab_invalid_input():
    """crosstab is expected to raise for each kind of invalid input below.

    The exception info is never inspected, so it is not bound to a name.
    """
    # invalid zones dims (must be 2d): this zones array is 1D
    zones = xa.DataArray(np.array([1, 2, 0]))
    values = xa.DataArray(np.array([[[1, 2, 0.5]]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)

    # invalid zones dtype (must be int): the 0.5 makes this a float array
    zones = xa.DataArray(np.array([[1, 2, 0.5]]))
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)

    # invalid values
    # ``np.int`` has been removed from NumPy; the builtin ``int`` yields
    # the same dtype
    zones = xa.DataArray(np.array([[1, 2, 0]], dtype=int))
    # values must be either int or float
    values = xa.DataArray(np.array([[['apples', 'foobar', 'cowboy']]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)

    # mismatch shape zones and values: (1, 2) against (1, 1, 3)
    zones = xa.DataArray(np.array([[1, 2]]))
    values = xa.DataArray(np.array([[[1, 2, np.nan]]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values)

    # invalid layer
    zones = xa.DataArray(np.array([[1, 2]]))
    values = xa.DataArray(np.array([[[1, 2, np.nan]]]),
                          dims=['lat', 'lon', 'race'])
    values['race'] = ['cat1', 'cat2', 'cat3']
    # this layer does not exist in values agg
    layer = 'cat'
    # NOTE(review): the shapes here also differ, so the exception could come
    # from a shape check rather than the layer check -- confirm.
    with pytest.raises(Exception):
        crosstab(zones_agg=zones, values_agg=values, layer=layer)
Пример #15
0
def test_crosstab_2d_dtypes():
    """crosstab on float16 2D values should return a pandas DataFrame."""
    # float16 values, including np.inf and np.nan entries
    raw_values = np.asarray(
        [
            [0, 0, 10, 20],
            [0, 0, 0, 10],
            [np.inf, 30, 20, 50],
            [10, 30, 40, 40],
            [10, np.nan, 50, 0],
        ],
        dtype=np.float16,
    )
    raw_zones = np.asarray(
        [
            [1, 1, 6, 6],
            [1, 1, 6, 6],
            [3, 5, 6, 6],
            [3, 5, 7, 7],
            [3, 7, 7, 0],
        ]
    )
    values_da = xa.DataArray(raw_values, dims=['lat', 'lon'])
    zones_da = xa.DataArray(raw_zones, dims=['lat', 'lon'])

    result = crosstab(zones_da, values_da)

    assert isinstance(result, pd.DataFrame)
Пример #16
0
def test_crosstab_no_zones():
    """Check crosstab output when every cell has zone id 0."""
    # create valid `values_agg`
    values_agg = xa.DataArray(np.zeros(24).reshape(2, 3, 4),
                              dims=['lat', 'lon', 'race'])
    values_agg['race'] = ['cat1', 'cat2', 'cat3', 'cat4']
    # create a valid `zones_agg` with compatible shape
    # no zone: every cell is 0 (``np.int`` has been removed from NumPy, the
    # builtin ``int`` yields the same dtype)
    zones_arr = np.zeros((2, 3), dtype=int)
    zones_agg = xa.DataArray(zones_arr)

    # Length of the last axis, i.e. the number of category labels.
    # ``len(values_agg.dims[-1])`` would measure the dim *name* ('race').
    num_cats = values_agg.shape[-1]
    df = crosstab(zones_agg, values_agg)

    # number of columns = number of categories
    assert len(df.columns) == num_cats
    # no row as no zone
    assert len(df.index) == 0